def test_strip_split():
    '''
    Exercise strip_split() with a csv string, None, a list and a dict.

    args: item
    '''
    from metriqueu.utils import strip_split

    a_lst = ['a', 'b', 'c', 'd', 'e']
    a_str = 'a, b, c, d , e'

    # a csv string is split on commas, with surrounding whitespace removed
    assert strip_split(a_str) == a_lst
    # None yields an empty list
    assert strip_split(None) == []
    # a list compares equal to itself after the call
    assert strip_split(a_lst) == a_lst

    # a dict is expected to be rejected with TypeError; the `else` branch
    # makes the test fail if no exception is raised at all (previously the
    # test passed silently in that case)
    try:
        strip_split({})
    except TypeError:
        pass
    else:
        raise AssertionError('strip_split({}) did not raise TypeError')
def extract(self, exclude_fields=None, force=False, last_update=None,
            parse_timestamp=None, **kwargs):
    '''
    Extract routine for SQL based cubes.

    Builds the list of object ids to extract, works out which fields
    to pull and then hands both to the (threaded or non-threaded)
    extract worker.

    :param exclude_fields: fields to leave out of the extract; the value
                           is normalized with strip_split()
    :param force:
        * False (default): only collect ids through the 'delta'
          properties (mtime delta and/or new ids), when enabled
        * True: collect every distinct id found in the cube's table
        * list of oids: extract exactly those oids
    :param last_update: passed through to self._get_mtime_id_delta()
    :param parse_timestamp: passed through to self._get_mtime_id_delta();
                            when None, the 'parse_timestamp' property
                            is looked up instead

    Unknown kwargs are accepted, but ignored.
    '''
    if parse_timestamp is None:
        parse_timestamp = self.get_property('parse_timestamp', None, True)

    exclude_fields = strip_split(exclude_fields)

    # the three `force` cases (True / False / list) are mutually exclusive
    oids = []
    if force is True:
        # full run: ask the database for every known object id
        tbl = self.get_property('table')
        id_column = self.get_property('column')
        query = 'SELECT DISTINCT %s.%s FROM %s.%s' % (
            tbl, id_column, self.db, tbl)
        oids = self._extract_row_ids(self.proxy.fetchall(query))
    elif force is False and self.get_property('delta', None, True):
        # not forcing a full run: apply whichever delta lookups are enabled
        if self.get_property('delta_mtime', None, False):
            # ids of objects updated since the last mtime
            changed = self._get_mtime_id_delta(last_update, parse_timestamp)
            oids.extend(changed)
        if self.get_property('delta_new_ids', None, True):
            oids.extend(self._get_new_ids())
    elif isinstance(force, list):
        # caller supplied the exact oids to extract
        oids = force

    # de-duplicate, then order the ids
    oids = list(set(oids))
    oids.sort()

    # field_order fixes the 'index' of the sql columns, so that when the
    # sql rows are extracted we know which column maps to which field
    known = set(self.fields)
    excluded = set(exclude_fields)
    field_order = list(known - excluded)

    if self.config.batch_size <= 0:
        worker = self._extract
    else:
        worker = self._extract_threaded
    return worker(oids, field_order)
def get_fields(self, owner, cube, fields=None):
    '''
    Return back a dict of (field, 0/1) pairs, where
    the matching fields have 1.

    :param owner: username of cube owner
    :param cube: cube name
    :param fields: fields to query; '__all__' or '~' select everything,
                   any other value is normalized with strip_split()
    :returns: None when everything was requested, otherwise a dict of
              (field, 0/1) pairs which always carries
              `_id`, `_oid`, `_start` and `_end`
    '''
    if not (owner and cube):
        self._raise(400, "owner and cube required")

    # pass `fields` as a logger argument rather than pre-formatting with
    # `%`: a tuple value would otherwise be unpacked as several format
    # arguments and raise TypeError
    logger.debug('... fields: %s', fields)

    if fields in ['__all__', '~']:
        # None will make pymongo return back entire objects
        return None

    # to return `_id`, it must be included in fields
    _fields = {'_id': 0, '_oid': 1, '_start': 1, '_end': 1}
    # dict.fromkeys() collapses duplicate field names on its own
    _fields.update(dict.fromkeys(strip_split(fields), 1))
    return _fields
def get_fields(self, owner, cube, fields=None):
    '''
    Return back a dict of (field, 0/1) pairs, where
    the matching fields have 1.

    :param cube: cube name
    :param owner: username of cube owner
    :param fields: list of fields to query; '__all__' or '~' select
                   everything, any other value is normalized with
                   strip_split()
    :returns: None when whole objects were requested, otherwise a dict
              of (field, 0/1) pairs which always carries
              `_id`, `_oid`, `_start` and `_end`
    '''
    if not (owner and cube):
        self._raise(400, "owner and cube required")

    # hand `fields` to the logger as an argument instead of formatting
    # eagerly with `%`: a tuple value would otherwise be unpacked as
    # several format arguments and raise TypeError
    logger.debug('... fields: %s', fields)

    if fields in ['__all__', '~']:
        # None indicates a request should return back whole objs
        return None

    # to return `_id`, it must be included in fields
    _fields = {'_id': 0, '_oid': 1, '_start': 1, '_end': 1}
    # dict.fromkeys() collapses duplicate field names on its own
    _fields.update(dict.fromkeys(strip_split(fields), 1))
    return _fields