def _type_single(self, value, _type):
    ''' apply type to the single value '''
    if value is None or _type in (None, NoneType):
        # don't convert null values
        # default type is the original type if none set
        pass
    elif isinstance(value, _type):  # or values already of correct type
        # normalize all dates to epochs
        value = dt2ts(value) if _type in [datetime, date] else value
    else:
        if _type in (datetime, date):
            # normalize all dates to epochs
            value = dt2ts(value)
        elif _type in (unicode, str):
            # make sure all string types are properly unicoded
            value = to_encoding(value)
        else:
            try:
                value = _type(value)
            except Exception:
                value = to_encoding(value)
                logger.error("typecast failed: %s(value=%s)" % (
                    _type.__name__, value))
                raise
    return value
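# A standalone sketch of the same normalization rules for illustration; this
# `typecast` helper is not part of metrique, and the inline `epoch` lambda
# stands in for dt2ts():
from datetime import datetime, date
import calendar

def typecast(value, _type=None):
    # dates/datetimes normalize to float epochs; other values cast to _type
    epoch = lambda d: float(calendar.timegm(d.timetuple()))
    if value is None or _type is None:
        return value  # nulls and untyped values pass through unchanged
    if _type in (datetime, date):
        return epoch(value) if isinstance(value, (datetime, date)) else value
    return value if isinstance(value, _type) else _type(value)

assert typecast(None, int) is None
assert typecast('42', int) == 42
assert typecast(datetime(2001, 1, 1), datetime) == 978307200.0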
def metrique_object(_oid, _id=None, _hash=None, _start=None, _end=None,
                    _e=None, _v=None, id=None, __v__=None, **kwargs):
    '''
    Function which takes a dictionary (Mapping) object as input
    and returns back a metrique object.

    Special meta properties are added to each object::

        _oid: ...
        _start: ...
        ...

    FIXME
    '''
    # NOTE: we completely ignore incoming 'id' keys!
    # id is RESERVED and ALWAYS expected to be 'autoincrement'
    # upon insertion into DB (though, it's optional, depending
    # on backend storage behavior).
    if id:
        warnings.warn('non-null "id" keys detected, ignoring them!')

    _e = dict(_e or {})  # expecting a dict with copy() attr
    _v = int(_v or 0)

    if not isinstance(_start, float):
        _start = dt2ts(_start) if _start else utcnow(as_datetime=False)
    assert _start is not None, "_start (%s) must be set!" % _start

    if not isinstance(_end, float):
        _end = dt2ts(_end) if _end else None

    _err_msg = "_end(%s) must be >= _start(%s) or None!" % (_end, _start)
    assert _end is None or bool(_end >= _start), _err_msg

    # these meta fields are used to generate unique object _hash
    kwargs['_oid'] = _oid
    kwargs['_v'] = _v
    kwargs['_id'] = gen_id(_oid, _start, _end)  # ignore passed in _id
    # generate unique, consistent object _hash based on 'frozen' obj contents
    # FIXME: make _hash == None valid
    # kwargs['_hash'] = jsonhash(kwargs) if _hash else None
    kwargs['_hash'] = jsonhash(kwargs)

    # add some additional non-hashable meta data
    kwargs['_start'] = _start
    kwargs['_end'] = _end
    kwargs['__v__'] = __v__ or __version__
    kwargs['_e'] = _e
    return kwargs
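# A usage sketch of the invariant meta fields; the exact _id/_hash values
# depend on gen_id()/jsonhash(), so only presence and defaults are asserted:
from metrique import metrique_object

o = metrique_object(_oid=1, col_1='a', _start=978307200.0)
assert o['_oid'] == 1
assert o['_start'] == 978307200.0  # floats pass through unconverted
assert o['_end'] is None
assert o['_v'] == 0 and o['_e'] == {}
assert set(('_id', '_hash', '__v__')) <= set(o)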
def test_dt2ts():
    '''Check int epoch, datetime and ISO string inputs all convert.'''
    from metrique.utils import dt2ts

    # FIXME: millisecond precision, better?
    now_time = int(time())
    now_date = datetime.utcfromtimestamp(now_time)
    now_date_iso = now_date.isoformat()
    assert dt2ts(None) is None
    assert dt2ts(now_time) == now_time
    assert dt2ts(now_date) == now_time
    assert dt2ts(now_date_iso) == now_time
def _linreg_future(self, series, since, days=20):
    '''
    Predicts future using linear regression.

    :param series:
        A series in which the values will be placed.
        The index will not be touched.
        Only the values on dates > `since` will be predicted.
    :param since:
        The starting date from which the future will be predicted.
    :param days:
        Specifies how many past days should be used in the
        linear regression.
    '''
    last_days = pd.date_range(end=since, periods=days)
    hist = self.history(last_days)

    # fit a line (y = w[0] * x + w[1]) over the last `days` of history;
    # list() so numpy gets a sequence (map() is lazy under Python 3)
    xi = np.array(list(map(dt2ts, hist.index)))
    A = np.array([xi, np.ones(len(hist))])
    y = hist.values
    w = np.linalg.lstsq(A.T, y)[0]

    for d in series.index[series.index > since]:
        series[d] = w[0] * dt2ts(d) + w[1]
        # clamp negative predictions to zero
        series[d] = 0 if series[d] < 0 else series[d]
    return series
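# The regression above is an ordinary least-squares line fit; a minimal
# self-contained sketch of the same technique with plain numpy:
import numpy as np

xi = np.array([0.0, 1.0, 2.0, 3.0])    # epoch-like x values
y = np.array([1.0, 3.0, 5.0, 7.0])     # observed history (y = 2x + 1)
A = np.array([xi, np.ones(len(xi))])   # design matrix [x, 1]
w = np.linalg.lstsq(A.T, y)[0]         # w[0] = slope, w[1] = intercept
assert abs(w[0] - 2.0) < 1e-6 and abs(w[1] - 1.0) < 1e-6
prediction = max(0.0, w[0] * 4.0 + w[1])  # extrapolate, clamp negatives
assert abs(prediction - 9.0) < 1e-6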
def _activity_import_doc(self, time_doc, activities):
    '''
    Import activities for a single document into timeline.
    '''
    batch_updates = [time_doc]
    # We want to consider only activities that happened before time_doc
    # do not move this, because time_doc._start changes
    # time_doc['_start'] is a timestamp, whereas act[0] is a datetime
    # we need to be sure to convert act[0] (when) to timestamp!
    td_start = time_doc['_start']
    # list() so .sort() works under Python 3, where filter() is lazy
    activities = list(filter(lambda act: (act[0] < td_start and
                                          act[1] in time_doc), activities))
    creation_field = self.lconfig.get('cfield')
    # make sure that activities are sorted by when descending
    activities.sort(reverse=True, key=lambda o: o[0])
    new_doc = {}
    for when, field, removed, added in activities:
        last_doc = batch_updates.pop()
        # check if this activity happened at the same time as the last
        # one; if it did then we need to group them together
        if last_doc['_end'] == when:
            new_doc = deepcopy(last_doc)
            last_doc = batch_updates.pop()
        else:
            new_doc = deepcopy(last_doc)
            new_doc['_start'] = when
            new_doc['_end'] = when
            last_doc['_start'] = when
        last_val = last_doc[field]
        new_val, inconsistent = self._activity_backwards(
            new_doc[field], removed, added)
        new_doc[field] = new_val
        # Check if the object has the correct field value.
        if inconsistent:
            self._log_inconsistency(last_doc, last_val, field,
                                    removed, added, when)
            new_doc['_e'] = {} if not new_doc.get('_e') else new_doc['_e']
            # set corrupted field value to the value that was added
            # and continue processing as if that issue didn't exist
            new_doc['_e'][field] = added
        # Add the objects to the batch
        batch_updates.extend([last_doc, new_doc])
    # try to set the _start of the first version to the creation time
    try:
        # set start to creation time if available
        last_doc = batch_updates[-1]
        if creation_field:
            # again, we expect _start to be epoch float...
            creation_ts = dt2ts(last_doc[creation_field])
            if creation_ts < last_doc['_start']:
                last_doc['_start'] = creation_ts
        elif len(batch_updates) == 1:
            # we have only one version, that we did not change
            return []
        else:
            pass  # leave as-is
    except Exception as e:
        logger.error('Error updating creation time; %s' % e)
    return batch_updates
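# The activity tuples consumed above are (when, field, removed, added).
# _activity_backwards is not shown in this section; the following is a
# hypothetical sketch of that backwards step: reverting one activity means
# restoring what was removed and dropping what was added, flagging any
# value that doesn't match the recorded 'added' side as inconsistent:
def activity_backwards_sketch(val, removed, added):
    if isinstance(val, list):
        inconsistent = not all(a in val for a in (added or []))
        new_val = [v for v in val if v not in (added or [])]
        new_val.extend(removed or [])
    else:
        inconsistent = val != added
        new_val = removed
    return new_val, inconsistent

assert activity_backwards_sketch('b', removed='a', added='b') == ('a', False)
assert activity_backwards_sketch('x', removed='a', added='b') == ('a', True)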
def test__get_datetime():
    from metrique.utils import _get_datetime, utcnow, dt2ts

    now_tz = utcnow(tz_aware=True, as_datetime=True)
    now = now_tz.replace(tzinfo=None)
    try:
        now_tz == now  # can't compare tz_aware <> naive
    except TypeError:
        pass
    else:
        assert False
    # default is tz_aware=False
    assert _get_datetime(now_tz) == now
    assert _get_datetime(now) == now
    assert _get_datetime(now_tz, tz_aware=True) == now_tz
    assert _get_datetime(now, tz_aware=True) == now_tz
    assert _get_datetime(dt2ts(now), tz_aware=True) == now_tz
def p_Call(self, node):
    if node.func.id == 'empty':
        if len(node.args) != 1:
            raise ValueError('empty expects 1 argument.')
        name = self.p_array_name(node.args[0])
        return name == '{}'
    elif node.func.id == 'date':
        if len(node.args) != 1:
            raise ValueError('date expects 1 argument.')
        else:
            # convert all datetimes to float epoch
            node.args[0].s = dt2ts(node.args[0].s)
            return self.p(node.args[0])
    elif node.func.id in ['regex', 'iregex']:
        return (node.func.id, self.p(node.args[0]))
    else:
        raise ValueError('Unknown function: %s' % node.func.id)
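# p_Call handles ast.Call nodes from a parsed query string; a minimal
# sketch of the node shape it receives, using the stdlib ast module:
import ast

tree = ast.parse("date('2001-01-01')", mode='eval')
call = tree.body
assert isinstance(call, ast.Call)
assert call.func.id == 'date'
# .s is the string literal the visitor rewrites in place via dt2ts()
# (on newer Pythons the canonical attribute is .value)
assert call.args[0].s == '2001-01-01'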
def test_api():
    from metrique import MetriqueContainer, metrique_object
    from metrique.utils import utcnow, remove_file, dt2ts, ts2dt

    _start = ts2dt('2001-01-01')
    _end = ts2dt('2001-01-02')
    a = {'_oid': 1, 'col_1': 1, 'col_2': utcnow(), '_start': _start}
    b = {'_oid': 2, 'col_1': 2, 'col_2': utcnow(), '_start': _start}
    ma = metrique_object(**a)
    mb = metrique_object(**b)
    objs_list = [a, b]
    r_objs_dict = {u'1': ma, u'2': mb}

    c = MetriqueContainer()
    assert not c.name
    assert not c._proxy

    MetriqueContainer()

    # check various forms of passing in objects results in expected
    # container contents
    assert c == {}
    assert MetriqueContainer(objects=c) == {}
    assert MetriqueContainer(objects=objs_list) == r_objs_dict
    mc = MetriqueContainer(objects=objs_list)
    assert MetriqueContainer(objects=mc) == r_objs_dict

    # setting version should result in all objects added having that version
    # note: version -> _v in metrique_object
    assert mc.version == 0
    assert mc['1']['_v'] == 0
    mc = MetriqueContainer(objects=objs_list, version=3)
    assert mc.version == 3
    assert mc['1']['_v'] == 3

    # setting converts key to _id of value after being passed
    # through metrique_object(); notice key int(5) -> str('5')
    mc[5] = {'_oid': 5}
    assert mc['5']['_oid'] == 5
    # also note, that it doesn't actually matter what key we use
    # to set the object... since we always set based on value's
    # auto-generated _id value, anyway
    mc[42] = {'_oid': 5}
    assert mc['5']['_oid'] == 5

    # should have 3 objects, first two, plus the last one
    assert len(mc) == 3
    assert len(mc.values()) == 3
    assert sorted(mc._ids) == ['1', '2', '5']
    assert sorted(mc._oids) == [1, 2, 5]

    try:
        mc.ls()
    except NotImplementedError:
        pass
    else:
        assert False

    mc.extend([{'_oid': 6}, {'_oid': 7}])
    assert sorted(mc._oids) == [1, 2, 5, 6, 7]

    mc.add({'_oid': 8, '_start': _start, '_end': _end, 'col_1': True})
    mc.add({'_oid': 8, '_end': None, 'col_1': False})
    assert sorted(mc._oids) == [1, 2, 5, 6, 7, 8]

    r = mc.filter(where={'_oid': 8})
    assert len(r) == 2
    assert sorted(mc._oids) == [1, 2, 5, 6, 7, 8]

    mc.pop('7')
    assert sorted(mc._oids) == [1, 2, 5, 6, 8]
    mc.pop(6)
    assert sorted(mc._oids) == [1, 2, 5, 8]
    del mc[5]
    assert sorted(mc._oids) == [1, 2, 8]
    assert '1' in mc
    mc.clear()
    assert mc == {}

    db = 'admin'
    name = 'container_test'
    c = MetriqueContainer(name=name, db=db)

    _expected_db_path = os.path.join(cache_dir, 'admin.sqlite')
    # test drop
    c.drop(True)
    assert c.proxy._sqlite_path == _expected_db_path

    # make sure we're working with a clean db
    remove_file(_expected_db_path)

    mc = MetriqueContainer(name=name, db=db, objects=objs_list)
    assert mc.df() is not None
    assert mc.df().empty is False

    # local persistence; filter method queries .objects buffer
    # .upsert dumps data to proxy db; but leaves the data in the buffer
    # .flush dumps data and removes all objects dumped
    # count queries proxy db
    mc = MetriqueContainer(name=name, db=db, objects=objs_list)
    _store = deepcopy(mc.store)

    assert len(mc.filter({'col_1': 1})) == 1
    _ids = mc.upsert()
    assert _ids == ['1', '2']
    assert mc.store == _store
    assert len(mc.filter({'col_1': 1})) == 1
    assert mc.count('col_1 == 1') == 1
    assert mc.count() == 2

    # persisting again shouldn't result in new rows
    _ids = mc.upsert()
    assert _ids == ['1', '2']
    assert mc.store == _store
    assert len(mc.filter({'col_1': 1})) == 1
    assert mc.count('col_1 == 1') == 1
    assert mc.count() == 2

    # flushing now shouldn't result in new rows; but store should be empty
    _ids = mc.flush()
    assert _ids == ['1', '2']
    assert mc.store == {}
    assert len(mc.filter({'col_1': 1})) == 0
    assert mc.count('col_1 == 1') == 1
    assert mc.count() == 2

    # adding the same object shouldn't result in new rows
    a.update({'col_1': 42})
    mc.add(a)
    assert len(mc.filter({'col_1': 1})) == 0
    assert len(mc.filter({'col_1': 42})) == 1
    _ids = mc.flush()
    assert mc.count(date='~') == 3
    assert mc.count(date=None) == 2
    assert mc.count('col_1 == 1', date=None) == 0
    assert mc.count('col_1 == 1', date='~') == 1
    assert mc.count('col_1 == 42') == 1
    assert mc.count('col_1 == 42', date='~') == 1
    # adjust for local time...
    # _ts = dt2ts(convert(_start))
    _ts = dt2ts(_start)
    assert _ids == ['1', '1:%s' % _ts]

    # remove the db
    remove_file(_expected_db_path)
def process_bind_param(self, value, dialect):
    return dt2ts(value)
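# process_bind_param belongs on a SQLAlchemy TypeDecorator; a minimal sketch
# of a full column type built around it (the CoerceUTC name is hypothetical;
# assumes dt2ts/ts2dt from metrique.utils):
from sqlalchemy.types import TypeDecorator, Float
from metrique.utils import dt2ts, ts2dt

class CoerceUTC(TypeDecorator):
    '''Store datetimes as float epochs; load them back as datetimes.'''
    impl = Float

    def process_bind_param(self, value, dialect):
        # inbound: datetime or ISO string -> float epoch (None passes through)
        return dt2ts(value)

    def process_result_value(self, value, dialect):
        # outbound: float epoch -> datetime
        return ts2dt(value)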