def date_range(date, func='date'):
    '''
    Return back start and end dates given a date string.

    :param date: metrique date (range) to apply to pql query

    The tilde '~' symbol is used as a date range separator. A tilde by
    itself means 'all date ranges possible' and will therefore search
    all objects irrespective of their _end date timestamp.

    A date on the left with a tilde but no date on the right generates
    a query where the date range starts at the date provided and ends
    'today'; ie, from date -> now.

    A date on the right with a tilde but no date on the left generates
    a query where the date range starts from the first date available
    in the past (oldest) and ends on the date provided; ie, from the
    beginning of known time -> date.

    A date on both the left and right is a simple date range query,
    where the range starts from the date on the left and ends on the
    date on the right; ie, from date to date.
    '''
    if isinstance(date, basestring):
        date = date.strip()
    if not date:
        return '_end == None'
    if date == '~':
        return ''
    # don't include objects whose _start is EXACTLY on the date in
    # question, since we're looking for objects which were true
    # BEFORE the given date, not before or on.
    before = lambda d: '_start < %s("%s")' % (func, ts2dt(d) if d else None)
    after = lambda d: '(_end >= %s("%s") or _end == None)' % \
        (func, ts2dt(d) if d else None)
    split = date.split('~')
    # replace all occurrences of 'T' with ' '; this handles datetimes
    # passed in as YYYY-MM-DDTHH:MM:SS instead of the expected
    # YYYY-MM-DD HH:MM:SS, and drop any trailing timezone offset
    # FIXME: need to adjust (to UTC) for the timezone info we're dropping!
    split = [re.sub(r'\+\d\d:\d\d', '', d.replace('T', ' ')) for d in split]
    if len(split) == 1:  # 'dt'
        return '%s and %s' % (before(split[0]), after(split[0]))
    elif split[0] in ['', None]:  # '~dt'
        return before(split[1])
    elif split[1] in ['', None]:  # 'dt~'
        return after(split[0])
    else:  # 'dt~dt'
        return '%s and %s' % (before(split[1]), after(split[0]))
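# A minimal usage sketch of date_range() (assumptions: it is importable
# from metrique.utils, and the exact datetime text inside date("...")
# is whatever ts2dt() renders for the given input):
def _date_range_examples():
    from metrique.utils import date_range
    assert date_range('~') == ''              # all date ranges; no filter
    assert date_range('') == '_end == None'   # current objects only
    # 'dt'    -> '_start < date("...") and (_end >= date("...") or _end == None)'
    # '~dt'   -> '_start < date("...")'
    # 'dt~'   -> '(_end >= date("...") or _end == None)'
    # 'dt~dt' -> '_start < date("<right>") and (_end >= date("<left>") or _end == None)'
    print date_range('2014-01-01~2014-02-01')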
def test_ts2dt():
    ''' args: ts, milli=False, tz_aware=False '''
    from metrique.utils import ts2dt

    # FIXME: millisecond precision, better?
    now_time = int(time())
    now_time_milli = now_time * 1000
    now_date = datetime.utcfromtimestamp(now_time)
    now_date_iso = now_date.isoformat()

    # datetime already, return it back
    assert ts2dt(now_date) == now_date

    # tz_aware defaults to false
    try:
        # can't compare offset-naive and offset-aware datetimes
        ts2dt(now_time, tz_aware=True) == now_date
    except TypeError:
        pass
    else:
        assert False, "Managed to compare offset-naive and offset-aware "\
                      "datetimes"

    assert ts2dt(now_date, tz_aware=False) == now_date
    assert ts2dt(now_time_milli, milli=True, tz_aware=False) == now_date
    assert ts2dt(now_date_iso) == now_date

    try:
        ts2dt('not a valid datetime str') == now_date
    except TypeError:
        pass
    else:
        assert False, "Managed to convert an invalid timestamp to datetime"
def set_date_bounds(self, date):
    '''
    Pass in the date used in the original query.

    :param date: Date (date range) that was queried:
        date -> 'd', '~d', 'd~', 'd~d'
        d -> '%Y-%m-%d %H:%M:%S,%f', '%Y-%m-%d %H:%M:%S', '%Y-%m-%d'
    '''
    if date is not None:
        split = date.split('~')
        if len(split) == 1:
            self._lbound = ts2dt(date)
            self._rbound = ts2dt(date)
        elif len(split) == 2:
            if split[0] != '':
                self._lbound = ts2dt(split[0])
            if split[1] != '':
                self._rbound = ts2dt(split[1])
        else:
            raise Exception('Date %s is not in the correct format' % date)
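# How the four accepted forms map onto the bounds set above (a sketch;
# 'q' stands in for whatever query object owns set_date_bounds()):
#
#   q.set_date_bounds('2001-01-01')             # _lbound = _rbound = ts2dt(d)
#   q.set_date_bounds('~2001-01-02')            # only _rbound is set
#   q.set_date_bounds('2001-01-01~')            # only _lbound is set
#   q.set_date_bounds('2001-01-01~2001-01-02')  # both bounds are set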
def _fetch_mtime(self, last_update=None):
    if not last_update:
        last_update = self.container.proxy.get_delta_ts() or \
            self.container.get_last_field(field='_start')
    # We need the timezone, to readjust relative to the server's tz
    mtime = ts2dt(last_update, tz_aware=True)
    mtime = mtime.strftime('%Y-%m-%d %H:%M:%S %z') if mtime else mtime
    logger.debug("Last update mtime: %s" % mtime)
    if mtime:
        if self.lconfig.get('parse_timestamp', True):
            dt_format = "yyyy-MM-dd HH:mm:ss z"
            mtime = "parseTimestamp('%s', '%s')" % (mtime, dt_format)
        else:
            mtime = "'%s'" % mtime
    return mtime
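# Standalone sketch of the query-fragment shape _fetch_mtime() returns;
# the mtime value below is illustrative, not fetched from a container:
mtime = '2014-01-01 00:00:00 +0000'
dt_format = "yyyy-MM-dd HH:mm:ss z"
print "parseTimestamp('%s', '%s')" % (mtime, dt_format)
# -> parseTimestamp('2014-01-01 00:00:00 +0000', 'yyyy-MM-dd HH:mm:ss z')
# with parse_timestamp disabled, the fragment is just "'%s'" % mtime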
def test_api():
    from metrique import MetriqueContainer, metrique_object
    from metrique.utils import utcnow, remove_file, dt2ts, ts2dt

    _start = ts2dt('2001-01-01')
    _end = ts2dt('2001-01-02')
    a = {'_oid': 1, 'col_1': 1, 'col_2': utcnow(), '_start': _start}
    b = {'_oid': 2, 'col_1': 2, 'col_2': utcnow(), '_start': _start}
    ma = metrique_object(**a)
    mb = metrique_object(**b)
    objs_list = [a, b]
    r_objs_dict = {u'1': ma, u'2': mb}

    c = MetriqueContainer()
    assert not c.name
    assert not c._proxy

    MetriqueContainer()

    # check that the various forms of passing in objects result in the
    # expected container contents
    assert c == {}
    assert MetriqueContainer(objects=c) == {}
    assert MetriqueContainer(objects=objs_list) == r_objs_dict
    mc = MetriqueContainer(objects=objs_list)
    assert MetriqueContainer(objects=mc) == r_objs_dict

    # setting version should result in all objects added having that version
    # note: version -> _v in metrique_object
    assert mc.version == 0
    assert mc['1']['_v'] == 0
    mc = MetriqueContainer(objects=objs_list, version=3)
    assert mc.version == 3
    assert mc['1']['_v'] == 3

    # setting converts key to _id of value after being passed
    # through metrique_object(); notice key int(5) -> str('5')
    mc[5] = {'_oid': 5}
    assert mc['5']['_oid'] == 5

    # also note that it doesn't actually matter what key we use to set
    # the object... since we always set based on the value's
    # auto-generated _id, anyway
    mc[42] = {'_oid': 5}
    assert mc['5']['_oid'] == 5

    # should have 3 objects: the first two, plus the last one
    assert len(mc) == 3
    assert len(mc.values()) == 3
    assert sorted(mc._ids) == ['1', '2', '5']
    assert sorted(mc._oids) == [1, 2, 5]

    try:
        mc.ls()
    except NotImplementedError:
        pass
    else:
        assert False

    mc.extend([{'_oid': 6}, {'_oid': 7}])
    assert sorted(mc._oids) == [1, 2, 5, 6, 7]

    mc.add({'_oid': 8, '_start': _start, '_end': _end, 'col_1': True})
    mc.add({'_oid': 8, '_end': None, 'col_1': False})
    assert sorted(mc._oids) == [1, 2, 5, 6, 7, 8]

    r = mc.filter(where={'_oid': 8})
    assert len(r) == 2
    assert sorted(mc._oids) == [1, 2, 5, 6, 7, 8]

    mc.pop('7')
    assert sorted(mc._oids) == [1, 2, 5, 6, 8]
    mc.pop(6)
    assert sorted(mc._oids) == [1, 2, 5, 8]
    del mc[5]
    assert sorted(mc._oids) == [1, 2, 8]

    assert '1' in mc
    mc.clear()
    assert mc == {}

    db = 'admin'
    name = 'container_test'
    c = MetriqueContainer(name=name, db=db)

    _expected_db_path = os.path.join(cache_dir, 'admin.sqlite')
    # test drop
    c.drop(True)
    assert c.proxy._sqlite_path == _expected_db_path
    # make sure we're working with a clean db
    remove_file(_expected_db_path)

    mc = MetriqueContainer(name=name, db=db, objects=objs_list)
    assert mc.df() is not None
    assert mc.df().empty is False

    # local persistence; filter() queries the .objects buffer
    # .upsert dumps data to the proxy db, but leaves the data in the buffer
    # .flush dumps data and removes all objects dumped
    # count() queries the proxy db
    mc = MetriqueContainer(name=name, db=db, objects=objs_list)
    _store = deepcopy(mc.store)

    assert len(mc.filter({'col_1': 1})) == 1
    _ids = mc.upsert()
    assert _ids == ['1', '2']
    assert mc.store == _store
    assert len(mc.filter({'col_1': 1})) == 1
    assert mc.count('col_1 == 1') == 1
    assert mc.count() == 2

    # persisting again shouldn't result in new rows
    _ids = mc.upsert()
    assert _ids == ['1', '2']
    assert mc.store == _store
    assert len(mc.filter({'col_1': 1})) == 1
    assert mc.count('col_1 == 1') == 1
    assert mc.count() == 2

    # flushing now shouldn't result in new rows, but store should be empty
    _ids = mc.flush()
    assert _ids == ['1', '2']
    assert mc.store == {}
    assert len(mc.filter({'col_1': 1})) == 0
    assert mc.count('col_1 == 1') == 1
    assert mc.count() == 2

    # adding the same object shouldn't result in new rows
    a.update({'col_1': 42})
    mc.add(a)
    assert len(mc.filter({'col_1': 1})) == 0
    assert len(mc.filter({'col_1': 42})) == 1
    _ids = mc.flush()
    assert mc.count(date='~') == 3
    assert mc.count(date=None) == 2
    assert mc.count('col_1 == 1', date=None) == 0
    assert mc.count('col_1 == 1', date='~') == 1
    assert mc.count('col_1 == 42') == 1
    assert mc.count('col_1 == 42', date='~') == 1
    # adjust for local time...
    #_ts = dt2ts(convert(_start))
    _ts = dt2ts(_start)
    assert _ids == ['1', '1:%s' % _ts]

    # remove the db
    remove_file(_expected_db_path)
def db_tester(proxy):
    from metrique.utils import ts2dt
    from metrique import metrique_object as O

    _start = ts2dt("2001-01-01 00:00:00")
    _start_plus = ts2dt("2001-01-01 00:00:01")
    _end = ts2dt("2001-01-02 00:00:00")
    _before = ts2dt("2000-12-31 00:00:00")
    _after = ts2dt("2001-01-03 00:00:00")
    _date = ts2dt("2014-01-01 00:00:00")

    TABLE = 'bla'
    p = proxy

    # Clear out ALL tables in the database!
    p.drop(True)
    assert p.ls() == []

    # must pass _oid as a kwarg
    obj = {'col_1': 1, 'col_3': _date}
    try:
        O(**obj)
    except TypeError:
        pass
    else:
        assert False

    # _oid can't be null
    obj = {'_oid': None, 'col_1': 1, 'col_3': _date}
    try:
        O(**obj)
    except ValueError:
        pass
    else:
        assert False

    _obj_1 = {'_oid': 1, 'col_1': 1, 'col_3': _date}
    obj_1 = [O(**_obj_1)]
    schema = {
        '_oid': {'type': int},
        'col_1': {'type': int},
        'col_3': {'type': datetime},
    }
    autoschema = p.autoschema(obj_1)
    assert dict(autoschema) == dict(schema)

    table = p.autotable(name=TABLE, schema=schema, create=True)
    assert table is not None

    assert p.count() == 0

    expected_fields = ['__v__', '_e', '_end', '_hash', '_id',
                       '_start', '_v', 'id']
    _exp = expected_fields + _obj_1.keys()
    assert sorted(p.columns()) == sorted(_exp)

    print 'Inserting %s' % obj_1
    p.insert(obj_1)
    assert p.count() == 1
    assert p.find('_oid == 1', raw=True, date=None)
    # should be one object with col_1 == 1 (_oids: 1)
    assert p.count('col_1 == 1', date='~') == 1

    _obj_2 = {'_oid': 2, 'col_1': 1, 'col_3': _date,
              '_start': _start, '_end': _end}
    obj_2 = [O(**_obj_2)]
    print 'Inserting %s' % obj_2
    p.insert(obj_2)
    assert p.count('_oid == 2') == 0
    assert p.count('_oid == 2', date=None) == 0
    assert p.count('_oid == 2', date='%s~' % _start) == 1
    # ~DATE does NOT include objects existing on DATE, only UP TO/BEFORE it
    assert p.count('_oid == 2', date='~%s' % _start) == 0
    assert p.count('_oid == 2', date='~%s' % _start_plus) == 1
    assert p.count('_oid == 2', date='~') == 1
    assert p.count('_oid == 2', date='~%s' % _before) == 0
    assert p.count('_oid == 2', date='%s~' % _after) == 0
    # should be two objects with col_1 == 1 (_oids: 1, 2)
    assert p.count('col_1 == 1', date='~') == 2
    assert p.distinct('_oid') == [1, 2]

    # insert new obj, then update col_3's values
    # note: working with the obj individually, but passing it as a
    # singleton list to insert(), etc
    _obj_3 = {'_oid': 3, 'col_1': 1, 'col_3': _date,
              '_start': _start, '_end': None}
    obj_3 = O(**_obj_3)
    print 'Inserting %s' % obj_3
    p.insert([obj_3])
    assert p.count('_oid == 3', date='~') == 1
    obj_3['col_1'] = 42
    print '... Update 1: %s' % obj_3
    obj_3 = O(**obj_3)
    p.upsert([obj_3])
    # should be two versions of _oid:3
    assert p.count('_oid == 3', date='~') == 2
    # should be three objects with col_1 == 1 (_oids: 1, 2, 3)
    assert p.count('col_1 == 1', date='~') == 3
    assert p.count('col_1 == 42', date='~') == 1
    # should be four object versions in total at this point
    assert p.count(date='~') == 4

    # last _oid should be 3
    assert p.get_last_field('_oid') == 3

    try:
        p.insert([obj_3])
    except Exception:
        pass
    else:
        assert False, "shouldn't be able to insert the same object twice"

    _obj_4 = {'_oid': -1}
    obj_4 = O(**_obj_4)
    print '... Update 2: %s' % obj_4
    p.insert([obj_4])
    # 3 should still be the highest _oid
    assert p.get_last_field('_oid') == 3

    _obj_5 = {'_oid': 42}
    obj_5 = O(**_obj_5)
    p.insert([obj_5])
    # now, 42 should be the highest
    assert p.get_last_field('_oid') == 42

    assert p.ls() == [TABLE]

    # Indexes
    ix = [i['name'] for i in p.index_list().get(TABLE)]
    assert 'ix_col_1' not in ix
    p.index('col_1')
    ix = [i['name'] for i in p.index_list().get(TABLE)]
    assert 'ix_bla_col_1' in ix
def get_objects(self, uri, pull=True, **kwargs):
    '''
    Walk through repo commits to generate a list of repo commit
    objects. Each object has the following properties:

    * repo uri
    * general commit info
    * files added, removed, fnames
    * lines added, removed
    * acked_by
    * signed_off_by
    * resolves
    * related
    '''
    self.repo = repo = git_clone(uri, pull=pull, reflect=True)
    # get a full list of all commit SHAs in the repo (all branches)
    cmd = 'git rev-list --all'
    output = sys_call(cmd, cwd=repo.path)
    repo_shas = set(x.strip() for x in output.split('\n') if x)
    logger.debug("Total Commits: %s" % len(repo_shas))
    cmd = 'git --no-pager log --all --format=sha:%H --numstat'
    output = sys_call(cmd, cwd=repo.path)
    all_logs = re.sub(r'\n+', '\n', output)
    c_logs = [x for x in [s.strip() for s in all_logs.split('sha:')] if x]
    _end = None  # once was true, always is true...
    objs = []
    for c_log in c_logs:
        sha, s, all_changes = c_log.partition('\n')
        #try:
        c = repo.get_object(sha)
        # FIXME: not normalizing to UTC
        _start = ts2dt(c.commit_time)
        #except Exception as e:
        #    _start = now
        #    obj = dict(_oid=sha, _start=_start, _end=_end,
        #               repo_uri=uri, _e={sha: to_encoding(e)})
        #    self.objects.add(obj)
        #    continue
        # and some basic stuff...
        obj = dict(_oid=sha, _start=_start, _end=_end, repo_uri=uri,
                   tree=c.tree, parents=c.parents, author=c.author,
                   committer=c.committer, author_time=c.author_time,
                   message=c.message, mergetag=c.mergetag, extra=c.extra)
        for _file in all_changes.split('\n'):
            _file = _file.strip()
            obj.setdefault('files', {})
            if not _file:
                added, removed, fname = 0, 0, None
            else:
                added, removed, fname = _file.split('\t')
                added = 0 if added == '-' else int(added)
                removed = 0 if removed == '-' else int(removed)
            # FIXME: sql doesn't nest well..
            changes = {'added': added, 'removed': removed}
            obj['files'][fname] = changes
        # file +/- totals
        obj['added'] = sum(
            [v.get('added', 0) for v in obj['files'].itervalues()])
        obj['removed'] = sum(
            [v.get('removed', 0) for v in obj['files'].itervalues()])
        # extract interesting bits from the message
        obj['acked_by'] = acked_by_re.findall(c.message)
        obj['signed_off_by'] = signed_off_by_re.findall(c.message)
        obj['resolves'] = resolves_re.findall(c.message)
        obj['related'] = related_re.findall(c.message)
        objs.append(obj)
    self.objects.extend(objs)
    return super(Commit, self).get_objects(**kwargs)
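# The loop above parses 'git log --format=sha:%H --numstat' output
# shaped roughly like this (SHAs and paths are illustrative):
#
#   sha:a1b2c3d...
#   3       1       src/foo.py
#   -       -       assets/logo.png    <- binary diffs report '-' counts
#
# each per-file line splits on tabs, with '-' coerced to 0:
line = '3\t1\tsrc/foo.py'
added, removed, fname = line.split('\t')
added = 0 if added == '-' else int(added)
removed = 0 if removed == '-' else int(removed)
assert (added, removed, fname) == (3, 1, 'src/foo.py')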