Example #1
0
def date_range(date, func='date'):
    '''
    Return back start and end dates given date string

    :param date: metrique date (range) to apply to pql query
    :param func: name of the date-casting function to embed in the
                 generated query string (eg, 'date')

    The tilde '~' symbol is used as a date range separator.

    A tilde by itself will mean 'all date ranges possible'
    and will therefore search all objects irrespective of their
    _end date timestamp.

    A date on the left with a tilde but no date on the right
    will generate a query where the date range starts
    at the date provided and ends 'today'.
    ie, from date -> now.

    A date on the right with a tilde but no date on the left
    will generate a query where the date range starts from
    the first date available in the past (oldest) and ends
    on the date provided.
    ie, from beginning of known time -> date.

    A date on both the left and right will be a simple date
    range query where the date range starts from the date
    on the left and ends on the date on the right.
    ie, from date to date.
    '''
    if isinstance(date, basestring):
        date = date.strip()
    if not date:
        # no date given -> match only objects which are currently true
        return '_end == None'
    if date == '~':
        # full history requested -> no date restriction at all
        return ''

    # don't include objects which have start EXACTLY on the
    # date in question, since we're looking for objects
    # which were true BEFORE the given date, not before or on.
    before = lambda d: '_start < %s("%s")' % (func, ts2dt(d) if d else None)
    after = lambda d: '(_end >= %s("%s") or _end == None)' % \
        (func, ts2dt(d) if d else None)
    split = date.split('~')
    # replace all occurances of 'T' with ' '
    # this is used for when datetime is passed in
    # like YYYY-MM-DDTHH:MM:SS instead of
    #      YYYY-MM-DD HH:MM:SS as expected
    # and drop all occurances of 'timezone' like substring
    # FIXME: need to adjust (to UTC) for the timezone info we're dropping!
    # NOTE: raw string here; '\+'/'\d' are invalid (deprecated) string
    # escape sequences when written in a plain string literal
    split = [re.sub(r'\+\d\d:\d\d', '', d.replace('T', ' ')) for d in split]
    if len(split) == 1:  # 'dt'
        return '%s and %s' % (before(split[0]), after(split[0]))
    elif split[0] in ['', None]:  # '~dt'
        return before(split[1])
    elif split[1] in ['', None]:  # 'dt~'
        return after(split[0])
    else:  # 'dt~dt'
        return '%s and %s' % (before(split[1]), after(split[0]))
Example #2
0
def date_range(date, func='date'):
    '''
    Build a PQL date-range clause from a metrique date (range) string.

    :param date: metrique date (range) to apply to pql query

    The '~' character separates the two ends of a range:

    * '~' alone matches every object regardless of its _end timestamp.
    * 'dt~' matches from dt until now.
    * '~dt' matches from the oldest known date up to dt.
    * 'dt~dt' matches between the two given dates.
    * a bare 'dt' pins both ends of the range to dt.
    '''
    if isinstance(date, basestring):
        date = date.strip()
    if not date:
        return '_end == None'
    if date == '~':
        return ''

    def started_before(d):
        # objects whose _start lands EXACTLY on the date are excluded
        # on purpose: we want objects true strictly BEFORE the date
        return '_start < %s("%s")' % (func, ts2dt(d) if d else None)

    def ended_after(d):
        return '(_end >= %s("%s") or _end == None)' % (
            func, ts2dt(d) if d else None)

    # normalize the ISO 'T' separator to a space and strip any
    # '+HH:MM' timezone-offset looking substring
    # FIXME: need to adjust (to UTC) for the timezone info we're dropping!
    parts = [re.sub('\+\d\d:\d\d', '', p.replace('T', ' '))
             for p in date.split('~')]
    if len(parts) == 1:  # 'dt'
        return '%s and %s' % (started_before(parts[0]), ended_after(parts[0]))
    elif parts[0] in ['', None]:  # '~dt'
        return started_before(parts[1])
    elif parts[1] in ['', None]:  # 'dt~'
        return ended_after(parts[0])
    else:  # 'dt~dt'
        return '%s and %s' % (started_before(parts[1]), ended_after(parts[0]))
Example #3
0
def test_ts2dt():
    ''' args: ts, milli=False, tz_aware=False'''
    from metrique.utils import ts2dt

    # FIXME: millisecond precision, better?
    now_time = int(time())
    # derive the milli value from now_time instead of calling time()
    # a second time; a second call can cross a second boundary and
    # make now_time_milli disagree with now_date -> flaky test
    now_time_milli = now_time * 1000
    now_date = datetime.utcfromtimestamp(now_time)
    now_date_iso = now_date.isoformat()

    # datetime already, return it back
    assert ts2dt(now_date) == now_date

    # tz_aware defaults to false
    try:
        # can't compare offset-naive and offset-aware datetimes
        ts2dt(now_time, tz_aware=True) == now_date
    except TypeError:
        pass
    else:
        assert False, "Managed to compare offset-naive and offset-aware "\
            "datetimes"

    assert ts2dt(now_date, tz_aware=False) == now_date

    assert ts2dt(now_time_milli, milli=True, tz_aware=False) == now_date

    assert ts2dt(now_date_iso) == now_date
    try:
        ts2dt('not a valid datetime str') == now_date
    except TypeError:
        pass
    else:
        assert False, "Managed to convert an invalid timestamp to datetime"
Example #4
0
def test_ts2dt():
    '''Exercise ts2dt(ts, milli=False, tz_aware=False) conversions.'''
    from metrique.utils import ts2dt

    # FIXME: millisecond precision, better?
    ts_now = int(time())
    ts_now_milli = int(time()) * 1000
    dt_now = datetime.utcfromtimestamp(ts_now)
    dt_now_iso = dt_now.isoformat()

    # a datetime instance is passed straight through
    assert ts2dt(dt_now) == dt_now

    # tz_aware defaults to false; comparing an offset-aware result to
    # an offset-naive datetime must raise TypeError
    raised = False
    try:
        ts2dt(ts_now, tz_aware=True) == dt_now
    except TypeError:
        raised = True
    assert raised, "Managed to compare offset-naive and offset-aware "\
        "datetimes"

    assert ts2dt(dt_now, tz_aware=False) == dt_now

    assert ts2dt(ts_now_milli, milli=True, tz_aware=False) == dt_now

    assert ts2dt(dt_now_iso) == dt_now

    # junk input must also raise TypeError
    raised = False
    try:
        ts2dt('not a valid datetime str') == dt_now
    except TypeError:
        raised = True
    assert raised, "Managed to convert an invalid timestamp to datetime"
Example #5
0
    def set_date_bounds(self, date):
        '''
        Pass in the date used in the original query and set the
        instance's left/right datetime bounds accordingly.

        :param date: Date (date range) that was queried:
            date -> 'd', '~d', 'd~', 'd~d'
            d -> '%Y-%m-%d %H:%M:%S,%f', '%Y-%m-%d %H:%M:%S', '%Y-%m-%d'
        :raises ValueError: if the string contains more than one '~'
        '''
        if date is not None:
            split = date.split('~')
            if len(split) == 1:
                # single date: both bounds collapse to the same moment
                self._lbound = ts2dt(date)
                self._rbound = ts2dt(date)
            elif len(split) == 2:
                # an empty side leaves that bound untouched (unbounded)
                if split[0] != '':
                    self._lbound = ts2dt(split[0])
                if split[1] != '':
                    self._rbound = ts2dt(split[1])
            else:
                # ValueError is a subclass of Exception, so existing
                # `except Exception` callers continue to work
                raise ValueError(
                    'Date %s is not in the correct format' % date)
Example #6
0
    def set_date_bounds(self, date):
        '''
        Record the left/right datetime bounds implied by the date
        (range) string used in the original query.

        :param date: Date (date range) that was queried:
            date -> 'd', '~d', 'd~', 'd~d'
            d -> '%Y-%m-%d %H:%M:%S,%f', '%Y-%m-%d %H:%M:%S', '%Y-%m-%d'
        '''
        if date is None:
            return
        parts = date.split('~')
        if len(parts) == 1:
            # a bare date pins both ends of the range
            self._lbound = ts2dt(date)
            self._rbound = ts2dt(date)
        elif len(parts) == 2:
            lhs, rhs = parts
            # an empty side means that bound stays unset (open-ended)
            if lhs != '':
                self._lbound = ts2dt(lhs)
            if rhs != '':
                self._rbound = ts2dt(rhs)
        else:
            raise Exception('Date %s is not in the correct format' % date)
Example #7
0
 def _fetch_mtime(self, last_update=None):
     '''
     Resolve the last-update timestamp and render it as a string
     ready to be embedded in a query.

     :param last_update: explicit timestamp to use; when falsy, fall
         back to the proxy's delta ts, then to the most recent
         '_start' value in the container.
     :returns: the rendered timestamp string — wrapped in
         parseTimestamp(...) when 'parse_timestamp' is enabled (the
         default) in the local config, otherwise just quoted; when no
         timestamp could be resolved the falsy value is returned as-is.
     '''
     if not last_update:
         last_update = self.container.proxy.get_delta_ts() or \
             self.container.get_last_field(field='_start')
     # We need the timezone, to readjust relative to the server's tz
     mtime = ts2dt(last_update, tz_aware=True)
     # only format when ts2dt actually produced a datetime
     mtime = mtime.strftime('%Y-%m-%d %H:%M:%S %z') if mtime else mtime
     logger.debug("Last update mtime: %s" % mtime)
     if mtime:
         if self.lconfig.get('parse_timestamp', True):
             # NOTE(review): 'yyyy-MM-dd HH:mm:ss z' looks like a
             # Java/Joda-style pattern expected by the remote
             # parseTimestamp function — confirm against the server
             dt_format = "yyyy-MM-dd HH:mm:ss z"
             mtime = "parseTimestamp('%s', '%s')" % (mtime, dt_format)
         else:
             mtime = "'%s'" % mtime
     return mtime
Example #8
0
 def _fetch_mtime(self, last_update=None):
     '''
     Return the last-update time rendered as a query-ready string.

     Falls back to the proxy's delta ts, then the newest '_start'
     field, when no explicit last_update is given.
     '''
     update_ts = last_update
     if not update_ts:
         update_ts = (self.container.proxy.get_delta_ts() or
                      self.container.get_last_field(field='_start'))
     # We need the timezone, to readjust relative to the server's tz
     stamp = ts2dt(update_ts, tz_aware=True)
     if stamp:
         stamp = stamp.strftime('%Y-%m-%d %H:%M:%S %z')
     logger.debug("Last update mtime: %s" % stamp)
     if stamp:
         if self.lconfig.get('parse_timestamp', True):
             fmt = "yyyy-MM-dd HH:mm:ss z"
             stamp = "parseTimestamp('%s', '%s')" % (stamp, fmt)
         else:
             stamp = "'%s'" % stamp
     return stamp
def test_api():
    '''
    Exercise MetriqueContainer's dict-like API, versioning, and local
    (sqlite-backed) persistence: filter/upsert/flush/count round trips.
    '''
    from metrique import MetriqueContainer, metrique_object
    from metrique.utils import utcnow, remove_file, dt2ts, ts2dt

    _start = ts2dt('2001-01-01')
    _end = ts2dt('2001-01-02')
    a = {'_oid': 1, 'col_1': 1, 'col_2': utcnow(), '_start': _start}
    b = {'_oid': 2, 'col_1': 2, 'col_2': utcnow(), '_start': _start}
    ma = metrique_object(**a)
    mb = metrique_object(**b)
    objs_list = [a, b]
    r_objs_dict = {u'1': ma, u'2': mb}

    c = MetriqueContainer()
    assert not c.name
    assert not c._proxy

    MetriqueContainer()

    # check various forms of passing in objects results in expected
    # container contents

    assert c == {}
    assert MetriqueContainer(objects=c) == {}
    assert MetriqueContainer(objects=objs_list) == r_objs_dict
    mc = MetriqueContainer(objects=objs_list)
    assert MetriqueContainer(objects=mc) == r_objs_dict

    # setting version should result in all objects added having that version
    # note: version -> _v in metrique_object
    assert mc.version == 0
    assert mc['1']['_v'] == 0
    mc = MetriqueContainer(objects=objs_list, version=3)
    assert mc.version == 3
    assert mc['1']['_v'] == 3

    # setting converts key to _id of value after being passed
    # through metrique_object(); notice key int(5) -> str('5')
    mc[5] = {'_oid': 5}
    assert mc['5']['_oid'] == 5
    # also note, that it doesn't actually matter what key we use
    # to set the object... since we always set based on value's
    # auto-generated _id value, anyway
    mc[42] = {'_oid': 5}
    assert mc['5']['_oid'] == 5

    # should have 3 objects, first two, plus the last one
    assert len(mc) == 3
    assert len(mc.values()) == 3
    assert sorted(mc._ids) == ['1', '2', '5']

    assert sorted(mc._oids) == [1, 2, 5]
    # ls() is only implemented on backends with real storage
    try:
        mc.ls()
    except NotImplementedError:
        pass
    else:
        assert False

    mc.extend([{'_oid': 6}, {'_oid': 7}])
    assert sorted(mc._oids) == [1, 2, 5, 6, 7]

    mc.add({'_oid': 8, '_start': _start, '_end': _end, 'col_1': True})
    mc.add({'_oid': 8, '_end': None, 'col_1': False})
    assert sorted(mc._oids) == [1, 2, 5, 6, 7, 8]

    # two historical versions of _oid 8 exist in the buffer
    r = mc.filter(where={'_oid': 8})
    assert len(r) == 2
    assert sorted(mc._oids) == [1, 2, 5, 6, 7, 8]

    assert sorted(mc._oids) == [1, 2, 5, 6, 7, 8]
    # pop/del accept both str and int keys
    mc.pop('7')
    assert sorted(mc._oids) == [1, 2, 5, 6, 8]
    mc.pop(6)
    assert sorted(mc._oids) == [1, 2, 5, 8]
    del mc[5]
    assert sorted(mc._oids) == [1, 2, 8]

    assert '1' in mc

    mc.clear()
    assert mc == {}

    db = 'admin'
    name = 'container_test'
    c = MetriqueContainer(name=name, db=db)

    # NOTE(review): os and cache_dir come from module scope — confirm
    # they are defined at the top of this test module
    _expected_db_path = os.path.join(cache_dir, 'admin.sqlite')
    # test drop
    c.drop(True)
    assert c.proxy._sqlite_path == _expected_db_path
    # make sure we're working with a clean db
    remove_file(_expected_db_path)

    mc = MetriqueContainer(name=name, db=db, objects=objs_list)
    assert mc.df() is not None
    assert mc.df().empty is False

    # local persistence; filter method queries .objects buffer
    # .upsert dumps data to proxy db; but leaves the data in the buffer
    # .flush dumps data and removes all objects dumped
    # count queries proxy db
    mc = MetriqueContainer(name=name, db=db, objects=objs_list)
    _store = deepcopy(mc.store)

    assert len(mc.filter({'col_1': 1})) == 1
    _ids = mc.upsert()
    assert _ids == ['1', '2']
    assert mc.store == _store
    assert len(mc.filter({'col_1': 1})) == 1
    assert mc.count('col_1 == 1') == 1
    assert mc.count() == 2

    # persisting again shouldn't result in new rows
    _ids = mc.upsert()
    assert _ids == ['1', '2']
    assert mc.store == _store
    assert len(mc.filter({'col_1': 1})) == 1
    assert mc.count('col_1 == 1') == 1
    assert mc.count() == 2

    # flushing now shouldn't result in new rows; but store should be empty
    _ids = mc.flush()
    assert _ids == ['1', '2']
    assert mc.store == {}
    assert len(mc.filter({'col_1': 1})) == 0
    assert mc.count('col_1 == 1') == 1
    assert mc.count() == 2

    # adding the same object shouldn't result in new rows
    a.update({'col_1': 42})
    mc.add(a)
    assert len(mc.filter({'col_1': 1})) == 0
    assert len(mc.filter({'col_1': 42})) == 1
    _ids = mc.flush()
    assert mc.count(date='~') == 3
    assert mc.count(date=None) == 2
    assert mc.count('col_1 == 1', date=None) == 0
    assert mc.count('col_1 == 1', date='~') == 1
    assert mc.count('col_1 == 42') == 1
    assert mc.count('col_1 == 42', date='~') == 1
    # adjust for local time...
    #_ts = dt2ts(convert(_start))
    _ts = dt2ts(_start)
    # the updated object becomes a new version keyed '<oid>:<start ts>'
    assert _ids == ['1', '1:%s' % _ts]

    # remove the db
    remove_file(_expected_db_path)
Example #10
0
def db_tester(proxy):
    '''
    Run a battery of CRUD, schema, versioning and index checks
    against the given db proxy. Destructive: drops ALL tables first.

    :param proxy: db proxy instance under test (sqlite, postgres, ...)
    '''
    from metrique.utils import ts2dt
    from metrique import metrique_object as O

    _start = ts2dt("2001-01-01 00:00:00")
    _start_plus = ts2dt("2001-01-01 00:00:01")
    _end = ts2dt("2001-01-02 00:00:00")
    _before = ts2dt("2000-12-31 00:00:00")
    _after = ts2dt("2001-01-03 00:00:00")
    _date = ts2dt("2014-01-01 00:00:00")
    TABLE = 'bla'
    p = proxy

    # Clear out ALL tables in the database!
    p.drop(True)

    assert p.ls() == []

    # must pass _oid as kwarg
    obj = {'col_1': 1, 'col_3': _date}
    try:
        O(**obj)
    except TypeError:
        pass
    else:
        assert False

    # _oid can't be null
    obj = {'_oid': None, 'col_1': 1, 'col_3': _date}
    try:
        O(**obj)
    except ValueError:
        pass
    else:
        assert False

    _obj_1 = {'_oid': 1, 'col_1': 1, 'col_3': _date}
    obj_1 = [O(**_obj_1)]

    # expected auto-derived schema for obj_1
    schema = {
        '_oid': {'type': int},
        'col_1': {'type': int},
        'col_3': {'type': datetime},
    }

    autoschema = p.autoschema(obj_1)
    assert dict(autoschema) == dict(schema)

    table = p.autotable(name=TABLE, schema=schema, create=True)
    assert table is not None

    assert p.count() == 0

    # metadata columns automatically added alongside user fields
    expected_fields = ['__v__', '_e', '_end', '_hash', '_id',
                       '_start', '_v', 'id']

    _exp = expected_fields + _obj_1.keys()
    assert sorted(p.columns()) == sorted(_exp)

    print 'Inserting %s' % obj_1
    p.insert(obj_1)

    assert p.count() == 1
    assert p.find('_oid == 1', raw=True, date=None)
    # should be one object with col_1 == 1 (_oids: 1)
    assert p.count('col_1 == 1', date='~') == 1

    _obj_2 = {'_oid': 2, 'col_1': 1, 'col_3': _date,
              '_start': _start, '_end': _end}
    obj_2 = [O(**_obj_2)]
    print 'Inserting %s' % obj_2
    p.insert(obj_2)
    assert p.count('_oid == 2') == 0
    assert p.count('_oid == 2', date=None) == 0
    assert p.count('_oid == 2', date='%s~' % _start) == 1
    # ~DATE does NOT include objects existing on DATE, but only UP TO/BEFORE
    assert p.count('_oid == 2', date='~%s' % _start) == 0
    assert p.count('_oid == 2', date='~%s' % _start_plus) == 1
    assert p.count('_oid == 2', date='~') == 1
    assert p.count('_oid == 2', date='~%s' % _before) == 0
    assert p.count('_oid == 2', date='%s~' % _after) == 0
    # should be two objects with col_1 == 1 (_oids: 1, 2)
    assert p.count('col_1 == 1', date='~') == 2

    assert p.distinct('_oid') == [1, 2]

    # insert new obj, then update col_3's values
    # note, working with the obj individually, but passing as a sigleton list
    # to insert(), etc
    _obj_3 = {'_oid': 3, 'col_1': 1, 'col_3': _date,
              '_start': _start, '_end': None}
    obj_3 = O(**_obj_3)
    print 'Inserting %s' % obj_3
    p.insert([obj_3])
    assert p.count('_oid == 3', date='~') == 1

    obj_3['col_1'] = 42
    print '... Update 1: %s' % obj_3
    obj_3 = O(**obj_3)
    p.upsert([obj_3])

    # should be two versions of _oid:3
    assert p.count('_oid == 3', date='~') == 2
    # should be three objects with col_1 == 1 (_oids: 1, 2, 3)
    assert p.count('col_1 == 1', date='~') == 3
    assert p.count('col_1 == 42', date='~') == 1

    # should be four object versions in total at this point
    assert p.count(date='~') == 4

    # last _oid should be 3
    assert p.get_last_field('_oid') == 3
    try:
        p.insert([obj_3])
    except Exception:
        pass
    else:
        assert False, "shouldn't be able to insert same object twice"

    _obj_4 = {'_oid': -1}
    obj_4 = O(**_obj_4)
    print '... Update 2: %s' % obj_4
    p.insert([obj_4])
    # 3 should still be highest _oid
    assert p.get_last_field('_oid') == 3

    _obj_5 = {'_oid': 42}
    obj_5 = O(**_obj_5)
    p.insert([obj_5])
    # now, 42 should be highest
    assert p.get_last_field('_oid') == 42

    assert p.ls() == [TABLE]

    # Indexes
    ix = [i['name'] for i in p.index_list().get(TABLE)]
    assert 'ix_col_1' not in ix
    p.index('col_1')
    # index names are prefixed with the table name
    ix = [i['name'] for i in p.index_list().get(TABLE)]
    assert 'ix_bla_col_1' in ix
Example #11
0
    def get_objects(self, uri, pull=True, **kwargs):
        '''
        Walk through repo commits to generate a list of repo commit
        objects.

        Each object has the following properties:
            * repo uri
            * general commit info
            * files added, removed fnames
            * lines added, removed
            * acked_by
            * signed_off_by
            * resolves
            * related

        :param uri: uri of the git repository to clone/mine
        :param pull: when True, pull (update) an existing local clone
        '''
        self.repo = repo = git_clone(uri, pull=pull, reflect=True)
        # get a full list of all commit SHAs in the repo (all branches)
        cmd = 'git rev-list --all'
        output = sys_call(cmd, cwd=repo.path)
        repo_shas = set(x.strip() for x in output.split('\n') if x)
        logger.debug("Total Commits: %s" % len(repo_shas))

        cmd = 'git --no-pager log --all --format=sha:%H --numstat'
        # FIX: run the log inside the cloned repo; cwd was missing here
        # (unlike the rev-list call above), so the command ran against
        # whatever the process' current working directory happened to be
        output = sys_call(cmd, cwd=repo.path)
        # collapse blank lines so the per-commit parse below is simpler
        all_logs = re.sub('\n+', '\n', output)
        c_logs = [x for x in [s.strip() for s in all_logs.split('sha:')] if x]

        _end = None  # once was true, always is true...
        objs = []
        for c_log in c_logs:
            sha, s, all_changes = c_log.partition('\n')
            c = repo.get_object(sha)
            # FIXME: not normalizing to UTC
            _start = ts2dt(c.commit_time)

            # and some basic stuff...
            obj = dict(_oid=sha,
                       _start=_start,
                       _end=_end,
                       repo_uri=uri,
                       tree=c.tree,
                       parents=c.parents,
                       author=c.author,
                       committer=c.committer,
                       author_time=c.author_time,
                       message=c.message,
                       mergetag=c.mergetag,
                       extra=c.extra)

            # --numstat lines are 'added\tremoved\tfname'; '-' marks binary
            for _file in all_changes.split('\n'):
                _file = _file.strip()
                obj.setdefault('files', {})
                if not _file:
                    added, removed, fname = 0, 0, None
                else:
                    added, removed, fname = _file.split('\t')
                    added = 0 if added == '-' else int(added)
                    removed = 0 if removed == '-' else int(removed)
                    # FIXME: sql doesn't nest well..
                    changes = {'added': added, 'removed': removed}
                    obj['files'][fname] = changes

            # file +/- totals
            obj['added'] = sum(
                [v.get('added', 0) for v in obj['files'].itervalues()])
            obj['removed'] = sum(
                [v.get('removed', 0) for v in obj['files'].itervalues()])

            # extract interesting bits from the message
            obj['acked_by'] = acked_by_re.findall(c.message)
            obj['signed_off_by'] = signed_off_by_re.findall(c.message)
            obj['resolves'] = resolves_re.findall(c.message)
            obj['related'] = related_re.findall(c.message)
            objs.append(obj)
        self.objects.extend(objs)

        return super(Commit, self).get_objects(**kwargs)
Example #12
0
    def get_objects(self, uri, pull=True, **kwargs):
        """
        Walk through repo commits to generate a list of repo commit
        objects.

        Each object has the following properties:
            * repo uri
            * general commit info
            * files added, removed fnames
            * lines added, removed
            * acked_by
            * signed_off_by
            * resolves
            * related

        :param uri: uri of the git repository to clone/mine
        :param pull: when True, pull (update) an existing local clone
        """
        self.repo = repo = git_clone(uri, pull=pull, reflect=True)
        # get a full list of all commit SHAs in the repo (all branches)
        cmd = "git rev-list --all"
        output = sys_call(cmd, cwd=repo.path)
        repo_shas = set(x.strip() for x in output.split("\n") if x)
        logger.debug("Total Commits: %s" % len(repo_shas))

        cmd = "git --no-pager log --all --format=sha:%H --numstat"
        # FIX: run the log inside the cloned repo; cwd was missing here
        # (unlike the rev-list call above), so the command ran against
        # whatever the process' current working directory happened to be
        output = sys_call(cmd, cwd=repo.path)
        # collapse blank lines so the per-commit parse below is simpler
        all_logs = re.sub("\n+", "\n", output)
        c_logs = [x for x in [s.strip() for s in all_logs.split("sha:")] if x]

        _end = None  # once was true, always is true...
        objs = []
        for c_log in c_logs:
            sha, s, all_changes = c_log.partition("\n")
            c = repo.get_object(sha)
            # FIXME: not normalizing to UTC
            _start = ts2dt(c.commit_time)

            # and some basic stuff...
            obj = dict(
                _oid=sha,
                _start=_start,
                _end=_end,
                repo_uri=uri,
                tree=c.tree,
                parents=c.parents,
                author=c.author,
                committer=c.committer,
                author_time=c.author_time,
                message=c.message,
                mergetag=c.mergetag,
                extra=c.extra,
            )

            # --numstat lines are 'added\tremoved\tfname'; '-' marks binary
            for _file in all_changes.split("\n"):
                _file = _file.strip()
                obj.setdefault("files", {})
                if not _file:
                    added, removed, fname = 0, 0, None
                else:
                    added, removed, fname = _file.split("\t")
                    added = 0 if added == "-" else int(added)
                    removed = 0 if removed == "-" else int(removed)
                    # FIXME: sql doesn't nest well..
                    changes = {"added": added, "removed": removed}
                    obj["files"][fname] = changes

            # file +/- totals
            obj["added"] = sum([v.get("added", 0) for v in obj["files"].itervalues()])
            obj["removed"] = sum([v.get("removed", 0) for v in obj["files"].itervalues()])

            # extract interesting bits from the message
            obj["acked_by"] = acked_by_re.findall(c.message)
            obj["signed_off_by"] = signed_off_by_re.findall(c.message)
            obj["resolves"] = resolves_re.findall(c.message)
            obj["related"] = related_re.findall(c.message)
            objs.append(obj)
        self.objects.extend(objs)

        return super(Commit, self).get_objects(**kwargs)
def test_api():
    '''
    Exercise MetriqueContainer's dict-like API, versioning, and local
    (sqlite-backed) persistence: filter/upsert/flush/count round trips.
    '''
    from metrique import MetriqueContainer, metrique_object
    from metrique.utils import utcnow, remove_file, dt2ts, ts2dt

    _start = ts2dt('2001-01-01')
    _end = ts2dt('2001-01-02')
    a = {'_oid': 1, 'col_1': 1, 'col_2': utcnow(), '_start': _start}
    b = {'_oid': 2, 'col_1': 2, 'col_2': utcnow(), '_start': _start}
    ma = metrique_object(**a)
    mb = metrique_object(**b)
    objs_list = [a, b]
    r_objs_dict = {u'1': ma, u'2': mb}

    c = MetriqueContainer()
    assert not c.name
    assert not c._proxy

    MetriqueContainer()

    # check various forms of passing in objects results in expected
    # container contents

    assert c == {}
    assert MetriqueContainer(objects=c) == {}
    assert MetriqueContainer(objects=objs_list) == r_objs_dict
    mc = MetriqueContainer(objects=objs_list)
    assert MetriqueContainer(objects=mc) == r_objs_dict

    # setting version should result in all objects added having that version
    # note: version -> _v in metrique_object
    assert mc.version == 0
    assert mc['1']['_v'] == 0
    mc = MetriqueContainer(objects=objs_list, version=3)
    assert mc.version == 3
    assert mc['1']['_v'] == 3

    # setting converts key to _id of value after being passed
    # through metrique_object(); notice key int(5) -> str('5')
    mc[5] = {'_oid': 5}
    assert mc['5']['_oid'] == 5
    # also note, that it doesn't actually matter what key we use
    # to set the object... since we always set based on value's
    # auto-generated _id value, anyway
    mc[42] = {'_oid': 5}
    assert mc['5']['_oid'] == 5

    # should have 3 objects, first two, plus the last one
    assert len(mc) == 3
    assert len(mc.values()) == 3
    assert sorted(mc._ids) == ['1', '2', '5']

    assert sorted(mc._oids) == [1, 2, 5]
    # ls() is only implemented on backends with real storage
    try:
        mc.ls()
    except NotImplementedError:
        pass
    else:
        assert False

    mc.extend([{'_oid': 6}, {'_oid': 7}])
    assert sorted(mc._oids) == [1, 2, 5, 6, 7]

    mc.add({'_oid': 8, '_start': _start, '_end': _end, 'col_1': True})
    mc.add({'_oid': 8, '_end': None, 'col_1': False})
    assert sorted(mc._oids) == [1, 2, 5, 6, 7, 8]

    # two historical versions of _oid 8 exist in the buffer
    r = mc.filter(where={'_oid': 8})
    assert len(r) == 2
    assert sorted(mc._oids) == [1, 2, 5, 6, 7, 8]

    assert sorted(mc._oids) == [1, 2, 5, 6, 7, 8]
    # pop/del accept both str and int keys
    mc.pop('7')
    assert sorted(mc._oids) == [1, 2, 5, 6, 8]
    mc.pop(6)
    assert sorted(mc._oids) == [1, 2, 5, 8]
    del mc[5]
    assert sorted(mc._oids) == [1, 2, 8]

    assert '1' in mc

    mc.clear()
    assert mc == {}

    db = 'admin'
    name = 'container_test'
    c = MetriqueContainer(name=name, db=db)

    # NOTE(review): os and cache_dir come from module scope — confirm
    # they are defined at the top of this test module
    _expected_db_path = os.path.join(cache_dir, 'admin.sqlite')
    # test drop
    c.drop(True)
    assert c.proxy._sqlite_path == _expected_db_path
    # make sure we're working with a clean db
    remove_file(_expected_db_path)

    mc = MetriqueContainer(name=name, db=db, objects=objs_list)
    assert mc.df() is not None
    assert mc.df().empty is False

    # local persistence; filter method queries .objects buffer
    # .upsert dumps data to proxy db; but leaves the data in the buffer
    # .flush dumps data and removes all objects dumped
    # count queries proxy db
    mc = MetriqueContainer(name=name, db=db, objects=objs_list)
    _store = deepcopy(mc.store)

    assert len(mc.filter({'col_1': 1})) == 1
    _ids = mc.upsert()
    assert _ids == ['1', '2']
    assert mc.store == _store
    assert len(mc.filter({'col_1': 1})) == 1
    assert mc.count('col_1 == 1') == 1
    assert mc.count() == 2

    # persisting again shouldn't result in new rows
    _ids = mc.upsert()
    assert _ids == ['1', '2']
    assert mc.store == _store
    assert len(mc.filter({'col_1': 1})) == 1
    assert mc.count('col_1 == 1') == 1
    assert mc.count() == 2

    # flushing now shouldn't result in new rows; but store should be empty
    _ids = mc.flush()
    assert _ids == ['1', '2']
    assert mc.store == {}
    assert len(mc.filter({'col_1': 1})) == 0
    assert mc.count('col_1 == 1') == 1
    assert mc.count() == 2

    # adding the same object shouldn't result in new rows
    a.update({'col_1': 42})
    mc.add(a)
    assert len(mc.filter({'col_1': 1})) == 0
    assert len(mc.filter({'col_1': 42})) == 1
    _ids = mc.flush()
    assert mc.count(date='~') == 3
    assert mc.count(date=None) == 2
    assert mc.count('col_1 == 1', date=None) == 0
    assert mc.count('col_1 == 1', date='~') == 1
    assert mc.count('col_1 == 42') == 1
    assert mc.count('col_1 == 42', date='~') == 1
    # adjust for local time...
    #_ts = dt2ts(convert(_start))
    _ts = dt2ts(_start)
    # the updated object becomes a new version keyed '<oid>:<start ts>'
    assert _ids == ['1', '1:%s' % _ts]

    # remove the db
    remove_file(_expected_db_path)
Example #14
0
def db_tester(proxy):
    '''
    Run a battery of CRUD, schema, versioning and index checks
    against the given db proxy. Destructive: drops ALL tables first.

    :param proxy: db proxy instance under test (sqlite, postgres, ...)
    '''
    from metrique.utils import ts2dt
    from metrique import metrique_object as O

    _start = ts2dt("2001-01-01 00:00:00")
    _start_plus = ts2dt("2001-01-01 00:00:01")
    _end = ts2dt("2001-01-02 00:00:00")
    _before = ts2dt("2000-12-31 00:00:00")
    _after = ts2dt("2001-01-03 00:00:00")
    _date = ts2dt("2014-01-01 00:00:00")
    TABLE = 'bla'
    p = proxy

    # Clear out ALL tables in the database!
    p.drop(True)

    assert p.ls() == []

    # must pass _oid as kwarg
    obj = {'col_1': 1, 'col_3': _date}
    try:
        O(**obj)
    except TypeError:
        pass
    else:
        assert False

    # _oid can't be null
    obj = {'_oid': None, 'col_1': 1, 'col_3': _date}
    try:
        O(**obj)
    except ValueError:
        pass
    else:
        assert False

    _obj_1 = {'_oid': 1, 'col_1': 1, 'col_3': _date}
    obj_1 = [O(**_obj_1)]

    # expected auto-derived schema for obj_1
    schema = {
        '_oid': {
            'type': int
        },
        'col_1': {
            'type': int
        },
        'col_3': {
            'type': datetime
        },
    }

    autoschema = p.autoschema(obj_1)
    assert dict(autoschema) == dict(schema)

    table = p.autotable(name=TABLE, schema=schema, create=True)
    assert table is not None

    assert p.count() == 0

    # metadata columns automatically added alongside user fields
    expected_fields = [
        '__v__', '_e', '_end', '_hash', '_id', '_start', '_v', 'id'
    ]

    _exp = expected_fields + _obj_1.keys()
    assert sorted(p.columns()) == sorted(_exp)

    print 'Inserting %s' % obj_1
    p.insert(obj_1)

    assert p.count() == 1
    assert p.find('_oid == 1', raw=True, date=None)
    # should be one object with col_1 == 1 (_oids: 1)
    assert p.count('col_1 == 1', date='~') == 1

    _obj_2 = {
        '_oid': 2,
        'col_1': 1,
        'col_3': _date,
        '_start': _start,
        '_end': _end
    }
    obj_2 = [O(**_obj_2)]
    print 'Inserting %s' % obj_2
    p.insert(obj_2)
    assert p.count('_oid == 2') == 0
    assert p.count('_oid == 2', date=None) == 0
    assert p.count('_oid == 2', date='%s~' % _start) == 1
    # ~DATE does NOT include objects existing on DATE, but only UP TO/BEFORE
    assert p.count('_oid == 2', date='~%s' % _start) == 0
    assert p.count('_oid == 2', date='~%s' % _start_plus) == 1
    assert p.count('_oid == 2', date='~') == 1
    assert p.count('_oid == 2', date='~%s' % _before) == 0
    assert p.count('_oid == 2', date='%s~' % _after) == 0
    # should be two objects with col_1 == 1 (_oids: 1, 2)
    assert p.count('col_1 == 1', date='~') == 2

    assert p.distinct('_oid') == [1, 2]

    # insert new obj, then update col_3's values
    # note, working with the obj individually, but passing as a sigleton list
    # to insert(), etc
    _obj_3 = {
        '_oid': 3,
        'col_1': 1,
        'col_3': _date,
        '_start': _start,
        '_end': None
    }
    obj_3 = O(**_obj_3)
    print 'Inserting %s' % obj_3
    p.insert([obj_3])
    assert p.count('_oid == 3', date='~') == 1

    obj_3['col_1'] = 42
    print '... Update 1: %s' % obj_3
    obj_3 = O(**obj_3)
    p.upsert([obj_3])

    # should be two versions of _oid:3
    assert p.count('_oid == 3', date='~') == 2
    # should be three objects with col_1 == 1 (_oids: 1, 2, 3)
    assert p.count('col_1 == 1', date='~') == 3
    assert p.count('col_1 == 42', date='~') == 1

    # should be four object versions in total at this point
    assert p.count(date='~') == 4

    # last _oid should be 3
    assert p.get_last_field('_oid') == 3
    try:
        p.insert([obj_3])
    except Exception:
        pass
    else:
        assert False, "shouldn't be able to insert same object twice"

    _obj_4 = {'_oid': -1}
    obj_4 = O(**_obj_4)
    print '... Update 2: %s' % obj_4
    p.insert([obj_4])
    # 3 should still be highest _oid
    assert p.get_last_field('_oid') == 3

    _obj_5 = {'_oid': 42}
    obj_5 = O(**_obj_5)
    p.insert([obj_5])
    # now, 42 should be highest
    assert p.get_last_field('_oid') == 42

    assert p.ls() == [TABLE]

    # Indexes
    ix = [i['name'] for i in p.index_list().get(TABLE)]
    assert 'ix_col_1' not in ix
    p.index('col_1')
    # index names are prefixed with the table name
    ix = [i['name'] for i in p.index_list().get(TABLE)]
    assert 'ix_bla_col_1' in ix