Code example #1
File: test_utils.py Project: young8/metrique
def test_get_timezone_converter():
    ' args: from_timezone '
    ' convert is always TO utc '
    from metrique.utils import utcnow, get_timezone_converter

    # note: caching timezones always takes a few seconds
    est = 'US/Eastern'
    EST = pytz.timezone(est)

    now_utc_tz = utcnow(tz_aware=True, as_datetime=True)
    now_utc = now_utc_tz.replace(tzinfo=None)

    now_est = copy(now_utc_tz)
    now_est_tz = now_est.astimezone(EST)
    now_est = now_est_tz.replace(tzinfo=None)

    assert get_timezone_converter(None) is None

    c = get_timezone_converter(est)
    assert c(None) is None
    assert c(now_est) == now_utc
    assert c(now_est_tz) == now_utc
    assert c(now_est_tz) == c(now_est)

    c = get_timezone_converter(est, tz_aware=True)
    assert c(now_est) == now_utc_tz
    assert c(now_est_tz) == c(now_est)
    assert c(now_est_tz) == now_utc_tz
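A minimal usage sketch of the converter exercised above (the wall-clock value is an invented example): get_timezone_converter('US/Eastern') returns a callable that always converts TO UTC, returning a naive datetime unless tz_aware=True is requested.

from datetime import datetime
from metrique.utils import get_timezone_converter

convert = get_timezone_converter('US/Eastern')
local_naive = datetime(2014, 7, 1, 9, 30)   # invented US/Eastern wall-clock time
print(convert(local_naive))                  # 2014-07-01 13:30:00 -- naive UTC (EDT is UTC-4)
print(convert(None))                         # None passes through unchanged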
Code example #2
File: test_utils.py Project: kejbaly2/metrique
def test_get_timezone_converter():
    ' args: from_timezone '
    ' convert is always TO utc '
    from metrique.utils import utcnow, get_timezone_converter

    # note: caching timezones always takes a few seconds
    est = 'US/Eastern'
    EST = pytz.timezone(est)

    now_utc_tz = utcnow(tz_aware=True, as_datetime=True)
    now_utc = now_utc_tz.replace(tzinfo=None)

    now_est = copy(now_utc_tz)
    now_est_tz = now_est.astimezone(EST)
    now_est = now_est_tz.replace(tzinfo=None)

    assert get_timezone_converter(None) is None

    c = get_timezone_converter(est)
    assert c(None) is None
    assert c(now_est) == now_utc
    assert c(now_est_tz) == now_utc
    assert c(now_est_tz) == c(now_est)

    c = get_timezone_converter(est, tz_aware=True)
    assert c(now_est) == now_utc_tz
    assert c(now_est_tz) == c(now_est)
    assert c(now_est_tz) == now_utc_tz
Code example #3
File: plotting.py Project: kejbaly2/metrique
def timestamp_figure(figure, stamp=True):
    # drop seconds:
    t = str(utcnow(as_datetime=True)).split('.')[0][:-3]
    if isinstance(stamp, basestring):
        t = '%s %s' % (stamp, t)
    figure.text(0.95, 0.05, t, fontsize=12, color='gray',
                ha='right', va='bottom', alpha=0.5)
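A short usage sketch for the helper above (assuming matplotlib is available; the plotted data and filename are illustrative):

import matplotlib.pyplot as plt

fig = plt.figure()
plt.plot([1, 2, 3], [2, 4, 8])
timestamp_figure(fig, stamp='generated')   # bottom-right gray text, e.g. 'generated 2014-07-01 13:30'
fig.savefig('example.png')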
Code example #4
def test_datatypes():
    from metrique import MetriqueContainer
    from metrique.utils import utcnow, remove_file

    o = {
        "_oid": 1,
        "date": utcnow(),
        "dict_null": {},
        "dict": {
            'hello': 'world'
        },
        "bool": True,
        "null": None,
        "list_null": [],
        "list": [1, 2, 3]
    }
    db = 'admin'
    table = 'test'
    c = MetriqueContainer(name=table, db=db)

    c.drop()
    remove_file(c._proxy._sqlite_path)

    c.add(o)
    c.upsert()

    c.drop()
    remove_file(c._proxy._sqlite_path)
Code example #5
File: generic.py Project: kejbaly2/metrique
    def _run_object_import(self, force, last_update, flush, full_history):
        workers = self.lconfig.get('workers')
        # if we're using multiple workers, break the oids
        # according to worker batchsize, then each worker will
        # break the batch into smaller sql batch size batches
        # otherwise, single threaded, use sql batch size
        w_batch_size = self.lconfig.get('worker_batch_size')
        s_batch_size = self.lconfig.get('batch_size')

        # store the time right before the ETL job starts,
        # so next run, we can catch delta changes b/w
        # next ETL start and previous (this)
        new_delta_ts = utcnow()
        # get list of oids which we plan to update
        oids, save_delta_ts = self._delta_force(force, last_update)

        msg = 'Getting Full History' if full_history else \
            'Getting Objects - Current Values'
        if HAS_JOBLIB and workers > 1:
            logger.debug('%s (%s@%s)' % (msg, workers, w_batch_size))
            runner = Parallel(n_jobs=workers)
            func = delayed(get_objects)
            result = runner(func(
                cube=self._cube, oids=batch,
                full_history=full_history, flush=flush,
                cube_name=self.name, config=self.config,
                config_file=self.config_file,
                config_key=self.config_key,
                container=type(self.objects),
                container_config=self.container_config,
                proxy=type(self.proxy),
                proxy_config=self.proxy_config)
                for batch in batch_gen(oids, w_batch_size))
            # merge list of lists (batched) into single list
            result = [i for l in result for i in l]
            if not flush:
                self.objects.extend(result)
        else:
            logger.debug('%s (%s@%s)' % (msg, workers, w_batch_size))
            result = []
            _s = 0
            for i, batch in enumerate(batch_gen(oids, s_batch_size)):
                _e = _s + s_batch_size
                logger.debug('batch %s: %s-%s of %s' % (i, _s, _e, len(oids)))
                if full_history:
                    _ = self._activity_get_objects(oids=batch, flush=flush)
                else:
                    _ = self._get_objects(oids=batch, flush=flush)
                result.extend(_)
                _s = _e

        # save new delta_ts:
        if flush and save_delta_ts:
            self.container.proxy.update_delta_ts(new_delta_ts)

        if flush:
            return result
        else:
            return self
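The utcnow() call above is the heart of the incremental-import pattern: the timestamp is captured before any objects are fetched and only persisted (update_delta_ts) after a successful flush, so the next run can pick up changes made during this one. A schematic sketch of that pattern; the source object and its changed_since method are hypothetical, not part of metrique:

from metrique.utils import utcnow

def incremental_import(source, last_delta_ts=None):
    new_delta_ts = utcnow()                     # capture BEFORE fetching anything
    oids = source.changed_since(last_delta_ts)  # hypothetical: ids changed since last run
    objects = [source.get(oid) for oid in oids]
    # caller saves new_delta_ts only after the objects were flushed successfully
    return objects, new_delta_ts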
Code example #6
File: test_utils.py Project: young8/metrique
def test_utcnow():
    ' args: as_datetime=False, tz_aware=False '
    from metrique.utils import utcnow

    # default behavior is as_datetime == False, which returns an epoch/float
    assert isinstance(utcnow(), float)

    now_date = datetime.utcnow().replace(microsecond=0)
    now_date_utc = datetime.now(pytz.utc).replace(microsecond=0)
    now_time = int(calendar.timegm(now_date.utctimetuple()))

    # FIXME: millisecond resolution?
    assert utcnow(as_datetime=False, drop_micro=True) == now_time
    assert utcnow(as_datetime=True, drop_micro=True) == now_date
    _ = utcnow(as_datetime=True, tz_aware=True, drop_micro=True)
    assert _ == now_date_utc
    assert utcnow(as_datetime=False, tz_aware=True,
                  drop_micro=True) == now_time
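For quick reference, the return-type combinations asserted above (a summary sketch, not additional API):

from metrique.utils import utcnow

utcnow()                                  # float, seconds since the epoch (UTC)
utcnow(as_datetime=True)                  # naive datetime in UTC
utcnow(as_datetime=True, tz_aware=True)   # datetime carrying pytz.utc tzinfo
utcnow(drop_micro=True)                   # microseconds dropped before returning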
Code example #7
File: test_utils.py Project: kejbaly2/metrique
def test_utcnow():
    ' args: as_datetime=False, tz_aware=False '
    from metrique.utils import utcnow

    # default behavior is as_datetime == False, which returns an epoch/float
    assert isinstance(utcnow(), float)

    now_date = datetime.utcnow().replace(microsecond=0)
    now_date_utc = datetime.now(pytz.utc).replace(microsecond=0)
    now_time = int(calendar.timegm(now_date.utctimetuple()))

    # FIXME: millisecond resolution?
    assert utcnow(as_datetime=False, drop_micro=True) == now_time
    assert utcnow(as_datetime=True, drop_micro=True) == now_date
    _ = utcnow(as_datetime=True, tz_aware=True, drop_micro=True)
    assert _ == now_date_utc
    assert utcnow(as_datetime=False,
                  tz_aware=True, drop_micro=True) == now_time
Code example #8
File: core_api.py Project: young8/metrique
def metrique_object(_oid,
                    _id=None,
                    _hash=None,
                    _start=None,
                    _end=None,
                    _e=None,
                    _v=None,
                    id=None,
                    __v__=None,
                    **kwargs):
    '''
    Function which takes a dictionary (Mapping) object as input
    and returns a metrique object.

    Special meta properties are added to each object::
        _oid: ...
        _start: ...
        ...
        FIXME
    '''
    # NOTE: we completely ignore incoming 'id' keys!
    # id is RESERVED and ALWAYS expected to be 'autoincrement'
    # upon insertion into DB (though it's optional, depending
    # on backend storage behavior).
    if id:
        warnings.warn('non-null "id" keys detected, ignoring them!')

    _e = dict(_e or {})  # expecting a dict with copy() attr
    _v = int(_v or 0)

    if not isinstance(_start, float):
        _start = dt2ts(_start) if _start else utcnow(as_datetime=False)
    assert _start is not None, "_start (%s) must be set!" % _start

    if not isinstance(_end, float):
        _end = dt2ts(_end) if _end else None

    _err_msg = "_end(%s) must be >= _start(%s) or None!" % (_end, _start)
    assert _end is None or bool(_end >= _start), _err_msg

    # these meta fields are used to generate unique object _hash
    kwargs['_oid'] = _oid
    kwargs['_v'] = _v
    kwargs['_id'] = gen_id(_oid, _start, _end)  # ignore passed in _id
    # generate unique, consistent object _hash based on 'frozen' obj contents
    # FIXME: make _hash == None valid
    #kwargs['_hash'] = jsonhash(kwargs) if _hash else None
    kwargs['_hash'] = jsonhash(kwargs)

    # add some additional non-hashable meta data
    kwargs['_start'] = _start
    kwargs['_end'] = _end
    kwargs['__v__'] = __v__ or __version__
    kwargs['_e'] = _e
    return kwargs
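A minimal usage sketch based on the behaviour documented above: _start defaults to utcnow() as an epoch float, _end stays None unless given, and reserved keys like _id and _hash are (re)generated from the contents (the 'severity' field is an invented payload):

from metrique.core_api import metrique_object
from metrique.utils import utcnow

o = metrique_object(_oid=42, severity='high', _start=utcnow())
o['_id']     # '42' -- derived from _oid/_start/_end; any passed-in _id is ignored
o['_end']    # None -- the object is still 'current'
o['_hash']   # consistent content hash, used to detect changed objects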
Code example #9
    def get_objects(self,
                    uri,
                    _oid=None,
                    _start=None,
                    _end=None,
                    load_kwargs=None,
                    **kwargs):
        '''
        Load and transform csv data into a list of dictionaries.

        Each row in the csv will result in one dictionary in the list.

        :param uri: uri (file://, http(s)://) of csv file to load
        :param _oid:
            column or func to apply to map _oid in all resulting objects
        :param _start:
            column or func to apply to map _start in all resulting objects
        :param _end:
            column or func to apply to map _end in all resulting objects
        :param kwargs: kwargs to pass to pandas.read_csv method

        _start and _oid arguments can be a column name or a function
        which accepts a single argument -- the row being extracted.

        If either is a column name (string), that column's value will be
        used as the corresponding _oid/_start/_end for each object generated.

        If either is a function, the function will be applied to each row
        and its return value assigned to the corresponding _oid/_start/_end.
        '''
        load_kwargs = load_kwargs or {}
        objects = load(path=uri, filetype='csv', **load_kwargs)

        k = itertools.count(1)
        now = utcnow()
        __oid = lambda o: k.next()

        _oid = _oid or __oid
        _start = _start or now
        _end = _end or None

        def is_callable(v):
            _v = type(v)
            _ = True if _v is type or hasattr(v, '__call__') else False
            return _

        for obj in objects:
            obj['_oid'] = _oid(obj) if is_callable(_oid) else _oid
            obj['_start'] = _start(obj) if is_callable(_start) else _start
            obj['_end'] = _end(obj) if is_callable(_end) else _end
            self.container.add(obj)

        return super(Rows, self).get_objects(**kwargs)
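The _oid/_start/_end parameters above accept either a fixed value (or column) or a callable applied per row; is_callable() is what selects between the two. The same dispatch, illustrated outside the class with invented row data:

from metrique.utils import utcnow

rows = [{'id': 'a', 'updated': 1400000000.0},
        {'id': 'b', 'updated': 1400000500.0}]
_oid = lambda row: row['id']     # callable: evaluated per row
_start = utcnow()                # plain value: shared by every row

for row in rows:
    row['_oid'] = _oid(row) if callable(_oid) else _oid
    row['_start'] = _start(row) if callable(_start) else _start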
Code example #10
def timestamp_figure(figure, stamp=True):
    # drop seconds:
    t = str(utcnow(as_datetime=True)).split('.')[0][:-3]
    if isinstance(stamp, basestring):
        t = '%s %s' % (stamp, t)
    figure.text(0.95,
                0.05,
                t,
                fontsize=12,
                color='gray',
                ha='right',
                va='bottom',
                alpha=0.5)
Code example #11
File: test_result.py Project: kejbaly2/metrique
def test_init():
    from metrique.result import Result
    from metrique.utils import utcnow

    try:
        data = [{'a': 1, 'b': 2}]
        Result(data)
    except ValueError:
        pass
    else:
        assert False, "_start and _end must be defined..."

    data = [{'_start': utcnow(), '_end': None, '_oid': 1, 'b': 2}]
    Result(data)
Code example #12
File: rows.py Project: kejbaly2/metrique
    def get_objects(self, uri, _oid=None, _start=None, _end=None,
                    load_kwargs=None, **kwargs):
        '''
        Load and transform csv data into a list of dictionaries.

        Each row in the csv will result in one dictionary in the list.

        :param uri: uri (file://, http(s)://) of csv file to load
        :param _oid:
            column or func to apply to map _oid in all resulting objects
        :param _start:
            column or func to apply to map _start in all resulting objects
        :param _end:
            column or func to apply to map _end in all resulting objects
        :param kwargs: kwargs to pass to pandas.read_csv method

        _start and _oid arguments can be a column name or a function
        which accepts a single argument -- the row being extracted.

        If either is a column name (string), that column's value will be
        used as the corresponding _oid/_start/_end for each object generated.

        If either is a function, the function will be applied to each row
        and its return value assigned to the corresponding _oid/_start/_end.
        '''
        load_kwargs = load_kwargs or {}
        objects = load(path=uri, filetype='csv', **load_kwargs)

        k = itertools.count(1)
        now = utcnow()
        __oid = lambda o: k.next()

        _oid = _oid or __oid
        _start = _start or now
        _end = _end or None

        def is_callable(v):
            _v = type(v)
            _ = True if _v is type or hasattr(v, '__call__') else False
            return _

        for obj in objects:
            obj['_oid'] = _oid(obj) if is_callable(_oid) else _oid
            obj['_start'] = _start(obj) if is_callable(_start) else _start
            obj['_end'] = _end(obj) if is_callable(_end) else _end
            self.container.add(obj)

        return super(Rows, self).get_objects(**kwargs)
Code example #13
File: core_api.py Project: kejbaly2/metrique
def metrique_object(_oid, _id=None, _hash=None, _start=None, _end=None,
                    _e=None, _v=None, id=None, __v__=None, **kwargs):
    '''
    Function which takes a dictionary (Mapping) object as input
    and returns a metrique object.

    Special meta properties are added to each object::
        _oid: ...
        _start: ...
        ...
        FIXME
    '''
    # NOTE: we completely ignore incoming 'id' keys!
    # id is RESERVED and ALWAYS expected to be 'autoincrement'
    # upon insertion into DB (though it's optional, depending
    # on backend storage behavior).
    if id:
        warnings.warn('non-null "id" keys detected, ignoring them!')

    _e = dict(_e or {})  # expecting a dict with copy() attr
    _v = int(_v or 0)

    if not isinstance(_start, float):
        _start = dt2ts(_start) if _start else utcnow(as_datetime=False)
    assert _start is not None, "_start (%s) must be set!" % _start

    if not isinstance(_end, float):
        _end = dt2ts(_end) if _end else None

    _err_msg = "_end(%s) must be >= _start(%s) or None!" % (_end, _start)
    assert _end is None or bool(_end >= _start), _err_msg

    # these meta fields are used to generate unique object _hash
    kwargs['_oid'] = _oid
    kwargs['_v'] = _v
    kwargs['_id'] = gen_id(_oid, _start, _end)  # ignore passed in _id
    # generate unique, consistent object _hash based on 'frozen' obj contents
    # FIXME: make _hash == None valid
    #kwargs['_hash'] = jsonhash(kwargs) if _hash else None
    kwargs['_hash'] = jsonhash(kwargs)

    # add some additional non-hashable meta data
    kwargs['_start'] = _start
    kwargs['_end'] = _end
    kwargs['__v__'] = __v__ or __version__
    kwargs['_e'] = _e
    return kwargs
Code example #14
File: test_utils.py Project: kejbaly2/metrique
def test__get_datetime():
    from metrique.utils import _get_datetime, utcnow, dt2ts

    now_tz = utcnow(tz_aware=True, as_datetime=True)
    now = now_tz.replace(tzinfo=None)
    try:
        now_tz == now  # can't compare tz_aware <> naive
    except TypeError:
        pass
    else:
        assert False
    # default is tz_aware=False
    assert _get_datetime(now_tz) == now
    assert _get_datetime(now) == now
    assert _get_datetime(now_tz, tz_aware=True) == now_tz
    assert _get_datetime(now, tz_aware=True) == now_tz
    assert _get_datetime(dt2ts(now), tz_aware=True) == now_tz
Code example #15
File: test_utils.py Project: young8/metrique
def test__get_datetime():
    from metrique.utils import _get_datetime, utcnow, dt2ts

    now_tz = utcnow(tz_aware=True, as_datetime=True)
    now = now_tz.replace(tzinfo=None)
    try:
        now_tz == now  # can't compare tz_aware <> naive
    except TypeError:
        pass
    else:
        assert False
    # default is tz_aware=False
    assert _get_datetime(now_tz) == now
    assert _get_datetime(now) == now
    assert _get_datetime(now_tz, tz_aware=True) == now_tz
    assert _get_datetime(now, tz_aware=True) == now_tz
    assert _get_datetime(dt2ts(now), tz_aware=True) == now_tz
Code example #16
File: rpm.py Project: kejbaly2/metrique
    def get_objects(self, **kwargs):
        '''
        Run `rpm -q` command on a {local, remote} system to get back
        details of installed RPMs.

        Default rpm details extracted are as follows:
            * name
            * version
            * release
            * arch
            * nvra
            * license
            * os
            * packager
            * platform
            * sourcepackage
            * sourcerpm
            * summary
        '''
        fmt = ':::'.join('%%{%s}' % f for f in self._fields)
        if self.ssh_host:
            output = self._ssh_cmd(fmt)
        else:
            output = self._local_cmd(fmt)
        if isinstance(output, basestring):
            output = unicode(output, 'utf-8')
            output = output.strip().split('\n')
        lines = [l.strip().split(':::') for l in output]
        now = utcnow()
        host = self.ssh_host or socket.gethostname()
        for line in lines:
            obj = {'host': host, '_start': now}
            for i, item in enumerate(line):
                if item == '(none)':
                    item = None
                obj[self._fields[i]] = item
            obj['_oid'] = '%s__%s' % (host, obj['nvra'])
            self.objects.add(obj)
        return super(Rpm, self).get_objects(**kwargs)
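For reference, the query format built above expands to one %{field} per extracted field, joined by ':::'; the rpm query output is then split on the same separator. A standalone sketch (the sample output line is invented):

fields = ['name', 'version', 'release', 'arch', 'nvra']
fmt = ':::'.join('%%{%s}' % f for f in fields)
# fmt == '%{name}:::%{version}:::%{release}:::%{arch}:::%{nvra}'
# something like `rpm -qa --qf "$fmt\n"` would print one such line per installed package

line = 'bash:::4.2.46:::31.el7:::x86_64:::bash-4.2.46-31.el7.x86_64'  # invented sample
obj = dict(zip(fields, line.split(':::')))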
Code example #17
    def get_objects(self, **kwargs):
        '''
        Run `rpm -q` command on a {local, remote} system to get back
        details of installed RPMs.

        Default rpm details extracted are as follows:
            * name
            * version
            * release
            * arch
            * nvra
            * license
            * os
            * packager
            * platform
            * sourcepackage
            * sourcerpm
            * summary
        '''
        fmt = ':::'.join('%%{%s}' % f for f in self._fields)
        if self.ssh_host:
            output = self._ssh_cmd(fmt)
        else:
            output = self._local_cmd(fmt)
        if isinstance(output, basestring):
            output = unicode(output, 'utf-8')
            output = output.strip().split('\n')
        lines = [l.strip().split(':::') for l in output]
        now = utcnow()
        host = self.ssh_host or socket.gethostname()
        for line in lines:
            obj = {'host': host, '_start': now}
            for i, item in enumerate(line):
                if item == '(none)':
                    item = None
                obj[self._fields[i]] = item
            obj['_oid'] = '%s__%s' % (host, obj['nvra'])
            self.objects.add(obj)
        return super(Rpm, self).get_objects(**kwargs)
Code example #18
def test_datatypes():
    from metrique import MetriqueContainer
    from metrique.utils import utcnow, remove_file

    o = {"_oid": 1,
         "date": utcnow(),
         "dict_null": {},
         "dict": {'hello': 'world'},
         "bool": True,
         "null": None,
         "list_null": [],
         "list": [1, 2, 3]}
    db = 'admin'
    table = 'test'
    c = MetriqueContainer(name=table, db=db)

    c.drop()
    remove_file(c._proxy._sqlite_path)

    c.add(o)
    c.upsert()

    c.drop()
    remove_file(c._proxy._sqlite_path)
Code example #19
def test_api():
    from metrique import MetriqueContainer, metrique_object
    from metrique.utils import utcnow, remove_file, dt2ts, ts2dt

    _start = ts2dt('2001-01-01')
    _end = ts2dt('2001-01-02')
    a = {'_oid': 1, 'col_1': 1, 'col_2': utcnow(), '_start': _start}
    b = {'_oid': 2, 'col_1': 2, 'col_2': utcnow(), '_start': _start}
    ma = metrique_object(**a)
    mb = metrique_object(**b)
    objs_list = [a, b]
    r_objs_dict = {u'1': ma, u'2': mb}

    c = MetriqueContainer()
    assert not c.name
    assert not c._proxy

    MetriqueContainer()

    # check various forms of passing in objects results in expected
    # container contents

    assert c == {}
    assert MetriqueContainer(objects=c) == {}
    assert MetriqueContainer(objects=objs_list) == r_objs_dict
    mc = MetriqueContainer(objects=objs_list)
    assert MetriqueContainer(objects=mc) == r_objs_dict

    # setting version should result in all objects added having that version
    # note: version -> _v in metrique_object
    assert mc.version == 0
    assert mc['1']['_v'] == 0
    mc = MetriqueContainer(objects=objs_list, version=3)
    assert mc.version == 3
    assert mc['1']['_v'] == 3

    # setting converts key to _id of value after being passed
    # through metrique_object(); notice key int(5) -> str('5')
    mc[5] = {'_oid': 5}
    assert mc['5']['_oid'] == 5
    # also note, that it doesn't actually matter what key we use
    # to set the object... since we always set based on value's
    # auto-generated _id value, anyway
    mc[42] = {'_oid': 5}
    assert mc['5']['_oid'] == 5

    # should have 3 objects, first two, plus the last one
    assert len(mc) == 3
    assert len(mc.values()) == 3
    assert sorted(mc._ids) == ['1', '2', '5']

    assert sorted(mc._oids) == [1, 2, 5]
    try:
        mc.ls()
    except NotImplementedError:
        pass
    else:
        assert False

    mc.extend([{'_oid': 6}, {'_oid': 7}])
    assert sorted(mc._oids) == [1, 2, 5, 6, 7]

    mc.add({'_oid': 8, '_start': _start, '_end': _end, 'col_1': True})
    mc.add({'_oid': 8, '_end': None, 'col_1': False})
    assert sorted(mc._oids) == [1, 2, 5, 6, 7, 8]

    r = mc.filter(where={'_oid': 8})
    assert len(r) == 2
    assert sorted(mc._oids) == [1, 2, 5, 6, 7, 8]

    assert sorted(mc._oids) == [1, 2, 5, 6, 7, 8]
    mc.pop('7')
    assert sorted(mc._oids) == [1, 2, 5, 6, 8]
    mc.pop(6)
    assert sorted(mc._oids) == [1, 2, 5, 8]
    del mc[5]
    assert sorted(mc._oids) == [1, 2, 8]

    assert '1' in mc

    mc.clear()
    assert mc == {}

    db = 'admin'
    name = 'container_test'
    c = MetriqueContainer(name=name, db=db)

    _expected_db_path = os.path.join(cache_dir, 'admin.sqlite')
    # test drop
    c.drop(True)
    assert c.proxy._sqlite_path == _expected_db_path
    # make sure we're working with a clean db
    remove_file(_expected_db_path)

    mc = MetriqueContainer(name=name, db=db, objects=objs_list)
    assert mc.df() is not None
    assert mc.df().empty is False

    # local persistence; filter method queries .objects buffer
    # .upsert dumps data to proxy db; but leaves the data in the buffer
    # .flush dumps data and removes all objects dumped
    # count queries proxy db
    mc = MetriqueContainer(name=name, db=db, objects=objs_list)
    _store = deepcopy(mc.store)

    assert len(mc.filter({'col_1': 1})) == 1
    _ids = mc.upsert()
    assert _ids == ['1', '2']
    assert mc.store == _store
    assert len(mc.filter({'col_1': 1})) == 1
    assert mc.count('col_1 == 1') == 1
    assert mc.count() == 2

    # persisting again shouldn't result in new rows
    _ids = mc.upsert()
    assert _ids == ['1', '2']
    assert mc.store == _store
    assert len(mc.filter({'col_1': 1})) == 1
    assert mc.count('col_1 == 1') == 1
    assert mc.count() == 2

    # flushing now shouldn't result in new rows; but store should be empty
    _ids = mc.flush()
    assert _ids == ['1', '2']
    assert mc.store == {}
    assert len(mc.filter({'col_1': 1})) == 0
    assert mc.count('col_1 == 1') == 1
    assert mc.count() == 2

    # adding the same object shouldn't result in new rows
    a.update({'col_1': 42})
    mc.add(a)
    assert len(mc.filter({'col_1': 1})) == 0
    assert len(mc.filter({'col_1': 42})) == 1
    _ids = mc.flush()
    assert mc.count(date='~') == 3
    assert mc.count(date=None) == 2
    assert mc.count('col_1 == 1', date=None) == 0
    assert mc.count('col_1 == 1', date='~') == 1
    assert mc.count('col_1 == 42') == 1
    assert mc.count('col_1 == 42', date='~') == 1
    # adjust for local time...
    #_ts = dt2ts(convert(_start))
    _ts = dt2ts(_start)
    assert _ids == ['1', '1:%s' % _ts]

    # remove the db
    remove_file(_expected_db_path)
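Distilled from the comments in the test above, the persistence methods differ as follows (a summary sketch using the same container API; the object contents are placeholders):

from metrique import MetriqueContainer
from metrique.utils import utcnow

mc = MetriqueContainer(name='container_test', db='admin',
                       objects=[{'_oid': 1, 'col_1': 1, 'col_2': utcnow()}])
mc.upsert()              # writes to the sqlite proxy, keeps mc.store populated
mc.flush()               # writes and then empties mc.store
mc.count()               # queries the proxy db, not the in-memory buffer
mc.filter({'col_1': 1})  # queries only the in-memory buffer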
Code example #20
def test_func():
    from metrique.core_api import metrique_object
    from metrique.utils import utcnow
    from metrique._version import __version__

    now = utcnow()
    a = {'col_1': 1, 'col_2': now}

    # _oid must be passed in (as arg or kwarg, doesn't matter)
    try:
        metrique_object()
    except TypeError:
        pass
    else:
        assert False

    # same here; _oid still not being passed in
    try:
        metrique_object(**a)
    except TypeError:
        pass
    else:
        assert False

    # _oid can't be null either
    a['_oid'] = None
    try:
        metrique_object(**a)
    except ValueError:
        pass
    else:
        assert False

    a['_oid'] = 1
    o = metrique_object(**a)
    assert o
    assert o['_start'] < utcnow()

    # all objects get the metrique version used to
    # build them applied
    assert o['__v__'] == __version__

    expected_keys = sorted(
        ['_hash', '_v', '__v__', '_e', '_oid', '_id',
         '_start', '_end', 'col_1', 'col_2'])

    assert sorted(o.keys()) == expected_keys

    # hash should be constant if values don't change
    _hash = o['_hash']
    assert _hash == metrique_object(**a).get('_hash')

    a['col_1'] = 2
    assert _hash != metrique_object(**a).get('_hash')
    a['col_1'] = 3
    # _hash should be different, since we have different col_1 value
    assert _hash != metrique_object(**a).get('_hash')

    # _id should be ignored if passed in; a unique _id will be generated
    # based on obj content (in this case, the string of _oid)
    a['_id'] = 'blabla'
    assert metrique_object(**a).get('_id') != 'blabla'
    assert metrique_object(**a).get('_id') == '1'

    a['_start'] = now
    a['_end'] = now
    o = metrique_object(**a)
    assert o['_start'] == o['_end']

    # _end must come on/after _start
    try:
        a['_end'] = now - 1
        a['_start'] = now
        o = metrique_object(**a)
    except AssertionError:
        pass
    else:
        assert False, '_end was able to be smaller than _start!'

    # _start, if null, will be set to utcnow(); _end if null, stays null
    a['_start'] = None
    a['_end'] = None

    assert metrique_object(**a).get('_start') is not None
    assert metrique_object(**a).get('_end') is None

    # dates (_start/_end) are epoch
    a['_end'] = int(utcnow() + 100)  # +100 to ensure _end >= _start
    o = metrique_object(**a)
    assert isinstance(o['_start'], float)
    assert isinstance(o['_end'], float)

    a['_end'] = None
    # check default object version is set to 0
    o = metrique_object(**a)
    o['_v'] = 0
Code example #21
    def _run_object_import(self, force, last_update, flush, full_history):
        workers = self.lconfig.get('workers')
        # if we're using multiple workers, break the oids
        # according to worker batchsize, then each worker will
        # break the batch into smaller sql batch size batches
        # otherwise, single threaded, use sql batch size
        w_batch_size = self.lconfig.get('worker_batch_size')
        s_batch_size = self.lconfig.get('batch_size')

        # store the time right before the ETL job starts,
        # so next run, we can catch delta changes b/w
        # next ETL start and previous (this)
        new_delta_ts = utcnow()
        # get list of oids which we plan to update
        oids, save_delta_ts = self._delta_force(force, last_update)

        msg = 'Getting Full History' if full_history else \
            'Getting Objects - Current Values'
        if HAS_JOBLIB and workers > 1:
            logger.debug('%s (%s@%s)' % (msg, workers, w_batch_size))
            runner = Parallel(n_jobs=workers)
            func = delayed(get_objects)
            result = runner(
                func(cube=self._cube,
                     oids=batch,
                     full_history=full_history,
                     flush=flush,
                     cube_name=self.name,
                     config=self.config,
                     config_file=self.config_file,
                     config_key=self.config_key,
                     container=type(self.objects),
                     container_config=self.container_config,
                     proxy=type(self.proxy),
                     proxy_config=self.proxy_config)
                for batch in batch_gen(oids, w_batch_size))
            # merge list of lists (batched) into single list
            result = [i for l in result for i in l]
            if not flush:
                self.objects.extend(result)
        else:
            logger.debug('%s (%s@%s)' % (msg, workers, w_batch_size))
            result = []
            _s = 0
            for i, batch in enumerate(batch_gen(oids, s_batch_size)):
                _e = _s + s_batch_size
                logger.debug('batch %s: %s-%s of %s' % (i, _s, _e, len(oids)))
                if full_history:
                    _ = self._activity_get_objects(oids=batch, flush=flush)
                else:
                    _ = self._get_objects(oids=batch, flush=flush)
                result.extend(_)
                _s = _e

        # save new delta_ts:
        if flush and save_delta_ts:
            self.container.proxy.update_delta_ts(new_delta_ts)

        if flush:
            return result
        else:
            return self
Code example #22
def test_api():
    from metrique import MetriqueContainer, metrique_object
    from metrique.utils import utcnow, remove_file, dt2ts, ts2dt

    _start = ts2dt('2001-01-01')
    _end = ts2dt('2001-01-02')
    a = {'_oid': 1, 'col_1': 1, 'col_2': utcnow(), '_start': _start}
    b = {'_oid': 2, 'col_1': 2, 'col_2': utcnow(), '_start': _start}
    ma = metrique_object(**a)
    mb = metrique_object(**b)
    objs_list = [a, b]
    r_objs_dict = {u'1': ma, u'2': mb}

    c = MetriqueContainer()
    assert not c.name
    assert not c._proxy

    MetriqueContainer()

    # check various forms of passing in objects results in expected
    # container contents

    assert c == {}
    assert MetriqueContainer(objects=c) == {}
    assert MetriqueContainer(objects=objs_list) == r_objs_dict
    mc = MetriqueContainer(objects=objs_list)
    assert MetriqueContainer(objects=mc) == r_objs_dict

    # setting version should result in all objects added having that version
    # note: version -> _v in metrique_object
    assert mc.version == 0
    assert mc['1']['_v'] == 0
    mc = MetriqueContainer(objects=objs_list, version=3)
    assert mc.version == 3
    assert mc['1']['_v'] == 3

    # setting converts key to _id of value after being passed
    # through metrique_object(); notice key int(5) -> str('5')
    mc[5] = {'_oid': 5}
    assert mc['5']['_oid'] == 5
    # also note, that it doesn't actually matter what key we use
    # to set the object... since we always set based on value's
    # auto-generated _id value, anyway
    mc[42] = {'_oid': 5}
    assert mc['5']['_oid'] == 5

    # should have 3 objects, first two, plus the last one
    assert len(mc) == 3
    assert len(mc.values()) == 3
    assert sorted(mc._ids) == ['1', '2', '5']

    assert sorted(mc._oids) == [1, 2, 5]
    try:
        mc.ls()
    except NotImplementedError:
        pass
    else:
        assert False

    mc.extend([{'_oid': 6}, {'_oid': 7}])
    assert sorted(mc._oids) == [1, 2, 5, 6, 7]

    mc.add({'_oid': 8, '_start': _start, '_end': _end, 'col_1': True})
    mc.add({'_oid': 8, '_end': None, 'col_1': False})
    assert sorted(mc._oids) == [1, 2, 5, 6, 7, 8]

    r = mc.filter(where={'_oid': 8})
    assert len(r) == 2
    assert sorted(mc._oids) == [1, 2, 5, 6, 7, 8]

    assert sorted(mc._oids) == [1, 2, 5, 6, 7, 8]
    mc.pop('7')
    assert sorted(mc._oids) == [1, 2, 5, 6, 8]
    mc.pop(6)
    assert sorted(mc._oids) == [1, 2, 5, 8]
    del mc[5]
    assert sorted(mc._oids) == [1, 2, 8]

    assert '1' in mc

    mc.clear()
    assert mc == {}

    db = 'admin'
    name = 'container_test'
    c = MetriqueContainer(name=name, db=db)

    _expected_db_path = os.path.join(cache_dir, 'admin.sqlite')
    # test drop
    c.drop(True)
    assert c.proxy._sqlite_path == _expected_db_path
    # make sure we're working with a clean db
    remove_file(_expected_db_path)

    mc = MetriqueContainer(name=name, db=db, objects=objs_list)
    assert mc.df() is not None
    assert mc.df().empty is False

    # local persistence; filter method queries .objects buffer
    # .upsert dumps data to proxy db; but leaves the data in the buffer
    # .flush dumps data and removes all objects dumped
    # count queries proxy db
    mc = MetriqueContainer(name=name, db=db, objects=objs_list)
    _store = deepcopy(mc.store)

    assert len(mc.filter({'col_1': 1})) == 1
    _ids = mc.upsert()
    assert _ids == ['1', '2']
    assert mc.store == _store
    assert len(mc.filter({'col_1': 1})) == 1
    assert mc.count('col_1 == 1') == 1
    assert mc.count() == 2

    # persisting again shouldn't result in new rows
    _ids = mc.upsert()
    assert _ids == ['1', '2']
    assert mc.store == _store
    assert len(mc.filter({'col_1': 1})) == 1
    assert mc.count('col_1 == 1') == 1
    assert mc.count() == 2

    # flushing now shouldn't result in new rows; but store should be empty
    _ids = mc.flush()
    assert _ids == ['1', '2']
    assert mc.store == {}
    assert len(mc.filter({'col_1': 1})) == 0
    assert mc.count('col_1 == 1') == 1
    assert mc.count() == 2

    # adding the same object shouldn't result in new rows
    a.update({'col_1': 42})
    mc.add(a)
    assert len(mc.filter({'col_1': 1})) == 0
    assert len(mc.filter({'col_1': 42})) == 1
    _ids = mc.flush()
    assert mc.count(date='~') == 3
    assert mc.count(date=None) == 2
    assert mc.count('col_1 == 1', date=None) == 0
    assert mc.count('col_1 == 1', date='~') == 1
    assert mc.count('col_1 == 42') == 1
    assert mc.count('col_1 == 42', date='~') == 1
    # adjust for local time...
    #_ts = dt2ts(convert(_start))
    _ts = dt2ts(_start)
    assert _ids == ['1', '1:%s' % _ts]

    # remove the db
    remove_file(_expected_db_path)
Code example #23
def test_func():
    from metrique.core_api import metrique_object
    from metrique.utils import utcnow
    from metrique._version import __version__

    now = utcnow()
    a = {'col_1': 1, 'col_2': now}

    # _oid must be passed in (as arg or kwarg, doesn't matter)
    try:
        metrique_object()
    except TypeError:
        pass
    else:
        assert False

    # same here; _oid still not being passed in
    try:
        metrique_object(**a)
    except TypeError:
        pass
    else:
        assert False

    # _oid can't be null either
    a['_oid'] = None
    try:
        metrique_object(**a)
    except ValueError:
        pass
    else:
        assert False

    a['_oid'] = 1
    o = metrique_object(**a)
    assert o
    assert o['_start'] < utcnow()

    # all objects get the metrique version used to
    # build them applied
    assert o['__v__'] == __version__

    expected_keys = sorted([
        '_hash', '_v', '__v__', '_e', '_oid', '_id', '_start', '_end', 'col_1',
        'col_2'
    ])

    assert sorted(o.keys()) == expected_keys

    # hash should be constant if values don't change
    _hash = o['_hash']
    assert _hash == metrique_object(**a).get('_hash')

    a['col_1'] = 2
    assert _hash != metrique_object(**a).get('_hash')
    a['col_1'] = 3
    # _hash should be different, since we have different col_1 value
    assert _hash != metrique_object(**a).get('_hash')

    # _id should be ignored if passed in; a unique _id will be generated
    # based on obj content (in this case, the string of _oid)
    a['_id'] = 'blabla'
    assert metrique_object(**a).get('_id') != 'blabla'
    assert metrique_object(**a).get('_id') == '1'

    a['_start'] = now
    a['_end'] = now
    o = metrique_object(**a)
    assert o['_start'] == o['_end']

    # _end must come on/after _start
    try:
        a['_end'] = now - 1
        a['_start'] = now
        o = metrique_object(**a)
    except AssertionError:
        pass
    else:
        assert False, '_end was able to be smaller than _start!'

    # _start, if null, will be set to utcnow(); _end if null, stays null
    a['_start'] = None
    a['_end'] = None

    assert metrique_object(**a).get('_start') is not None
    assert metrique_object(**a).get('_end') is None

    # dates (_start/_end) are epoch
    a['_end'] = int(utcnow() + 100)  # +100 to ensure _end >= _start
    o = metrique_object(**a)
    assert isinstance(o['_start'], float)
    assert isinstance(o['_end'], float)

    a['_end'] = None
    # check default object version is set to 0
    o = metrique_object(**a)
    o['_v'] = 0