def test_get_timezone_converter():
    ' args: from_timezone '
    ' convert is always TO utc '
    from metrique.utils import utcnow, get_timezone_converter

    # note: caching timezones always takes a few seconds
    eastern = 'US/Eastern'
    eastern_tz = pytz.timezone(eastern)

    # build one instant in four flavors: aware/naive x utc/eastern
    utc_aware = utcnow(tz_aware=True, as_datetime=True)
    utc_naive = utc_aware.replace(tzinfo=None)
    est_aware = copy(utc_aware).astimezone(eastern_tz)
    est_naive = est_aware.replace(tzinfo=None)

    # no source timezone -> no converter at all
    assert get_timezone_converter(None) is None

    # default converter returns naive utc datetimes
    convert = get_timezone_converter(eastern)
    assert convert(None) is None
    assert convert(est_naive) == utc_naive
    assert convert(est_aware) == utc_naive
    assert convert(est_aware) == convert(est_naive)

    # tz_aware converter returns aware utc datetimes
    convert = get_timezone_converter(eastern, tz_aware=True)
    assert convert(est_naive) == utc_aware
    assert convert(est_aware) == convert(est_naive)
    assert convert(est_aware) == utc_aware
def timestamp_figure(figure, stamp=True):
    """Stamp a matplotlib figure with the current UTC time.

    :param figure: matplotlib figure to annotate
    :param stamp: if a string, used as a prefix label before the time
    """
    # drop seconds:
    label = str(utcnow(as_datetime=True)).split('.')[0][:-3]
    if isinstance(stamp, basestring):
        label = '%s %s' % (stamp, label)
    # semi-transparent gray text in the bottom-right corner
    figure.text(0.95, 0.05, label, fontsize=12, color='gray',
                ha='right', va='bottom', alpha=0.5)
def test_datatypes():
    from metrique import MetriqueContainer
    from metrique.utils import utcnow, remove_file

    # one object exercising each supported value type, incl. empty/null
    obj = {
        "_oid": 1,
        "date": utcnow(),
        "dict_null": {},
        "dict": {'hello': 'world'},
        "bool": True,
        "null": None,
        "list_null": [],
        "list": [1, 2, 3],
    }

    db = 'admin'
    table = 'test'
    container = MetriqueContainer(name=table, db=db)
    # start from a clean slate
    container.drop()
    remove_file(container._proxy._sqlite_path)

    container.add(obj)
    container.upsert()

    # clean up after ourselves
    container.drop()
    remove_file(container._proxy._sqlite_path)
def _run_object_import(self, force, last_update, flush, full_history): workers = self.lconfig.get('workers') # if we're using multiple workers, break the oids # according to worker batchsize, then each worker will # break the batch into smaller sql batch size batches # otherwise, single threaded, use sql batch size w_batch_size = self.lconfig.get('worker_batch_size') s_batch_size = self.lconfig.get('batch_size') # store the time right before the ETL job starts, # so next run, we can catch delta changes b/w # next ETL start and previous (this) new_delta_ts = utcnow() # get list of oids which we plan to update oids, save_delta_ts = self._delta_force(force, last_update) msg = 'Getting Full History' if full_history else \ 'Getting Objects - Current Values' if HAS_JOBLIB and workers > 1: logger.debug('%s (%s@%s)' % (msg, workers, w_batch_size)) runner = Parallel(n_jobs=workers) func = delayed(get_objects) result = runner(func( cube=self._cube, oids=batch, full_history=full_history, flush=flush, cube_name=self.name, config=self.config, config_file=self.config_file, config_key=self.config_key, container=type(self.objects), container_config=self.container_config, proxy=type(self.proxy), proxy_config=self.proxy_config) for batch in batch_gen(oids, w_batch_size)) # merge list of lists (batched) into single list result = [i for l in result for i in l] if not flush: self.objects.extend(result) else: logger.debug('%s (%s@%s)' % (msg, workers, w_batch_size)) result = [] _s = 0 for i, batch in enumerate(batch_gen(oids, s_batch_size)): _e = _s + s_batch_size logger.debug('batch %s: %s-%s of %s' % (i, _s, _e, len(oids))) if full_history: _ = self._activity_get_objects(oids=batch, flush=flush) else: _ = self._get_objects(oids=batch, flush=flush) result.extend(_) _s = _e # save new delta_ts: if flush and save_delta_ts: self.container.proxy.update_delta_ts(new_delta_ts) if flush: return result else: return self
def test_utcnow():
    ' args: as_datetime=False, tz_aware=False '
    from metrique.utils import utcnow

    # default behaivor is as_datetime == False, which return epoch/float
    assert isinstance(utcnow(), float)

    expected_naive = datetime.utcnow().replace(microsecond=0)
    expected_aware = datetime.now(pytz.utc).replace(microsecond=0)
    expected_epoch = int(calendar.timegm(expected_naive.utctimetuple()))

    # FIXME: millisecond resolution?
    assert utcnow(as_datetime=False, drop_micro=True) == expected_epoch
    assert utcnow(as_datetime=True, drop_micro=True) == expected_naive
    result = utcnow(as_datetime=True, tz_aware=True, drop_micro=True)
    assert result == expected_aware
    # epoch is the same whether tz_aware is requested or not
    assert utcnow(as_datetime=False, tz_aware=True,
                  drop_micro=True) == expected_epoch
def metrique_object(_oid, _id=None, _hash=None, _start=None, _end=None,
                    _e=None, _v=None, id=None, __v__=None, **kwargs):
    '''
    Function which takes a dictionary (Mapping) object as input
    and returns back a normalized metrique object.

    Special meta properties are added to each object::

        _oid:   caller-provided object identifier (required)
        _v:     object schema version; defaults to 0
        _id:    generated from _oid/_start/_end; any passed-in _id is ignored
        _hash:  hash of the object's identifying content
        _start: epoch (float) when this object state became active;
                defaults to utcnow()
        _end:   epoch (float) when this state ended; None == still active;
                must be >= _start when set
        _e:     dict of errors; defaults to {}
        __v__:  metrique version used to build the object

    :raises AssertionError: if _start is null or _end < _start
    '''
    # NOTE: we completely ignore incoming 'id' keys!
    # id is RESERVED and ALWAYS expected to be 'autoincrement'
    # upon insertion into DB (though, its optional, depending
    # on backend storage behaivor).
    # explicit None check so falsy-but-non-null ids (0, '') also warn,
    # as promised by the 'non-null' wording below
    if id is not None:
        warnings.warn('non-null "id" keys detected, ignoring them!')
    _e = dict(_e or {})  # expecting a dict with copy() atr
    _v = int(_v or 0)

    # normalize _start/_end to epoch floats
    if not isinstance(_start, float):
        _start = dt2ts(_start) if _start else utcnow(as_datetime=False)
    assert _start is not None, "_start (%s) must be set!" % _start

    if not isinstance(_end, float):
        _end = dt2ts(_end) if _end else None

    _err_msg = "_end(%s) must be >= _start(%s) or None!" % (_end, _start)
    assert _end is None or bool(_end >= _start), _err_msg

    # these meta fields are used to generate unique object _hash
    kwargs['_oid'] = _oid
    kwargs['_v'] = _v
    kwargs['_id'] = gen_id(_oid, _start, _end)  # ignore passed in _id
    # generate unique, consistent object _hash based on 'frozen' obj contents
    # FIXME: make _hash == None valid
    #kwargs['_hash'] = jsonhash(kwargs) if _hash else None
    kwargs['_hash'] = jsonhash(kwargs)

    # add some additional non-hashable meta data
    kwargs['_start'] = _start
    kwargs['_end'] = _end
    kwargs['__v__'] = __v__ or __version__
    kwargs['_e'] = _e
    return kwargs
def get_objects(self, uri, _oid=None, _start=None, _end=None,
                load_kwargs=None, **kwargs):
    '''
    Load and transform csv data into a list of dictionaries. Each row
    in the csv will result in one dictionary in the list.

    :param uri: uri (file://, http(s)://) of csv file to load
    :param _oid: column or func mapped to _oid in all resulting objects
    :param _start: column or func mapped to _start in all resulting objects
    :param _end: column or func mapped to _end in all resulting objects
    :param load_kwargs: kwargs passed through to the csv loader

    _oid, _start and _end may each be a plain value or a function taking
    a single argument (the row being extracted). A function is applied
    per-row and its result assigned; anything else is assigned verbatim
    to every row.
    '''
    load_kwargs = load_kwargs or {}
    rows = load(path=uri, filetype='csv', **load_kwargs)

    counter = itertools.count(1)
    now = utcnow()
    # fall back to an auto-incrementing oid and "active as of now"
    _oid = _oid or (lambda row: counter.next())
    _start = _start or now
    _end = _end or None

    def _applies(v):
        # classes (type) and anything exposing __call__ count as callables
        return type(v) is type or hasattr(v, '__call__')

    for row in rows:
        row['_oid'] = _oid(row) if _applies(_oid) else _oid
        row['_start'] = _start(row) if _applies(_start) else _start
        row['_end'] = _end(row) if _applies(_end) else _end
        self.container.add(row)
    return super(Rows, self).get_objects(**kwargs)
def test_init():
    from metrique.result import Result
    from metrique.utils import utcnow

    # objects missing _start/_end must be rejected with ValueError
    bad = [{'a': 1, 'b': 2}]
    try:
        Result(bad)
    except ValueError:
        pass
    else:
        assert False, "_start and _end must be defined..."

    # a well-formed object is accepted
    good = [{'_start': utcnow(), '_end': None, '_oid': 1, 'b': 2}]
    Result(good)
def test__get_datetime():
    from metrique.utils import _get_datetime, utcnow, dt2ts

    aware = utcnow(tz_aware=True, as_datetime=True)
    naive = aware.replace(tzinfo=None)

    # sanity: comparing tz_aware <> naive datetimes must raise
    try:
        aware == naive
    except TypeError:
        pass
    else:
        assert False

    # default is tz_aware=False: everything normalizes to naive
    assert _get_datetime(aware) == naive
    assert _get_datetime(naive) == naive
    # tz_aware=True: everything normalizes to aware, incl. epoch input
    assert _get_datetime(aware, tz_aware=True) == aware
    assert _get_datetime(naive, tz_aware=True) == aware
    assert _get_datetime(dt2ts(naive), tz_aware=True) == aware
def get_objects(self, **kwargs):
    '''
    Run `rpm -q` command on a {local, remote} system to get back
    details of installed RPMs.

    Default rpm details extracted are as follows:
        * name
        * version
        * release
        * arch
        * nvra
        * license
        * os
        * packager
        * platform
        * sourcepackage
        * sourcerpm
        * summary
    '''
    # build a query format string like '%{name}:::%{version}:::...'
    fmt = ':::'.join('%%{%s}' % f for f in self._fields)
    raw = self._ssh_cmd(fmt) if self.ssh_host else self._local_cmd(fmt)
    if isinstance(raw, basestring):
        raw = unicode(raw, 'utf-8')
    # one line per installed rpm; fields are ':::'-delimited
    rows = [line.strip().split(':::')
            for line in raw.strip().split('\n')]
    now = utcnow()
    host = self.ssh_host or socket.gethostname()
    for row in rows:
        obj = {'host': host, '_start': now}
        for idx, value in enumerate(row):
            # rpm prints '(none)' for unset fields; store as null
            obj[self._fields[idx]] = None if value == '(none)' else value
        obj['_oid'] = '%s__%s' % (host, obj['nvra'])
        self.objects.add(obj)
    return super(Rpm, self).get_objects(**kwargs)
def test_datatypes():
    from metrique import MetriqueContainer
    from metrique.utils import utcnow, remove_file

    db = 'admin'
    table = 'test'

    # single object covering every supported datatype, incl. empty/null
    sample = {"_oid": 1,
              "date": utcnow(),
              "dict_null": {},
              "dict": {'hello': 'world'},
              "bool": True,
              "null": None,
              "list_null": [],
              "list": [1, 2, 3]}

    c = MetriqueContainer(name=table, db=db)
    # ensure a clean db before and after the round-trip
    c.drop()
    remove_file(c._proxy._sqlite_path)
    c.add(sample)
    c.upsert()
    c.drop()
    remove_file(c._proxy._sqlite_path)
def test_api():
    """End-to-end MetriqueContainer API test: construction, mapping
    semantics, filtering, and sqlite persistence (upsert/flush/count)."""
    from metrique import MetriqueContainer, metrique_object
    from metrique.utils import utcnow, remove_file, dt2ts, ts2dt
    _start = ts2dt('2001-01-01')
    _end = ts2dt('2001-01-02')
    a = {'_oid': 1, 'col_1': 1, 'col_2': utcnow(), '_start': _start}
    b = {'_oid': 2, 'col_1': 2, 'col_2': utcnow(), '_start': _start}
    ma = metrique_object(**a)
    mb = metrique_object(**b)
    objs_list = [a, b]
    # container keys are the objects' string _ids
    r_objs_dict = {u'1': ma, u'2': mb}
    c = MetriqueContainer()
    assert not c.name
    assert not c._proxy
    MetriqueContainer()
    # check various forms of passing in objects results in expected
    # container contents
    assert c == {}
    assert MetriqueContainer(objects=c) == {}
    assert MetriqueContainer(objects=objs_list) == r_objs_dict
    mc = MetriqueContainer(objects=objs_list)
    assert MetriqueContainer(objects=mc) == r_objs_dict
    # setting version should result in all objects added having that version
    # note: version -> _v in metrique_object
    assert mc.version == 0
    assert mc['1']['_v'] == 0
    mc = MetriqueContainer(objects=objs_list, version=3)
    assert mc.version == 3
    assert mc['1']['_v'] == 3
    # setting converts key to _id of value after being passed
    # through metrique_object(); notice key int(5) -> str('5')
    mc[5] = {'_oid': 5}
    assert mc['5']['_oid'] == 5
    # also note, that it doesn't actually matter what key we use
    # to set the object... since we always set based on value's
    # auto-generated _id value, anyway
    mc[42] = {'_oid': 5}
    assert mc['5']['_oid'] == 5
    # should have 3 objects, first two, plus the last one
    assert len(mc) == 3
    assert len(mc.values()) == 3
    assert sorted(mc._ids) == ['1', '2', '5']
    assert sorted(mc._oids) == [1, 2, 5]
    # ls() is not supported on bare containers
    try:
        mc.ls()
    except NotImplementedError:
        pass
    else:
        assert False
    mc.extend([{'_oid': 6}, {'_oid': 7}])
    assert sorted(mc._oids) == [1, 2, 5, 6, 7]
    # same _oid, different _start/_end -> two historical versions
    mc.add({'_oid': 8, '_start': _start, '_end': _end, 'col_1': True})
    mc.add({'_oid': 8, '_end': None, 'col_1': False})
    assert sorted(mc._oids) == [1, 2, 5, 6, 7, 8]
    r = mc.filter(where={'_oid': 8})
    assert len(r) == 2
    assert sorted(mc._oids) == [1, 2, 5, 6, 7, 8]
    assert sorted(mc._oids) == [1, 2, 5, 6, 7, 8]
    # pop/del accept both string and int keys
    mc.pop('7')
    assert sorted(mc._oids) == [1, 2, 5, 6, 8]
    mc.pop(6)
    assert sorted(mc._oids) == [1, 2, 5, 8]
    del mc[5]
    assert sorted(mc._oids) == [1, 2, 8]
    assert '1' in mc
    mc.clear()
    assert mc == {}
    db = 'admin'
    name = 'container_test'
    c = MetriqueContainer(name=name, db=db)
    _expected_db_path = os.path.join(cache_dir, 'admin.sqlite')
    # test drop
    c.drop(True)
    assert c.proxy._sqlite_path == _expected_db_path
    # make sure we're working with a clean db
    remove_file(_expected_db_path)
    mc = MetriqueContainer(name=name, db=db, objects=objs_list)
    assert mc.df() is not None
    assert mc.df().empty is False
    # local persistence; filter method queries .objects buffer
    # .upsert dumps data to proxy db; but leaves the data in the buffer
    # .flush dumps data and removes all objects dumped
    # count queries proxy db
    mc = MetriqueContainer(name=name, db=db, objects=objs_list)
    _store = deepcopy(mc.store)
    assert len(mc.filter({'col_1': 1})) == 1
    _ids = mc.upsert()
    assert _ids == ['1', '2']
    assert mc.store == _store
    assert len(mc.filter({'col_1': 1})) == 1
    assert mc.count('col_1 == 1') == 1
    assert mc.count() == 2
    # persisting again shouldn't result in new rows
    _ids = mc.upsert()
    assert _ids == ['1', '2']
    assert mc.store == _store
    assert len(mc.filter({'col_1': 1})) == 1
    assert mc.count('col_1 == 1') == 1
    assert mc.count() == 2
    # flushing now shouldn't result in new rows; but store should be empty
    _ids = mc.flush()
    assert _ids == ['1', '2']
    assert mc.store == {}
    assert len(mc.filter({'col_1': 1})) == 0
    assert mc.count('col_1 == 1') == 1
    assert mc.count() == 2
    # adding the same object shouldn't result in new rows
    a.update({'col_1': 42})
    mc.add(a)
    assert len(mc.filter({'col_1': 1})) == 0
    assert len(mc.filter({'col_1': 42})) == 1
    _ids = mc.flush()
    # date='~' queries all history; date=None only current values
    assert mc.count(date='~') == 3
    assert mc.count(date=None) == 2
    assert mc.count('col_1 == 1', date=None) == 0
    assert mc.count('col_1 == 1', date='~') == 1
    assert mc.count('col_1 == 42') == 1
    assert mc.count('col_1 == 42', date='~') == 1
    # adjust for local time...
    #_ts = dt2ts(convert(_start))
    _ts = dt2ts(_start)
    assert _ids == ['1', '1:%s' % _ts]
    # remove the db
    remove_file(_expected_db_path)
def test_func():
    """Exercise metrique_object(): required args, meta fields,
    hashing, _id generation and _start/_end normalization."""
    from metrique.core_api import metrique_object
    from metrique.utils import utcnow
    from metrique._version import __version__
    now = utcnow()
    a = {'col_1': 1, 'col_2': now}
    # _oid must be passed in (as arg or kwarg, doesn't matter)
    try:
        metrique_object()
    except TypeError:
        pass
    else:
        assert False
    # same here; _oid still not being passed in
    try:
        metrique_object(**a)
    except TypeError:
        pass
    else:
        assert False
    # _oid can't be null either
    a['_oid'] = None
    try:
        metrique_object(**a)
    except ValueError:
        pass
    else:
        assert False
    a['_oid'] = 1
    o = metrique_object(**a)
    assert o
    assert o['_start'] < utcnow()
    # all objects get the metrique version used to
    # build them applied
    assert o['__v__'] == __version__
    expected_keys = sorted(
        ['_hash', '_v', '__v__', '_e', '_oid', '_id',
         '_start', '_end', 'col_1', 'col_2'])
    assert sorted(o.keys()) == expected_keys
    # hash should be constant if values don't change
    _hash = o['_hash']
    assert _hash == metrique_object(**a).get('_hash')
    a['col_1'] = 2
    assert _hash != metrique_object(**a).get('_hash')
    a['col_1'] = 3
    # _hash should be different, since we have different col_1 value
    assert _hash != metrique_object(**a).get('_hash')
    # _id should be ignored if passed in; a unique _id will be generated
    # based on obj content (in this case, string of _oid
    a['_id'] = 'blabla'
    assert metrique_object(**a).get('_id') != 'blabla'
    assert metrique_object(**a).get('_id') == '1'
    a['_start'] = now
    a['_end'] = now
    o = metrique_object(**a)
    assert o['_start'] == o['_end']
    # _end must come on/after _start
    try:
        a['_end'] = now - 1
        a['_start'] = now
        o = metrique_object(**a)
    except AssertionError:
        pass
    else:
        assert False, '_end was able to be smaller than _start!'
    # _start, if null, will be set to utcnow(); _end if null, stays null
    a['_start'] = None
    a['_end'] = None
    assert metrique_object(**a).get('_start') is not None
    assert metrique_object(**a).get('_end') is None
    # dates (_start/_end) are epoch
    a['_end'] = int(utcnow() + 100)  # +100 to ensure _end >= _start
    o = metrique_object(**a)
    assert isinstance(o['_start'], float)
    assert isinstance(o['_end'], float)
    a['_end'] = None
    # check default object version is set to 0
    # BUG FIX: was `o['_v'] = 0` (an assignment), which silently
    # checked nothing; the comment clearly intends an assertion
    o = metrique_object(**a)
    assert o['_v'] == 0
def _run_object_import(self, force, last_update, flush, full_history): workers = self.lconfig.get('workers') # if we're using multiple workers, break the oids # according to worker batchsize, then each worker will # break the batch into smaller sql batch size batches # otherwise, single threaded, use sql batch size w_batch_size = self.lconfig.get('worker_batch_size') s_batch_size = self.lconfig.get('batch_size') # store the time right before the ETL job starts, # so next run, we can catch delta changes b/w # next ETL start and previous (this) new_delta_ts = utcnow() # get list of oids which we plan to update oids, save_delta_ts = self._delta_force(force, last_update) msg = 'Getting Full History' if full_history else \ 'Getting Objects - Current Values' if HAS_JOBLIB and workers > 1: logger.debug('%s (%s@%s)' % (msg, workers, w_batch_size)) runner = Parallel(n_jobs=workers) func = delayed(get_objects) result = runner( func(cube=self._cube, oids=batch, full_history=full_history, flush=flush, cube_name=self.name, config=self.config, config_file=self.config_file, config_key=self.config_key, container=type(self.objects), container_config=self.container_config, proxy=type(self.proxy), proxy_config=self.proxy_config) for batch in batch_gen(oids, w_batch_size)) # merge list of lists (batched) into single list result = [i for l in result for i in l] if not flush: self.objects.extend(result) else: logger.debug('%s (%s@%s)' % (msg, workers, w_batch_size)) result = [] _s = 0 for i, batch in enumerate(batch_gen(oids, s_batch_size)): _e = _s + s_batch_size logger.debug('batch %s: %s-%s of %s' % (i, _s, _e, len(oids))) if full_history: _ = self._activity_get_objects(oids=batch, flush=flush) else: _ = self._get_objects(oids=batch, flush=flush) result.extend(_) _s = _e # save new delta_ts: if flush and save_delta_ts: self.container.proxy.update_delta_ts(new_delta_ts) if flush: return result else: return self
def test_func():
    """Exercise metrique_object(): required args, meta fields,
    hashing, _id generation and _start/_end normalization."""
    from metrique.core_api import metrique_object
    from metrique.utils import utcnow
    from metrique._version import __version__
    now = utcnow()
    a = {'col_1': 1, 'col_2': now}
    # _oid must be passed in (as arg or kwarg, doesn't matter)
    try:
        metrique_object()
    except TypeError:
        pass
    else:
        assert False
    # same here; _oid still not being passed in
    try:
        metrique_object(**a)
    except TypeError:
        pass
    else:
        assert False
    # _oid can't be null either
    a['_oid'] = None
    try:
        metrique_object(**a)
    except ValueError:
        pass
    else:
        assert False
    a['_oid'] = 1
    o = metrique_object(**a)
    assert o
    assert o['_start'] < utcnow()
    # all objects get the metrique version used to
    # build them applied
    assert o['__v__'] == __version__
    expected_keys = sorted([
        '_hash', '_v', '__v__', '_e', '_oid', '_id',
        '_start', '_end', 'col_1', 'col_2'
    ])
    assert sorted(o.keys()) == expected_keys
    # hash should be constant if values don't change
    _hash = o['_hash']
    assert _hash == metrique_object(**a).get('_hash')
    a['col_1'] = 2
    assert _hash != metrique_object(**a).get('_hash')
    a['col_1'] = 3
    # _hash should be different, since we have different col_1 value
    assert _hash != metrique_object(**a).get('_hash')
    # _id should be ignored if passed in; a unique _id will be generated
    # based on obj content (in this case, string of _oid
    a['_id'] = 'blabla'
    assert metrique_object(**a).get('_id') != 'blabla'
    assert metrique_object(**a).get('_id') == '1'
    a['_start'] = now
    a['_end'] = now
    o = metrique_object(**a)
    assert o['_start'] == o['_end']
    # _end must come on/after _start
    try:
        a['_end'] = now - 1
        a['_start'] = now
        o = metrique_object(**a)
    except AssertionError:
        pass
    else:
        assert False, '_end was able to be smaller than _start!'
    # _start, if null, will be set to utcnow(); _end if null, stays null
    a['_start'] = None
    a['_end'] = None
    assert metrique_object(**a).get('_start') is not None
    assert metrique_object(**a).get('_end') is None
    # dates (_start/_end) are epoch
    a['_end'] = int(utcnow() + 100)  # +100 to ensure _end >= _start
    o = metrique_object(**a)
    assert isinstance(o['_start'], float)
    assert isinstance(o['_end'], float)
    a['_end'] = None
    # check default object version is set to 0
    # BUG FIX: was `o['_v'] = 0` (an assignment), which silently
    # checked nothing; the comment clearly intends an assertion
    o = metrique_object(**a)
    assert o['_v'] == 0