def test_osinfo_rpm():
    from metrique import pyclient
    from metrique.utils import sys_call

    if sys_call('which rpm', ignore_errors=True) is None:
        # skip these tests, since we don't have rpm installed
        return

    name = 'osinfo_rpm'
    db_file = os.path.join(cache_dir, '%s.sqlite' % name)
    remove_file(db_file)
    m = pyclient(cube=name)
    m.objects.drop()

    print 'Getting RPM objects; might take a few seconds.'
    m.get_objects()
    print ' ... done.'
    k = len(m.objects)
    assert k > 0

    name = 'bash'
    _filtered = m.objects.filter(where={'name': name})
    assert len(_filtered) == 1
    print 'Object: %s' % _filtered

    _ids = m.objects.flush()
    assert len(_ids) == k
    remove_file(db_file)
def get_objects(cube, oids, full_history, flush=False, cube_name=None,
                config=None, config_file=None, config_key=None,
                container=None, container_config=None,
                proxy=None, proxy_config=None, **kwargs):
    # force a single worker to avoid 'nested' (invalid) joblib runs
    kwargs['workers'] = 1
    m = pyclient(cube=cube, name=cube_name, config=config,
                 config_file=config_file, config_key=config_key,
                 container=container, container_config=container_config,
                 proxy=proxy, proxy_config=proxy_config, **kwargs)
    results = []
    batch_size = m.lconfig.get('batch_size')
    for batch in batch_gen(oids, batch_size):
        if full_history:
            _ = m._activity_get_objects(oids=batch, flush=flush)
        else:
            _ = m._get_objects(oids=batch, flush=flush)
        results.extend(_)
    return results
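# Minimal usage sketch (not part of the source; the cube name and oid range
# are illustrative assumptions). get_objects() builds its own pyclient and
# forces workers=1, so it can be dispatched safely to a separate process,
# where nested joblib parallelism would be invalid.
def example_get_objects():
    oids = range(1000)
    # current-state fetch; full_history=True would instead replay each
    # object's activity history via _activity_get_objects()
    return get_objects(cube='gitdata_commit', oids=oids,
                       full_history=False, flush=True)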
def test_user_api():
    fingerprint = '894EE1CEEA61DC3D7D20327C4200AD1F2F22F46C'
    m = pyclient(name='test_user', gnupg_dir=GNUPG_DIR,
                 gnupg_fingerprint=fingerprint, **config)
    assert m.config.gnupg_fingerprint == fingerprint

    assert m.user_register(username, password)
    # registering the same user again should raise
    try:
        m.user_register(username, password)
    except Exception:
        pass

    aboutme = m.aboutme()
    assert aboutme is not None

    assert m.config.gnupg_pubkey is not None
    pubkey = m.config.gnupg_pubkey
    gnupg = {'pubkey': pubkey, 'fingerprint': fingerprint}
    result = m.user_update_profile(gnupg=gnupg)
    assert result['previous'] == aboutme
    assert 'gnupg' in result['now']
    assert result['now']['gnupg']['fingerprint'] == fingerprint
    assert result['now']['gnupg']['pubkey'] == pubkey

    assert m.user_remove()
def test_api():
    m = pyclient(**config)
    m.user_remove(username, quiet=True)  # make sure it doesn't exist already
    assert m.user_register(username, password)

    cubes = ['csvcube_local', 'jsoncube_local']
    for cube in cubes:
        _cube = m.get_cube(cube=cube, pkgs=pkgs, cube_paths=paths, init=True)
        _cube.cookiejar_clear()
        _cube.cube_drop(quiet=True)  # make sure it doesn't exist already
        assert _cube.cube_register()

        result = _cube.extract()
        assert result is not None
        assert len(result) > 0  # we should get back some results

        df = _cube.find(fields='~', date='~')
        assert df is not None
        # default obj type returned should be metrique.result.Result
        assert isinstance(df, Result)

        # raw should return a list of dicts
        raw = _cube.find(raw=True, fields='~', date='~')
        assert isinstance(raw, list)
        assert len(raw) > 0
        assert isinstance(raw[0], dict)

        k = len(result)
        assert k == _cube.count(date='~')

        # a second extract of the same data should not result in
        # new objects being saved
        result = _cube.extract()
        assert k == _cube.count(date='~')

        # rename the cube
        name = _cube.name[:]
        new_name = 'renamed_%s' % name
        assert _cube.cube_rename(new_name=new_name)
        assert _cube.name == new_name
        # count should remain the same in the renamed cube
        assert k == _cube.count(date='~')
        assert _cube.cube_rename(new_name=name)

        # drop the cube
        assert _cube.cube_drop()
        assert _cube.cube_id not in _cube.cube_list_all()

    # with the last cube, do a few more things...
    # re-register
    _cube = m.get_cube(cube=cubes[0], pkgs=pkgs, cube_paths=paths, init=True)
    assert _cube.cube_register()
    name = '%s__%s' % (username, _cube.name)
    assert name in _cube.cube_list_all()
    # drop the cube
    assert _cube.cube_drop()
    assert name not in _cube.cube_list_all()
    # then drop the user
    assert _cube.user_remove()
def test_csvdata():
    from metrique import pyclient

    name = 'us_idx_eod'
    db_file = os.path.join(cache_dir, '%s.sqlite' % name)
    remove_file(db_file)
    m = pyclient(cube='csvdata_rows', name=name)
    m.objects.drop()

    uri = os.path.join(fixtures, 'us-idx-eod.csv')
    m.get_objects(uri=uri, load_kwargs=dict(use_pandas=True))
    assert m.objects
    assert len(m.objects) == 14
    assert m.objects.fields == [
        '__v__', '_e', '_end', '_hash', '_id', '_oid',
        '_start', '_v', 'close', 'date', 'open', 'symbol']

    # expected object, e.g.:
    # {u'symbol': u'$AJT', u'date': u'09/08/2008', u'close': 18.15, u'_start':
    #  datetime.datetime(2014, 5, 28, 14, 9, 22, 999145), u'open': 17.84,
    #  u'_oid': 11, u'_v': 0, u'_end': None, u'_hash':
    #  u'76e81838bdde51f693f8a09a2308557a7962aa78', u'__v__': u'0.3.1-1a',
    #  u'_e': {}, u'_id': u'11'}
    _ids = m.objects._ids
    _hash = '76e81838bdde51f693f8a09a2308557a7962aa78'
    _oid = 11
    _filtered = m.objects.filter(where={'_oid': _oid})
    print 'Object: %s' % _filtered
    assert len(_filtered) == 1
    assert m.objects['11']['_hash'] == _hash  # check _hash is as expected
    assert m.objects['11']['symbol'] == '$AJT'
    assert m.objects.upsert() == _ids
    # still there...
    assert m.objects['11']['symbol'] == '$AJT'
    # persist and remove from container
    assert m.objects.flush() == _ids
    assert m.objects == {}

    objs = m.objects.find('_oid == %s' % _oid, one=True, raw=True, fields='~')
    o = {k: v for k, v in objs.items() if k != 'id'}
    _o = dict(_filtered[0])
    # we can't ensure float precision is exact as it goes in/out,
    # but it should be close...
    assert o['_start'] - _o['_start'] <= .1
    # FIXME: ideally, _e would come back out as it went in!
    # it goes in as {} but comes out as None
    for k in ['_start', '_e']:
        del o[k]
        del _o[k]
    assert o == _o
    remove_file(db_file)
def test_gitdata_commit():
    from metrique import pyclient
    from metrique.utils import remove_file

    name = 'gitdata_commit'
    db_file = os.path.join(cache_dir, '%s.sqlite' % name)
    remove_file(db_file)
    uri_1 = 'https://github.com/kejbaly2/tornadohttp.git'
    uri_2 = 'https://github.com/kejbaly2/metrique.git'
    m = pyclient(cube=name)
    m.objects.drop()

    m.get_objects(uri=uri_1)
    k = len(m.objects)
    assert k > 0
    # re-pulling the same repo should not add new objects
    m.get_objects(uri=uri_1, pull=True)
    assert k == len(m.objects)

    # expected object, e.g.:
    # {u'files': {u'setup.py': {u'removed': 0, u'added': 3},
    #  u'tornadohttp/tornadohttp.py': {u'removed': 7, u'added': 10},
    #  u'tornadohttp/__init__.py': {u'removed': 0, u'added': 7},
    #  u'tornadohttp/_version.py': {u'removed': 0, u'added': 9}}, u'committer':
    #  u'Chris Ward <*****@*****.**>', u'added': 29, u'extra': None,
    #  u'author_time': 1396355424, u'related': None, u'repo_uri':
    #  u'https://github.com/kejbaly2/tornadohttp.git', u'acked_by': None,
    #  u'resolves': None, u'message': u'version bump; logdir and other configs
    #  renamed\n', u'_start': datetime.datetime(2014, 4, 1, 12, 30, 24),
    #  u'_oid': u'99dc1e5c4e3ab2c8ab5510e50a3edf64f9fcc705', u'removed': 7,
    #  u'mergetag': None, u'author': u'Chris Ward <*****@*****.**>', u'_v': 0,
    #  u'tree': u'66406ded27ba129ad1639928b079b821ab416fed', u'_end': None,
    #  u'signed_off_by': None, u'parents':
    #  ['78b311d90e35eb36016a7f41e75657754dbe0784'], u'_hash':
    #  u'79a11c24ac814f001abcd27963de761ccb37a908', u'__v__': u'0.3.1-1a',
    #  u'_e': {}, u'_id': u'99dc1e5c4e3ab2c8ab5510e50a3edf64f9fcc705'}
    _hash = '79a11c24ac814f001abcd27963de761ccb37a908'
    _oid = '99dc1e5c4e3ab2c8ab5510e50a3edf64f9fcc705'
    _filtered = m.objects.filter(where={'_oid': _oid})
    assert len(_filtered) == 1
    print 'Object: %s' % _filtered
    assert _filtered[0]['_hash'] == _hash
    _ids = m.objects.flush()
    assert len(_ids) == k

    # load a second repo to make sure our sessions are working as expected
    # and a second call works as expected; e.g., in the past there was a bug
    # where we didn't load the table into metadata if the table wasn't being
    # created for the first time, so non-standard types weren't defined in
    # the session...
    m.get_objects(uri=uri_2, flush=True)
    remove_file(m.repo.path, force=True)
    remove_file(db_file)
def test_load_json():
    from metrique import pyclient
    from metrique.utils import load

    name = 'meps'
    db_file = os.path.join(cache_dir, '%s.sqlite' % name)
    remove_file(db_file)

    def _oid_func(o):
        o['_oid'] = o['id']
        return o

    m = pyclient(name=name)
    m.objects.drop()

    path = os.path.join(fixtures, 'meps.json')
    objects = load(path, _oid=_oid_func, orient='index')
    assert len(objects) == 736
    m.objects.extend(objects)
    assert len(m.objects)

    # expected object, e.g.:
    # {u'phone_stb': u'+33 (0)3 88 1 75224', u'comms': None, u'country':
    #  u'Latvia', u'_start': ...
    #  u'_oid': 28615, u'name': u'Roberts Z\u012aLE', u'url':
    #  u'http://www.europarl.euro...rs/expert/committees/view.do?id=28615',
    #  u'_v': 0, u'phone_bxl': u'+32 (0)2 28 45224', u'_end': None, u'_hash':
    #  u'e8d2a6943734a80f268d112514040b4707915181', u'__v__': u'0.3.1-1a',
    #  u'party': u'European Conservatives and Reformists', u'_e': {}, u'_id':
    #  u'28615', u'email': None}
    _hash = 'e8d2a6943734a80f268d112514040b4707915181'
    _filtered = m.objects.filter(where={'_oid': 28615})
    assert len(_filtered) == 1
    print 'Object: %s' % _filtered
    assert _filtered[0]['_hash'] == _hash

    _ids = m.objects.flush()
    assert sorted(_ids) == sorted(map(unicode, [o['_oid'] for o in objects]))
    assert m.objects == {}
    remove_file(db_file)
def get_objects(cube, oids, field_order, start, save=True, cube_name=None,
                autosnap=True, **kwargs):
    m = pyclient(cube=cube, name=cube_name, **kwargs)
    return m._get_objects(oids=oids, field_order=field_order, start=start,
                          save=save, autosnap=autosnap)
def get_full_history(cube, oids, save=True, cube_name=None, autosnap=False,
                     **kwargs):
    m = pyclient(cube=cube, name=cube_name, **kwargs)
    return m._activity_get_objects(oids=oids, save=save, autosnap=autosnap)
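# Minimal usage sketch (not part of the source; the cube name, oids and
# argument values are illustrative assumptions). get_objects() snapshots
# the current state of each oid, while get_full_history() replays each
# object's activity history; both build a private pyclient, so each can
# run inside its own worker process.
def example_extract():
    oids = range(500)
    current = get_objects(cube='gitdata_commit', oids=oids,
                          field_order=None, start=None, save=True)
    history = get_full_history(cube='gitdata_commit', oids=oids, save=True)
    return current, history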
def test_admin():
    m = pyclient(**config)
    m.user_remove(username, quiet=True)  # make sure it doesn't exist already
    assert m.user_register(username, password)
    m.user_remove(username, quiet=True)  # clean up after the test