def test_write_file():
    from metrique.utils import write_file, rand_chars, read_file
    from metrique.utils import remove_file
    f1 = os.path.join(cache_dir, rand_chars())
    write_file(f1, 'hello world')
    assert exists(f1)
    assert read_file(f1) == 'hello world'
    # can't overwrite files with default settings
    try:
        write_file(f1, 'hello world', force=False, exists_ext=None)
    except RuntimeError:
        pass
    else:
        assert False, "File overwritten without force=True"
    write_file(f1, 'hello metrique', force=True)
    assert exists(f1)
    assert read_file(f1) == 'hello metrique'
    write_file(f1, 'hello world', mode='a')
    assert exists(f1)
    assert read_file(f1) == 'hello metriquehello world'
    # don't remove the file; write it again; this time a '.new' file is
    # written alongside, since exists_ext is not null
    write_file(f1, 'hello world', force=False, exists_ext='new')
    assert read_file(f1)
    assert read_file('%s.new' % f1)
    remove_file(f1)


def test_file_is_empty():
    from metrique.utils import file_is_empty, write_file, rand_chars
    from metrique.utils import remove_file
    f1 = os.path.join(cache_dir, rand_chars(prefix='empty_test_1'))
    f2 = os.path.join(cache_dir, rand_chars(prefix='not_empty_test_2'))
    write_file(f1, '')
    write_file(f2, 'not empty')
    assert file_is_empty(f1)
    assert exists(f1)
    assert file_is_empty(f1, remove=True)
    assert not exists(f1)
    assert not file_is_empty(f2)
    try:
        # not a valid path
        file_is_empty('DOES_NOT_EXIST')
    except RuntimeError:
        pass
    else:
        assert False
    try:
        # not a valid path
        file_is_empty(True)
    except RuntimeError:
        pass
    else:
        assert False
    remove_file(f2)
    assert not exists(f2)


def test_osinfo_rpm():
    from metrique import pyclient
    from metrique.utils import sys_call
    if sys_call('which rpm', ignore_errors=True) is None:
        # skip these tests, since we don't have rpm installed
        return
    name = 'osinfo_rpm'
    db_file = os.path.join(cache_dir, '%s.sqlite' % name)
    remove_file(db_file)
    m = pyclient(cube=name)
    m.objects.drop()
    print 'Getting RPM objects; might take a few seconds.'
    m.get_objects()
    print ' ... done.'
    k = len(m.objects)
    assert k > 0
    name = 'bash'
    _filtered = m.objects.filter(where={'name': name})
    assert len(_filtered) == 1
    print 'Object: %s' % _filtered
    _ids = m.objects.flush()
    assert len(_ids) == k
    remove_file(db_file)


def test_get_pid():
    from metrique.utils import get_pid, rand_chars, remove_file
    assert get_pid() == 0
    assert get_pid(None) == 0
    path = os.path.join(cache_dir, '%s.pid' % rand_chars(prefix='get_pid'))
    # invalid path returns 0
    assert get_pid(path) == 0
    with open(path, 'w') as f:
        f.write("1")
    assert exists(path)
    assert get_pid(path) == 1
    remove_file(path)
    with open(path, 'w') as f:
        f.write("a")
    try:
        get_pid(path)
    except ValueError:
        pass
    else:
        assert False
    remove_file(path)
    # invalid path returns 0
    assert get_pid("boomboompow") == 0


def test_sqlite3():
    from metrique.utils import remove_file
    from metrique.sqlalchemy import SQLAlchemyProxy
    _expected_db_path = os.path.join(cache_dir, 'test.sqlite')
    remove_file(_expected_db_path)
    DB = 'test'
    TABLE = 'bla'
    p = SQLAlchemyProxy(db=DB, table=TABLE)
    p.initialize()
    assert p._engine_uri == 'sqlite:///%s' % _expected_db_path
    db_tester(p)
    p.drop()
    try:
        assert p.count() == 0
    except RuntimeError:
        pass
    else:
        assert False
    assert p.ls() == []
    remove_file(_expected_db_path)


def cython(args=None, cmd=''):
    cmd = getattr(args, 'command', cmd)
    if cmd == 'compile':
        utils.sys_call('./setup.py build_ext --inplace')
    elif cmd == 'clean':
        # FIXME: add *.c too?
        utils.remove_file('metrique/*.so')


def test_datatypes():
    from metrique import MetriqueContainer
    from metrique.utils import utcnow, remove_file
    o = {"_oid": 1,
         "date": utcnow(),
         "dict_null": {},
         "dict": {'hello': 'world'},
         "bool": True,
         "null": None,
         "list_null": [],
         "list": [1, 2, 3]}
    db = 'admin'
    table = 'test'
    c = MetriqueContainer(name=table, db=db)
    c.drop()
    remove_file(c._proxy._sqlite_path)
    c.add(o)
    c.upsert()
    c.drop()
    remove_file(c._proxy._sqlite_path)


def celerybeat(args):
    if args.command == "start":
        celerybeat_run(args)
    elif args.command == "stop":
        utils.terminate(CELERYBEAT_PIDFILE)
    elif args.command == "clean":
        utils.remove_file(CELERYBEAT_PIDFILE)
    else:
        raise ValueError("unknown command %s" % args.command)


def test_csvdata():
    '''
    '''
    from metrique import pyclient
    name = 'us_idx_eod'
    db_file = os.path.join(cache_dir, '%s.sqlite' % name)
    remove_file(db_file)
    m = pyclient(cube='csvdata_rows', name=name)
    m.objects.drop()
    uri = os.path.join(fixtures, 'us-idx-eod.csv')
    m.get_objects(uri=uri, load_kwargs=dict(use_pandas=True))
    assert m.objects
    assert len(m.objects) == 14
    assert m.objects.fields == ['__v__', '_e', '_end', '_hash', '_id',
                                '_oid', '_start', '_v', 'close', 'date',
                                'open', 'symbol']
    # {u'symbol': u'$AJT', u'date': u'09/08/2008', u'close': 18.15, u'_start':
    # datetime.datetime(2014, 5, 28, 14, 9, 22, 999145), u'open': 17.84,
    # u'_oid': 11, u'_v': 0, u'_end': None, u'_hash':
    # u'76e81838bdde51f693f8a09a2308557a7962aa78', u'__v__': u'0.3.1-1a',
    # u'_e': {}, u'_id': u'11'}
    _ids = m.objects._ids
    _hash = '76e81838bdde51f693f8a09a2308557a7962aa78'
    _oid = 11
    _filtered = m.objects.filter(where={'_oid': _oid})
    print 'Object: %s' % _filtered
    assert len(_filtered) == 1
    assert m.objects['11']['_hash'] == _hash  # check _hash is as expected
    assert m.objects['11']['symbol'] == '$AJT'
    assert m.objects.upsert() == _ids
    # still there...
    assert m.objects['11']['symbol'] == '$AJT'
    # persist and remove from container
    assert m.objects.flush() == _ids
    assert m.objects == {}
    objs = m.objects.find('_oid == %s' % _oid, one=True, raw=True,
                          fields='~')
    o = {k: v for k, v in objs.items() if k != 'id'}
    _o = dict(_filtered[0])
    # we can't assure float precision is exact as it goes in/out
    # but it should be close...
    assert o['_start'] - _o['_start'] <= .1
    # FIXME: ideally, _e would come back out as it went in!
    # not going in as {} but come out as None
    for k in ['_start', '_e']:
        del o[k]
        del _o[k]
    assert o == _o
    remove_file(db_file)


def test_debug_setup(capsys):
    from metrique.utils import debug_setup, remove_file
    import logging
    #logging.basicConfig(level=logging.DEBUG)
    log_file = '__test_log.log'

    # by default, logging -> file, not stdout
    _l = debug_setup()
    assert _l
    assert _l.level == logging.INFO
    assert _l.name == 'metrique'
    assert len(_l.handlers) == 1
    assert isinstance(_l.handlers[0], logging.FileHandler)

    logger_test = logging.getLogger('test')
    log_format = '%(message)s'
    _l = debug_setup(logger=logger_test, log_format=log_format)
    assert _l is logger_test
    assert _l.name == 'test'

    _l = debug_setup(logger=logger_test, log_format=log_format,
                     log2file=False, log2stdout=True)
    _l.info('*')
    out, err = [x.strip() for x in capsys.readouterr()]
    #assert out == ''
    assert err == '*'

    # no output should be seen for info(), since we set level
    # to warn, but issue an info call
    _l = debug_setup(logger=logger_test, log_format=log_format,
                     log2file=False, log2stdout=True, level=logging.WARN)
    _l.info('*')
    out, err = [x.strip() for x in capsys.readouterr()]
    #assert out == ''
    assert err == ''
    _l.warn('*')
    out, err = [x.strip() for x in capsys.readouterr()]
    #assert out == ''
    assert err == '*'

    _lf = os.path.join(log_dir, log_file)
    try:
        # output should be redirected to disk
        # reduce output to only include the message
        _l = debug_setup(logger=logger_test, truncate=True, log_dir=log_dir,
                         log_file=log_file, log_format='%(message)s')
        _l.info('*')
        lf = open(_lf).readlines()
        text = ''.join(lf).strip()
        assert text == '*'
    finally:
        remove_file(_lf)


def test_remove_file():
    from metrique.utils import remove_file, rand_chars, make_dirs
    assert remove_file(None) == []
    assert remove_file('') == []
    assert remove_file('DOES_NOT_EXIST') == []
    path = os.path.join(cache_dir, rand_chars())
    assert not exists(path)
    open(path, 'w').close()
    assert exists(path)
    assert remove_file(path) == path
    assert not exists(path)
    open(path, 'w').close()
    assert remove_file(path) == path
    assert not exists(path)
    assert remove_file('DOES_NOT_EXIST') == []
    # build a simple nested directory tree
    path = os.path.join(cache_dir, rand_chars())
    assert make_dirs(path) == path
    try:
        remove_file(path)
    except RuntimeError:
        pass
    else:
        assert False, "Managed to remove a random path"
    assert remove_file(path, force=True) == path


def supervisord(args):
    if args.command == "start":
        supervisord_run(args)
    elif args.command == "stop":
        utils.terminate(SUPERVISORD_PIDFILE)
    elif args.command == "clean":
        utils.remove_file(SUPERVISORD_PIDFILE)
    elif args.command == "reload":
        utils.terminate(SUPERVISORD_PIDFILE, signal.SIGHUP)
    else:
        raise ValueError("unknown command %s" % args.command)


def test_backup():
    from metrique.utils import backup, rand_chars, remove_file
    f1 = os.path.join(cache_dir, rand_chars(prefix='backup'))
    f2 = os.path.join(cache_dir, rand_chars(prefix='backup'))
    open(f1, 'w').close()
    open(f2, 'w').close()
    assert all(exists(f) for f in (f1, f2))
    saveas = backup('%s %s' % (f1, f2))
    assert exists(saveas)
    remove_file(saveas)
    saveas = backup((f1, f2))
    assert exists(saveas)
    remove_file((saveas, f1, f2))


def backup_clean(args, path, prefix):
    keep = args.keep if args.keep != 0 else 3
    path = pjoin(path, prefix) + '*'
    files = sorted(glob.glob(path), reverse=True)
    to_remove = files[keep:]
    logger.debug('Removing %i backups' % len(to_remove))
    [utils.remove_file(f) for f in to_remove]


def test_load_json():
    from metrique import pyclient
    from metrique.utils import load
    name = 'meps'
    db_file = os.path.join(cache_dir, '%s.sqlite' % name)
    remove_file(db_file)

    def _oid_func(o):
        o['_oid'] = o['id']
        return o

    m = pyclient(name=name)
    m.objects.drop()
    path = os.path.join(fixtures, 'meps.json')
    objects = load(path, _oid=_oid_func, orient='index')
    assert len(objects) == 736
    m.objects.extend(objects)
    assert len(m.objects)
    # {u'phone_stb': u'+33 (0)3 88 1 75224', u'comms': None, u'country':
    # u'Latvia', u'_start': ...
    # u'_oid': 28615, u'name': u'Roberts Z\u012aLE', u'url':
    # u'http://www.europarl.euro...rs/expert/committees/view.do?id=28615',
    # u'_v': 0, u'phone_bxl': u'+32 (0)2 28 45224', u'_end': None, u'_hash':
    # u'e8d2a6943734a80f268d112514040b4707915181', u'__v__': u'0.3.1-1a',
    # u'party': u'European Conservatives and Reformists', u'_e': {}, u'_id':
    # u'28615', u'email': None}
    _hash = 'e8d2a6943734a80f268d112514040b4707915181'
    _filtered = m.objects.filter(where={'_oid': 28615})
    assert len(_filtered) == 1
    print 'Object: %s' % _filtered
    assert _filtered[0]['_hash'] == _hash
    _ids = m.objects.flush()
    assert sorted(_ids) == sorted(map(unicode, [o['_oid'] for o in objects]))
    assert m.objects == {}
    remove_file(db_file)


def test_urlretrieve():
    from metrique.utils import urlretrieve, remove_file
    uri = 'https://mysafeinfo.com/api/data?list=days&format=csv'
    saveas = os.path.join(cache_dir, 'test_download.csv')
    remove_file(saveas)
    _path = urlretrieve(uri, saveas=saveas, cache_dir=cache_dir)
    assert _path == saveas
    assert exists(_path)
    assert os.stat(_path).st_size > 0
    remove_file(_path)
    try:
        urlretrieve('does not exist')
    except RuntimeError:
        pass
    else:
        assert False, "Retrieved non-existing url"


def test_make_dirs():
    from metrique.utils import make_dirs, rand_chars, remove_file
    d_1 = rand_chars(prefix='make_dirs')
    d_2 = rand_chars()
    base = os.path.join(tmp_dir, d_1)
    rand_dirs = os.path.join(base, d_2)
    path = os.path.join(tmp_dir, rand_dirs)
    assert make_dirs(path) == path
    assert exists(path)
    remove_file(base, force=True)
    assert not exists(base)
    for _ in ['', 'relative/dir']:
        # requires absolute path!
        try:
            make_dirs(_)
        except OSError:
            pass
        else:
            assert False, "Made dirs with relative paths"


def test_rsync():
    from metrique.utils import rsync, sys_call, rand_chars, remove_file
    from metrique.utils import read_file
    #remove_file(f_1)
    #remove_file(dest, force=True)
    if not sys_call('which rsync'):
        return  # skip this test if rsync isn't available
    fname = rand_chars(prefix='rsync')
    path = os.path.join(cache_dir, fname)
    with open(path, 'w') as f:
        f.write('test')
    dest = os.path.join(tmp_dir, 'rsync')
    rsync(targets=path, dest=dest)
    assert read_file(os.path.join(dest, fname)) == 'test'
    with open(path, 'w') as f:
        f.write('test 2')
    rsync(targets=path, dest=dest)
    assert read_file(os.path.join(dest, fname)) == 'test 2'
    remove_file(path, force=True)
    remove_file(dest, force=True)


def test_git_clone():
    # FIXME: THIS ONE IS CAUSING SOME INTERESTING PROBLEMS?
    from metrique.utils import git_clone, safestr, remove_file
    uri = 'https://github.com/kejbaly2/tornadohttp.git'
    local_path = os.path.join(cache_dir, safestr(uri))
    remove_file(local_path, force=True)

    _t = time()
    repo = git_clone(uri, pull=False, reflect=False, cache_dir=cache_dir)
    assert repo == local_path
    not_cached = time() - _t

    _t = time()
    repo = git_clone(uri, pull=False, reflect=True, cache_dir=cache_dir)
    cached = time() - _t
    assert repo.path == local_path
    assert cached < not_cached

    git_clone(uri, pull=True, reflect=False, cache_dir=cache_dir)
    remove_file(local_path, force=True)


def trash(args=None):
    named = getattr(args, 'named', None)
    named = '%s-%s' % (named[0], NOW) if named else NOW
    supervisord_terminate()
    celerybeat_terminate()
    celeryd_terminate()
    nginx_terminate()
    postgresql_stop()

    dest = pjoin(TRASH_DIR, 'metrique-%s' % named)
    logger.warn('Trashing existing .metrique -> %s' % dest)
    for f in [ETC_DIR, PIDS_DIR, LOGS_DIR, CACHE_DIR, TMP_DIR,
              POSTGRESQL_PGDATA_PATH]:
        _dest = os.path.join(dest, os.path.basename(f))
        try:
            utils.move(f, _dest)
        except (IOError, OSError) as e:
            logger.error(e)
            continue

    firstboot_glob = os.path.join(PREFIX_DIR, '.firstboot*')
    utils.remove_file(firstboot_glob)


def test_move():
    from metrique.utils import move, rand_chars, remove_file
    dest = tmp_dir
    rel_path_1 = rand_chars(prefix='move')
    path_1 = os.path.join(cache_dir, rel_path_1)
    _path_1 = os.path.join(dest, rel_path_1)
    open(path_1, 'a').close()
    rel_path_2 = rand_chars(prefix='move')
    path_2 = os.path.join(cache_dir, rel_path_2)
    open(path_2, 'a').close()
    paths = (path_1, path_2)

    assert exists(path_1)
    move(path_1, dest)
    assert not exists(path_1)
    move(_path_1, cache_dir)
    assert exists(path_2)
    move(paths, dest)
    assert not any((exists(path_1), exists(path_2)))
    remove_file(paths, force=True)
    remove_file(dest, force=True)
    remove_file(tmp_dir, force=True)

    try:
        move('DOES_NOT_EXST', 'SOMEWHERE')
    except IOError:
        pass
    else:
        assert False, "Moving DOES_NOT_EXIST to SOMEWHERE did not throw an "\
                      "exception"
    assert move('DOES_NOT_EXST', 'SOMEWHERE', quiet=True) == []


def _deploy_virtenv_init(args):
    _virtenv = utils.active_virtualenv()
    virtenv = getattr(args, 'virtenv') or _virtenv
    # skip if we're already in the targeted virtenv...
    if virtenv and virtenv != _virtenv:
        # we can't already be in a virtenv when running virtualenv.main()
        utils.virtualenv_deactivate()

    # scratch the existing virtenv directory, if requested
    if args.trash:
        utils.remove_file(virtenv, force=True)
    if args.trash_home:
        trash()

    # virtualenv.main; pass in only the virtenv path
    sys.argv = sys.argv[0:1] + [virtenv]
    # run the virtualenv script to install the virtenv
    virtualenv.main()

    # activate the newly installed virtenv
    utils.virtualenv_activate(args.virtenv)
    return virtenv


def test_clear_stale_pids():
    from metrique.utils import clear_stale_pids, remove_file
    pid_dir = '/tmp'
    # we assume we'll have a pid 1, but never -1
    pids = map(unicode, [1, -1])
    pids_str = '1, -1'
    pid_files = [os.path.join(pid_dir, '%s.pid' % pid) for pid in pids]
    prefix = ''
    for pid, pid_file in zip(pids, pid_files):
        with open(pid_file, 'w') as f:
            f.write(str(pid))
    running = clear_stale_pids(pids, pid_dir, prefix)
    assert '1' in running
    assert '-1' not in running
    assert exists(pid_files[0])
    assert not exists(pid_files[1])
    running = clear_stale_pids(pids_str, pid_dir, prefix)
    assert '1' in running
    assert '-1' not in running
    assert exists(pid_files[0])
    assert not exists(pid_files[1])
    remove_file(pid_files[0])


def deploy(args):
    virtenv = _deploy_virtenv_init(args)

    # make sure we have some basic defaults configured in the environment
    firstboot()

    # install all dependencies first, before installing metrique
    _deploy_extras(args)

    cmd = 'install'
    setup(args, cmd, pip=False)

    if args.develop:
        path = pjoin(virtenv, 'lib/python2.7/site-packages/metrique*')
        utils.remove_file(path, force=True)
        develop(args)

    if (args.all or args.cython) and args.develop:
        cython(cmd='compile')

    # run py.test after install
    if args.test:
        utils.sys_call('coverage run --source=metrique -m py.test tests')


def test_gitdata_commit():
    from metrique import pyclient
    from metrique.utils import remove_file
    name = 'gitdata_commit'
    db_file = os.path.join(cache_dir, '%s.sqlite' % name)
    remove_file(db_file)
    uri_1 = 'https://github.com/kejbaly2/tornadohttp.git'
    uri_2 = 'https://github.com/kejbaly2/metrique.git'
    m = pyclient(cube=name)
    m.objects.drop()
    m.get_objects(uri=uri_1)
    k = len(m.objects)
    assert k > 0
    m.get_objects(uri=uri_1, pull=True)
    assert k == len(m.objects)
    # {u'files': {u'setup.py': {u'removed': 0, u'added': 3},
    # u'tornadohttp/tornadohttp.py': {u'removed': 7, u'added': 10},
    # u'tornadohttp/__init__.py': {u'removed': 0, u'added': 7},
    # u'tornadohttp/_version.py': {u'removed': 0, u'added': 9}}, u'committer':
    # u'Chris Ward <*****@*****.**>', u'added': 29, u'extra': None,
    # u'author_time': 1396355424, u'related': None, u'repo_uri':
    # u'https://github.com/kejbaly2/tornadohttp.git', u'acked_by': None,
    # u'resolves': None, u'message': u'version bump; logdir and other configs
    # renamed\n', u'_start': datetime.datetime(2014, 4, 1, 12, 30, 24),
    # u'_oid': u'99dc1e5c4e3ab2c8ab5510e50a3edf64f9fcc705', u'removed': 7,
    # u'mergetag': None, u'author': u'Chris Ward <*****@*****.**>', u'_v': 0,
    # u'tree': u'66406ded27ba129ad1639928b079b821ab416fed', u'_end': None,
    # u'signed_off_by': None, u'parents':
    # ['78b311d90e35eb36016a7f41e75657754dbe0784'], u'_hash':
    # u'79a11c24ac814f001abcd27963de761ccb37a908', u'__v__': u'0.3.1-1a',
    # u'_e': {}, u'_id': u'99dc1e5c4e3ab2c8ab5510e50a3edf64f9fcc705'}
    _hash = '79a11c24ac814f001abcd27963de761ccb37a908'
    _oid = '99dc1e5c4e3ab2c8ab5510e50a3edf64f9fcc705'
    _filtered = m.objects.filter(where={'_oid': _oid})
    assert len(_filtered) == 1
    print 'Object: %s' % _filtered
    assert _filtered[0]['_hash'] == _hash
    _ids = m.objects.flush()
    assert len(_ids) == k

    # load a second repo
    # make sure our sessions are working as expected and
    # a second call works as expected; eg, in the past
    # there was a bug where we didn't load the table into
    # metadata if the table wasn't being created for the
    # first time and so non-standard types weren't
    # defined in the session...
    m.get_objects(uri=uri_2, flush=True)
    remove_file(m.repo.path, force=True)
    remove_file(db_file)


def postgresql_trash():
    postgresql_stop()
    dest = pjoin(TRASH_DIR, 'postgresql-%s' % NOW)
    utils.move(POSTGRESQL_PGDATA_PATH, dest)
    utils.remove_file(POSTGRESQL_FIRSTBOOT_PATH)
    utils.make_dirs(POSTGRESQL_PGDATA_PATH)


def postgresql_clean():
    utils.remove_file(POSTGRESQL_PIDFILE)


def test_api():
    from metrique import MetriqueContainer, metrique_object
    from metrique.utils import utcnow, remove_file, dt2ts, ts2dt

    _start = ts2dt('2001-01-01')
    _end = ts2dt('2001-01-02')
    a = {'_oid': 1, 'col_1': 1, 'col_2': utcnow(), '_start': _start}
    b = {'_oid': 2, 'col_1': 2, 'col_2': utcnow(), '_start': _start}
    ma = metrique_object(**a)
    mb = metrique_object(**b)
    objs_list = [a, b]
    r_objs_dict = {u'1': ma, u'2': mb}

    c = MetriqueContainer()
    assert not c.name
    assert not c._proxy

    MetriqueContainer()

    # check various forms of passing in objects results in expected
    # container contents
    assert c == {}
    assert MetriqueContainer(objects=c) == {}
    assert MetriqueContainer(objects=objs_list) == r_objs_dict
    mc = MetriqueContainer(objects=objs_list)
    assert MetriqueContainer(objects=mc) == r_objs_dict

    # setting version should result in all objects added having that version
    # note: version -> _v in metrique_object
    assert mc.version == 0
    assert mc['1']['_v'] == 0
    mc = MetriqueContainer(objects=objs_list, version=3)
    assert mc.version == 3
    assert mc['1']['_v'] == 3

    # setting converts key to _id of value after being passed
    # through metrique_object(); notice key int(5) -> str('5')
    mc[5] = {'_oid': 5}
    assert mc['5']['_oid'] == 5
    # also note, that it doesn't actually matter what key we use
    # to set the object... since we always set based on value's
    # auto-generated _id value, anyway
    mc[42] = {'_oid': 5}
    assert mc['5']['_oid'] == 5

    # should have 3 objects, first two, plus the last one
    assert len(mc) == 3
    assert len(mc.values()) == 3
    assert sorted(mc._ids) == ['1', '2', '5']
    assert sorted(mc._oids) == [1, 2, 5]
    try:
        mc.ls()
    except NotImplementedError:
        pass
    else:
        assert False

    mc.extend([{'_oid': 6}, {'_oid': 7}])
    assert sorted(mc._oids) == [1, 2, 5, 6, 7]

    mc.add({'_oid': 8, '_start': _start, '_end': _end, 'col_1': True})
    mc.add({'_oid': 8, '_end': None, 'col_1': False})
    assert sorted(mc._oids) == [1, 2, 5, 6, 7, 8]
    r = mc.filter(where={'_oid': 8})
    assert len(r) == 2
    assert sorted(mc._oids) == [1, 2, 5, 6, 7, 8]

    mc.pop('7')
    assert sorted(mc._oids) == [1, 2, 5, 6, 8]
    mc.pop(6)
    assert sorted(mc._oids) == [1, 2, 5, 8]
    del mc[5]
    assert sorted(mc._oids) == [1, 2, 8]

    assert '1' in mc
    mc.clear()
    assert mc == {}

    db = 'admin'
    name = 'container_test'
    c = MetriqueContainer(name=name, db=db)

    _expected_db_path = os.path.join(cache_dir, 'admin.sqlite')
    # test drop
    c.drop(True)
    assert c.proxy._sqlite_path == _expected_db_path
    # make sure we're working with a clean db
    remove_file(_expected_db_path)

    mc = MetriqueContainer(name=name, db=db, objects=objs_list)
    assert mc.df() is not None
    assert mc.df().empty is False

    # local persistence; filter method queries .objects buffer
    # .upsert dumps data to proxy db; but leaves the data in the buffer
    # .flush dumps data and removes all objects dumped
    # count queries proxy db
    mc = MetriqueContainer(name=name, db=db, objects=objs_list)
    _store = deepcopy(mc.store)

    assert len(mc.filter({'col_1': 1})) == 1
    _ids = mc.upsert()
    assert _ids == ['1', '2']
    assert mc.store == _store
    assert len(mc.filter({'col_1': 1})) == 1
    assert mc.count('col_1 == 1') == 1
    assert mc.count() == 2

    # persisting again shouldn't result in new rows
    _ids = mc.upsert()
    assert _ids == ['1', '2']
    assert mc.store == _store
    assert len(mc.filter({'col_1': 1})) == 1
    assert mc.count('col_1 == 1') == 1
    assert mc.count() == 2

    # flushing now shouldn't result in new rows; but store should be empty
    _ids = mc.flush()
    assert _ids == ['1', '2']
    assert mc.store == {}
    assert len(mc.filter({'col_1': 1})) == 0
    assert mc.count('col_1 == 1') == 1
    assert mc.count() == 2

    # adding the same object shouldn't result in new rows
    a.update({'col_1': 42})
    mc.add(a)
    assert len(mc.filter({'col_1': 1})) == 0
    assert len(mc.filter({'col_1': 42})) == 1
    _ids = mc.flush()
    assert mc.count(date='~') == 3
    assert mc.count(date=None) == 2
    assert mc.count('col_1 == 1', date=None) == 0
    assert mc.count('col_1 == 1', date='~') == 1
    assert mc.count('col_1 == 42') == 1
    assert mc.count('col_1 == 42', date='~') == 1
    # adjust for local time...
    #_ts = dt2ts(convert(_start))
    _ts = dt2ts(_start)
    assert _ids == ['1', '1:%s' % _ts]

    # remove the db
    remove_file(_expected_db_path)