Ejemplo n.º 1
0
def test_osinfo_rpm():
    # Smoke test for the osinfo_rpm cube: pull installed-RPM objects,
    # then verify filtering and flushing against a local sqlite cache.
    from metrique import pyclient
    from metrique.utils import sys_call

    # Skip silently when the rpm binary is unavailable on this host.
    if sys_call('which rpm', ignore_errors=True) is None:
        return

    cube_name = 'osinfo_rpm'
    db_file = os.path.join(cache_dir, '%s.sqlite' % cube_name)
    remove_file(db_file)
    client = pyclient(cube=cube_name)
    client.objects.drop()

    print('Getting RPM objects; might take a few seconds.')
    client.get_objects()
    print(' ... done.')
    total = len(client.objects)
    assert total > 0

    # assumes 'bash' is installed exactly once on any host with rpm
    matches = client.objects.filter(where={'name': 'bash'})
    assert len(matches) == 1
    print('Object: %s' % matches)

    # flushing must persist every object that was loaded above
    flushed = client.objects.flush()
    assert len(flushed) == total

    remove_file(db_file)
Ejemplo n.º 2
0
def get_objects(cube,
                oids,
                full_history,
                flush=False,
                cube_name=None,
                config=None,
                config_file=None,
                config_key=None,
                container=None,
                container_config=None,
                proxy=None,
                proxy_config=None,
                **kwargs):
    """Fetch objects for the given oids in batches through a pyclient cube.

    When `full_history` is true the activity-history getter is used;
    otherwise the plain object getter. Returns the accumulated results.
    """
    # force a single worker to avoid 'nested' (invalid) joblib runs.
    kwargs['workers'] = 1
    client = pyclient(cube=cube, name=cube_name, config=config,
                      config_file=config_file, config_key=config_key,
                      container=container, container_config=container_config,
                      proxy=proxy, proxy_config=proxy_config, **kwargs)
    # Select the getter once, outside the batch loop.
    getter = (client._activity_get_objects if full_history
              else client._get_objects)
    results = []
    for batch in batch_gen(oids, client.lconfig.get('batch_size')):
        results.extend(getter(oids=batch, flush=flush))
    return results
Ejemplo n.º 3
0
def test_osinfo_rpm():
    # Smoke test for the osinfo_rpm cube: extract installed-RPM objects
    # and verify filtering and flushing against a local sqlite cache.
    # NOTE: os, cache_dir and remove_file are module-level names defined
    # elsewhere in this file.
    from metrique import pyclient
    from metrique.utils import sys_call

    if sys_call("which rpm", ignore_errors=True) is None:
        # skip these tests, since we don't have rpm installed
        return

    name = "osinfo_rpm"
    db_file = os.path.join(cache_dir, "%s.sqlite" % name)
    remove_file(db_file)
    m = pyclient(cube=name)
    m.objects.drop()

    print "Getting RPM objects; might take a few seconds."
    m.get_objects()
    print " ... done."
    k = len(m.objects)
    # at least one package must be present on a host that has rpm
    assert k > 0

    name = "bash"
    # assumes 'bash' is installed exactly once -- TODO confirm
    _filtered = m.objects.filter(where={"name": name})
    assert len(_filtered) == 1
    print "Object: %s" % _filtered

    _ids = m.objects.flush()
    # flush should persist every object loaded above
    assert len(_ids) == k

    remove_file(db_file)
Ejemplo n.º 4
0
def test_user_api():
    """Exercise user registration, profile update and removal.

    NOTE: config, username, password and GNUPG_DIR are module-level
    names defined elsewhere in this file.
    """
    fingerprint = '894EE1CEEA61DC3D7D20327C4200AD1F2F22F46C'

    m = pyclient(name='test_user',
                 gnupg_dir=GNUPG_DIR,
                 gnupg_fingerprint=fingerprint,
                 **config)

    assert m.config.gnupg_fingerprint == fingerprint

    assert m.user_register(username, password)
    # Registering the same user again should except. BUG FIX: the
    # original bare `except:` also swallowed SystemExit and
    # KeyboardInterrupt; catch only Exception.
    try:
        m.user_register(username, password)
    except Exception:
        pass

    aboutme = m.aboutme()
    assert aboutme is not None

    assert m.config.gnupg_pubkey is not None
    pubkey = m.config.gnupg_pubkey
    gnupg = {'pubkey': pubkey, 'fingerprint': fingerprint}
    result = m.user_update_profile(gnupg=gnupg)
    # profile update reports both the prior and the current state
    assert result['previous'] == aboutme
    assert 'gnupg' in result['now']
    assert result['now']['gnupg']['fingerprint'] == fingerprint
    assert result['now']['gnupg']['pubkey'] == pubkey

    assert m.user_remove()
Ejemplo n.º 5
0
def test_api():
    # End-to-end exercise of the client API against two local cubes:
    # register user and cube, extract, query, rename, and drop.
    # NOTE: config, username, password, pkgs, paths and Result are
    # module-level names defined elsewhere in this file.
    m = pyclient(**config)
    m.user_remove(username, quiet=True)  # to be sure it doesn't exist already
    assert m.user_register(username, password)
    cubes = ['csvcube_local', 'jsoncube_local']
    for cube in cubes:
        _cube = m.get_cube(cube=cube, pkgs=pkgs, cube_paths=paths, init=True)
        _cube.cookiejar_clear()
        _cube.cube_drop(quiet=True)  # to be sure it doesn't exist already...

        assert _cube.cube_register()

        result = _cube.extract()
        assert result is not None
        assert len(result) > 0

        # we should get back some results
        df = _cube.find(fields='~', date='~')
        assert df is not None
        # default obj type returned should be metrique.result.Result
        assert isinstance(df, Result)

        # raw should return back a list of dicts
        raw = _cube.find(raw=True, fields='~', date='~')
        assert isinstance(raw, list)
        assert len(raw) > 0
        assert isinstance(raw[0], dict)

        k = len(result)
        assert k == _cube.count(date='~')

        # a second extract of the same data should not result
        # new objects being saved
        result = _cube.extract()
        assert k == _cube.count(date='~')

        # rename cube
        name = _cube.name[:]
        new_name = 'renamed_%s' % name
        assert _cube.cube_rename(new_name=new_name)
        assert _cube.name == new_name
        ## count should remain the same in renamed cube
        assert k == _cube.count(date='~')
        assert _cube.cube_rename(new_name=name)
        # drop the cube
        assert _cube.cube_drop()
        assert _cube.cube_id not in _cube.cube_list_all()

    # with the last cube, do a few more things...
    # re-register
    _cube = m.get_cube(cube=cubes[0], pkgs=pkgs, cube_paths=paths, init=True)
    assert _cube.cube_register()
    # registered cubes are namespaced as <username>__<cube name>
    name = '%s__%s' % (username, _cube.name)
    assert name in _cube.cube_list_all()
    # drop the cube
    assert _cube.cube_drop()
    assert name not in _cube.cube_list_all()
    # then drop the user
    assert _cube.user_remove()
Ejemplo n.º 6
0
def test_csvdata():
    """Load the us-idx-eod CSV fixture and verify container round-trips.

    NOTE: os, cache_dir, fixtures and remove_file are module-level names
    defined elsewhere in this file.
    """
    from metrique import pyclient

    name = 'us_idx_eod'
    db_file = os.path.join(cache_dir, '%s.sqlite' % name)
    remove_file(db_file)
    m = pyclient(cube='csvdata_rows', name=name)
    m.objects.drop()

    uri = os.path.join(fixtures, 'us-idx-eod.csv')
    m.get_objects(uri=uri, load_kwargs=dict(use_pandas=True))

    assert m.objects
    assert len(m.objects) == 14
    # fields are the CSV columns plus metrique's metadata fields
    assert m.objects.fields == [
        '__v__', '_e', '_end', '_hash', '_id', '_oid', '_start', '_v', 'close',
        'date', 'open', 'symbol'
    ]

    # spot-check the row with _oid 11 ($AJT, 09/08/2008) by content hash
    _ids = m.objects._ids
    _hash = '76e81838bdde51f693f8a09a2308557a7962aa78'
    _oid = 11
    _filtered = m.objects.filter(where={'_oid': _oid})
    print('Object: %s' % _filtered)
    assert len(_filtered) == 1
    assert m.objects['11']['_hash'] == _hash  # check _hash is as expected
    assert m.objects['11']['symbol'] == '$AJT'
    assert m.objects.upsert() == _ids
    # upsert persists but does not remove from the container; still there...
    assert m.objects['11']['symbol'] == '$AJT'

    # persist and remove from container
    assert m.objects.flush() == _ids
    assert m.objects == {}

    objs = m.objects.find('_oid == %s' % _oid, one=True, raw=True, fields='~')
    o = {k: v for k, v in objs.items() if k != 'id'}
    _o = dict(_filtered[0])
    # we can't assure float precision is exact as it goes in/out
    # but it should be close... BUG FIX: without abs() an arbitrarily
    # large *negative* drift would have passed silently.
    assert abs(o['_start'] - _o['_start']) <= .1
    # FIXME: ideally, _e would come back out as it went in!
    # not going in as {} but come out as None
    for k in ['_start', '_e']:
        del o[k]
        del _o[k]
    assert o == _o

    remove_file(db_file)
Ejemplo n.º 7
0
def test_gitdata_commit():
    """Clone git repos and verify commit-object extraction round-trips."""
    from metrique import pyclient
    from metrique.utils import remove_file

    cube = 'gitdata_commit'
    db_file = os.path.join(cache_dir, '%s.sqlite' % cube)
    remove_file(db_file)

    uri_1 = 'https://github.com/kejbaly2/tornadohttp.git'
    uri_2 = 'https://github.com/kejbaly2/metrique.git'
    client = pyclient(cube=cube)
    client.objects.drop()

    client.get_objects(uri=uri_1)
    total = len(client.objects)
    assert total > 0
    # re-pulling the same repo must not create new objects
    client.get_objects(uri=uri_1, pull=True)
    assert len(client.objects) == total

    # Spot-check a known commit by its oid (the commit sha) and the
    # expected metrique content hash.
    expected_hash = '79a11c24ac814f001abcd27963de761ccb37a908'
    commit_oid = '99dc1e5c4e3ab2c8ab5510e50a3edf64f9fcc705'
    matches = client.objects.filter(where={'_oid': commit_oid})
    assert len(matches) == 1
    print('Object: %s' % matches)
    assert matches[0]['_hash'] == expected_hash

    flushed = client.objects.flush()
    assert len(flushed) == total

    # Load a second repo: makes sure sessions work across calls. In the
    # past there was a bug where the table wasn't loaded into metadata
    # unless it was being created for the first time, so non-standard
    # types weren't defined in the session.
    client.get_objects(uri=uri_2, flush=True)

    remove_file(client.repo.path, force=True)
    remove_file(db_file)
Ejemplo n.º 8
0
def test_gitdata_commit():
    # Clone git repos and verify commit-object extraction round-trips.
    # NOTE: os and cache_dir are module-level names defined elsewhere in
    # this file. Requires network access to github.com.
    from metrique import pyclient
    from metrique.utils import remove_file

    name = "gitdata_commit"
    db_file = os.path.join(cache_dir, "%s.sqlite" % name)
    remove_file(db_file)

    uri_1 = "https://github.com/kejbaly2/tornadohttp.git"
    uri_2 = "https://github.com/kejbaly2/metrique.git"
    m = pyclient(cube=name)
    m.objects.drop()

    m.get_objects(uri=uri_1)
    k = len(m.objects)
    assert k > 0
    # re-pulling the same repo must not create new objects
    m.get_objects(uri=uri_1, pull=True)
    assert k == len(m.objects)

    # Sample extracted object, kept for reference:
    # {u'files': {u'setup.py': {u'removed': 0, u'added': 3},
    # u'tornadohttp/tornadohttp.py': {u'removed': 7, u'added': 10},
    # u'tornadohttp/__init__.py': {u'removed': 0, u'added': 7},
    # u'tornadohttp/_version.py': {u'removed': 0, u'added': 9}}, u'committer':
    # u'Chris Ward <*****@*****.**>', u'added': 29, u'extra': None,
    # u'author_time': 1396355424, u'related': None, u'repo_uri':
    # u'https://github.com/kejbaly2/tornadohttp.git', u'acked_by': None,
    # u'resolves': None, u'message': u'version bump; logdir and other configs
    # renamed\n', u'_start': datetime.datetime(2014, 4, 1, 12, 30, 24),
    # u'_oid': u'99dc1e5c4e3ab2c8ab5510e50a3edf64f9fcc705', u'removed': 7,
    # u'mergetag': None, u'author': u'Chris Ward <*****@*****.**>', u'_v': 0,
    # u'tree': u'66406ded27ba129ad1639928b079b821ab416fed', u'_end': None,
    # u'signed_off_by': None, u'parents':
    # ['78b311d90e35eb36016a7f41e75657754dbe0784'], u'_hash':
    # u'79a11c24ac814f001abcd27963de761ccb37a908', u'__v__': u'0.3.1-1a',
    # u'_e': {}, u'_id': u'99dc1e5c4e3ab2c8ab5510e50a3edf64f9fcc705'}
    # spot-check a known commit (oid = commit sha) by its content hash
    _hash = "79a11c24ac814f001abcd27963de761ccb37a908"
    _oid = "99dc1e5c4e3ab2c8ab5510e50a3edf64f9fcc705"
    _filtered = m.objects.filter(where={"_oid": _oid})
    assert len(_filtered) == 1
    print "Object: %s" % _filtered
    assert _filtered[0]["_hash"] == _hash

    _ids = m.objects.flush()
    assert len(_ids) == k

    # load a second repo
    # make sure our sessions are working as expected and
    # a second call works as expected; eg, in the past
    # there was a bug where we didn't load the table into
    # metadata if the table wasn't being created for the
    # first time and so non-standard types weren't
    # defined in the session...
    m.get_objects(uri=uri_2, flush=True)

    remove_file(m.repo.path, force=True)
    remove_file(db_file)
Ejemplo n.º 9
0
def test_load_json():
    """Load the meps.json fixture and verify filter/flush round-trips."""
    from metrique import pyclient
    from metrique.utils import load

    cube = 'meps'
    db_file = os.path.join(cache_dir, '%s.sqlite' % cube)
    remove_file(db_file)

    def _oid_func(o):
        # metrique requires an _oid field; map it from the record's own id
        o['_oid'] = o['id']
        return o

    client = pyclient(name=cube)
    client.objects.drop()

    objects = load(os.path.join(fixtures, 'meps.json'),
                   _oid=_oid_func, orient='index')
    assert len(objects) == 736

    client.objects.extend(objects)
    assert len(client.objects)

    # spot-check a known record (_oid 28615) by its content hash
    expected_hash = 'e8d2a6943734a80f268d112514040b4707915181'
    matches = client.objects.filter(where={'_oid': 28615})
    assert len(matches) == 1
    print('Object: %s' % matches)
    assert matches[0]['_hash'] == expected_hash

    flushed = client.objects.flush()

    # flushed ids are the stringified _oids; flush empties the container
    assert sorted(flushed) == sorted(map(unicode,
                                         [o['_oid'] for o in objects]))
    assert client.objects == {}

    remove_file(db_file)
Ejemplo n.º 10
0
def test_load_json():
    """Load the meps.json fixture and verify filter/flush round-trips.

    NOTE: os, cache_dir, fixtures and remove_file are module-level names
    defined elsewhere in this file.
    """
    from metrique import pyclient
    from metrique.utils import load

    name = "meps"
    db_file = os.path.join(cache_dir, "%s.sqlite" % name)
    remove_file(db_file)

    def _oid_func(o):
        # metrique requires an _oid field; map it from the record's own id
        o["_oid"] = o["id"]
        return o

    m = pyclient(name=name)
    m.objects.drop()

    path = os.path.join(fixtures, "meps.json")
    objects = load(path, _oid=_oid_func, orient="index")

    assert len(objects) == 736

    m.objects.extend(objects)

    assert len(m.objects)

    # Sample extracted object, kept for reference:
    # {u'phone_stb': u'+33 (0)3 88 1 75224', u'comms': None, u'country':
    # u'Latvia', u'_start': ...
    # u'_oid': 28615, u'name': u'Roberts Z\u012aLE', u'url':
    # u'http://www.europarl.euro...rs/expert/committees/view.do?id=28615',
    # u'_v': 0, u'phone_bxl': u'+32 (0)2 28 45224', u'_end': None, u'_hash':
    # u'e8d2a6943734a80f268d112514040b4707915181', u'__v__': u'0.3.1-1a',
    # u'party': u'European Conservatives and Reformists', u'_e': {}, u'_id':
    # u'28615', u'email': None}
    # spot-check a known record (_oid 28615) by its content hash
    _hash = "e8d2a6943734a80f268d112514040b4707915181"
    _filtered = m.objects.filter(where={"_oid": 28615})
    assert len(_filtered) == 1
    print "Object: %s" % _filtered
    assert _filtered[0]["_hash"] == _hash

    _ids = m.objects.flush()

    # flushed ids are the stringified _oids; flush empties the container
    assert sorted(_ids) == sorted(map(unicode, [o["_oid"] for o in objects]))
    assert m.objects == {}

    remove_file(db_file)
Ejemplo n.º 11
0
def get_objects(cube, oids, full_history, flush=False, cube_name=None,
                config=None, config_file=None, config_key=None,
                container=None, container_config=None,
                proxy=None, proxy_config=None, **kwargs):
    """Batch-fetch objects for the given oids via a pyclient cube.

    Uses the activity-history getter when `full_history` is true,
    otherwise the plain getter; returns the accumulated results.
    """
    # force a single worker to avoid 'nested' (invalid) joblib runs.
    kwargs['workers'] = 1
    client = pyclient(
        cube=cube, name=cube_name, config=config, config_file=config_file,
        config_key=config_key, container=container,
        container_config=container_config, proxy=proxy,
        proxy_config=proxy_config, **kwargs)
    out = []
    size = client.lconfig.get('batch_size')
    for chunk in batch_gen(oids, size):
        if full_history:
            fetched = client._activity_get_objects(oids=chunk, flush=flush)
        else:
            fetched = client._get_objects(oids=chunk, flush=flush)
        out.extend(fetched)
    return out
Ejemplo n.º 12
0
def get_objects(cube, oids, field_order, start, save=True, cube_name=None,
                autosnap=True, **kwargs):
    """Fetch objects for the given oids through a freshly built pyclient.

    Presumably a worker entry point (the client is constructed inside
    the call) -- TODO confirm against callers.
    FIX: dropped the original no-op `start = start` self-assignment.
    """
    m = pyclient(cube=cube, name=cube_name, **kwargs)
    return m._get_objects(oids=oids, field_order=field_order,
                          start=start, save=save, autosnap=autosnap)
Ejemplo n.º 13
0
def get_full_history(cube, oids, save=True, cube_name=None,
                     autosnap=False, **kwargs):
    """Fetch the full activity history for the given oids via a cube."""
    client = pyclient(cube=cube, name=cube_name, **kwargs)
    return client._activity_get_objects(oids=oids, save=save,
                                        autosnap=autosnap)
Ejemplo n.º 14
0
def test_csvdata():
    """Load the us-idx-eod CSV fixture and verify container round-trips.

    NOTE: os, cache_dir, fixtures and remove_file are module-level names
    defined elsewhere in this file.
    """
    from metrique import pyclient

    name = "us_idx_eod"
    db_file = os.path.join(cache_dir, "%s.sqlite" % name)
    remove_file(db_file)
    m = pyclient(cube="csvdata_rows", name=name)
    m.objects.drop()

    uri = os.path.join(fixtures, "us-idx-eod.csv")
    m.get_objects(uri=uri, load_kwargs=dict(use_pandas=True))

    assert m.objects
    assert len(m.objects) == 14
    # fields are the CSV columns plus metrique's metadata fields
    assert m.objects.fields == [
        "__v__",
        "_e",
        "_end",
        "_hash",
        "_id",
        "_oid",
        "_start",
        "_v",
        "close",
        "date",
        "open",
        "symbol",
    ]

    # Sample extracted object, kept for reference:
    # {u'symbol': u'$AJT', u'date': u'09/08/2008', u'close': 18.15, u'_start':
    # datetime.datetime(2014, 5, 28, 14, 9, 22, 999145), u'open': 17.84,
    # u'_oid': 11, u'_v': 0, u'_end': None, u'_hash':
    # u'76e81838bdde51f693f8a09a2308557a7962aa78', u'__v__': u'0.3.1-1a',
    # u'_e': {}, u'_id': u'11'}
    # spot-check the row with _oid 11 by its content hash
    _ids = m.objects._ids
    _hash = "76e81838bdde51f693f8a09a2308557a7962aa78"
    _oid = 11
    _filtered = m.objects.filter(where={"_oid": _oid})
    print "Object: %s" % _filtered
    assert len(_filtered) == 1
    assert m.objects["11"]["_hash"] == _hash  # check _hash is as expected
    assert m.objects["11"]["symbol"] == "$AJT"
    assert m.objects.upsert() == _ids
    # still there... (upsert persists but does not remove from container)
    assert m.objects["11"]["symbol"] == "$AJT"

    # persist and remove from container
    assert m.objects.flush() == _ids
    assert m.objects == {}

    objs = m.objects.find("_oid == %s" % _oid, one=True, raw=True, fields="~")
    o = {k: v for k, v in objs.items() if k != "id"}
    _o = dict(_filtered[0])
    # we can't assure float precision is exact as it goes in/out
    # but it should be close...
    # NOTE(review): this difference is not wrapped in abs(), so a large
    # negative drift would pass silently -- looks like it should be
    # abs(o["_start"] - _o["_start"]) <= 0.1; confirm and fix.
    assert o["_start"] - _o["_start"] <= 0.1
    # FIXME: ideally, _e would come back out as it went in!
    # not going in as {} but come out as None
    for k in ["_start", "_e"]:
        del o[k]
        del _o[k]
    assert o == _o

    remove_file(db_file)
Ejemplo n.º 15
0
def test_admin():
    m = pyclient(**config)
    m.user_remove(username, quiet=True)  # to be sure it doesn't exist already
    assert m.user_register(username, password)
    m.user_remove(username, quiet=True)  # to be sure it doesn't exist already