Ejemplo n.º 1
0
    def get_objects(self,
                    uri,
                    _oid=None,
                    _start=None,
                    _end=None,
                    load_kwargs=None,
                    **kwargs):
        '''
        Load csv data from ``uri`` and add every resulting object to
        ``self.container``, stamping ``_oid``, ``_start`` and ``_end`` on
        each one along the way.

        :param uri: uri of the csv file; passed to ``load`` as ``path``
        :param _oid:
            value or callable used to set ``_oid`` on every object;
            when falsy, a running counter starting at 1 is used
        :param _start:
            value or callable used to set ``_start`` on every object;
            when falsy, the result of a single ``utcnow()`` call is used
        :param _end:
            value or callable used to set ``_end`` on every object;
            when falsy, ``None`` is used
        :param load_kwargs: dict of keyword arguments forwarded to ``load``
        :param kwargs: forwarded to the parent class' ``get_objects``
        :returns: whatever the parent class' ``get_objects`` returns

        If ``_oid``, ``_start`` or ``_end`` is callable it is called once
        per object, with that object as its only argument, and the result
        is stored under the matching key.

        Any other value is stored as-is on every object. Note that a string
        is therefore assigned verbatim; it is NOT looked up as a column name.
        '''
        load_kwargs = load_kwargs or {}
        objects = load(path=uri, filetype='csv', **load_kwargs)

        counter = itertools.count(1)
        now = utcnow()

        def next_oid(o):
            # builtin next() instead of counter.next(): the method form only
            # exists on python 2 iterators, the builtin works on 2.6+ and 3
            return next(counter)

        _oid = _oid or next_oid
        _start = _start or now
        _end = _end or None

        def is_callable(v):
            # classes and anything exposing __call__ are applied per object
            return type(v) is type or hasattr(v, '__call__')

        # the three mappers never change while iterating, so decide once
        oid_is_func = is_callable(_oid)
        start_is_func = is_callable(_start)
        end_is_func = is_callable(_end)

        for obj in objects:
            obj['_oid'] = _oid(obj) if oid_is_func else _oid
            obj['_start'] = _start(obj) if start_is_func else _start
            obj['_end'] = _end(obj) if end_is_func else _end
            self.container.add(obj)

        return super(Rows, self).get_objects(**kwargs)
Ejemplo n.º 2
0
    def get_objects(self, uri, _oid=None, _start=None, _end=None,
                    load_kwargs=None, **kwargs):
        '''
        Load csv data from ``uri`` and add every resulting object to
        ``self.container``, stamping ``_oid``, ``_start`` and ``_end`` on
        each one along the way.

        :param uri: uri of the csv file; passed to ``load`` as ``path``
        :param _oid:
            value or callable used to set ``_oid`` on every object;
            when falsy, a running counter starting at 1 is used
        :param _start:
            value or callable used to set ``_start`` on every object;
            when falsy, the result of a single ``utcnow()`` call is used
        :param _end:
            value or callable used to set ``_end`` on every object;
            when falsy, ``None`` is used
        :param load_kwargs: dict of keyword arguments forwarded to ``load``
        :param kwargs: forwarded to the parent class' ``get_objects``
        :returns: whatever the parent class' ``get_objects`` returns

        If ``_oid``, ``_start`` or ``_end`` is callable it is called once
        per object, with that object as its only argument, and the result
        is stored under the matching key.

        Any other value is stored as-is on every object. Note that a string
        is therefore assigned verbatim; it is NOT looked up as a column name.
        '''
        load_kwargs = load_kwargs or {}
        objects = load(path=uri, filetype='csv', **load_kwargs)

        counter = itertools.count(1)
        now = utcnow()

        def next_oid(o):
            # builtin next() instead of counter.next(): the method form only
            # exists on python 2 iterators, the builtin works on 2.6+ and 3
            return next(counter)

        _oid = _oid or next_oid
        _start = _start or now
        _end = _end or None

        def is_callable(v):
            # classes and anything exposing __call__ are applied per object
            return type(v) is type or hasattr(v, '__call__')

        # the three mappers never change while iterating, so decide once
        oid_is_func = is_callable(_oid)
        start_is_func = is_callable(_start)
        end_is_func = is_callable(_end)

        for obj in objects:
            obj['_oid'] = _oid(obj) if oid_is_func else _oid
            obj['_start'] = _start(obj) if start_is_func else _start
            obj['_end'] = _end(obj) if end_is_func else _end
            self.container.add(obj)

        return super(Rows, self).get_objects(**kwargs)
Ejemplo n.º 3
0
def test_load_json():
    '''
    Load the ``meps.json`` fixture, push it through a ``pyclient``
    instance named 'meps' and check the stored objects.

    Steps covered:
      * ``load`` with ``orient='index'`` and an ``_oid`` function that
        copies each object's ``id`` into ``_oid`` yields 736 objects
      * after ``m.objects.extend(objects)`` the object with ``_oid`` 28615
        is found exactly once and carries the expected ``_hash``
      * ``m.objects.flush()`` returns the unicode form of every ``_oid``
        and leaves ``m.objects`` equal to ``{}``

    The sqlite file under ``cache_dir`` is removed before and after the run.
    '''
    from metrique import pyclient
    from metrique.utils import load

    name = 'meps'
    db_file = os.path.join(cache_dir, '%s.sqlite' % name)
    remove_file(db_file)

    def _oid_func(o):
        # use the record's own 'id' as its _oid
        o['_oid'] = o['id']
        return o

    m = pyclient(name=name)
    m.objects.drop()

    path = os.path.join(fixtures, 'meps.json')
    objects = load(path, _oid=_oid_func, orient='index')

    assert len(objects) == 736

    m.objects.extend(objects)

    assert len(m.objects)

    # {u'phone_stb': u'+33 (0)3 88 1 75224', u'comms': None, u'country':
    # u'Latvia', u'_start': ...
    # u'_oid': 28615, u'name': u'Roberts Z\u012aLE', u'url':
    # u'http://www.europarl.euro...rs/expert/committees/view.do?id=28615',
    # u'_v': 0, u'phone_bxl': u'+32 (0)2 28 45224', u'_end': None, u'_hash':
    # u'e8d2a6943734a80f268d112514040b4707915181', u'__v__': u'0.3.1-1a',
    # u'party': u'European Conservatives and Reformists', u'_e': {}, u'_id':
    # u'28615', u'email': None}
    _hash = 'e8d2a6943734a80f268d112514040b4707915181'
    _filtered = m.objects.filter(where={'_oid': 28615})
    assert len(_filtered) == 1
    # parenthesised single-argument form: prints the same text under the
    # python 2 print statement and is valid syntax on python 3 as well
    print('Object: %s' % _filtered)
    assert _filtered[0]['_hash'] == _hash

    _ids = m.objects.flush()

    assert sorted(_ids) == sorted(map(unicode, [o['_oid'] for o in objects]))
    assert m.objects == {}

    remove_file(db_file)
Ejemplo n.º 4
0
def test_load_json():
    """
    Load the ``meps.json`` fixture, push it through a ``pyclient``
    instance named "meps" and check the stored objects.

    Steps covered:
      * ``load`` with ``orient="index"`` and an ``_oid`` function that
        copies each object's ``id`` into ``_oid`` yields 736 objects
      * after ``m.objects.extend(objects)`` the object with ``_oid`` 28615
        is found exactly once and carries the expected ``_hash``
      * ``m.objects.flush()`` returns the unicode form of every ``_oid``
        and leaves ``m.objects`` equal to ``{}``

    The sqlite file under ``cache_dir`` is removed before and after the run.
    """
    from metrique import pyclient
    from metrique.utils import load

    name = "meps"
    db_file = os.path.join(cache_dir, "%s.sqlite" % name)
    remove_file(db_file)

    def _oid_func(o):
        # use the record's own "id" as its _oid
        o["_oid"] = o["id"]
        return o

    m = pyclient(name=name)
    m.objects.drop()

    path = os.path.join(fixtures, "meps.json")
    objects = load(path, _oid=_oid_func, orient="index")

    assert len(objects) == 736

    m.objects.extend(objects)

    assert len(m.objects)

    # {u'phone_stb': u'+33 (0)3 88 1 75224', u'comms': None, u'country':
    # u'Latvia', u'_start': ...
    # u'_oid': 28615, u'name': u'Roberts Z\u012aLE', u'url':
    # u'http://www.europarl.euro...rs/expert/committees/view.do?id=28615',
    # u'_v': 0, u'phone_bxl': u'+32 (0)2 28 45224', u'_end': None, u'_hash':
    # u'e8d2a6943734a80f268d112514040b4707915181', u'__v__': u'0.3.1-1a',
    # u'party': u'European Conservatives and Reformists', u'_e': {}, u'_id':
    # u'28615', u'email': None}
    _hash = "e8d2a6943734a80f268d112514040b4707915181"
    _filtered = m.objects.filter(where={"_oid": 28615})
    assert len(_filtered) == 1
    # parenthesised single-argument form: prints the same text under the
    # python 2 print statement and is valid syntax on python 3 as well
    print("Object: %s" % _filtered)
    assert _filtered[0]["_hash"] == _hash

    _ids = m.objects.flush()

    assert sorted(_ids) == sorted(map(unicode, [o["_oid"] for o in objects]))
    assert m.objects == {}

    remove_file(db_file)
Ejemplo n.º 5
0
 def load(*args, **kwargs):
     '''
     Wrapper for the utils.load automated data loader.

     Forwards all positional and keyword arguments unchanged to ``load``
     and returns its result.

     NOTE(review): the name ``load`` in the body is looked up when the
     call runs. If this def sits at module level, that name is this very
     function and the call recurses without end. It presumably lives in a
     class body with a separate module-level ``load`` imported from utils;
     confirm against the enclosing scope, which is not visible here.
     '''
     return load(*args, **kwargs)
Ejemplo n.º 6
0
def test_load():
    '''
    Exercise ``metrique.utils.load`` against the csv fixtures, a set of
    inputs that are expected to fail, and a csv fetched from the web.
    '''
    from metrique.utils import load

    def raises(expected, *args, **kwargs):
        # True when load(*args, **kwargs) raises `expected`, False when it
        # returns normally; any other exception propagates to the caller
        try:
            load(*args, **kwargs)
        except expected:
            return True
        return False

    csv_glob = os.path.join(fixtures, 'test*.csv')

    rows = load(csv_glob, use_pandas=True)
    assert len(rows) == 2
    assert 'col_1' in rows[0].keys()
    assert 1 in rows[0].values()
    assert 100 in rows[1].values()

    # _oid=True numbers the objects 1, 2, ...
    rows = load(csv_glob, _oid=True)
    assert '_oid' in rows[0].keys()
    assert rows[0]['_oid'] == 1
    assert rows[1]['_oid'] == 2

    # a string is not an acceptable _oid function
    assert raises(TypeError, csv_glob, _oid='i am a string, not a func')

    def set_oid_func(o):
        return dict(_oid=42, **o)

    rows = load(csv_glob, _oid=set_oid_func)
    assert rows[0]['_oid'] == 42
    assert rows[1]['_oid'] == 42

    # check that we can get a dataframe
    frame = load(csv_glob, as_df=True)
    assert hasattr(frame, 'ix')

    # passing in a dataframe should return back the same dataframe...
    assert load(frame) is frame

    # can load only files or dataframes
    assert raises(ValueError, 1), "Loaded 1"

    empty = os.path.join(fixtures, 'empty.csv')
    assert raises(ValueError, empty, header=None, use_pandas=True)
    assert raises(RuntimeError, empty, header=None, use_pandas=False)

    # header-only file: a RuntimeError is tolerated here but not required
    header = os.path.join(fixtures, 'header_only.csv')
    raises(RuntimeError, header)

    assert raises(IOError, 'DOES_NOT_EXIST'), "Loaded DOES_NOT_EXIST"

    # check that we can grab data from the web
    uri = 'https://mysafeinfo.com/api/data?list=days&format=csv'
    remote_rows = list(load(uri, filetype='csv'))
    assert len(remote_rows) == 7
    load(csv_glob)
Ejemplo n.º 7
0
 def load(*args, **kwargs):
     '''
     Wrapper for the utils.load automated data loader.

     Forwards all positional and keyword arguments unchanged to ``load``
     and returns its result.

     NOTE(review): the name ``load`` in the body is looked up when the
     call runs. If this def sits at module level, that name is this very
     function and the call recurses without end. It presumably lives in a
     class body with a separate module-level ``load`` imported from utils;
     confirm against the enclosing scope, which is not visible here.
     '''
     return load(*args, **kwargs)
Ejemplo n.º 8
0
def test_load():
    '''
    Exercise ``metrique.utils.load`` against the csv fixtures, a set of
    inputs that are expected to fail, and a csv fetched from the web.
    '''
    from metrique.utils import load

    def load_fails_with(exc_type, *load_args, **load_kw):
        # report whether load(...) raised `exc_type`; a normal return gives
        # False and any other exception type is left to propagate
        try:
            load(*load_args, **load_kw)
        except exc_type:
            return True
        return False

    pattern = os.path.join(fixtures, 'test*.csv')

    loaded = load(pattern, use_pandas=True)
    assert len(loaded) == 2
    assert 'col_1' in loaded[0].keys()
    assert 1 in loaded[0].values()
    assert 100 in loaded[1].values()

    # _oid=True numbers the objects 1, 2, ...
    loaded = load(pattern, _oid=True)
    assert '_oid' in loaded[0].keys()
    assert loaded[0]['_oid'] == 1
    assert loaded[1]['_oid'] == 2

    # a string is not an acceptable _oid function
    not_a_func = 'i am a string, not a func'
    assert load_fails_with(TypeError, pattern, _oid=not_a_func)

    def set_oid_func(o):
        return dict(_oid=42, **o)

    loaded = load(pattern, _oid=set_oid_func)
    assert loaded[0]['_oid'] == 42
    assert loaded[1]['_oid'] == 42

    # check that we can get a dataframe
    df = load(pattern, as_df=True)
    assert hasattr(df, 'ix')

    # passing in a dataframe should return back the same dataframe...
    same_df = load(df)
    assert same_df is df

    # can load only files or dataframes
    assert load_fails_with(ValueError, 1), "Loaded 1"

    empty = os.path.join(fixtures, 'empty.csv')
    assert load_fails_with(ValueError, empty, header=None, use_pandas=True)
    assert load_fails_with(RuntimeError, empty, header=None,
                           use_pandas=False)

    # header-only file: a RuntimeError is tolerated here but not required
    header = os.path.join(fixtures, 'header_only.csv')
    load_fails_with(RuntimeError, header)

    assert load_fails_with(IOError, 'DOES_NOT_EXIST'), "Loaded DOES_NOT_EXIST"

    # check that we can grab data from the web
    uri = 'https://mysafeinfo.com/api/data?list=days&format=csv'
    from_web = list(load(uri, filetype='csv'))
    assert len(from_web) == 7
    load(pattern)