def test_resource_directory():
    """A directory path and a ``*.csv`` glob in it both give Directory(CSV).

    Paths are compared with any trailing separator stripped, so the check
    does not depend on whether either side ends in ``os.path.sep``.
    """
    with csvs() as path:
        sep = os.path.sep
        expected_path = path.rstrip(sep)

        # Case 1: the bare directory path.
        from_dir = resource(path)
        assert type(from_dir) == Directory(CSV)
        assert from_dir.path.rstrip(sep) == expected_path

        # Case 2: a glob pattern rooted in that same directory.
        pattern = os.path.join(path, '*.csv')
        from_glob = resource(pattern)
        assert type(from_glob) == Directory(CSV)
        assert from_glob.path.rstrip(sep) == expected_path
Exemple #2
0
def test_dialect_of():
    """Check ``dialect_of`` on a local CSV file and on HDFS CSV directories."""
    # Local file: both the delimiter and the header flag are checked.
    with filetext(accounts_1_csv) as filename:
        local_dialect = dialect_of(CSV(filename))
        assert local_dialect['delimiter'] == ','
        assert local_dialect['has_header'] is True

    # The individual file resources are unpacked but not needed here.
    with accounts_data() as (directory, (_a, _b, _c)):
        # Rebuild the directory resource without passing has_header.
        plain_dir = HDFS(Directory(CSV))(directory.path, hdfs=directory.hdfs)
        assert dialect_of(plain_dir)['has_header'] is True

        # An explicit has_header=False must be reflected in the dialect.
        headerless_dir = HDFS(Directory(CSV))(
            directory.path, hdfs=directory.hdfs, has_header=False)
        assert dialect_of(headerless_dir)['has_header'] is False
Exemple #3
0
def test_resource_directory():
    """ssh:// URIs for a directory or a ``*.csv`` glob give directory types."""
    # Trailing-slash URI: the subtype only has to be some kind of directory.
    dir_resource = resource('ssh://joe@localhost:/path/to/')
    assert issubclass(dir_resource.subtype, _Directory)

    # Glob URI: the subtype is exactly Directory(CSV) and the glob part is
    # dropped from the path.
    glob_resource = resource('ssh://joe@localhost:/path/to/*.csv')
    assert glob_resource.subtype == Directory(CSV)
    assert glob_resource.path == '/path/to/'
Exemple #4
0
def test_hdfs_resource():
    """``resource`` handles hdfs:// URIs and connection keyword arguments."""
    # Connection details embedded in the URI itself.
    parsed = resource('hdfs://user@hostname:1234:/path/to/myfile.json')
    assert isinstance(parsed, HDFS(JSONLines))

    client = parsed.hdfs
    assert client.user_name == 'user'
    assert client.host == 'hostname'
    assert client.port == '1234'  # compared as a string, not an int
    assert parsed.path == '/path/to/myfile.json'

    # Connection details supplied as keyword arguments: single file ...
    csv_file = resource('hdfs://path/to/myfile.csv',
                        host='host', user='******', port=1234)
    assert isinstance(csv_file, HDFS(CSV))

    # ... and a glob, which should produce a directory of CSVs.
    csv_glob = resource('hdfs://path/to/*.csv',
                        host='host', user='******', port=1234)
    assert isinstance(csv_glob, HDFS(Directory(CSV)))
Exemple #5
0
def accounts_data():
    """Return a context manager that provides sample CSV files on HDFS.

    On entry, the directory ``user/hive/test/accounts`` is created through
    the module-level ``hdfs`` client and three CSV files are written: two
    inside that directory and one beside it.  The ``with`` target receives
    ``(directory, (A, B, C))`` where ``directory`` is an
    ``HDFS(Directory(CSV))`` for the accounts directory and ``A``, ``B``,
    ``C`` are ``HDFS(CSV)`` resources for the individual files.  On exit the
    three files are deleted, even if the ``with`` body raised.

    A plain generator function cannot be used in a ``with`` statement, so
    the generator is wrapped with ``contextlib.contextmanager``.
    """
    # Imported locally so this fixture does not depend on a module-level
    # import being present.
    from contextlib import contextmanager

    @contextmanager
    def _fixture():
        a = '/user/hive/test/accounts/accounts.1.csv'
        b = '/user/hive/test/accounts/accounts.2.csv'
        c = '/user/hive/test/accounts.3.csv'
        # The client calls below take paths without the leading slash.
        hdfs.make_dir('user/hive/test/accounts')
        hdfs.create_file(a.lstrip('/'), accounts_1_csv)
        hdfs.create_file(b.lstrip('/'), accounts_2_csv)
        hdfs.create_file(c.lstrip('/'), accounts_3_csv)

        A = HDFS(CSV)(a, hdfs=hdfs)
        B = HDFS(CSV)(b, hdfs=hdfs)
        C = HDFS(CSV)(c, hdfs=hdfs)
        directory = HDFS(Directory(CSV))('/user/hive/test/accounts/',
                                         hdfs=hdfs)

        try:
            yield (directory, (A, B, C))
        finally:
            # Remove only the files created above; the directory is left.
            hdfs.delete_file_dir(a)
            hdfs.delete_file_dir(b)
            hdfs.delete_file_dir(c)

    return _fixture()
def test_discover():
    """``discover`` on a Directory(CSV) yields the expected datashape."""
    expected = dshape('var * {a: int64, b: int64}')
    with csvs() as path:
        csv_dir = Directory(CSV)(path)
        assert discover(csv_dir) == expected