def test_resource_directory():
    """Check that ``resource`` builds a ``Directory(CSV)`` for a CSV directory.

    Both the bare path yielded by ``csvs()`` and a ``*.csv`` pattern joined
    onto that path must produce a ``Directory(CSV)`` whose ``path`` equals
    the original path, ignoring trailing path separators.
    """
    sep = os.path.sep
    with csvs() as path:
        # Resolve the directory path itself.
        from_dir = resource(path)
        assert type(from_dir) == Directory(CSV)
        assert from_dir.path.rstrip(sep) == path.rstrip(sep)

        # Resolve a '*.csv' pattern located inside the same directory.
        pattern = os.path.join(path, '*.csv')
        from_pattern = resource(pattern)
        assert type(from_pattern) == Directory(CSV)
        assert from_pattern.path.rstrip(sep) == path.rstrip(sep)
def test_dialect_of():
    """Check ``dialect_of`` for a local CSV file and for HDFS CSV directories.

    For the local file the delimiter must be ``','`` and a header must be
    reported. For the HDFS directory a header must be reported by default,
    and must not be reported when ``has_header=False`` is passed explicitly.
    """
    with filetext(accounts_1_csv) as fn:
        local_dialect = dialect_of(CSV(fn))
        assert local_dialect['delimiter'] == ','
        assert local_dialect['has_header'] is True

    with accounts_data() as (directory, (_a, _b, _c)):

        def hdfs_dialect(**options):
            # Rebuild an HDFS CSV directory on the fixture's path and
            # client, forwarding any extra keyword options unchanged.
            rebuilt = HDFS(Directory(CSV))(directory.path,
                                           hdfs=directory.hdfs,
                                           **options)
            return dialect_of(rebuilt)

        default_dialect = hdfs_dialect()
        assert default_dialect['has_header'] is True

        headerless_dialect = hdfs_dialect(has_header=False)
        assert headerless_dialect['has_header'] is False
def test_resource_directory():
    """Check the ``subtype`` and ``path`` that ``resource`` gives ssh URIs.

    A URI ending in a directory must have a ``subtype`` that is a subclass
    of ``_Directory``. A URI ending in ``*.csv`` must have the subtype
    ``Directory(CSV)`` and the path ``'/path/to/'``.
    """
    dir_resource = resource('ssh://joe@localhost:/path/to/')
    assert issubclass(dir_resource.subtype, _Directory)

    pattern_resource = resource('ssh://joe@localhost:/path/to/*.csv')
    assert pattern_resource.subtype == Directory(CSV)
    assert pattern_resource.path == '/path/to/'
def test_hdfs_resource():
    """Check how ``resource`` interprets ``hdfs://`` URIs.

    A URI carrying user, host and port must yield an ``HDFS(JSONLines)``
    instance whose ``hdfs`` attribute exposes those values and whose
    ``path`` is the file path. URIs without connection details must accept
    them as keyword arguments and yield ``HDFS(CSV)`` for a single file and
    ``HDFS(Directory(CSV))`` for a ``*.csv`` pattern.
    """
    json_resource = resource('hdfs://user@hostname:1234:/path/to/myfile.json')
    assert isinstance(json_resource, HDFS(JSONLines))
    assert json_resource.hdfs.user_name == 'user'
    assert json_resource.hdfs.host == 'hostname'
    # The port taken from the URI is compared as a string here.
    assert json_resource.hdfs.port == '1234'
    assert json_resource.path == '/path/to/myfile.json'

    # Connection details supplied as keywords instead of inside the URI.
    connection = dict(host='host', user='******', port=1234)

    single_file = resource('hdfs://path/to/myfile.csv', **connection)
    assert isinstance(single_file, HDFS(CSV))

    csv_pattern = resource('hdfs://path/to/*.csv', **connection)
    assert isinstance(csv_pattern, HDFS(Directory(CSV)))
def accounts_data():
    """Create three account CSV files on HDFS, yield handles, then delete them.

    Two files are written under ``/user/hive/test/accounts/``. The third is
    written one level up, in ``/user/hive/test/``, so it is not inside the
    directory that is yielded.

    Yields:
        A ``(directory, (A, B, C))`` tuple. ``directory`` is an
        ``HDFS(Directory(CSV))`` for ``/user/hive/test/accounts/`` and
        ``A``, ``B``, ``C`` are ``HDFS(CSV)`` objects for the three files.
        All of them share the module-level ``hdfs`` client.

    The three files are deleted when the generator is resumed or closed
    after the yield. The directory made by ``make_dir`` is not removed.

    NOTE(review): ``test_dialect_of`` consumes this function in a ``with``
    statement, which requires a ``contextlib.contextmanager`` decorator that
    is not visible on this definition -- confirm it is applied.
    """
    a = '/user/hive/test/accounts/accounts.1.csv'
    b = '/user/hive/test/accounts/accounts.2.csv'
    c = '/user/hive/test/accounts.3.csv'

    # The client calls below take paths without a leading '/', while the
    # HDFS(CSV) wrappers are given the absolute form.
    hdfs.make_dir('user/hive/test/accounts')
    hdfs.create_file(a.lstrip('/'), accounts_1_csv)
    hdfs.create_file(b.lstrip('/'), accounts_2_csv)
    hdfs.create_file(c.lstrip('/'), accounts_3_csv)

    A = HDFS(CSV)(a, hdfs=hdfs)
    B = HDFS(CSV)(b, hdfs=hdfs)
    C = HDFS(CSV)(c, hdfs=hdfs)

    directory = HDFS(Directory(CSV))('/user/hive/test/accounts/', hdfs=hdfs)

    try:
        yield (directory, (A, B, C))
    finally:
        # Delete using the same slash-stripped form the files were created
        # with, so setup and cleanup address identical paths.
        for created in (a, b, c):
            hdfs.delete_file_dir(created.lstrip('/'))
def test_discover():
    """Check the datashape ``discover`` reports for a directory of CSVs.

    A ``Directory(CSV)`` built on the path yielded by ``csvs()`` must be
    discovered as ``var * {a: int64, b: int64}``.
    """
    with csvs() as path:
        csv_directory = Directory(CSV)(path)
        discovered = discover(csv_directory)
        assert discovered == dshape('var * {a: int64, b: int64}')