def test_resource_directory():
    """A directory path — or a ``*.csv`` glob inside it — resolves to Directory(CSV).

    Both forms should yield the same resource type and normalize to the
    same directory path (trailing separators ignored).
    """
    with csvs() as path:
        r = resource(path)
        assert type(r) == Directory(CSV)
        assert r.path.rstrip(os.path.sep) == path.rstrip(os.path.sep)

        r2 = resource(os.path.join(path, '*.csv'))
        # BUG FIX: the original re-asserted on ``r`` here, so the glob
        # resource's type was never actually checked — test ``r2``.
        assert type(r2) == Directory(CSV)
        assert r2.path.rstrip(os.path.sep) == path.rstrip(os.path.sep)
def test_resource_directory():
    """ssh:// URIs pointing at directories resolve to SSH directory resources.

    A bare trailing-slash path gives some ``_Directory`` subtype; a
    ``*.csv`` glob pins the subtype to ``Directory(CSV)`` and strips the
    glob from the stored path.
    """
    bare_dir = resource('ssh://joe@localhost:/path/to/')
    assert issubclass(bare_dir.subtype, _Directory)

    globbed = resource('ssh://joe@localhost:/path/to/*.csv')
    assert globbed.subtype == Directory(CSV)
    assert globbed.path == '/path/to/'
def test_hdfs_resource():
    """hdfs:// URIs carry user, host, port and path; ``*`` globs give directories.

    Connection details may come from the URI itself or from keyword
    arguments; file extensions select the inner subtype (JSONLines, CSV).
    """
    parsed = resource('hdfs://user@hostname:1234:/path/to/myfile.json')
    assert isinstance(parsed, HDFS(JSONLines))
    assert parsed.hdfs.user_name == 'user'
    assert parsed.hdfs.host == 'hostname'
    assert parsed.hdfs.port == '1234'  # port is kept as a string
    assert parsed.path == '/path/to/myfile.json'

    # Same connection info passed as keywords instead of in the URI.
    conn = dict(host='host', user='******', port=1234)
    assert isinstance(resource('hdfs://path/to/myfile.csv', **conn),
                      HDFS(CSV))
    assert isinstance(resource('hdfs://path/to/*.csv', **conn),
                      HDFS(Directory(CSV)))
from into import into, drop, JSONLines
import sqlalchemy as sa
from datashape import dshape
from into.directory import Directory
import os

# Integration fixtures: these tests talk to a live HDFS/Hive cluster, so
# skip the whole module unless a test host is configured in the environment.
# FIX: original read ``host = '' or os.environ.get(...)`` — the leading
# ``'' or`` is dead (an empty string is falsy, so the right operand always
# wins); drop it.
host = os.environ.get('HDFS_TEST_HOST')
if not host:
    import pytest
    # importorskip on a module that can never exist → pytest skips this file.
    pytest.importorskip('does_not_exist')

hdfs = PyWebHdfsClient(host=host, port='14000', user_name='hdfs')
hdfs_csv = HDFS(CSV)('/user/hive/mrocklin/accounts/accounts.csv', hdfs=hdfs)
hdfs_directory = HDFS(Directory(CSV))('/user/hive/mrocklin/accounts/',
                                      hdfs=hdfs)
ds = dshape('var * {id: ?int64, name: ?string, amount: ?int64}')
engine = resource('hive://hdfs@%s:10000/default' % host)


def test_discover():
    """Discovering a single HDFS CSV yields its (non-optional) row datashape."""
    assert discover(hdfs_csv) == \
        dshape('var * {id: int64, name: string, amount: int64}')


def test_discover_hdfs_directory():
    """Discovering an HDFS directory of CSVs yields the same row datashape."""
    assert discover(hdfs_directory) == \
        dshape('var * {id: int64, name: string, amount: int64}')


def normalize(s):
    """Collapse all whitespace runs in *s* to single spaces (for SQL compares)."""
    return ' '.join(s.split())
def test_discover():
    """discover() on a local Directory(CSV) reports the files' row datashape."""
    with csvs() as path:
        directory = Directory(CSV)(path)
        expected = dshape('var * {a: int64, b: int64}')
        assert discover(directory) == expected