def test_hive_resource():
    db = resource('hive://hdfs@%s:10000/default' % host)
    assert isinstance(db, sa.engine.Engine)

    db = resource('hive://%s/' % host)
    assert isinstance(db, sa.engine.Engine)
    assert str(db.url) == 'hive://hdfs@%s:10000/default' % host
def test_hive_resource_with_internal_external():
    # `hive_table` yields the URI of a fresh temporary Hive table; a sketch
    # of it follows the setup block at the end of this module.
    with hive_table(host) as uri:
        r = resource(uri, external=False, stored_as='PARQUET',
                     dshape='var * {name: string, balance: int32}')
        assert isinstance(r, sa.Table)

    # Without an explicit datashape the table cannot be created up front,
    # so resource does not hand back a bound sa.Table.
    with hive_table(host) as uri:
        r = resource(uri, external=False, stored_as='PARQUET')
        assert not isinstance(r, sa.Table)

    with hive_table(host) as uri:
        r = resource(uri, external=True, stored_as='PARQUET')
        assert not isinstance(r, sa.Table)
def test_hdfs_resource():
    r = resource('hdfs://user@hostname:1234:/path/to/myfile.json')
    assert isinstance(r, HDFS(JSONLines))
    assert r.hdfs.user_name == 'user'
    assert r.hdfs.host == 'hostname'
    assert r.hdfs.port == '1234'
    assert r.path == '/path/to/myfile.json'

    assert isinstance(resource('hdfs://path/to/myfile.csv',
                               host='host', user='******', port=1234),
                      HDFS(CSV))
    assert isinstance(resource('hdfs://path/to/*.csv',
                               host='host', user='******', port=1234),
                      HDFS(Directory(CSV)))
import os
import uuid
from contextlib import contextmanager

import pandas as pd
import numpy as np
import sqlalchemy as sa
from datashape import dshape
from pywebhdfs.webhdfs import PyWebHdfsClient

from odo import into, drop, JSONLines, odo
from odo.backends.hdfs import discover, HDFS, CSV, SSH, dialect_of, TableProxy
from odo.backends.sql import resource
from odo.backends.ssh import sftp
from odo.directory import Directory
from odo.utils import filetext, ignoring, tmpfile

# The test host is assumed to come from the environment; the env var name
# here is an assumption, so adjust to match your cluster setup.
host = os.environ.get('HDFS_TEST_HOST', 'localhost')

hdfs = PyWebHdfsClient(host=host, port='14000', user_name='hdfs')
ds = dshape('var * {id: ?int64, name: ?string, amount: ?int64}')
engine = resource('hive://hdfs@%s:10000/default' % host)

accounts_1_csv = """
id,name,amount
1,Alice,100
2,Bob,200
3,Charlie,300
4,Dan,400
5,Edith,500""".strip()

accounts_2_csv = """
id,name,amount
6,Frank,600
7,George,700
8,Hannah,800
""".strip()
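
# The Hive tests above use a `hive_table` context manager that is not shown
# in this excerpt. This is a minimal sketch of what it needs to provide,
# assuming odo's drop() can remove a Hive table by URI: yield the URI of a
# fresh, uniquely named table and clean it up afterwards.
@contextmanager
def hive_table(host):
    # Hive table names must start with a letter; trim to a safe length.
    name = ('temp' + str(uuid.uuid1()).replace('-', ''))[:30]
    uri = 'hive://hdfs@%s:10000/default::%s' % (host, name)
    try:
        yield uri
    finally:
        with ignoring(Exception):
            drop(uri)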
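
# A hypothetical usage sketch (not part of the original suite) showing how
# the fixtures above fit together: write one of the sample CSVs to HDFS with
# the client, then check that odo's discover() recovers the module-level
# datashape `ds`. The path and the exact discovered shape are assumptions.
def test_discover_csv_on_hdfs_sketch():
    path = 'tmp/test/accounts.%s.csv' % uuid.uuid1()
    hdfs.make_dir('tmp/test')
    hdfs.create_file(path, accounts_1_csv)  # pywebhdfs paths have no leading '/'
    try:
        csv = HDFS(CSV)('/' + path, hdfs=hdfs)
        assert discover(csv) == ds  # assumed: sampling yields optional types
    finally:
        with ignoring(Exception):
            hdfs.delete_file_dir(path)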