def test_hive_resource():
    db = resource('hive://hdfs@%s:10000/default' % host)
    assert isinstance(db, sa.engine.Engine)

    # Omitted user, port, and database fall back to hdfs, 10000, and default.
    db = resource('hive://%s/' % host)
    assert isinstance(db, sa.engine.Engine)
    assert str(db.url) == 'hive://hdfs@%s:10000/default' % host
def test_hive_resource_with_internal_external():
    # With an explicit dshape the internal (managed) table can be created
    # eagerly, so resource returns a bound sqlalchemy Table.
    with hive_table(host) as uri:
        r = resource(uri, external=False, stored_as='PARQUET',
                     dshape='var * {name: string, balance: int32}')
        assert isinstance(r, sa.Table)

    # Without a dshape the schema is unknown, so creation is deferred and
    # resource does not hand back a sqlalchemy Table.
    with hive_table(host) as uri:
        r = resource(uri, external=False, stored_as='PARQUET')
        assert not isinstance(r, sa.Table)

    # An external table with only a storage format is deferred as well.
    with hive_table(host) as uri:
        r = resource(uri, external=True, stored_as='PARQUET')
        assert not isinstance(r, sa.Table)
def test_hdfs_resource():
    # Note the second colon in this URI scheme: hdfs://user@host:port:/path
    r = resource('hdfs://user@hostname:1234:/path/to/myfile.json')
    assert isinstance(r, HDFS(JSONLines))
    assert r.hdfs.user_name == 'user'
    assert r.hdfs.host == 'hostname'
    assert r.hdfs.port == '1234'  # pywebhdfs keeps the port as a string
    assert r.path == '/path/to/myfile.json'

    # Connection details can also be supplied as keyword arguments instead of
    # in the URI, and a glob pattern resolves to a directory of CSVs.
    assert isinstance(resource('hdfs://path/to/myfile.csv',
                               host='host', user='******', port=1234),
                      HDFS(CSV))
    assert isinstance(resource('hdfs://path/to/*.csv',
                               host='host', user='******', port=1234),
                      HDFS(Directory(CSV)))
# Module-level setup shared by the tests above.
import os
import uuid
from contextlib import contextmanager

import pandas as pd
import numpy as np
import sqlalchemy as sa
from datashape import dshape
from pywebhdfs.webhdfs import PyWebHdfsClient  # was missing from the listing

from odo import into, drop, JSONLines, odo
from odo.backends.hdfs import discover, HDFS, CSV, SSH, dialect_of, TableProxy
from odo.backends.sql import resource
from odo.backends.ssh import sftp
from odo.directory import Directory
from odo.utils import filetext, ignoring, tmpfile

# `host` is never defined in the listing; odo's test suite reads it from the
# environment and skips these tests when it is not set.
host = os.environ.get('HDFS_TEST_HOST')

hdfs = PyWebHdfsClient(host=host, port='14000', user_name='hdfs')
ds = dshape('var * {id: ?int64, name: ?string, amount: ?int64}')
engine = resource('hive://hdfs@%s:10000/default' % host)
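
# The hive tests above rely on a `hive_table` context manager that is not
# shown in this listing.  A minimal sketch, consistent with how odo's own test
# suite defines it: yield a unique hive:// table URI, then drop the table.
@contextmanager
def hive_table(host):
    # Unique, hive-safe table name (hypothetical naming scheme).
    name = ('temp' + str(uuid.uuid1()).replace('-', ''))[:30]
    uri = 'hive://hdfs@%s:10000/default::%s' % (host, name)
    try:
        yield uri
    finally:
        with ignoring(Exception):
            drop(uri)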

accounts_1_csv = """
id,name,amount
1,Alice,100
2,Bob,200
3,Charlie,300
4,Dan,400
5,Edith,500""".strip()

accounts_2_csv = """
id,name,amount
6,Frank,600
7,George,700
8,Hannah,800
""".strip()