コード例 #1
0
def make_connection():
    """Build an ibis client from the Impala and WebHDFS settings in ``ENV``.

    ``ENV`` is printed first, then the Impala connection is opened, then the
    HDFS connection, and both are combined with ``ibis.make_client``.
    """
    print(ENV)
    impala_options = {
        'host': ENV.impala_host,
        'port': ENV.impala_port,
        'protocol': ENV.impala_protocol,
    }
    impala = ibis.impala_connect(**impala_options)
    webhdfs = ibis.hdfs_connect(host=ENV.nn_host, port=ENV.webhdfs_port)
    return ibis.make_client(impala, hdfs_client=webhdfs)
コード例 #2
0
    def test_create_table_with_location(self):
        """Create a table at an explicit HDFS path and check the path exists."""
        parent_dir = pjoin(self.tmp_dir, util.guid())
        location = pjoin(parent_dir, 'test_{0}'.format(util.guid()))

        # The impala user has trouble writing to a jenkins-owned directory,
        # so the parent is created through a superuser connection and given
        # mode 777.
        superuser_options = {
            'host': ENV.nn_host,
            'port': ENV.webhdfs_port,
            'auth_mechanism': ENV.auth_mechanism,
            'verify': not (ENV.auth_mechanism in ['GSSAPI', 'LDAP']),
            'user': ENV.hdfs_superuser,
        }
        admin_fs = ibis.hdfs_connect(**superuser_options)
        admin_fs.mkdir(parent_dir)
        admin_fs.chmod(parent_dir, '777')

        source = self.alltypes
        new_table = _random_table_name()

        self.con.create_table(new_table, expr=source, path=location,
                              database=self.test_data_db)
        # Register the fully qualified name in temp_tables.
        qualified_name = '.'.join([self.test_data_db, new_table])
        self.temp_tables.append(qualified_name)
        assert self.hdfs.exists(location)
コード例 #3
0
def get_database_sqlalchemy_conn(database_type,
                                 host,
                                 port,
                                 database,
                                 user=None,
                                 password=None,
                                 **kwargs):
    """Open a connection to a Greenplum, MySQL or Hive database.

    Args:
        database_type: One of ``'greenplum'``, ``'mysql'`` or ``'hive'``.
        host: Database server host.
        port: Database server port.
        database: Name of the database to connect to.
        user: User name. NOTE(review): when left as ``None`` the text
            ``"None"`` still ends up in the SQLAlchemy URL -- confirm whether
            any caller relies on that.
        password: Password; same caveat as ``user``.
        **kwargs: Hive-only options: ``hdfs_host`` (defaults to ``host``),
            ``hdfs_port`` (defaults to 50070) and ``hive_auth_mechanism``
            (defaults to ``'PLAIN'``).

    Returns:
        A SQLAlchemy engine for Greenplum/MySQL, the object returned by
        ``connect`` for Hive, or ``None`` for any other ``database_type``.
    """
    from urllib.parse import quote_plus

    if database_type == 'greenplum':
        driver = 'postgresql+psycopg2'
    elif database_type == 'mysql':
        driver = 'mysql+pymysql'
    else:
        driver = None

    if driver is not None:
        # Percent-encode the credentials: an unescaped '@', ':' or '/' in a
        # user name or password would otherwise be parsed as URL structure.
        url = '{}://{}:{}@{}:{}/{}'.format(
            driver,
            quote_plus(str(user)),
            quote_plus(str(password)),
            host, port, database)
        return sqlalchemy.create_engine(url)

    if database_type == 'hive':
        hdfs_host = kwargs.get('hdfs_host', host)
        hdfs_port = kwargs.get('hdfs_port', 50070)
        auth_mechanism = kwargs.get('hive_auth_mechanism', 'PLAIN')

        hdfs_client = ibis.hdfs_connect(host=hdfs_host, port=hdfs_port)
        return connect(host,
                       port,
                       auth_mechanism=auth_mechanism,
                       database=database,
                       hdfs_client=hdfs_client,
                       user=user,
                       password=password)

    # Unknown database type: there is nothing to connect to.
    return None
コード例 #4
0
def hdfs_client(env):
    """Return a WebHDFS client configured from ``env``.

    The port is coerced with ``int`` before it is passed on.
    """
    options = {
        'host': env.nn_host,
        'port': int(env.webhdfs_port),
        'auth_mechanism': env.auth_mechanism,
        'user': env.webhdfs_user,
    }
    return ibis.hdfs_connect(**options)
コード例 #5
0
def make_connection():
    """Build an ibis client from the Impala and WebHDFS settings in ``ENV``.

    ``ENV`` is printed first, then the Impala connection is opened, then the
    HDFS connection, and both are combined with ``ibis.make_client``.
    """
    print(ENV)
    impala_options = {
        'host': ENV.impala_host,
        'port': ENV.impala_port,
        'protocol': ENV.impala_protocol,
    }
    impala = ibis.impala_connect(**impala_options)
    webhdfs = ibis.hdfs_connect(host=ENV.nn_host, port=ENV.webhdfs_port)
    return ibis.make_client(impala, hdfs_client=webhdfs)
コード例 #6
0
def make_ibis_client():
    """Return an Impala client with an attached WebHDFS client, both from ``ENV``.

    ``verify=False`` is passed to ``ibis.hdfs_connect`` when the auth
    mechanism is GSSAPI or LDAP, and a warning is printed in that case.
    """
    hdfs_options = {
        'host': ENV.nn_host,
        'port': ENV.webhdfs_port,
        'auth_mechanism': ENV.auth_mechanism,
        'verify': not (ENV.auth_mechanism in ['GSSAPI', 'LDAP']),
    }
    webhdfs = ibis.hdfs_connect(**hdfs_options)
    if ENV.auth_mechanism in ['GSSAPI', 'LDAP']:
        print("Warning: ignoring invalid Certificate Authority errors")
    impala_options = {
        'host': ENV.impala_host,
        'port': ENV.impala_port,
        'auth_mechanism': ENV.auth_mechanism,
        'hdfs_client': webhdfs,
    }
    return ibis.impala.connect(**impala_options)
コード例 #7
0
ファイル: test_filesystems.py プロジェクト: zuxfoucault/ibis
 def setUpClass(cls):
     """Open a WebHDFS connection and create a per-class scratch directory.

     The scratch path is a fresh GUID under ``ENV.tmp_dir``. When Kerberos is
     enabled a warning is printed and ``verify=False`` is passed to
     ``ibis.hdfs_connect``.
     """
     cls.ENV = ENV
     env = cls.ENV
     cls.tmp_dir = pjoin(env.tmp_dir, util.guid())
     if env.use_kerberos:
         print("Warning: ignoring invalid Certificate Authority errors")
     connection_options = {
         'host': env.nn_host,
         'port': env.webhdfs_port,
         'use_kerberos': env.use_kerberos,
         'verify': not env.use_kerberos,
     }
     cls.hdfs = ibis.hdfs_connect(**connection_options)
     cls.hdfs.mkdir(cls.tmp_dir)
コード例 #8
0
ファイル: test_filesystems.py プロジェクト: raderaj/ibis
 def setUpClass(cls):
     """Open a WebHDFS connection and create a per-class scratch directory.

     The scratch path is a fresh GUID under ``ENV.tmp_dir``. When Kerberos is
     enabled a warning is printed and ``verify=False`` is passed to
     ``ibis.hdfs_connect``.
     """
     cls.ENV = ENV
     env = cls.ENV
     cls.tmp_dir = pjoin(env.tmp_dir, util.guid())
     if env.use_kerberos:
         print("Warning: ignoring invalid Certificate Authority errors")
     connection_options = {
         'host': env.nn_host,
         'port': env.webhdfs_port,
         'use_kerberos': env.use_kerberos,
         'verify': not env.use_kerberos,
     }
     cls.hdfs = ibis.hdfs_connect(**connection_options)
     cls.hdfs.mkdir(cls.tmp_dir)
コード例 #9
0
ファイル: test_data_admin.py プロジェクト: zuxfoucault/ibis
def make_ibis_client():
    """Return an ibis client combining Impala and WebHDFS connections from ``ENV``.

    The Impala connection is opened first. When Kerberos is enabled a warning
    is printed and ``verify=False`` is passed to ``ibis.hdfs_connect``.
    """
    impala_options = {
        'host': ENV.impala_host,
        'port': ENV.impala_port,
        'protocol': ENV.impala_protocol,
        'use_kerberos': ENV.use_kerberos,
    }
    impala = ibis.impala.connect(**impala_options)
    if ENV.use_kerberos:
        print("Warning: ignoring invalid Certificate Authority errors")
    hdfs_options = {
        'host': ENV.nn_host,
        'port': ENV.webhdfs_port,
        'use_kerberos': ENV.use_kerberos,
        'verify': not ENV.use_kerberos,
    }
    webhdfs = ibis.hdfs_connect(**hdfs_options)
    return ibis.make_client(impala, hdfs_client=webhdfs)
コード例 #10
0
def make_connection():
    """Return an ibis client combining Impala and WebHDFS connections from ``ENV``.

    The Impala connection is opened first. When Kerberos is enabled a warning
    is printed and ``verify=False`` is passed to ``ibis.hdfs_connect``.
    """
    impala_options = {
        'host': ENV.impala_host,
        'port': ENV.impala_port,
        'protocol': ENV.impala_protocol,
        'use_kerberos': ENV.use_kerberos,
    }
    impala = ibis.impala_connect(**impala_options)
    if ENV.use_kerberos:
        print("Warning: ignoring invalid Certificate Authority errors")
    hdfs_options = {
        'host': ENV.nn_host,
        'port': ENV.webhdfs_port,
        'use_kerberos': ENV.use_kerberos,
        'verify': not ENV.use_kerberos,
    }
    webhdfs = ibis.hdfs_connect(**hdfs_options)
    return ibis.make_client(impala, hdfs_client=webhdfs)
コード例 #11
0
ファイル: test_data_admin.py プロジェクト: raincoatrun/ibis
def make_ibis_client():
    """Return an Impala client with an attached WebHDFS client, both from ``ENV``.

    ``verify=False`` is passed to ``ibis.hdfs_connect`` when the auth
    mechanism is GSSAPI or LDAP, and a warning is printed in that case.
    """
    hdfs_options = {
        'host': ENV.nn_host,
        'port': ENV.webhdfs_port,
        'auth_mechanism': ENV.auth_mechanism,
        'verify': not (ENV.auth_mechanism in ['GSSAPI', 'LDAP']),
    }
    webhdfs = ibis.hdfs_connect(**hdfs_options)
    if ENV.auth_mechanism in ['GSSAPI', 'LDAP']:
        print("Warning: ignoring invalid Certificate Authority errors")
    impala_options = {
        'host': ENV.impala_host,
        'port': ENV.impala_port,
        'auth_mechanism': ENV.auth_mechanism,
        'hdfs_client': webhdfs,
    }
    return ibis.impala.connect(**impala_options)
コード例 #12
0
ファイル: test_filesystems.py プロジェクト: BabelTower/ibis
 def setUpClass(cls):
     """Open a WebHDFS connection and create a per-class scratch directory.

     The scratch path is a fresh GUID under ``ENV.tmp_dir``. For the GSSAPI
     and LDAP auth mechanisms a warning is printed and ``verify=False`` is
     passed to ``ibis.hdfs_connect``.
     """
     cls.ENV = ENV
     env = cls.ENV
     cls.tmp_dir = pjoin(env.tmp_dir, util.guid())
     if env.auth_mechanism in ['GSSAPI', 'LDAP']:
         print("Warning: ignoring invalid Certificate Authority errors")
     connection_options = {
         'host': env.nn_host,
         'port': env.webhdfs_port,
         'auth_mechanism': env.auth_mechanism,
         'verify': not (env.auth_mechanism in ['GSSAPI', 'LDAP']),
     }
     cls.hdfs = ibis.hdfs_connect(**connection_options)
     cls.hdfs.mkdir(cls.tmp_dir)
コード例 #13
0
ファイル: test_filesystems.py プロジェクト: zuxfoucault/ibis
 def setUpClass(cls):
     """Open a superuser WebHDFS connection and create a scratch directory.

     The scratch path is a fresh GUID under ``ENV.tmp_dir``. When Kerberos is
     enabled a warning is printed and ``verify=False`` is passed to
     ``ibis.hdfs_connect``.
     """
     cls.ENV = ENV
     env = cls.ENV
     cls.tmp_dir = pjoin(env.tmp_dir, util.guid())
     if env.use_kerberos:
         print("Warning: ignoring invalid Certificate Authority errors")
     connection_options = {
         'host': env.nn_host,
         'port': env.webhdfs_port,
         'use_kerberos': env.use_kerberos,
         'verify': not env.use_kerberos,
         # NOTE: the connection is made as the superuser set in IbisTestEnv.
         'user': env.hdfs_superuser,
     }
     cls.hdfs = ibis.hdfs_connect(**connection_options)
     cls.hdfs.mkdir(cls.tmp_dir)
コード例 #14
0
 def setUpClass(cls):
     """Open a WebHDFS connection and create a per-class scratch directory.

     The connection is made as ``ENV.webhdfs_user``. For the GSSAPI and LDAP
     auth mechanisms a warning is printed and ``verify=False`` is passed to
     ``ibis.hdfs_connect``.
     """
     cls.ENV = ENV
     env = cls.ENV
     cls.tmp_dir = pjoin(env.tmp_dir, util.guid())
     if env.auth_mechanism in ['GSSAPI', 'LDAP']:
         print("Warning: ignoring invalid Certificate Authority errors")
     connection_options = {
         'host': env.nn_host,
         'port': env.webhdfs_port,
         'auth_mechanism': env.auth_mechanism,
         'verify': not (env.auth_mechanism in ['GSSAPI', 'LDAP']),
         'user': env.webhdfs_user,
     }
     cls.hdfs = ibis.hdfs_connect(**connection_options)
     cls.hdfs.mkdir(cls.tmp_dir)
コード例 #15
0
ファイル: test_filesystems.py プロジェクト: zuxfoucault/ibis
 def setUpClass(cls):
     """Open a superuser WebHDFS connection and create a scratch directory.

     The scratch path is a fresh GUID under ``ENV.tmp_dir``. When Kerberos is
     enabled a warning is printed and ``verify=False`` is passed to
     ``ibis.hdfs_connect``.
     """
     cls.ENV = ENV
     env = cls.ENV
     cls.tmp_dir = pjoin(env.tmp_dir, util.guid())
     if env.use_kerberos:
         print("Warning: ignoring invalid Certificate Authority errors")
     connection_options = {
         'host': env.nn_host,
         'port': env.webhdfs_port,
         'use_kerberos': env.use_kerberos,
         'verify': not env.use_kerberos,
         # NOTE: the connection is made as the superuser set in IbisTestEnv.
         'user': env.hdfs_superuser,
     }
     cls.hdfs = ibis.hdfs_connect(**connection_options)
     cls.hdfs.mkdir(cls.tmp_dir)
コード例 #16
0
ファイル: common.py プロジェクト: cloudorn/ibis
 def _create_777_tmp_dir(cls):
     """Create a mode-777 HDFS directory and return a fresh path inside it.

     The parent directory is created and chmod-ed through a connection made
     as ``hdfs_superuser``; the returned child path is not created here.
     """
     parent = pjoin(cls.tmp_dir, util.guid())
     child = pjoin(parent, util.guid())
     test_env = IbisTestEnv()
     superuser_options = {
         'host': test_env.nn_host,
         'port': test_env.webhdfs_port,
         'auth_mechanism': test_env.auth_mechanism,
         'verify': not (test_env.auth_mechanism in ['GSSAPI', 'LDAP']),
         'user': test_env.hdfs_superuser,
     }
     admin_fs = ibis.hdfs_connect(**superuser_options)
     admin_fs.mkdir(parent)
     admin_fs.chmod(parent, '777')
     return child
コード例 #17
0
 def _create_777_tmp_dir(cls):
     """Create a mode-777 HDFS directory and return a fresh path inside it.

     The parent directory is created and chmod-ed through a connection made
     as ``hdfs_superuser``; the returned child path is not created here.
     """
     parent = pjoin(cls.tmp_dir, util.guid())
     child = pjoin(parent, util.guid())
     test_env = IbisTestEnv()
     superuser_options = {
         'host': test_env.nn_host,
         'port': test_env.webhdfs_port,
         'auth_mechanism': test_env.auth_mechanism,
         'verify': not (test_env.auth_mechanism in ['GSSAPI', 'LDAP']),
         'user': test_env.hdfs_superuser,
     }
     admin_fs = ibis.hdfs_connect(**superuser_options)
     admin_fs.mkdir(parent)
     admin_fs.chmod(parent, '777')
     return child
コード例 #18
0
ファイル: impalamgr.py プロジェクト: sanjc/ibis
def make_ibis_client(env):
    """Return an Impala client (pool size 16) with a WebHDFS client attached.

    All connection settings are read from ``env``. For the GSSAPI and LDAP
    auth mechanisms ``verify=False`` is passed to ``ibis.hdfs_connect`` and a
    warning is logged.
    """
    hdfs_options = {
        'host': env.nn_host,
        'port': env.webhdfs_port,
        'auth_mechanism': env.auth_mechanism,
        'verify': not (env.auth_mechanism in ['GSSAPI', 'LDAP']),
        'user': env.webhdfs_user,
    }
    webhdfs = ibis.hdfs_connect(**hdfs_options)
    if env.auth_mechanism in ('GSSAPI', 'LDAP'):
        logger.warning('Ignoring invalid Certificate Authority errors')
    impala_options = {
        'host': env.impala_host,
        'port': env.impala_port,
        'auth_mechanism': env.auth_mechanism,
        'hdfs_client': webhdfs,
        'pool_size': 16,
    }
    return ibis.impala.connect(**impala_options)
コード例 #19
0
    def setUpClass(cls):
        """Open a WebHDFS connection and create a per-class scratch directory.

        Settings come from a new ``IbisTestEnv`` instance. For the GSSAPI and
        LDAP auth mechanisms a warning is printed and ``verify=False`` is
        passed to ``ibis.hdfs_connect``.
        """
        from ibis.backends.impala.tests.conftest import IbisTestEnv

        cls.ENV = IbisTestEnv()
        env = cls.ENV
        cls.tmp_dir = pjoin(env.tmp_dir, util.guid())
        if env.auth_mechanism in ['GSSAPI', 'LDAP']:
            print("Warning: ignoring invalid Certificate Authority errors")
        connection_options = {
            'host': env.nn_host,
            'port': env.webhdfs_port,
            'auth_mechanism': env.auth_mechanism,
            'verify': not (env.auth_mechanism in ['GSSAPI', 'LDAP']),
            'user': env.webhdfs_user,
        }
        cls.hdfs = ibis.hdfs_connect(**connection_options)
        cls.hdfs.mkdir(cls.tmp_dir)
コード例 #20
0
 def __init__(self,
              table_csv="newhouselog_csv",
              table="newhouselog",
              local_path=HIVE_NEWHOUSELOG_CSV_PATH,
              hive_path=HIVE_SERVER_NEWHOUSELOG_CSV_PATH):
     """Store the table names and paths and open the connections.

     Opens a WebHDFS client, an ibis Impala client on the ``user_track``
     database, and a cursor from a separate ``connect`` call, all against
     ``HIVE_URL``.
     """
     self.table_csv, self.table = table_csv, table
     self.local_path, self.hive_path = local_path, hive_path

     self.hdfs = ibis.hdfs_connect(host=HIVE_URL, port=HIVE_PORT)
     impala_options = {
         'host': HIVE_URL,
         'database': 'user_track',
         'hdfs_client': self.hdfs,
     }
     self.client = ibis.impala.connect(**impala_options)
     self.cursor = connect(host=HIVE_URL).cursor()
コード例 #21
0
ファイル: backends.py プロジェクト: shshe/ibis
 def connect(cls, data_directory):
     """Return an Impala client on ``ibis_testing`` with a WebHDFS client attached.

     Settings come from a new ``ImpalaEnv``; ``data_directory`` is not used
     by this method. For the GSSAPI and LDAP auth mechanisms ``verify=False``
     is passed to ``ibis.hdfs_connect`` and a warning is printed.
     """
     settings = ImpalaEnv()
     hdfs_options = {
         'host': settings.nn_host,
         'port': settings.webhdfs_port,
         'auth_mechanism': settings.auth_mechanism,
         'verify': not (settings.auth_mechanism in ['GSSAPI', 'LDAP']),
         'user': settings.webhdfs_user,
     }
     webhdfs = ibis.hdfs_connect(**hdfs_options)
     if settings.auth_mechanism in ('GSSAPI', 'LDAP'):
         print("Warning: ignoring invalid Certificate Authority errors")
     impala_options = {
         'host': settings.impala_host,
         'port': settings.impala_port,
         'auth_mechanism': settings.auth_mechanism,
         'hdfs_client': webhdfs,
         'database': 'ibis_testing',
     }
     return ibis.impala.connect(**impala_options)
コード例 #22
0
ファイル: conftest.py プロジェクト: sanjc/ibis
def hdfs(env, tmp_dir):
    """Return a WebHDFS client after ensuring ``tmp_dir`` exists with mode 777.

    ``pytest.importorskip('requests')`` runs first. For the GSSAPI and LDAP
    auth mechanisms a warning is issued and ``verify=False`` is passed to
    ``ibis.hdfs_connect``.
    """
    pytest.importorskip('requests')

    if env.auth_mechanism in {'GSSAPI', 'LDAP'}:
        warnings.warn("Ignoring invalid Certificate Authority errors")

    connection_options = {
        'host': env.nn_host,
        'port': env.webhdfs_port,
        'auth_mechanism': env.auth_mechanism,
        'verify': not (env.auth_mechanism in {'GSSAPI', 'LDAP'}),
        'user': env.webhdfs_user,
    }
    fs = ibis.hdfs_connect(**connection_options)

    directory_missing = not fs.exists(tmp_dir)
    if directory_missing:
        fs.mkdir(tmp_dir)
    fs.chmod(tmp_dir, '777')
    return fs
コード例 #23
0
def connect_test(env, with_hdfs=True):
    """Return an ibis client for the test database, optionally with WebHDFS.

    The Impala connection (pool size 2) is always opened. When ``with_hdfs``
    is false, ``None`` is passed to ``ibis.make_client`` as the HDFS client.
    """
    impala_options = {
        'host': env.impala_host,
        'protocol': env.impala_protocol,
        'database': env.test_data_db,
        'port': env.impala_port,
        'use_kerberos': env.use_kerberos,
        'pool_size': 2,
    }
    impala = ibis.impala_connect(**impala_options)
    if not with_hdfs:
        return ibis.make_client(impala, None)

    if env.use_kerberos:
        print("Warning: ignoring invalid Certificate Authority errors")
    hdfs_options = {
        'host': env.nn_host,
        'port': env.webhdfs_port,
        'use_kerberos': env.use_kerberos,
        'verify': not env.use_kerberos,
    }
    webhdfs = ibis.hdfs_connect(**hdfs_options)
    return ibis.make_client(impala, webhdfs)
コード例 #24
0
ファイル: util.py プロジェクト: zuxfoucault/ibis
def connect_test(env, with_hdfs=True):
    """Return an ibis client for the test database, optionally with WebHDFS.

    The Impala connection (pool size 2) is always opened. When ``with_hdfs``
    is false, ``None`` is passed to ``ibis.make_client`` as the HDFS client.
    """
    impala_options = {
        'host': env.impala_host,
        'protocol': env.impala_protocol,
        'database': env.test_data_db,
        'port': env.impala_port,
        'use_kerberos': env.use_kerberos,
        'pool_size': 2,
    }
    impala = ibis.impala_connect(**impala_options)
    if not with_hdfs:
        return ibis.make_client(impala, None)

    if env.use_kerberos:
        print("Warning: ignoring invalid Certificate Authority errors")
    hdfs_options = {
        'host': env.nn_host,
        'port': env.webhdfs_port,
        'use_kerberos': env.use_kerberos,
        'verify': not env.use_kerberos,
    }
    webhdfs = ibis.hdfs_connect(**hdfs_options)
    return ibis.make_client(impala, webhdfs)
コード例 #25
0
ファイル: impalamgr.py プロジェクト: cloudera/ibis
def make_ibis_client(env):
    """Return an Impala client (pool size 16) with a WebHDFS client attached.

    All connection settings are read from ``env``. For the GSSAPI and LDAP
    auth mechanisms ``verify=False`` is passed to ``ibis.hdfs_connect`` and a
    warning is logged.
    """
    hdfs_options = {
        'host': env.nn_host,
        'port': env.webhdfs_port,
        'auth_mechanism': env.auth_mechanism,
        'verify': not (env.auth_mechanism in ['GSSAPI', 'LDAP']),
        'user': env.webhdfs_user,
    }
    webhdfs = ibis.hdfs_connect(**hdfs_options)
    if env.auth_mechanism in ('GSSAPI', 'LDAP'):
        logger.warning('Ignoring invalid Certificate Authority errors')
    impala_options = {
        'host': env.impala_host,
        'port': env.impala_port,
        'auth_mechanism': env.auth_mechanism,
        'hdfs_client': webhdfs,
        'pool_size': 16,
    }
    return ibis.impala.connect(**impala_options)
コード例 #26
0
ファイル: common.py プロジェクト: BabelTower/ibis
def connect_test(env, with_hdfs=True):
    """Return an Impala client for the test database, optionally with WebHDFS.

    When ``with_hdfs`` is false the client is created with
    ``hdfs_client=None``. For the GSSAPI and LDAP auth mechanisms a warning
    is printed and ``verify=False`` is passed to ``ibis.hdfs_connect``.
    """
    webhdfs = None
    if with_hdfs:
        if env.auth_mechanism in ['GSSAPI', 'LDAP']:
            print("Warning: ignoring invalid Certificate Authority errors")
        hdfs_options = {
            'host': env.nn_host,
            'port': env.webhdfs_port,
            'auth_mechanism': env.auth_mechanism,
            'verify': not (env.auth_mechanism in ['GSSAPI', 'LDAP']),
        }
        webhdfs = ibis.hdfs_connect(**hdfs_options)

    impala_options = {
        'host': env.impala_host,
        'database': env.test_data_db,
        'port': env.impala_port,
        'auth_mechanism': env.auth_mechanism,
        'pool_size': 2,
        'hdfs_client': webhdfs,
    }
    return ibis.impala.connect(**impala_options)
コード例 #27
0
ファイル: common.py プロジェクト: laserson/ibis
def connect_test(env, with_hdfs=True):
    """Return an Impala client for the test database, optionally with WebHDFS.

    When ``with_hdfs`` is false the client is created with
    ``hdfs_client=None``. For the GSSAPI and LDAP auth mechanisms a warning
    is printed and ``verify=False`` is passed to ``ibis.hdfs_connect``.
    """
    webhdfs = None
    if with_hdfs:
        if env.auth_mechanism in ['GSSAPI', 'LDAP']:
            print("Warning: ignoring invalid Certificate Authority errors")
        hdfs_options = {
            'host': env.nn_host,
            'port': env.webhdfs_port,
            'auth_mechanism': env.auth_mechanism,
            'verify': not (env.auth_mechanism in ['GSSAPI', 'LDAP']),
        }
        webhdfs = ibis.hdfs_connect(**hdfs_options)

    impala_options = {
        'host': env.impala_host,
        'database': env.test_data_db,
        'port': env.impala_port,
        'auth_mechanism': env.auth_mechanism,
        'pool_size': 2,
        'hdfs_client': webhdfs,
    }
    return ibis.impala.connect(**impala_options)
コード例 #28
0
ファイル: conftest.py プロジェクト: cloudera/ibis
def hdfs(env, tmp_dir):
    """Return a WebHDFS client after ensuring ``tmp_dir`` exists with mode 777.

    ``pytest.importorskip('requests')`` runs first. For the GSSAPI and LDAP
    auth mechanisms a warning is issued and ``verify=False`` is passed to
    ``ibis.hdfs_connect``.
    """
    pytest.importorskip('requests')

    if env.auth_mechanism in {'GSSAPI', 'LDAP'}:
        warnings.warn("Ignoring invalid Certificate Authority errors")

    connection_options = {
        'host': env.nn_host,
        'port': env.webhdfs_port,
        'auth_mechanism': env.auth_mechanism,
        'verify': not (env.auth_mechanism in {'GSSAPI', 'LDAP'}),
        'user': env.webhdfs_user,
    }
    fs = ibis.hdfs_connect(**connection_options)

    directory_missing = not fs.exists(tmp_dir)
    if directory_missing:
        fs.mkdir(tmp_dir)
    fs.chmod(tmp_dir, '777')
    return fs
コード例 #29
0
ファイル: impala.py プロジェクト: shubhampachori12110095/ibis
    def connect(cls, module):
        """Return ``module.connect(...)`` on ``ibis_testing`` with WebHDFS attached.

        Settings are read from ``ENV``. For the GSSAPI and LDAP auth
        mechanisms ``verify=False`` is passed to ``ibis.hdfs_connect`` and a
        warning is printed.
        """
        hdfs_options = {
            'host': ENV.nn_host,
            'port': ENV.webhdfs_port,
            'auth_mechanism': ENV.auth_mechanism,
            'verify': not (ENV.auth_mechanism in ['GSSAPI', 'LDAP']),
            'user': ENV.webhdfs_user,
        }
        webhdfs = ibis.hdfs_connect(**hdfs_options)

        if ENV.auth_mechanism in ('GSSAPI', 'LDAP'):
            print("Warning: ignoring invalid Certificate Authority errors")

        backend_options = {
            'host': ENV.impala_host,
            'port': ENV.impala_port,
            'auth_mechanism': ENV.auth_mechanism,
            'hdfs_client': webhdfs,
            'database': 'ibis_testing',
        }
        return module.connect(**backend_options)
コード例 #30
0
    def connect(data_directory: Path) -> ibis.client.Client:
        """Return an Impala client on ``ibis_testing`` with WebHDFS attached.

        Settings come from a new ``IbisTestEnv``; ``data_directory`` is not
        used here. For the GSSAPI and LDAP auth mechanisms ``verify=False`` is
        passed to ``ibis.hdfs_connect`` and a warning is printed.
        """
        from ibis.backends.impala.tests.conftest import IbisTestEnv

        settings = IbisTestEnv()
        hdfs_options = {
            'host': settings.nn_host,
            'port': settings.webhdfs_port,
            'auth_mechanism': settings.auth_mechanism,
            'verify': not (settings.auth_mechanism in ['GSSAPI', 'LDAP']),
            'user': settings.webhdfs_user,
        }
        webhdfs = ibis.hdfs_connect(**hdfs_options)
        if settings.auth_mechanism in ('GSSAPI', 'LDAP'):
            print("Warning: ignoring invalid Certificate Authority errors")
        impala_options = {
            'host': settings.impala_host,
            'port': settings.impala_port,
            'auth_mechanism': settings.auth_mechanism,
            'hdfs_client': webhdfs,
            'database': 'ibis_testing',
        }
        return ibis.impala.connect(**impala_options)
コード例 #31
0
ファイル: test_ddl.py プロジェクト: koverholt/ibis
    def test_create_table_with_location(self):
        """Create a table at an explicit HDFS location and check it exists."""
        parent_dir = pjoin(self.tmp_dir, util.guid())
        location = pjoin(parent_dir, 'test_{0}'.format(util.guid()))

        # The impala user has trouble writing to a jenkins-owned directory,
        # so the parent is created through a superuser connection and given
        # mode 777.
        superuser_options = {
            'host': ENV.nn_host,
            'port': ENV.webhdfs_port,
            'auth_mechanism': ENV.auth_mechanism,
            'verify': not (ENV.auth_mechanism in ['GSSAPI', 'LDAP']),
            'user': ENV.hdfs_superuser,
        }
        admin_fs = ibis.hdfs_connect(**superuser_options)
        admin_fs.mkdir(parent_dir)
        admin_fs.chmod(parent_dir, '777')

        source = self.alltypes
        new_table = _random_table_name()

        self.con.create_table(new_table,
                              obj=source,
                              location=location,
                              database=self.test_data_db)
        # Register the fully qualified name in temp_tables.
        qualified_name = '.'.join([self.test_data_db, new_table])
        self.temp_tables.append(qualified_name)
        assert self.hdfs.exists(location)
コード例 #32
0
# Note: 4.0_sparklyr.R must be run first to create the airlines database.
#
# Connects to HDFS and Impala through ibis, lists the tables of the
# `flights` database and previews the `airlines_bi_pq` and `airports` tables.

import ibis
import os
ibis.options.interactive = True
# Host where HttpFS lives; override with the HDFS_HOST environment variable.
# Different parameters are needed if the cluster uses security.
hdfs_host = os.getenv('HDFS_HOST', 'ip-10-0-0-99.us-west-2.compute.internal')
hdfs = ibis.hdfs_connect(host=hdfs_host,
                         auth_mechanism='PLAIN',
                         verify=False,
                         use_https=False)
hdfs.ls('/tmp')
# Host where the Impala daemon lives; override with IMPALA_HOST.
# impala_host = os.getenv('IMPALA_HOST', 'se-central-cdsw-cluster-2.vpc.cloudera.com')
impala_host = os.getenv('IMPALA_HOST',
                        'ip-10-0-0-154.us-west-2.compute.internal')
# Connect to the resolved host rather than a hard-coded name, so that the
# IMPALA_HOST environment variable is actually honoured.
con = ibis.impala.connect(impala_host,
                          hdfs_client=hdfs,
                          database='flights')
# Alternative with explicit port and authentication settings:
#con = ibis.impala.connect(host=impala_host, port=21050,
#                          database='flights', hdfs_client=hdfs,
#                          auth_mechanism='PLAIN', use_ssl=False)
con.list_tables()
import matplotlib
import seaborn as sns
import matplotlib.pyplot as plt
# Preview the first ten rows of each table.
airlines = con.table('airlines_bi_pq')
airlines.limit(10).execute()
airports = con.table('airports')
airports.limit(10).execute()
コード例 #33
0
ファイル: test11.py プロジェクト: wingzero321/py_code
# test.py
#
# Connects to Impala and WebHDFS through ibis. The commented-out sections
# below are earlier attempts using impyla's DB-API and PyHive's presto client.

# from impala.dbapi import connect
# conn = connect(host='10.0.0.228', port=21080)
# cursor = conn.cursor()
# cursor.execute('SELECT * FROM mytable LIMIT 100')
# print cursor.description # prints the result set's schema
# results = cursor.fetchall()



# from pyhive import presto
# cursor = presto.connect('10.0.0.228',21080).cursor()
# cursor.execute('SELECT * FROM my_awesome_data LIMIT 10')
# print cursor.fetchone()
# print cursor.fetchall()


import ibis

# NOTE(review): Impala's HiveServer2 endpoint is usually 21050 and 21000 is
# usually the Beeswax (impala-shell) port -- confirm which one is intended.
impala_host ='10.0.0.228'
impala_port = 21000

# NOTE(review): 50010 is commonly the DataNode data-transfer port; WebHDFS is
# usually served on the NameNode HTTP port (50070) -- confirm.
webhdfs_host = '10.0.0.227'
webhdfs_port = 50010

# The HDFS client is handed to the Impala client via hdfs_client.
hdfs = ibis.hdfs_connect(host=webhdfs_host, port=webhdfs_port)
con = ibis.impala.connect(host=impala_host, port=impala_port,
                          hdfs_client=hdfs)
コード例 #34
0
ファイル: impala_api.py プロジェクト: Data-drone/dash_testing
import hdfs
import os

# Testing impala connection
# All four settings are required environment variables; a missing one raises
# KeyError, and the two ports must parse as integers.
impala_host = os.environ['IMPALA_HOST']
impala_port = int(os.environ['IMPALA_PORT'])
webhdfs_host = os.environ['WEBHDFS_HOST']
webhdfs_port = int(os.environ['WEBHDFS_PORT'])

# should replace with env var?
#webhdfs_host = 'ec2-54-66-248-84.ap-southeast-2.compute.amazonaws.com'
#webhdfs_port = 9870
#impala_host = 'ec2-54-66-248-84.ap-southeast-2.compute.amazonaws.com'
#impala_port = 21050

# NOTE(review): this assignment rebinds the name `hdfs`, hiding the `hdfs`
# module imported at the top of the file.
# NOTE(review): `ibis` (and `pd`, used below) are not imported in the visible
# part of this file -- presumably imported elsewhere; confirm.
hdfs = ibis.hdfs_connect(host=webhdfs_host, port=webhdfs_port)
client = ibis.impala.connect(host=impala_host,
                             port=impala_port,
                             hdfs_client=hdfs)
# Handle on the 'default' database, used by the query helpers below.
db = client.database('default')


def get_wind_data(start: int, end: int) -> pd.DataFrame:
    """Return speed, speed error and direction for ``start < rowid < end``.

    Both bounds are exclusive. The expression is built on ``db.wind`` and
    executed before returning.
    """
    wind = db.wind
    in_range = [wind.rowid > start, wind.rowid < end]
    selection = wind.filter(in_range)['speed', 'speederror', 'direction']
    return selection.execute()

コード例 #35
0
#
# ## Interactive Mode
# Ibis also allows an interactive mode that automatically executes all
# expressions. This can be useful in a notebook or REPL. I personally prefer
# to explicitly execute expressions, but this is a personal preference.
# If you use the interactive mode, I recommend setting the default limit low
# to prevent accidentally trying to return an unreasonable number of rows to
# your local process. To safely turn on interactive mode, you would run
# something like the two commands:
#
# ibis.options.sql.default_limit = 10
# ibis.options.interactive = True

# No default row limit is applied to executed expressions here.
ibis.options.sql.default_limit = None

# NOTE(review): both host values are empty strings -- presumably placeholders
# to be filled in before running; confirm.
hdfs_conn = ibis.hdfs_connect(host='')
ibis_conn = ibis.impala.connect(host='', hdfs_client=hdfs_conn)

pageviews_tbl = ibis_conn.table('wiki_pageviews', database='u_juliet')

# What is in a project name? What does this data look like?

project_names_expr = pageviews_tbl.project_name.distinct()
project_names = ibis_conn.execute(project_names_expr)
project_names

# From the data docs, we know that the postfixes have the following meanings:
#
#     wikibooks: ".b"
#     wiktionary: ".d"
#     wikimedia: ".m"
コード例 #36
0
ファイル: test_hive.py プロジェクト: bbonnin/saagie
# ====== Ibis configuration (workaround for a bug) ======
with ibis.config.config_prefix('impala'):
    ibis.config.set_option('temp_db', '`__ibis_tmp`')

# ====== Connection ======
# Connecting to Hive by providing Hive host ip and port (10000 by default) and a Webhdfs client
# - To add the values of these environment variables in the Saagie platform:
#    - go to the manager and choose your platform
#    - to get the value of IP_HDFS: click on HDFS; in the panel that opens,
#      note the IP value in the WebHDFS section
#    - to get the value of IP_HIVE: click on Hive; in the panel that opens,
#      note the IP value in the HiveServer2 section
#    - go to Settings and add these environment variables.

hdfs = ibis.hdfs_connect(host=os.environ['IP_HDFS'], port=50070)

client = ibis.impala.connect(host=os.environ['IP_HIVE'],
                             port=10000,
                             hdfs_client=hdfs,
                             user='******',
                             password='******',
                             auth_mechanism='PLAIN')

# ====== Writing to the table ======
# Create a simple pandas DataFrame with 2 columns
liste_hello = ['hello1', 'hello2']
liste_world = ['world1', 'world2']
df = pd.DataFrame(data={'hello': liste_hello, 'world': liste_world})

# Write the DataFrame to Hive if the table does not exist
コード例 #37
0
# Connects to HDFS and Impala through ibis using GSSAPI authentication, lists
# the tables of the `flights` database and loads two of them.
import ibis
import os
ibis.options.interactive = True

# Host where HttpFS lives; override with the HDFS_HOST environment variable.
hdfs_host = os.getenv('HDFS_HOST', 'ip-10-0-0-168.us-west-2.compute.internal')
# NOTE(review): verify=True is passed together with use_https=False --
# confirm whether verification has any effect without HTTPS.
hdfs = ibis.hdfs_connect(host=hdfs_host,
                         port=14000,
                         auth_mechanism='GSSAPI',
                         verify=True,
                         use_https=False)
hdfs.ls('/tmp')

# Host where the Impala daemon lives; override with IMPALA_HOST.
impala_host = os.getenv('IMPALA_HOST',
                        'ip-10-0-0-150.us-west-2.compute.internal')
con = ibis.impala.connect(host=impala_host,
                          port=21050,
                          database='flights',
                          hdfs_client=hdfs,
                          auth_mechanism='GSSAPI',
                          use_ssl=False)
con.list_tables()

import matplotlib
import seaborn as sns
import matplotlib.pyplot as plt
# Preview the first ten rows of the airlines table.
airlines = con.table('airlines_bi_pq')
airlines.limit(10).execute()

airports = con.table('airports')
コード例 #38
0
# Using stats from https://wiki.cloudera.com/pages/viewpage.action?spaceKey=EDH&title=Ad-hoc+Data+Analytics+over+Clusterstats+Data

import ibis
import os
import sys
import pandas as pd
from IPython.display import display

# Connect to the EDH: a WebHDFS client plus a GSSAPI/SSL Impala connection.
ibis.options.sql.default_limit = None
hdfs_conn = ibis.hdfs_connect(host='lannister-001.edh.cloudera.com')
ibis_conn = ibis.impala.connect(
    host='westeros.edh.cloudera.com',
    port=21050,
    auth_mechanism='GSSAPI',
    use_ssl=True,
    kerberos_service_name='impala',
    hdfs_client=hdfs_conn,
    timeout=300,
)

# Settings for interactive exploration.
ibis.options.interactive = True
# pd.set_option('display.width', 1000)
pd.set_option('max_colwidth', 100)
pd.set_option('display.max_rows', 500)

# Module-level values defined for the queries that follow.
UUID = "3f75166e-682e-4ad1-b5fe-33171a198e58"
clustername = "PROD"
collectts = 1506704298000
roletype = "DATANODE"

def slowBlockreceiverCounts(UUID,clustername,collectts,roletype):
  sql="""
    select host,count(message) as message_count
    from customer_logs
    where customerUUID = "{}" and 
コード例 #39
0
)

# ClickHouse connection settings.
conf['clickhouse'] = {
    'host': 'localhost',
    'port': 9000,
    'user': '******',
    'password': '',
    'database': 'ibis_testing',
}

# Impala connection settings; the WebHDFS client is created eagerly here and
# passed along as hdfs_client.
_hdfs_client = ibis.hdfs_connect(
    host='impala',
    port=50070,
    auth_mechanism='NOSASL',
    verify=True,
    user='******',
)
conf['impala'] = {
    'host': 'localhost',
    'port': 21050,
    'auth_mechanism': 'NOSASL',
    'hdfs_client': _hdfs_client,
    'database': 'ibis_testing',
}

# PySpark: reuse the active SparkSession or create one.
conf['pyspark'] = {
    'session': SparkSession.builder.getOrCreate(),
}