Esempio n. 1
0
def connect(host='localhost',
            port=21050,
            protocol='hiveserver2',
            database=None,
            timeout=45,
            use_ssl=False,
            ca_cert=None,
            use_ldap=False,
            ldap_user=None,
            ldap_password=None,
            use_kerberos=False,
            kerberos_service_name='impala'):
    """Open a connection over the requested protocol (PEP 249 entry point).

    `protocol` is compared case-insensitively. ``'beeswax'`` calls
    `warn_deprecate_hs2()` and returns a `BeeswaxConnection`;
    ``'hiveserver2'`` returns a `HiveServer2Connection`. The transport and
    authentication arguments are forwarded positionally, unchanged, to the
    protocol-specific connector, and `database` is passed to the connection
    object as ``default_db``.

    Raises
    ------
    NotSupportedError
        If `protocol` is neither of the two protocols above.
    """
    requested = protocol.lower()

    # Both connectors receive the same positional arguments in the same order.
    connector_args = (host, port, timeout, use_ssl, ca_cert,
                      use_ldap, ldap_user, ldap_password,
                      use_kerberos, kerberos_service_name)

    if requested == 'beeswax':
        warn_deprecate_hs2()
        beeswax_service = connect_to_beeswax(*connector_args)
        return BeeswaxConnection(beeswax_service, default_db=database)

    if requested == 'hiveserver2':
        hs2_service = connect_to_hiveserver2(*connector_args)
        return HiveServer2Connection(hs2_service, default_db=database)

    raise NotSupportedError(
        "The specified protocol '%s' is not supported." % protocol)
Esempio n. 2
0
 def fetchcbatch(self):
     """Return one columnar batch, preferring data that is already buffered.

     If the internal buffer is non-empty it is returned and replaced by a
     fresh ``Batch()``. Otherwise, while the last operation is still active,
     one more batch is fetched from it. ``None`` is returned when the buffer
     is empty and either the operation is no longer active or the fetch
     produced an empty batch.

     Raises ``NotSupportedError`` when the last operation is not columnar,
     and ``ProgrammingError`` when there is no result set.
     """
     if not self._last_operation.is_columnar:
         raise NotSupportedError(
             "Server does not support columnar fetching")
     if not self.has_result_set:
         raise ProgrammingError(
             "Trying to fetch results on an operation with no results.")

     if len(self._buffer) > 0:
         log.debug(
             'fetchcbatch: buffer has data in. Returning it and wiping buffer')
         # Hand over the buffered data and leave an empty buffer behind.
         buffered, self._buffer = self._buffer, Batch()
         return buffered

     if not self._last_operation_active:
         return None

     log.debug(
         'fetchcbatch: buffer empty and op is active => fetching more data')
     fetched = self._last_operation.fetch(self.description,
                                          self.buffersize,
                                          convert_types=self.convert_types)
     # An empty batch is reported to the caller as None.
     return fetched if len(fetched) != 0 else None
Esempio n. 3
0
 def fetchcolumnar(self):
     """Fetch all remaining batches of the last operation as a list.

     Calls ``fetch`` on the last operation repeatedly until it returns an
     empty batch; the empty batch terminates the loop and is not included
     in the result.

     Raises ``NotSupportedError`` when the last operation is not columnar.
     """
     if not self._last_operation.is_columnar:
         raise NotSupportedError(
             "Server does not support columnar fetching")

     def next_batch():
         # Attributes are re-read on every call, one fetch per invocation.
         return self._last_operation.fetch(
             self.description,
             self.buffersize,
             convert_types=self.convert_types)

     collected = []
     chunk = next_batch()
     while len(chunk) != 0:
         collected.append(chunk)
         chunk = next_batch()
     return collected
Esempio n. 4
0
 def fetchcolumnar(self):
     """Fetch all remaining result batches for the last operation handle.

     Calls ``fetch_results`` repeatedly until it returns an empty batch;
     the empty batch ends the loop and is not part of the returned list.

     Raises ``NotSupportedError`` when ``_is_columnar_protocol`` rejects
     the connection's HiveServer2 protocol version.
     """
     if not _is_columnar_protocol(self.hs2_protocol_version):
         message = ("HiveServer2 protocol version ({0}) does "
                    "not support columnar fetching")
         raise NotSupportedError(message.format(self.hs2_protocol_version))

     def next_batch():
         # Attributes are re-read on every call, one fetch per invocation.
         return fetch_results(self.service, self._last_operation_handle,
                              self.hs2_protocol_version, self.description,
                              self.buffersize)

     collected = []
     chunk = next_batch()
     while len(chunk) != 0:
         collected.append(chunk)
         chunk = next_batch()
     return collected
Esempio n. 5
0
def connect(host='localhost',
            port=21050,
            database=None,
            timeout=None,
            use_ssl=False,
            ca_cert=None,
            auth_mechanism='NOSASL',
            user=None,
            password=None,
            kerberos_service_name='impala',
            use_ldap=None,
            ldap_user=None,
            ldap_password=None,
            use_kerberos=None,
            protocol=None):
    """Connect to HiveServer2 and return an `hs2.HiveServer2Connection`.

    The deprecated arguments (`use_kerberos`, `use_ldap`, `ldap_user`,
    `ldap_password`, `protocol`) are still honoured: each one triggers a
    deprecation warning call when supplied and is mapped onto its
    replacement (`auth_mechanism`, `user`, `password`). The resolved
    settings are forwarded to `hs2.connect`, and `database` is passed to the
    connection as ``default_db``.

    Raises
    ------
    NotSupportedError
        If the resolved authentication mechanism is not in
        `AUTH_MECHANISMS`, or if `protocol` is given and is not
        ``'hiveserver2'`` (case-insensitive).
    """
    # pylint: disable=too-many-locals

    # Deprecated boolean switches: warn whenever one is supplied; a true
    # value overrides auth_mechanism. LDAP is applied last, so it wins when
    # both switches are true.
    if use_kerberos is not None:
        warn_deprecate('use_kerberos', 'auth_mechanism="GSSAPI"')
        auth_mechanism = 'GSSAPI' if use_kerberos else auth_mechanism
    if use_ldap is not None:
        warn_deprecate('use_ldap', 'auth_mechanism="LDAP"')
        auth_mechanism = 'LDAP' if use_ldap else auth_mechanism

    # A falsy mechanism (None, '') falls back to NOSASL.
    auth_mechanism = auth_mechanism.upper() if auth_mechanism else 'NOSASL'
    if auth_mechanism not in AUTH_MECHANISMS:
        raise NotSupportedError(
            'Unsupported authentication mechanism: {0}'.format(auth_mechanism))

    # Deprecated credential aliases take precedence over user/password.
    if ldap_user is not None:
        warn_deprecate('ldap_user', 'user')
        user = ldap_user
    if ldap_password is not None:
        warn_deprecate('ldap_password', 'password')
        password = ldap_password

    if protocol is not None:
        if protocol.lower() != 'hiveserver2':
            raise NotSupportedError(
                "'{0}' is not a supported protocol; only HiveServer2 is "
                "supported".format(protocol))
        warn_protocol_param()

    hs2_options = dict(
        host=host,
        port=port,
        timeout=timeout,
        use_ssl=use_ssl,
        ca_cert=ca_cert,
        user=user,
        password=password,
        kerberos_service_name=kerberos_service_name,
        auth_mechanism=auth_mechanism,
    )
    service = hs2.connect(**hs2_options)
    return hs2.HiveServer2Connection(service, default_db=database)
Esempio n. 6
0
def connect(host='localhost', port=21050, database=None, timeout=None,
            use_ssl=False, ca_cert=None, auth_mechanism='NOSASL', user=None,
            password=None, kerberos_service_name='impala', use_ldap=None,
            ldap_user=None, ldap_password=None, use_kerberos=None,
            protocol=None, username=None):
    """Open a connection to HiveServer2 (HS2).

    Most options mirror the impala-shell command line arguments; consult
    that documentation for further detail.

    Parameters
    ----------
    host : str
        HS2 hostname. With Impala, any `impalad` host can be used.
    port : int, optional
        HS2 port. Impala defaults to 21050; Hive most likely uses a
        different port.
    database : str, optional
        Default database. When `None`, the outcome is
        implementation-dependent.
    timeout : int, optional
        Connection timeout in seconds. No timeout by default.
    use_ssl : bool, optional
        Turn on SSL.
    ca_cert : str, optional
        Local path to the third-party CA certificate. With SSL enabled and
        no certificate given, the server certificate is not validated.
    auth_mechanism : {'NOSASL', 'PLAIN', 'GSSAPI', 'LDAP'}
        Authentication mechanism. `'NOSASL'` for unsecured Impala,
        `'PLAIN'` for unsecured Hive (Hive requires the SASL transport),
        `'GSSAPI'` for Kerberos and `'LDAP'` for Kerberos with LDAP.
        Matched case-insensitively; a falsy value means `'NOSASL'`.
    user : str, optional
        LDAP user, if applicable.
    password : str, optional
        LDAP password, if applicable.
    kerberos_service_name : str, optional
        Service principal of the `impalad` to authenticate to. Defaults to
        `'impala'`.
    use_ldap : bool, optional
        Use `auth_mechanism='LDAP'` instead.

        .. deprecated:: 0.11.0
    ldap_user : str, optional
        Use the `user` parameter instead.

        .. deprecated:: 0.11.0
    ldap_password : str, optional
        Use the `password` parameter instead.

        .. deprecated:: 0.11.0
    use_kerberos : bool, optional
        Use `auth_mechanism='GSSAPI'` instead.

        .. deprecated:: 0.11.0
    protocol : str, optional
        Do not use.  HiveServer2 is the only protocol currently supported.

        .. deprecated:: 0.11.0
    username : str, optional
        Passed to the connection object as its ``impersonate`` argument.
        NOTE(review): presumably the user to impersonate for the session;
        confirm against `HiveServer2Connection`.

    Returns
    -------
    HiveServer2Connection
        A `Connection` object (DB API 2.0-compliant).

    Raises
    ------
    NotSupportedError
        If the resolved authentication mechanism is not in
        `AUTH_MECHANISMS`, or if `protocol` is given and is not
        ``'hiveserver2'`` (case-insensitive).
    """
    # pylint: disable=too-many-locals

    # Deprecated boolean switches override auth_mechanism when true; LDAP is
    # handled last and therefore wins if both are set.
    if use_kerberos is not None:
        warn_deprecate('use_kerberos', 'auth_mechanism="GSSAPI"')
        auth_mechanism = 'GSSAPI' if use_kerberos else auth_mechanism
    if use_ldap is not None:
        warn_deprecate('use_ldap', 'auth_mechanism="LDAP"')
        auth_mechanism = 'LDAP' if use_ldap else auth_mechanism

    auth_mechanism = auth_mechanism.upper() if auth_mechanism else 'NOSASL'
    if auth_mechanism not in AUTH_MECHANISMS:
        raise NotSupportedError(
            'Unsupported authentication mechanism: {0}'.format(auth_mechanism))

    # Deprecated credential aliases replace user/password when supplied.
    if ldap_user is not None:
        warn_deprecate('ldap_user', 'user')
        user = ldap_user
    if ldap_password is not None:
        warn_deprecate('ldap_password', 'password')
        password = ldap_password

    if protocol is not None:
        if protocol.lower() != 'hiveserver2':
            raise NotSupportedError(
                "'{0}' is not a supported protocol; only HiveServer2 is "
                "supported".format(protocol))
        warn_protocol_param()

    hs2_options = dict(
        host=host,
        port=port,
        timeout=timeout,
        use_ssl=use_ssl,
        ca_cert=ca_cert,
        user=user,
        password=password,
        kerberos_service_name=kerberos_service_name,
        auth_mechanism=auth_mechanism,
    )
    service = hs2.connect(**hs2_options)
    return hs2.HiveServer2Connection(service, default_db=database,
                                     impersonate=username)
Esempio n. 7
0
# Immutable default for ``auth_cookie_names``; a fresh list is built from it on
# every call so no mutable object is shared between calls.
_DEFAULT_AUTH_COOKIE_NAMES = ('impala.auth', 'hive.server2.auth')


def connect(host='localhost',
            port=21050,
            database=None,
            timeout=None,
            use_ssl=False,
            ca_cert=None,
            auth_mechanism='NOSASL',
            user=None,
            password=None,
            kerberos_service_name='impala',
            use_ldap=None,
            ldap_user=None,
            ldap_password=None,
            use_kerberos=None,
            protocol=None,
            krb_host=None,
            use_http_transport=False,
            http_path='',
            auth_cookie_names=_DEFAULT_AUTH_COOKIE_NAMES,
            retries=3):
    """Get a connection to HiveServer2 (HS2).

    These options are largely compatible with the impala-shell command line
    arguments. See those docs for more information.

    Parameters
    ----------
    host : str
        The hostname for HS2. For Impala, this can be any of the `impalad`s.
    port : int, optional
        The port number for HS2. The Impala default is 21050. The Hive port is
        likely different.
    database : str, optional
        The default database. If `None`, the result is
        implementation-dependent.
    timeout : int, optional
        Connection timeout in seconds. Default is no timeout.
    use_ssl : bool, optional
        Enable SSL.
    ca_cert : str, optional
        Local path to the third-party CA certificate. If SSL is enabled but
        the certificate is not specified, the server certificate will not be
        validated.
    auth_mechanism : {'NOSASL', 'PLAIN', 'GSSAPI', 'LDAP'}
        Specify the authentication mechanism. `'NOSASL'` for unsecured Impala.
        `'PLAIN'` for unsecured Hive (because Hive requires the SASL
        transport). `'GSSAPI'` for Kerberos and `'LDAP'` for Kerberos with
        LDAP. Matched case-insensitively; a falsy value means `'NOSASL'`.
    user : str, optional
        LDAP user, if applicable.
    password : str, optional
        LDAP password, if applicable.
    kerberos_service_name : str, optional
        Authenticate to a particular `impalad` service principal. Uses
        `'impala'` by default.
    krb_host : str, optional
        Forwarded unchanged to `hs2.connect`.
        NOTE(review): presumably the host name to use for Kerberos instead
        of `host`; confirm against `hs2.connect`.
    use_http_transport : bool, optional
        Set it to True to use http transport or False to use binary transport.
    http_path : str, optional
        Specify the path in the http URL. Used only when `use_http_transport`
        is True.
    auth_cookie_names : list of str or str, optional
        Specify the list of possible names for the cookie used for
        cookie-based authentication. If the list of names contains one cookie
        name only, a str value can be specified instead of a list.
        Used only when `use_http_transport` is True.
        By default 'auth_cookie_names' is set to the list of auth cookie names
        used by Impala and Hive (``['impala.auth', 'hive.server2.auth']``).
        If 'auth_cookie_names' is explicitly set to an empty value (None, [],
        or ''), Impyla won't attempt to do cookie based authentication.
        Currently cookie-based authentication is only supported for GSSAPI
        over http.
    retries : int, optional
        Forwarded unchanged to `hs2.connect`. Defaults to 3.
        NOTE(review): presumably the number of retry attempts; confirm
        against `hs2.connect`.
    use_ldap : bool, optional
        Specify `auth_mechanism='LDAP'` instead.

        .. deprecated:: 0.11.0
    ldap_user : str, optional
        Use `user` parameter instead.

        .. deprecated:: 0.11.0
    ldap_password : str, optional
        Use `password` parameter instead.

        .. deprecated:: 0.11.0
    use_kerberos : bool, optional
        Specify `auth_mechanism='GSSAPI'` instead.

        .. deprecated:: 0.11.0
    protocol : str, optional
        Do not use.  HiveServer2 is the only protocol currently supported.

        .. deprecated:: 0.11.0


    Returns
    -------
    HiveServer2Connection
        A `Connection` object (DB API 2.0-compliant).

    Raises
    ------
    NotSupportedError
        If the resolved authentication mechanism is not in
        `AUTH_MECHANISMS`, or if `protocol` is given and is not
        ``'hiveserver2'`` (case-insensitive).
    """
    # pylint: disable=too-many-locals
    if auth_cookie_names is _DEFAULT_AUTH_COOKIE_NAMES:
        # Callers that rely on the default still hand hs2.connect a list, but
        # a new one each time, so downstream mutation cannot leak across calls.
        auth_cookie_names = list(_DEFAULT_AUTH_COOKIE_NAMES)

    if use_kerberos is not None:
        warn_deprecate('use_kerberos', 'auth_mechanism="GSSAPI"')
        if use_kerberos:
            auth_mechanism = 'GSSAPI'

    # Handled after use_kerberos, so LDAP wins if both switches are true.
    if use_ldap is not None:
        warn_deprecate('use_ldap', 'auth_mechanism="LDAP"')
        if use_ldap:
            auth_mechanism = 'LDAP'

    if auth_mechanism:
        auth_mechanism = auth_mechanism.upper()
    else:
        auth_mechanism = 'NOSASL'

    if auth_mechanism not in AUTH_MECHANISMS:
        raise NotSupportedError(
            'Unsupported authentication mechanism: {0}'.format(auth_mechanism))

    # Deprecated credential aliases replace user/password when supplied.
    if ldap_user is not None:
        warn_deprecate('ldap_user', 'user')
        user = ldap_user

    if ldap_password is not None:
        warn_deprecate('ldap_password', 'password')
        password = ldap_password

    if protocol is not None:
        if protocol.lower() == 'hiveserver2':
            warn_protocol_param()
        else:
            raise NotSupportedError(
                "'{0}' is not a supported protocol; only HiveServer2 is "
                "supported".format(protocol))

    service = hs2.connect(host=host,
                          port=port,
                          timeout=timeout,
                          use_ssl=use_ssl,
                          ca_cert=ca_cert,
                          user=user,
                          password=password,
                          kerberos_service_name=kerberos_service_name,
                          auth_mechanism=auth_mechanism,
                          krb_host=krb_host,
                          use_http_transport=use_http_transport,
                          http_path=http_path,
                          auth_cookie_names=auth_cookie_names,
                          retries=retries)
    return hs2.HiveServer2Connection(service, default_db=database)
Esempio n. 8
0
def connect(host,
            port,
            timeout=None,
            use_ssl=False,
            ca_cert=None,
            user=None,
            password=None,
            kerberos_service_name='impala',
            auth_mechanism=None,
            krb_host=None,
            use_http_transport=False,
            http_path=''):
    """Open a Thrift transport to HiveServer2 and return an `HS2Service`.

    With `use_http_transport` the transport comes from `get_http_transport`;
    `ca_cert` and `krb_host` are rejected in that mode. Otherwise a socket
    is obtained from `get_socket`, `timeout` (seconds) is applied to it in
    milliseconds, and the transport comes from `get_transport`, using
    `krb_host` as the Kerberos host when given and `host` otherwise.
    `timeout` is only used on the socket path.

    The transport is opened, wrapped in a `TBinaryProtocol`, and a Thrift
    client built on that protocol is wrapped in `HS2Service`.

    Raises
    ------
    NotSupportedError
        If `use_http_transport` is true and `ca_cert` or `krb_host` is set.
    """
    log.debug(
        'Connecting to HiveServer2 %s:%s with %s authentication mechanism',
        host, port, auth_mechanism)

    if not use_http_transport:
        sock = get_socket(host, port, use_ssl, ca_cert)

        # Fall back to the connection host when no Kerberos host is given.
        kerberos_host = krb_host or host

        # TSocket expects millis
        timeout_ms = None if timeout is None else timeout * 1000.
        if six.PY2:
            sock.setTimeout(timeout_ms)
        elif six.PY3:
            try:
                # thriftpy has a release where set_timeout is missing
                sock.set_timeout(timeout_ms)
            except AttributeError:
                sock.socket_timeout = timeout_ms
                sock.connect_timeout = timeout_ms
        log.debug('sock=%s', sock)
        transport = get_transport(sock, kerberos_host, kerberos_service_name,
                                  auth_mechanism, user, password)
    else:
        # TODO(#362): Add server authentication with thrift 0.12.
        if ca_cert:
            raise NotSupportedError(
                "Server authentication is not supported with HTTP endpoints")
        if krb_host:
            raise NotSupportedError(
                "Kerberos authentication is not supported with HTTP endpoints")
        transport = get_http_transport(host,
                                       port,
                                       http_path=http_path,
                                       use_ssl=use_ssl,
                                       ca_cert=ca_cert,
                                       user=user,
                                       password=password,
                                       auth_mechanism=auth_mechanism)

    transport.open()
    protocol = TBinaryProtocol(transport)
    # The client constructor differs between the Python 2 and 3 Thrift stacks.
    if six.PY2:
        # ThriftClient == ImpalaHiveServer2Service.Client
        service = ThriftClient(protocol)
    elif six.PY3:
        # ThriftClient == TClient
        service = ThriftClient(ImpalaHiveServer2Service, protocol)
    log.debug('transport=%s protocol=%s service=%s',
              transport, protocol, service)

    return HS2Service(service)
Esempio n. 9
0
def connect(host='localhost',
            port=21050,
            protocol=None,
            database=None,
            timeout=45,
            use_ssl=False,
            ca_cert=None,
            auth_mechanism='NOSASL',
            user=None,
            password=None,
            kerberos_service_name='impala',
            use_ldap=None,
            ldap_user=None,
            ldap_password=None,
            use_kerberos=None):
    """Connect over HiveServer2 (default) or Beeswax and return a connection.

    The deprecated arguments `use_kerberos`, `use_ldap`, `ldap_user` and
    `ldap_password` each trigger a `warn_deprecate` call when supplied and
    are mapped onto `auth_mechanism`, `user` and `password`.

    `protocol` is matched case-insensitively. ``None`` or ``'hiveserver2'``
    returns an `hs2.HiveServer2Connection`; an explicit, non-empty value
    additionally calls `warn_deprecate_protocol()`. ``'beeswax'`` calls
    `warn_deprecate_hs2()` and returns a `beeswax.BeeswaxConnection`. Both
    backends receive the same keyword arguments, and `database` is passed
    to the connection as ``default_db``.

    Raises
    ------
    NotSupportedError
        If the resolved authentication mechanism is not in
        `AUTH_MECHANISMS`, or if `protocol` names an unknown protocol.
    """
    # Deprecated boolean switches override auth_mechanism when true; LDAP is
    # handled last and therefore wins if both are set.
    if use_kerberos is not None:
        warn_deprecate('use_kerberos', 'auth_mechanism="GSSAPI"')
        auth_mechanism = 'GSSAPI' if use_kerberos else auth_mechanism
    if use_ldap is not None:
        warn_deprecate('use_ldap', 'auth_mechanism="LDAP"')
        auth_mechanism = 'LDAP' if use_ldap else auth_mechanism

    # A falsy mechanism (None, '') falls back to NOSASL.
    auth_mechanism = auth_mechanism.upper() if auth_mechanism else 'NOSASL'
    if auth_mechanism not in AUTH_MECHANISMS:
        raise NotSupportedError(
            'Unsupported authentication mechanism: {0}'.format(auth_mechanism))

    # Deprecated credential aliases replace user/password when supplied.
    if ldap_user is not None:
        warn_deprecate('ldap_user', 'user')
        user = ldap_user
    if ldap_password is not None:
        warn_deprecate('ldap_password', 'password')
        password = ldap_password

    # Both backends accept the same keyword arguments.
    backend_options = dict(
        host=host,
        port=port,
        timeout=timeout,
        use_ssl=use_ssl,
        ca_cert=ca_cert,
        user=user,
        password=password,
        kerberos_service_name=kerberos_service_name,
        auth_mechanism=auth_mechanism,
    )

    requested = None if protocol is None else protocol.lower()

    if requested is None or requested == 'hiveserver2':
        if protocol:
            warn_deprecate_protocol()
        service = hs2.connect(**backend_options)
        return hs2.HiveServer2Connection(service, default_db=database)

    if requested == 'beeswax':
        warn_deprecate_hs2()
        service = beeswax.connect(**backend_options)
        return beeswax.BeeswaxConnection(service, default_db=database)

    raise NotSupportedError("The specified protocol '%s' is not supported." %
                            protocol)