Ejemplo n.º 1
0
    def _execute(self, func_name, request):
        """Invoke the client RPC ``func_name`` with ``request``, retrying.

        Each attempt (re)opens the Thrift transport and then calls the named
        method on ``self.client``. When ``socket.error`` or
        ``TTransportException`` is raised anywhere in the attempt, the
        transport is closed and the attempt is repeated, up to
        ``self.retries`` times in total. Any other exception propagates to
        the caller immediately.

        Args:
            func_name: Name of the attribute on ``self.client`` to call.
            request: The single argument passed to that callable.

        Returns:
            Whatever the client method returns.

        Raises:
            HiveServer2Error: When every attempt failed with a transport
                level error (or when ``self.retries`` is not positive).
        """
        # pylint: disable=protected-access
        thrift_transport = self.client._iprot.trans
        remaining = self.retries
        while remaining > 0:
            try:
                log.debug('Attempting to open transport (tries_left=%s)',
                          remaining)
                open_transport(thrift_transport)
                log.debug('Transport opened')
                return getattr(self.client, func_name)(request)
            except (socket.error, TTransportException):
                # The same message is logged whether opening the transport
                # or the RPC call itself raised the error.
                log.exception('Failed to open transport (tries_left=%s)',
                              remaining)
            # Only reached after a handled failure: reset and try again.
            log.debug('Closing transport (tries_left=%s)', remaining)
            thrift_transport.close()
            remaining -= 1

        raise HiveServer2Error(
            'Failed after retrying {0} times'.format(self.retries))
Ejemplo n.º 2
0
    def create_table(self, table, columns):
        """Create ``table`` by executing a hand-built ``CREATE TABLE`` statement.

        The statement is assembled manually because the Impala JDBC driver
        appears to emit a quoted string literal in place of the column name
        (for example ``CREATE TABLE test_two ("x" INTEGER )``), which fails
        with a STRING LITERAL error.

        Args:
            table: Name of the table to create.
            columns: Mapping of column name to column type; both must be
                strings, they are joined as ``"<name> <type>"``.

        Raises:
            HiveServer2Error: When execution fails for any reason other than
                the table already existing. The original exception is
                re-raised unchanged.
        """
        # NOTE: ``table`` and the column definitions are interpolated into
        # the SQL text verbatim, so callers must pass trusted identifiers.
        column_defs = ",".join(
            name + " " + type_ for name, type_ in columns.items())
        create_stmt = "CREATE TABLE {table} ({columns});".format(
            table=table, columns=column_defs)

        impala = ImpalaDatastore()
        impala.connect()

        try:
            impala.execute(create_stmt, fetch=False)
        except HiveServer2Error as ex:
            # There is no dedicated "table already exists" exception, so the
            # error message has to be inspected instead.
            if "Table already exists" not in str(ex):
                # A bare ``raise`` keeps the original exception object, its
                # concrete type and its traceback instead of wrapping it.
                raise
            self.log("looks like table already exists")
Ejemplo n.º 3
0
def _is_null_bit_set(nulls, index):
    """Return True when bit ``index`` of the ``nulls`` bitmap is set.

    Bit ``index`` lives in byte ``index // 8`` at position ``index % 8``.
    A bitmap that is too short to contain the bit is treated as "not null":
    HiveServer2 sometimes omits trailing zero bytes (see HUE-2722).
    """
    byte_pos = index // 8
    if byte_pos >= len(nulls):
        return False
    byte = nulls[byte_pos]
    if not isinstance(byte, int):
        # Indexing a text/str bitmap yields a one-character string, whereas
        # indexing ``bytes`` on Python 3 already yields an int.
        byte = ord(byte)
    return bool(byte & (1 << (index % 8)))


def fetch_results(service, operation_handle, hs2_protocol_version, schema=None,
                  max_rows=1024, orientation=TFetchOrientation.FETCH_NEXT):
    """Fetch one batch of rows for ``operation_handle``.

    Args:
        service: Client object exposing ``FetchResults``.
        operation_handle: Handle of the operation; when its ``hasResultSet``
            is false nothing is fetched.
        hs2_protocol_version: Protocol version that selects how the response
            is decoded (columnar for V6, row based for the versions listed
            in ``_pre_columnar_protocols``).
        schema: Sequence whose items carry the column type name at index 1.
            Looked up via ``get_result_schema`` when omitted.
        max_rows: Value sent as ``maxRows`` in the fetch request.
        orientation: Value sent as ``orientation`` in the fetch request.

    Returns:
        ``None`` when the operation has no result set, otherwise a list of
        row tuples. ``TIMESTAMP`` values go through ``_parse_timestamp`` and
        ``DECIMAL`` values are converted to ``Decimal``.

    Raises:
        HiveServer2Error: When the protocol version is neither V6 nor one of
            the pre-columnar versions. ``err_if_rpc_not_ok`` may also raise
            for a failed response.
    """
    if not operation_handle.hasResultSet:
        return None

    # the schema is necessary to pull the proper values (i.e., coalesce)
    if schema is None:
        schema = get_result_schema(service, operation_handle)

    req = TFetchResultsReq(operationHandle=operation_handle,
                           orientation=orientation,
                           maxRows=max_rows)
    resp = service.FetchResults(req)
    err_if_rpc_not_ok(resp)

    if hs2_protocol_version == TProtocolVersion.HIVE_CLI_SERVICE_PROTOCOL_V6:
        tcols = [_TTypeId_to_TColumnValue_getters[schema[i][1]](col)
                 for (i, col) in enumerate(resp.results.columns)]
        # A response without columns cannot contain any rows.
        num_rows = len(tcols[0].values) if tcols else 0

        columns = []
        for j, tcol in enumerate(tcols):
            type_ = schema[j][1]
            values = tcol.values
            nulls = tcol.nulls
            # Pick the per-column conversion once instead of once per cell.
            if type_ == 'TIMESTAMP':
                convert = _parse_timestamp
            elif type_ == 'DECIMAL':
                convert = Decimal
            else:
                convert = None

            column = []
            for i in range(num_rows):
                if _is_null_bit_set(nulls, i):
                    column.append(None)
                elif convert is None:
                    column.append(values[i])
                else:
                    column.append(convert(values[i]))
            columns.append(column)

        # Transpose the decoded columns into row tuples.
        rows = list(zip(*columns))
    elif hs2_protocol_version in _pre_columnar_protocols:
        rows = []
        for trow in resp.results.rows:
            row = []
            for (i, col_val) in enumerate(trow.colVals):
                type_ = schema[i][1]
                value = _TTypeId_to_TColumnValue_getters[type_](col_val).value
                if type_ == 'TIMESTAMP':
                    value = _parse_timestamp(value)
                elif type_ == 'DECIMAL':
                    # Falsy values (e.g. None) are passed through unchanged.
                    if value:
                        value = Decimal(value)
                row.append(value)
            rows.append(tuple(row))
    else:
        # Fall back to the raw value so that a version missing from the
        # mapping still produces HiveServer2Error rather than KeyError.
        version_name = TProtocolVersion._VALUES_TO_NAMES.get(
            hs2_protocol_version, hs2_protocol_version)
        raise HiveServer2Error(
            ("Got HiveServer2 version %s. " % version_name) +
            "Expected V1 - V6")
    return rows
Ejemplo n.º 4
0
    def __init__(self, service, handle, config, hs2_protocol_version,
                 retries=3):
        """Store the connection details and initialise the RPC base class.

        Args:
            service: Object whose ``client`` attribute is handed to
                ``ThriftRPC.__init__``.
            handle: Stored as ``self.handle``.
            config: Stored as ``self.config``.
            hs2_protocol_version: Must be a key of
                ``TProtocolVersion._VALUES_TO_NAMES``.
            retries: Forwarded to ``ThriftRPC.__init__``.

        Raises:
            HiveServer2Error: When ``hs2_protocol_version`` is not a known
                protocol version.
        """
        # pylint: disable=protected-access
        self.service, self.handle = service, handle
        self.config = config
        self.hs2_protocol_version = hs2_protocol_version

        known_versions = TProtocolVersion._VALUES_TO_NAMES
        if hs2_protocol_version not in known_versions:
            message = ("Got HiveServer2 version {0}; "
                       "expected V1 - V6").format(hs2_protocol_version)
            raise HiveServer2Error(message)

        ThriftRPC.__init__(self, self.service.client, retries=retries)
Ejemplo n.º 5
0
def err_if_rpc_not_ok(resp):
    """Raise ``HiveServer2Error`` unless ``resp`` has an acceptable status.

    The status codes ``SUCCESS_STATUS``, ``SUCCESS_WITH_INFO_STATUS`` and
    ``STILL_EXECUTING_STATUS`` are accepted; for any other code the
    response's ``errorMessage`` is raised as ``HiveServer2Error``.
    """
    status = resp.status
    acceptable_codes = (
        TStatusCode.SUCCESS_STATUS,
        TStatusCode.SUCCESS_WITH_INFO_STATUS,
        TStatusCode.STILL_EXECUTING_STATUS,
    )
    if status.statusCode not in acceptable_codes:
        raise HiveServer2Error(status.errorMessage)
Ejemplo n.º 6
0
def fetch_results(service,
                  operation_handle,
                  hs2_protocol_version,
                  schema=None,
                  max_rows=1024,
                  orientation=TFetchOrientation.FETCH_NEXT):
    """Fetch one batch of rows for ``operation_handle``.

    Args:
        service: Client object exposing ``FetchResults``.
        operation_handle: Handle of the operation; when its ``hasResultSet``
            is false nothing is fetched.
        hs2_protocol_version: Protocol version that selects how the response
            is decoded (columnar for V6, row based for the versions listed
            in ``_pre_columnar_protocols``).
        schema: Sequence whose items carry the column type name at index 1.
            Looked up via ``get_result_schema`` when omitted.
        max_rows: Value sent as ``maxRows`` in the fetch request.
        orientation: Value sent as ``orientation`` in the fetch request.

    Returns:
        ``None`` when the operation has no result set, otherwise the rows of
        the batch. ``TIMESTAMP`` values go through ``_parse_timestamp`` and
        ``DECIMAL`` values are converted to ``Decimal``. In the columnar
        branch the ``values`` lists of the response are updated in place.

    Raises:
        HiveServer2Error: When the protocol version is neither V6 nor one of
            the pre-columnar versions. ``err_if_rpc_not_ok`` may also raise
            for a failed response.
    """
    if not operation_handle.hasResultSet:
        log.debug('fetch_results: operation_handle.hasResultSet=False')
        return None

    # the schema is necessary to pull the proper values (i.e., coalesce)
    if schema is None:
        schema = get_result_schema(service, operation_handle)

    req = TFetchResultsReq(operationHandle=operation_handle,
                           orientation=orientation,
                           maxRows=max_rows)
    log.debug(
        'fetch_results: hs2_protocol_version=%s max_rows=%s '
        'orientation=%s req=%s', hs2_protocol_version, max_rows, orientation,
        req)
    resp = service.FetchResults(req)
    err_if_rpc_not_ok(resp)

    if hs2_protocol_version == TProtocolVersion.HIVE_CLI_SERVICE_PROTOCOL_V6:
        tcols = [
            _TTypeId_to_TColumnValue_getters[schema[i][1]](col)
            for (i, col) in enumerate(resp.results.columns)
        ]
        num_cols = len(tcols)
        # A response without columns cannot contain any rows.
        num_rows = len(tcols[0].values) if tcols else 0
        log.debug('fetch_results: COLUMNAR num_cols=%s num_rows=%s tcols=%s',
                  num_cols, num_rows, tcols)

        column_data = []
        for j in range(num_cols):
            type_ = schema[j][1]
            nulls = tcols[j].nulls
            values = tcols[j].values

            # thriftpy sometimes returns unicode instead of bytes
            if six.PY3 and isinstance(nulls, str):
                nulls = nulls.encode('utf-8')

            # Expand the nulls bitmap into one flag per row; little-endian
            # bit order makes flag i correspond to bit (i % 8) of byte i // 8.
            is_null = bitarray(endian='little')
            is_null.frombytes(nulls)

            # Ref HUE-2722, HiveServer2 sometimes does not add trailing '\x00'
            if len(values) > len(nulls):
                # Pads with zero ("not null") bytes; this may add more bytes
                # than strictly required, but never fewer.
                to_append = ((len(values) - len(nulls) + 7) // 8)
                is_null.frombytes(b'\x00' * to_append)

            if type_ == 'TIMESTAMP':
                for i in range(num_rows):
                    values[i] = (None if is_null[i] else _parse_timestamp(
                        values[i]))
            elif type_ == 'DECIMAL':
                for i in range(num_rows):
                    values[i] = (None if is_null[i] else Decimal(values[i]))
            else:
                for i in range(num_rows):
                    if is_null[i]:
                        values[i] = None
            column_data.append(values)

        # TODO: enable columnar fetch
        # Transpose the columns into rows (lzip is presumably list(zip(...))
        # -- confirm against its definition).
        rows = lzip(*column_data)
    elif hs2_protocol_version in _pre_columnar_protocols:
        log.debug('fetch_results: ROWS num-rows=%s', len(resp.results.rows))
        rows = []
        for trow in resp.results.rows:
            row = []
            for (i, col_val) in enumerate(trow.colVals):
                type_ = schema[i][1]
                value = _TTypeId_to_TColumnValue_getters[type_](col_val).value
                if type_ == 'TIMESTAMP':
                    value = _parse_timestamp(value)
                elif type_ == 'DECIMAL':
                    # Falsy values (e.g. None) are passed through unchanged.
                    if value:
                        value = Decimal(value)
                row.append(value)
            rows.append(tuple(row))
    else:
        # Fall back to the raw value so that a version missing from the
        # mapping still produces HiveServer2Error rather than KeyError.
        version_name = TProtocolVersion._VALUES_TO_NAMES.get(
            hs2_protocol_version, hs2_protocol_version)
        raise HiveServer2Error(
            "Got HiveServer2 version {0}; expected V1 - V6".format(
                version_name))
    return rows
Ejemplo n.º 7
0
def err_if_rpc_not_ok(resp):
    """Raise ``HiveServer2Error`` unless ``resp`` reports success.

    Only the status codes registered under ``'SUCCESS_STATUS'`` and
    ``'SUCCESS_WITH_INFO_STATUS'`` in ``TStatusCode._NAMES_TO_VALUES`` are
    accepted; for any other code the response's ``errorMessage`` is raised
    as ``HiveServer2Error``.
    """
    status = resp.status
    code_by_name = TStatusCode._NAMES_TO_VALUES
    success_codes = (
        code_by_name['SUCCESS_STATUS'],
        code_by_name['SUCCESS_WITH_INFO_STATUS'],
    )
    if status.statusCode not in success_codes:
        raise HiveServer2Error(status.errorMessage)