def _execute(self, func_name, request):
    """Invoke the client RPC named ``func_name`` with ``request``, retrying.

    Each attempt (re)opens the Thrift transport, looks the RPC up on
    ``self.client`` and calls it.  A ``socket.error`` or
    ``TTransportException`` is logged, the transport is closed and the
    attempt counter is decremented; any other exception propagates
    unchanged.  Once ``self.retries`` attempts have failed a
    ``HiveServer2Error`` is raised.
    """
    # pylint: disable=protected-access
    # The transport hangs off the client's input protocol.
    thrift_transport = self.client._iprot.trans
    attempts_remaining = self.retries

    while attempts_remaining > 0:
        try:
            log.debug('Attempting to open transport (tries_left=%s)',
                      attempts_remaining)
            open_transport(thrift_transport)
            log.debug('Transport opened')
            rpc = getattr(self.client, func_name)
            result = rpc(request)
        except (socket.error, TTransportException):
            # Transport-level failure: log it and fall through to retry.
            log.exception('Failed to open transport (tries_left=%s)',
                          attempts_remaining)
        else:
            return result

        log.debug('Closing transport (tries_left=%s)', attempts_remaining)
        thrift_transport.close()
        attempts_remaining -= 1

    failure_message = 'Failed after retrying {0} times'.format(self.retries)
    raise HiveServer2Error(failure_message)
def create_table(self, table, columns):
    """Create ``table`` in Impala using a hand-built CREATE TABLE statement.

    The statement is assembled manually as a workaround: according to the
    original author's note, the Impala JDBC driver seems to emit the
    column name as a string literal (for example
    ``CREATE TABLE test_two ("x" INTEGER )``), which triggers a
    STRING LITERAL error.

    ``columns`` is a mapping of column name to column type; every item is
    rendered as ``"<name> <type>"`` and the items are joined with commas.
    A ``HiveServer2Error`` whose text contains "Table already exists" is
    only logged through ``self.log``; any other ``HiveServer2Error`` is
    re-raised wrapped in a new ``HiveServer2Error``.
    """
    column_defs = ",".join(
        name + " " + type_name for name, type_name in columns.items()
    )
    statement = "CREATE TABLE {table} ({columns});".format(
        table=table, columns=column_defs)

    datastore = ImpalaDatastore()
    datastore.connect()
    try:
        datastore.execute(statement, fetch=False)
    except HiveServer2Error as error:
        # There is no dedicated "table exists" exception, so the error
        # text is inspected instead.
        if "Table already exists" in str(error):
            self.log("looks like table already exists")
        else:
            raise HiveServer2Error(error)
def _null_bitmap_bytes(nulls):
    """Return a column's null bitmap as a sequence of integer byte values.

    Byte strings are converted with ``bytearray`` (integers on both
    Python 2 and 3); text strings fall back to ``ord`` per character,
    which is what the previous implementation did for every input.
    """
    if isinstance(nulls, (bytes, bytearray)):
        return bytearray(nulls)
    return [ord(char) for char in nulls]


def fetch_results(service, operation_handle, hs2_protocol_version, schema=None,
                  max_rows=1024, orientation=TFetchOrientation.FETCH_NEXT):
    """Fetch one batch of results for ``operation_handle``.

    Args:
        service: object exposing ``FetchResults(req)``.
        operation_handle: handle of the executed operation; when its
            ``hasResultSet`` flag is false nothing is fetched.
        hs2_protocol_version: negotiated protocol version; selects the
            columnar (V6) or row-based (pre-columnar) decoding path.
        schema: sequence whose items carry the column type name at index 1;
            looked up via ``get_result_schema`` when omitted.
        max_rows: ``maxRows`` value sent in the fetch request.
        orientation: fetch orientation sent in the fetch request.

    Returns:
        ``None`` when the operation has no result set, otherwise a list of
        row tuples.  ``TIMESTAMP`` values go through ``_parse_timestamp``
        and ``DECIMAL`` values are converted to ``Decimal``.

    Raises:
        HiveServer2Error: if the RPC status is not OK or the protocol
            version is neither V6 nor one of the pre-columnar versions.
    """
    if not operation_handle.hasResultSet:
        return None

    # the schema is necessary to pull the proper values (i.e., coalesce)
    if schema is None:
        schema = get_result_schema(service, operation_handle)

    req = TFetchResultsReq(operationHandle=operation_handle,
                           orientation=orientation,
                           maxRows=max_rows)
    resp = service.FetchResults(req)
    err_if_rpc_not_ok(resp)

    if hs2_protocol_version == TProtocolVersion.HIVE_CLI_SERVICE_PROTOCOL_V6:
        tcols = [_TTypeId_to_TColumnValue_getters[schema[i][1]](col)
                 for (i, col) in enumerate(resp.results.columns)]
        # An empty column list yields no rows instead of an IndexError.
        num_rows = len(tcols[0].values) if tcols else 0
        null_bitmaps = [_null_bitmap_bytes(tcol.nulls) for tcol in tcols]

        rows = []
        for i in range(num_rows):
            # i // 8 is the byte, i % 8 is the position inside that byte.
            # Floor division keeps the index an int on Python 3 as well.
            byte_index = i // 8
            bit_mask = 1 << (i % 8)
            row = []
            for j, tcol in enumerate(tcols):
                type_ = schema[j][1]
                bitmap = null_bitmaps[j]
                # The server sometimes omits trailing zero bytes of the
                # bitmap; a missing byte means "not null".
                is_null = (byte_index < len(bitmap) and
                           bitmap[byte_index] & bit_mask)
                if is_null:
                    row.append(None)
                elif type_ == 'TIMESTAMP':
                    row.append(_parse_timestamp(tcol.values[i]))
                elif type_ == 'DECIMAL':
                    row.append(Decimal(tcol.values[i]))
                else:
                    row.append(tcol.values[i])
            rows.append(tuple(row))
    elif hs2_protocol_version in _pre_columnar_protocols:
        rows = []
        for trow in resp.results.rows:
            row = []
            for (i, col_val) in enumerate(trow.colVals):
                type_ = schema[i][1]
                value = _TTypeId_to_TColumnValue_getters[type_](col_val).value
                if type_ == 'TIMESTAMP':
                    value = _parse_timestamp(value)
                elif type_ == 'DECIMAL':
                    # Falsy values (e.g. None for SQL NULL) are passed
                    # through untouched.
                    if value:
                        value = Decimal(value)
                row.append(value)
            rows.append(tuple(row))
    else:
        # pylint: disable=protected-access
        # Fall back to the raw value so that a version missing from the
        # enum map still produces HiveServer2Error rather than KeyError.
        version_name = TProtocolVersion._VALUES_TO_NAMES.get(
            hs2_protocol_version, hs2_protocol_version)
        raise HiveServer2Error(
            ("Got HiveServer2 version %s. " % version_name) +
            "Expected V1 - V6")
    return rows
def __init__(self, service, handle, config, hs2_protocol_version, retries=3):
    """Store the session state and initialise the ``ThriftRPC`` base.

    ``service``, ``handle``, ``config`` and ``hs2_protocol_version`` are
    kept as same-named attributes.  A protocol version that is not a key
    of ``TProtocolVersion._VALUES_TO_NAMES`` raises ``HiveServer2Error``.
    ``retries`` is forwarded to ``ThriftRPC.__init__`` together with the
    service's client.
    """
    # pylint: disable=protected-access
    self.service = service
    self.handle = handle
    self.config = config
    self.hs2_protocol_version = hs2_protocol_version

    known_versions = TProtocolVersion._VALUES_TO_NAMES
    if hs2_protocol_version not in known_versions:
        message = "Got HiveServer2 version {0}; expected V1 - V6".format(
            hs2_protocol_version)
        raise HiveServer2Error(message)

    ThriftRPC.__init__(self, self.service.client, retries=retries)
def err_if_rpc_not_ok(resp):
    """Raise ``HiveServer2Error`` unless the response status is acceptable.

    The status codes ``SUCCESS_STATUS``, ``SUCCESS_WITH_INFO_STATUS`` and
    ``STILL_EXECUTING_STATUS`` pass silently; any other code raises with
    the response's ``errorMessage``.
    """
    status = resp.status
    code = status.statusCode

    # Guard clauses: return as soon as one acceptable code matches.
    if code == TStatusCode.SUCCESS_STATUS:
        return
    if code == TStatusCode.SUCCESS_WITH_INFO_STATUS:
        return
    if code == TStatusCode.STILL_EXECUTING_STATUS:
        return

    raise HiveServer2Error(status.errorMessage)
def fetch_results(service, operation_handle, hs2_protocol_version, schema=None,
                  max_rows=1024, orientation=TFetchOrientation.FETCH_NEXT):
    """Fetch one batch of results for ``operation_handle``.

    Args:
        service: object exposing ``FetchResults(req)``.
        operation_handle: handle of the executed operation; when its
            ``hasResultSet`` flag is false nothing is fetched.
        hs2_protocol_version: negotiated protocol version; selects the
            columnar (V6) or row-based (pre-columnar) decoding path.
        schema: sequence whose items carry the column type name at index 1;
            looked up via ``get_result_schema`` when omitted.
        max_rows: ``maxRows`` value sent in the fetch request.
        orientation: fetch orientation sent in the fetch request.

    Returns:
        ``None`` when the operation has no result set.  Otherwise the rows:
        columnar results are transposed with ``lzip``, row-based results
        are a list of tuples.  ``TIMESTAMP`` values go through
        ``_parse_timestamp`` and ``DECIMAL`` values become ``Decimal``.

    Raises:
        HiveServer2Error: if the RPC status is not OK or the protocol
            version is neither V6 nor one of the pre-columnar versions.
    """
    if not operation_handle.hasResultSet:
        log.debug('fetch_results: operation_handle.hasResultSet=False')
        return None

    # the schema is necessary to pull the proper values (i.e., coalesce)
    if schema is None:
        schema = get_result_schema(service, operation_handle)

    req = TFetchResultsReq(operationHandle=operation_handle,
                           orientation=orientation,
                           maxRows=max_rows)
    log.debug(
        'fetch_results: hs2_protocol_version=%s max_rows=%s '
        'orientation=%s req=%s', hs2_protocol_version, max_rows,
        orientation, req)
    resp = service.FetchResults(req)
    err_if_rpc_not_ok(resp)

    if hs2_protocol_version == TProtocolVersion.HIVE_CLI_SERVICE_PROTOCOL_V6:
        tcols = [
            _TTypeId_to_TColumnValue_getters[schema[i][1]](col)
            for (i, col) in enumerate(resp.results.columns)
        ]
        num_cols = len(tcols)
        # An empty column list yields no rows instead of an IndexError.
        num_rows = len(tcols[0].values) if tcols else 0
        log.debug('fetch_results: COLUMNAR num_cols=%s num_rows=%s tcols=%s',
                  num_cols, num_rows, tcols)

        column_data = []
        for j in range(num_cols):
            type_ = schema[j][1]
            nulls = tcols[j].nulls
            values = tcols[j].values

            # thriftpy sometimes returns unicode instead of bytes
            if six.PY3 and isinstance(nulls, str):
                nulls = nulls.encode('utf-8')

            # One bit per row, least-significant bit first within a byte.
            is_null = bitarray(endian='little')
            is_null.frombytes(nulls)

            # Ref HUE-2722, HiveServer2 sometimes does not add trailing '\x00'
            if len(values) > len(nulls):
                to_append = ((len(values) - len(nulls) + 7) // 8)
                is_null.frombytes(b'\x00' * to_append)

            # Values are converted in place inside the response column.
            if type_ == 'TIMESTAMP':
                for i in range(num_rows):
                    values[i] = (None if is_null[i] else
                                 _parse_timestamp(values[i]))
            elif type_ == 'DECIMAL':
                for i in range(num_rows):
                    values[i] = (None if is_null[i] else Decimal(values[i]))
            else:
                for i in range(num_rows):
                    if is_null[i]:
                        values[i] = None
            column_data.append(values)

        # TODO: enable columnar fetch
        rows = lzip(*column_data)
    elif hs2_protocol_version in _pre_columnar_protocols:
        log.debug('fetch_results: ROWS num-rows=%s', len(resp.results.rows))
        rows = []
        for trow in resp.results.rows:
            row = []
            for (i, col_val) in enumerate(trow.colVals):
                type_ = schema[i][1]
                value = _TTypeId_to_TColumnValue_getters[type_](col_val).value
                if type_ == 'TIMESTAMP':
                    value = _parse_timestamp(value)
                elif type_ == 'DECIMAL':
                    # Falsy values (e.g. None for SQL NULL) are passed
                    # through untouched.
                    if value:
                        value = Decimal(value)
                row.append(value)
            rows.append(tuple(row))
    else:
        # pylint: disable=protected-access
        # Fall back to the raw value so that a version missing from the
        # enum map still produces HiveServer2Error rather than KeyError.
        version_name = TProtocolVersion._VALUES_TO_NAMES.get(
            hs2_protocol_version, hs2_protocol_version)
        raise HiveServer2Error(
            "Got HiveServer2 version {0}; expected V1 - V6".format(
                version_name))
    return rows
def err_if_rpc_not_ok(resp):
    """Raise ``HiveServer2Error`` unless the response reports success.

    Only the status codes registered under ``'SUCCESS_STATUS'`` and
    ``'SUCCESS_WITH_INFO_STATUS'`` in ``TStatusCode._NAMES_TO_VALUES`` are
    accepted; any other code raises with the response's ``errorMessage``.
    """
    # pylint: disable=protected-access
    codes_by_name = TStatusCode._NAMES_TO_VALUES
    accepted_codes = (
        codes_by_name['SUCCESS_STATUS'],
        codes_by_name['SUCCESS_WITH_INFO_STATUS'],
    )
    if resp.status.statusCode not in accepted_codes:
        raise HiveServer2Error(resp.status.errorMessage)