def _fetch_sql_results(self, query, cursor, sql_writer, log_file):
    '''Execute the query using the cursor and set the result or exception on the
       local thread.

       Nothing is returned and no Exception propagates to the caller. Instead the
       outcome is published as attributes on the object returned by
       current_thread():
         sql: the setup statement (if any) followed by the query that was run.
         data_set: the list that fetched rows are appended to.
         cursor_description: cursor.description after the query was executed.
         exception: the Exception raised while running, if any. This includes
             TypeOverflow and DataLimitExceeded, which are raised here.

       Every statement is also written to log_file, wrapped in "Start Query" and
       "End Query" marker comments, and flushed before it is executed.
    '''
    try:
        log_file.write('/***** Start Query *****/\n')
        if sql_writer.DIALECT == self.flatten_dialect:
            # Converts the query model for the flattened version of the data. This is for
            # testing of Impala nested types support.
            query = deepcopy(query)
            QueryFlattener().flatten(query)

        # Statements that produce no result set by themselves are split into a setup
        # statement plus a follow-up SELECT whose rows can be fetched and compared.
        if query.execution == StatementExecutionMode.CREATE_TABLE_AS:
            setup_sql = sql_writer.write_create_table_as(query, self._table_or_view_name)
            query_sql = 'SELECT * FROM ' + self._table_or_view_name
        elif query.execution == StatementExecutionMode.CREATE_VIEW_AS:
            setup_sql = sql_writer.write_create_view(query, self._table_or_view_name)
            query_sql = 'SELECT * FROM ' + self._table_or_view_name
        elif isinstance(query, (InsertStatement,)):
            setup_sql = sql_writer.write_query(query)
            # TODO: improve validation (IMPALA-4599). This is good enough for looking for
            # crashes on DML statements
            query_sql = 'SELECT COUNT(*) FROM ' + self._table_or_view_name
        else:
            setup_sql = None
            query_sql = sql_writer.write_query(query)

        if setup_sql:
            LOG.debug("Executing on %s:\n%s", cursor.db_type, setup_sql)
            # Published before executing so the SQL is visible even if execute() fails.
            current_thread().sql = setup_sql + ';\n'
            log_file.write(setup_sql + ';\n')
            log_file.flush()
            cursor.execute(setup_sql)
        else:
            # Start from an empty string so the append below never depends on a stale
            # or missing value left on the thread object.
            current_thread().sql = ''

        LOG.debug("Executing on %s:\n%s", cursor.db_type, query_sql)
        current_thread().sql += query_sql
        log_file.write(query_sql + ';\n')
        log_file.write('/***** End Query *****/\n')
        log_file.flush()
        cursor.execute(query_sql)

        col_count = len(cursor.description)
        # Floor division keeps both values integral. True division would hand a float
        # to fetchmany() on Python 3; the result on Python 2 ints is unchanged.
        batch_size = max(10000 // col_count, 1)
        row_limit = self.TOO_MUCH_DATA // col_count

        # The list is published before fetching starts, so rows fetched so far can be
        # read through the thread object while fetching is still in progress.
        data_set = list()
        current_thread().data_set = data_set
        current_thread().cursor_description = cursor.description
        LOG.debug("Fetching results from %s", cursor.db_type)
        while True:
            batch = cursor.fetchmany(batch_size)
            data_set.extend(batch)
            if len(batch) < batch_size:
                # A short batch means the result set is exhausted.
                if cursor.db_type == IMPALA:
                    impala_log = cursor.get_log()
                    if 'Expression overflowed, returning NULL' in impala_log:
                        raise TypeOverflow('Numeric overflow; data may not match')
                break
            if len(data_set) > row_limit:
                raise DataLimitExceeded('Too much data')

        if isinstance(query, (InsertStatement,)):
            LOG.debug('Total row count for %s: %s', cursor.db_type, data_set)
    except Exception as e:
        # Deliberately broad: the failure is recorded on the thread object instead of
        # being raised out of this method.
        current_thread().exception = e
def _write_column(self, col):
    '''Return "<identifier>.<flat column name>" for col, where the identifier is
       taken from the nearest owner of col that is not a StructColumn.
    '''
    # Climb the ownership chain, skipping every enclosing struct.
    ancestor = col.owner
    while isinstance(ancestor, StructColumn):
        ancestor = ancestor.owner
    qualifier = ancestor.identifier
    flat_name = QueryFlattener.flat_column_name(col)
    return '%s.%s' % (qualifier, flat_name)
def _write_collection_column(self, collection_col):
    '''Return the flattened collection name of collection_col followed by a space
       and the identifier of collection_col.
    '''
    flat_name = QueryFlattener.flat_collection_name(collection_col)
    alias = collection_col.identifier
    return '%s %s' % (flat_name, alias)