def _fetch_sql_results(self, query, cursor, sql_writer, log_file):
    '''Execute the query using the cursor and set the result or exception on the local
       thread.

    The outcome is published as attributes on the current thread object:
    ``sql`` (the statements issued so far), ``data_set`` (the rows fetched so
    far), ``cursor_description`` and, on failure, ``exception``.

    Args:
      query: query model. Its ``execution`` attribute selects between running
          the query directly, through a table created for this run
          ('CREATE_TABLE_AS') or through a view created for this run ('VIEW').
      cursor: database cursor used to execute the statements and fetch rows.
      sql_writer: renders the query model into SQL text.
      log_file: writable file object that receives the SQL being executed.

    Any Exception raised while executing or fetching is stored on the thread
    rather than propagated. Errors raised while dropping the table/view in the
    cleanup step are NOT captured and propagate to the caller.
    '''
    thread = current_thread()
    try:
        log_file.write('/***** Start Query *****/\n')
        if sql_writer.DIALECT == self.flatten_dialect:
            # Converts the query model for the flattened version of the data. This is
            # for testing of Impala nested types support. Work on a copy so the
            # caller's query model is left untouched.
            query = deepcopy(query)
            QueryFlattener().flatten(query)

        if query.execution == 'CREATE_TABLE_AS':
            setup_sql = sql_writer.write_create_table_as(
                query, self._table_or_view_name)
            query_sql = 'SELECT * FROM ' + self._table_or_view_name
        elif query.execution == 'VIEW':
            setup_sql = sql_writer.write_create_view(
                query, self._table_or_view_name)
            query_sql = 'SELECT * FROM ' + self._table_or_view_name
        else:
            setup_sql = None
            query_sql = sql_writer.write_query(query)

        if setup_sql:
            LOG.debug("Executing on %s:\n%s", cursor.db_type, setup_sql)
            # Record the statement before running it so that a failure in the
            # setup step can still be reported together with its SQL.
            thread.sql = setup_sql + ';\n'
            log_file.write(setup_sql + ';\n')
            log_file.flush()
            cursor.execute(setup_sql)
        else:
            # Always start from a clean value: without this the '+=' below would
            # either fail on a thread that has no 'sql' attribute yet or append
            # to whatever an earlier call left behind.
            thread.sql = ''

        LOG.debug("Executing on %s:\n%s", cursor.db_type, query_sql)
        thread.sql += query_sql
        log_file.write(query_sql + ';\n')
        log_file.write('/***** End Query *****/\n')
        log_file.flush()
        cursor.execute(query_sql)

        col_count = len(cursor.description)
        # Floor division keeps both values integral on Python 3 as well;
        # fetchmany() expects an integer row count.
        batch_size = max(10000 // col_count, 1)
        row_limit = self.TOO_MUCH_DATA // col_count
        data_set = list()
        # Publish the (still growing) list right away so partial results are
        # visible on the thread even if fetching fails part-way through.
        thread.data_set = data_set
        thread.cursor_description = cursor.description
        LOG.debug("Fetching results from %s", cursor.db_type)
        while True:
            batch = cursor.fetchmany(batch_size)
            data_set.extend(batch)
            if len(batch) < batch_size:
                # A short batch means the result set is exhausted.
                if cursor.db_type == IMPALA:
                    impala_log = cursor.get_log()
                    if 'Expression overflowed, returning NULL' in impala_log:
                        raise TypeOverflow(
                            'Numeric overflow; data may not match')
                break
            if len(data_set) > row_limit:
                raise DataLimitExceeded('Too much data')
    except Exception as e:
        thread.exception = e
    finally:
        # Drop whatever object the setup step was meant to create, regardless
        # of whether the query itself succeeded.
        if query.execution == 'CREATE_TABLE_AS':
            cursor.drop_table(self._table_or_view_name)
        elif query.execution == 'VIEW':
            cursor.drop_view(self._table_or_view_name)
def _write_column(self, col):
    '''Return ``<owner identifier>.<flat column name>`` for the given column.

    The owner used is the nearest one up the ``owner`` chain that is not a
    StructColumn; the column part comes from QueryFlattener.flat_column_name.
    '''
    # Climb past any enclosing struct columns to reach the first owner that is
    # not itself a struct.
    ancestor = col.owner
    while isinstance(ancestor, StructColumn):
        ancestor = ancestor.owner
    owner_identifier = ancestor.identifier
    flat_name = QueryFlattener.flat_column_name(col)
    return '%s.%s' % (owner_identifier, flat_name)
def _write_collection_column(self, collection_col):
    '''Return the flattened collection name followed by the column's identifier.

    The two parts are separated by a single space; the name part comes from
    QueryFlattener.flat_collection_name.
    '''
    flat_name = QueryFlattener.flat_collection_name(collection_col)
    identifier = collection_col.identifier
    return '%s %s' % (flat_name, identifier)