def table_exprs(self):
    '''Provides a list of all table_exprs that are declared within this FROM block.

    The list is built from the table_expr of every clause in self.join_clauses, and
    this block's own table_expr is then appended to it.
    '''
    joined_exprs = (clause.table_expr for clause in self.join_clauses)
    declared = TableExprList(joined_exprs)
    declared.append(self.table_expr)
    return declared
def describe_common_tables(cursors):
    '''Find and return a TableExprList containing Table objects that the given
    cursors have in common.

    A table is kept when its name is listed by every cursor and every later
    description of it agrees with the first description obtained: a non-empty
    column list, the same number of columns, equal primary_key_names, and
    pairwise equal column names and types. The first description obtained is the
    object placed in the result. Tables are processed in sorted name order.

    "cursors" is iterated once to collect the table names and once more for each
    candidate table, so it has to be a collection that can be iterated repeatedly.
    Each element must provide list_table_names() and describe_table(name).
    '''
    # Intersect the table names reported by each cursor.
    shared_names = None
    for cursor in cursors:
        names = set(cursor.list_table_names())
        if shared_names is not None:
            shared_names &= names
        else:
            shared_names = names
    ordered_names = sorted(shared_names)

    common_tables = TableExprList()
    for table_name in ordered_names:
        reference = None
        for cursor in cursors:
            candidate = cursor.describe_table(table_name)
            if reference is None:
                # The first description becomes the baseline for all others.
                reference = candidate
                continue
            if not candidate.cols:
                LOG.debug('%s has no remaining columns', table_name)
                break
            if len(reference.cols) != len(candidate.cols):
                LOG.debug(
                    'Ignoring table %s.'
                    ' It has a different number of columns across databases.',
                    table_name)
                break
            if reference.primary_key_names != candidate.primary_key_names:
                LOG.debug(
                    'Ignoring table {name} because of differing primary keys: '
                    '{common_table_keys} vs. {table_keys}'.format(
                        name=table_name,
                        common_table_keys=reference.primary_key_names,
                        table_keys=candidate.primary_key_names))
                break
            columns_differ = False
            for left, right in izip(reference.cols, candidate.cols):
                if not (left.name == right.name and left.type == right.type):
                    LOG.debug(
                        'Ignoring table %s. It has different columns %s vs %s.' %
                        (table_name, left, right))
                    columns_differ = True
                    break
            if columns_differ:
                break
        else:
            # Reached only when no cursor broke out of the loop, i.e. no mismatch.
            common_tables.append(reference)
    return common_tables
def table_exprs(self):
    """
    Return the table expressions that this query declares through its WITH clause.

    This is only a partial implementation: the clauses that declare table
    expressions differ across query types, so the rest is left to the concrete
    query classes. Every supported query may carry a WITH clause, which is why
    that part is handled here.
    """
    # This is an abstractproperty because it's only a *partial* implementation;
    # the WITH clause is the one source of table expressions common to all
    # statements and queries.
    exprs = TableExprList([])
    if not self.with_clause:
        return exprs
    exprs.extend(self.with_clause.table_exprs)
    return exprs
def describe_common_tables(cursors):
    '''Find and return a TableExprList containing Table objects that the given
    cursors have in common.

    Only table names reported by every cursor are considered, in sorted order.
    For each name, the first description obtained serves as the expected layout;
    the table is dropped as soon as another cursor describes it with no columns,
    a different column count, different primary_key_names, or a column whose name
    or type differs. Surviving tables are returned using that first description.

    "cursors" is walked several times (once for the names, then once per table),
    and each element must provide list_table_names() and describe_table(name).
    '''

    def is_mismatched(table_name, expected, actual):
        # Logs the first difference found between the two descriptions and
        # returns True; returns False when they are compatible.
        if not actual.cols:
            LOG.debug('%s has no remaining columns', table_name)
            return True
        if len(expected.cols) != len(actual.cols):
            LOG.debug('Ignoring table %s.'
                      ' It has a different number of columns across databases.',
                      table_name)
            return True
        if expected.primary_key_names != actual.primary_key_names:
            LOG.debug(
                'Ignoring table {name} because of differing primary keys: '
                '{common_table_keys} vs. {table_keys}'.format(
                    name=table_name,
                    common_table_keys=expected.primary_key_names,
                    table_keys=actual.primary_key_names))
            return True
        for left, right in izip(expected.cols, actual.cols):
            if not (left.name == right.name and left.type == right.type):
                LOG.debug('Ignoring table %s. It has different columns %s vs %s.'
                          % (table_name, left, right))
                return True
        return False

    names_in_common = None
    for cursor in cursors:
        cursor_names = set(cursor.list_table_names())
        if names_in_common is None:
            names_in_common = cursor_names
        else:
            names_in_common = names_in_common & cursor_names
    names_in_common = sorted(names_in_common)

    result = TableExprList()
    for table_name in names_in_common:
        expected = None
        consistent = True
        for cursor in cursors:
            actual = cursor.describe_table(table_name)
            if expected is None:
                expected = actual
            elif is_mismatched(table_name, expected, actual):
                consistent = False
                break
        if consistent:
            result.append(expected)
    return result
def test_use_nested_width_subquery():
    """
    Tests that a profile whose use_nested_with() returns False is honored.

    With that method returning False, none of the nested queries of the generated
    query may carry a WITH clause.
    """

    class MockQueryProfile(DefaultProfile):
        """
        A DefaultProfile variant that reports use_nested_with as False while
        pushing the QueryGenerator towards nested queries, WITH clauses and inline
        views.
        """

        def __init__(self):
            super(MockQueryProfile, self).__init__()
            # Pin the nested query count bounds so nested queries get created
            self._bounds['MAX_NESTED_QUERY_COUNT'] = (4, 4)
            # Always pick the optional WITH clause when it is possible
            self._probabilities['OPTIONAL_QUERY_CLAUSES']['WITH'] = 1
            # Always pick inline views when they are possible
            self._probabilities['MISC']['INLINE_VIEW'] = 1

        def use_nested_with(self):
            return False

    generator = QueryGenerator(MockQueryProfile())

    # Two single-column tables: the right one first, then the left one
    tables = TableExprList()
    for table_name, col_name in (("right_table", "right_col"),
                                 ("left_table", "left_col")):
        table = Table(table_name)
        table.add_col(Column(table_name, col_name, Int))
        tables.append(table)

    # No nested query is allowed to have a WITH clause
    query = generator.create_query(tables)
    for nested in query.nested_queries:
        assert nested.with_clause is None
def describe_common_tables(db_connections):
    '''Find and return a TableExprList containing Table objects that the given
    connections have in common.

    A table is kept when its name is listed by every connection and every later
    description of it agrees with the first description obtained: a non-empty
    column list, the same number of columns, and pairwise equal column names and
    types. The first description obtained is the object placed in the result.
    Tables are processed in sorted name order.

    "db_connections" is iterated once to collect the table names and once more
    for each candidate table, so it has to be a collection that can be iterated
    repeatedly. Each element must provide list_table_names() and
    describe_table(name).
    '''
    # Intersect the table names reported by each connection.
    common_table_names = None
    for db_connection in db_connections:
        table_names = set(db_connection.list_table_names())
        if common_table_names is None:
            common_table_names = table_names
        else:
            common_table_names &= table_names
    common_table_names = sorted(common_table_names)

    tables = TableExprList()
    for table_name in common_table_names:
        common_table = None
        mismatch = False
        for db_connection in db_connections:
            table = db_connection.describe_table(table_name)
            if common_table is None:
                # The first description becomes the baseline for all others.
                common_table = table
                continue
            if not table.cols:
                LOG.debug('%s has no remaining columns', table_name)
                mismatch = True
                break
            if len(common_table.cols) != len(table.cols):
                LOG.debug('Ignoring table %s.'
                          ' It has a different number of columns across databases.',
                          table_name)
                mismatch = True
                break
            for left, right in izip(common_table.cols, table.cols):
                # The parentheses are required: "not" binds tighter than "and", so
                # without them a column pair was only rejected when the names
                # differed while the types matched. Same-name columns with
                # different types slipped through as "common".
                if not (left.name == right.name and left.type == right.type):
                    LOG.debug('Ignoring table %s. It has different columns %s vs %s.'
                              % (table_name, left, right))
                    mismatch = True
                    break
            if mismatch:
                break
        if not mismatch:
            tables.append(common_table)
    return tables
def test_hive_create_equality_only_joins():
    """
    Tests that, with a HiveProfile-based profile, the relational join condition
    produced by QueryGenerator is an Equals function.
    """

    class FakeHiveQueryProfile(HiveProfile):
        """
        A HiveProfile subclass whose choose_* methods return fixed values, so that
        this test is deterministic.
        """

        def choose_join_condition_count(self):
            """ Exactly one operator in the JOIN condition """
            return 1

        def choose_conjunct_disjunct_fill_ratio(self):
            """ No AND or OR operators """
            return 0

        def choose_relational_func_fill_ratio(self):
            """ Every operator is relational """
            return 1

    def single_table_list(table_name, col_name):
        # A TableExprList holding one table that has a single Int column
        exprs = TableExprList()
        table = Table(table_name)
        table.add_col(Column(table_name, col_name, Int))
        exprs.append(table)
        return exprs

    generator = QueryGenerator(FakeHiveQueryProfile())

    # Two tables that each have one joinable Column
    right_exprs = single_table_list("right_table", "right_col")
    left_exprs = single_table_list("left_table", "left_col")

    # The root predicate has to be an Equals func
    join_condition = generator._create_relational_join_condition(
        right_exprs, left_exprs)
    assert isinstance(join_condition, Equals)
def test_use_nested_width_subquery():
    """
    Tests that a profile whose use_nested_with() returns False is honored.

    With that method returning False, none of the nested queries of the generated
    statement may carry a WITH clause.
    """

    class MockQueryProfile(DefaultProfile):
        """
        A DefaultProfile variant that reports use_nested_with as False while
        pushing the QueryGenerator towards nested queries, WITH clauses and inline
        views.
        """

        def __init__(self):
            super(MockQueryProfile, self).__init__()
            # Pin the nested query count bounds so nested queries get created
            self._bounds['MAX_NESTED_QUERY_COUNT'] = (4, 4)
            # Always pick the optional WITH clause when it is possible
            self._probabilities['OPTIONAL_QUERY_CLAUSES']['WITH'] = 1
            # Always pick inline views when they are possible
            self._probabilities['MISC']['INLINE_VIEW'] = 1

        def use_nested_with(self):
            return False

    def int_table(table_name, col_name):
        # A table with a single Int column
        table = Table(table_name)
        table.add_col(Column(table_name, col_name, Int))
        return table

    generator = QueryGenerator(MockQueryProfile())

    # Two tables: the right one first, then the left one
    tables = TableExprList()
    tables.append(int_table("right_table", "right_col"))
    tables.append(int_table("left_table", "left_col"))

    # No nested query is allowed to have a WITH clause
    statement = generator.generate_statement(tables)
    for nested in statement.nested_queries:
        assert nested.with_clause is None
# multiple FakeQueries. SIMPLE_TABLE = FakeTable('fake_table', [ FakeColumn('int_col', Int), FakeColumn('char_col', Char), ]) KUDU_TABLE = FakeTable('kudu_table', [ FakeColumn('int_col', Int, is_primary_key=True), FakeColumn('char_col', Char), ]) # This can't be used inline because we need its table expressions later. SIMPLE_WITH_CLAUSE = WithClause( TableExprList([ WithClauseInlineView( FakeQuery(select_clause=FakeSelectClause(SIMPLE_TABLE.cols[0]), from_clause=FromClause(SIMPLE_TABLE)), 'with_view') ])) # All tests involving SELECT queries should be written to use this data set. SELECT_QUERY_TEST_CASES = [ QueryTest( testid='select col from table', query=FakeQuery( select_clause=FakeSelectClause(*SIMPLE_TABLE.cols), from_clause=FromClause(SIMPLE_TABLE), ), impala_query_string=('SELECT\n' 'fake_table.int_col,\n' 'TRIM(fake_table.char_col)\n' 'FROM fake_table'),
def visible_table_exprs(self):
    '''Provides a list of the table_exprs declared within this FROM block that may
    be referenced in other clauses such as SELECT or WHERE.

    These are the entries of self.table_exprs whose is_visible attribute is truthy.
    '''
    visible = (expr for expr in self.table_exprs if expr.is_visible)
    return TableExprList(visible)