def create_table_from_describe(self, table_name, describe_rows):
    """Build a Table from the rows of a DESCRIBE result.

    The returned table is named with the lower-cased ``table_name``. The
    first two fields of every row are taken as the column name and its type
    description; the description is parsed with ``self.parse_col_desc`` and
    the column is built with ``self.create_column``. A column is flagged as a
    primary key when its name is among the names returned by
    ``self._fetch_primary_key_names(table_name)``.
    """
    # Primary keys are looked up with the name exactly as given (not lower-cased).
    pk_names = self._fetch_primary_key_names(table_name)
    described_table = Table(table_name.lower())
    for describe_row in describe_rows:
        # Only the leading two fields are used; any further fields are ignored.
        name, type_desc = describe_row[:2]
        column = self.create_column(name, self.parse_col_desc(type_desc))
        column.is_primary_key = name in pk_names
        described_table.add_col(column)
    return described_table
def FakeTable(name, fake_columns):
    """
    Build a Table called ``name`` holding the given FakeColumns.

    Columns can only be attached to a Table through ``add_col``, so this
    factory takes them as a collection, which lets a table be declared in a
    single nested expression. An empty (falsy) ``fake_columns`` raises an
    Exception.
    """
    fake_table = Table(name)
    if fake_columns:
        for column in fake_columns:
            fake_table.add_col(column)
        return fake_table
    raise Exception('You must supply at least one FakeColumn argument')
def FakeTable(name, fake_columns, storage_format='TEXTFILE'):
    """
    Build a Table called ``name`` holding the given FakeColumns.

    Columns can only be attached to a Table through ``add_col``, so this
    factory takes them as a collection, which lets a table be declared in a
    single nested expression. ``storage_format`` is stored on the table after
    the columns have been added. An empty (falsy) ``fake_columns`` raises an
    Exception.
    """
    fake_table = Table(name)
    if fake_columns:
        for column in fake_columns:
            fake_table.add_col(column)
        fake_table.storage_format = storage_format
        return fake_table
    raise Exception('You must supply at least one FakeColumn argument')
def test_table_model(self, cursor, hive_cursor):
    """Check that a table created via ``cursor`` is described identically.

    A two-column table is created through ``cursor`` and then read back with
    ``hive_cursor.describe_table``; the described table must match the
    original in name and columns. The table is dropped afterwards whether or
    not the assertions pass.
    """
    expected = Table("some_test_table")
    # Clear out any leftover table from an earlier run.
    cursor.drop_table(expected.name, if_exists=True)
    expected.storage_format = 'textfile'
    for col_name, col_type in (("bigint_col", BigInt), ("string_col", String)):
        expected.add_col(Column(expected, col_name, col_type))
    cursor.create_table(expected)
    try:
        described = hive_cursor.describe_table(expected.name)
        assert described.name == expected.name
        assert described.cols == expected.cols
    finally:
        cursor.drop_table(expected.name)
def test_hive_create_equality_only_joins():
    """
    Verify that, with a HiveProfile, the QueryGenerator builds a relational
    join condition whose root is an Equals function.
    """

    class FakeHiveQueryProfile(HiveProfile):
        """
        HiveProfile subclass with fixed choices so that the generated join
        condition is deterministic.
        """

        def choose_join_condition_count(self):
            """ Exactly one operator in the JOIN condition """
            return 1

        def choose_conjunct_disjunct_fill_ratio(self):
            """ No AND / OR operators """
            return 0

        def choose_relational_func_fill_ratio(self):
            """ Every operator is relational """
            return 1

    def single_int_col_tables(table_name, col_name):
        # A TableExprList holding one table that has a single Int column.
        tables = TableExprList()
        table = Table(table_name)
        table.add_col(Column(table_name, col_name, Int))
        tables.append(table)
        return tables

    query_generator = QueryGenerator(FakeHiveQueryProfile())

    # Two tables, each with one joinable column
    right_tables = single_int_col_tables("right_table", "right_col")
    left_tables = single_int_col_tables("left_table", "left_col")

    # The root predicate must be an Equals func
    join_condition = query_generator._create_relational_join_condition(
        right_tables, left_tables)
    assert isinstance(join_condition, Equals)
def test_use_nested_width_subquery():
    """
    Verify that a profile whose use_nested_with returns False yields no WITH
    clause in any nested query of the query built by create_query.
    """

    class MockQueryProfile(DefaultProfile):
        """
        DefaultProfile subclass that turns use_nested_with off while pushing
        the QueryGenerator towards nested queries.
        """

        def __init__(self):
            super(MockQueryProfile, self).__init__()
            # Pin the nested query count bounds to (4, 4)
            self._bounds['MAX_NESTED_QUERY_COUNT'] = (4, 4)
            # WITH clauses are chosen with probability 1
            self._probabilities['OPTIONAL_QUERY_CLAUSES']['WITH'] = 1
            # Inline views are chosen with probability 1
            self._probabilities['MISC']['INLINE_VIEW'] = 1

        def use_nested_with(self):
            return False

    mock_query_gen = QueryGenerator(MockQueryProfile())

    # Two tables, each with a single Int column
    table_expr_list = TableExprList()
    for table_name, col_name in (("right_table", "right_col"),
                                 ("left_table", "left_col")):
        table = Table(table_name)
        table.add_col(Column(table_name, col_name, Int))
        table_expr_list.append(table)

    # No nested query may carry a WITH clause
    query = mock_query_gen.create_query(table_expr_list)
    for subquery in query.nested_queries:
        assert subquery.with_clause is None
def test_use_nested_width_subquery():
    """
    Verify that a profile whose use_nested_with returns False yields no WITH
    clause in any nested query of the statement built by generate_statement.
    """

    class MockQueryProfile(DefaultProfile):
        """
        DefaultProfile subclass that turns use_nested_with off while pushing
        the QueryGenerator towards nested queries.
        """

        def __init__(self):
            super(MockQueryProfile, self).__init__()
            # Pin the nested query count bounds to (4, 4)
            self._bounds['MAX_NESTED_QUERY_COUNT'] = (4, 4)
            # WITH clauses are chosen with probability 1
            self._probabilities['OPTIONAL_QUERY_CLAUSES']['WITH'] = 1
            # Inline views are chosen with probability 1
            self._probabilities['MISC']['INLINE_VIEW'] = 1

        def use_nested_with(self):
            return False

    mock_query_gen = QueryGenerator(MockQueryProfile())

    # Two tables, each with a single Int column
    table_expr_list = TableExprList()
    for table_name, col_name in (("right_table", "right_col"),
                                 ("left_table", "left_col")):
        table = Table(table_name)
        table.add_col(Column(table_name, col_name, Int))
        table_expr_list.append(table)

    # No nested query may carry a WITH clause
    statement = mock_query_gen.generate_statement(table_expr_list)
    for subquery in statement.nested_queries:
        assert subquery.with_clause is None