def test_compound_join_different_length(self): """Test compound (multi-column) join key""" joins = [ to_join(( { "table": "test", "column": ["category", "category"] }, { "table":"dim_category", "column": ["category"] })) ] mappings = { "category": Column(None, "test", "category", None, None), "amount": Column(None, "test", "amount", None, None), "category_label": Column(None, "dim_category", "label", None, None), "size": Column(None, "dim_category", "size", None, None), } schema = StarSchema("star", self.md, mappings, self.fact, joins=joins) # Doe we have the joined table in the table list? with self.assertRaisesRegex(ModelError, "different number"): schema.get_star(["category_label"])
def test_compound_join_different_length(self): """Test compound (multi-column) join key""" joins = [ to_join(({ "table": "test", "column": ["category", "category"] }, { "table": "dim_category", "column": ["category"] })) ] mappings = { "category": Column(None, "test", "category", None, None), "amount": Column(None, "test", "amount", None, None), "category_label": Column(None, "dim_category", "label", None, None), "size": Column(None, "dim_category", "size", None, None), } schema = StarSchema("star", self.md, mappings, self.fact, joins=joins) # Doe we have the joined table in the table list? with self.assertRaisesRegex(ModelError, "different number"): schema.get_star(["category_label"])
def test_snowflake_aliased_joins(self): """Test master-detail-detail snowflake chain joins""" joins = [ to_join(("test.category", "dim_category.category", "dim_fruit")), to_join(("dim_fruit.size", "dim_size.size")) ] mappings = { "category": Column(None, "test", "category", None, None), "category_label": Column(None, "dim_fruit", "label", None, None), "size": Column(None, "dim_fruit", "size", None, None), "size_label": Column(None, "dim_size", "label", None, None), } schema = StarSchema("star", self.md, mappings, self.fact, joins=joins) table = schema.table((None, "dim_fruit")) self.assertTrue(table.table.is_derived_from(self.dim_category)) table = schema.table((None, "dim_size")) self.assertTrue(table.table.is_derived_from(self.dim_size)) # Check columns self.assertColumnEqual(schema.column("size_label"), self.dim_size.columns["label"]) # Construct the select for the very last attribute in the snowflake # arm star = schema.get_star(["size_label"]) select = sql.expression.select([schema.column("size_label")], from_obj=star) result = self.engine.execute(select) sizes = [r["size_label"] for r in result] self.assertCountEqual(sizes, ["medium", "small", "large", "small"])
def test_snowflake_joins(self): """Test master-detail-detail snowflake chain joins""" joins = [ to_join(("test.category", "dim_category.category")), to_join(("dim_category.size", "dim_size.size")), ] mappings = { "category": Column(None, "test", "category", None, None), "category_label": Column(None, "dim_category", "label", None, None), "size": Column(None, "dim_category", "size", None, None), "size_label": Column(None, "dim_size", "label", None, None), } schema = StarSchema("star", self.md, mappings, self.fact, joins=joins) # Construct the select for the very last attribute in the snowflake # arm # star = schema.star(["category_label", "size_label"]) star = schema.get_star(["size_label", "category_label"]) select = sql.expression.select([schema.column("size_label")], from_obj=star) result = self.engine.execute(select) sizes = [r["size_label"] for r in result] self.assertCountEqual(sizes, ["medium", "small", "large", "small"])
def test_compound_join_key(self): """Test compound (multi-column) join key""" joins = [ to_join(( { "table": "test", "column": ["category", "category"] }, { "table":"dim_category", "column": ["category", "category"] })) ] mappings = { "category": Column(None, "test", "category", None, None), "amount": Column(None, "test", "amount", None, None), "category_label": Column(None, "dim_category", "label", None, None), "size": Column(None, "dim_category", "size", None, None), } schema = StarSchema("star", self.md, mappings, self.fact, joins=joins) # Doe we have the joined table in the table list? table = schema.table((None, "dim_category")) self.assertEqual(table.table, self.dim_category) tables = schema.required_tables(["category"]) self.assertEqual(len(tables), 1) tables = schema.required_tables(["amount"]) self.assertEqual(len(tables), 1) # Check columns self.assertColumnEqual(schema.column("category"), self.fact.columns["category"]) self.assertColumnEqual(schema.column("category_label"), self.dim_category.columns["label"]) self.assertColumnEqual(schema.column("size"), self.dim_category.columns["size"]) schema.get_star(["category_label"])
def test_compound_join_key(self): """Test compound (multi-column) join key""" joins = [ to_join(({ "table": "test", "column": ["category", "category"] }, { "table": "dim_category", "column": ["category", "category"] })) ] mappings = { "category": Column(None, "test", "category", None, None), "amount": Column(None, "test", "amount", None, None), "category_label": Column(None, "dim_category", "label", None, None), "size": Column(None, "dim_category", "size", None, None), } schema = StarSchema("star", self.md, mappings, self.fact, joins=joins) # Doe we have the joined table in the table list? table = schema.table((None, "dim_category")) self.assertEqual(table.table, self.dim_category) tables = schema.required_tables(["category"]) self.assertEqual(len(tables), 1) tables = schema.required_tables(["amount"]) self.assertEqual(len(tables), 1) # Check columns self.assertColumnEqual(schema.column("category"), self.fact.columns["category"]) self.assertColumnEqual(schema.column("category_label"), self.dim_category.columns["label"]) self.assertColumnEqual(schema.column("size"), self.dim_category.columns["size"]) schema.get_star(["category_label"])
def test_statement_table(self): """Test using a statement as a table""" joins = [ to_join(("test.category", "dim_category.category")) ] mappings = { "code": Column(None, "test", "category", None, None), "fruit": Column(None, "dim_category", "label", None, None), "size": Column(None, "dim_category", "size", None, None), } fact_statement = sa.select(self.fact.columns, from_obj=self.fact, whereclause=self.fact.c.category == 'A') cat_statement = sa.select(self.dim_category.columns, from_obj=self.dim_category, whereclause=self.dim_category.c.category == 'A') tables = { "dim_category": cat_statement } with self.assertRaisesRegex(ArgumentError, "requires alias"): StarSchema("star", self.md, mappings, fact=fact_statement, tables=tables, joins=joins) tables = { "dim_category": cat_statement.alias("dim_category") } schema = StarSchema("star", self.md, mappings, fact=fact_statement.alias("test"), tables=tables, joins=joins) star = schema.get_star(["size"]) selection = [schema.column("size")] select = sql.expression.select(selection, from_obj=star) result = self.engine.execute(select) sizes = [r["size"] for r in result] self.assertCountEqual(sizes, [2])
def test_statement_table(self): """Test using a statement as a table""" joins = [to_join(("test.category", "dim_category.category"))] mappings = { "code": Column(None, "test", "category", None, None), "fruit": Column(None, "dim_category", "label", None, None), "size": Column(None, "dim_category", "size", None, None), } fact_statement = sa.select(self.fact.columns, from_obj=self.fact, whereclause=self.fact.c.category == 'A') cat_statement = sa.select( self.dim_category.columns, from_obj=self.dim_category, whereclause=self.dim_category.c.category == 'A') tables = {"dim_category": cat_statement} with self.assertRaisesRegex(ArgumentError, "requires alias"): StarSchema("star", self.md, mappings, fact=fact_statement, tables=tables, joins=joins) tables = {"dim_category": cat_statement.alias("dim_category")} schema = StarSchema("star", self.md, mappings, fact=fact_statement.alias("test"), tables=tables, joins=joins) star = schema.get_star(["size"]) selection = [schema.column("size")] select = sql.expression.select(selection, from_obj=star) result = self.engine.execute(select) sizes = [r["size"] for r in result] self.assertCountEqual(sizes, [2])
def test_fact_is_included(self): """Test whether the fact will be included in the star schema """ joins = [ to_join(("test.category", "dim_category.category", "dim_fruit")) ] mappings = { "code": Column(None, "test", "category", None, None), "fruit": Column(None, "dim_fruit", "label", None, None), "size": Column(None, "dim_fruit", "size", None, None), } schema = StarSchema("star", self.md, mappings, self.fact, joins=joins) star = schema.get_star(["size"]) selection = [schema.column("size")] select = sql.expression.select(selection, from_obj=star) result = self.engine.execute(select) sizes = [r["size"] for r in result] self.assertCountEqual(sizes, [2, 1, 4, 1])
def test_join_alias(self): """Test single aliased join, test two joins on same table, one aliased """ joins = [ to_join(("test.category", "dim_category.category", "dim_fruit")) ] mappings = { "code": Column(None, "test", "category", None, None), "fruit": Column(None, "dim_fruit", "label", None, None), "size": Column(None, "dim_fruit", "size", None, None), } schema = StarSchema("star", self.md, mappings, self.fact, joins=joins) # Doe we have the joined table in the table list? table = schema.table((None, "dim_fruit")) self.assertTrue(table.table.is_derived_from(self.dim_category)) tables = schema.required_tables(["fruit"]) self.assertEqual(len(tables), 2) # Check columns self.assertColumnEqual(schema.column("code"), self.fact.columns["category"]) self.assertColumnEqual(schema.column("fruit"), self.dim_category.columns["label"]) self.assertColumnEqual(schema.column("size"), self.dim_category.columns["size"]) # Check selectable statement star = schema.get_star(["code", "size"]) selection = [schema.column("code"), schema.column("size")] select = sql.expression.select(selection, from_obj=star) result = self.engine.execute(select) sizes = [r["size"] for r in result] self.assertCountEqual(sizes, [2, 1, 4, 1])
def test_star_basic(self): """Test selection from the very basic star – no joins, just one table""" mappings = { "category": Column(None, "test", "category", None, None), "total": Column(None, "test", "amount", None, None), "year": Column(None, "test", "date", "year", None), } schema = StarSchema("star", self.md, mappings, self.test_fact) star = schema.get_star(["category", "total"]) selection = [schema.column("category"), schema.column("total")] statement = sql.expression.select(selection, from_obj=star) result = self.engine.execute(statement) amounts = [] for row in result: # We are testing proper column labeling amounts.append(row["total"]) self.assertCountEqual(amounts, [1, 2, 4, 8])