def test_collected_tables_fact_only(self):
    """Check table lookup in a schema that consists of the fact table only.

    The fact is supplied in three ways: as a table object, by name, and by
    name together with an explicit ``tables`` dictionary. Looking up a key
    that is not part of the schema is expected to raise ``SchemaError``.
    """
    fact_key = (None, "test")

    # Fact passed as a table object: verify every field of the reference.
    schema = StarSchema("star", self.md, {}, self.test_fact)
    fact_ref = schema.table(fact_key)
    self.assertIs(fact_ref.table, self.test_fact)
    self.assertEqual(fact_ref.name, "test")
    self.assertEqual(fact_ref.alias, "test")
    self.assertEqual(fact_ref.key, fact_key)

    # Fact passed by name.
    schema = StarSchema("star", self.md, {}, "test")
    self.assertIs(schema.table(fact_key).table, self.test_fact)

    # Fact passed by name and also listed in the explicit table dictionary.
    explicit_tables = {"test": self.test_fact}
    schema = StarSchema("star", self.md, {}, "test", tables=explicit_tables)
    self.assertIs(schema.table(fact_key).table, self.test_fact)

    # A key that does not exist in the schema.
    missing_key = (None, "imaginary")
    with self.assertRaises(SchemaError):
        schema.table(missing_key)
def test_collected_tables_fact_only(self):
    """Check table lookup in a schema that consists of the fact table only.

    The fact is supplied in three ways: as a table object, by name, and by
    name together with an explicit ``tables`` dictionary. Looking up a key
    that is not part of the schema is expected to raise ``SchemaError``.
    """
    fact_key = (None, "test")

    # Fact passed as a table object: verify every field of the reference.
    schema = StarSchema("star", self.md, {}, self.test_fact)
    fact_ref = schema.table(fact_key)
    self.assertIs(fact_ref.table, self.test_fact)
    self.assertEqual(fact_ref.name, "test")
    self.assertEqual(fact_ref.alias, "test")
    self.assertEqual(fact_ref.key, fact_key)

    # Fact passed by name.
    schema = StarSchema("star", self.md, {}, "test")
    self.assertIs(schema.table(fact_key).table, self.test_fact)

    # Fact passed by name and also listed in the explicit table dictionary.
    explicit_tables = {"test": self.test_fact}
    schema = StarSchema("star", self.md, {}, "test", tables=explicit_tables)
    self.assertIs(schema.table(fact_key).table, self.test_fact)

    # A key that does not exist in the schema.
    missing_key = (None, "imaginary")
    with self.assertRaises(SchemaError):
        schema.table(missing_key)
def test_snowflake_aliased_joins(self):
    """Check a master-detail-detail snowflake chain with an aliased join.

    ``dim_category`` is joined to the fact under the alias ``dim_fruit`` and
    ``dim_size`` is then joined to that alias. The test verifies the table
    references, the column behind ``size_label`` and the rows produced when
    selecting that attribute through the generated star.
    """
    fact_to_fruit = to_join(
        ("test.category", "dim_category.category", "dim_fruit")
    )
    fruit_to_size = to_join(("dim_fruit.size", "dim_size.size"))

    # logical attribute -> (physical table or alias, physical column)
    physical = {
        "category": ("test", "category"),
        "category_label": ("dim_fruit", "label"),
        "size": ("dim_fruit", "size"),
        "size_label": ("dim_size", "label"),
    }
    mappings = {
        attribute: Column(None, table_name, column_name, None, None)
        for attribute, (table_name, column_name) in physical.items()
    }

    schema = StarSchema("star", self.md, mappings, self.fact,
                        joins=[fact_to_fruit, fruit_to_size])

    # Each joined table reference has to be derived from its source table.
    for alias, source_table in (("dim_fruit", self.dim_category),
                                ("dim_size", self.dim_size)):
        table_ref = schema.table((None, alias))
        self.assertTrue(table_ref.table.is_derived_from(source_table))

    # Check columns
    self.assertColumnEqual(schema.column("size_label"),
                           self.dim_size.columns["label"])

    # Build the select for the very last attribute in the snowflake arm.
    star = schema.get_star(["size_label"])
    statement = sql.expression.select([schema.column("size_label")],
                                      from_obj=star)

    size_labels = []
    for row in self.engine.execute(statement):
        size_labels.append(row["size_label"])

    self.assertCountEqual(size_labels, ["medium", "small", "large", "small"])
def test_required_tables(self):
    """Check ``required_tables`` for attributes along a snowflake chain.

    With the chain ``test`` -> ``dim_category`` -> ``dim_size``, asking for
    ``size_label`` (alone or together with ``category_label``) is expected
    to yield all three table references.
    """
    category_join = to_join(("test.category", "dim_category.category"))
    size_join = to_join(("dim_category.size", "dim_size.size"))

    # logical attribute -> (physical table, physical column)
    physical = {
        "amount": ("test", "amount"),
        "category": ("test", "category"),
        "category_label": ("dim_category", "label"),
        "size": ("dim_category", "size"),
        "size_label": ("dim_size", "label"),
    }
    mappings = {
        attribute: Column(None, table_name, column_name, None, None)
        for attribute, (table_name, column_name) in physical.items()
    }

    schema = StarSchema("star", self.md, mappings, self.fact,
                        joins=[category_join, size_join])

    expected = [
        schema.table((None, table_name))
        for table_name in ("test", "dim_category", "dim_size")
    ]

    # The deepest attribute alone pulls in the whole chain.
    required = schema.required_tables(["size_label"])
    self.assertEqual(len(required), 3)
    self.assertCountEqual(required, expected)

    # The same set of tables is expected regardless of the order in which
    # the attributes are requested. Note that assertCountEqual compares
    # contents only; the order of the returned tables is not verified.
    for attributes in (["size_label", "category_label"],
                       ["category_label", "size_label"]):
        required = schema.required_tables(attributes)
        self.assertCountEqual(required, expected)
def test_join(self):
    """Check a schema with a single join from the fact to ``dim_category``.

    Verifies that the joined table is registered in the schema, that
    attributes mapped to the fact table require exactly one table, and that
    the mapped attributes resolve to the expected physical columns.
    """
    category_join = to_join(("test.category", "dim_category.category"))

    # logical attribute -> (physical table, physical column)
    physical = {
        "category": ("test", "category"),
        "amount": ("test", "amount"),
        "category_label": ("dim_category", "label"),
        "size": ("dim_category", "size"),
    }
    mappings = {
        attribute: Column(None, table_name, column_name, None, None)
        for attribute, (table_name, column_name) in physical.items()
    }

    schema = StarSchema("star", self.md, mappings, self.fact,
                        joins=[category_join])

    # Do we have the joined table in the table list?
    joined_ref = schema.table((None, "dim_category"))
    self.assertEqual(joined_ref.table, self.dim_category)

    # Attributes stored in the fact need one table only.
    for attribute in ("category", "amount"):
        required = schema.required_tables([attribute])
        self.assertEqual(len(required), 1)

    # Check columns
    column_sources = (
        ("category", self.fact, "category"),
        ("category_label", self.dim_category, "label"),
        ("size", self.dim_category, "size"),
    )
    for attribute, source_table, column_name in column_sources:
        self.assertColumnEqual(schema.column(attribute),
                               source_table.columns[column_name])
def test_join(self):
    """Check a schema with a single join from the fact to ``dim_category``.

    Verifies that the joined table is registered in the schema, that
    attributes mapped to the fact table require exactly one table, and that
    the mapped attributes resolve to the expected physical columns.
    """
    category_join = to_join(("test.category", "dim_category.category"))

    # logical attribute -> (physical table, physical column)
    physical = {
        "category": ("test", "category"),
        "amount": ("test", "amount"),
        "category_label": ("dim_category", "label"),
        "size": ("dim_category", "size"),
    }
    mappings = {
        attribute: Column(None, table_name, column_name, None, None)
        for attribute, (table_name, column_name) in physical.items()
    }

    schema = StarSchema("star", self.md, mappings, self.fact,
                        joins=[category_join])

    # Do we have the joined table in the table list?
    joined_ref = schema.table((None, "dim_category"))
    self.assertEqual(joined_ref.table, self.dim_category)

    # Attributes stored in the fact need one table only.
    for attribute in ("category", "amount"):
        required = schema.required_tables([attribute])
        self.assertEqual(len(required), 1)

    # Check columns
    column_sources = (
        ("category", self.fact, "category"),
        ("category_label", self.dim_category, "label"),
        ("size", self.dim_category, "size"),
    )
    for attribute, source_table, column_name in column_sources:
        self.assertColumnEqual(schema.column(attribute),
                               source_table.columns[column_name])
def test_compound_join_key(self):
    """Check a join whose key is given as a list of columns on both sides.

    The join is specified in dictionary form with the ``category`` column
    listed twice on the master and on the detail side. The test verifies
    the joined table, the required tables for fact attributes, the column
    mappings, and finally requests a star for a dimension attribute.
    """
    master_spec = {
        "table": "test",
        "column": ["category", "category"],
    }
    detail_spec = {
        "table": "dim_category",
        "column": ["category", "category"],
    }
    joins = [to_join((master_spec, detail_spec))]

    # logical attribute -> (physical table, physical column)
    physical = {
        "category": ("test", "category"),
        "amount": ("test", "amount"),
        "category_label": ("dim_category", "label"),
        "size": ("dim_category", "size"),
    }
    mappings = {
        attribute: Column(None, table_name, column_name, None, None)
        for attribute, (table_name, column_name) in physical.items()
    }

    schema = StarSchema("star", self.md, mappings, self.fact, joins=joins)

    # Do we have the joined table in the table list?
    joined_ref = schema.table((None, "dim_category"))
    self.assertEqual(joined_ref.table, self.dim_category)

    # Attributes stored in the fact need one table only.
    for attribute in ("category", "amount"):
        required = schema.required_tables([attribute])
        self.assertEqual(len(required), 1)

    # Check columns
    column_sources = (
        ("category", self.fact, "category"),
        ("category_label", self.dim_category, "label"),
        ("size", self.dim_category, "size"),
    )
    for attribute, source_table, column_name in column_sources:
        self.assertColumnEqual(schema.column(attribute),
                               source_table.columns[column_name])

    # The result is not inspected; the call only has to complete without
    # raising for an attribute that lives behind the compound-key join.
    schema.get_star(["category_label"])
def test_compound_join_key(self):
    """Check a join whose key is given as a list of columns on both sides.

    The join is specified in dictionary form with the ``category`` column
    listed twice on the master and on the detail side. The test verifies
    the joined table, the required tables for fact attributes, the column
    mappings, and finally requests a star for a dimension attribute.
    """
    master_spec = {
        "table": "test",
        "column": ["category", "category"],
    }
    detail_spec = {
        "table": "dim_category",
        "column": ["category", "category"],
    }
    joins = [to_join((master_spec, detail_spec))]

    # logical attribute -> (physical table, physical column)
    physical = {
        "category": ("test", "category"),
        "amount": ("test", "amount"),
        "category_label": ("dim_category", "label"),
        "size": ("dim_category", "size"),
    }
    mappings = {
        attribute: Column(None, table_name, column_name, None, None)
        for attribute, (table_name, column_name) in physical.items()
    }

    schema = StarSchema("star", self.md, mappings, self.fact, joins=joins)

    # Do we have the joined table in the table list?
    joined_ref = schema.table((None, "dim_category"))
    self.assertEqual(joined_ref.table, self.dim_category)

    # Attributes stored in the fact need one table only.
    for attribute in ("category", "amount"):
        required = schema.required_tables([attribute])
        self.assertEqual(len(required), 1)

    # Check columns
    column_sources = (
        ("category", self.fact, "category"),
        ("category_label", self.dim_category, "label"),
        ("size", self.dim_category, "size"),
    )
    for attribute, source_table, column_name in column_sources:
        self.assertColumnEqual(schema.column(attribute),
                               source_table.columns[column_name])

    # The result is not inspected; the call only has to complete without
    # raising for an attribute that lives behind the compound-key join.
    schema.get_star(["category_label"])
def test_join_alias(self):
    """Check a single join where the detail table is given an alias.

    ``dim_category`` is joined to the fact under the alias ``dim_fruit``.
    The test verifies the aliased table reference, the tables required for
    an attribute of the aliased table, the column mappings, and the rows
    returned when selecting through the generated star.
    """
    aliased_join = to_join(
        ("test.category", "dim_category.category", "dim_fruit")
    )

    # logical attribute -> (physical table or alias, physical column)
    physical = {
        "code": ("test", "category"),
        "fruit": ("dim_fruit", "label"),
        "size": ("dim_fruit", "size"),
    }
    mappings = {
        attribute: Column(None, table_name, column_name, None, None)
        for attribute, (table_name, column_name) in physical.items()
    }

    schema = StarSchema("star", self.md, mappings, self.fact,
                        joins=[aliased_join])

    # Do we have the joined table in the table list?
    fruit_ref = schema.table((None, "dim_fruit"))
    self.assertTrue(fruit_ref.table.is_derived_from(self.dim_category))

    # An attribute of the aliased table needs the fact and the alias.
    required = schema.required_tables(["fruit"])
    self.assertEqual(len(required), 2)

    # Check columns
    column_sources = (
        ("code", self.fact, "category"),
        ("fruit", self.dim_category, "label"),
        ("size", self.dim_category, "size"),
    )
    for attribute, source_table, column_name in column_sources:
        self.assertColumnEqual(schema.column(attribute),
                               source_table.columns[column_name])

    # Check selectable statement
    star = schema.get_star(["code", "size"])
    selected_columns = [schema.column("code"), schema.column("size")]
    statement = sql.expression.select(selected_columns, from_obj=star)

    sizes = []
    for row in self.engine.execute(statement):
        sizes.append(row["size"])

    self.assertCountEqual(sizes, [2, 1, 4, 1])