def setUp(self):
    """Register empty fake relations for every name prefix and width.

    For each prefix in R, S, T, I, D, C and each width in 1..3 an empty
    relation ``public:adhoc:<prefix><width>`` is ingested into ``self.db``
    and recorded in ``self.tables`` (short name -> qualified name).
    Column ``a`` is always LONG; columns ``b`` and ``c`` are DOUBLE for
    ``D`` tables, STRING for ``C`` tables and LONG for the rest.
    """
    super(MyriaLPlatformTestHarness, self).setUp()
    self.tables = {}

    for prefix in ['R', 'S', 'T', 'I', 'D', 'C']:
        if prefix == 'D':
            tail_type = types.DOUBLE_TYPE
        elif prefix == 'C':
            tail_type = types.STRING_TYPE
        else:
            tail_type = types.LONG_TYPE

        # The width-N relation uses the first N of these columns.
        columns = [("a", types.LONG_TYPE),
                   ("b", tail_type),
                   ("c", tail_type)]

        for width in [1, 2, 3]:
            short_name = "%s%d" % (prefix, width)
            qualified = "public:adhoc:%s" % short_name
            self.tables[short_name] = qualified
            # ingest fake data; data is already generated separately for
            # now
            self.db.ingest(qualified, Counter(),
                           scheme.Scheme(columns[:width]))
class PageRankTest(myrial_test.MyrialTestCase):
    """Run the PageRank example programs on a small five-vertex graph."""

    edge_table = collections.Counter([
        (0, 3),
        (1, 0), (1, 2),
        (2, 0), (2, 1), (2, 3),
        (3, 0), (3, 1), (3, 2), (3, 4),
        (4, 0), (4, 1), (4, 2), (4, 3), (4, 4),
    ])
    edge_schema = scheme.Scheme([("src", types.LONG_TYPE),
                                 ("dst", types.LONG_TYPE)])
    edge_key = "public:adhoc:edges"

    vertex_table = collections.Counter([(x, ) for x in range(5)])
    vertex_key = "public:adhoc:vertices"
    vertex_schema = scheme.Scheme([("id", types.LONG_TYPE)])

    def setUp(self):
        """Ingest the edge and vertex relations into the test database."""
        super(PageRankTest, self).setUp()

        fixtures = [
            (PageRankTest.edge_key,
             PageRankTest.edge_table,
             PageRankTest.edge_schema),
            (PageRankTest.vertex_key,
             PageRankTest.vertex_table,
             PageRankTest.vertex_schema),
        ]
        for key, table, schema in fixtures:
            self.db.ingest(key, table, schema)

    def __do_test(self, phile):
        """Execute the program stored in file ``phile`` and check the ranks.

        The result tuples are turned into a dict and the value stored
        under each of the keys 0..4 is compared with its expected rank.
        """
        with open(phile) as fh:
            query = fh.read()

        ranks = dict(self.execute_query(query).elements())

        expected_ranks = [
            0.23576110832410296,
            0.16544845649781043,
            0.18370688939571236,
            0.3016893082129546,
            0.11339423756941983,
        ]
        for vertex, rank in enumerate(expected_ranks):
            self.assertAlmostEqual(ranks[vertex], rank)

    def test_pagerank(self):
        self.__do_test('examples/pagerank.myl')

    def verify_undefined(self, var):
        """Assert that looking up temp table ``var`` raises KeyError."""
        with self.assertRaises(KeyError):
            self.db.get_temp_table(var)

    def test_pagerank_deadcode(self):
        """Test of page rank with numerous dead code statements."""
        self.__do_test('examples/pagerank_dead.myl')

        # Verify that D0, D1 tables are compiled out
        for dead_table in ("D0", "D1"):
            self.verify_undefined(dead_table)
def test_simple_scheme(self):
    """A scheme stored with add_table is returned equal by get_scheme."""
    sch = scheme.Scheme()
    sch.addAttribute('w', types.FLOAT_TYPE)
    sch.addAttribute('x', types.INT_TYPE)
    sch.addAttribute('y', types.LONG_TYPE)
    sch.addAttribute('z', types.STRING_TYPE)

    self.db.add_table('simple', sch)
    sch2 = self.db.get_scheme('simple')

    # assertEqual, not the deprecated assertEquals alias (the alias was
    # removed from unittest in Python 3.12).
    self.assertEqual(sch, sch2)
class FakeData(object):
    """Shared fixture data: employee, department and numbers relations.

    Each relation is exposed as a ``<name>_table`` bag of tuples, a
    ``<name>_schema`` and a ``<name>_key`` qualified relation name.
    """

    emp_table = collections.Counter([
        # id dept_id name salary
        (1, 2, "Bill Howe", 25000),
        (2, 1, "Dan Halperin", 90000),
        (3, 1, "Andrew Whitaker", 5000),
        (4, 2, "Shumo Chu", 5000),
        (5, 1, "Victor Almeida", 25000),
        (6, 3, "Dan Suciu", 90000),
        (7, 1, "Magdalena Balazinska", 25000)
    ])
    emp_schema = scheme.Scheme([
        ("id", types.INT_TYPE),
        ("dept_id", types.INT_TYPE),
        ("name", types.STRING_TYPE),
        ("salary", types.LONG_TYPE),
    ])
    emp_key = "public:adhoc:employee"

    dept_table = collections.Counter([
        # id name manager
        (1, "accounting", 5),
        (2, "human resources", 2),
        (3, "engineering", 2),
        (4, "sales", 7),
    ])
    dept_schema = scheme.Scheme([
        ("id", types.LONG_TYPE),
        ("name", types.STRING_TYPE),
        ("manager", types.LONG_TYPE),
    ])
    dept_key = "public:adhoc:department"

    numbers_table = collections.Counter([
        # id val
        (1, 3),
        (2, 5),
        (3, -2),
        (16, -4.3),
    ])
    numbers_schema = scheme.Scheme([
        ("id", types.LONG_TYPE),
        ("val", types.DOUBLE_TYPE),
    ])
    numbers_key = "public:adhoc:numbers"

    # NOTE(review): the meaning of each position in this tuple is not
    # visible here; confirm against the code that consumes it.
    test_function = (
        "test",
        "function_text",
        1,
        "id (INT_TYPE), dept_id (INT_TYPE)",
        "INT_TYPE",
        "test_body",
    )
def get_scheme(self, rel_key):
    """Fetch the schema of relation ``rel_key`` through ``self.connection``.

    Returns a ``scheme.Scheme`` built from the dataset's ``columnNames``
    paired with its ``columnTypes``.

    Raises ValueError when there is no connection, or when the dataset
    lookup raises ``myria.MyriaError``.
    """
    relation_args = dict(
        userName=rel_key.user,
        programName=rel_key.program,
        relationName=rel_key.relation,
    )

    if not self.connection:
        message = "no schema for relation %s because no connection" % rel_key
        raise ValueError(message)

    try:
        dataset_info = self.connection.dataset(relation_args)
    except myria.MyriaError:
        raise ValueError(rel_key)

    schema = dataset_info['schema']
    column_names = schema['columnNames']
    column_types = schema['columnTypes']
    return scheme.Scheme(zip(column_names, column_types))
class SQLTestCase(unittest.TestCase):
    """A base for testing the compilation of RACO programs to SQL queries"""

    emp_table = [
        # id dept_id name salary mgr_id
        (0, 1, "Hank Levy", 1000000, -1),
        (1, 2, "Bill Howe", 25000, 0),
        (2, 1, "Dan Halperin", 90000, 0),
        (3, 1, "Andrew Whitaker", 5000, 0),
        (4, 2, "Shumo Chu", 5000, 0),
        (5, 1, "Victor Almeida", 25000, 0),
        (6, 3, "Dan Suciu", 90000, 0),
        (7, 1, "Magdalena Balazinska", 25000, 0)
    ]

    emp_schema = scheme.Scheme([("id", types.INT_TYPE),
                                ("dept_id", types.INT_TYPE),
                                ("name", types.STRING_TYPE),
                                ("salary", types.LONG_TYPE),
                                ("mgr_id", types.INT_TYPE)])

    emp_key = "public:adhoc:employee"

    def setUp(self):
        """Create an in-memory SQLite catalog loaded with the employees."""
        # SQLAlchemy
        self.db = SQLCatalog(
            sqlalchemy.create_engine('sqlite:///:memory:', echo=True))
        self.db.add_table(self.emp_key, self.emp_schema)
        self.db.add_tuples(self.emp_key, self.emp_schema, self.emp_table)

        # MyriaL
        self.parser = parser.Parser()
        self.processor = interpreter.StatementProcessor(self.db)

    def query_to_phys_plan(self, query, **kwargs):
        """Parse and evaluate ``query`` and return its optimized plan.

        ``kwargs`` are forwarded to ``get_logical_plan``.  The plan is
        optimized with the ``OptLogicalAlgebra`` rules; a top-level
        ``Store``/``StoreTemp`` operator is replaced by its input.
        """
        statements = self.parser.parse(query)
        self.processor.evaluate(statements)

        p = self.processor.get_logical_plan(**kwargs)
        p = optimize_by_rules(p, OptLogicalAlgebra.opt_rules())

        if isinstance(p, (algebra.Store, algebra.StoreTemp)):
            p = p.input
        return p

    def execute(self, query, expected, **kwargs):
        """Evaluate ``query`` on the catalog and compare with ``expected``.

        ``expected`` is compared against a ``Counter`` of the tuples that
        ``self.db.evaluate`` returns for the plan.
        """
        p = self.query_to_phys_plan(query, **kwargs)
        ans = self.db.evaluate(p)
        # assertEqual, not the deprecated assertEquals alias (the alias
        # was removed from unittest in Python 3.12).
        self.assertEqual(expected, Counter(ans))
class KmeansTest(myrial_test.MyrialTestCase):
    """Execute the k-means example program over nine fixture points."""

    points = [
        (1, 1.0, 1.0),
        (2, 0.99, 0.99),
        (3, 1.01, 1.01),
        (4, 10.0, 10.0),
        (5, 10.99, 10.99),
        (6, 10.01, 10.01),
        (7, 100.0, 100.0),
        (8, 100.99, 100.99),
        (9, 100.01, 100.01),
    ]
    points_table = collections.Counter(points)

    points_schema = scheme.Scheme([
        ('id', types.LONG_TYPE),
        ('x', types.DOUBLE_TYPE),
        ('y', types.DOUBLE_TYPE),
    ])
    points_key = "public:adhoc:points"

    def setUp(self):
        """Ingest the points relation into the test database."""
        super(KmeansTest, self).setUp()

        fixture = KmeansTest
        self.db.ingest(fixture.points_key,
                       fixture.points_table,
                       fixture.points_schema)

    def test_kmeans(self):
        """The example program executes (JSON step skipped)."""
        with open('examples/kmeans.myl') as source:
            program = source.read()
        self.execute_query(program, skip_json=True)
class SigmaClippingTest(myrial_test.MyrialTestCase):
    """Check the sigma-clipping example programs against a fixed sample."""

    points = [
        25.0, 27.2, 23.4, 25.1, 26.3, 24.9, 23.5, 22.7, 108.2, 26.2,
        25.3, 24.7, 25.01, 26.1, 22.8, 2.2, 24.8, 25.05, 25.15,
    ]
    # (id, value) pairs; the id is the position of the value in ``points``.
    points_tuples = list(enumerate(points))
    points_table = collections.Counter(points_tuples)

    points_schema = scheme.Scheme([('id', types.LONG_TYPE), ('v', types.DOUBLE_TYPE)])  # noqa
    points_key = "public:adhoc:sc_points"

    def setUp(self):
        """Ingest the sample points plus an empty relation named "empty"."""
        super(SigmaClippingTest, self).setUp()

        fixture = SigmaClippingTest
        self.db.ingest(fixture.points_key,
                       fixture.points_table,
                       fixture.points_schema)

        # TODO: Better support for empty relations in the language
        self.db.ingest("empty", collections.Counter(),
                       fixture.points_schema)

    def run_it(self, query):
        """Check that ``query`` keeps exactly the points with 22 < v < 28.

        The comparison is made against the ``sc_points_clipped`` output.
        """
        kept = [pair for pair in self.points_tuples if 22 < pair[1] < 28]
        self.check_result(query, collections.Counter(kept),
                          output='sc_points_clipped')

    def test_v0(self):
        with open('examples/sigma-clipping-v0.myl') as source:
            program = source.read()
        self.run_it(program)

    def test_v2(self):
        with open('examples/sigma-clipping.myl') as source:
            program = source.read()
        self.run_it(program)
class ReachableTest(myrial_test.MyrialTestCase):
    """Reachability and join tests over a small directed edge relation."""

    edge_table = collections.Counter([
        (1, 2), (2, 3), (3, 4), (4, 3), (3, 5),
        (4, 13), (5, 4), (1, 9), (7, 1), (6, 1),
        (10, 11), (11, 12), (12, 10), (13, 4), (10, 1),
    ])
    edge_schema = scheme.Scheme([("src", types.LONG_TYPE),
                                 ("dst", types.LONG_TYPE)])
    edge_key = "public:adhoc:edges"

    def setUp(self):
        """Ingest the edge relation into the test database."""
        super(ReachableTest, self).setUp()

        fixture = ReachableTest
        self.db.ingest(fixture.edge_key,
                       fixture.edge_table,
                       fixture.edge_schema)

    def test_reachable(self):
        """The reachable example yields the expected set of vertices."""
        with open('examples/reachable.myl') as source:
            query = source.read()

        vertices = (1, 2, 3, 4, 5, 9, 13)
        expected = collections.Counter([(v,) for v in vertices])

        self.check_result(query, expected, skip_json=True)

    def test_multi_condition_join(self):
        """Self-join with two equality conditions and one inequality."""
        query = (
            " Edge = SCAN(public:adhoc:edges);"
            " Symmetric = [FROM Edge AS E1, Edge AS E2"
            " WHERE E1.src==E2.dst AND E2.src==E1.dst AND E1.src < E1.dst"
            " EMIT E1.src AS src, E1.dst AS dst];"
            " STORE(Symmetric, OUTPUT); "
        )

        edges = ReachableTest.edge_table
        symmetric = []
        for (a, b) in edges:
            for (c, d) in edges:
                if a == d and b == c and a < b:
                    symmetric.append((a, b))
        expected = collections.Counter(symmetric)

        self.check_result(query, expected)

    def test_cross_plus_selection_becomes_join(self):
        """Test that the optimizer compiles away cross-products."""
        with open('examples/reachable.myl') as source:
            query = source.read()

        def plan_contains_cross(plan):
            def visit(op):
                # Yield only for a cross product whose left input is not
                # a singleton relation.
                if not isinstance(op, raco.algebra.CrossProduct):
                    return
                if isinstance(op.left, raco.algebra.SingletonRelation):
                    return
                yield True

            return any(plan.postorder(visit))

        statements = self.parser.parse(query)
        self.processor.evaluate(statements)

        logical = self.processor.get_logical_plan()
        self.assertTrue(plan_contains_cross(logical))

        physical = self.processor.get_physical_plan()
        self.assertFalse(plan_contains_cross(physical))
def p_expression_load(p):
    'expression : LOAD LPAREN STRING_LITERAL COMMA file_parser_fun RPAREN'
    # NOTE(review): the string above looks like a yacc-style grammar rule
    # read by the parser generator; keep it verbatim - confirm.
    # p[3] is the STRING_LITERAL and p[5] the file_parser_fun value, a
    # (format, schema, options) triple.
    file_format, columns, options = p[5]
    load_schema = scheme.Scheme(columns)
    p[0] = ('LOAD', p[3], file_format, load_schema, options)
def p_expression_empty(p):
    'expression : EMPTY LPAREN column_def_list RPAREN'
    # NOTE(review): the string above looks like a yacc-style grammar rule
    # read by the parser generator; keep it verbatim - confirm.
    # p[3] is the column_def_list value; wrap it in a Scheme.
    empty_schema = scheme.Scheme(p[3])
    p[0] = ('EMPTY', empty_schema)
class SetopTestFunctions(myrial_test.MyrialTestCase):
    """Union, difference and intersection tests over two employee bags."""

    emp_table1 = collections.Counter([
        (1, 2, "Bill Howe", 25000),
        (1, 2, "Bill Howe", 25000),
        (2, 1, "Dan Halperin", 90000),
        (3, 1, "Andrew Whitaker", 5000),
        (3, 1, "Andrew Whitaker", 5000),
        (4, 2, "Shumo Chu", 5000),
        (5, 1, "Victor Almeida", 25000),
        (6, 3, "Dan Suciu", 90000),
        (7, 1, "Magdalena Balazinska", 25000),
    ])
    emp_key1 = "andrew:adhoc:employee1"

    emp_table2 = collections.Counter([
        (1, 2, "Bill Howe", 25000),
        (7, 1, "Magdalena Balazinska", 25000),
        (7, 1, "Magdalena Balazinska", 25000),
        (8, 2, "JingJing Wang", 47000),
    ])
    emp_key2 = "andrew:adhoc:employee2"

    emp_schema = scheme.Scheme([
        ("id", types.LONG_TYPE),
        ("dept_id", types.LONG_TYPE),
        ("name", types.STRING_TYPE),
        ("salary", types.LONG_TYPE),
    ])

    def setUp(self):
        """Ingest both employee relations into the test database."""
        super(SetopTestFunctions, self).setUp()

        fixture = SetopTestFunctions
        for key, table in [(fixture.emp_key1, fixture.emp_table1),
                           (fixture.emp_key2, fixture.emp_table2)]:
            self.db.ingest(key, table, fixture.emp_schema)

    def test_unionall(self):
        # NOTE(review): this test is identical to test_unionall_inline;
        # confirm whether a function-call form was intended here.
        query = (
            " out = SCAN(%s) + SCAN(%s);"
            " STORE(out, OUTPUT); "
        ) % (self.emp_key1, self.emp_key2)

        combined = self.emp_table1 + self.emp_table2
        self.check_result(query, combined)

    def test_union_schema_mismatch(self):
        """UNION of inputs with different column counts is rejected."""
        query = (
            " T1 = [FROM SCAN(%s) AS X"
            " EMIT id, dept_id, name, salary, 7 as seven];"
            " out = UNION(T1, SCAN(%s));"
            " STORE(out, OUTPUT); "
        ) % (self.emp_key1, self.emp_key2)

        with self.assertRaises(SchemaMismatchException):
            self.get_logical_plan(query)

    def test_unionall_inline(self):
        query = (
            " out = SCAN(%s) + SCAN(%s);"
            " STORE(out, OUTPUT); "
        ) % (self.emp_key1, self.emp_key2)

        combined = self.emp_table1 + self.emp_table2
        self.check_result(query, combined)

    def test_unionall_inline_ternary(self):
        query = (
            " out = SCAN(%s) + [FROM SCAN(%s) AS X EMIT *] + SCAN(%s);"
            " STORE(out, OUTPUT); "
        ) % (self.emp_key1, self.emp_key1, self.emp_key1)

        tripled = self.emp_table1 + self.emp_table1 + self.emp_table1
        self.check_result(query, tripled)

    def test_diff1(self):
        query = (
            " out = DIFF(SCAN(%s), SCAN(%s));"
            " STORE(out, OUTPUT); "
        ) % (self.emp_key1, self.emp_key2)

        only_in_first = set(self.emp_table1) - set(self.emp_table2)
        self.check_result(query, collections.Counter(only_in_first))

    def test_diff2(self):
        query = (
            " out = DIFF(SCAN(%s), SCAN(%s));"
            " STORE(out, OUTPUT); "
        ) % (self.emp_key2, self.emp_key1)

        only_in_second = set(self.emp_table2) - set(self.emp_table1)
        self.check_result(query, collections.Counter(only_in_second))

    def test_diff_schema_mismatch(self):
        """DIFF of inputs with different column counts is rejected."""
        query = (
            " T1 = [FROM SCAN(%s) AS X EMIT id, dept_id, name];"
            " out = DIFF(SCAN(%s), T1);"
            " STORE(out, OUTPUT); "
        ) % (self.emp_key1, self.emp_key2)

        with self.assertRaises(SchemaMismatchException):
            self.get_logical_plan(query)

    def test_diff_while_schema_mismatch(self):
        query = (
            " Orig = [2 as x];"
            " T1 = [2 as x];"
            " do"
            " Bad = diff(T1, Orig);"
            " T1 = [3 as x, 3 as y];"
            " while [from Bad emit count(*) > 0];"
            " store(T1, OUTPUT); "
        )

        with self.assertRaises(SchemaMismatchException):
            # TODO Even if executed, this test does not throw exception
            self.get_logical_plan(query)

    def test_diff_while_schema_mismatch2(self):
        query = (
            " Orig = [2 as x];"
            " T1 = [3 as x];"
            " do"
            " Bad = diff(T1, Orig);"
            " T1 = [3 as x, 3 as y];"
            " while [from Bad emit count(*) > 0];"
            " store(T1, OUTPUT); "
        )

        with self.assertRaises(SchemaMismatchException):
            # TODO If executed, this test loops infinitely
            self.get_logical_plan(query)

    def test_intersect1(self):
        query = (
            " out = INTERSECT(SCAN(%s), SCAN(%s));"
            " STORE(out, OUTPUT); "
        ) % (self.emp_key1, self.emp_key2)

        shared = set(self.emp_table2) & set(self.emp_table1)
        self.check_result(query, collections.Counter(shared),
                          skip_json=True)

    def test_intersect2(self):
        query = (
            " out = INTERSECT(SCAN(%s), SCAN(%s));"
            " STORE(out, OUTPUT); "
        ) % (self.emp_key2, self.emp_key1)

        shared = set(self.emp_table1) & set(self.emp_table2)
        self.check_result(query, collections.Counter(shared),
                          skip_json=True)

    def test_intersect_schema_mismatch(self):
        """INTERSECT of inputs with different column counts is rejected."""
        query = (
            " T1 = [FROM SCAN(%s) AS X EMIT id, dept_id, name];"
            " out = INTERSECT(T1, SCAN(%s));"
            " STORE(out, OUTPUT); "
        ) % (self.emp_key1, self.emp_key2)

        with self.assertRaises(SchemaMismatchException):
            self.get_logical_plan(query)
class TestQueryFunctions(datalog_test.DatalogTestCase):
    """Datalog query tests over employee, department and edge relations.

    Each test builds the expected bag of tuples in plain Python from the
    fixture tables and compares it with the query output via
    ``check_result``.
    """

    emp_table = collections.Counter([
        # id dept_id name salary
        (1, 2, "Bill Howe", 25000),
        (2, 1, "Dan Halperin", 90000),
        (3, 1, "Andrew Whitaker", 5000),
        (4, 2, "Shumo Chu", 5000),
        (5, 1, "Victor Almeida", 25000),
        (6, 3, "Dan Suciu", 90000),
        (7, 1, "Magdalena Balazinska", 25000)
    ])

    emp_schema = scheme.Scheme([("id", types.LONG_TYPE),
                                ("dept_id", types.LONG_TYPE),
                                ("name", types.STRING_TYPE),
                                ("salary", types.LONG_TYPE)])

    emp_key = "employee"

    dept_table = collections.Counter([(1, "accounting", 5),
                                      (2, "human resources", 2),
                                      (3, "engineering", 2),
                                      (4, "sales", 7)])

    dept_schema = scheme.Scheme([("id", types.LONG_TYPE),
                                 ("name", types.STRING_TYPE),
                                 ("manager", types.LONG_TYPE)])

    dept_key = "department"

    edge_table = collections.Counter([
        (1, 2), (2, 3), (3, 4), (4, 3), (3, 5),
        (4, 13), (5, 4), (1, 9), (7, 1), (6, 1),
        (10, 11), (11, 12), (12, 10), (13, 4), (10, 1)])

    edge_schema = scheme.Scheme([("src", types.LONG_TYPE),
                                 ("dst", types.LONG_TYPE)])

    edge_key = "Edge"

    def setUp(self):
        """Ingest the three fixture relations into the test database."""
        super(TestQueryFunctions, self).setUp()

        self.db.ingest(TestQueryFunctions.emp_key,
                       TestQueryFunctions.emp_table,
                       TestQueryFunctions.emp_schema)

        self.db.ingest(TestQueryFunctions.dept_key,
                       TestQueryFunctions.dept_table,
                       TestQueryFunctions.dept_schema)

        self.db.ingest(TestQueryFunctions.edge_key,
                       TestQueryFunctions.edge_table,
                       TestQueryFunctions.edge_schema)

    def test_simple_join(self):
        """Join employees to departments on dept_id."""
        expected = collections.Counter(
            [(e[2], d[1]) for e in self.emp_table.elements()
             for d in self.dept_table.elements() if e[1] == d[0]])

        query = (
            " EmpDepts(emp_name, dept_name) :-"
            " employee(a, dept_id, emp_name, b),"
            " department(dept_id, dept_name, c) "
        )

        self.check_result(query, expected, output='EmpDepts')

    def test_filter(self):
        """Select the names of employees whose salary exceeds 25000."""
        query = (
            " RichGuys(name) :-"
            " employee(a, b, name, salary), salary > 25000 "
        )

        expected = collections.Counter(
            [(x[2], ) for x in TestQueryFunctions.emp_table.elements()
             if x[3] > 25000])

        self.check_result(query, expected, output='RichGuys')

    def test_count(self):
        """Count outgoing edges per source vertex."""
        query = " OutDegree(src, count(dst)) :- Edge(src, dst) "

        counter = collections.Counter()
        for (src, _) in self.edge_table.elements():
            counter[src] += 1

        # items() instead of iteritems(): iteritems() exists only on
        # Python 2 dicts, while items() works on both 2 and 3.
        ex = [(src, cnt) for src, cnt in counter.items()]
        expected = collections.Counter(ex)

        self.check_result(query, expected, output='OutDegree')

    def test_sum_reorder(self):
        """Aggregate column listed before the grouping column."""
        query = " SalaryByDept(sum(salary), dept_id) :- employee(id, dept_id, name, salary);"  # noqa

        results = collections.Counter()
        for emp in self.emp_table.elements():
            results[emp[1]] += emp[3]

        # items() instead of the Python-2-only iteritems().
        expected = collections.Counter(
            [(y, x) for x, y in results.items()])

        self.check_result(query, expected, output='SalaryByDept')

    def test_aggregate_no_groups(self):
        """Aggregate without any grouping column."""
        query = " Total(count(x)) :- Edge(x, y) "

        expected = collections.Counter([(len(self.edge_table), )])

        self.check_result(query, expected, output='Total')

    def test_multiway_join_chained(self):
        """Three-hop reachability from vertex 1 via chained rules."""
        query = (
            " OneHop(x) :- Edge(1, x);"
            " TwoHop(x) :- OneHop(y), Edge(y, x);"
            " ThreeHop(x) :- TwoHop(y), Edge(y, x) "
        )

        expected = collections.Counter([(4, ), (5, )])

        self.check_result(query, expected, output='ThreeHop')

    def test_triangles(self):
        # TODO. Right now we have to do this separately so that the x<y
        # and x<z conditions are not put in the Join, rather rendered as
        # Selects. Myrialang barfs on theta-joins.
        query = (
            " T(x,y,z) :- Edge(x,y), Edge(y,z), Edge(z,x);"
            " A(x,y,z) :- T(x,y,z), x < y, x < z. \n"
        )

        expected = collections.Counter([(3, 5, 4), (10, 11, 12)])

        self.check_result(query, expected, output='A')

    def test_multiway_join(self):
        """Three-hop reachability from vertex 1 in a single rule."""
        query = " ThreeHop(z) :- Edge(1, x), Edge(x,y), Edge(y, z); "

        expected = collections.Counter([(4, ), (5, )])

        self.check_result(query, expected, output='ThreeHop')

    def test_multiway_join_hyper_cube(self):
        """Same query as test_multiway_join, with MyriaHyperCubeAlgebra."""
        query = " ThreeHop(z) :- Edge(1, x), Edge(x,y), Edge(y, z); "

        expected = collections.Counter([(4, ), (5, )])

        self.check_result(query, expected, output='ThreeHop',
                          algebra=MyriaHyperCubeAlgebra)

    def test_union(self):
        """Two rules with the same head."""
        query = (
            " OUTPUT(b) :- {emp}(a, b, c, d)"
            " OUTPUT(b) :- {edge}(b, a) "
        ).format(emp=self.emp_key, edge=self.edge_key)

        expected = collections.Counter(
            [(b, ) for (a, b, c, d) in self.emp_table] +
            [(b, ) for (b, a) in self.edge_table])

        self.check_result(query, expected, test_logical=True)

    def test_filter_expression(self):
        """Two range conditions on the same column."""
        query = (
            " OUTPUT(a, b, c) :- {emp}(a, b, c, d),"
            " d >= 25000, d < 91000 "
        ).format(emp=self.emp_key)

        expected = collections.Counter(
            [(a, b, c) for (a, b, c, d) in self.emp_table
             if (d >= 25000 and d < 91000)])

        self.check_result(query, expected)

    def test_attributes_forward(self):
        """test that attributes are correct amid multiple conditions"""
        query = (
            " OUTPUT(a) :- {edge}(a, b), {emp}(c, a, x, y), b=c "
        ).format(emp=self.emp_key, edge=self.edge_key)

        expected = collections.Counter(
            [(a, ) for (a, b) in self.edge_table
             for (c, a2, x, y) in self.emp_table
             if (a == a2 and b == c)])

        self.check_result(query, expected)

    def test_attributes_reverse(self):
        """test that attributes are correct amid multiple conditions and
        when the order of variables in the terms is the opposite of the
        explicit condition"""
        query = (
            " OUTPUT(a) :- {edge}(a, b), {emp}(c, a, x, y), c=b "
        ).format(emp=self.emp_key, edge=self.edge_key)

        expected = collections.Counter(
            [(a, ) for (a, b) in self.edge_table
             for (c, a2, x, y) in self.emp_table
             if (a == a2 and b == c)])

        self.check_result(query, expected)

    def test_apply_head(self):
        """Arithmetic expression in the rule head."""
        query = (
            " OUTPUT(a/b) :- {emp}(a, b, c, d) "
        ).format(emp=self.emp_key)

        expected = collections.Counter(
            [(a * 1.0 / b, ) for (a, b, _, _) in self.emp_table])

        self.check_result(query, expected)

    def test_aggregate_head(self):
        """Single aggregate in the rule head."""
        query = (
            " OUTPUT(SUM(a)) :- {emp}(a, b, c, d) "
        ).format(emp=self.emp_key)

        expected = collections.Counter([
            (sum(a for (a, _, _, _) in self.emp_table), )
        ])

        self.check_result(query, expected)

    def test_twoaggregate_head(self):
        """Two aggregates in the rule head."""
        query = (
            " OUTPUT(SUM(a), COUNT(b)) :- {emp}(a, b, c, d) "
        ).format(emp=self.emp_key)

        expected = collections.Counter([
            (sum(a for (a, _, _, _) in self.emp_table),
             sum(1 for (_, b, _, _) in self.emp_table))
        ])

        self.check_result(query, expected)

    def test_aggregate_head_group_self(self):
        """Aggregate followed by the grouping column."""
        query = (
            " OUTPUT(SUM(a), b) :- {emp}(a, b, c, d) "
        ).format(emp=self.emp_key)

        B = set(b for (_, b, _, _) in self.emp_table)
        expected = collections.Counter(
            [(sum(a for (a, b, _, _) in self.emp_table if b == b2), b2)
             for b2 in B])

        self.check_result(query, expected)

    def test_aggregate_head_group_swap(self):
        """Grouping column followed by the aggregate."""
        query = (
            " OUTPUT(b,SUM(a)) :- {emp}(a, b, c, d) "
        ).format(emp=self.emp_key)

        B = set(b for (_, b, _, _) in self.emp_table)
        expected = collections.Counter(
            [(b2, sum(a for (a, b, _, _) in self.emp_table if b == b2))
             for b2 in B])

        self.check_result(query, expected)

    def test_binop_aggregates(self):
        """Binary operation applied to two aggregates."""
        query = (
            " OUTPUT(SUM(b)+SUM(a)) :- {emp}(a, b, c, d) "
        ).format(emp=self.emp_key)

        expected = collections.Counter([
            (sum(b for (a, b, _, _) in self.emp_table) +
             sum(a for (a, b, _, _) in self.emp_table), )
        ])

        self.check_result(query, expected)

    def test_aggregate_of_binop(self):
        """Aggregate applied to a binary operation."""
        query = (
            " OUTPUT(SUM(b+a)) :- {emp}(a, b, c, d) "
        ).format(emp=self.emp_key)

        expected = collections.Counter([
            (sum([(a + b) for (a, b, c, d) in self.emp_table]), )
        ])

        self.check_result(query, expected)

    def test_literal_expr(self):
        """Expression with a literal operand in the rule head."""
        query = " OUTPUT(z+1) :- Edge(z, y) "

        expected = collections.Counter(
            [(z + 1, ) for (z, _) in self.edge_table])

        self.check_result(query, expected)
class CFGTest(myrial_test.MyrialTestCase):
    """Tests of the statement processor's control flow graph.

    Covers graph construction, liveness, dead code / dead loop
    elimination and chaining, all over an empty ``points`` relation.
    The deprecated ``assertEquals`` alias (removed in Python 3.12) is
    avoided in favour of ``assertEqual``.
    """

    points_table = collections.Counter()
    points_schema = scheme.Scheme([('id', types.LONG_TYPE),
                                   ('x', types.DOUBLE_TYPE),
                                   ('y', types.DOUBLE_TYPE)])
    points_key = "public:adhoc:points"

    def setUp(self):
        """Ingest the (empty) points relation into the test database."""
        super(CFGTest, self).setUp()

        self.db.ingest(CFGTest.points_key,
                       CFGTest.points_table,
                       CFGTest.points_schema)

    def test_cfg(self):
        """Check nodes, edges and liveness sets for a do/while program."""
        query = (
            " Point = SCAN(public:adhoc:points);"
            " DO"
            " Big = [FROM Point WHERE x * y > 100 EMIT *];"
            " Continue = [FROM Big, Point EMIT COUNT(*) > 0 AS cnt];"
            " WHILE Continue;"
            " STORE(Big, OUTPUT); "
        )

        statements = self.parser.parse(query)
        self.processor.evaluate(statements)

        expected = nx.DiGraph()

        expected.add_node(0, def_var="Point", uses=set())
        expected.add_node(1, def_var="Big", uses={"Point"})
        expected.add_node(2, def_var="Continue", uses={"Big", "Point"})
        expected.add_node(3, def_var=None, uses={"Continue"})
        expected.add_node(4, def_var=None, uses={"Big"})

        # Straight-line edges 0->1->2->3->4 plus the loop back edge 3->1.
        for i in range(4):
            expected.add_edge(i, i + 1)
        expected.add_edge(3, 1)

        actual = self.processor.cfg.graph

        self.assertEqual(actual.adj, expected.adj)
        self.assertEqual(len(actual), len(expected))

        for n in expected:
            self.assertIn(n, actual)
            self.assertEqual(actual.node[n]['uses'],
                             expected.node[n]['uses'])
            self.assertEqual(actual.node[n]['def_var'],
                             expected.node[n]['def_var'])

        live_in, live_out = self.processor.cfg.compute_liveness()

        self.assertEqual(
            live_out,
            {
                0: {'Point'},
                1: {'Point', 'Big'},
                2: {'Continue', 'Big', 'Point'},
                3: {'Big', 'Point'},
                4: set()
            })

        self.assertEqual(
            live_in,
            {
                0: set(),
                1: {'Point'},
                2: {'Big', 'Point'},
                3: {'Big', 'Point', 'Continue'},
                4: {'Big'}
            })

    def test_dead_code_elim(self):
        """Dead code elimination keeps only nodes 2, 6, 7 and 8."""
        with open('examples/deadcode.myl') as fh:
            query = fh.read()

        statements = self.parser.parse(query)
        self.processor.evaluate(statements)

        self.assertEqual(set(self.processor.cfg.graph.nodes()),
                         set(range(9)))

        self.processor.cfg.dead_code_elimination()

        self.assertEqual(set(self.processor.cfg.graph.nodes()),
                         {2, 6, 7, 8})

    def test_bug_245_dead_loop_elim_do_while(self):
        """Dead loop plus dead code elimination empties the graph."""
        with open('examples/deadcode2.myl') as fh:
            query = fh.read()

        statements = self.parser.parse(query)
        self.processor.evaluate(statements)

        self.assertEqual(set(self.processor.cfg.graph.nodes()),
                         set(range(3)))

        self.processor.cfg.dead_loop_elimination()
        self.processor.cfg.dead_code_elimination()

        self.assertEqual(set(self.processor.cfg.graph.nodes()), set())

    def test_dead_loop_interior(self):
        """Test of a dead loop before the end of the program."""
        query = (
            " x = [0 as val, 1 as exp];"
            " y = x;"
            " do"
            " x = [from x emit val+1 as val, 2*exp as exp];"
            " while [from x emit val < 5];"
            " store(y, OUTPUT); "
        )

        statements = self.parser.parse(query)
        self.processor.evaluate(statements)

        self.assertEqual(set(self.processor.cfg.graph.nodes()),
                         set(range(5)))

        self.processor.cfg.dead_loop_elimination()
        self.processor.cfg.dead_code_elimination()

        self.assertEqual(set(self.processor.cfg.graph.nodes()),
                         {0, 1, 4})

    def test_two_dead_loops(self):
        """Test of two unrelated dead loops."""
        query = (
            " x = [0 as val, 1 as exp];"
            " y = x;"
            " z = y;"
            " do"
            " x = [from x emit val+1 as val, 2*exp as exp];"
            " while [from x emit val < 5];"
            " do"
            " z = [from z emit val+1 as val, 2*exp as exp];"
            " while [from z emit val < 5];"
            " store(y, OUTPUT); "
        )

        statements = self.parser.parse(query)
        self.processor.evaluate(statements)

        self.assertEqual(set(self.processor.cfg.graph.nodes()),
                         set(range(8)))

        self.processor.cfg.dead_loop_elimination()
        self.processor.cfg.dead_code_elimination()

        self.assertEqual(set(self.processor.cfg.graph.nodes()),
                         {0, 1, 7})

    def test_two_dead_loops_samevar(self):
        """Test that recursive calls to dead_loop_elimination remove
        repeated dead loops reading/writing the same variable."""
        query = (
            " x = [0 as val, 1 as exp];"
            " y = x;"
            " do"
            " x = [from x emit val+1 as val, 2*exp as exp];"
            " while [from x emit val < 5];"
            " do"
            " x = [from x emit val+1 as val, 2*exp as exp];"
            " while [from x emit val < 5];"
            " store(y, OUTPUT); "
        )

        statements = self.parser.parse(query)
        self.processor.evaluate(statements)

        self.assertEqual(set(self.processor.cfg.graph.nodes()),
                         set(range(7)))

        self.processor.cfg.dead_loop_elimination()
        self.processor.cfg.dead_code_elimination()

        self.assertEqual(set(self.processor.cfg.graph.nodes()),
                         {0, 1, 6})

    def test_chaining(self):
        """Chaining collapses an eight-statement program into one node."""
        query = (
            " A = SCAN(public:adhoc:points);"
            " B = SCAN(public:adhoc:points);"
            " C = UNIONALL(A, B);"
            " D = DISTINCT(C);"
            " E = SCAN(public:adhoc:points);"
            " F = DIFF(E, D);"
            " G = DISTINCT(F);"
            " STORE(G, OUTPUT); "
        )

        statements = self.parser.parse(query)
        self.processor.evaluate(statements)

        self.assertEqual(len(self.processor.cfg.graph), 8)

        self.processor.cfg.apply_chaining()

        self.assertEqual(len(self.processor.cfg.graph), 1)

    def test_chaining_variable_reuse(self):
        """Test of chaining with re-used variable names."""
        query = (
            " X = SCAN(public:adhoc:points);"
            " Y = SCAN(public:adhoc:points);"
            " X = [FROM X, Y WHERE X.x == Y.y EMIT Y.*];"
            " X = DISTINCT(X);"
            " STORE(X, OUTPUT); "
        )

        statements = self.parser.parse(query)
        self.processor.evaluate(statements)

        self.assertEqual(len(self.processor.cfg.graph), 5)

        self.processor.cfg.apply_chaining()

        self.assertEqual(self.processor.cfg.graph.nodes(), [4])
        self.assertEqual(len(self.processor.cfg.graph.node[4]['uses']), 0)

    def test_chaining_dead_code_elim(self):
        """Dead code elimination followed by chaining."""
        query = (
            " Q = DISTINCT(SCAN(public:adhoc:points));"
            " DO"
            " A = SCAN(public:adhoc:points);"
            " B = SCAN(public:adhoc:points);"
            " P = DISTINCT(A);"
            " C = DIFF(A, B);"
            " Continue = COUNTALL(C);"
            " WHILE Continue;"
            " STORE(C, OUTPUT);"
            " X = SCAN(public:adhoc:points); "
        )

        statements = self.parser.parse(query)
        self.processor.evaluate(statements)

        self.assertEqual(len(self.processor.cfg.graph), 9)

        self.processor.cfg.dead_code_elimination()

        self.assertEqual(set(self.processor.cfg.graph.nodes()),
                         {1, 2, 4, 5, 6, 7})

        self.processor.cfg.apply_chaining()

        self.assertEqual(set(self.processor.cfg.graph.nodes()),
                         {4, 6, 7})
def get_scheme(self, rel_key):
    """Build a ``scheme.Scheme`` for the table stored under ``str(rel_key)``.

    Each column of the table in ``self.metadata.tables`` becomes a
    ``(name, type)`` pair, where the type is looked up in ``type_to_raco``
    by the class of the column's type object.  Raises KeyError if the
    table or a column type class has no entry.
    """
    table = self.metadata.tables[str(rel_key)]
    attributes = (
        (column.name, type_to_raco[type(column.type)])
        for column in table.columns
    )
    return scheme.Scheme(attributes)