Example #1
0
    def setUp(self):
        """Register empty fake tables ``<prefix><width>`` for every prefix.

        For each prefix in R, S, T, I, D, C and each width 1..3 an empty
        relation named ``public:adhoc:<prefix><width>`` is ingested, and the
        short-name -> qualified-name mapping is stored in ``self.tables``.
        Column ``a`` is always a long; columns ``b`` and ``c`` are doubles
        for ``D`` tables, strings for ``C`` tables and longs otherwise.
        """
        super(MyriaLPlatformTestHarness, self).setUp()

        self.tables = {}
        for prefix in ('R', 'S', 'T', 'I', 'D', 'C'):
            if prefix == 'D':
                payload_type = types.DOUBLE_TYPE
            elif prefix == 'C':
                payload_type = types.STRING_TYPE
            else:
                payload_type = types.LONG_TYPE

            # Widest layout; a table of width w uses the first w columns.
            columns = [("a", types.LONG_TYPE),
                       ("b", payload_type),
                       ("c", payload_type)]

            for width in (1, 2, 3):
                short_name = "%s%d" % (prefix, width)
                qualified = "public:adhoc:%s" % short_name
                self.tables[short_name] = qualified

                # ingest fake data; data is already generated separately for
                # now
                self.db.ingest(qualified, Counter(),
                               scheme.Scheme(columns[:width]))
Example #2
0
class PageRankTest(myrial_test.MyrialTestCase):
    """Run the PageRank example programs on a small five-vertex graph."""

    # Directed (src, dst) edges, grouped by source vertex.
    edge_key = "public:adhoc:edges"
    edge_schema = scheme.Scheme([("src", types.LONG_TYPE),
                                 ("dst", types.LONG_TYPE)])
    edge_table = collections.Counter([
        (0, 3),
        (1, 0), (1, 2),
        (2, 0), (2, 1), (2, 3),
        (3, 0), (3, 1), (3, 2), (3, 4),
        (4, 0), (4, 1), (4, 2), (4, 3), (4, 4),
    ])

    # One single-column row per vertex id 0..4.
    vertex_key = "public:adhoc:vertices"
    vertex_schema = scheme.Scheme([("id", types.LONG_TYPE)])
    vertex_table = collections.Counter((v, ) for v in range(5))

    def setUp(self):
        """Ingest the edge and vertex relations into the test database."""
        super(PageRankTest, self).setUp()

        fixture = PageRankTest
        self.db.ingest(fixture.edge_key, fixture.edge_table,
                       fixture.edge_schema)
        self.db.ingest(fixture.vertex_key, fixture.vertex_table,
                       fixture.vertex_schema)

    def __do_test(self, phile):
        """Execute the program stored in file ``phile`` and check its output.

        The result rows are turned into a dict and the values stored under
        keys 0..4 are compared (approximately) with the expected ranks.
        """
        with open(phile) as fh:
            program = fh.read()

        ranks = dict(self.execute_query(program).elements())

        # Expected value for key i is at position i.
        expected_ranks = (
            0.23576110832410296,
            0.16544845649781043,
            0.18370688939571236,
            0.3016893082129546,
            0.11339423756941983,
        )
        for key, rank in enumerate(expected_ranks):
            self.assertAlmostEqual(ranks[key], rank)

    def test_pagerank(self):
        self.__do_test('examples/pagerank.myl')

    def verify_undefined(self, var):
        """Assert that looking up temp table ``var`` raises ``KeyError``."""
        with self.assertRaises(KeyError):
            self.db.get_temp_table(var)

    def test_pagerank_deadcode(self):
        """Test of page rank with numerous dead code statements."""
        self.__do_test('examples/pagerank_dead.myl')

        # Verify that D0, D1 tables are compiled out
        for dead_table in ("D0", "D1"):
            self.verify_undefined(dead_table)
Example #3
0
    def test_simple_scheme(self):
        """A scheme stored with add_table is returned equal by get_scheme."""
        sch = scheme.Scheme()
        sch.addAttribute('w', types.FLOAT_TYPE)
        sch.addAttribute('x', types.INT_TYPE)
        sch.addAttribute('y', types.LONG_TYPE)
        sch.addAttribute('z', types.STRING_TYPE)

        self.db.add_table('simple', sch)

        sch2 = self.db.get_scheme('simple')
        # assertEqual rather than the deprecated assertEquals alias, which
        # no longer exists in Python 3.12+.
        self.assertEqual(sch, sch2)
Example #4
0
class FakeData(object):
    """Shared fixture data: relation keys, schemas and row multisets.

    Each relation is described by three class attributes: ``*_key`` (its
    qualified name), ``*_schema`` (column names and types) and ``*_table``
    (a ``collections.Counter`` of row tuples).
    """

    # ---- employee: (id, dept_id, name, salary) ----
    emp_key = "public:adhoc:employee"

    emp_schema = scheme.Scheme([
        ("id", types.INT_TYPE),
        ("dept_id", types.INT_TYPE),
        ("name", types.STRING_TYPE),
        ("salary", types.LONG_TYPE),
    ])

    emp_table = collections.Counter([
        (1, 2, "Bill Howe", 25000),
        (2, 1, "Dan Halperin", 90000),
        (3, 1, "Andrew Whitaker", 5000),
        (4, 2, "Shumo Chu", 5000),
        (5, 1, "Victor Almeida", 25000),
        (6, 3, "Dan Suciu", 90000),
        (7, 1, "Magdalena Balazinska", 25000),
    ])

    # ---- department: (id, name, manager) ----
    dept_key = "public:adhoc:department"

    dept_schema = scheme.Scheme([
        ("id", types.LONG_TYPE),
        ("name", types.STRING_TYPE),
        ("manager", types.LONG_TYPE),
    ])

    dept_table = collections.Counter([
        (1, "accounting", 5),
        (2, "human resources", 2),
        (3, "engineering", 2),
        (4, "sales", 7),
    ])

    # ---- numbers: (id, val) ----
    numbers_key = "public:adhoc:numbers"

    numbers_schema = scheme.Scheme([
        ("id", types.LONG_TYPE),
        ("val", types.DOUBLE_TYPE),
    ])

    numbers_table = collections.Counter([
        (1, 3),
        (2, 5),
        (3, -2),
        (16, -4.3),
    ])

    # Description of a fake function as a plain 6-tuple.
    # NOTE(review): the meaning of each position is defined by the consumer
    # of this tuple, which is not visible here -- confirm before relying on it.
    test_function = (
        "test",
        "function_text",
        1,
        "id (INT_TYPE), dept_id (INT_TYPE)",
        "INT_TYPE",
        "test_body",
    )
Example #5
0
 def get_scheme(self, rel_key):
     """Fetch the scheme of relation ``rel_key`` through ``self.connection``.

     ``rel_key`` must expose ``user``, ``program`` and ``relation``
     attributes. Raises ``ValueError`` when there is no connection or when
     the dataset lookup raises ``myria.MyriaError``.
     """
     relation_args = dict(userName=rel_key.user,
                          programName=rel_key.program,
                          relationName=rel_key.relation)

     if not self.connection:
         message = "no schema for relation %s because no connection" % rel_key
         raise ValueError(message)

     try:
         dataset_info = self.connection.dataset(relation_args)
     except myria.MyriaError:
         raise ValueError(rel_key)

     # Pair each column name with the type at the same position.
     remote_schema = dataset_info['schema']
     column_names = remote_schema['columnNames']
     column_types = remote_schema['columnTypes']
     return scheme.Scheme(zip(column_names, column_types))
Example #6
0
class SQLTestCase(unittest.TestCase):
    """A base for testing the compilation of RACO programs to SQL queries"""

    emp_table = [
        # id dept_id name salary mgr_id
        (0, 1, "Hank Levy", 1000000, -1),
        (1, 2, "Bill Howe", 25000, 0),
        (2, 1, "Dan Halperin", 90000, 0),
        (3, 1, "Andrew Whitaker", 5000, 0),
        (4, 2, "Shumo Chu", 5000, 0),
        (5, 1, "Victor Almeida", 25000, 0),
        (6, 3, "Dan Suciu", 90000, 0),
        (7, 1, "Magdalena Balazinska", 25000, 0)
    ]

    emp_schema = scheme.Scheme([("id", types.INT_TYPE),
                                ("dept_id", types.INT_TYPE),
                                ("name", types.STRING_TYPE),
                                ("salary", types.LONG_TYPE),
                                ("mgr_id", types.INT_TYPE)])

    emp_key = "public:adhoc:employee"

    def setUp(self):
        """Create an in-memory SQLite catalog holding the employee table,
        plus a MyriaL parser and statement processor bound to it."""
        # SQLAlchemy
        self.db = SQLCatalog(
            sqlalchemy.create_engine('sqlite:///:memory:', echo=True))
        self.db.add_table(self.emp_key, self.emp_schema)
        self.db.add_tuples(self.emp_key, self.emp_schema, self.emp_table)
        # MyriaL
        self.parser = parser.Parser()
        self.processor = interpreter.StatementProcessor(self.db)

    def query_to_phys_plan(self, query, **kwargs):
        """Parse and evaluate ``query`` and return its optimized plan.

        ``kwargs`` are forwarded to ``get_logical_plan``. The logical plan is
        rewritten with ``OptLogicalAlgebra.opt_rules()``; if the root is a
        ``Store`` or ``StoreTemp`` operator, its input is returned instead.
        """
        statements = self.parser.parse(query)
        self.processor.evaluate(statements)
        p = self.processor.get_logical_plan(**kwargs)
        p = optimize_by_rules(p, OptLogicalAlgebra.opt_rules())
        if isinstance(p, (algebra.Store, algebra.StoreTemp)):
            p = p.input
        return p

    def execute(self, query, expected, **kwargs):
        """Evaluate ``query`` on the catalog and compare with ``expected``.

        The rows returned by the catalog are collected into a ``Counter``
        and must equal ``expected``.
        """
        p = self.query_to_phys_plan(query, **kwargs)
        ans = self.db.evaluate(p)
        # assertEqual rather than the deprecated assertEquals alias, which
        # no longer exists in Python 3.12+.
        self.assertEqual(expected, Counter(ans))
Example #7
0
class KmeansTest(myrial_test.MyrialTestCase):
    """Run the k-means example program over nine (id, x, y) points."""

    points_key = "public:adhoc:points"
    points_schema = scheme.Scheme([('id', types.LONG_TYPE),
                                   ('x', types.DOUBLE_TYPE),
                                   ('y', types.DOUBLE_TYPE)])

    # Three groups of three points, near (1, 1), (10, 10) and (100, 100).
    points = [
        (1, 1.0, 1.0), (2, 0.99, 0.99), (3, 1.01, 1.01),
        (4, 10.0, 10.0), (5, 10.99, 10.99), (6, 10.01, 10.01),
        (7, 100.0, 100.0), (8, 100.99, 100.99), (9, 100.01, 100.01),
    ]
    points_table = collections.Counter(points)

    def setUp(self):
        """Ingest the points relation into the test database."""
        super(KmeansTest, self).setUp()

        fixture = KmeansTest
        self.db.ingest(fixture.points_key, fixture.points_table,
                       fixture.points_schema)

    def test_kmeans(self):
        """The example program executes; its output is not inspected."""
        with open('examples/kmeans.myl') as fh:
            program = fh.read()
        self.execute_query(program, skip_json=True)
class SigmaClippingTest(myrial_test.MyrialTestCase):
    """Check the sigma-clipping example programs on a small 1-D sample."""

    points = [
        25.0, 27.2, 23.4, 25.1, 26.3, 24.9, 23.5, 22.7, 108.2, 26.2, 25.3,
        24.7, 25.01, 26.1, 22.8, 2.2, 24.8, 25.05, 25.15
    ]
    # (id, value) rows; the id is the value's position in ``points``.
    points_tuples = list(enumerate(points))
    points_table = collections.Counter(points_tuples)

    points_schema = scheme.Scheme([('id', types.LONG_TYPE),
                                   ('v', types.DOUBLE_TYPE)])  # noqa
    points_key = "public:adhoc:sc_points"

    def setUp(self):
        """Ingest the sample points plus an empty relation named "empty"."""
        super(SigmaClippingTest, self).setUp()

        fixture = SigmaClippingTest
        self.db.ingest(fixture.points_key, fixture.points_table,
                       fixture.points_schema)

        # TODO: Better support for empty relations in the language
        self.db.ingest("empty", collections.Counter(), fixture.points_schema)

    def run_it(self, query):
        """Run ``query`` and expect exactly the rows with 22 < v < 28."""
        survivors = [row for row in self.points_tuples if 22 < row[1] < 28]
        self.check_result(query, collections.Counter(survivors),
                          output='sc_points_clipped')

    def test_v0(self):
        with open('examples/sigma-clipping-v0.myl') as fh:
            program = fh.read()
        self.run_it(program)

    def test_v2(self):
        with open('examples/sigma-clipping.myl') as fh:
            program = fh.read()
        self.run_it(program)
Example #9
0
class ReachableTest(myrial_test.MyrialTestCase):
    """Reachability and join tests over a small directed edge relation."""

    edge_key = "public:adhoc:edges"
    edge_schema = scheme.Scheme([("src", types.LONG_TYPE),
                                 ("dst", types.LONG_TYPE)])

    # Directed (src, dst) edges.
    edge_table = collections.Counter([
        (1, 2), (2, 3), (3, 4), (4, 3), (3, 5),
        (4, 13), (5, 4), (1, 9), (7, 1), (6, 1),
        (10, 11), (11, 12), (12, 10), (13, 4), (10, 1),
    ])

    def setUp(self):
        """Ingest the edge relation into the test database."""
        super(ReachableTest, self).setUp()

        fixture = ReachableTest
        self.db.ingest(fixture.edge_key, fixture.edge_table,
                       fixture.edge_schema)

    def test_reachable(self):
        """The reachable example yields the expected single-column rows."""
        with open('examples/reachable.myl') as fh:
            program = fh.read()

        expected = collections.Counter(
            (vertex,) for vertex in (1, 2, 3, 4, 5, 9, 13))

        self.check_result(program, expected, skip_json=True)

    def test_multi_condition_join(self):
        """Self-join with two equality conditions and one inequality."""
        query = """
        Edge = SCAN(public:adhoc:edges);
        Symmetric = [FROM Edge AS E1, Edge AS E2
                     WHERE E1.src==E2.dst
                       AND E2.src==E1.dst
                       AND E1.src < E1.dst
                     EMIT E1.src AS src, E1.dst AS dst];
        STORE(Symmetric, OUTPUT);
        """
        edges = ReachableTest.edge_table
        # Same join evaluated in Python: keep (src, dst) whenever the
        # reversed edge also exists and src < dst.
        matches = []
        for (src, dst) in edges:
            for (other_src, other_dst) in edges:
                if src == other_dst and dst == other_src and src < dst:
                    matches.append((src, dst))
        self.check_result(query, collections.Counter(matches))

    def test_cross_plus_selection_becomes_join(self):
        """Test that the optimizer compiles away cross-products."""
        with open('examples/reachable.myl') as fh:
            program = fh.read()

        def plan_contains_cross(plan):
            # Yield True for each CrossProduct whose left input is not a
            # SingletonRelation; any() then reports whether one exists.
            def flag_cross(op):
                if not isinstance(op, raco.algebra.CrossProduct):
                    return
                if not isinstance(op.left, raco.algebra.SingletonRelation):
                    yield True

            return any(plan.postorder(flag_cross))

        self.processor.evaluate(self.parser.parse(program))

        logical = self.processor.get_logical_plan()
        self.assertTrue(plan_contains_cross(logical))

        physical = self.processor.get_physical_plan()
        self.assertFalse(plan_contains_cross(physical))
Example #10
0
 def p_expression_load(p):
     'expression : LOAD LPAREN STRING_LITERAL COMMA file_parser_fun RPAREN'
     # The string above is the grammar rule, not documentation: keep it
     # exactly as written.
     # p[3] is the STRING_LITERAL symbol; p[5] is the file_parser_fun value,
     # a 3-item sequence whose second item is wrapped in a Scheme.
     file_format, columns, parser_options = p[5]
     load_scheme = scheme.Scheme(columns)
     p[0] = ('LOAD', p[3], file_format, load_scheme, parser_options)
Example #11
0
 def p_expression_empty(p):
     'expression : EMPTY LPAREN column_def_list RPAREN'
     # The string above is the grammar rule, not documentation: keep it
     # exactly as written. p[3] is the column_def_list value.
     empty_scheme = scheme.Scheme(p[3])
     p[0] = ('EMPTY', empty_scheme)
Example #12
0
class SetopTestFunctions(myrial_test.MyrialTestCase):
    """Tests of the bag/set operators on two employee relations.

    Both relations share ``emp_schema``; ``emp_table1`` and ``emp_table2``
    deliberately contain duplicate rows.
    """

    emp_schema = scheme.Scheme([("id", types.LONG_TYPE),
                                ("dept_id", types.LONG_TYPE),
                                ("name", types.STRING_TYPE),
                                ("salary", types.LONG_TYPE)])

    emp_key1 = "andrew:adhoc:employee1"
    emp_table1 = collections.Counter([
        (1, 2, "Bill Howe", 25000),
        (1, 2, "Bill Howe", 25000),
        (2, 1, "Dan Halperin", 90000),
        (3, 1, "Andrew Whitaker", 5000),
        (3, 1, "Andrew Whitaker", 5000),
        (4, 2, "Shumo Chu", 5000),
        (5, 1, "Victor Almeida", 25000),
        (6, 3, "Dan Suciu", 90000),
        (7, 1, "Magdalena Balazinska", 25000),
    ])

    emp_key2 = "andrew:adhoc:employee2"
    emp_table2 = collections.Counter([
        (1, 2, "Bill Howe", 25000),
        (7, 1, "Magdalena Balazinska", 25000),
        (7, 1, "Magdalena Balazinska", 25000),
        (8, 2, "JingJing Wang", 47000),
    ])

    def setUp(self):
        """Ingest both employee relations into the test database."""
        super(SetopTestFunctions, self).setUp()

        fixture = SetopTestFunctions
        relations = ((fixture.emp_key1, fixture.emp_table1),
                     (fixture.emp_key2, fixture.emp_table2))
        for key, table in relations:
            self.db.ingest(key, table, fixture.emp_schema)

    def test_unionall(self):
        """``+`` of two scans is the bag sum of both tables."""
        program = """
        out = SCAN(%s) + SCAN(%s);
        STORE(out, OUTPUT);
        """ % (self.emp_key1, self.emp_key2)

        self.check_result(program, self.emp_table1 + self.emp_table2)

    def test_union_schema_mismatch(self):
        """UNION of inputs with different column counts is rejected."""
        program = """
        T1 = [FROM SCAN(%s) AS X EMIT id, dept_id, name, salary, 7 as seven];
        out = UNION(T1, SCAN(%s));
        STORE(out, OUTPUT);
        """ % (self.emp_key1, self.emp_key2)

        with self.assertRaises(SchemaMismatchException):
            self.get_logical_plan(program)

    def test_unionall_inline(self):
        """Same program and expectation as ``test_unionall``."""
        program = """
        out = SCAN(%s) + SCAN(%s);
        STORE(out, OUTPUT);
        """ % (self.emp_key1, self.emp_key2)

        self.check_result(program, self.emp_table1 + self.emp_table2)

    def test_unionall_inline_ternary(self):
        """Three-way ``+`` over the same relation triples every row."""
        first_key = self.emp_key1
        program = """
        out = SCAN(%s) + [FROM SCAN(%s) AS X EMIT *] + SCAN(%s);
        STORE(out, OUTPUT);
        """ % (first_key, first_key, first_key)

        tripled = self.emp_table1 + self.emp_table1 + self.emp_table1
        self.check_result(program, tripled)

    def test_diff1(self):
        """DIFF(table1, table2) equals the set difference of the rows."""
        program = """
        out = DIFF(SCAN(%s), SCAN(%s));
        STORE(out, OUTPUT);
        """ % (self.emp_key1, self.emp_key2)

        only_first = set(self.emp_table1) - set(self.emp_table2)
        self.check_result(program, collections.Counter(only_first))

    def test_diff2(self):
        """DIFF(table2, table1) equals the set difference of the rows."""
        program = """
        out = DIFF(SCAN(%s), SCAN(%s));
        STORE(out, OUTPUT);
        """ % (self.emp_key2, self.emp_key1)

        only_second = set(self.emp_table2) - set(self.emp_table1)
        self.check_result(program, collections.Counter(only_second))

    def test_diff_schema_mismatch(self):
        """DIFF of inputs with different column counts is rejected."""
        program = """
        T1 = [FROM SCAN(%s) AS X EMIT id, dept_id, name];
        out = DIFF(SCAN(%s), T1);
        STORE(out, OUTPUT);
        """ % (self.emp_key1, self.emp_key2)

        with self.assertRaises(SchemaMismatchException):
            self.get_logical_plan(program)

    def test_diff_while_schema_mismatch(self):
        """A loop that changes T1's schema makes the DIFF inconsistent."""
        program = """
        Orig = [2 as x];
        T1 = [2 as x];
        do
          Bad = diff(T1, Orig);
          T1 = [3 as x, 3 as y];
        while [from Bad emit count(*) > 0];
        store(T1, OUTPUT);
        """

        with self.assertRaises(SchemaMismatchException):
            # TODO Even if executed, this test does not throw exception
            self.get_logical_plan(program)

    def test_diff_while_schema_mismatch2(self):
        """Variant of the loop test whose initial T1 differs from Orig."""
        program = """
        Orig = [2 as x];
        T1 = [3 as x];
        do
          Bad = diff(T1, Orig);
          T1 = [3 as x, 3 as y];
        while [from Bad emit count(*) > 0];
        store(T1, OUTPUT);
        """

        with self.assertRaises(SchemaMismatchException):
            # TODO If executed, this test loops infinitely
            self.get_logical_plan(program)

    def test_intersect1(self):
        """INTERSECT(table1, table2) equals the set intersection."""
        program = """
        out = INTERSECT(SCAN(%s), SCAN(%s));
        STORE(out, OUTPUT);
        """ % (self.emp_key1, self.emp_key2)

        common = set(self.emp_table2) & set(self.emp_table1)
        self.check_result(program, collections.Counter(common),
                          skip_json=True)

    def test_intersect2(self):
        """INTERSECT(table2, table1) equals the set intersection."""
        program = """
        out = INTERSECT(SCAN(%s), SCAN(%s));
        STORE(out, OUTPUT);
        """ % (self.emp_key2, self.emp_key1)

        common = set(self.emp_table1) & set(self.emp_table2)
        self.check_result(program, collections.Counter(common),
                          skip_json=True)

    def test_intersect_schema_mismatch(self):
        """INTERSECT of inputs with different column counts is rejected."""
        program = """
        T1 = [FROM SCAN(%s) AS X EMIT id, dept_id, name];
        out = INTERSECT(T1, SCAN(%s));
        STORE(out, OUTPUT);
        """ % (self.emp_key1, self.emp_key2)

        with self.assertRaises(SchemaMismatchException):
            self.get_logical_plan(program)
Example #13
0
class TestQueryFunctions(datalog_test.DatalogTestCase):
    """Datalog query tests over employee, department and edge relations.

    Expected results are computed in Python from the class-level tables and
    compared with the query output through ``check_result``.
    """

    emp_table = collections.Counter([
        # id dept_id name salary
        (1, 2, "Bill Howe", 25000),
        (2, 1, "Dan Halperin", 90000),
        (3, 1, "Andrew Whitaker", 5000),
        (4, 2, "Shumo Chu", 5000),
        (5, 1, "Victor Almeida", 25000),
        (6, 3, "Dan Suciu", 90000),
        (7, 1, "Magdalena Balazinska", 25000)
    ])

    emp_schema = scheme.Scheme([("id", types.LONG_TYPE),
                                ("dept_id", types.LONG_TYPE),
                                ("name", types.STRING_TYPE),
                                ("salary", types.LONG_TYPE)])

    emp_key = "employee"

    dept_table = collections.Counter([(1, "accounting", 5),
                                      (2, "human resources", 2),
                                      (3, "engineering", 2), (4, "sales", 7)])

    dept_schema = scheme.Scheme([("id", types.LONG_TYPE),
                                 ("name", types.STRING_TYPE),
                                 ("manager", types.LONG_TYPE)])

    dept_key = "department"

    edge_table = collections.Counter([(1, 2), (2, 3), (3, 4), (4, 3), (3, 5),
                                      (4, 13), (5, 4), (1, 9), (7, 1), (6, 1),
                                      (10, 11), (11, 12), (12, 10), (13, 4),
                                      (10, 1)])

    edge_schema = scheme.Scheme([("src", types.LONG_TYPE),
                                 ("dst", types.LONG_TYPE)])
    edge_key = "Edge"

    def setUp(self):
        """Ingest the employee, department and edge relations."""
        super(TestQueryFunctions, self).setUp()

        self.db.ingest(TestQueryFunctions.emp_key,
                       TestQueryFunctions.emp_table,
                       TestQueryFunctions.emp_schema)

        self.db.ingest(TestQueryFunctions.dept_key,
                       TestQueryFunctions.dept_table,
                       TestQueryFunctions.dept_schema)

        self.db.ingest(TestQueryFunctions.edge_key,
                       TestQueryFunctions.edge_table,
                       TestQueryFunctions.edge_schema)

    def test_simple_join(self):
        """Join employees to departments on dept_id."""
        expected = collections.Counter([(e[2], d[1])
                                        for e in self.emp_table.elements()
                                        for d in self.dept_table.elements()
                                        if e[1] == d[0]])

        query = """
        EmpDepts(emp_name, dept_name) :- employee(a, dept_id, emp_name, b),
                department(dept_id, dept_name, c)
        """

        self.check_result(query, expected, output='EmpDepts')

    def test_filter(self):
        """Select the names of employees whose salary exceeds 25000."""
        query = """
        RichGuys(name) :- employee(a, b, name, salary), salary > 25000
        """

        expected = collections.Counter([
            (x[2], ) for x in TestQueryFunctions.emp_table.elements()
            if x[3] > 25000
        ])
        self.check_result(query, expected, output='RichGuys')

    def test_count(self):
        """Count outgoing edges per source vertex."""
        query = """
        OutDegree(src, count(dst)) :- Edge(src, dst)
        """

        counter = collections.Counter()
        for (src, _) in self.edge_table.elements():
            counter[src] += 1

        # dict.items() instead of iteritems(): the latter does not exist on
        # Python 3, while items() yields the same pairs on Python 2 and 3.
        expected = collections.Counter(counter.items())
        self.check_result(query, expected, output='OutDegree')

    def test_sum_reorder(self):
        """Sum salaries per department with the aggregate column first."""
        query = """
        SalaryByDept(sum(salary), dept_id) :- employee(id, dept_id, name, salary);"""  # noqa
        results = collections.Counter()
        for emp in self.emp_table.elements():
            results[emp[1]] += emp[3]
        # items() rather than the Python-2-only iteritems(); each pair is
        # flipped so the sum precedes the department id, as in the rule head.
        expected = collections.Counter(
            [(total, dept) for dept, total in results.items()])
        self.check_result(query, expected, output='SalaryByDept')

    def test_aggregate_no_groups(self):
        """Count all edges without a grouping column."""
        query = """
        Total(count(x)) :- Edge(x, y)
        """
        # len() of a Counter is the number of distinct rows; every edge in
        # edge_table occurs once, so this equals the number of edges.
        expected = collections.Counter([(len(self.edge_table), )])
        self.check_result(query, expected, output='Total')

    def test_multiway_join_chained(self):
        """Three-hop neighbours of vertex 1 via chained rules."""
        query = """
        OneHop(x) :- Edge(1, x);
        TwoHop(x) :- OneHop(y), Edge(y, x);
        ThreeHop(x) :- TwoHop(y), Edge(y, x)
        """

        expected = collections.Counter([(4, ), (5, )])
        self.check_result(query, expected, output='ThreeHop')

    def test_triangles(self):
        """Directed triangles, reported once with the smallest vertex first."""
        # TODO. Right now we have do this separately so that the x<y and y<z
        # conditions are not put in the Join, rather rendered as Selects.
        # Myrialang barfs on theta-joins.
        query = """
        T(x,y,z) :- Edge(x,y), Edge(y,z), Edge(z,x);
        A(x,y,z) :- T(x,y,z), x < y, x < z.
        """

        expected = collections.Counter([(3, 5, 4), (10, 11, 12)])
        self.check_result(query, expected, output='A')

    def test_multiway_join(self):
        """Three-hop neighbours of vertex 1 in a single rule."""
        query = """
        ThreeHop(z) :- Edge(1, x), Edge(x,y), Edge(y, z);
        """
        expected = collections.Counter([(4, ), (5, )])
        self.check_result(query, expected, output='ThreeHop')

    def test_multiway_join_hyper_cube(self):
        """Same three-hop query, compiled with MyriaHyperCubeAlgebra."""
        query = """
        ThreeHop(z) :- Edge(1, x), Edge(x,y), Edge(y, z);
        """
        expected = collections.Counter([(4, ), (5, )])
        self.check_result(query,
                          expected,
                          output='ThreeHop',
                          algebra=MyriaHyperCubeAlgebra)

    def test_union(self):
        """Two rules with the same head produce the bag union."""
        query = """
        OUTPUT(b) :- {emp}(a, b, c, d)
        OUTPUT(b) :- {edge}(b, a)
        """.format(emp=self.emp_key, edge=self.edge_key)
        expected = collections.Counter([(b, )
                                        for (a, b, c, d) in self.emp_table] +
                                       [(b, ) for (b, a) in self.edge_table])
        self.check_result(query, expected, test_logical=True)

    def test_filter_expression(self):
        """Filter with two comparison conditions on the same variable."""
        query = """
        OUTPUT(a, b, c) :- {emp}(a, b, c, d), d >= 25000, d < 91000
        """.format(emp=self.emp_key)
        expected = collections.Counter([(a, b, c)
                                        for (a, b, c, d) in self.emp_table
                                        if (d >= 25000 and d < 91000)])
        self.check_result(query, expected)

    def test_attributes_forward(self):
        """test that attributes are correct amid multiple conditions"""
        query = """
        OUTPUT(a) :- {edge}(a, b), {emp}(c, a, x, y), b=c
        """.format(emp=self.emp_key, edge=self.edge_key)
        expected = collections.Counter([(a, ) for (a, b) in self.edge_table
                                        for (c, a2, x, y) in self.emp_table
                                        if (a == a2 and b == c)])
        self.check_result(query, expected)

    def test_attributes_reverse(self):
        """test that attributes are correct amid multiple conditions and when
        the order of variables in the terms is the opposite of the explicit
        condition"""
        query = """
        OUTPUT(a) :- {edge}(a, b), {emp}(c, a, x, y), c=b
        """.format(emp=self.emp_key, edge=self.edge_key)
        expected = collections.Counter([(a, ) for (a, b) in self.edge_table
                                        for (c, a2, x, y) in self.emp_table
                                        if (a == a2 and b == c)])
        self.check_result(query, expected)

    def test_apply_head(self):
        """Arithmetic expression in the rule head."""
        query = """
        OUTPUT(a/b) :- {emp}(a, b, c, d)
        """.format(emp=self.emp_key)
        # "* 1.0" forces floating-point division in the expected values.
        expected = collections.Counter([(a * 1.0 / b, )
                                        for (a, b, _, _) in self.emp_table])
        self.check_result(query, expected)

    def test_aggregate_head(self):
        """Single aggregate in the rule head, no grouping."""
        query = """
        OUTPUT(SUM(a)) :- {emp}(a, b, c, d)
        """.format(emp=self.emp_key)
        expected = collections.Counter([
            (sum(a for (a, _, _, _) in self.emp_table), )
        ])
        self.check_result(query, expected)

    def test_twoaggregate_head(self):
        """Two aggregates in the rule head, no grouping."""
        query = """
        OUTPUT(SUM(a), COUNT(b)) :- {emp}(a, b, c, d)
        """.format(emp=self.emp_key)
        expected = collections.Counter([
            (sum(a for (a, _, _, _) in self.emp_table),
             sum(1 for (_, b, _, _) in self.emp_table))
        ])
        self.check_result(query, expected)

    def test_aggregate_head_group_self(self):
        """Aggregate followed by its grouping column in the head."""
        query = """
        OUTPUT(SUM(a), b) :- {emp}(a, b, c, d)
        """.format(emp=self.emp_key)
        B = set(b for (_, b, _, _) in self.emp_table)
        expected = collections.Counter([(sum(a
                                             for (a, b, _, _) in self.emp_table
                                             if b == b2), b2) for b2 in B])
        self.check_result(query, expected)

    def test_aggregate_head_group_swap(self):
        """Grouping column followed by the aggregate in the head."""
        query = """
        OUTPUT(b,SUM(a)) :- {emp}(a, b, c, d)
        """.format(emp=self.emp_key)
        B = set(b for (_, b, _, _) in self.emp_table)
        expected = collections.Counter([(b2,
                                         sum(a
                                             for (a, b, _, _) in self.emp_table
                                             if b == b2)) for b2 in B])
        self.check_result(query, expected)

    def test_binop_aggregates(self):
        """Binary operation applied to two aggregates."""
        query = """
        OUTPUT(SUM(b)+SUM(a)) :- {emp}(a, b, c, d)
        """.format(emp=self.emp_key)
        expected = collections.Counter([
            (sum(b for (a, b, _, _) in self.emp_table) +
             sum(a for (a, b, _, _) in self.emp_table), )
        ])
        self.check_result(query, expected)

    def test_aggregate_of_binop(self):
        """Aggregate applied to a binary operation."""
        query = """
        OUTPUT(SUM(b+a)) :- {emp}(a, b, c, d)
        """.format(emp=self.emp_key)
        expected = collections.Counter([
            (sum([(a + b) for (a, b, c, d) in self.emp_table]), )
        ])
        self.check_result(query, expected)

    def test_literal_expr(self):
        """Expression with a literal operand in the rule head."""
        query = """
        OUTPUT(z+1) :- Edge(z, y)
        """
        expected = collections.Counter([(z + 1, )
                                        for (z, _) in self.edge_table])
        self.check_result(query, expected)
Example #14
0
class CFGTest(myrial_test.MyrialTestCase):
    """Tests of the control-flow graph built by the statement processor.

    Covers graph construction, liveness, dead-code and dead-loop
    elimination, and statement chaining. Uses ``assertEqual`` throughout:
    the ``assertEquals`` alias is deprecated and gone in Python 3.12+.
    """

    # The points relation is empty: these tests inspect the CFG only.
    points_table = collections.Counter()
    points_schema = scheme.Scheme([('id', types.LONG_TYPE),
                                   ('x', types.DOUBLE_TYPE),
                                   ('y', types.DOUBLE_TYPE)])
    points_key = "public:adhoc:points"

    def setUp(self):
        """Ingest the empty points relation into the test database."""
        super(CFGTest, self).setUp()

        self.db.ingest(CFGTest.points_key, CFGTest.points_table,
                       CFGTest.points_schema)

    def test_cfg(self):
        """Check nodes, edges, def/use sets and liveness of a do-while."""
        query = """
        Point = SCAN(public:adhoc:points);

        DO
          Big = [FROM Point WHERE x * y > 100 EMIT *];
          Continue = [FROM Big, Point EMIT COUNT(*) > 0 AS cnt];
        WHILE Continue;

        STORE(Big, OUTPUT);
        """

        statements = self.parser.parse(query)
        self.processor.evaluate(statements)

        # One node per statement, numbered in program order.
        expected = nx.DiGraph()
        expected.add_node(0, def_var="Point", uses=set())
        expected.add_node(1, def_var="Big", uses={"Point"})
        expected.add_node(2, def_var="Continue", uses={"Big", "Point"})
        expected.add_node(3, def_var=None, uses={"Continue"})
        expected.add_node(4, def_var=None, uses={"Big"})

        # Straight-line edges plus the loop back-edge 3 -> 1.
        for i in range(4):
            expected.add_edge(i, i + 1)
        expected.add_edge(3, 1)

        actual = self.processor.cfg.graph

        self.assertEqual(actual.adj, expected.adj)
        self.assertEqual(len(actual), len(expected))

        for n in expected:
            self.assertIn(n, actual)
            self.assertEqual(actual.node[n]['uses'], expected.node[n]['uses'])
            self.assertEqual(actual.node[n]['def_var'],
                             expected.node[n]['def_var'])

        live_in, live_out = self.processor.cfg.compute_liveness()

        self.assertEqual(
            live_out, {
                0: {'Point'},
                1: {'Point', 'Big'},
                2: {'Continue', 'Big', 'Point'},
                3: {'Big', 'Point'},
                4: set()
            })

        self.assertEqual(
            live_in, {
                0: set(),
                1: {'Point'},
                2: {'Big', 'Point'},
                3: {'Big', 'Point', 'Continue'},
                4: {'Big'}
            })

    def test_dead_code_elim(self):
        """Dead-code elimination keeps only nodes 2, 6, 7 and 8."""
        with open('examples/deadcode.myl') as fh:
            query = fh.read()

        statements = self.parser.parse(query)
        self.processor.evaluate(statements)
        self.assertEqual(set(self.processor.cfg.graph.nodes()), set(range(9)))

        self.processor.cfg.dead_code_elimination()
        self.assertEqual(set(self.processor.cfg.graph.nodes()), {2, 6, 7, 8})

    def test_bug_245_dead_loop_elim_do_while(self):
        """Dead-loop plus dead-code elimination empties the whole graph."""
        with open('examples/deadcode2.myl') as fh:
            query = fh.read()

        statements = self.parser.parse(query)
        self.processor.evaluate(statements)
        self.assertEqual(set(self.processor.cfg.graph.nodes()), set(range(3)))

        self.processor.cfg.dead_loop_elimination()
        self.processor.cfg.dead_code_elimination()
        self.assertEqual(set(self.processor.cfg.graph.nodes()), set())

    def test_dead_loop_interior(self):
        """Test of a dead loop before the end of the program."""
        query = """
        x = [0 as val, 1 as exp];
        y = x;

        do
            x = [from x emit val+1 as val, 2*exp as exp];
        while [from x emit val < 5];
        store(y, OUTPUT);
        """

        statements = self.parser.parse(query)
        self.processor.evaluate(statements)
        self.assertEqual(set(self.processor.cfg.graph.nodes()), set(range(5)))

        self.processor.cfg.dead_loop_elimination()
        self.processor.cfg.dead_code_elimination()

        self.assertEqual(set(self.processor.cfg.graph.nodes()), {0, 1, 4})

    def test_two_dead_loops(self):
        """Test of two unrelated dead loops."""
        query = """
        x = [0 as val, 1 as exp];
        y = x;
        z = y;

        do
            x = [from x emit val+1 as val, 2*exp as exp];
        while [from x emit val < 5];
        do
            z = [from z emit val+1 as val, 2*exp as exp];
        while [from z emit val < 5];
        store(y, OUTPUT);
        """

        statements = self.parser.parse(query)
        self.processor.evaluate(statements)
        self.assertEqual(set(self.processor.cfg.graph.nodes()), set(range(8)))

        self.processor.cfg.dead_loop_elimination()
        self.processor.cfg.dead_code_elimination()
        self.assertEqual(set(self.processor.cfg.graph.nodes()), {0, 1, 7})

    def test_two_dead_loops_samevar(self):
        """Test that recursive calls to dead_loop_elimination remove
        repeated dead loops reading/writing the same variable."""
        query = """
        x = [0 as val, 1 as exp];
        y = x;

        do
            x = [from x emit val+1 as val, 2*exp as exp];
        while [from x emit val < 5];

        do
            x = [from x emit val+1 as val, 2*exp as exp];
        while [from x emit val < 5];

        store(y, OUTPUT);
        """

        statements = self.parser.parse(query)
        self.processor.evaluate(statements)
        self.assertEqual(set(self.processor.cfg.graph.nodes()), set(range(7)))

        self.processor.cfg.dead_loop_elimination()
        self.processor.cfg.dead_code_elimination()
        self.assertEqual(set(self.processor.cfg.graph.nodes()), {0, 1, 6})

    def test_chaining(self):
        """Chaining collapses eight straight-line statements into one node."""
        query = """
        A = SCAN(public:adhoc:points);
        B = SCAN(public:adhoc:points);
        C = UNIONALL(A, B);
        D = DISTINCT(C);
        E = SCAN(public:adhoc:points);
        F = DIFF(E, D);
        G = DISTINCT(F);
        STORE(G, OUTPUT);
        """

        statements = self.parser.parse(query)
        self.processor.evaluate(statements)
        self.assertEqual(len(self.processor.cfg.graph), 8)

        self.processor.cfg.apply_chaining()
        self.assertEqual(len(self.processor.cfg.graph), 1)

    def test_chaining_variable_reuse(self):
        """Test of chaining with re-used variable names."""
        query = """
        X = SCAN(public:adhoc:points);
        Y = SCAN(public:adhoc:points);
        X = [FROM X, Y WHERE X.x == Y.y EMIT Y.*];
        X = DISTINCT(X);
        STORE(X, OUTPUT);
        """
        statements = self.parser.parse(query)
        self.processor.evaluate(statements)
        self.assertEqual(len(self.processor.cfg.graph), 5)

        self.processor.cfg.apply_chaining()
        self.assertEqual(self.processor.cfg.graph.nodes(), [4])
        self.assertEqual(len(self.processor.cfg.graph.node[4]['uses']), 0)

    def test_chaining_dead_code_elim(self):
        """Dead-code elimination followed by chaining inside a loop."""
        query = """
        Q = DISTINCT(SCAN(public:adhoc:points));
        DO
            A = SCAN(public:adhoc:points);
            B = SCAN(public:adhoc:points);
            P = DISTINCT(A);
            C = DIFF(A, B);
            Continue = COUNTALL(C);
        WHILE Continue;
        STORE(C, OUTPUT);
        X = SCAN(public:adhoc:points);
        """

        statements = self.parser.parse(query)
        self.processor.evaluate(statements)
        self.assertEqual(len(self.processor.cfg.graph), 9)

        self.processor.cfg.dead_code_elimination()
        self.assertEqual(set(self.processor.cfg.graph.nodes()),
                         {1, 2, 4, 5, 6, 7})

        self.processor.cfg.apply_chaining()
        self.assertEqual(set(self.processor.cfg.graph.nodes()), {4, 6, 7})
Example #15
0
 def get_scheme(self, rel_key):
     """Build a Scheme from the metadata table stored under ``str(rel_key)``.

     Each column contributes a ``(name, type)`` pair, where the type is
     looked up in ``type_to_raco`` by the class of the column's type object.
     A missing table or unmapped column type raises ``KeyError``.
     """
     columns = self.metadata.tables[str(rel_key)].columns
     attributes = ((column.name, type_to_raco[type(column.type)])
                   for column in columns)
     return scheme.Scheme(attributes)