def _check(self, input, expected): dd = {} dd["Person"] = {"name": "string", "age": "integer", "gender": "string"} dd["Eats"] = {"name": "string", "pizza": "string"} dd["Serves"] = { "pizzeria": "string", "pizza": "string", "price": "integer" } ra0 = radb.parse.one_statement_from_string(input) ra1 = raopt.rule_break_up_selections(ra0) ra2 = raopt.rule_push_down_selections(ra1, dd) ra3 = raopt.rule_merge_selections(ra2) ra4 = raopt.rule_introduce_joins(ra3) computed_expr = ra4 expected_expr = radb.parse.one_statement_from_string(expected) self.assertEqual(str(computed_expr), str(expected_expr))
def _evaluate(self, sqlstring): dd = {} dd["Person"] = {"name": "string", "age": "integer", "gender": "string"} dd["Eats"] = {"name": "string", "pizza": "string"} dd["Serves"] = {"pizzeria": "string", "pizza": "string", "price": "integer"} stmt = sqlparse.parse(sqlstring)[0] ra0 = sql2ra.translate(stmt) ra1 = raopt.rule_break_up_selections(ra0) ra2 = raopt.rule_push_down_selections(ra1, dd) ra3 = raopt.rule_merge_selections(ra2) ra4 = raopt.rule_introduce_joins(ra3) task = ra2mr.task_factory(ra4, env=ra2mr.ExecEnv.MOCK) luigi.build([task], local_scheduler=True) f = task.output().open('r') lines = [] for line in f: lines.append(line) return lines
def _check(self, input, expected): computed_expr = raopt.rule_break_up_selections( radb.parse.one_statement_from_string(input)) expected_expr = radb.parse.one_statement_from_string(expected) self.assertEqual(str(computed_expr), str(expected_expr))
def _evaluate(self, sqlstring): dd = {} dd["Person"] = {"name": "string", "age": "integer", "gender": "string"} dd["Eats"] = {"name": "string", "pizza": "string"} dd["Serves"] = { "pizzeria": "string", "pizza": "string", "price": "integer" } stmt = sqlparse.parse(sqlstring)[0] ra0 = sql2ra.translate(stmt) print(">>>>> OUTPUT MST1: " + ra0) ra1 = raopt.rule_break_up_selections(ra0) print(">>>>> OUTPUT RULE 1: " + ra1) ra2 = raopt.rule_push_down_selections(ra1, dd) print(">>>>> OUTPUT RULE 2: " + ra2) ra3 = raopt.rule_merge_selections(ra2) print(">>>>> OUTPUT RULE 3: " + ra3) ra4 = raopt.rule_introduce_joins(ra3) print(">>>>> OUTPUT RULE 3: " + ra4) def test_select_person(self): sqlstring = "select distinct * from Person" computed = self._evaluate(sqlstring) self.assertEqual(len(computed), 9) def test_select_person_age_16(self): sqlstring = "select distinct * from Person where age = 16" computed = self._evaluate(sqlstring) self.assertEqual(len(computed), 1) def test_select_person_age_gender(self): sqlstring = "select distinct * from Person where gender='female' and age=16" computed = self._evaluate(sqlstring) self.assertEqual(len(computed), 1) def test_project_name(self): sqlstring = "select distinct name from Person" computed = self._evaluate(sqlstring) self.assertEqual(len(computed), 9) def test_project_name_age(self): sqlstring = "select distinct name, age from Person" computed = self._evaluate(sqlstring) self.assertEqual(len(computed), 9) self.assertIn({ "Person.name": "Amy", "Person.age": 16 }, [json.loads(tuple.split('\t')[1]) for tuple in computed]) def test_person_join_eats(self): sqlstring = "select distinct * from Person, Eats where Person.name = Eats.name" computed = self._evaluate(sqlstring) self.assertEqual(len(computed), 20) def test_project_person_join_eats_join_serves(self): sqlstring = "select distinct Person.name, Serves.pizza from Person, Eats, Serves " \ "where Person.name = Eats.name and Eats.pizza = Serves.pizza" computed = self._evaluate(sqlstring) self.assertEqual(len(computed), 20) def test_project_name_of_person(self): sqlstring = "select distinct X.name from Person X" computed = self._evaluate(sqlstring) self.assertEqual(len(computed), 9) self.assertIn( {"X.name": "Amy"}, [json.loads(tuple.split('\t')[1]) for tuple in computed]) def test_mushroom_lovers(self): sqlstring = "select distinct Person.name, Serves.pizzeria from Person, Eats, Serves " \ "where Person.name = Eats.name and Eats.pizza = Serves.pizza " \ "and Eats.pizza = 'mushroom'" computed = self._evaluate(sqlstring) self.assertEqual(len(computed), 8) # NEW as of 19-MAY-2018 def test_person_join_eats_join_serves_where(self): sqlstring = "select distinct * from Person, Eats, Serves " \ "where Person.name = Eats.name and Eats.pizza = Serves.pizza " \ "and Person.age = 16 and Serves.pizzeria = 'Little Ceasars'" computed = self._evaluate(sqlstring) self.assertEqual(len(computed), 2)
def eval(sf, env, query, optimize): dd = {} dd["PART"] = { "P_PARTKEY": "int", "P_NAME": "string", "P_MFGR": "string", "P_BRAND": "string", "P_TYPE": "string", "P_SIZE": "int", "P_CONTAINER": "string", "P_RETAILPRICE": "float", "P_COMMENT": "STRING" } dd["CUSTOMER"] = { "C_CUSTKEY": "int", "C_NAME": "string", "C_ADDRESS": "string", "C_NATIONKEY": "int", "C_PHONE": "string", "C_ACCTBAL": "float", "C_MKTSEGMENT": "string", "C_COMMENT": "string" } dd["REGION"] = { "R_REGIONKEY": "int", "R_NAME": "string", "R_COMMENT": "string" } dd["ORDERS"] = { "O_ORDERKEY": "int", "O_CUSTKEY": "int", "O_ORDERSTATUS": "string", "O_TOTALPRICE": "float", "O_ORDERDATE": "string", "O_ORDERPRIORITY": "string", "O_CLERK": "string", "O_SHIPPRIORITY": "int", "O_COMMENT": "string" } dd["LINEITEM"] = { "L_ORDERKEY": "int", "L_PARTKEY": "int", "L_SUPPKEY": "int", "L_LINENUMBER": "int", "L_QUANTITY": "int", "L_EXTENDEDPRICE": "float", "L_DISCOUNT": "float", "L_TAX": "float", "L_RETURNFLAG": "string", "L_LINESTATUS": "string", "L_SHIPDATE": "string", "L_COMMITDATE": "string", "L_RECEIPTDATE": "string", "L_SHIPINSTRUCT": "string", "L_SHIPMODE": "string", "L_COMMENT": "string" } dd["NATION"] = { "N_NATIONKEY": "int", "N_NAME": "string", "N_REGIONKEY": "int", "N_COMMENT": "string" } dd["SUPPLIER"] = { "S_SUPPKEY": "int", "S_NAME": "string", "S_ADDRESS": "string", "S_NATIONKEY": "int", "S_PHONE": "string", "S_ACCTBAL": "float", "S_COMMENT": "string" } dd["PARTSUPP"] = { "PS_PARTKEY": "int", "PS_SUPPKEY": "int", "PS_AVAILQTY": "int", "PS_SUPPLYCOST": "float", "PS_COMMENT": "string" } ''' ...................... you may edit code below ........................''' stmt = sqlparse.parse(query)[0] ra0 = sql2ra.translate(stmt, optimize=optimize) ra1 = raopt.rule_break_up_selections(ra0) ra2 = raopt.rule_push_down_selections(ra1, dd) ra3 = raopt.rule_merge_selections(ra2) ra4 = raopt.rule_introduce_joins(ra3) task = ra2mr.task_factory(ra4, env=env, optimize=optimize, after_query=ra4) luigi.build([task], local_scheduler=True) return task.output().open('r') ''' ...................... you may edit code above ........................'''