def test_schema_append_to_file(self):
    file = "{p}/test_write_catalog.py".format(p=test_file_path)
    if os.path.isfile(file):
        os.remove(file)

    rel_to_add1 = 'public:adhoc:test1'
    rel_to_add2 = 'public:adhoc:test2'
    FromFileCatalog.scheme_write_to_file(
        file, rel_to_add1,
        "{'columnNames': ['grpID1'], 'columnTypes': ['LONG_TYPE']}")
    FromFileCatalog.scheme_write_to_file(
        file, rel_to_add2,
        "{'columnNames': ['grpID2'], 'columnTypes': ['STRING_TYPE']}")

    cut = FromFileCatalog.load_from_file(
        "{p}/test_write_catalog.py".format(p=test_file_path))

    self.assertEqual(cut.get_scheme(rel_to_add1).get_names(), ['grpID1'])
    self.assertEqual(cut.get_scheme(rel_to_add1).get_types(), ['LONG_TYPE'])
    self.assertEqual(cut.get_scheme(rel_to_add2).get_names(), ['grpID2'])
    self.assertEqual(cut.get_scheme(rel_to_add2).get_types(), ['STRING_TYPE'])
def main(args):
    opt = parse_options(args)
    osutils.mkdir_p("logs")
    abspath = os.path.abspath("logs")
    name = opt.file

    if opt.query:
        if opt.catalog is None:
            raise Exception("--query also requires a --catalog")
        with open(opt.query, 'r') as f:
            qt = f.read()

        target_alg = CCAlgebra(emit_print=EMIT_FILE)
        if opt.platform == 'grappa':
            target_alg = GrappaAlgebra(emit_print=EMIT_FILE)

        ClangProcessor(FromFileCatalog.load_from_file(opt.catalog))\
            .write_source_code(qt, name, target_alg=target_alg)

    if opt.platform == 'grappa':
        runner = GrappalangRunner()
        runner.run(name, abspath)
    elif opt.platform == 'cpp':
        try:
            runner = ClangRunner()
            runner.run(name, abspath)
        except subprocess.CalledProcessError as e:
            print 'cpp runner for %s failed' % (name)
            print e.output
            raise
def main(args):
    opt = parse_options(args)
    osutils.mkdir_p("logs")
    abspath = os.path.abspath("logs")
    name = opt.file

    if opt.query:
        if opt.catalog is None:
            raise Exception("--query also requires a --catalog")
        with open(opt.query, 'r') as f:
            qt = f.read()

        target_alg = CCAlgebra(emit_print=EMIT_FILE)
        if opt.platform == 'grappa':
            target_alg = GrappaAlgebra(emit_print=EMIT_FILE)

        ClangProcessor(FromFileCatalog.load_from_file(opt.catalog))\
            .write_source_code(qt, name, target_alg=target_alg)

    if opt.platform == 'grappa':
        runner = GrappalangRunner()
        runner.run(name, abspath)
    elif opt.platform == 'clang':
        try:
            runner = ClangRunner()
            runner.run(name, abspath)
        except subprocess.CalledProcessError as e:
            print 'clang runner for %s failed' % (name)
            print e.output
            raise
def generate_tuple_class_from_file(name, catpath):
    cat = FromFileCatalog.load_from_file(catpath)
    if name is not None:
        rel_key = "public:adhoc:{0}".format(name)
        return cat, rel_key, generate_tuple_class(rel_key, cat)
    else:
        return cat, [(n, generate_tuple_class(n, cat)) for n in cat.get_keys()]
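A possible call pattern for this helper; a minimal sketch, assuming a hypothetical catalog file and relation name (neither appears in the source):

# Hypothetical usage of generate_tuple_class_from_file; "example_catalog.py"
# and "employees" are placeholder names, not files from this repository.
cat, rel_key, tuple_cls = generate_tuple_class_from_file("employees",
                                                         "example_catalog.py")

# Passing name=None instead returns one generated tuple class per relation
# key found in the catalog.
cat, classes = generate_tuple_class_from_file(None, "example_catalog.py")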
def test_schema_to_file_no_append(self):
    file = "{p}/test_write_catalog.py".format(p=test_file_path)
    if os.path.isfile(file):
        os.remove(file)

    rel_to_add = 'public:adhoc:test'
    FromFileCatalog.scheme_write_to_file(
        "{p}/test_write_catalog.py".format(p=test_file_path), rel_to_add,
        "{'columnNames': ['grpID'], 'columnTypes': ['LONG_TYPE']}")

    # writing to an existing catalog file with append=False must fail
    with self.assertRaises(IOError):
        FromFileCatalog.scheme_write_to_file(
            "{p}/test_write_catalog.py".format(p=test_file_path), rel_to_add,
            "{'columnNames': ['grpID'], 'columnTypes': ['LONG_TYPE']}",
            append=False)
def test_set_cardinality_relation(self):
    cut = FromFileCatalog.load_from_file(
        "{p}/set_cardinality_relation.py".format(p=test_file_path))
    self.assertEqual(cut.get_scheme('C').get_names(), ['a', 'b', 'c'])
    self.assertEqual(cut.num_tuples('B'), DEFAULT_CARDINALITY)
    self.assertEqual(cut.num_tuples('C'), 12)
def test_missing_relation(self):
    cut = FromFileCatalog.load_from_file(
        "{p}/set_cardinality_relation.py".format(p=test_file_path))
    with self.assertRaises(Exception):
        cut.num_tuples('D')
    with self.assertRaises(Exception):
        cut.get_scheme('D')
def test_default_cardinality_relation(self):
    cut = FromFileCatalog.load_from_file(
        "{p}/default_cardinality_relation.py".format(p=test_file_path))
    self.assertEqual(cut.get_scheme('B').get_names(), ['x', 'y', 'z'])
    self.assertEqual(cut.get_scheme('A').get_types(),
                     ['DOUBLE_TYPE', 'STRING_TYPE'])
    self.assertEqual(cut.num_tuples('A'), DEFAULT_CARDINALITY)
    self.assertEqual(cut.num_tuples('B'), DEFAULT_CARDINALITY)
    subprocess.check_call('make {0}'.format(convert_exe_name), shell=True)

    task_message("running binary converter")
    convert_stdout = subprocess.check_output(
        './{exe} {file} "{delim}" {burns} {id}'.format(exe=convert_exe_name,
                                                       file=datafile,
                                                       delim=args.delim,
                                                       burns=0,
                                                       id=False),
        shell=True)

    num_tuples = re.search("rows: (\d+)", convert_stdout).group(1)
    add_data_file(datafile + '.bin')
elif args.storage in ['row_ascii', 'row_json']:
    cat = FromFileCatalog.load_from_file(catalogfile)
    rel_key = cat.get_keys()[0]

    if args.splits:
        num_tuples = subprocess.check_output("wc -l {0}/part-* "
                                             "| tail -n 1 "
                                             "| awk '{{print $1}}'".format(inputf),
                                             shell=True)
    else:
        num_tuples = subprocess.check_output(
            "wc -l {0} | awk '{{print $1}}'".format(inputf), shell=True)

    add_data_file(datafile)
else:
    raise Exception("Invalid storage format {0}".format(args.storage))
}}
"""


def generate_tuple_class(rel_key, cat):
    sch = cat.get_scheme(rel_key)
    tupleref = StagedTupleRef(None, sch)
    definition = tupleref.generateDefinition()
    outfnbase = rel_key.split(':')[2]
    with open("{0}_convert.cpp".format(outfnbase), 'w') as outf:
        outf.write(template.format(definition=definition,
                                   typ=tupleref.getTupleTypename()))
    subprocess.check_output(["make", "{fn}.convert".format(fn=outfnbase)])


if __name__ == "__main__":
    p = argparse.ArgumentParser(prog=sys.argv[0])
    p.add_argument("-n", dest="name",
                   help="name of relation [optional]. If not specified then "
                        "will convert whole catalog")
    p.add_argument("-c", dest="catpath",
                   help="path of catalog file, see FromFileCatalog for format",
                   required=True)
    args = p.parse_args(sys.argv[1:])

    cat = FromFileCatalog.load_from_file(args.catpath)
    if args.name is not None:
        generate_tuple_class("public:adhoc:{0}".format(args.name), cat)
    else:
        for n in cat.get_keys():
            generate_tuple_class(n, cat)
# --nodes
current_schema = (MyriaRelation(relation=nodes_table,
                                connection=connection).schema.to_dict())
columnNames = [x.encode('utf-8') for x in current_schema['columnNames']]
columnTypes = [x.encode('utf-8') for x in current_schema['columnTypes']]
columns = zip(columnNames, columnTypes)
f.write("'" + nodes_table + "' : " + str(columns) + ',\n')

# --edges
current_schema = (MyriaRelation(relation=edges_table,
                                connection=connection).schema.to_dict())
columnNames = [x.encode('utf-8') for x in current_schema['columnNames']]
columnTypes = [x.encode('utf-8') for x in current_schema['columnTypes']]
columns = zip(columnNames, columnTypes)
f.write("'" + edges_table + "' : " + str(columns) + ',\n')

f.write("}" + '\n')
f.close()

catalog = FromFileCatalog.load_from_file("schema.py")
_parser = parser.Parser()

# Run the first query
current_query = ("T1 = scan(" + edges_table + "); store(T1," +
                 table_prefix + "edgesConnectedSplitSort);")
statement_list = _parser.parse(current_query)
processor = interpreter.StatementProcessor(catalog, True)
processor.evaluate(statement_list)

p = processor.get_logical_plan()
tail = p.args[0].input
p.args[0].input = alg.Shuffle(tail, [UnnamedAttributeRef(0)])
p.args[0].input = alg.OrderBy(p.args[0].input, [0, 1, 3, 4],
                              [True, True, True, False])
p = processor.get_physical_plan()
finalplan = processor.get_json()
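For reference, the file written above is a Python dict literal mapping each relation key to its (columnName, columnType) pairs; a minimal sketch of what the generated schema.py might contain, with hypothetical relation and column names (the opening brace is written before the snippet above begins):

# Hypothetical contents of the generated schema.py; the real keys and columns
# come from the MyriaRelation schemas fetched over the connection.
{
    'public:adhoc:exampleNodes': [('id', 'LONG_TYPE'), ('label', 'STRING_TYPE')],
    'public:adhoc:exampleEdges': [('src', 'LONG_TYPE'), ('dst', 'LONG_TYPE')],
}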
from raco.catalog import FromFileCatalog
import raco.myrial.parser as parser
import raco.myrial.interpreter as interpreter
import raco.algebra as alg
from raco.expression.expression import UnnamedAttributeRef

catalog = FromFileCatalog.load_from_file("vulcan.py")
_parser = parser.Parser()

# MyriaL statements, not yet algebra
statement_list = _parser.parse(
    "T1 = scan(public:vulcan:edgesConnected);"
    "store(T1, public:vulcan:edgesConnectedSort);")

# walk the statement list and build the logical plan (the processor holds it)
processor = interpreter.StatementProcessor(catalog, True)
processor.evaluate(statement_list)

p = processor.get_logical_plan()
tail = p.args[0].input
p.args[0].input = alg.Shuffle(tail, [UnnamedAttributeRef(0),
                                     UnnamedAttributeRef(1),
                                     UnnamedAttributeRef(3)])
p = processor.get_physical_plan()
p = processor.get_json()
print p
        spc = ' ' * indent
        print '%sDO' % spc
        for op in body:
            print_pretty_plan(op, indent + 4)
        print '%sWHILE' % spc
        print_pretty_plan(term, indent + 4)
    elif isinstance(plan, algebra.Sequence):
        print '%s%s' % (' ' * indent, plan.shortStr())
        for child in plan.children():
            print_pretty_plan(child, indent + 4)
    else:
        print '%s%s' % (' ' * indent, plan)


catalog = FromFileCatalog.load_from_file("catalog.py")
_parser = parser.Parser()

query = ""
with open(sys.argv[1], 'r') as f:
    query = f.read()

statement_list = _parser.parse(query)
processor = interpreter.StatementProcessor(catalog, True)
processor.evaluate(statement_list)

# we will add the shuffle into the logical plan
print "LOGICAL"
p = processor.get_logical_plan()
print_pretty_plan(p)
currentChunksToMove = numberChunksToMove[positionCount]
chunkTimes = []
runtime = 0
for c in range(1, currentChunksToMove + 1):
    # clear cache (postgres and OS) before each run
    subprocess.call(['/bin/bash', "../queries/clear-tpch.sh"])
    print("postgres and os cleared")

    chunkRead = ('public:adhoc10GB' + str(startWorkers) + 'WorkersChunks' +
                 str(correspondingChunks[positionCount]) + ':lineitemPart1')
    load = 'scan(' + str(chunkRead) + ')'
    store = ('public:adhoc10GBFromDisk' + str(startWorkers) + 'to' + str(r) +
             'part' + str(c) + ':lineitem')
    current_query = 'T1 = ' + load + '; Store(T1,' + store + ');'

    # schema
    FromFileCatalog.scheme_write_to_file(path='schema.py',
                                         new_rel_key=chunkRead,
                                         new_rel_schema=str(schema))
    catalog = FromFileCatalog.load_from_file('schema.py')
    _parser = parser.Parser()
    statement_list = _parser.parse(current_query)
    processor = interpreter.StatementProcessor(catalog, True)
    processor.evaluate(statement_list)
    p = processor.get_logical_plan()

    # modify p
    tail = p.args[0].input
    p.args[0].input = alg.Shuffle(tail, [UnnamedAttributeRef(0),
                                         UnnamedAttributeRef(1)])
    p = processor.get_physical_plan()
    p.input.input.input = MyriaQueryScan(
        sql="select * from \"" + chunkRead + "\"",
        scheme=Scheme(zippedSchema))
    finalplan = compile_to_json('chunkQuery', p, p)
    # modify json
    task_message("running binary converter")
    convert_stdout = subprocess.check_output(
        './{exe} {file} "{delim}" {burns} {id}'.format(exe=convert_exe_name,
                                                       file=datafile,
                                                       delim=args.delim,
                                                       burns=0,
                                                       id=False),
        shell=True)

    num_tuples = re.search("rows: (\d+)", convert_stdout).group(1)
    add_data_file(datafile + '.bin')
elif args.storage in ['row_ascii', 'row_json']:
    cat = FromFileCatalog.load_from_file(catalogfile)
    rel_key = cat.get_keys()[0]

    if args.splits:
        num_tuples = subprocess.check_output(
            "wc -l {0}/part-* "
            "| tail -n 1 "
            "| awk '{{print $1}}'".format(inputf),
            shell=True)
    else:
        num_tuples = subprocess.check_output(
            "wc -l {0} | awk '{{print $1}}'".format(inputf), shell=True)

    add_data_file(datafile)
else:
        spc = ' ' * indent
        print '%sDO' % spc
        for op in body:
            print_pretty_plan(op, indent + 4)
        print '%sWHILE' % spc
        print_pretty_plan(term, indent + 4)
    elif isinstance(plan, algebra.Sequence):
        print '%s%s' % (' ' * indent, plan.shortStr())
        for child in plan.children():
            print_pretty_plan(child, indent + 4)
    else:
        print '%s%s' % (' ' * indent, plan)


catalog = FromFileCatalog.load_from_file("../../matrices/catalogs/catalog.py")
_parser = parser.Parser()

query = ""
with open(sys.argv[1], 'r') as f:
    query = f.read()

statement_list = _parser.parse(query)
processor = interpreter.StatementProcessor(catalog, True)
processor.evaluate(statement_list)

# we will add the shuffle into the logical plan
print "LOGICAL"
p = processor.get_logical_plan()
print_pretty_plan(p)