Example #1
    def test_schema_append_to_file(self):
        file = "{p}/test_write_catalog.py".format(p=test_file_path)

        if os.path.isfile(file):
            os.remove(file)

        rel_to_add1 = 'public:adhoc:test1'
        rel_to_add2 = 'public:adhoc:test2'

        FromFileCatalog.scheme_write_to_file(
            file, rel_to_add1,
            "{'columnNames': ['grpID1'], 'columnTypes': ['LONG_TYPE']}")

        FromFileCatalog.scheme_write_to_file(
            file, rel_to_add2,
            "{'columnNames': ['grpID2'], 'columnTypes': ['STRING_TYPE']}")

        cut = FromFileCatalog.load_from_file(file)

        self.assertEqual(cut.get_scheme(rel_to_add1).get_names(),
                         ['grpID1'])
        self.assertEqual(cut.get_scheme(rel_to_add1).get_types(),
                         ['LONG_TYPE'])
        self.assertEqual(cut.get_scheme(rel_to_add2).get_names(),
                         ['grpID2'])
        self.assertEqual(cut.get_scheme(rel_to_add2).get_types(),
                         ['STRING_TYPE'])
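For reference, a sketch of what the test_write_catalog.py file written above plausibly contains after both appends; the dict-literal layout is inferred from these examples (see also the manual catalog writer in Example #11), not from the FromFileCatalog source:

# test_write_catalog.py -- a dict literal mapping relation keys to schemas (inferred)
{
    'public:adhoc:test1': {'columnNames': ['grpID1'], 'columnTypes': ['LONG_TYPE']},
    'public:adhoc:test2': {'columnNames': ['grpID2'], 'columnTypes': ['STRING_TYPE']},
}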
Example #2
def main(args):
    opt = parse_options(args)
    osutils.mkdir_p("logs")
    abspath = os.path.abspath("logs")
    name = opt.file

    if opt.query:
        if opt.catalog is None:
            raise Exception("--query also requires a --catalog")

        with open(opt.query, 'r') as f:
            qt = f.read()

        target_alg = CCAlgebra(emit_print=EMIT_FILE)
        if opt.platform == 'grappa':
            target_alg = GrappaAlgebra(emit_print=EMIT_FILE)
        ClangProcessor(FromFileCatalog.load_from_file(opt.catalog))\
            .write_source_code(qt, name, target_alg=target_alg)

    if opt.platform == 'grappa':
        runner = GrappalangRunner()
        runner.run(name, abspath)
    elif opt.platform == 'cpp':
        try:
            runner = ClangRunner()
            runner.run(name, abspath)
        except subprocess.CalledProcessError as e:
            print 'cpp runner for %s failed' % name
            print e.output
            raise
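parse_options is not shown in this snippet; below is a minimal argparse-based sketch of what it plausibly looks like, given the attributes main() reads. The option names and the positional "file" argument are assumptions, not the script's actual interface:

import argparse

def parse_options(args):
    # Sketch only: the real parse_options lives elsewhere in this script.
    p = argparse.ArgumentParser()
    p.add_argument("file", help="base name for the generated source and logs")
    p.add_argument("--query", help="path to a query file to compile")
    p.add_argument("--catalog", help="path to a FromFileCatalog schema file")
    p.add_argument("--platform", choices=["cpp", "grappa"], default="cpp")
    return p.parse_args(args)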
Example #3
def main(args):
    opt = parse_options(args)
    osutils.mkdir_p("logs")
    abspath = os.path.abspath("logs")
    name = opt.file

    if opt.query:
        if opt.catalog is None:
            raise Exception("--query also requires a --catalog")

        with open(opt.query, 'r') as f:
            qt = f.read()

        target_alg = CCAlgebra(emit_print=EMIT_FILE)
        if opt.platform == 'grappa':
            target_alg = GrappaAlgebra(emit_print=EMIT_FILE)
        ClangProcessor(FromFileCatalog.load_from_file(opt.catalog))\
            .write_source_code(qt, name, target_alg=target_alg)

    if opt.platform == 'grappa':
        runner = GrappalangRunner()
        runner.run(name, abspath)
    elif opt.platform == 'clang':
        try:
            runner = ClangRunner()
            runner.run(name, abspath)
        except subprocess.CalledProcessError as e:
            print 'clang runner for %s failed' % name
            print e.output
            raise
Example #4
def generate_tuple_class_from_file(name, catpath):
    cat = FromFileCatalog.load_from_file(catpath)

    if name is not None:
        rel_key = "public:adhoc:{0}".format(name)
        return cat, rel_key, generate_tuple_class(rel_key, cat)
    else:
        return cat, [(n, generate_tuple_class(n, cat)) for n in cat.get_keys()]
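Note the asymmetric return shape: with a name, the call returns a 3-tuple; with name=None, a 2-tuple whose second element is a list of (rel_key, tuple_class) pairs. A hypothetical call site (paths and relation names are made up):

# Single relation: rel_key is derived as public:adhoc:<name>.
cat, rel_key, tup_cls = generate_tuple_class_from_file("mytable", "catalog.py")

# Whole catalog: one (rel_key, tuple_class) pair per relation key.
cat, classes = generate_tuple_class_from_file(None, "catalog.py")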
Example #5
    def test_schema_to_file_no_append(self):
        file = "{p}/test_write_catalog.py".format(p=test_file_path)

        if os.path.isfile(file):
            os.remove(file)

        rel_to_add = 'public:adhoc:test'
        FromFileCatalog.scheme_write_to_file(
            file, rel_to_add,
            "{'columnNames': ['grpID'], 'columnTypes': ['LONG_TYPE']}")

        with self.assertRaises(IOError):
            FromFileCatalog.scheme_write_to_file(
                file, rel_to_add,
                "{'columnNames': ['grpID'], 'columnTypes': ['LONG_TYPE']}",
                append=False)
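Per this test, append=False evidently makes scheme_write_to_file refuse to overwrite an existing catalog file, raising IOError instead.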
Example #6
    def test_set_cardinality_relation(self):
        cut = FromFileCatalog.load_from_file(
            "{p}/set_cardinality_relation.py".format(p=test_file_path))

        self.assertEqual(cut.get_scheme('C').get_names(),
                         ['a', 'b', 'c'])
        self.assertEqual(cut.num_tuples('B'), DEFAULT_CARDINALITY)
        self.assertEqual(cut.num_tuples('C'), 12)
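Per these assertions, num_tuples returns the catalog's explicit cardinality when one is given (12 for 'C') and falls back to DEFAULT_CARDINALITY otherwise ('B').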
Example #7
    def test_missing_relation(self):
        cut = FromFileCatalog.load_from_file(
            "{p}/set_cardinality_relation.py".format(p=test_file_path))

        with self.assertRaises(Exception):
            cut.num_tuples('D')

        with self.assertRaises(Exception):
            cut.get_scheme('D')
Example #8
    def test_default_cardinality_relation(self):
        cut = FromFileCatalog.load_from_file(
            "{p}/default_cardinality_relation.py".format(p=test_file_path))

        self.assertEqual(cut.get_scheme('B').get_names(),
                         ['x', 'y', 'z'])
        self.assertEqual(cut.get_scheme('A').get_types(),
                         ['DOUBLE_TYPE', 'STRING_TYPE'])

        self.assertEqual(cut.num_tuples('A'), DEFAULT_CARDINALITY)
        self.assertEqual(cut.num_tuples('B'), DEFAULT_CARDINALITY)
Example #9
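    # (Excerpt: this branch handles a binary storage format; convert_exe_name,
    # datafile, inputf, args, and add_data_file are defined earlier in the script.)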
    subprocess.check_call('make {0}'.format(convert_exe_name), shell=True)

    task_message("running binary converter")
    convert_stdout = subprocess.check_output(
        './{exe} {file} "{delim}" {burns} {id}'.format(exe=convert_exe_name,
                                                       file=datafile,
                                                       delim=args.delim,
                                                       burns=0,
                                                       id=False),
        shell=True)

    num_tuples = re.search(r"rows: (\d+)", convert_stdout).group(1)

    add_data_file(datafile+'.bin')


elif args.storage in ['row_ascii', 'row_json']:
    cat = FromFileCatalog.load_from_file(catalogfile)
    rel_key = cat.get_keys()[0]

    if args.splits:
        num_tuples = subprocess.check_output("wc -l {0}/part-* "
                                             "| tail -n 1 "
                                             "| awk '{{print $1}}'".format(inputf),
                                             shell=True)
    else:
        num_tuples = subprocess.check_output("wc -l {0} | awk '{{print $1}}'".format(inputf), shell=True)

    add_data_file(datafile)

else:
    raise Exception("Invalid storage format {0}".format(args.storage))
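Note that every branch leaves num_tuples as a string (a re.search group or raw wc output), so the surrounding script presumably normalizes it before use, e.g.:

num_tuples = int(str(num_tuples).strip())  # assumption: normalization happens later in the script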
Example #10
}}
"""


def generate_tuple_class(rel_key, cat):
    sch = cat.get_scheme(rel_key)
    tupleref = StagedTupleRef(None, sch)
    definition = tupleref.generateDefinition()
    outfnbase = rel_key.split(':')[2]
    with open("{0}_convert.cpp".format(outfnbase), 'w') as outf:
        outf.write(template.format(definition=definition,
                                   typ=tupleref.getTupleTypename()))

    subprocess.check_output(["make", "{fn}.convert".format(fn=outfnbase)])


if __name__ == "__main__":
    p = argparse.ArgumentParser(prog=sys.argv[0])
    p.add_argument("-n", dest="name",
                   help="name of relation [optional]; if not specified, "
                        "the whole catalog is converted")
    p.add_argument("-c", dest="catpath", required=True,
                   help="path of catalog file; see FromFileCatalog for format")

    args = p.parse_args(sys.argv[1:])

    cat = FromFileCatalog.load_from_file(args.catpath)

    if args.name is not None:
        generate_tuple_class("public:adhoc:{0}".format(args.name), cat)
    else:
        for n in cat.get_keys():
            generate_tuple_class(n, cat)
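A hypothetical invocation, assuming the script above is saved as convert2bin.py:

    python convert2bin.py -c catalog.py -n mytable   # one relation
    python convert2bin.py -c catalog.py              # every relation in the catalog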
Example #11
# -- nodes
current_schema = MyriaRelation(relation=nodes_table, connection=connection).schema.to_dict()
columnNames = [x.encode('utf-8') for x in current_schema['columnNames']]
columnTypes = [x.encode('utf-8') for x in current_schema['columnTypes']]
columns = zip(columnNames, columnTypes)
f.write("'" + nodes_table + "' : " + str(columns) + ',\n')
# -- edges
current_schema = MyriaRelation(relation=edges_table, connection=connection).schema.to_dict()
columnNames = [x.encode('utf-8') for x in current_schema['columnNames']]
columnTypes = [x.encode('utf-8') for x in current_schema['columnTypes']]
columns = zip(columnNames, columnTypes)
f.write("'" + edges_table + "' : " + str(columns) + ',\n')
f.write("}" + '\n')
f.close()

catalog = FromFileCatalog.load_from_file("schema.py")
_parser = parser.Parser()

# Run the first query
current_query = "T1 = scan(" + edges_table + "); store(T1," + table_prefix + "edgesConnectedSplitSort);"

statement_list = _parser.parse(current_query)
processor = interpreter.StatementProcessor(catalog, True)
processor.evaluate(statement_list)
p = processor.get_logical_plan()

tail = p.args[0].input
p.args[0].input = alg.Shuffle(tail, [UnnamedAttributeRef(0)])
p.args[0].input = alg.OrderBy(p.args[0].input, [0,1,3,4], [True, True, True, False])
p = processor.get_physical_plan()
finalplan = processor.get_json()
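The splice pattern used here and in the later examples: p.args[0] appears to be the plan's Store operator, so saving its input as tail and reassigning p.args[0].input inserts the Shuffle (and OrderBy) between the Store and the scanned input before the physical plan and JSON are produced.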
Example #12
from raco.catalog import FromFileCatalog
import raco.myrial.parser as parser
import raco.myrial.interpreter as interpreter
import raco.algebra as alg
from raco.expression.expression import UnnamedAttributeRef


catalog = FromFileCatalog.load_from_file("vulcan.py")
_parser = parser.Parser()

# MyriaL statements, not yet compiled to algebra
statement_list = _parser.parse("T1 = scan(public:vulcan:edgesConnected);store(T1, public:vulcan:edgesConnectedSort);")

processor = interpreter.StatementProcessor(catalog, True)

# run the statement list through the processor; it now holds the logical plan
processor.evaluate(statement_list)

p = processor.get_logical_plan()

tail = p.args[0].input
p.args[0].input = alg.Shuffle(tail, [UnnamedAttributeRef(0), UnnamedAttributeRef(1), UnnamedAttributeRef(3)])


p = processor.get_physical_plan()

p = processor.get_json()

print p
Example #13
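        # (Excerpt: these branches are from inside print_pretty_plan(plan, indent);
        # the function's opening lines, where body and term are bound, are not shown.)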
        spc = ' ' * indent
        print '%sDO' % spc
        for op in body:
            print_pretty_plan(op, indent + 4)
        print '%sWHILE' % spc
        print_pretty_plan(term, indent + 4)
    elif isinstance(plan, algebra.Sequence):
        print '%s%s' % (' ' * indent, plan.shortStr())
        for child in plan.children():
            print_pretty_plan(child, indent + 4)
    else:
        print '%s%s' % (' ' * indent, plan)


catalog = FromFileCatalog.load_from_file("catalog.py")
_parser = parser.Parser()

query = ""
with open(sys.argv[1], 'r') as f:
    query = f.read()

statement_list = _parser.parse(query)

processor = interpreter.StatementProcessor(catalog, True)
processor.evaluate(statement_list)

# we will add the shuffle into the logical plan
print "LOGICAL"
p = processor.get_logical_plan()
print_pretty_plan(p)
Example #14
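        # (Excerpt from inside a chunk-migration benchmark loop: numberChunksToMove,
        # positionCount, startWorkers, r, correspondingChunks, schema, and
        # zippedSchema are defined earlier in the script.)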
        currentChunksToMove = numberChunksToMove[positionCount]
        chunkTimes = []
        runtime = 0
        for c in range(1, currentChunksToMove + 1):
            # clear the postgres and OS caches
            subprocess.call(['/bin/bash', "../queries/clear-tpch.sh"])
            print("postgres and os cleared")

            chunkRead = ('public:adhoc10GB' + str(startWorkers) + 'WorkersChunks' +
                         str(correspondingChunks[positionCount]) + ':lineitemPart1')
            load = 'scan(' + str(chunkRead) + ')'
            store = ('public:adhoc10GBFromDisk' + str(startWorkers) + 'to' + str(r) +
                     'part' + str(c) + ':lineitem')

            current_query = 'T1 = ' + load + '; Store(T1,' + store + ');'

            # write the chunk's schema into the catalog file, then load it back
            FromFileCatalog.scheme_write_to_file(path='schema.py',
                                                 new_rel_key=chunkRead,
                                                 new_rel_schema=str(schema))
            catalog = FromFileCatalog.load_from_file('schema.py')

            _parser = parser.Parser()
            statement_list = _parser.parse(current_query)
            processor = interpreter.StatementProcessor(catalog, True)
            processor.evaluate(statement_list)
            p = processor.get_logical_plan()
            # modify the logical plan: shuffle on the first two columns
            tail = p.args[0].input
            p.args[0].input = alg.Shuffle(tail, [UnnamedAttributeRef(0),
                                                 UnnamedAttributeRef(1)])
            p = processor.get_physical_plan()
            p.input.input.input = MyriaQueryScan(sql='select * from "' + chunkRead + '"',
                                                 scheme=Scheme(zippedSchema))
            finalplan = compile_to_json('chunkQuery', p, p)

            # modify json
Example #15
    task_message("running binary converter")
    convert_stdout = subprocess.check_output(
        './{exe} {file} "{delim}" {burns} {id}'.format(exe=convert_exe_name,
                                                       file=datafile,
                                                       delim=args.delim,
                                                       burns=0,
                                                       id=False),
        shell=True)

    num_tuples = re.search(r"rows: (\d+)", convert_stdout).group(1)

    add_data_file(datafile + '.bin')

elif args.storage in ['row_ascii', 'row_json']:
    cat = FromFileCatalog.load_from_file(catalogfile)
    rel_key = cat.get_keys()[0]

    if args.splits:
        num_tuples = subprocess.check_output(
            "wc -l {0}/part-* "
            "| tail -n 1 "
            "| awk '{{print $1}}'".format(inputf),
            shell=True)
    else:
        num_tuples = subprocess.check_output(
            "wc -l {0} | awk '{{print $1}}'".format(inputf), shell=True)

    add_data_file(datafile)

else:
    raise Exception("Invalid storage format {0}".format(args.storage))
Example #16
        spc = ' ' * indent
        print '%sDO' % spc
        for op in body:
            print_pretty_plan(op, indent + 4)
        print '%sWHILE' % spc
        print_pretty_plan(term, indent + 4)
    elif isinstance(plan, algebra.Sequence):
        print '%s%s' % (' ' * indent, plan.shortStr())
        for child in plan.children():
            print_pretty_plan(child, indent + 4)
    else:
        print '%s%s' % (' ' * indent, plan)


catalog = FromFileCatalog.load_from_file("../../matrices/catalogs/catalog.py")
_parser = parser.Parser()

query = ""
with open(sys.argv[1], 'r') as f:
    query = f.read()

statement_list = _parser.parse(query)

processor = interpreter.StatementProcessor(catalog, True)
processor.evaluate(statement_list)

# we will add the shuffle into the logical plan
print "LOGICAL"
p = processor.get_logical_plan()
print_pretty_plan(p)