Example #1
0
    def test(self):
        twitterData = self.get_file_url('testdata/twitter/TwitterK.csv')
        loadData = """
T1 = load('{}',csv(schema(a:int,b:int)));
T2 = [from T1 as t emit *];
store(T2, twitterOriginal);

T1 = load('{}',csv(schema(a:int,b:int)));
T2 = [from T1 as t where t.a = 17 emit *];
store(T2, twitterSubsetNotBroadcast);

T1 = load('{}',csv(schema(a:int,b:int)));
T2 = [from T1 as t where t.a = 17 emit *];
store(T2, twitterSubsetBroadcast, broadcast());
""".format(twitterData, twitterData, twitterData)
        MyriaQuery.submit(loadData)
        notBroadcastJoin = """
T1 = [from scan(twitterOriginal) as t1, scan(twitterSubsetNotBroadcast) as t2
where t1.a = t2.a emit *];
store(T1, finalNotBroadcast);
"""
        originalResult = MyriaQuery.submit(notBroadcastJoin)
        broadcastJoin = """
T1 = [from scan(twitterOriginal) as t1, scan(twitterSubsetBroadcast) as t2
        where t1.a = t2.a emit *];
store(T1, finalBroadcast);
"""
        broadcastResult = MyriaQuery.submit(broadcastJoin)
        self.assertListOfDictsEqual(originalResult.to_dict(),
                                    broadcastResult.to_dict())
Example #2
0
    def test(self):
        nodesTable = self.get_file_url('testdata/cosmo/nodesTable.csv')
        edgesTable = self.get_file_url('testdata/cosmo/edgesTable.csv')
        loadData = """
T1 = load('{}',csv(schema(grpID:int, timeStep:int, totalMass:float, totalParticles:int), skip=1));
store(T1, nodesTable, hash(grpID, timeStep));

T2 = load('{}',csv(schema(currentGroup:int, currentTime:int, nextGroup:int, nowGroup:int, sharedParticles:int), skip=1));
store(T2, edgesTable, round_robin());
""".format(nodesTable, edgesTable)
        MyriaQuery.submit(loadData)
        order1Query = """
T1 = select currentGroup, grpID, timeStep, nowGroup, sharedParticles
    from scan(nodesTable) as n, scan(edgesTable) as e
    where n.timeStep = e.currentTime
    and n.grpID = e.currentGroup;
store(T1, order1Result);
"""
        order1Result = MyriaQuery.submit(order1Query)
        order2Query = """
T2 = select currentGroup, grpID, timeStep, nowGroup, sharedParticles
    from scan(nodesTable) as n, scan(edgesTable) as e
    where n.grpID = e.currentGroup
    and n.timeStep = e.currentTime;
store(T2, order2Result);
"""
        order2Result = MyriaQuery.submit(order2Query)
        self.assertListOfDictsEqual(
            order1Result.to_dict(), order2Result.to_dict())
Example #3
0
    def test(self):
        twitterData = self.get_file_url('testdata/twitter/TwitterK.csv')
        loadData = """
T1 = load('{}',csv(schema(a:int,b:int)));
T2 = [from T1 as t emit *];
store(T2, twitterOriginal);

T1 = load('{}',csv(schema(a:int,b:int)));
T2 = [from T1 as t where t.a = 17 emit *];
store(T2, twitterSubsetNotBroadcast);

T1 = load('{}',csv(schema(a:int,b:int)));
T2 = [from T1 as t where t.a = 17 emit *];
store(T2, twitterSubsetBroadcast, broadcast());
""".format(twitterData, twitterData, twitterData)
        MyriaQuery.submit(loadData)
        notBroadcastJoin = """
T1 = [from scan(twitterOriginal) as t1, scan(twitterSubsetNotBroadcast) as t2
where t1.a = t2.a emit *];
store(T1, finalNotBroadcast);
"""
        originalResult = MyriaQuery.submit(notBroadcastJoin)
        broadcastJoin = """
T1 = [from scan(twitterOriginal) as t1, scan(twitterSubsetBroadcast) as t2
        where t1.a = t2.a emit *];
store(T1, finalBroadcast);
"""
        broadcastResult = MyriaQuery.submit(broadcastJoin)
        self.assertListOfDictsEqual(
            originalResult.to_dict(), broadcastResult.to_dict())
Example #4
0
    def test(self):
        nodesTable = self.get_file_url('testdata/cosmo/nodesTable.csv')
        edgesTable = self.get_file_url('testdata/cosmo/edgesTable.csv')
        loadData = """
T1 = load('{}',csv(schema(grpID:int, timeStep:int, totalMass:float, totalParticles:int), skip=1));
store(T1, nodesTable, hash(grpID, timeStep));

T2 = load('{}',csv(schema(currentGroup:int, currentTime:int, nextGroup:int, nowGroup:int, sharedParticles:int), skip=1));
store(T2, edgesTable, round_robin());
""".format(nodesTable, edgesTable)
        MyriaQuery.submit(loadData)
        order1Query = """
T1 = select currentGroup, grpID, timeStep, nowGroup, sharedParticles
    from scan(nodesTable) as n, scan(edgesTable) as e
    where n.timeStep = e.currentTime
    and n.grpID = e.currentGroup;
store(T1, order1Result);
"""
        order1Result = MyriaQuery.submit(order1Query)
        order2Query = """
T2 = select currentGroup, grpID, timeStep, nowGroup, sharedParticles
    from scan(nodesTable) as n, scan(edgesTable) as e
    where n.grpID = e.currentGroup
    and n.timeStep = e.currentTime;
store(T2, order2Result);
"""
        order2Result = MyriaQuery.submit(order2Query)
        self.assertListOfDictsEqual(order1Result.to_dict(),
                                    order2Result.to_dict())
Example #5
0
    def test(self):
        program = """
r = load('{}', csv(schema(follower:int, followee:int), delimiter=' '));
store(r, jwang:global_join:smallTable);
""".format(self.get_file_url('testdata/filescan/simple_two_col_int.txt'))
        ingest_query = MyriaQuery.submit(program)
        self.assertEqual(ingest_query.status, 'SUCCESS')

        join_json = json.loads(
            open('jsonQueries/nullChild_jortiz/ThreeWayLocalJoin.json').read())
        join_query = MyriaQuery.submit_plan(join_json).wait_for_completion()
        self.assertEqual(join_query.status, 'SUCCESS')
Example #6
0
    def test(self):
        twitterData = self.get_file_url('testdata/twitter/TwitterK.csv')
        query_uda = """
uda counter() {{[0 AS c]; [c + 1];  c;}}; T1 = load('{}',csv(schema(a:int,b:int))); T2 = [from T1 emit a, counter() as c]; store(T2, out_degree_uda);
""".format(twitterData)
        uda_result = MyriaQuery.submit(query_uda)
        query_count = """
T1 = load('{}',csv(schema(a:int,b:int))); T2 = [from T1 emit a, count(*) as c]; store(T2, out_degree_count);
""".format(twitterData)
        count_result = MyriaQuery.submit(query_count)
        self.assertListOfDictsEqual(count_result.to_dict(),
                                    uda_result.to_dict())
Example #7
0
    def test(self):
        program = """
r = load('{}', csv(schema(follower:int, followee:int), delimiter=' '));
store(r, jwang:global_join:smallTable);
""".format(self.get_file_url('testdata/filescan/simple_two_col_int.txt'))
        ingest_query = MyriaQuery.submit(program)
        self.assertEqual(ingest_query.status, 'SUCCESS')

        join_json = json.loads(
            open('jsonQueries/nullChild_jortiz/ThreeWayLocalJoin.json').read())
        join_query = MyriaQuery.submit_plan(join_json).wait_for_completion()
        self.assertEqual(join_query.status, 'SUCCESS')
Example #8
0
    def test(self):
        twitterData = self.get_file_url('testdata/twitter/TwitterK.csv')
        query_uda = """
uda counter() {{[0 AS c]; [c + 1];  c;}}; T1 = load('{}',csv(schema(a:int,b:int))); T2 = [from T1 emit a, counter() as c]; store(T2, out_degree_uda);
""".format(twitterData)
        uda_result = MyriaQuery.submit(query_uda)
        query_count = """
T1 = load('{}',csv(schema(a:int,b:int))); T2 = [from T1 emit a, count(*) as c]; store(T2, out_degree_count);
""".format(twitterData)
        count_result = MyriaQuery.submit(query_count)
        self.assertListOfDictsEqual(
            count_result.to_dict(), uda_result.to_dict())
Example #9
0
    def test(self):
        program = """
r = load('{}', csv(schema(x:int, y:int), delimiter=','));
store(r, r);
""".format(self.get_file_url('testdata/filescan/simple_two_col_int.txt'))
        query = MyriaQuery.submit(program)
        self.assertEqual(query.status, 'ERROR')
Example #10
0
    def largeFile(self):
        twitterDataS3 = 's3://uwdb/sampleData/TwitterK-Large-100MB.txt'
        query_ingest = """
T1 = load('{}',csv(schema(a:int,b:int))); T2 = [from T1 emit *]; store(T2, parallelIngest);
""".format(twitterDataS3)
        result = MyriaQuery.submit(query_ingest)
        self.assertEqual(len(result.to_dict()), 15475500)
Example #11
0
    def test(self):
        program = """
r = load('{}', csv(schema(x:int, y:int), delimiter=','));
store(r, r);
""".format(self.get_file_url('testdata/filescan/simple_two_col_int.txt'))
        query = MyriaQuery.submit(program)
        self.assertEqual(query.status, 'ERROR')
Example #12
0
        def query(self, line, cell='', environment=None, language=None):
            """ Submit a Myria query written in the active language.

                The query text is ``line`` followed by a newline and
                ``cell``.  Entries of ``environment`` (if given) are first
                merged into the shell's user namespace, which is then
                passed to ``_bind`` along with the query text.

                The query runs over MyriaRelation.DefaultConnection, which
                may be set explicitly or via %connect.  ``language``
                overrides the magic's current language for this call only.

                Examples:

                %language MyriaL
                %%query
                T1 = scan(TwitterK);
                T2 = [from T1 emit $0 as x];
                store(T2, JustX);

                %language Datalog
                %query JustX(column0) :- TwitterK(column0,column1)

                q = %query JustX(column0) :- TwitterK(column0,column1)%
            """
            extra_names = environment or {}
            self.shell.user_ns.update(extra_names)

            source = line + '\n' + cell
            bound_source = _bind(source, self.shell.user_ns)
            return MyriaQuery.submit(
                bound_source,
                connection=MyriaRelation.DefaultConnection,
                language=language or self.language,
                timeout=self.timeout)
Example #13
0
    def largeFile(self):
        twitterDataS3 ='s3://uwdb/sampleData/TwitterK-Large-100MB.txt'
        query_ingest = """
T1 = load('{}',csv(schema(a:int,b:int))); T2 = [from T1 emit *]; store(T2, parallelIngest);
""".format(twitterDataS3)
        result = MyriaQuery.submit(query_ingest)
        self.assertEqual(len(result.to_dict()), 15475500)
Example #14
0
    def run(self, nodes, master, user, user_shell, volumes):
        """Start a parallel import into Myria using the master's deployment.

        Logs each ``(worker, uri)`` pair in ``self.work``, opens the
        deployment descriptor at ``DEPLOYMENT_PATH`` on ``master`` over
        SSH, builds a ``MyriaConnection`` from it and submits a parallel
        import for the relation ``self.name`` / ``self.schema``.  When
        ``self.wait_for_completion`` is truthy the call blocks until the
        query finishes.  Finally ``MyriaInstaller.web_restart(master)`` is
        invoked.

        :param nodes: unused by this method
        :param master: node object; its ``ssh.remote_file`` is used to read
            the deployment descriptor and it is passed to ``web_restart``
        :param user: unused by this method
        :param user_shell: unused by this method
        :param volumes: unused by this method
        """
        for worker, uri in self.work:
            log.info("Worker #%d ingesting %s", worker, uri)

        with master.ssh.remote_file(DEPLOYMENT_PATH, 'r') as descriptor:
            connection = MyriaConnection(deployment=descriptor, ssl=self.ssl)
            # Lazy %-formatting: consistent with the other log calls here
            # and does not raise if the URL prefix is not a str.
            log.info("MyriaConnection URI: %s", connection._url_start)
            relation = MyriaRelation(self.name,
                                     schema=self.schema,
                                     connection=connection)
            query = MyriaQuery.parallel_import(
                relation, self.work,
                scan_type=self.scan_type,
                scan_parameters=self.scan_parameters,
                insert_type=self.insert_type,
                insert_parameters=self.insert_parameters,
                timeout=self.timeout)
            log.info("Ingesting as query %d", query.query_id)

            if self.wait_for_completion:
                query.wait_for_completion()
                log.info("Ingest complete (%d, %s)",
                         query.query_id, query.status)

            # Still inside the ``with`` block, as in the original flow.
            MyriaInstaller.web_restart(master)
Example #15
0
    def test(self):
        program = r"""
R = [b'\x01' as bytes];
S = [from R emit BITSET($0) as bit];
store(S, bits);
"""
        query = MyriaQuery.submit(program)
        expected = [{
            'bit': True
        }, {
            'bit': False
        }, {
            'bit': False
        }, {
            'bit': False
        }, {
            'bit': False
        }, {
            'bit': False
        }, {
            'bit': False
        }, {
            'bit': False
        }]
        self.assertEqual(query.status, 'SUCCESS')
        self.assertListOfDictsEqual(query.to_dict(), expected)
Example #16
0
 def test(self):
     """Run the program from ``get_program`` on a random edge list.

     Generates ``self.MAXM`` random edges with endpoints in
     ``[0, self.MAXN - 1]``, writes them as CSV to a temporary file and
     submits the program twice: once with the default arguments and once
     with ``'sync'`` as the second argument.  After each run the contents
     of ``public:adhoc:CC_output`` must match ``self.get_expected(edges)``.
     """
     edges = [(random.randint(0, self.MAXN - 1),
               random.randint(0, self.MAXN - 1))
              for _ in range(self.MAXM)]
     # Open in text mode: the default 'w+b' rejects str writes on Python 3
     # (text mode behaves the same as before on Python 2).
     with NamedTemporaryFile(mode='w', suffix='.csv') as cc_input:
         cc_input.writelines('%d,%d\n' % edge for edge in edges)
         # Flush so the rows are on disk before the path is handed to
         # the submitted program.
         cc_input.flush()
         for extra_args in ((), ('sync',)):
             query = MyriaQuery.submit(
                 self.get_program(cc_input.name, *extra_args))
             self.assertEqual(query.status, 'SUCCESS')
             results = MyriaRelation('public:adhoc:CC_output').to_dict()
             self.assertListOfDictsEqual(results, self.get_expected(edges))
Example #17
0
    def test(self):
        program = r"""
R = [b'\x01' as bytes];
store(R, bytes);
"""
        query = MyriaQuery.submit(program)
        expected = [{'bytes': base64.standard_b64encode(b'\x01')}]
        self.assertEqual(query.status, 'SUCCESS')
        self.assertListOfDictsEqual(query.to_dict(), expected)
Example #18
0
    def test(self):
        program = r"""
R = [b'\x01' as bytes];
store(R, bytes);
"""
        query = MyriaQuery.submit(program)
        expected = [{'bytes': base64.standard_b64encode(b'\x01')}]
        self.assertEqual(query.status, 'SUCCESS')
        self.assertListOfDictsEqual(query.to_dict(), expected)
Example #19
0
    def test(self):
        program = r"""
R = [b'\x00\x00\x00\x00\x00\x00\x00\x01' as longBlob];
S = [from R emit int(longBlob) as num];
store(S, resNum);
"""
        query = MyriaQuery.submit(program)
        expected = [{'num': 1}]
        self.assertEqual(query.status, 'SUCCESS')
        self.assertListOfDictsEqual(query.to_dict(), expected)
Example #20
0
    def test(self):
        program = r"""
R = [b'\xDE\xAD\xBE\xEF' as bytes];
S = [from R emit BYTERANGE(R.bytes, 2, 4) as bytes];
store(S, bytes);
"""
        query = MyriaQuery.submit(program)
        expected = [{'bytes': base64.standard_b64encode(b'\xBE\xEF')}]
        self.assertEqual(query.status, 'SUCCESS')
        self.assertListOfDictsEqual(query.to_dict(), expected)
Example #21
0
    def test(self):
        # Create empty file
        program = """
emptyrelation = load('{}', csv(schema(foo:string, bar:int)));
store(emptyrelation, emptyrelation);
""".format(self.get_file_url('testdata/filescan/empty.txt'))
        expected = []
        query = MyriaQuery.submit(program)
        self.assertEqual(query.status, 'SUCCESS')
        self.assertListOfDictsEqual(query.to_dict(), expected)
Example #22
0
    def test(self):
        program = """
x = [0 as exp, 1 as val];
do
  x = [from x emit exp+1 as exp, 2*val as val];
while [from x emit max(exp) < 5];
store(x, powersOfTwo);
"""
        query = MyriaQuery.submit(program)
        expected = [{'exp': 5, 'val': 32}]

        self.assertEqual(query.status, 'SUCCESS')
        self.assertListOfDictsEqual(query.to_dict(), expected)
Example #23
0
    def test(self):
        program = """
uploaddatatest = load('{}', csv(schema(s:string, i:int)));
store(uploaddatatest, uploaddatatest);
""".format(self.get_file_url('testdata/filescan/uploaddatatest.txt'))
        expected = [{
            u's': u'foo',
            u'i': 3242
        }, {
            u's': u'bar',
            u'i': 321
        }, {
            u's': u'baz',
            u'i': 104
        }]
        query = MyriaQuery.submit(program)
        self.assertEqual(query.status, 'SUCCESS')
        self.assertListOfDictsEqual(query.to_dict(), expected)

        # Now replace with another relation
        program = """
uploaddatatest = load('{}', csv(schema(s:string, i:int), delimiter='\\t'));
store(uploaddatatest, uploaddatatest);
""".format(self.get_file_url('testdata/filescan/uploaddatatest2.txt'))
        expected = [{
            u's': u'mexico',
            u'i': 42
        }, {
            u's': u'poland',
            u'i': 12342
        }, {
            u's': u'belize',
            u'i': 802304
        }]
        query = MyriaQuery.submit(program)
        self.assertEqual(query.status, 'SUCCESS')
        self.assertListOfDictsEqual(query.to_dict(), expected)
Example #24
0
 def test(self):
     """Run the join-chain and join-circle programs on generated data.

     Four temporary CSV files (R, A0, B0, C0) are filled by
     ``self.genData``.  The chain program uses all four and the circle
     program uses A0, B0 and C0.  After each successful query the stored
     relation (``public:adhoc:joinChain`` / ``public:adhoc:joinCircle``)
     is compared with the matching ``get_join_*_expected`` result.
     """
     with NamedTemporaryFile(suffix='.csv') as r_file, \
             NamedTemporaryFile(suffix='.csv') as a_file, \
             NamedTemporaryFile(suffix='.csv') as b_file, \
             NamedTemporaryFile(suffix='.csv') as c_file:
         # Fill the files in the same order as they were opened.
         r_rows, a_rows, b_rows, c_rows = [
             self.genData(handle)
             for handle in (r_file, a_file, b_file, c_file)]

         chain_program = self.get_join_chain_program(
             r_file.name, a_file.name, b_file.name, c_file.name)
         chain_query = MyriaQuery.submit(chain_program)
         self.assertEqual(chain_query.status, 'SUCCESS')
         chain_rows = MyriaRelation('public:adhoc:joinChain').to_dict()
         chain_expected = self.get_join_chain_expected(
             r_rows, a_rows, b_rows, c_rows)
         self.assertListOfDictsEqual(chain_rows, chain_expected)

         circle_program = self.get_join_circle_program(
             a_file.name, b_file.name, c_file.name)
         circle_query = MyriaQuery.submit(circle_program)
         self.assertEqual(circle_query.status, 'SUCCESS')
         circle_rows = MyriaRelation('public:adhoc:joinCircle').to_dict()
         circle_expected = self.get_join_circle_expected(
             a_rows, b_rows, c_rows)
         self.assertListOfDictsEqual(circle_rows, circle_expected)
Example #25
0
    def test(self):
        """Store a relation, delete it, and check it can no longer be read.

        TwitterK.csv is stored as ``public:adhoc:twitterDelete``; the
        relation must report ``is_persisted == True``.  After
        ``relation.delete()`` a call to ``self.connection.dataset`` with
        the relation's qualified name must raise ``MyriaError``.
        """
        source_url = self.get_file_url('testdata/twitter/TwitterK.csv')
        template = """
        T1 = load("{}",csv(schema(a:int,b:int)));
        store(T1, public:adhoc:twitterDelete);
        """
        stored = MyriaQuery.submit(template.format(source_url))
        self.assertEqual(stored.status, 'SUCCESS')

        relation = MyriaRelation('public:adhoc:twitterDelete')
        self.assertEqual(relation.is_persisted, True)

        # Remove the relation, then confirm the lookup now fails.
        relation.delete()
        # Resolve the name outside the ``with`` so only the dataset()
        # call itself is covered by the expected-exception check.
        qualified_name = relation.qualified_name
        with self.assertRaises(MyriaError):
            self.connection.dataset(qualified_name)
Example #26
0
    def test(self):
        program = """
T2 = load('{}', csv(schema(a:int, b:int, c:int), delimiter=','));
T1 = [FROM T2 as t EMIT * ORDER BY a ASC, b DESC, c ASC LIMIT 3];
store(T1, orderbyTest);
""".format(
            self.get_file_url(
                'testdata/filescan/simple_three_col_int_to_sort.txt'))
        expected = [{
            u'a': 1,
            u'b': 4,
            u'c': 2
        }, {
            u'a': 1,
            u'b': 2,
            u'c': 2
        }, {
            u'a': 1,
            u'b': 2,
            u'c': 4
        }]
        query = MyriaQuery.submit(program)
        self.assertEqual(query.status, 'SUCCESS')
        self.assertListOfDictsEqual(query.to_dict(), expected)

        program = """
T2 = load('{}', csv(schema(a:int, b:int, c:int), delimiter=','));
T1 = SELECT a, b, c FROM T2 as t ORDER BY a ASC, b DESC, c ASC LIMIT 3;
store(T1, orderbyTest);
""".format(
            self.get_file_url(
                'testdata/filescan/simple_three_col_int_to_sort.txt'))
        expected = [{
            u'a': 1,
            u'b': 4,
            u'c': 2
        }, {
            u'a': 1,
            u'b': 2,
            u'c': 2
        }, {
            u'a': 1,
            u'b': 2,
            u'c': 4
        }]
        query = MyriaQuery.submit(program)
        self.assertEqual(query.status, 'SUCCESS')
        self.assertListOfDictsEqual(query.to_dict(), expected)

        program = """
T2 = load('{}', csv(schema(a:int, b:int, c:int), delimiter=','));
T1 = [FROM T2 as t EMIT a, b, count(*) as cnt ORDER BY a ASC, cnt DESC LIMIT 3];
store(T1, orderbyTest);
""".format(
            self.get_file_url(
                'testdata/filescan/simple_three_col_int_to_sort.txt'))
        expected = [{
            u'a': 1,
            u'b': 2,
            u'cnt': 2
        }, {
            u'a': 1,
            u'b': 4,
            u'cnt': 1
        }, {
            u'a': 10,
            u'b': 1,
            u'cnt': 2
        }]
        query = MyriaQuery.submit(program)
        self.assertEqual(query.status, 'SUCCESS')
        self.assertListOfDictsEqual(query.to_dict(), expected)