def test_multivalued_extension_method(self):
    """
    Register a multivalued UDF with ``myria_function``, invoke it as a
    relation method, then check the Apply operator found in the query and
    the single entry written into the state dict given to ``create_mock``.
    """
    server_state = {}
    with HTTMock(create_mock(server_state)):
        # Prevent ambient UDF registration
        MyriaRelation.DefaultConnection = None
        relation = MyriaRelation(FULL_NAME, connection=self.connection)

        @myria_function(name='my_udf', output_type=BOOLEAN_TYPE,
                        multivalued=True)
        def extension(column1, column2):
            return [str(column1) == str(column2),
                    str(column1) == str(column2)]

        udf = relation.my_udf()
        _apply = next(
            (op for op in udf.query.walk() if isinstance(op, Apply)), None)
        self.assertIsNotNone(_apply)
        self.assertEqual(len(_apply.emitters), 1)

        # assertIsNotNone above guarantees _apply exists, so the emitter
        # can be read directly without a fallback.
        pyudf = _apply.emitters[0][1]
        self.assertIsInstance(pyudf, PYUDF)
        self.assertEqual(pyudf.typ, BOOLEAN_TYPE)
        # assertTrue(x, 2) would treat 2 as the failure message; compare
        # the argument count explicitly.
        self.assertEqual(len(pyudf.arguments), 2)
        self.assertEqual([n.get_val() for n in pyudf.arguments],
                         SCHEMA['columnNames'])

        self.assertEqual(len(server_state), 1)
        # dict.values() is a non-indexable view on Python 3.
        registered = list(server_state.values())[0]
        self.assertTrue(registered['isMultiValued'])
        self.assertEqual(registered['outputType'], 'BOOLEAN_TYPE')
def test_project_named_expression(self):
    """
    Project a lambda expression under the keyword name ``foo`` and check
    that both the name and the literal appear in the serialized plan.
    """
    with HTTMock(create_mock()):
        relation = MyriaRelation(FULL_NAME, connection=self.connection)
        projected = relation.select(foo=lambda t: t.column + 12345678)

        # Search the stringified plan rather than walking its structure.
        rendered = str(projected._sink().to_json())
        for expected in ('foo', '12345678'):
            self.assertTrue(expected in rendered)
def test_multivalued_extension_method(self):
    """
    Register a multivalued UDF with ``myria_function``, call it through
    the relation, and verify the Apply operator in the resulting query as
    well as the entry stored in the state dict passed to ``create_mock``.
    """
    server_state = {}
    with HTTMock(create_mock(server_state)):
        # Prevent ambient UDF registration
        MyriaRelation.DefaultConnection = None
        relation = MyriaRelation(FULL_NAME, connection=self.connection)

        @myria_function(name='my_udf', output_type=BOOLEAN_TYPE,
                        multivalued=True)
        def extension(column1, column2):
            return [str(column1) == str(column2),
                    str(column1) == str(column2)]

        udf = relation.my_udf()
        _apply = next(
            (op for op in udf.query.walk() if isinstance(op, Apply)), None)
        self.assertIsNotNone(_apply)
        self.assertEqual(len(_apply.emitters), 1)

        # _apply was just asserted to be non-None; no conditional needed.
        pyudf = _apply.emitters[0][1]
        self.assertIsInstance(pyudf, PYUDF)
        self.assertEqual(pyudf.typ, BOOLEAN_TYPE)
        # The second positional argument of assertTrue is the message,
        # so the count has to be compared with assertEqual.
        self.assertEqual(len(pyudf.arguments), 2)
        self.assertEqual([n.get_val() for n in pyudf.arguments],
                         SCHEMA['columnNames'])

        self.assertEqual(len(server_state), 1)
        # Materialize the values view so it can be indexed on Python 3.
        registered = list(server_state.values())[0]
        self.assertTrue(registered['isMultiValued'])
        self.assertEqual(registered['outputType'], 'BOOLEAN_TYPE')
def test_scan(self):
    """
    Check that the first operator of the first fragment in a relation's
    plan is a scan whose name mentions the relation.
    """
    with HTTMock(create_mock()):
        relation = MyriaRelation(FULL_NAME, connection=self.connection)
        plan = relation._sink().to_json()

        first_operator = plan['plan']['fragments'][0]['operators'][0]
        operator_type = first_operator['opType']
        operator_name = first_operator['opName']

        self.assertTrue('Scan' in operator_type)
        self.assertTrue(relation.name in operator_name)
def test_name(self):
    """
    Check the name, qualified name and name components exposed by a
    relation, and that ``_get_name`` maps the qualified name back to the
    full name.
    """
    with HTTMock(local_mock):
        relation = MyriaRelation(FULL_NAME, connection=self.connection)

        # assertEqual replaces the deprecated assertEquals alias.
        self.assertEqual(relation.name, FULL_NAME)
        self.assertDictEqual(relation.qualified_name, QUALIFIED_NAME)
        self.assertListEqual(relation.components, NAME_COMPONENTS)
        self.assertEqual(relation._get_name(relation.qualified_name),
                         FULL_NAME)
def test_select_string(self):
    """
    Filter with a string expression and check that the operator and both
    operands appear in the serialized plan.
    """
    with HTTMock(create_mock()):
        relation = MyriaRelation(FULL_NAME, connection=self.connection)
        selected = relation.where("12345 + 67890")

        rendered = str(selected._sink().to_json())
        for expected in ("PLUS", '12345', "67890"):
            self.assertTrue(expected in rendered)
def test_select_expression(self):
    """
    Filter with a lambda comparison and check that the operator, literal
    and column name appear in the serialized plan.
    """
    with HTTMock(create_mock()):
        relation = MyriaRelation(FULL_NAME, connection=self.connection)
        selected = relation.where(lambda t: t.column < 123456)

        rendered = str(selected._sink().to_json())
        for expected in ("LT", '123456', "column"):
            self.assertTrue(expected in rendered)
def test_count(self):
    """
    Count a relation without arguments: no grouping, a single COUNTALL
    aggregate, and a plan that serializes to something non-None.
    """
    with HTTMock(create_mock()):
        source = MyriaRelation(FULL_NAME, connection=self.connection)
        counted = source.count()

        self.assertListEqual(counted.query.grouping_list, [])
        self.assertListEqual(counted.query.aggregate_list, [COUNTALL()])

        plan = counted._sink().to_json()
        self.assertIsNotNone(plan)
def _on_completed(self):
    """ Load query metadata after query completion """
    datasets = self.connection._wrap_get(
        '/dataset', params={'queryId': self.query_id})

    # Nothing was returned for this query id: leave the attributes alone.
    if not len(datasets):
        return

    # Only the first returned dataset is used.
    self._qualified_name = datasets[0]['relationKey']
    self._name = MyriaRelation._get_name(self._qualified_name)
    self._components = MyriaRelation._get_name_components(self._name)
def test_json_download(self):
    """
    Download a relation as a list of dicts, both in full and with every
    ``limit`` from 1 up to the number of expected tuples.
    """
    with HTTMock(local_mock):
        relation = MyriaRelation(FULL_NAME, connection=self.connection)
        self.assertListEqual(relation.to_dict(), TUPLES)

        # range() exists on both Python 2 and 3; xrange() is Python 2 only.
        for limit in range(1, len(TUPLES) + 1):
            self.assertListEqual(relation.to_dict(limit=limit),
                                 TUPLES[:limit])
def test_connection(self):
    """
    A relation keeps the connection it was given, and uses
    ``MyriaRelation.DefaultConnection`` when none is supplied.
    """
    with HTTMock(local_mock):
        relation = MyriaRelation(FULL_NAME, connection=self.connection)
        # assertEqual replaces the deprecated assertEquals alias.
        self.assertEqual(relation.connection, self.connection)

        relation = MyriaRelation(FULL_NAME)
        self.assertEqual(relation.connection,
                         MyriaRelation.DefaultConnection)
def test_project_named_string(self):
    """
    Project a column given as a string under the keyword name ``foo`` and
    check which names do and do not appear in the serialized plan.
    """
    with HTTMock(create_mock()):
        relation = MyriaRelation(FULL_NAME, connection=self.connection)
        projected = relation.select(foo='column')

        rendered = str(projected._sink().to_json())
        for expected in ("outputName", 'foo', "column"):
            self.assertTrue(expected in rendered)
        # The other column must not be present in the projected plan.
        self.assertFalse("column2" in rendered)
def test_unpersisted_dict_download(self):
    """
    Downloading a relation named ``public:adhoc:NOTFOUND`` that was
    created with an explicit schema yields an empty list.
    """
    with HTTMock(local_mock):
        schema = MyriaSchema({'columnNames': ['name'],
                              'columnTypes': ['INT_TYPE']})
        relation = MyriaRelation('public:adhoc:NOTFOUND',
                                 connection=self.connection,
                                 schema=schema)
        # assertEqual replaces the deprecated assertEquals alias.
        self.assertEqual(relation.to_dict(), [])
def test_count_attribute(self):
    """
    Count a named column: no grouping, a single COUNT aggregate over
    attribute position 1, and a plan that serializes to non-None.
    """
    with HTTMock(create_mock()):
        source = MyriaRelation(FULL_NAME, connection=self.connection)
        counted = source.count('column2')

        expected_aggregates = [COUNT(UnnamedAttributeRef(1))]
        self.assertListEqual(counted.query.grouping_list, [])
        self.assertListEqual(counted.query.aggregate_list,
                             expected_aggregates)

        plan = counted._sink().to_json()
        self.assertIsNotNone(plan)
def test_project_positional_string(self):
    """
    Project a single column given as a positional string and check which
    names do and do not appear in the serialized plan.
    """
    with HTTMock(create_mock()):
        relation = MyriaRelation(FULL_NAME, connection=self.connection)
        projected = relation.select("column")

        rendered = str(projected._sink().to_json())
        for expected in ("outputName", "column"):
            self.assertTrue(expected in rendered)
        self.assertFalse("column2" in rendered)
def test_static_load(self):
    """
    Load a URL through the static ``MyriaRelation.load`` and inspect the
    query stored in the state dict given to ``create_mock``.
    """
    state = {}
    with HTTMock(create_mock(state)):
        url = 'file:///foo.bar'
        MyriaRelation.load(FULL_NAME, url, MyriaSchema(SCHEMA),
                           connection=self.connection)

        # First operator of the second fragment of the submitted plan.
        operator = state['query']['plan']['fragments'][1]['operators'][0]
        text = json.dumps(operator)
        for expected in ('FileScan', 'TupleSource', url):
            self.assertTrue(expected in text)
def _on_completed(self):
    """ Load query metadata after query completion """
    datasets = self.connection._wrap_get(
        '/dataset', params={'queryId': self.query_id})

    # Without any returned dataset there is no metadata to load.
    if not len(datasets):
        raise AttributeError('Unable to load query metadata '
                             '(query status={})'.format(self.status))

    # Only the first returned dataset is used.
    self._qualified_name = datasets[0]['relationKey']
    self._name = MyriaRelation._get_name(self._qualified_name)
    self._components = MyriaRelation._get_name_components(self._name)
def test_product(self):
    """
    Joining two relations without a predicate produces a CrossProduct
    operator whose inputs are the two relations' queries.
    """
    with HTTMock(create_mock()):
        left = MyriaRelation(FULL_NAME, connection=self.connection)
        right = MyriaRelation(FULL_NAME2, connection=self.connection)
        product = left.join(right)

        # Build a real list: on Python 3 filter() returns an iterator that
        # is always truthy and cannot be indexed.
        joins = [op for op in product.query.walk()
                 if isinstance(op, CrossProduct)]
        self.assertTrue(joins)
        self.assertEqual(joins[0].left, left.query)
        self.assertEqual(joins[0].right, right.query)
        self.assertIsNotNone(product._sink().to_json())
def test_join_predicate(self):
    """
    Joining with a lambda predicate produces a Join operator over the two
    relations' queries with a condition that is not the tautology.
    """
    with HTTMock(create_mock()):
        left = MyriaRelation(FULL_NAME, connection=self.connection)
        right = MyriaRelation(FULL_NAME2, connection=self.connection)
        joined = left.join(right, lambda l, r: l.column == r.column3)

        # Build a real list: on Python 3 filter() returns an iterator that
        # is always truthy and cannot be indexed.
        joins = [op for op in joined.query.walk() if isinstance(op, Join)]
        self.assertTrue(joins)
        self.assertEqual(joins[0].left, left.query)
        self.assertEqual(joins[0].right, right.query)
        self.assertIsNotNone(joins[0].condition)
        self.assertNotEqual(joins[0].condition, TAUTOLOGY)
        self.assertIsNotNone(joined._sink().to_json())
def test_python_udf_predicate(self):
    """
    A ``where`` lambda that calls ``eval`` ends up as a boolean PYUDF
    condition on the Select operator, taking the schema's columns as
    arguments.
    """
    with HTTMock(create_mock()):
        relation = MyriaRelation(FULL_NAME, connection=self.connection)
        udf = relation.where(lambda t: eval("t[0] < 10"))

        select = next(
            (op for op in udf.query.walk() if isinstance(op, Select)), None)
        self.assertIsNotNone(select)

        pyudf = select.condition
        self.assertIsInstance(pyudf, PYUDF)
        self.assertEqual(pyudf.typ, BOOLEAN_TYPE)
        # assertTrue(x, 2) would treat 2 as the failure message; compare
        # the argument count explicitly.
        self.assertEqual(len(pyudf.arguments), 2)
        self.assertEqual([n.get_val() for n in pyudf.arguments],
                         SCHEMA['columnNames'])
def test_python_udf(self):
    """
    A ``select`` lambda that calls ``eval`` ends up as a single PYUDF
    emitter of string type on the Apply operator, taking the schema's
    columns as arguments.
    """
    with HTTMock(create_mock()):
        relation = MyriaRelation(FULL_NAME, connection=self.connection)
        udf = relation.select(lambda t: eval("5 < 10"))

        _apply = next(
            (op for op in udf.query.walk() if isinstance(op, Apply)), None)
        self.assertIsNotNone(_apply)
        self.assertEqual(len(_apply.emitters), 1)

        # assertIsNotNone above guarantees _apply exists, so the emitter
        # can be read directly without a fallback.
        pyudf = _apply.emitters[0][1]
        self.assertIsInstance(pyudf, PYUDF)
        self.assertEqual(pyudf.typ, STRING_TYPE)
        # assertTrue(x, 2) would treat 2 as the failure message; compare
        # the argument count explicitly.
        self.assertEqual(len(pyudf.arguments), 2)
        self.assertEqual([n.get_val() for n in pyudf.arguments],
                         SCHEMA['columnNames'])
def test_join_lambda_attribute(self):
    """
    Joining with a predicate and a lambda projection produces a
    ProjectingJoin whose output columns are attribute position 1.
    """
    with HTTMock(create_mock()):
        left = MyriaRelation(FULL_NAME, connection=self.connection)
        right = MyriaRelation(FULL_NAME2, connection=self.connection)
        joined = left.join(right,
                           lambda l, r: l.column == r.column3,
                           projection=[lambda l, r: l.column2])

        # Build a real list: on Python 3 filter() returns an iterator that
        # is always truthy and cannot be indexed.
        joins = [op for op in joined.query.walk()
                 if isinstance(op, ProjectingJoin)]
        self.assertTrue(joins)
        self.assertEqual(joins[0].left, left.query)
        self.assertEqual(joins[0].right, right.query)
        self.assertIsNotNone(joins[0].output_columns)
        self.assertListEqual(joins[0].output_columns,
                             [UnnamedAttributeRef(1)])
        self.assertIsNotNone(joined._sink().to_json())
def test_unpersisted_relation(self):
    """
    A relation named ``public:adhoc:NOTFOUND`` created with an explicit
    schema reports ``is_persisted`` as false.
    """
    with HTTMock(local_mock):
        schema = MyriaSchema({'columnNames': ['name'],
                              'columnTypes': ['INT_TYPE']})
        relation = MyriaRelation('public:adhoc:NOTFOUND',
                                 connection=self.connection,
                                 schema=schema)
        self.assertFalse(relation.is_persisted)
def test_python_registered_udf(self):
    """
    A ``select`` lambda that calls ``udf1`` yields exactly one Apply
    operator with a single PYUDF emitter of type ``UDF1_TYPE`` taking
    ``UDF1_ARITY`` arguments.
    """
    with HTTMock(create_mock()):
        relation = MyriaRelation(FULL_NAME, connection=self.connection)
        # NOTE(review): presumably the local only has to make the name
        # ``udf1`` resolvable inside the lambda -- confirm.
        udf1 = id
        udf = relation.select(lambda t: udf1(t[0]))

        # Build a real list: on Python 3 filter() returns an iterator
        # that supports neither len() nor indexing.
        applys = [op for op in udf.query.walk() if isinstance(op, Apply)]
        self.assertEqual(len(applys), 1)

        _apply = applys[0]
        self.assertIsNotNone(_apply)
        self.assertEqual(len(_apply.emitters), 1)

        pyudf = _apply.emitters[0][1]
        self.assertIsInstance(pyudf, PYUDF)
        self.assertEqual(pyudf.typ, UDF1_TYPE)
        # assertTrue(x, n) would treat n as the failure message; compare
        # the argument count explicitly.
        self.assertEqual(len(pyudf.arguments), UDF1_ARITY)
def test_python_udf_predicate(self):
    """
    Filter with a lambda that calls ``eval`` and check that the Select
    operator's condition is a boolean PYUDF whose arguments are the
    schema's column names.
    """
    with HTTMock(create_mock()):
        relation = MyriaRelation(FULL_NAME, connection=self.connection)
        udf = relation.where(lambda t: eval("t[0] < 10"))

        select = next(
            (op for op in udf.query.walk() if isinstance(op, Select)), None)
        self.assertIsNotNone(select)

        pyudf = select.condition
        self.assertIsInstance(pyudf, PYUDF)
        self.assertEqual(pyudf.typ, BOOLEAN_TYPE)
        # The second positional argument of assertTrue is the message,
        # so the count has to be compared with assertEqual.
        self.assertEqual(len(pyudf.arguments), 2)
        self.assertEqual([n.get_val() for n in pyudf.arguments],
                         SCHEMA['columnNames'])
def test_python_udf(self):
    """
    Project with a lambda that calls ``eval`` and check that the Apply
    operator carries one PYUDF emitter of string type whose arguments
    are the schema's column names.
    """
    with HTTMock(create_mock()):
        relation = MyriaRelation(FULL_NAME, connection=self.connection)
        udf = relation.select(lambda t: eval("5 < 10"))

        _apply = next(
            (op for op in udf.query.walk() if isinstance(op, Apply)), None)
        self.assertIsNotNone(_apply)
        self.assertEqual(len(_apply.emitters), 1)

        # _apply was just asserted to be non-None; no conditional needed.
        pyudf = _apply.emitters[0][1]
        self.assertIsInstance(pyudf, PYUDF)
        self.assertEqual(pyudf.typ, STRING_TYPE)
        # The second positional argument of assertTrue is the message,
        # so the count has to be compared with assertEqual.
        self.assertEqual(len(pyudf.arguments), 2)
        self.assertEqual([n.get_val() for n in pyudf.arguments],
                         SCHEMA['columnNames'])
def test_parallel_import(self):
    """
    Run ``MyriaQuery.parallel_import`` over two (uri, integer) work items
    and check the status reported by the resulting query.
    """
    with HTTMock(local_mock):
        schema = MyriaSchema({
            'columnNames': ['column'],
            'columnTypes': ['INT_TYPE']
        })
        relation = MyriaRelation(FULL_NAME, schema=schema,
                                 connection=self.connection)
        work = [('http://input-uri-0', 0), ('http://input-uri-1', 1)]

        query = MyriaQuery.parallel_import(relation, work)
        # assertEqual replaces the deprecated assertEquals alias.
        self.assertEqual(query.status, 'Unittest-Success')
def test_load(self):
    """
    Load a URL into a relation and inspect the file scan both in the
    locally generated plan and in the query stored in the state dict
    after execution.
    """
    state = {}
    with HTTMock(create_mock(state)):
        url = 'file:///foo.bar'

        def check_scan(operator):
            # The serialized operator must mention the scan and the URL.
            text = json.dumps(operator)
            for expected in ('FileScan', 'TupleSource', url):
                self.assertTrue(expected in text)

        relation = MyriaRelation(FULL_NAME, connection=self.connection)
        relation.load(url, schema=MyriaSchema(SCHEMA))

        # Before execution the operator is read from the second fragment.
        plan = relation._sink().to_json()
        check_scan(plan['plan']['fragments'][1]['operators'][0])

        relation.execute()

        # The stored query is read from its first fragment instead.
        plan = state['query']
        check_scan(plan['plan']['fragments'][0]['operators'][0])
def test_persisted_with_schema(self):
    """
    Constructing a relation with the schema ``SCHEMA`` succeeds, while a
    schema with a different column name or column type raises
    ValueError.
    """
    with HTTMock(local_mock):
        matching = MyriaRelation(FULL_NAME,
                                 connection=self.connection,
                                 schema=MyriaSchema(SCHEMA))
        self.assertIsInstance(matching, MyriaRelation)

        mismatches = (
            {'columnNames': ['foo'], 'columnTypes': ['INT_TYPE']},
            {'columnNames': ['column'], 'columnTypes': ['STRING_TYPE']},
        )
        for mismatch in mismatches:
            # Build the schema outside the assertion so only the relation
            # constructor is expected to raise.
            schema = MyriaSchema(mismatch)
            with self.assertRaises(ValueError):
                MyriaRelation(FULL_NAME,
                              connection=self.connection,
                              schema=schema)
def test_persisted_relation(self):
    """
    A relation constructed for ``FULL_NAME`` reports ``is_persisted`` as
    true.
    """
    with HTTMock(local_mock):
        relation = MyriaRelation(FULL_NAME, connection=self.connection)
        self.assertTrue(relation.is_persisted)
def test_json_download(self):
    """
    Downloading a relation as a list of dicts returns ``TUPLES``.
    """
    with HTTMock(local_mock):
        source = MyriaRelation(FULL_NAME, connection=self.connection)
        downloaded = source.to_dict()
        self.assertListEqual(downloaded, TUPLES)
def test_unpersisted_dict_download(self):
    """
    Downloading a relation named ``public:adhoc:NOTFOUND`` yields an
    empty list.
    """
    with HTTMock(local_mock):
        relation = MyriaRelation('public:adhoc:NOTFOUND',
                                 connection=self.connection)
        # assertEqual replaces the deprecated assertEquals alias.
        self.assertEqual(relation.to_dict(), [])
def test_len(self):
    """
    ``len()`` of a relation equals ``TOTAL_TUPLES``.
    """
    with HTTMock(local_mock):
        relation = MyriaRelation(FULL_NAME, connection=self.connection)
        # assertEqual replaces the deprecated assertEquals alias.
        self.assertEqual(len(relation), TOTAL_TUPLES)
def test_created_date(self):
    """
    A relation's ``created_date`` equals ``CREATED_DATE``.
    """
    with HTTMock(local_mock):
        relation = MyriaRelation(FULL_NAME, connection=self.connection)
        # assertEqual replaces the deprecated assertEquals alias.
        self.assertEqual(relation.created_date, CREATED_DATE)
def test_persisted_schema(self):
    """
    The schema of a relation constructed without one converts to a dict
    equal to ``SCHEMA``.
    """
    with HTTMock(local_mock):
        source = MyriaRelation(FULL_NAME, connection=self.connection)
        schema_dict = source.schema.to_dict()
        self.assertDictEqual(schema_dict, SCHEMA)