def testFilterPushdownThroughUnion(self):
    """Check the SQL compiled for filters combined with a union.

    Covers a filter applied on top of a union, and a union of two
    collections that were each filtered beforehand.
    """
    # Case 1: filter applied after the union.
    unioned = self.expr['name', 'id'].union(self.expr2['id', 'name'])
    expr = unioned.filter(unioned.id + 1 < 3)
    expected = (
        'SELECT * \n'
        'FROM (\n'
        ' SELECT t1.`name`, t1.`id` \n'
        ' FROM mocked_project.`pyodps_test_expr_table` t1 \n'
        ' WHERE (t1.`id` + 1) < 3 \n'
        ' UNION ALL\n'
        ' SELECT t2.`name`, t2.`id` \n'
        ' FROM mocked_project.`pyodps_test_expr_table2` t2 \n'
        ' WHERE (t2.`id` + 1) < 3\n'
        ') t3'
    )
    compiled = ODPSEngine(self.odps).compile(expr, prettify=False)
    self.assertEqual(to_str(expected), to_str(compiled))

    # Case 2: both sides are filtered before being unioned.
    left = self.expr.filter(self.expr.id == 1)['name', 'id']
    right = self.expr.filter(self.expr.id == 0)['id', 'name']
    expr = left.union(right)
    expected = (
        'SELECT * \n'
        'FROM (\n'
        ' SELECT t1.`name`, t1.`id` \n'
        ' FROM mocked_project.`pyodps_test_expr_table` t1 \n'
        ' WHERE t1.`id` == 1 \n'
        ' UNION ALL\n'
        ' SELECT t2.`name`, t2.`id` \n'
        ' FROM mocked_project.`pyodps_test_expr_table` t2 \n'
        ' WHERE t2.`id` == 0\n'
        ') t3'
    )
    compiled = ODPSEngine(self.odps).compile(expr, prettify=False)
    self.assertEqual(to_str(expected), to_str(compiled))
def testUnionPrune(self):
    """Check the SQL compiled when columns are selected from a union."""
    # Selecting a single column from the union.
    left = self.expr.select('name', 'id')
    casted_id = self.expr3.fid.astype('int').rename('id')
    right = self.expr3.select(casted_id, self.expr3.name)
    expr = left.union(right)['id']
    expected = (
        "SELECT t3.`id` \n"
        "FROM (\n"
        " SELECT t1.`id` \n"
        " FROM mocked_project.`pyodps_test_expr_table` t1 \n"
        " UNION ALL\n"
        " SELECT CAST(t2.`fid` AS BIGINT) AS `id` \n"
        " FROM mocked_project.`pyodps_test_expr_table2` t2\n"
        ") t3"
    )
    compiled = ODPSEngine(self.odps).compile(expr, prettify=False)
    self.assertEqual(to_str(expected), to_str(compiled))

    # Union of two whole collections, no projection at all.
    expr = self.expr.union(self.expr2)
    expected = (
        'SELECT * \n'
        'FROM (\n'
        ' SELECT * \n'
        ' FROM mocked_project.`pyodps_test_expr_table` t1 \n'
        ' UNION ALL\n'
        ' SELECT * \n'
        ' FROM mocked_project.`pyodps_test_expr_table2` t2\n'
        ') t3'
    )
    compiled = ODPSEngine(self.odps).compile(expr, prettify=False)
    self.assertEqual(to_str(expected), to_str(compiled))
def testFilterPrune(self):
    """Check column pruning and compiled SQL for filtered collections."""
    # Filter followed by a projection.
    expr = self.expr.filter(self.expr.name == 'name1')
    expr = expr['name', 'id']

    new_expr = ColumnPruning(expr.to_dag()).prune()
    self.assertIsInstance(new_expr.input, FilterCollectionExpr)
    self.assertNotIsInstance(new_expr.input.input, ProjectCollectionExpr)
    self.assertIsNotNone(new_expr.input.input._source_data)

    expected = 'SELECT t1.`name`, t1.`id` \n' \
               'FROM mocked_project.`pyodps_test_expr_table` t1 \n' \
               'WHERE t1.`name` == \'name1\''
    self.assertEqual(to_str(expected),
                     to_str(ODPSEngine(self.odps).compile(expr, prettify=False)))

    # Filter alone: the pruned root is the filter itself.
    expr = self.expr.filter(self.expr.name == 'name1')

    new_expr = ColumnPruning(expr.to_dag()).prune()
    self.assertIsInstance(new_expr, FilterCollectionExpr)
    self.assertIsNotNone(new_expr.input._source_data)

    # Filter whose predicate is an IN sub-query on another collection.
    expr = self.expr.filter(self.expr.id.isin(self.expr3.id))

    expected = 'SELECT * \n' \
               'FROM mocked_project.`pyodps_test_expr_table` t1 \n' \
               'WHERE t1.`id` IN (SELECT t3.`id` FROM ( ' \
               'SELECT t2.`id` FROM mocked_project.`pyodps_test_expr_table2` t2 ) t3)'
    compiled = ODPSEngine(self.odps).compile(expr, prettify=False)

    def squash(sql):
        # Collapse whitespace runs so the inline sub-query spacing is ignored.
        return ' '.join(to_str(sql).split())

    # The previous assertTrue(expected, actual) used `actual` as the failure
    # message and could never fail; compare the two statements for real.
    self.assertEqual(squash(expected), squash(compiled))
def testSerializers(self):
    """Serialize an Example model, compare the XML, then parse it back."""
    teacher = Example.Teacher(name='t1')
    professors = [Example.Teacher(name='p1'), Example.Teacher(name='p2')]
    jsn = Example.Json(label='json', tags=['t1', 't2'],
                       nest=Example.Json.Nest(name='n'),
                       nests=[Example.Json.Nest(name='n1'), Example.Json.Nest(name='n2')])

    # Round-trip through a timetuple to drop sub-second precision.
    dt = datetime.fromtimestamp(time.mktime(datetime.now().timetuple()))
    example = Example(name='example 1', type='ex', date=dt,
                      lessons=['less1', 'less2'], teacher=teacher, professors=professors,
                      properties={'test': 'true'}, jsn=jsn)
    sel = example.serialize()

    self.assertEqual(
        to_str(expected_xml_template % utils.gen_rfc822(dt, localtime=True)),
        to_str(sel))

    parsed_example = Example.parse(sel)

    self.assertEqual(example.name, parsed_example.name)
    self.assertEqual(example.type, parsed_example.type)
    self.assertEqual(example.date, parsed_example.date)
    self.assertSequenceEqual(example.lessons, parsed_example.lessons)
    self.assertEqual(example.teacher, parsed_example.teacher)
    self.assertSequenceEqual(example.professors, parsed_example.professors)

    # The old check handed a generator expression to assertTrue, which is
    # always truthy, so the properties were never actually compared.
    self.assertEqual(len(example.properties), len(parsed_example.properties))
    for key in example.properties:
        self.assertEqual(example.properties[key], parsed_example.properties[key])

    self.assertEqual(example.jsn.label, parsed_example.jsn.label)
    self.assertEqual(example.jsn.tags, parsed_example.jsn.tags)
    self.assertEqual(example.jsn.nest, parsed_example.jsn.nest)
    self.assertSequenceEqual(example.jsn.nests, parsed_example.jsn.nests)
def testCreateDeleteFunction(self):
    """Create a function backed by a py resource, inspect it, then drop both."""
    test_resource_name = 'pyodps_t_tmp_test_function_resource.py'
    test_function_name = 'pyodps_t_tmp_test_function'

    # Remove leftovers from earlier runs; a missing object is fine.
    leftovers = (
        (self.odps.delete_resource, test_resource_name),
        (self.odps.delete_function, test_function_name),
    )
    for remove, target in leftovers:
        try:
            remove(target)
        except errors.NoSuchObject:
            pass

    test_resource = self.odps.create_resource(
        test_resource_name, 'py', file_obj=FILE_CONTENT)

    module_name = test_resource_name.split('.', 1)[0]
    test_function = self.odps.create_function(
        test_function_name,
        class_type=module_name + '.MyPlus',
        resources=[test_resource])

    self.assertIsNotNone(test_function.name)
    self.assertIsNotNone(test_function.owner)
    self.assertIsNotNone(test_function.creation_time)
    self.assertIsNotNone(test_function.class_type)
    self.assertEqual(len(test_function.resources), 1)

    with self.odps.open_resource(name=test_resource_name, mode='r') as fp:
        content = fp.read()
        self.assertEqual(to_str(content), to_str(FILE_CONTENT))

    test_resource.drop()
    test_function.drop()
def testSamplePrune(self):
    """Check the SQL compiled for a projection taken from a sampled collection."""
    sampled = self.expr['name', 'id'].sample(parts=5)
    expr = sampled['id', ]
    expected = (
        "SELECT t1.`id` \n"
        "FROM mocked_project.`pyodps_test_expr_table` t1 \n"
        "WHERE SAMPLE(5, 1)"
    )
    compiled = ODPSEngine(self.odps).compile(expr, prettify=False)
    self.assertEqual(to_str(expected), to_str(compiled))
def testJoinPrune(self):
    """Check the SQL compiled for column selection over joined collections."""
    # Left join, then keep a single renamed column.
    left = self.expr.select(self.expr, type='normal')
    right = self.expr3[:4]
    joined = left.left_join(right, on='id')
    expr = joined.id_x.rename('id')
    expected = (
        "SELECT t2.`id` \n"
        "FROM (\n"
        " SELECT t1.`id` \n"
        " FROM mocked_project.`pyodps_test_expr_table` t1\n"
        ") t2 \n"
        "LEFT OUTER JOIN \n"
        " (\n"
        " SELECT t3.`id` \n"
        " FROM mocked_project.`pyodps_test_expr_table2` t3 \n"
        " LIMIT 4\n"
        " ) t4\n"
        "ON t2.`id` == t4.`id`"
    )
    compiled = ODPSEngine(self.odps).compile(expr, prettify=False)
    self.assertEqual(to_str(expected), to_str(compiled))

    # Plain inner join on `name`, compiled without any projection.
    joined = self.expr.join(self.expr2, 'name')
    expected = (
        'SELECT t1.`name`, t1.`id` AS `id_x`, t1.`fid` AS `fid_x`, '
        't1.`isMale` AS `isMale_x`, t1.`scale` AS `scale_x`, '
        't1.`birth` AS `birth_x`, t1.`ds` AS `ds_x`, t2.`id` AS `id_y`, '
        't2.`fid` AS `fid_y`, t2.`isMale` AS `isMale_y`, t2.`scale` AS `scale_y`, '
        't2.`birth` AS `birth_y`, t2.`ds` AS `ds_y` \n'
        'FROM mocked_project.`pyodps_test_expr_table` t1 \n'
        'INNER JOIN \n'
        ' mocked_project.`pyodps_test_expr_table2` t2\n'
        'ON t1.`name` == t2.`name`'
    )
    compiled = ODPSEngine(self.odps).compile(joined, prettify=False)
    self.assertEqual(to_str(expected), to_str(compiled))
def testXFlowInstanceToXML(self):
    """Compare the generated XFlow instance XML with the expected document."""
    project = 'algo_project'
    xflows = self.odps.get_project(project).xflows
    got_xml = xflows._gen_xlow_instance_xml(
        xflow_name='pyodps_t_tmp_xflow_algo_name',
        xflow_project=project,
        parameters={'key': 'value'})
    self.assertEqual(to_str(got_xml), to_str(EXPECTED_XFLOW_INSTANCE_XML))
def testSmallRowsFormatter(self):
    """A ResultFrame must render the same with and without the pandas flag."""
    rows = [self._random_values() for _ in range(10)]

    with_pandas = ResultFrame(data=rows, schema=self.schema, pandas=True)
    without_pandas = ResultFrame(data=rows, schema=self.schema, pandas=False)

    # Text and HTML representations are compared separately.
    self.assertEqual(to_str(repr(with_pandas)), to_str(repr(without_pandas)))
    self.assertEqual(to_str(with_pandas._repr_html_()),
                     to_str(without_pandas._repr_html_()))

    # Iterating the frame yields exactly its stored values.
    iterated = [row for row in without_pandas]
    self.assertEqual(without_pandas._values, iterated)
def testDistinctPrune(self):
    """Check the SQL compiled for a projection taken from a distinct collection."""
    distinct = self.expr.distinct(self.expr.id + 1, self.expr.name)
    expr = distinct['name', ]
    expected = (
        "SELECT t2.`name` \n"
        "FROM (\n"
        " SELECT DISTINCT t1.`id` + 1 AS `id`, t1.`name` \n"
        " FROM mocked_project.`pyodps_test_expr_table` t1 \n"
        ") t2"
    )
    compiled = ODPSEngine(self.odps).compile(expr, prettify=False)
    self.assertEqual(to_str(expected), to_str(compiled))
def testSQLTaskToXML(self):
    """Serialize a SQLTask to XML and parse that XML back into a SQLTask."""
    query = 'select * from dual'

    serialized = SQLTask(query=query).serialize()
    wanted = template % {'sql': query}
    self.assertEqual(to_str(serialized), to_str(wanted))

    # Parsing the serialized form must give back the same task class.
    parsed = Task.parse(None, serialized)
    self.assertIsInstance(parsed, SQLTask)
def testSortPrune(self):
    """Check the SQL compiled for a projection taken from a sorted collection."""
    projected = self.expr[self.expr.exclude('name'), self.expr.name.rename('name2')]
    expr = projected.sort('name2')['id', 'fid']
    expected = (
        "SELECT t2.`id`, t2.`fid` \n"
        "FROM (\n"
        " SELECT t1.`id`, t1.`fid`, t1.`name` AS `name2` \n"
        " FROM mocked_project.`pyodps_test_expr_table` t1 \n"
        " ORDER BY name2 \n"
        " LIMIT 10000\n"
        ") t2"
    )
    compiled = ODPSEngine(self.odps).compile(expr, prettify=False)
    self.assertEqual(to_str(expected), to_str(compiled))
def testSlicePrune(self):
    """Check pruning and compiled SQL for filter -> slice -> projection."""
    filtered = self.expr.filter(self.expr.fid < 0)
    expr = filtered[:4]['name', lambda x: x.id + 1]

    pruned = ColumnPruning(expr.to_dag()).prune()
    # Three levels below the pruned root must still carry the source data.
    self.assertIsNotNone(pruned.input.input.input._source_data)

    expected = (
        "SELECT t1.`name`, t1.`id` + 1 AS `id` \n"
        "FROM mocked_project.`pyodps_test_expr_table` t1 \n"
        "WHERE t1.`fid` < 0 \n"
        "LIMIT 4"
    )
    compiled = ODPSEngine(self.odps).compile(expr, prettify=False)
    self.assertEqual(to_str(expected), to_str(compiled))
def testMutatePrune(self):
    """Check the SQL compiled when only some mutated columns are kept."""
    new_id = self.expr.fid.astype('int').rename('new_id')
    expr = self.expr[self.expr.exclude('birth'), new_id]

    running_sum = expr.groupby('name').mutate(
        lambda x: x.new_id.cumsum().rename('new_id_sum'))
    expr = expr[expr, running_sum]
    expr = expr[expr.new_id, expr.new_id_sum]

    expected = (
        "SELECT t2.`new_id`, t2.`new_id_sum` \n"
        "FROM (\n"
        " SELECT CAST(t1.`fid` AS BIGINT) AS `new_id`, "
        "SUM(CAST(t1.`fid` AS BIGINT)) OVER (PARTITION BY t1.`name`) AS `new_id_sum` \n"
        " FROM mocked_project.`pyodps_test_expr_table` t1 \n"
        ") t2"
    )
    compiled = ODPSEngine(self.odps).compile(expr, prettify=False)
    self.assertEqual(to_str(expected), to_str(compiled))
def testLargeColumnsFormatter(self):
    """Wide ResultFrames must render the same with and without the pandas flag."""
    # Repeat the base schema ten times, suffixing each name with the round index.
    names = [name + str(i) for i in range(10) for name in self.schema.names]
    types = self.schema.types * 10
    schema = Schema.from_lists(names, types)

    def gen_row():
        # One wide row is ten random base rows laid side by side.
        parts = [self._random_values().values for _ in range(10)]
        return list(itertools.chain(*parts))

    odps_schema = lambda: df_schema_to_odps_schema(schema)
    data = [Record(schema=odps_schema(), values=gen_row()) for _ in range(10)]

    with_pandas = ResultFrame(data=data, schema=schema, pandas=True)
    without_pandas = ResultFrame(data=data, schema=schema, pandas=False)

    self.assertEqual(to_str(repr(with_pandas)), to_str(repr(without_pandas)))
    self.assertEqual(to_str(with_pandas._repr_html_()),
                     to_str(without_pandas._repr_html_()))
def testUnion(self):
    """Execute a union of two tables and compare the sorted rows."""
    data = [
        ['name1', 4, 5.3, None, None, None],
        ['name2', 2, 3.5, None, None, None],
        ['name1', 4, 4.2, None, None, None],
        ['name1', 3, 2.2, None, None, None],
        ['name1', 3, 4.1, None, None, None],
    ]

    # Build the second table from scratch.
    schema2 = Schema.from_lists(
        ['name', 'id2', 'id3'],
        [types.string, types.bigint, types.bigint])
    table_name = 'pyodps_test_engine_table2'
    self.odps.delete_table(table_name, if_exists=True)
    table2 = self.odps.create_table(name=table_name, schema=schema2)
    expr2 = CollectionExpr(_source_data=table2,
                           _schema=odps_schema_to_df_schema(schema2))

    self._gen_data(data=data)

    data2 = [
        ['name3', 5, -1],
        ['name4', 6, -2],
    ]
    records2 = [table2.new_record(values=row) for row in data2]
    self.odps.write_table(table2, 0, records2)

    try:
        left = self.expr['name', 'id'].distinct()
        right = expr2[expr2.id2.rename('id'), 'name']
        expr = left.union(right)

        res = self.engine.execute(expr)
        result = sorted(self._get_result(res))

        expected = sorted([
            ['name1', 4],
            ['name1', 3],
            ['name2', 2],
            ['name3', 5],
            ['name4', 6],
        ])

        self.assertEqual(len(result), len(expected))
        # Compare row by row on string forms so text types do not matter.
        for got_row, want_row in zip(result, expected):
            self.assertEqual([to_str(cell) for cell in got_row],
                             [to_str(cell) for cell in want_row])
    finally:
        table2.drop()
def testFilterPartitionPrune(self):
    """Check pruning and compiled SQL for partition filter -> filter -> projection."""
    partitioned = self.expr.filter_partition('ds=today')
    filtered = partitioned[lambda x: x.fid < 0]
    expr = filtered['name', lambda x: x.id + 1]

    pruned = ColumnPruning(expr.to_dag()).prune()
    kept_columns = set(pruned.input.input.schema.names)
    self.assertEqual(kept_columns, set(['name', 'id', 'fid']))

    expected = (
        "SELECT t2.`name`, t2.`id` + 1 AS `id` \n"
        "FROM (\n"
        " SELECT t1.`name`, t1.`id`, t1.`fid` \n"
        " FROM mocked_project.`pyodps_test_expr_table` t1 \n"
        " WHERE t1.`ds` == 'today' \n"
        ") t2 \n"
        "WHERE t2.`fid` < 0"
    )
    compiled = ODPSEngine(self.odps).compile(expr, prettify=False)
    self.assertEqual(to_str(expected), to_str(compiled))
def testReadWriteTable(self):
    """Write Record objects to a fresh table and read them back three ways."""
    test_table_name = tn('pyodps_t_tmp_read_write_table')
    schema = Schema.from_lists(['id', 'name', 'right'],
                               ['bigint', 'string', 'boolean'])

    self.odps.delete_table(test_table_name, if_exists=True)
    self.assertFalse(self.odps.exist_table(test_table_name))

    table = self.odps.create_table(test_table_name, schema)
    data = [
        [111, 'aaa', True],
        [222, 'bbb', False],
        [333, 'ccc', True],
        [444, '中文', False],
    ]
    length = len(data)
    records = [Record(schema=schema, values=row) for row in data]

    # Expected rows, with the name column passed through to_str.
    texted_data = [[row[0], to_str(row[1]), row[2]] for row in data]

    self.odps.write_table(table, 0, records)

    def values_of(reader):
        return [record.values for record in reader]

    self.assertSequenceEqual(
        texted_data, values_of(self.odps.read_table(table, length)))
    self.assertSequenceEqual(
        texted_data[::2], values_of(self.odps.read_table(table, length, step=2)))
    self.assertSequenceEqual(texted_data, values_of(table.head(length)))

    self.odps.delete_table(test_table_name)
    self.assertFalse(self.odps.exist_table(test_table_name))
def testJoin(self):
    """Execute joins between two tables and check the returned rows."""
    data = [
        ['name1', 4, 5.3, None, None, None],
        ['name2', 2, 3.5, None, None, None],
        ['name1', 4, 4.2, None, None, None],
        ['name1', 3, 2.2, None, None, None],
        ['name1', 3, 4.1, None, None, None],
    ]

    # Build the second table from scratch.
    schema2 = Schema.from_lists(
        ['name', 'id2', 'id3'],
        [types.string, types.bigint, types.bigint])
    table_name = 'pyodps_test_engine_table2'
    self.odps.delete_table(table_name, if_exists=True)
    table2 = self.odps.create_table(name=table_name, schema=schema2)
    expr2 = CollectionExpr(_source_data=table2,
                           _schema=odps_schema_to_df_schema(schema2))

    self._gen_data(data=data)

    data2 = [
        ['name1', 4, -1],
        ['name2', 1, -2],
    ]
    records2 = [table2.new_record(values=row) for row in data2]
    self.odps.write_table(table2, 0, records2)

    try:
        # Join without an explicit `on` argument.
        expr = self.expr.join(expr2)['name', 'id2']
        result = self._get_result(self.engine.execute(expr))

        self.assertEqual(len(result), 5)
        expected = [
            [to_str('name1'), 4],
            [to_str('name2'), 1],
        ]
        self.assertFalse(any(row not in expected for row in result))

        # Join on name plus the (id, id2) pair.
        joined = self.expr.join(expr2, on=['name', ('id', 'id2')])
        expr = joined[self.expr.name, expr2.id2]
        result = self._get_result(self.engine.execute(expr))

        self.assertEqual(len(result), 2)
        expected = [to_str('name1'), 4]
        self.assertFalse(any(row != expected for row in result))
    finally:
        table2.drop()
def testCreateInstanceXML(self):
    """Compare the submit-instance XML built for a SQL task with the template."""
    uuid = '359696d4-ac73-4e6c-86d1-6649b01f1a22'
    query = 'select * from dual;'
    priority = 5

    instances = self.odps._project.instances
    job = instances._create_job(
        task=SQLTask(query=query), priority=priority, uuid_=uuid)
    xml = instances._get_submit_instance_content(job)

    substitutions = {'query': query, 'uuid': uuid, 'priority': priority}
    expected_xml = expected_xml_template % substitutions
    self.assertEqual(to_str(xml), to_str(expected_xml))
def testString(self):
    """Run string expressions on the engine and compare with Python equivalents.

    Each pair holds a Python function applied to the generated name values
    and the expression expected to produce the same column.
    """
    data = self._gen_data(5)
    name = self.expr.name
    # The first generated name doubles as the search pattern.
    needle = data[0][0]

    methods_to_fields = [
        (lambda s: s.capitalize(), name.capitalize()),
        (lambda s: needle in s, name.contains(needle, regex=False)),
        (lambda s: s.count(needle), name.count(needle)),
        (lambda s: s.endswith(needle), name.endswith(needle)),
        (lambda s: s.startswith(needle), name.startswith(needle)),
        (lambda s: s.find(needle), name.find(needle)),
        (lambda s: s.rfind(needle), name.rfind(needle)),
        (lambda s: s.replace(needle, 'test'), name.replace(needle, 'test')),
        (lambda s: s[0], name.get(0)),
        (lambda s: len(s), name.len()),
        (lambda s: s.ljust(10), name.ljust(10)),
        (lambda s: s.ljust(20, '*'), name.ljust(20, fillchar='*')),
        (lambda s: s.rjust(10), name.rjust(10)),
        (lambda s: s.rjust(20, '*'), name.rjust(20, fillchar='*')),
        (lambda s: s * 4, name.repeat(4)),
        (lambda s: s[2: 10: 2], name.slice(2, 10, 2)),
        (lambda s: s[-5: -1], name.slice(-5, -1)),
        (lambda s: s.title(), name.title()),
        (lambda s: s.rjust(20, '0'), name.zfill(20)),
        (lambda s: s.isalnum(), name.isalnum()),
        (lambda s: s.isalpha(), name.isalpha()),
        (lambda s: s.isdigit(), name.isdigit()),
        (lambda s: s.isspace(), name.isspace()),
        (lambda s: s.isupper(), name.isupper()),
        (lambda s: s.istitle(), name.istitle()),
        (lambda s: to_str(s).isnumeric(), name.isnumeric()),
        (lambda s: to_str(s).isdecimal(), name.isdecimal()),
    ]

    # Give every expression a distinct output column name.
    fields = []
    for position, pair in enumerate(methods_to_fields):
        fields.append(pair[1].rename('id' + str(position)))

    expr = self.expr[fields]
    res = self.engine.execute(expr)
    result = self._get_result(res)

    for position, (py_func, _) in enumerate(methods_to_fields):
        from_python = [py_func(row[0]) for row in data]
        from_engine = [row[position] for row in result]
        self.assertEqual(from_python, from_engine)
def testArithmeticFormatter(self):
    """Check repr() of an arithmetic expression against the expected text.

    When the line comparison fails, lines are compared one by one and a
    mismatching pair is still accepted if both parse as nearly equal floats.
    """
    expr = self.expr
    d = -(expr["id"]) + 20.34 - expr["id"] + float(20) * expr["id"] \
        - expr["id"] / 4.9 + 40 // 2 + expr["id"] // 1.2

    try:
        self._lines_eq(EXPECTED_ARITHMETIC_FORMAT, repr(d))
    except AssertionError as e:
        left = [to_str(line.rstrip()) for line in EXPECTED_ARITHMETIC_FORMAT.split("\n")]
        right = [to_str(line.rstrip()) for line in repr(d).split("\n")]
        self.assertEqual(len(left), len(right))
        for expected_line, actual_line in zip(left, right):
            try:
                self.assertEqual(expected_line, actual_line)
            except AssertionError:
                try:
                    self.assertAlmostEqual(float(expected_line), float(actual_line))
                except (AssertionError, ValueError):
                    # Not numeric (ValueError) or numerically different
                    # (AssertionError): report the original mismatch. A bare
                    # except here would also trap KeyboardInterrupt/SystemExit.
                    raise e
def testGroupbyPrune(self):
    """Check the SQL compiled when one column is kept from a filtered aggregate."""
    # Both cases share everything except the kept column and the SELECT line.
    cases = (
        ('name', "SELECT t1.`name` \n"),
        ('id', "SELECT MAX(t1.`id`) AS `id` \n"),
    )
    for column, select_line in cases:
        aggregated = self.expr.groupby('name').agg(id=self.expr.id.max())
        expr = aggregated[aggregated.id < 0][column, ]

        expected = (
            select_line +
            "FROM mocked_project.`pyodps_test_expr_table` t1 \n"
            "GROUP BY t1.`name` \n"
            "HAVING MAX(t1.`id`) < 0"
        )
        compiled = ODPSEngine(self.odps).compile(expr, prettify=False)
        self.assertEqual(to_str(expected), to_str(compiled))
def testReadNonAsciiSQLInstance(self):
    """Read non-ASCII and control-character strings back through a SQL instance."""
    test_table = tn('pyodps_t_tmp_read_non_ascii_sql_instance')
    self.odps.delete_table(test_table, if_exists=True)

    schema = Schema.from_lists(['size', 'name'], ['bigint', 'string'])
    table = self.odps.create_table(test_table, schema=schema, if_not_exists=True)

    data = [
        [1, '中\\\\n\\\n文 ,\r '],
        [2, '测试\x00\x01\x02数据'],
    ]
    records = [table.new_record(row) for row in data]
    self.odps.write_table(table, 0, records)

    instance = self.odps.execute_sql('select name from %s' % test_table)
    with instance.open_reader() as reader:
        read_data = sorted([to_str(rec[0]) for rec in reader])
        expected_data = sorted([to_str(row[1]) for row in data])
        self.assertSequenceEqual(read_data, expected_data)

    table.drop()
def testCEncodeAndDecode(self):
    """Round-trip values through the Cython protobuf Encoder and Decoder.

    The test only warns and returns when the compiled extension modules
    cannot be imported.
    """
    # Only the imports are guarded, so an ImportError raised later in the
    # test is reported as an error instead of a warning.
    try:
        from odps.tunnel.pb.encoder_c import Encoder
        from odps.tunnel.pb.decoder_c import Decoder
    except ImportError:
        warnings.warn("No Encoder or Decoder built by cython found")
        return

    encoder = Encoder()
    encoder.append_tag(0, WIRETYPE_VARINT)
    encoder.append_tag(1, WIRETYPE_VARINT)
    encoder.append_sint64(-2 ** 40)
    encoder.append_tag(2, WIRETYPE_LENGTH_DELIMITED)
    encoder.append_string(to_binary("hello"))
    encoder.append_tag(3, WIRETYPE_VARINT)
    encoder.append_bool(True)
    encoder.append_tag(4, WIRETYPE_FIXED64)
    encoder.append_float(3.14)
    encoder.append_double(0.31415926)
    encoder.append_tag(5, WIRETYPE_VARINT)
    encoder.append_uint32(2 ** 30)
    encoder.append_tag(6, WIRETYPE_VARINT)
    encoder.append_uint64(2 ** 40)

    buffer_size = len(encoder)

    tube = io.BytesIO(encoder.tostring())
    decoder = Decoder(tube)

    # assertEqual replaces the deprecated assertEquals alias throughout.
    self.assertEqual((0, WIRETYPE_VARINT), decoder.read_field_number_and_wire_type())
    self.assertEqual((1, WIRETYPE_VARINT), decoder.read_field_number_and_wire_type())
    self.assertEqual(-2 ** 40, decoder.read_sint64())
    self.assertEqual((2, WIRETYPE_LENGTH_DELIMITED), decoder.read_field_number_and_wire_type())
    self.assertEqual(to_str("hello"), to_str(decoder.read_string()))
    self.assertEqual((3, WIRETYPE_VARINT), decoder.read_field_number_and_wire_type())
    self.assertEqual(True, decoder.read_bool())
    self.assertEqual((4, WIRETYPE_FIXED64), decoder.read_field_number_and_wire_type())
    self.assertAlmostEqual(3.14, decoder.read_float(), delta=0.001)
    self.assertEqual(0.31415926, decoder.read_double())
    self.assertEqual((5, WIRETYPE_VARINT), decoder.read_field_number_and_wire_type())
    self.assertEqual(2 ** 30, decoder.read_uint32())
    self.assertEqual((6, WIRETYPE_VARINT), decoder.read_field_number_and_wire_type())
    self.assertEqual(2 ** 40, decoder.read_uint64())

    # Everything that was encoded must have been consumed.
    self.assertEqual(buffer_size, decoder.position())
def testFilterPushdownThroughProjection(self):
    """Check the SQL compiled for filters applied after projections."""
    def compiled_sql(target):
        # A fresh engine is used for every compilation.
        return to_str(ODPSEngine(self.odps).compile(target, prettify=False))

    # Filter on a computed column.
    expr = self.expr[self.expr.id + 1, 'name'][lambda x: x.id < 10]
    expected = (
        'SELECT t1.`id` + 1 AS `id`, t1.`name` \n'
        'FROM mocked_project.`pyodps_test_expr_table` t1 \n'
        'WHERE (t1.`id` + 1) < 10'
    )
    self.assertEqual(to_str(expected), compiled_sql(expr))

    # Two chained filters after a projection.
    projected = self.expr['name', self.expr.id ** 2]
    expr = projected.filter(lambda x: x.name == 'name1').filter(lambda x: x.id < 3)
    expected = (
        "SELECT t1.`name`, CAST(POW(t1.`id`, 2) AS BIGINT) AS `id` \n"
        "FROM mocked_project.`pyodps_test_expr_table` t1 \n"
        "WHERE (t1.`name` == 'name1') AND ((CAST(POW(t1.`id`, 2) AS BIGINT)) < 3)"
    )
    self.assertEqual(to_str(expected), compiled_sql(expr))

    # projection -> filter -> projection -> filter
    first = self.expr['name', self.expr.id + 1].filter(lambda x: x.name == 'name1')
    second = first[lambda x: 'tt' + x.name, 'id']
    expr = second.filter(lambda x: x.id < 3)
    expected = (
        "SELECT CONCAT('tt', t1.`name`) AS `name`, t1.`id` + 1 AS `id` \n"
        "FROM mocked_project.`pyodps_test_expr_table` t1 \n"
        "WHERE (t1.`name` == 'name1') AND ((t1.`id` + 1) < 3)"
    )
    self.assertEqual(to_str(expected), compiled_sql(expr))

    # filter -> select -> projection -> filter
    first = self.expr.filter(self.expr.name == 'name1').select(
        'name', lambda x: (x.id + 1) * 2)
    second = first[lambda x: 'tt' + x.name, 'id']
    expr = second.filter(lambda x: x.id < 3)
    expected = (
        "SELECT CONCAT('tt', t1.`name`) AS `name`, (t1.`id` + 1) * 2 AS `id` \n"
        "FROM mocked_project.`pyodps_test_expr_table` t1 \n"
        "WHERE (((t1.`id` + 1) * 2) < 3) AND (t1.`name` == 'name1')"
    )
    self.assertEqual(to_str(expected), compiled_sql(expr))

    # Aggregate over a collection filtered by two predicates.
    filtered = self.expr.filter(
        self.expr.id.between(2, 6),
        self.expr.name.lower().contains('pyodps', regex=False))
    expr = filtered.name.nunique()
    expected = (
        "SELECT COUNT(DISTINCT t2.`name`) AS `name_nunique` \n"
        "FROM (\n"
        " SELECT t1.`id`, t1.`name` \n"
        " FROM mocked_project.`pyodps_test_expr_table` t1 \n"
        " WHERE ((t1.`id` >= 2) AND (t1.`id` <= 6)) AND INSTR(TOLOWER(t1.`name`), 'pyodps') > 0 \n"
        ") t2"
    )
    self.assertEqual(to_str(expected), compiled_sql(expr))
def testProjectPrune(self):
    """Check column pruning and compiled SQL for plain projections."""
    # Simple two-column select.
    expr = self.expr.select('name', 'id')

    new_expr = ColumnPruning(expr.to_dag()).prune()
    self.assertIsInstance(new_expr, ProjectCollectionExpr)
    self.assertIsNotNone(new_expr.input._source_data)

    expected = 'SELECT t1.`name`, t1.`id` \n' \
               'FROM mocked_project.`pyodps_test_expr_table` t1'
    # Both sides go through to_str, like every other assertion in this test;
    # this one used to compare the raw values.
    self.assertEqual(to_str(expected),
                     to_str(ODPSEngine(self.odps).compile(expr, prettify=False)))

    # Constant and NULL scalars next to a real column.
    expr = self.expr[Scalar(3).rename('const'),
                     NullScalar('string').rename('string_const'),
                     self.expr.id]
    expected = 'SELECT 3 AS `const`, CAST(NULL AS STRING) AS `string_const`, t1.`id` \n' \
               'FROM mocked_project.`pyodps_test_expr_table` t1'
    self.assertEqual(to_str(expected),
                     to_str(ODPSEngine(self.odps).compile(expr, prettify=False)))

    # Builtin function call used as a selected column.
    expr = self.expr.select(
        pt=BuiltinFunction('max_pt', args=(self.expr._source_data.name,)))
    expected = "SELECT max_pt('pyodps_test_expr_table') AS `pt` \n" \
               "FROM mocked_project.`pyodps_test_expr_table` t1"
    self.assertEqual(to_str(expected),
                     to_str(ODPSEngine(self.odps).compile(expr, prettify=False)))
def testArrayReadWriteTable(self):
    """Write plain lists to a fresh table and read them back three ways."""
    test_table_name = tn("pyodps_t_tmp_read_write_table")
    schema = Schema.from_lists(["id", "name", "right"],
                               ["bigint", "string", "boolean"])

    self.odps.delete_table(test_table_name, if_exists=True)
    self.assertFalse(self.odps.exist_table(test_table_name))

    table = self.odps.create_table(test_table_name, schema)
    data = [
        [111, "aaa", True],
        [222, "bbb", False],
        [333, "ccc", True],
        [444, "中文", False],
    ]
    length = len(data)

    # Expected rows, with the name column passed through to_str.
    texted_data = [[row[0], to_str(row[1]), row[2]] for row in data]

    # The raw lists are written directly, without wrapping them in records.
    self.odps.write_table(table, 0, data)

    def values_of(reader):
        return [record.values for record in reader]

    self.assertSequenceEqual(
        texted_data, values_of(self.odps.read_table(table, length)))
    self.assertSequenceEqual(
        texted_data[::2], values_of(self.odps.read_table(table, length, step=2)))
    self.assertSequenceEqual(texted_data, values_of(table.head(length)))

    self.odps.delete_table(test_table_name)
    self.assertFalse(self.odps.exist_table(test_table_name))
def testFileResource(self):
    """Exercise a file resource's file object in every supported open mode."""
    resource_name = tn('pyodps_t_tmp_file_resource')

    # Best-effort removal of a leftover resource from an earlier run.
    try:
        self.odps.delete_resource(resource_name)
    except errors.ODPSError:
        pass

    resource = self.odps.create_resource(resource_name, 'file', file_obj=FILE_CONTENT)
    self.assertIsInstance(resource, FileResource)

    # Mode 'r': writes are rejected, reads return text.
    with resource.open(mode='r') as fp:
        self.assertRaises(IOError, lambda: fp.write('sss'))
        self.assertRaises(IOError, lambda: fp.writelines(['sss\n']))

        self.assertIsInstance(fp.read(), six.text_type)

        fp.seek(0, compat.SEEK_END)
        size = fp.tell()
        fp.seek(0)
        self.assertEqual(fp._size, size)

        self.assertEqual(to_str(fp.read()), to_str(FILE_CONTENT))
        fp.seek(1)
        self.assertEqual(to_str(fp.read()), to_str(FILE_CONTENT[1:]))

        fp.seek(0)
        self.assertEqual(to_str(fp.readline()), to_str(FILE_CONTENT.split('\n', 1)[0] + '\n'))

        fp.seek(0)
        add_newline = lambda s: s if s.endswith('\n') else s + '\n'
        self.assertEqual(
            [to_str(add_newline(l)) for l in fp],
            [to_str(add_newline(l)) for l in FILE_CONTENT.splitlines()])

        self.assertFalse(fp._need_commit)
        self.assertTrue(fp._opened)

    # Leaving the with-block must have closed the handle.
    self.assertFalse(fp._opened)
    self.assertIsNone(fp._fp)

    # Mode 'w': reads are rejected, writes mark the handle as needing commit.
    with resource.open(mode='w') as fp:
        self.assertRaises(IOError, fp.read)
        self.assertRaises(IOError, fp.readline)
        self.assertRaises(IOError, fp.readlines)

        fp.writelines([OVERWRITE_FILE_CONTENT] * 2)

        self.assertTrue(fp._need_commit)

        size = fp._size

    # Mode 'r+': sees what was just written, then overwrites it.
    with resource.open(mode='r+') as fp:
        self.assertEqual(to_str(fp.read()), to_str(OVERWRITE_FILE_CONTENT * 2))

        self.assertEqual(size, fp._size)

        fp.seek(0)
        fp.write(FILE_CONTENT)
        fp.truncate()

        self.assertTrue(fp._need_commit)

    # Mode 'a': reads are rejected, writes are appended.
    with resource.open(mode='a') as fp:
        self.assertRaises(IOError, fp.read)
        self.assertRaises(IOError, fp.readline)
        self.assertRaises(IOError, fp.readlines)

        fp.write(OVERWRITE_FILE_CONTENT)

        self.assertTrue(fp._need_commit)

    # Mode 'a+': the appended content is readable; then cut to one character.
    with resource.open(mode='a+') as fp:
        self.assertEqual(to_str(fp.read()), to_str(FILE_CONTENT + OVERWRITE_FILE_CONTENT))
        fp.seek(1)
        fp.truncate()
        self.assertTrue(fp._need_commit)

    # Opened without a context manager on purpose; try/finally makes sure
    # the handle is closed even when the assertion fails.
    fp = resource.open(mode='r')
    try:
        self.assertEqual(to_str(fp.read()), FILE_CONTENT[0])
    finally:
        fp.close()

    # Mode 'w+': starts out empty.
    with resource.open(mode='w+') as fp:
        self.assertEqual(len(fp.read()), 0)
        fp.write(FILE_CONTENT)

    with resource.open(mode='r+') as fp:
        self.assertEqual(to_str(fp.read()), FILE_CONTENT)

    # Mode 'rb' after an update: reads return bytes.
    resource.update(file_obj='update')
    with resource.open(mode='rb') as fp:
        self.assertIsInstance(fp.read(), six.binary_type)
        fp.seek(0)
        self.assertEqual(to_str(fp.read()), to_str('update'))

    self.odps.delete_resource(resource_name)
def _gen_random_string(self, max_length=15):
    """Return a random string of 1 to ``max_length`` characters.

    Characters are picked from indexes 0..51 of the module-level
    ``letters`` sequence. ``max_length`` used to be ignored in favour of a
    hard-coded 15; it is now honoured, and the default is unchanged.
    """
    gen_letter = lambda: letters[random.randint(0, 51)]
    length = random.randint(1, max_length)
    return to_str(''.join([gen_letter() for _ in range(length)]))
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. import zipfile from odps.tests.core import TestBase, to_str, tn from odps.compat import unittest, six from odps import compat from odps.models import Resource, FileResource, TableResource, VolumeArchiveResource, \ VolumeFileResource, Schema from odps import errors, types FILE_CONTENT = to_str(""" Proudly swept the rain by the cliffs As it glided through the trees Still following ever the bud The ahihi lehua of the vale """) OVERWRITE_FILE_CONTENT = to_str(""" Farewell to thee, farewell to thee The charming one who dwells in the shaded bowers One fond embrace, 'Ere I depart Until we meet again Sweet memories come back to me Bringing fresh remembrances Of the past Dearest one, yes, you are mine own From you, true love shall never depart """)
def testElement(self):
    """Run element-wise expressions on the engine and compare with Python results."""
    data = self._gen_data(5, nullable_field='name')

    name, id_, fid = self.expr.name, self.expr.id, self.expr.fid
    fields = [
        name.isnull().rename('name1'),
        name.notnull().rename('name2'),
        name.fillna('test').rename('name3'),
        id_.isin([1, 2, 3]).rename('id1'),
        id_.isin(fid.astype('int')).rename('id2'),
        id_.notin([1, 2, 3]).rename('id3'),
        id_.notin(fid.astype('int')).rename('id4'),
        id_.between(fid, 3).rename('id5'),
        name.fillna('test').switch(
            'test', 'test' + name.fillna('test'),
            'test2', 'test2' + name.fillna('test'),
            default=name).rename('name4'),
        id_.cut([100, 200, 300],
                labels=['xsmall', 'small', 'large', 'xlarge'],
                include_under=True, include_over=True).rename('id6'),
    ]

    expr = self.expr[fields]
    res = self.engine.execute(expr)
    result = self._get_result(res)

    def column(index):
        # One output column of the engine result, in row order.
        return [row[index] for row in result]

    self.assertEqual(len(data), len(result))

    # isnull / notnull: count the truthy flags.
    null_names = [row for row in data if row[0] is None]
    self.assertEqual(len(null_names), len([flag for flag in column(0) if flag]))

    present_names = [row[0] for row in data if row[0] is not None]
    self.assertEqual(len(present_names), len([flag for flag in column(1) if flag]))

    # fillna
    self.assertEqual([(row[0] if row[0] is not None else 'test') for row in data],
                     column(2))

    # isin / notin against a literal list and against the fid column.
    self.assertEqual([(row[1] in (1, 2, 3)) for row in data], column(3))

    fids = [int(row[2]) for row in data]
    self.assertEqual([(row[1] in fids) for row in data], column(4))

    self.assertEqual([(row[1] not in (1, 2, 3)) for row in data], column(5))

    self.assertEqual([(row[1] not in fids) for row in data], column(6))

    # between
    self.assertEqual([(row[2] <= row[1] <= 3) for row in data], column(7))

    # switch
    self.assertEqual([to_str('testtest' if row[0] is None else row[0]) for row in data],
                     [to_str(value) for value in column(8)])

    # cut
    def get_val(val):
        for upper_bound, label in ((100, 'xsmall'), (200, 'small'), (300, 'large')):
            if val <= upper_bound:
                return label
        return 'xlarge'

    self.assertEqual([to_str(get_val(row[1])) for row in data],
                     [to_str(value) for value in column(9)])
def _gen_random_string(self, max_length=15):
    """Return a random string of 1 to ``max_length`` characters.

    Characters are picked from indexes 0..51 of the module-level
    ``letters`` sequence.
    """
    # The length is drawn first, then one index per character.
    length = random.randint(1, max_length)
    chars = []
    for _ in range(length):
        chars.append(letters[random.randint(0, 51)])
    return to_str(''.join(chars))