コード例 #1
0
    def testFilterPushdownThroughUnion(self):
        """Check the SQL compiled for filters combined with a union.

        Case 1 filters the result of a union and expects a WHERE clause in
        each branch.  Case 2 unions two already-filtered projections.
        """
        # Case 1: filter applied on top of the union.
        unioned = self.expr['name', 'id'].union(self.expr2['id', 'name'])
        filtered = unioned.filter(unioned.id + 1 < 3)

        expected_sql = (
            'SELECT * \n'
            'FROM (\n'
            '  SELECT t1.`name`, t1.`id` \n'
            '  FROM mocked_project.`pyodps_test_expr_table` t1 \n'
            '  WHERE (t1.`id` + 1) < 3 \n'
            '  UNION ALL\n'
            '    SELECT t2.`name`, t2.`id` \n'
            '    FROM mocked_project.`pyodps_test_expr_table2` t2 \n'
            '    WHERE (t2.`id` + 1) < 3\n'
            ') t3'
        )
        actual_sql = ODPSEngine(self.odps).compile(filtered, prettify=False)
        self.assertEqual(to_str(expected_sql), to_str(actual_sql))

        # Case 2: each side of the union carries its own filter.
        left = self.expr.filter(self.expr.id == 1)['name', 'id']
        right = self.expr.filter(self.expr.id == 0)['id', 'name']
        unioned = left.union(right)

        expected_sql = (
            'SELECT * \n'
            'FROM (\n'
            '  SELECT t1.`name`, t1.`id` \n'
            '  FROM mocked_project.`pyodps_test_expr_table` t1 \n'
            '  WHERE t1.`id` == 1 \n'
            '  UNION ALL\n'
            '    SELECT t2.`name`, t2.`id` \n'
            '    FROM mocked_project.`pyodps_test_expr_table` t2 \n'
            '    WHERE t2.`id` == 0\n'
            ') t3'
        )
        actual_sql = ODPSEngine(self.odps).compile(unioned, prettify=False)
        self.assertEqual(to_str(expected_sql), to_str(actual_sql))
コード例 #2
0
    def testUnionPrune(self):
        """Check the SQL compiled for a union, with and without a column selection."""
        # Selecting only `id` from the union: both branches should project `id` only.
        left_side = self.expr.select('name', 'id')
        right_side = self.expr3.select(self.expr3.fid.astype('int').rename('id'), self.expr3.name)
        id_only = left_side.union(right_side)['id']

        expected_sql = (
            "SELECT t3.`id` \n"
            "FROM (\n"
            "  SELECT t1.`id` \n"
            "  FROM mocked_project.`pyodps_test_expr_table` t1 \n"
            "  UNION ALL\n"
            "    SELECT CAST(t2.`fid` AS BIGINT) AS `id` \n"
            "    FROM mocked_project.`pyodps_test_expr_table2` t2\n"
            ") t3"
        )
        actual_sql = ODPSEngine(self.odps).compile(id_only, prettify=False)
        self.assertEqual(to_str(expected_sql), to_str(actual_sql))

        # Plain union of two whole tables: nothing to prune.
        whole_union = self.expr.union(self.expr2)

        expected_sql = (
            'SELECT * \n'
            'FROM (\n'
            '  SELECT * \n'
            '  FROM mocked_project.`pyodps_test_expr_table` t1 \n'
            '  UNION ALL\n'
            '    SELECT * \n'
            '    FROM mocked_project.`pyodps_test_expr_table2` t2\n'
            ') t3'
        )
        actual_sql = ODPSEngine(self.odps).compile(whole_union, prettify=False)
        self.assertEqual(to_str(expected_sql), to_str(actual_sql))
コード例 #3
0
    def testFilterPrune(self):
        """Check column pruning and compiled SQL for filtered collections.

        Covers a filter followed by a projection, a bare filter, and a filter
        whose predicate is an ``isin`` against another collection's column.
        """
        # Filter followed by a projection.
        expr = self.expr.filter(self.expr.name == 'name1')
        expr = expr['name', 'id']

        new_expr = ColumnPruning(expr.to_dag()).prune()

        self.assertIsInstance(new_expr.input, FilterCollectionExpr)
        self.assertNotIsInstance(new_expr.input.input, ProjectCollectionExpr)
        self.assertIsNotNone(new_expr.input.input._source_data)

        expected = 'SELECT t1.`name`, t1.`id` \n' \
                   'FROM mocked_project.`pyodps_test_expr_table` t1 \n' \
                   'WHERE t1.`name` == \'name1\''
        self.assertEqual(to_str(expected), to_str(ODPSEngine(self.odps).compile(expr, prettify=False)))

        # Bare filter: the pruned root stays a filter directly over the source.
        expr = self.expr.filter(self.expr.name == 'name1')

        new_expr = ColumnPruning(expr.to_dag()).prune()

        self.assertIsInstance(new_expr, FilterCollectionExpr)
        self.assertIsNotNone(new_expr.input._source_data)

        # Filter using an IN sub-query.
        expr = self.expr.filter(self.expr.id.isin(self.expr3.id))

        expected = 'SELECT * \n' \
                   'FROM mocked_project.`pyodps_test_expr_table` t1 \n' \
                   'WHERE t1.`id` IN (SELECT t3.`id` FROM (  ' \
                   'SELECT t2.`id`   FROM mocked_project.`pyodps_test_expr_table2` t2 ) t3)'
        # assertTrue(a, b) would treat b as the failure message and never
        # compare the two strings; assertEqual performs the intended check.
        self.assertEqual(to_str(expected), to_str(ODPSEngine(self.odps).compile(expr, prettify=False)))
コード例 #4
0
    def testSerializers(self):
        """Round-trip an ``Example`` model through ``serialize`` and ``parse``.

        The serialized text is compared with ``expected_xml_template`` and each
        field of the re-parsed object is compared with the original.
        """
        teacher = Example.Teacher(name='t1')
        professors = [Example.Teacher(name='p1'), Example.Teacher(name='p2')]
        jsn = Example.Json(label='json', tags=['t1', 't2'],
                           nest=Example.Json.Nest(name='n'),
                           nests=[Example.Json.Nest(name='n1'), Example.Json.Nest(name='n2')])

        # Going through timetuple()/mktime drops the microseconds from "now".
        dt = datetime.fromtimestamp(time.mktime(datetime.now().timetuple()))
        example = Example(name='example 1', type='ex', date=dt,
                          lessons=['less1', 'less2'], teacher=teacher, professors=professors,
                          properties={'test': 'true'}, jsn=jsn)
        sel = example.serialize()

        self.assertEqual(
            to_str(expected_xml_template % utils.gen_rfc822(dt, localtime=True)), to_str(sel))

        parsed_example = Example.parse(sel)

        self.assertEqual(example.name, parsed_example.name)
        self.assertEqual(example.type, parsed_example.type)
        self.assertEqual(example.date, parsed_example.date)
        self.assertSequenceEqual(example.lessons, parsed_example.lessons)
        self.assertEqual(example.teacher, parsed_example.teacher)
        self.assertSequenceEqual(example.professors, parsed_example.professors)
        # The previous form handed a generator expression to assertTrue, which
        # is always truthy and was never evaluated; check the mapping for real.
        self.assertEqual(len(example.properties), len(parsed_example.properties))
        for key in example.properties:
            self.assertEqual(example.properties[key], parsed_example.properties[key])
        self.assertEqual(example.jsn.label, parsed_example.jsn.label)
        self.assertEqual(example.jsn.tags, parsed_example.jsn.tags)
        self.assertEqual(example.jsn.nest, parsed_example.jsn.nest)
        self.assertSequenceEqual(example.jsn.nests, parsed_example.jsn.nests)
コード例 #5
0
    def testCreateDeleteFunction(self):
        """Create a function backed by a python resource, inspect it, then drop both."""
        resource_name = 'pyodps_t_tmp_test_function_resource.py'
        function_name = 'pyodps_t_tmp_test_function'

        # Clear leftovers from earlier runs; a missing object is not an error.
        try:
            self.odps.delete_resource(resource_name)
        except errors.NoSuchObject:
            pass
        try:
            self.odps.delete_function(function_name)
        except errors.NoSuchObject:
            pass

        resource = self.odps.create_resource(resource_name, 'py', file_obj=FILE_CONTENT)

        # The class type is "<resource name without extension>.MyPlus".
        module_name = resource_name.split('.', 1)[0]
        function = self.odps.create_function(
            function_name,
            class_type=module_name + '.MyPlus',
            resources=[resource])

        for attribute in (function.name, function.owner,
                          function.creation_time, function.class_type):
            self.assertIsNotNone(attribute)
        self.assertEqual(len(function.resources), 1)

        with self.odps.open_resource(name=resource_name, mode='r') as reader:
            content = reader.read()
            self.assertEqual(to_str(content), to_str(FILE_CONTENT))

        resource.drop()
        function.drop()
コード例 #6
0
    def testSamplePrune(self):
        """Check the SQL compiled for a sampled projection narrowed to `id`."""
        projected = self.expr['name', 'id']
        sampled = projected.sample(parts=5)
        id_only = sampled['id', ]

        expected_sql = (
            "SELECT t1.`id` \n"
            "FROM mocked_project.`pyodps_test_expr_table` t1 \n"
            "WHERE SAMPLE(5, 1)"
        )
        actual_sql = ODPSEngine(self.odps).compile(id_only, prettify=False)
        self.assertEqual(to_str(expected_sql), to_str(actual_sql))
コード例 #7
0
    def testJoinPrune(self):
        """Check the SQL compiled for joins, with and without a column selection."""
        # Left join where only the left `id` is finally selected.
        left_side = self.expr.select(self.expr, type='normal')
        right_side = self.expr3[:4]
        left_joined = left_side.left_join(right_side, on='id')
        id_only = left_joined.id_x.rename('id')

        expected_sql = (
            "SELECT t2.`id` \n"
            "FROM (\n"
            "  SELECT t1.`id` \n"
            "  FROM mocked_project.`pyodps_test_expr_table` t1\n"
            ") t2 \n"
            "LEFT OUTER JOIN \n"
            "  (\n"
            "    SELECT t3.`id` \n"
            "    FROM mocked_project.`pyodps_test_expr_table2` t3 \n"
            "    LIMIT 4\n"
            "  ) t4\n"
            "ON t2.`id` == t4.`id`"
        )
        actual_sql = ODPSEngine(self.odps).compile(id_only, prettify=False)
        self.assertEqual(to_str(expected_sql), to_str(actual_sql))

        # Inner join on `name` with every column kept.
        inner_joined = self.expr.join(self.expr2, 'name')

        expected_sql = (
            'SELECT t1.`name`, t1.`id` AS `id_x`, t1.`fid` AS `fid_x`, '
            't1.`isMale` AS `isMale_x`, t1.`scale` AS `scale_x`, '
            't1.`birth` AS `birth_x`, t1.`ds` AS `ds_x`, t2.`id` AS `id_y`, '
            't2.`fid` AS `fid_y`, t2.`isMale` AS `isMale_y`, t2.`scale` AS `scale_y`, '
            't2.`birth` AS `birth_y`, t2.`ds` AS `ds_y` \n'
            'FROM mocked_project.`pyodps_test_expr_table` t1 \n'
            'INNER JOIN \n'
            '  mocked_project.`pyodps_test_expr_table2` t2\n'
            'ON t1.`name` == t2.`name`'
        )
        actual_sql = ODPSEngine(self.odps).compile(inner_joined, prettify=False)
        self.assertEqual(to_str(expected_sql), to_str(actual_sql))
コード例 #8
0
    def testXFlowInstanceToXML(self):
        """Compare the generated XFlow instance XML with EXPECTED_XFLOW_INSTANCE_XML."""
        project_name = 'algo_project'
        xflows = self.odps.get_project(project_name).xflows

        generated = xflows._gen_xlow_instance_xml(
            xflow_name='pyodps_t_tmp_xflow_algo_name',
            xflow_project=project_name,
            parameters={'key': 'value'})

        self.assertEqual(to_str(generated), to_str(EXPECTED_XFLOW_INSTANCE_XML))
コード例 #9
0
    def testSmallRowsFormatter(self):
        """Ten-row frames must render the same with ``pandas=True`` and ``pandas=False``."""
        rows = [self._random_values() for _ in range(10)]
        pandas_frame = ResultFrame(data=rows, schema=self.schema, pandas=True)
        plain_frame = ResultFrame(data=rows, schema=self.schema, pandas=False)

        # Text and HTML representations must agree between the two frames.
        self.assertEqual(to_str(repr(pandas_frame)), to_str(repr(plain_frame)))
        self.assertEqual(to_str(pandas_frame._repr_html_()), to_str(plain_frame._repr_html_()))

        # Iterating the plain frame yields exactly its stored values.
        iterated = [row for row in plain_frame]
        self.assertEqual(plain_frame._values, iterated)
コード例 #10
0
    def testDistinctPrune(self):
        """Check the SQL compiled for a DISTINCT projection narrowed to `name`."""
        distinct_rows = self.expr.distinct(self.expr.id + 1, self.expr.name)
        name_only = distinct_rows['name', ]

        expected_sql = (
            "SELECT t2.`name` \n"
            "FROM (\n"
            "  SELECT DISTINCT t1.`id` + 1 AS `id`, t1.`name` \n"
            "  FROM mocked_project.`pyodps_test_expr_table` t1 \n"
            ") t2"
        )
        actual_sql = ODPSEngine(self.odps).compile(name_only, prettify=False)
        self.assertEqual(to_str(expected_sql), to_str(actual_sql))
コード例 #11
0
    def testSQLTaskToXML(self):
        """Serialize a SQLTask, compare with the template, and parse it back."""
        sql = 'select * from dual'

        serialized = SQLTask(query=sql).serialize()
        expected_xml = template % {'sql': sql}
        self.assertEqual(to_str(serialized), to_str(expected_xml))

        # Parsing the serialized form must yield a SQLTask again.
        reparsed = Task.parse(None, serialized)
        self.assertIsInstance(reparsed, SQLTask)
コード例 #12
0
    def testSortPrune(self):
        """Check the SQL compiled for a sort on a renamed column followed by a projection."""
        renamed = self.expr[self.expr.exclude('name'), self.expr.name.rename('name2')]
        ordered = renamed.sort('name2')
        picked = ordered['id', 'fid']

        expected_sql = (
            "SELECT t2.`id`, t2.`fid` \n"
            "FROM (\n"
            "  SELECT t1.`id`, t1.`fid`, t1.`name` AS `name2` \n"
            "  FROM mocked_project.`pyodps_test_expr_table` t1 \n"
            "  ORDER BY name2 \n"
            "  LIMIT 10000\n"
            ") t2"
        )
        actual_sql = ODPSEngine(self.odps).compile(picked, prettify=False)
        self.assertEqual(to_str(expected_sql), to_str(actual_sql))
コード例 #13
0
    def testSlicePrune(self):
        """Check pruning and compiled SQL for filter -> slice -> projection."""
        negative_fid = self.expr.filter(self.expr.fid < 0)
        first_four = negative_fid[:4]
        projected = first_four['name', lambda x: x.id + 1]

        # Three levels below the pruned root must still be the source table.
        pruned = ColumnPruning(projected.to_dag()).prune()
        self.assertIsNotNone(pruned.input.input.input._source_data)

        expected_sql = (
            "SELECT t1.`name`, t1.`id` + 1 AS `id` \n"
            "FROM mocked_project.`pyodps_test_expr_table` t1 \n"
            "WHERE t1.`fid` < 0 \n"
            "LIMIT 4"
        )
        actual_sql = ODPSEngine(self.odps).compile(projected, prettify=False)
        self.assertEqual(to_str(expected_sql), to_str(actual_sql))
コード例 #14
0
    def testMutatePrune(self):
        """Check the SQL compiled for a grouped cumulative sum narrowed to two columns."""
        with_new_id = self.expr[self.expr.exclude('birth'),
                                self.expr.fid.astype('int').rename('new_id')]
        with_sum = with_new_id[
            with_new_id,
            with_new_id.groupby('name').mutate(
                lambda x: x.new_id.cumsum().rename('new_id_sum')),
        ]
        picked = with_sum[with_sum.new_id, with_sum.new_id_sum]

        expected_sql = (
            "SELECT t2.`new_id`, t2.`new_id_sum` \n"
            "FROM (\n"
            "  SELECT CAST(t1.`fid` AS BIGINT) AS `new_id`, "
            "SUM(CAST(t1.`fid` AS BIGINT)) OVER (PARTITION BY t1.`name`) AS `new_id_sum` \n"
            "  FROM mocked_project.`pyodps_test_expr_table` t1 \n"
            ") t2"
        )
        actual_sql = ODPSEngine(self.odps).compile(picked, prettify=False)
        self.assertEqual(to_str(expected_sql), to_str(actual_sql))
コード例 #15
0
    def testLargeColumnsFormatter(self):
        """Wide frames must render the same with ``pandas=True`` and ``pandas=False``.

        The schema is the fixture schema repeated ten times, with the repetition
        index appended to every column name.
        """
        wide_names = [name + str(i) for i in range(10) for name in self.schema.names]
        wide_types = self.schema.types * 10
        schema = Schema.from_lists(wide_names, wide_types)

        def make_row():
            # Concatenate the values of ten random records into one wide row.
            cells = []
            for _ in range(10):
                cells.extend(self._random_values().values)
            return cells

        data = []
        for _ in range(10):
            data.append(Record(schema=df_schema_to_odps_schema(schema), values=make_row()))

        pandas_frame = ResultFrame(data=data, schema=schema, pandas=True)
        plain_frame = ResultFrame(data=data, schema=schema, pandas=False)

        self.assertEqual(to_str(repr(pandas_frame)), to_str(repr(plain_frame)))
        self.assertEqual(to_str(pandas_frame._repr_html_()), to_str(plain_frame._repr_html_()))
コード例 #16
0
    def testUnion(self):
        """Execute a distinct projection unioned with a second table and check the rows."""
        first_rows = [
            ['name1', 4, 5.3, None, None, None],
            ['name2', 2, 3.5, None, None, None],
            ['name1', 4, 4.2, None, None, None],
            ['name1', 3, 2.2, None, None, None],
            ['name1', 3, 4.1, None, None, None],
        ]

        # Recreate the second table from scratch.
        second_schema = Schema.from_lists(['name', 'id2', 'id3'],
                                          [types.string, types.bigint, types.bigint])
        second_name = 'pyodps_test_engine_table2'
        self.odps.delete_table(second_name, if_exists=True)
        second_table = self.odps.create_table(name=second_name, schema=second_schema)
        second_expr = CollectionExpr(_source_data=second_table,
                                     _schema=odps_schema_to_df_schema(second_schema))

        self._gen_data(data=first_rows)

        second_rows = [
            ['name3', 5, -1],
            ['name4', 6, -2]
        ]
        second_records = [second_table.new_record(values=row) for row in second_rows]
        self.odps.write_table(second_table, 0, second_records)

        try:
            distinct_left = self.expr['name', 'id'].distinct()
            renamed_right = second_expr[second_expr.id2.rename('id'), 'name']
            unioned = distinct_left.union(renamed_right)

            fetched = self._get_result(self.engine.execute(unioned))

            wanted = [
                ['name1', 4],
                ['name1', 3],
                ['name2', 2],
                ['name3', 5],
                ['name4', 6]
            ]

            # Row order is not asserted: sort both sides before comparing.
            fetched = sorted(fetched)
            wanted = sorted(wanted)

            self.assertEqual(len(fetched), len(wanted))
            for fetched_row, wanted_row in zip(fetched, wanted):
                self.assertEqual([to_str(cell) for cell in fetched_row],
                                 [to_str(cell) for cell in wanted_row])

        finally:
            second_table.drop()
コード例 #17
0
    def testFilterPartitionPrune(self):
        """Check pruning and compiled SQL for partition filter -> filter -> projection."""
        in_partition = self.expr.filter_partition('ds=today')
        negative_fid = in_partition[lambda x: x.fid < 0]
        projected = negative_fid['name', lambda x: x.id + 1]

        # Two levels below the pruned root only the referenced columns remain.
        pruned = ColumnPruning(projected.to_dag()).prune()
        remaining = set(pruned.input.input.schema.names)
        self.assertEqual(remaining, set(['name', 'id', 'fid']))

        expected_sql = (
            "SELECT t2.`name`, t2.`id` + 1 AS `id` \n"
            "FROM (\n"
            "  SELECT t1.`name`, t1.`id`, t1.`fid` \n"
            "  FROM mocked_project.`pyodps_test_expr_table` t1 \n"
            "  WHERE t1.`ds` == 'today' \n"
            ") t2 \n"
            "WHERE t2.`fid` < 0"
        )
        actual_sql = ODPSEngine(self.odps).compile(projected, prettify=False)
        self.assertEqual(to_str(expected_sql), to_str(actual_sql))
コード例 #18
0
    def testReadWriteTable(self):
        """Write Record objects to a fresh table and read them back three ways."""
        table_name = tn('pyodps_t_tmp_read_write_table')
        schema = Schema.from_lists(['id', 'name', 'right'], ['bigint', 'string', 'boolean'])

        # Start from a clean slate.
        self.odps.delete_table(table_name, if_exists=True)
        self.assertFalse(self.odps.exist_table(table_name))

        table = self.odps.create_table(table_name, schema)
        rows = [[111, 'aaa', True],
                [222, 'bbb', False],
                [333, 'ccc', True],
                [444, '中文', False]]
        row_count = len(rows)
        records = [Record(schema=schema, values=row) for row in rows]

        # Expected values, with the string column normalised through to_str.
        expected_rows = [[row[0], to_str(row[1]), row[2]] for row in rows]

        self.odps.write_table(table, 0, records)

        read_all = [rec.values for rec in self.odps.read_table(table, row_count)]
        self.assertSequenceEqual(expected_rows, read_all)

        read_stepped = [rec.values for rec in self.odps.read_table(table, row_count, step=2)]
        self.assertSequenceEqual(expected_rows[::2], read_stepped)

        read_head = [rec.values for rec in table.head(row_count)]
        self.assertSequenceEqual(expected_rows, read_head)

        self.odps.delete_table(table_name)
        self.assertFalse(self.odps.exist_table(table_name))
コード例 #19
0
    def testJoin(self):
        """Execute two joins against a second table and check the returned rows."""
        first_rows = [
            ['name1', 4, 5.3, None, None, None],
            ['name2', 2, 3.5, None, None, None],
            ['name1', 4, 4.2, None, None, None],
            ['name1', 3, 2.2, None, None, None],
            ['name1', 3, 4.1, None, None, None],
        ]

        # Recreate the second table from scratch.
        second_schema = Schema.from_lists(['name', 'id2', 'id3'],
                                          [types.string, types.bigint, types.bigint])
        second_name = 'pyodps_test_engine_table2'
        self.odps.delete_table(second_name, if_exists=True)
        second_table = self.odps.create_table(name=second_name, schema=second_schema)
        second_expr = CollectionExpr(_source_data=second_table,
                                     _schema=odps_schema_to_df_schema(second_schema))

        self._gen_data(data=first_rows)

        second_rows = [
            ['name1', 4, -1],
            ['name2', 1, -2]
        ]
        second_records = [second_table.new_record(values=row) for row in second_rows]
        self.odps.write_table(second_table, 0, second_records)

        try:
            # Join without an explicit `on`.
            joined = self.expr.join(second_expr)['name', 'id2']
            rows = self._get_result(self.engine.execute(joined))

            self.assertEqual(len(rows), 5)
            allowed = [
                [to_str('name1'), 4],
                [to_str('name2'), 1]
            ]
            self.assertTrue(all(row in allowed for row in rows))

            # Join on `name` and on id == id2.
            joined = self.expr.join(second_expr, on=['name', ('id', 'id2')])[
                self.expr.name, second_expr.id2]
            rows = self._get_result(self.engine.execute(joined))
            self.assertEqual(len(rows), 2)
            only_row = [to_str('name1'), 4]
            self.assertTrue(all(row == only_row for row in rows))

        finally:
            second_table.drop()
コード例 #20
0
    def testCreateInstanceXML(self):
        """Compare the submit-instance XML of a SQL job with the expected template."""
        instances = self.odps._project.instances

        template_values = {
            'query': 'select * from dual;',
            'uuid': '359696d4-ac73-4e6c-86d1-6649b01f1a22',
            'priority': 5
        }

        job = instances._create_job(
            task=SQLTask(query=template_values['query']),
            priority=template_values['priority'],
            uuid_=template_values['uuid'])
        submitted_xml = instances._get_submit_instance_content(job)

        wanted_xml = expected_xml_template % template_values
        self.assertEqual(to_str(submitted_xml), to_str(wanted_xml))
コード例 #21
0
    def testString(self):
        """Compare engine string operations with their plain-Python counterparts.

        Each pair holds a Python callable and the matching expression on the
        `name` column.  All expressions are executed in one projection and each
        result column is compared with the callable applied to the generated names.
        """
        data = self._gen_data(5)
        # The first generated name doubles as the search pattern.
        needle = data[0][0]
        name = self.expr.name

        methods_to_fields = [
            (lambda s: s.capitalize(), name.capitalize()),
            (lambda s: needle in s, name.contains(needle, regex=False)),
            (lambda s: s.count(needle), name.count(needle)),
            (lambda s: s.endswith(needle), name.endswith(needle)),
            (lambda s: s.startswith(needle), name.startswith(needle)),
            (lambda s: s.find(needle), name.find(needle)),
            (lambda s: s.rfind(needle), name.rfind(needle)),
            (lambda s: s.replace(needle, 'test'), name.replace(needle, 'test')),
            (lambda s: s[0], name.get(0)),
            (lambda s: len(s), name.len()),
            (lambda s: s.ljust(10), name.ljust(10)),
            (lambda s: s.ljust(20, '*'), name.ljust(20, fillchar='*')),
            (lambda s: s.rjust(10), name.rjust(10)),
            (lambda s: s.rjust(20, '*'), name.rjust(20, fillchar='*')),
            (lambda s: s * 4, name.repeat(4)),
            (lambda s: s[2: 10: 2], name.slice(2, 10, 2)),
            (lambda s: s[-5: -1], name.slice(-5, -1)),
            (lambda s: s.title(), name.title()),
            (lambda s: s.rjust(20, '0'), name.zfill(20)),
            (lambda s: s.isalnum(), name.isalnum()),
            (lambda s: s.isalpha(), name.isalpha()),
            (lambda s: s.isdigit(), name.isdigit()),
            (lambda s: s.isspace(), name.isspace()),
            (lambda s: s.isupper(), name.isupper()),
            (lambda s: s.istitle(), name.istitle()),
            (lambda s: to_str(s).isnumeric(), name.isnumeric()),
            (lambda s: to_str(s).isdecimal(), name.isdecimal()),
        ]

        # Give every expression a unique column name: id0, id1, ...
        fields = [field.rename('id' + str(pos))
                  for pos, (_, field) in enumerate(methods_to_fields)]

        projected = self.expr[fields]
        result = self._get_result(self.engine.execute(projected))

        for pos, (py_func, _) in enumerate(methods_to_fields):
            wanted = [py_func(row[0]) for row in data]
            got = [row[pos] for row in result]
            self.assertEqual(wanted, got)
コード例 #22
0
    def testArithmeticFormatter(self):
        """Compare the repr of an arithmetic expression with EXPECTED_ARITHMETIC_FORMAT.

        If the direct comparison fails, the two texts are compared line by line
        (trailing whitespace ignored).  Lines that differ textually are accepted
        only when both parse as floats that are almost equal; otherwise the
        original assertion error is re-raised.
        """
        expr = self.expr
        d = -(expr["id"]) + 20.34 - expr["id"] + float(20) * expr["id"] - expr["id"] / 4.9 + 40 // 2 + expr["id"] // 1.2

        try:
            self._lines_eq(EXPECTED_ARITHMETIC_FORMAT, repr(d))
        except AssertionError as e:
            left = [to_str(line.rstrip()) for line in EXPECTED_ARITHMETIC_FORMAT.split("\n")]
            right = [to_str(line.rstrip()) for line in repr(d).split("\n")]
            self.assertEqual(len(left), len(right))
            for l, r in zip(left, right):
                try:
                    self.assertEqual(l, r)
                except AssertionError:
                    try:
                        self.assertAlmostEqual(float(l), float(r))
                    except (ValueError, AssertionError):
                        # ValueError: a line is not numeric; AssertionError: the
                        # numbers differ.  Either way report the original failure.
                        # A bare except would also trap KeyboardInterrupt/SystemExit.
                        raise e
コード例 #23
0
    def testGroupbyPrune(self):
        """Check the SQL compiled for a filtered aggregation narrowed to one column."""
        # Keep only the grouping key.
        aggregated = self.expr.groupby('name').agg(id=self.expr.id.max())
        name_only = aggregated[aggregated.id < 0]['name', ]

        expected_sql = (
            "SELECT t1.`name` \n"
            "FROM mocked_project.`pyodps_test_expr_table` t1 \n"
            "GROUP BY t1.`name` \n"
            "HAVING MAX(t1.`id`) < 0"
        )
        actual_sql = ODPSEngine(self.odps).compile(name_only, prettify=False)
        self.assertEqual(to_str(expected_sql), to_str(actual_sql))

        # Keep only the aggregated value.
        aggregated = self.expr.groupby('name').agg(id=self.expr.id.max())
        id_only = aggregated[aggregated.id < 0]['id', ]

        expected_sql = (
            "SELECT MAX(t1.`id`) AS `id` \n"
            "FROM mocked_project.`pyodps_test_expr_table` t1 \n"
            "GROUP BY t1.`name` \n"
            "HAVING MAX(t1.`id`) < 0"
        )
        actual_sql = ODPSEngine(self.odps).compile(id_only, prettify=False)
        self.assertEqual(to_str(expected_sql), to_str(actual_sql))
コード例 #24
0
    def testReadNonAsciiSQLInstance(self):
        """Strings with non-ASCII, escape and control characters must survive a SQL read."""
        table_name = tn('pyodps_t_tmp_read_non_ascii_sql_instance')
        self.odps.delete_table(table_name, if_exists=True)
        table_schema = Schema.from_lists(['size', 'name'], ['bigint', 'string'])
        table = self.odps.create_table(table_name, schema=table_schema, if_not_exists=True)

        data = [[1, '中\\\\n\\\n文 ,\r '], [2, '测试\x00\x01\x02数据']]
        records = [table.new_record(row) for row in data]
        self.odps.write_table(table, 0, records)

        instance = self.odps.execute_sql('select name from %s' % table_name)
        with instance.open_reader() as reader:
            # Row order is not asserted: compare sorted name lists.
            fetched_names = sorted([to_str(rec[0]) for rec in reader])
            written_names = sorted([to_str(row[1]) for row in data])

            self.assertSequenceEqual(fetched_names, written_names)

        table.drop()
コード例 #25
0
    def testCEncodeAndDecode(self):
        """Round-trip tagged values through the cython-built Encoder and Decoder.

        If the compiled modules cannot be imported, a warning is emitted and the
        test returns without asserting anything.
        """
        # Only the optional imports are guarded, so an ImportError raised by
        # the test body itself is no longer silently swallowed.
        try:
            from odps.tunnel.pb.encoder_c import Encoder
            from odps.tunnel.pb.decoder_c import Decoder
        except ImportError:
            warnings.warn("No Encoder or Decoder built by cython found")
            return

        encoder = Encoder()
        encoder.append_tag(0, WIRETYPE_VARINT)
        encoder.append_tag(1, WIRETYPE_VARINT)
        encoder.append_sint64(-2 ** 40)
        encoder.append_tag(2, WIRETYPE_LENGTH_DELIMITED)
        encoder.append_string(to_binary("hello"))
        encoder.append_tag(3, WIRETYPE_VARINT)
        encoder.append_bool(True)
        encoder.append_tag(4, WIRETYPE_FIXED64)
        encoder.append_float(3.14)
        encoder.append_double(0.31415926)
        encoder.append_tag(5, WIRETYPE_VARINT)
        encoder.append_uint32(2 ** 30)
        encoder.append_tag(6, WIRETYPE_VARINT)
        encoder.append_uint64(2 ** 40)
        buffer_size = len(encoder)

        # Decode in the same order the values were appended.
        # assertEqual replaces the deprecated assertEquals alias, which was
        # removed from unittest in Python 3.12.
        tube = io.BytesIO(encoder.tostring())
        decoder = Decoder(tube)
        self.assertEqual((0, WIRETYPE_VARINT), decoder.read_field_number_and_wire_type())
        self.assertEqual((1, WIRETYPE_VARINT), decoder.read_field_number_and_wire_type())
        self.assertEqual(-2 ** 40, decoder.read_sint64())
        self.assertEqual((2, WIRETYPE_LENGTH_DELIMITED), decoder.read_field_number_and_wire_type())
        self.assertEqual(to_str("hello"), to_str(decoder.read_string()))
        self.assertEqual((3, WIRETYPE_VARINT), decoder.read_field_number_and_wire_type())
        self.assertEqual(True, decoder.read_bool())
        self.assertEqual((4, WIRETYPE_FIXED64), decoder.read_field_number_and_wire_type())
        self.assertAlmostEqual(3.14, decoder.read_float(), delta=0.001)
        self.assertEqual(0.31415926, decoder.read_double())
        self.assertEqual((5, WIRETYPE_VARINT), decoder.read_field_number_and_wire_type())
        self.assertEqual(2 ** 30, decoder.read_uint32())
        self.assertEqual((6, WIRETYPE_VARINT), decoder.read_field_number_and_wire_type())
        self.assertEqual(2 ** 40, decoder.read_uint64())
        # The decoder must have consumed exactly the encoded length.
        self.assertEqual(buffer_size, decoder.position())
コード例 #26
0
    def testFilterPushdownThroughProjection(self):
        """Check the SQL compiled when filters follow projections.

        In the first four cases the expected SQL is a single SELECT whose WHERE
        clause restates the projected expressions; the last case aggregates
        over a filtered sub-query.
        """
        # Case 1: one filter on a computed column.
        projected = self.expr[self.expr.id + 1, 'name']
        query = projected[lambda x: x.id < 10]
        expected_sql = (
            'SELECT t1.`id` + 1 AS `id`, t1.`name` \n'
            'FROM mocked_project.`pyodps_test_expr_table` t1 \n'
            'WHERE (t1.`id` + 1) < 10'
        )
        actual_sql = ODPSEngine(self.odps).compile(query, prettify=False)
        self.assertEqual(to_str(expected_sql), to_str(actual_sql))

        # Case 2: two chained filters after a projection.
        projected = self.expr['name', self.expr.id ** 2]
        by_name = projected.filter(lambda x: x.name == 'name1')
        query = by_name.filter(lambda x: x.id < 3)
        expected_sql = (
            "SELECT t1.`name`, CAST(POW(t1.`id`, 2) AS BIGINT) AS `id` \n"
            "FROM mocked_project.`pyodps_test_expr_table` t1 \n"
            "WHERE (t1.`name` == 'name1') AND ((CAST(POW(t1.`id`, 2) AS BIGINT)) < 3)"
        )
        actual_sql = ODPSEngine(self.odps).compile(query, prettify=False)
        self.assertEqual(to_str(expected_sql), to_str(actual_sql))

        # Case 3: projection, filter, second projection, filter.
        projected = self.expr['name', self.expr.id + 1]
        by_name = projected.filter(lambda x: x.name == 'name1')
        prefixed = by_name[lambda x: 'tt' + x.name, 'id']
        query = prefixed.filter(lambda x: x.id < 3)
        expected_sql = (
            "SELECT CONCAT('tt', t1.`name`) AS `name`, t1.`id` + 1 AS `id` \n"
            "FROM mocked_project.`pyodps_test_expr_table` t1 \n"
            "WHERE (t1.`name` == 'name1') AND ((t1.`id` + 1) < 3)"
        )
        actual_sql = ODPSEngine(self.odps).compile(query, prettify=False)
        self.assertEqual(to_str(expected_sql), to_str(actual_sql))

        # Case 4: filter first, then two projections, then another filter.
        by_name = self.expr.filter(self.expr.name == 'name1')
        projected = by_name.select('name', lambda x: (x.id + 1) * 2)
        prefixed = projected[lambda x: 'tt' + x.name, 'id']
        query = prefixed.filter(lambda x: x.id < 3)
        expected_sql = (
            "SELECT CONCAT('tt', t1.`name`) AS `name`, (t1.`id` + 1) * 2 AS `id` \n"
            "FROM mocked_project.`pyodps_test_expr_table` t1 \n"
            "WHERE (((t1.`id` + 1) * 2) < 3) AND (t1.`name` == 'name1')"
        )
        actual_sql = ODPSEngine(self.odps).compile(query, prettify=False)
        self.assertEqual(to_str(expected_sql), to_str(actual_sql))

        # Case 5: aggregation over a two-predicate filter.
        in_range = self.expr.id.between(2, 6)
        mentions_pyodps = self.expr.name.lower().contains('pyodps', regex=False)
        query = self.expr.filter(in_range, mentions_pyodps).name.nunique()
        expected_sql = (
            "SELECT COUNT(DISTINCT t2.`name`) AS `name_nunique` \n"
            "FROM (\n"
            "  SELECT t1.`id`, t1.`name` \n"
            "  FROM mocked_project.`pyodps_test_expr_table` t1 \n"
            "  WHERE ((t1.`id` >= 2) AND (t1.`id` <= 6)) AND INSTR(TOLOWER(t1.`name`), 'pyodps') > 0 \n"
            ") t2"
        )
        actual_sql = ODPSEngine(self.odps).compile(query, prettify=False)
        self.assertEqual(to_str(expected_sql), to_str(actual_sql))
コード例 #27
0
    def testProjectPrune(self):
        """Check pruning and compiled SQL for plain projections.

        Covers a two-column select, a projection mixing scalar constants with a
        column, and a projection built from a builtin function call.
        """
        expr = self.expr.select('name', 'id')
        new_expr = ColumnPruning(expr.to_dag()).prune()
        self.assertIsInstance(new_expr, ProjectCollectionExpr)
        self.assertIsNotNone(new_expr.input._source_data)

        expected = 'SELECT t1.`name`, t1.`id` \n' \
                   'FROM mocked_project.`pyodps_test_expr_table` t1'
        # Normalise both sides with to_str, as every other SQL comparison here does.
        self.assertEqual(to_str(expected), to_str(ODPSEngine(self.odps).compile(expr, prettify=False)))

        # Scalar constants alongside a real column.
        expr = self.expr[Scalar(3).rename('const'),
                         NullScalar('string').rename('string_const'),
                         self.expr.id]
        expected = 'SELECT 3 AS `const`, CAST(NULL AS STRING) AS `string_const`, t1.`id` \n' \
                   'FROM mocked_project.`pyodps_test_expr_table` t1'
        self.assertEqual(to_str(expected), to_str(ODPSEngine(self.odps).compile(expr, prettify=False)))

        # Builtin function taking the source table name as its argument.
        expr = self.expr.select(pt=BuiltinFunction('max_pt', args=(self.expr._source_data.name,)))
        expected = "SELECT max_pt('pyodps_test_expr_table') AS `pt` \n" \
                   "FROM mocked_project.`pyodps_test_expr_table` t1"
        self.assertEqual(to_str(expected), to_str(ODPSEngine(self.odps).compile(expr, prettify=False)))
コード例 #28
0
    def testArrayReadWriteTable(self):
        """Write plain lists (not Record objects) to a fresh table and read them back."""
        table_name = tn("pyodps_t_tmp_read_write_table")
        schema = Schema.from_lists(["id", "name", "right"], ["bigint", "string", "boolean"])

        # Start from a clean slate.
        self.odps.delete_table(table_name, if_exists=True)
        self.assertFalse(self.odps.exist_table(table_name))

        table = self.odps.create_table(table_name, schema)
        rows = [
            [111, "aaa", True],
            [222, "bbb", False],
            [333, "ccc", True],
            [444, "中文", False],
        ]
        row_count = len(rows)

        # Expected values, with the string column normalised through to_str.
        expected_rows = [[row[0], to_str(row[1]), row[2]] for row in rows]

        self.odps.write_table(table, 0, rows)

        read_all = [rec.values for rec in self.odps.read_table(table, row_count)]
        self.assertSequenceEqual(expected_rows, read_all)

        read_stepped = [rec.values for rec in self.odps.read_table(table, row_count, step=2)]
        self.assertSequenceEqual(expected_rows[::2], read_stepped)

        read_head = [rec.values for rec in table.head(row_count)]
        self.assertSequenceEqual(expected_rows, read_head)

        self.odps.delete_table(table_name)
        self.assertFalse(self.odps.exist_table(table_name))
コード例 #29
0
    def testFileResource(self):
        """Exercise a file resource through each open mode used below.

        The resource is created from FILE_CONTENT and then opened in turn with
        'r', 'w', 'r+', 'a', 'a+', 'r', 'w+', 'r+' and 'rb'.  Each step relies
        on the content left behind by the previous one, so statement order
        matters.
        """
        resource_name = tn('pyodps_t_tmp_file_resource')

        # Best-effort removal of a leftover resource from an earlier run.
        try:
            self.odps.delete_resource(resource_name)
        except errors.ODPSError:
            pass

        resource = self.odps.create_resource(resource_name,
                                             'file',
                                             file_obj=FILE_CONTENT)
        self.assertIsInstance(resource, FileResource)

        # Mode 'r': writes must raise IOError, reads return text.
        with resource.open(mode='r') as fp:
            self.assertRaises(IOError, lambda: fp.write('sss'))
            self.assertRaises(IOError, lambda: fp.writelines(['sss\n']))

            self.assertIsInstance(fp.read(), six.text_type)

            # The reported size must match the end-of-file position.
            fp.seek(0, compat.SEEK_END)
            size = fp.tell()
            fp.seek(0)
            self.assertEqual(fp._size, size)

            self.assertEqual(to_str(fp.read()), to_str(FILE_CONTENT))
            fp.seek(1)
            self.assertEqual(to_str(fp.read()), to_str(FILE_CONTENT[1:]))

            fp.seek(0)
            self.assertEqual(to_str(fp.readline()),
                             to_str(FILE_CONTENT.split('\n', 1)[0] + '\n'))

            # Line iteration must match splitlines() once newlines are normalised.
            fp.seek(0)
            add_newline = lambda s: s if s.endswith('\n') else s + '\n'
            self.assertEqual(
                [to_str(add_newline(l)) for l in fp],
                [to_str(add_newline(l)) for l in FILE_CONTENT.splitlines()])

            self.assertFalse(fp._need_commit)
            self.assertTrue(fp._opened)

        # Leaving the with-block must close the handle.
        self.assertFalse(fp._opened)
        self.assertIsNone(fp._fp)

        # Mode 'w': reads must raise IOError; content is replaced.
        with resource.open(mode='w') as fp:
            self.assertRaises(IOError, fp.read)
            self.assertRaises(IOError, fp.readline)
            self.assertRaises(IOError, fp.readlines)

            fp.writelines([OVERWRITE_FILE_CONTENT] * 2)

            self.assertTrue(fp._need_commit)

            size = fp._size

        # Mode 'r+': sees what 'w' wrote, then overwrites it with FILE_CONTENT.
        with resource.open(mode='r+') as fp:
            self.assertEqual(to_str(fp.read()),
                             to_str(OVERWRITE_FILE_CONTENT * 2))

            self.assertEqual(size, fp._size)

            fp.seek(0)
            fp.write(FILE_CONTENT)
            fp.truncate()

            self.assertTrue(fp._need_commit)

        # Mode 'a': reads must raise IOError; the write is appended.
        with resource.open(mode='a') as fp:
            self.assertRaises(IOError, fp.read)
            self.assertRaises(IOError, fp.readline)
            self.assertRaises(IOError, fp.readlines)

            fp.write(OVERWRITE_FILE_CONTENT)

            self.assertTrue(fp._need_commit)

        # Mode 'a+': readable; then cut the content down to one character.
        with resource.open(mode='a+') as fp:
            self.assertEqual(to_str(fp.read()),
                             to_str(FILE_CONTENT + OVERWRITE_FILE_CONTENT))
            fp.seek(1)
            fp.truncate()
            self.assertTrue(fp._need_commit)

        # Explicit open/close without a context manager.
        fp = resource.open(mode='r')
        self.assertEqual(to_str(fp.read()), FILE_CONTENT[0])
        fp.close()

        # Mode 'w+': starts out empty, then is refilled.
        with resource.open(mode='w+') as fp:
            self.assertEqual(len(fp.read()), 0)
            fp.write(FILE_CONTENT)

        with resource.open(mode='r+') as fp:
            self.assertEqual(to_str(fp.read()), FILE_CONTENT)

        # After update(), binary mode must return bytes with the new content.
        resource.update(file_obj='update')
        with resource.open(mode='rb') as fp:
            self.assertIsInstance(fp.read(), six.binary_type)
            fp.seek(0)
            self.assertEqual(to_str(fp.read()), to_str('update'))

        self.odps.delete_resource(resource_name)
コード例 #30
0
 def _gen_random_string(self, max_length=15):
     """Return a random string of 1 to ``max_length`` characters drawn from ``letters``.

     :param max_length: inclusive upper bound on the length (default 15).
         It was previously ignored in favour of a hard-coded 15.
     :return: the generated string, passed through ``to_str``.
     """
     # Draw the length first, then the characters, as before.
     length = random.randint(1, max_length)
     # Indices 0..51 assume `letters` holds 52 characters — TODO confirm.
     return to_str(''.join([letters[random.randint(0, 51)] for _ in range(length)]))
コード例 #31
0
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import zipfile

from odps.tests.core import TestBase, to_str, tn
from odps.compat import unittest, six
from odps import compat
from odps.models import Resource, FileResource, TableResource, VolumeArchiveResource, \
    VolumeFileResource, Schema
from odps import errors, types

# Short multi-line sample text, normalized through to_str(). The literal opens
# and closes on its own line, so the value begins and ends with a newline.
# NOTE(review): presumably the initial body written to file resources in the
# tests of this module -- confirm against the test methods that use it.
FILE_CONTENT = to_str("""
Proudly swept the rain by the cliffs
As it glided through the trees
Still following ever the bud
The ahihi lehua of the vale
""")
# Second, longer sample text built the same way (leading and trailing newline).
# NOTE(review): presumably the replacement body used when a test overwrites or
# appends to a resource -- confirm against the test methods that use it.
OVERWRITE_FILE_CONTENT = to_str("""
Farewell to thee, farewell to thee
The charming one who dwells in the shaded bowers
One fond embrace,
'Ere I depart
Until we meet again
Sweet memories come back to me
Bringing fresh remembrances
Of the past
Dearest one, yes, you are mine own
From you, true love shall never depart
""")
コード例 #32
0
    def testElement(self):
        """Execute a projection of element-wise operations and verify every
        output column against values recomputed in plain Python from the
        generated input rows.
        """
        data = self._gen_data(5, nullable_field='name')

        # Expressions are built one at a time in output-column order; the
        # position of each one in ``columns`` is the index used on the result
        # rows further down.
        is_null = self.expr.name.isnull().rename('name1')
        not_null = self.expr.name.notnull().rename('name2')
        filled = self.expr.name.fillna('test').rename('name3')
        in_literal = self.expr.id.isin([1, 2, 3]).rename('id1')
        in_column = self.expr.id.isin(self.expr.fid.astype('int')).rename('id2')
        not_in_literal = self.expr.id.notin([1, 2, 3]).rename('id3')
        not_in_column = self.expr.id.notin(self.expr.fid.astype('int')).rename('id4')
        in_range = self.expr.id.between(self.expr.fid, 3).rename('id5')
        switched = self.expr.name.fillna('test').switch(
            'test', 'test' + self.expr.name.fillna('test'),
            'test2', 'test2' + self.expr.name.fillna('test'),
            default=self.expr.name).rename('name4')
        bucketed = self.expr.id.cut(
            [100, 200, 300],
            labels=['xsmall', 'small', 'large', 'xlarge'],
            include_under=True, include_over=True).rename('id6')

        columns = [
            is_null, not_null, filled,
            in_literal, in_column, not_in_literal, not_in_column,
            in_range, switched, bucketed,
        ]

        executed = self.engine.execute(self.expr[columns])
        rows = self._get_result(executed)

        def column(idx):
            # All values of one output column, in row order.
            return [row[idx] for row in rows]

        self.assertEqual(len(data), len(rows))

        # In the checks below rec[0], rec[1] and rec[2] of each input record
        # are compared against the name, id and fid expressions respectively.

        # isnull / notnull: count truthy flags against the input null count.
        null_count = sum(1 for rec in data if rec[0] is None)
        self.assertEqual(null_count, sum(1 for flag in column(0) if flag))

        present_count = sum(1 for rec in data if rec[0] is not None)
        self.assertEqual(present_count, sum(1 for flag in column(1) if flag))

        # fillna replaces missing names with 'test'.
        expected_filled = []
        for rec in data:
            expected_filled.append('test' if rec[0] is None else rec[0])
        self.assertEqual(expected_filled, column(2))

        # isin / notin against a literal list and against the casted fid column.
        literal_ids = (1, 2, 3)
        self.assertEqual([rec[1] in literal_ids for rec in data], column(3))

        fids = [int(rec[2]) for rec in data]
        self.assertEqual([rec[1] in fids for rec in data], column(4))

        self.assertEqual([rec[1] not in literal_ids for rec in data], column(5))

        self.assertEqual([rec[1] not in fids for rec in data], column(6))

        # between(fid, 3) is inclusive on both ends.
        self.assertEqual([rec[2] <= rec[1] <= 3 for rec in data], column(7))

        # switch: a filled-in 'test' becomes 'testtest', other names pass through.
        expected_switched = [to_str('testtest' if rec[0] is None else rec[0])
                             for rec in data]
        self.assertEqual(expected_switched, [to_str(value) for value in column(8)])

        def bucket(value):
            # Mirrors cut([100, 200, 300]) with open-ended first and last bins.
            if value <= 100:
                return 'xsmall'
            if value <= 200:
                return 'small'
            if value <= 300:
                return 'large'
            return 'xlarge'

        expected_buckets = [to_str(bucket(rec[1])) for rec in data]
        self.assertEqual(expected_buckets, [to_str(value) for value in column(9)])
コード例 #33
0
 def _gen_random_string(self, max_length=15):
     """Build a random string whose length is drawn from 1..``max_length``.

     Each character is ``letters[i]`` for a random index ``i`` in 0..51, and
     the joined result is passed through ``to_str``.
     """
     # Draw the length first, then one index per character.
     size = random.randint(1, max_length)
     chars = []
     for _ in range(size):
         chars.append(letters[random.randint(0, 51)])
     return to_str(''.join(chars))