def testFilterPushdownThroughUnion(self):
        """Check the SQL compiled for filters combined with a union.

        Two cases are covered: a filter applied on top of a union (the
        expected SQL carries the predicate in both branches), and two
        already-filtered collections that are unioned afterwards.
        """
        # Case 1: filter applied on top of the union.
        unioned = self.expr['name', 'id'].union(self.expr2['id', 'name'])
        filtered = unioned.filter(unioned.id + 1 < 3)

        expected_sql = (
            'SELECT * \n'
            'FROM (\n'
            '  SELECT t1.`name`, t1.`id` \n'
            '  FROM mocked_project.`pyodps_test_expr_table` t1 \n'
            '  WHERE (t1.`id` + 1) < 3 \n'
            '  UNION ALL\n'
            '    SELECT t2.`name`, t2.`id` \n'
            '    FROM mocked_project.`pyodps_test_expr_table2` t2 \n'
            '    WHERE (t2.`id` + 1) < 3\n'
            ') t3'
        )
        compiled_sql = ODPSEngine(self.odps).compile(filtered, prettify=False)
        self.assertEqual(to_str(expected_sql), to_str(compiled_sql))

        # Case 2: each side is filtered first, then the sides are unioned.
        left = self.expr.filter(self.expr.id == 1)['name', 'id']
        right = self.expr.filter(self.expr.id == 0)['id', 'name']
        unioned = left.union(right)

        expected_sql = (
            'SELECT * \n'
            'FROM (\n'
            '  SELECT t1.`name`, t1.`id` \n'
            '  FROM mocked_project.`pyodps_test_expr_table` t1 \n'
            '  WHERE t1.`id` == 1 \n'
            '  UNION ALL\n'
            '    SELECT t2.`name`, t2.`id` \n'
            '    FROM mocked_project.`pyodps_test_expr_table` t2 \n'
            '    WHERE t2.`id` == 0\n'
            ') t3'
        )
        compiled_sql = ODPSEngine(self.odps).compile(unioned, prettify=False)
        self.assertEqual(to_str(expected_sql), to_str(compiled_sql))
    def testUnionPrune(self):
        """Check the SQL compiled for a pruned union and for a plain union."""
        # Only `id` is selected from the union, so the expected SQL reads
        # just that column from each side.
        left = self.expr.select('name', 'id')
        casted_id = self.expr3.fid.astype('int').rename('id')
        right = self.expr3.select(casted_id, self.expr3.name)
        id_only = left.union(right)['id']

        expected_sql = (
            "SELECT t3.`id` \n"
            "FROM (\n"
            "  SELECT t1.`id` \n"
            "  FROM mocked_project.`pyodps_test_expr_table` t1 \n"
            "  UNION ALL\n"
            "    SELECT CAST(t2.`fid` AS BIGINT) AS `id` \n"
            "    FROM mocked_project.`pyodps_test_expr_table2` t2\n"
            ") t3"
        )
        compiled_sql = ODPSEngine(self.odps).compile(id_only, prettify=False)
        self.assertEqual(to_str(expected_sql), to_str(compiled_sql))

        # A union of two whole collections keeps `SELECT *` everywhere.
        whole_union = self.expr.union(self.expr2)

        expected_sql = (
            'SELECT * \n'
            'FROM (\n'
            '  SELECT * \n'
            '  FROM mocked_project.`pyodps_test_expr_table` t1 \n'
            '  UNION ALL\n'
            '    SELECT * \n'
            '    FROM mocked_project.`pyodps_test_expr_table2` t2\n'
            ') t3'
        )
        compiled_sql = ODPSEngine(self.odps).compile(whole_union, prettify=False)
        self.assertEqual(to_str(expected_sql), to_str(compiled_sql))
    def testFilterPrune(self):
        """Check column pruning and compiled SQL for filtered collections.

        Covers a filter followed by a projection, a bare filter, and a
        filter whose predicate is an ``isin`` over another collection.
        """
        # Filter followed by a projection: after pruning, the projection sits
        # directly on the filter, which sits directly on the source table.
        expr = self.expr.filter(self.expr.name == 'name1')
        expr = expr['name', 'id']

        new_expr = ColumnPruning(expr.to_dag()).prune()

        self.assertIsInstance(new_expr.input, FilterCollectionExpr)
        self.assertNotIsInstance(new_expr.input.input, ProjectCollectionExpr)
        self.assertIsNotNone(new_expr.input.input._source_data)

        expected = 'SELECT t1.`name`, t1.`id` \n' \
                   'FROM mocked_project.`pyodps_test_expr_table` t1 \n' \
                   'WHERE t1.`name` == \'name1\''
        self.assertEqual(to_str(expected), to_str(ODPSEngine(self.odps).compile(expr, prettify=False)))

        # Bare filter: pruning leaves the filter directly on the source table.
        expr = self.expr.filter(self.expr.name == 'name1')

        new_expr = ColumnPruning(expr.to_dag()).prune()

        self.assertIsInstance(new_expr, FilterCollectionExpr)
        self.assertIsNotNone(new_expr.input._source_data)

        # Filter using a sub-select via `isin`.
        expr = self.expr.filter(self.expr.id.isin(self.expr3.id))

        expected = 'SELECT * \n' \
                   'FROM mocked_project.`pyodps_test_expr_table` t1 \n' \
                   'WHERE t1.`id` IN (SELECT t3.`id` FROM (  ' \
                   'SELECT t2.`id`   FROM mocked_project.`pyodps_test_expr_table2` t2 ) t3)'
        # assertEqual, not assertTrue: assertTrue(a, b) treats `b` as the
        # failure message and passes for any non-empty `a`, so the compiled
        # SQL was never actually compared.
        self.assertEqual(to_str(expected), to_str(ODPSEngine(self.odps).compile(expr, prettify=False)))
    def testSerializers(self):
        """Round-trip an ``Example`` model through ``serialize`` and ``parse``.

        The serialized text is compared against ``expected_xml_template`` and
        the parsed object is compared field by field with the original.
        """
        teacher = Example.Teacher(name='t1')
        professors = [Example.Teacher(name='p1'), Example.Teacher(name='p2')]
        jsn = Example.Json(label='json', tags=['t1', 't2'],
                           nest=Example.Json.Nest(name='n'),
                           nests=[Example.Json.Nest(name='n1'), Example.Json.Nest(name='n2')])

        # Going through a time tuple drops the microseconds from `now()`
        # (presumably because the serialized date has second resolution).
        dt = datetime.fromtimestamp(time.mktime(datetime.now().timetuple()))
        example = Example(name='example 1', type='ex', date=dt,
                          lessons=['less1', 'less2'], teacher=teacher, professors=professors,
                          properties={'test': 'true'}, jsn=jsn)
        sel = example.serialize()

        self.assertEqual(
            to_str(expected_xml_template % utils.gen_rfc822(dt, localtime=True)), to_str(sel))

        parsed_example = Example.parse(sel)

        self.assertEqual(example.name, parsed_example.name)
        self.assertEqual(example.type, parsed_example.type)
        self.assertEqual(example.date, parsed_example.date)
        self.assertSequenceEqual(example.lessons, parsed_example.lessons)
        self.assertEqual(example.teacher, parsed_example.teacher)
        self.assertSequenceEqual(example.professors, parsed_example.professors)
        # Compare the property mappings entry by entry. The previous form
        # handed a generator expression to assertTrue, which is always truthy,
        # so nothing was being checked.
        self.assertEqual(len(example.properties), len(parsed_example.properties))
        for key in example.properties:
            self.assertEqual(example.properties[key], parsed_example.properties[key])
        self.assertEqual(example.jsn.label, parsed_example.jsn.label)
        self.assertEqual(example.jsn.tags, parsed_example.jsn.tags)
        self.assertEqual(example.jsn.nest, parsed_example.jsn.nest)
        self.assertSequenceEqual(example.jsn.nests, parsed_example.jsn.nests)
    def testCreateDeleteFunction(self):
        """Create a function backed by a Python resource, inspect it, drop both."""
        test_resource_name = 'pyodps_t_tmp_test_function_resource.py'
        test_function_name = 'pyodps_t_tmp_test_function'

        # Remove leftovers from earlier runs; a missing object is fine.
        try:
            self.odps.delete_resource(test_resource_name)
        except errors.NoSuchObject:
            pass

        try:
            self.odps.delete_function(test_function_name)
        except errors.NoSuchObject:
            pass

        test_resource = self.odps.create_resource(
            test_resource_name, 'py', file_obj=FILE_CONTENT)

        # The class type is "<resource name without extension>.MyPlus".
        module_name = test_resource_name.split('.', 1)[0]
        test_function = self.odps.create_function(
            test_function_name,
            class_type=module_name + '.MyPlus',
            resources=[test_resource],
        )

        for attr_value in (test_function.name,
                           test_function.owner,
                           test_function.creation_time,
                           test_function.class_type):
            self.assertIsNotNone(attr_value)
        self.assertEqual(len(test_function.resources), 1)

        with self.odps.open_resource(name=test_resource_name, mode='r') as fp:
            stored_content = fp.read()
            self.assertEqual(to_str(stored_content), to_str(FILE_CONTENT))

        test_resource.drop()
        test_function.drop()
    def testSamplePrune(self):
        """Check the SQL compiled when only `id` is kept after sampling."""
        projected = self.expr['name', 'id']
        sampled = projected.sample(parts=5)
        id_only = sampled['id', ]

        expected_sql = (
            "SELECT t1.`id` \n"
            "FROM mocked_project.`pyodps_test_expr_table` t1 \n"
            "WHERE SAMPLE(5, 1)"
        )
        compiled_sql = ODPSEngine(self.odps).compile(id_only, prettify=False)
        self.assertEqual(to_str(expected_sql), to_str(compiled_sql))
    def testJoinPrune(self):
        """Check the SQL compiled for a pruned left join and a plain inner join."""
        # Only `id_x` is used from the join, so the expected SQL reads just
        # `id` from each side.
        left = self.expr.select(self.expr, type='normal')
        right = self.expr3[:4]
        left_joined = left.left_join(right, on='id')
        id_only = left_joined.id_x.rename('id')

        expected_sql = (
            "SELECT t2.`id` \n"
            "FROM (\n"
            "  SELECT t1.`id` \n"
            "  FROM mocked_project.`pyodps_test_expr_table` t1\n"
            ") t2 \n"
            "LEFT OUTER JOIN \n"
            "  (\n"
            "    SELECT t3.`id` \n"
            "    FROM mocked_project.`pyodps_test_expr_table2` t3 \n"
            "    LIMIT 4\n"
            "  ) t4\n"
            "ON t2.`id` == t4.`id`"
        )
        compiled_sql = ODPSEngine(self.odps).compile(id_only, prettify=False)
        self.assertEqual(to_str(expected_sql), to_str(compiled_sql))

        # Joining on `name` without a projection: every non-key column is
        # suffixed with `_x` / `_y` in the expected SQL.
        inner_joined = self.expr.join(self.expr2, 'name')

        expected_sql = (
            'SELECT t1.`name`, t1.`id` AS `id_x`, t1.`fid` AS `fid_x`, '
            't1.`isMale` AS `isMale_x`, t1.`scale` AS `scale_x`, '
            't1.`birth` AS `birth_x`, t1.`ds` AS `ds_x`, t2.`id` AS `id_y`, '
            't2.`fid` AS `fid_y`, t2.`isMale` AS `isMale_y`, t2.`scale` AS `scale_y`, '
            't2.`birth` AS `birth_y`, t2.`ds` AS `ds_y` \n'
            'FROM mocked_project.`pyodps_test_expr_table` t1 \n'
            'INNER JOIN \n'
            '  mocked_project.`pyodps_test_expr_table2` t2\n'
            'ON t1.`name` == t2.`name`'
        )
        compiled_sql = ODPSEngine(self.odps).compile(inner_joined, prettify=False)
        self.assertEqual(to_str(expected_sql), to_str(compiled_sql))
    def testXFlowInstanceToXML(self):
        """Compare the generated XFlow instance XML with the expected constant."""
        project = 'algo_project'
        xflows = self.odps.get_project(project).xflows

        got_xml = xflows._gen_xlow_instance_xml(
            xflow_name='pyodps_t_tmp_xflow_algo_name',
            xflow_project=project,
            parameters={'key': 'value'},
        )
        self.assertEqual(to_str(got_xml), to_str(EXPECTED_XFLOW_INSTANCE_XML))
    def testSmallRowsFormatter(self):
        """Check that a 10-row ResultFrame renders the same with and without pandas."""
        rows = [self._random_values() for _ in range(10)]
        pandas_frame = ResultFrame(data=rows, schema=self.schema, pandas=True)
        plain_frame = ResultFrame(data=rows, schema=self.schema, pandas=False)

        # Both the text and the HTML representation must agree.
        pandas_repr = to_str(repr(pandas_frame))
        plain_repr = to_str(repr(plain_frame))
        self.assertEqual(pandas_repr, plain_repr)

        pandas_html = to_str(pandas_frame._repr_html_())
        plain_html = to_str(plain_frame._repr_html_())
        self.assertEqual(pandas_html, plain_html)

        # Iterating the plain frame yields exactly its stored values.
        iterated = [row for row in plain_frame]
        self.assertEqual(plain_frame._values, iterated)
    def testDistinctPrune(self):
        """Check the SQL compiled when `name` is selected from a distinct."""
        distinct_rows = self.expr.distinct(self.expr.id + 1, self.expr.name)
        name_only = distinct_rows['name', ]

        # The expected SQL keeps both DISTINCT columns in the sub-query.
        expected_sql = (
            "SELECT t2.`name` \n"
            "FROM (\n"
            "  SELECT DISTINCT t1.`id` + 1 AS `id`, t1.`name` \n"
            "  FROM mocked_project.`pyodps_test_expr_table` t1 \n"
            ") t2"
        )
        compiled_sql = ODPSEngine(self.odps).compile(name_only, prettify=False)
        self.assertEqual(to_str(expected_sql), to_str(compiled_sql))
    def testSQLTaskToXML(self):
        """Serialize a SQLTask, compare with the template, and parse it back."""
        query = 'select * from dual'

        serialized = SQLTask(query=query).serialize()
        expected_xml = template % {'sql': query}
        self.assertEqual(to_str(serialized), to_str(expected_xml))

        # Parsing the serialized form must yield a SQLTask again.
        parsed_task = Task.parse(None, serialized)
        self.assertIsInstance(parsed_task, SQLTask)
    def testSortPrune(self):
        """Check the SQL compiled when columns are selected after a sort."""
        renamed = self.expr[self.expr.exclude('name'), self.expr.name.rename('name2')]
        ordered = renamed.sort('name2')
        selected = ordered['id', 'fid']

        # The sort key `name2` stays in the sub-query of the expected SQL
        # even though it is not part of the final selection.
        expected_sql = (
            "SELECT t2.`id`, t2.`fid` \n"
            "FROM (\n"
            "  SELECT t1.`id`, t1.`fid`, t1.`name` AS `name2` \n"
            "  FROM mocked_project.`pyodps_test_expr_table` t1 \n"
            "  ORDER BY name2 \n"
            "  LIMIT 10000\n"
            ") t2"
        )
        compiled_sql = ODPSEngine(self.odps).compile(selected, prettify=False)
        self.assertEqual(to_str(expected_sql), to_str(compiled_sql))
    def testSlicePrune(self):
        """Check pruning and compiled SQL for filter -> slice -> projection."""
        negative_fid = self.expr.filter(self.expr.fid < 0)
        first_four = negative_fid[:4]
        projected = first_four['name', lambda row: row.id + 1]

        # After pruning, three `input` hops from the root reach the source.
        pruned = ColumnPruning(projected.to_dag()).prune()
        self.assertIsNotNone(pruned.input.input.input._source_data)

        expected_sql = (
            "SELECT t1.`name`, t1.`id` + 1 AS `id` \n"
            "FROM mocked_project.`pyodps_test_expr_table` t1 \n"
            "WHERE t1.`fid` < 0 \n"
            "LIMIT 4"
        )
        compiled_sql = ODPSEngine(self.odps).compile(projected, prettify=False)
        self.assertEqual(to_str(expected_sql), to_str(compiled_sql))
    def testMutatePrune(self):
        """Check the SQL compiled when columns are selected after a mutate."""
        new_id = self.expr.fid.astype('int').rename('new_id')
        base = self.expr[self.expr.exclude('birth'), new_id]

        # Add a per-`name` cumulative sum of `new_id` as a window column.
        windowed = base.groupby('name').mutate(
            lambda grp: grp.new_id.cumsum().rename('new_id_sum'))
        mutated = base[base, windowed]
        selected = mutated[mutated.new_id, mutated.new_id_sum]

        expected_sql = (
            "SELECT t2.`new_id`, t2.`new_id_sum` \n"
            "FROM (\n"
            "  SELECT CAST(t1.`fid` AS BIGINT) AS `new_id`, "
            "SUM(CAST(t1.`fid` AS BIGINT)) OVER (PARTITION BY t1.`name`) AS `new_id_sum` \n"
            "  FROM mocked_project.`pyodps_test_expr_table` t1 \n"
            ") t2"
        )
        compiled_sql = ODPSEngine(self.odps).compile(selected, prettify=False)
        self.assertEqual(to_str(expected_sql), to_str(compiled_sql))
    def testLargeColumnsFormatter(self):
        """Check that a 10x-wide ResultFrame renders the same with and without pandas."""
        # Repeat the base schema ten times, suffixing each column name with
        # the repetition index.
        wide_names = [name + str(i) for i in range(10) for name in self.schema.names]
        wide_types = self.schema.types * 10
        schema = Schema.from_lists(wide_names, wide_types)

        def gen_row():
            # One wide row is ten random base rows laid side by side.
            parts = [self._random_values().values for _ in range(10)]
            return list(itertools.chain(*parts))

        data = []
        for _ in range(10):
            record = Record(schema=df_schema_to_odps_schema(schema), values=gen_row())
            data.append(record)

        pandas_frame = ResultFrame(data=data, schema=schema, pandas=True)
        plain_frame = ResultFrame(data=data, schema=schema, pandas=False)

        self.assertEqual(to_str(repr(pandas_frame)), to_str(repr(plain_frame)))

        pandas_html = to_str(pandas_frame._repr_html_())
        plain_html = to_str(plain_frame._repr_html_())
        self.assertEqual(pandas_html, plain_html)
    def testUnion(self):
        """Execute a union of the distinct main table with a second table."""
        main_rows = [
            ['name1', 4, 5.3, None, None, None],
            ['name2', 2, 3.5, None, None, None],
            ['name1', 4, 4.2, None, None, None],
            ['name1', 3, 2.2, None, None, None],
            ['name1', 3, 4.1, None, None, None],
        ]
        second_rows = [
            ['name3', 5, -1],
            ['name4', 6, -2]
        ]

        # Recreate the second table from scratch.
        schema2 = Schema.from_lists(['name', 'id2', 'id3'],
                                    [types.string, types.bigint, types.bigint])
        table_name = 'pyodps_test_engine_table2'
        self.odps.delete_table(table_name, if_exists=True)
        table2 = self.odps.create_table(name=table_name, schema=schema2)
        expr2 = CollectionExpr(_source_data=table2, _schema=odps_schema_to_df_schema(schema2))

        self._gen_data(data=main_rows)

        second_records = [table2.new_record(values=row) for row in second_rows]
        self.odps.write_table(table2, 0, second_records)

        try:
            left = self.expr['name', 'id'].distinct()
            right = expr2[expr2.id2.rename('id'), 'name']
            unioned = left.union(right)

            res = self.engine.execute(unioned)
            actual = sorted(self._get_result(res))

            expected = sorted([
                ['name1', 4],
                ['name1', 3],
                ['name2', 2],
                ['name3', 5],
                ['name4', 6]
            ])

            # Row order is not significant, so both sides are compared sorted.
            self.assertEqual(len(actual), len(expected))
            for actual_row, expected_row in zip(actual, expected):
                self.assertEqual([to_str(cell) for cell in actual_row],
                                 [to_str(cell) for cell in expected_row])
        finally:
            table2.drop()
    def testFilterPartitionPrune(self):
        """Check pruning and compiled SQL for partition filter -> filter -> projection."""
        in_partition = self.expr.filter_partition('ds=today')
        negative_fid = in_partition[lambda row: row.fid < 0]
        projected = negative_fid['name', lambda row: row.id + 1]

        # Two `input` hops below the root, only the columns used further up
        # remain in the schema.
        pruned = ColumnPruning(projected.to_dag()).prune()
        remaining = set(pruned.input.input.schema.names)
        self.assertEqual(remaining, set(['name', 'id', 'fid']))

        expected_sql = (
            "SELECT t2.`name`, t2.`id` + 1 AS `id` \n"
            "FROM (\n"
            "  SELECT t1.`name`, t1.`id`, t1.`fid` \n"
            "  FROM mocked_project.`pyodps_test_expr_table` t1 \n"
            "  WHERE t1.`ds` == 'today' \n"
            ") t2 \n"
            "WHERE t2.`fid` < 0"
        )
        compiled_sql = ODPSEngine(self.odps).compile(projected, prettify=False)
        self.assertEqual(to_str(expected_sql), to_str(compiled_sql))
    def testReadWriteTable(self):
        """Write Record objects to a fresh table and read them back three ways."""
        test_table_name = tn('pyodps_t_tmp_read_write_table')
        schema = Schema.from_lists(['id', 'name', 'right'], ['bigint', 'string', 'boolean'])

        # Start from a clean slate.
        self.odps.delete_table(test_table_name, if_exists=True)
        self.assertFalse(self.odps.exist_table(test_table_name))

        table = self.odps.create_table(test_table_name, schema)
        data = [
            [111, 'aaa', True],
            [222, 'bbb', False],
            [333, 'ccc', True],
            [444, '中文', False],
        ]
        length = len(data)
        records = [Record(schema=schema, values=row) for row in data]

        # Expected rows, with the string column normalised through to_str.
        texted_data = [[row[0], to_str(row[1]), row[2]] for row in data]

        self.odps.write_table(table, 0, records)

        all_rows = [rec.values for rec in self.odps.read_table(table, length)]
        self.assertSequenceEqual(texted_data, all_rows)

        every_other = [rec.values for rec in self.odps.read_table(table, length, step=2)]
        self.assertSequenceEqual(texted_data[::2], every_other)

        head_rows = [rec.values for rec in table.head(length)]
        self.assertSequenceEqual(texted_data, head_rows)

        self.odps.delete_table(test_table_name)
        self.assertFalse(self.odps.exist_table(test_table_name))
    def testJoin(self):
        """Execute joins between the main table and a second table."""
        main_rows = [
            ['name1', 4, 5.3, None, None, None],
            ['name2', 2, 3.5, None, None, None],
            ['name1', 4, 4.2, None, None, None],
            ['name1', 3, 2.2, None, None, None],
            ['name1', 3, 4.1, None, None, None],
        ]
        second_rows = [
            ['name1', 4, -1],
            ['name2', 1, -2]
        ]

        # Recreate the second table from scratch.
        schema2 = Schema.from_lists(['name', 'id2', 'id3'],
                                    [types.string, types.bigint, types.bigint])
        table_name = 'pyodps_test_engine_table2'
        self.odps.delete_table(table_name, if_exists=True)
        table2 = self.odps.create_table(name=table_name, schema=schema2)
        expr2 = CollectionExpr(_source_data=table2, _schema=odps_schema_to_df_schema(schema2))

        self._gen_data(data=main_rows)

        second_records = [table2.new_record(values=row) for row in second_rows]
        self.odps.write_table(table2, 0, second_records)

        try:
            # Join without an explicit `on` argument.
            joined = self.expr.join(expr2)['name', 'id2']
            rows = self._get_result(self.engine.execute(joined))

            self.assertEqual(len(rows), 5)
            allowed_rows = [
                [to_str('name1'), 4],
                [to_str('name2'), 1]
            ]
            self.assertTrue(all(row in allowed_rows for row in rows))

            # Join on `name` plus the `id` == `id2` pair.
            joined = self.expr.join(expr2, on=['name', ('id', 'id2')])
            selected = joined[self.expr.name, expr2.id2]
            rows = self._get_result(self.engine.execute(selected))

            self.assertEqual(len(rows), 2)
            only_row = [to_str('name1'), 4]
            self.assertTrue(all(row == only_row for row in rows))
        finally:
            table2.drop()
    def testCreateInstanceXML(self):
        """Compare the submit-instance XML for a SQL job with the template."""
        instances = self.odps._project.instances

        # Fixed values so the rendered XML is deterministic.
        fields = {
            'query': 'select * from dual;',
            'uuid': '359696d4-ac73-4e6c-86d1-6649b01f1a22',
            'priority': 5,
        }

        job = instances._create_job(
            task=SQLTask(query=fields['query']),
            priority=fields['priority'],
            uuid_=fields['uuid'],
        )
        actual_xml = instances._get_submit_instance_content(job)
        expected_xml = expected_xml_template % fields

        self.assertEqual(to_str(actual_xml), to_str(expected_xml))
    def testString(self):
        """Compare engine string operations on `name` with plain Python results.

        Each entry pairs a Python callable with the expression expected to
        produce the same value for every generated row.
        """
        data = self._gen_data(5)
        name = self.expr.name
        # The first generated name doubles as the search pattern.
        needle = data[0][0]

        methods_to_fields = [
            (lambda s: s.capitalize(), name.capitalize()),
            (lambda s: needle in s, name.contains(needle, regex=False)),
            (lambda s: s.count(needle), name.count(needle)),
            (lambda s: s.endswith(needle), name.endswith(needle)),
            (lambda s: s.startswith(needle), name.startswith(needle)),
            (lambda s: s.find(needle), name.find(needle)),
            (lambda s: s.rfind(needle), name.rfind(needle)),
            (lambda s: s.replace(needle, 'test'), name.replace(needle, 'test')),
            (lambda s: s[0], name.get(0)),
            (lambda s: len(s), name.len()),
            (lambda s: s.ljust(10), name.ljust(10)),
            (lambda s: s.ljust(20, '*'), name.ljust(20, fillchar='*')),
            (lambda s: s.rjust(10), name.rjust(10)),
            (lambda s: s.rjust(20, '*'), name.rjust(20, fillchar='*')),
            (lambda s: s * 4, name.repeat(4)),
            (lambda s: s[2: 10: 2], name.slice(2, 10, 2)),
            (lambda s: s[-5: -1], name.slice(-5, -1)),
            (lambda s: s.title(), name.title()),
            (lambda s: s.rjust(20, '0'), name.zfill(20)),
            (lambda s: s.isalnum(), name.isalnum()),
            (lambda s: s.isalpha(), name.isalpha()),
            (lambda s: s.isdigit(), name.isdigit()),
            (lambda s: s.isspace(), name.isspace()),
            (lambda s: s.isupper(), name.isupper()),
            (lambda s: s.istitle(), name.istitle()),
            (lambda s: to_str(s).isnumeric(), name.isnumeric()),
            (lambda s: to_str(s).isdecimal(), name.isdecimal()),
        ]

        # Give each expression a unique column name: id0, id1, ...
        fields = []
        for idx, pair in enumerate(methods_to_fields):
            fields.append(pair[1].rename('id' + str(idx)))

        res = self.engine.execute(self.expr[fields])
        result = self._get_result(res)

        # Column `idx` of the result must equal the Python callable applied
        # to the name of every input row.
        for idx, pair in enumerate(methods_to_fields):
            py_method = pair[0]
            expected_col = [py_method(row[0]) for row in data]
            actual_col = [row[idx] for row in result]
            self.assertEqual(expected_col, actual_col)
    def testArithmeticFormatter(self):
        """Compare the repr of an arithmetic expression with the expected text.

        If the exact comparison fails, the two texts are compared line by
        line; a differing line is accepted only when both sides parse as
        floats that are almost equal. Otherwise the original failure is
        re-raised.
        """
        expr = self.expr
        d = -(expr["id"]) + 20.34 - expr["id"] + float(20) * expr["id"] - expr["id"] / 4.9 + 40 // 2 + expr["id"] // 1.2

        try:
            self._lines_eq(EXPECTED_ARITHMETIC_FORMAT, repr(d))
        except AssertionError as e:
            left = [to_str(line.rstrip()) for line in EXPECTED_ARITHMETIC_FORMAT.split("\n")]
            right = [to_str(line.rstrip()) for line in repr(d).split("\n")]
            self.assertEqual(len(left), len(right))
            for expected_line, actual_line in zip(left, right):
                try:
                    self.assertEqual(expected_line, actual_line)
                except AssertionError:
                    try:
                        self.assertAlmostEqual(float(expected_line), float(actual_line))
                    except (AssertionError, ValueError):
                        # ValueError: the lines are not numeric at all;
                        # AssertionError: numeric but too far apart. A bare
                        # `except` would also swallow KeyboardInterrupt and
                        # SystemExit.
                        raise e
    def testGroupbyPrune(self):
        """Check the SQL compiled when one column is selected after a HAVING filter."""
        # Selecting the group key only: the aggregate appears solely in HAVING.
        grouped = self.expr.groupby('name').agg(id=self.expr.id.max())
        name_only = grouped[grouped.id < 0]['name', ]

        expected_sql = (
            "SELECT t1.`name` \n"
            "FROM mocked_project.`pyodps_test_expr_table` t1 \n"
            "GROUP BY t1.`name` \n"
            "HAVING MAX(t1.`id`) < 0"
        )
        compiled_sql = ODPSEngine(self.odps).compile(name_only, prettify=False)
        self.assertEqual(to_str(expected_sql), to_str(compiled_sql))

        # Selecting the aggregate only: the group key stays in GROUP BY.
        grouped = self.expr.groupby('name').agg(id=self.expr.id.max())
        id_only = grouped[grouped.id < 0]['id', ]

        expected_sql = (
            "SELECT MAX(t1.`id`) AS `id` \n"
            "FROM mocked_project.`pyodps_test_expr_table` t1 \n"
            "GROUP BY t1.`name` \n"
            "HAVING MAX(t1.`id`) < 0"
        )
        compiled_sql = ODPSEngine(self.odps).compile(id_only, prettify=False)
        self.assertEqual(to_str(expected_sql), to_str(compiled_sql))
    def testReadNonAsciiSQLInstance(self):
        """Read back non-ASCII and control-character strings through a SQL instance."""
        test_table = tn('pyodps_t_tmp_read_non_ascii_sql_instance')
        self.odps.delete_table(test_table, if_exists=True)

        schema = Schema.from_lists(['size', 'name'], ['bigint', 'string'])
        table = self.odps.create_table(test_table, schema=schema, if_not_exists=True)

        # Names mix CJK text with backslashes, newlines and control bytes.
        data = [[1, '中\\\\n\\\n文 ,\r '], [2, '测试\x00\x01\x02数据']]
        records = [table.new_record(row) for row in data]
        self.odps.write_table(table, 0, records)

        instance = self.odps.execute_sql('select name from %s' % test_table)
        with instance.open_reader() as reader:
            # Row order is not significant, so both sides are sorted.
            read_data = sorted(to_str(rec[0]) for rec in reader)
            expected_data = sorted(to_str(row[1]) for row in data)

            self.assertSequenceEqual(read_data, expected_data)

        table.drop()
Example #25
0
    def testCEncodeAndDecode(self):
        """Round-trip values through the Cython protobuf Encoder and Decoder.

        If the compiled modules cannot be imported, a warning is emitted and
        the test returns without checking anything.
        """
        # Guard only the imports, so that an ImportError raised while the
        # test body runs is reported instead of being turned into a warning.
        try:
            from odps.tunnel.pb.encoder_c import Encoder
            from odps.tunnel.pb.decoder_c import Decoder
        except ImportError:
            warnings.warn("No Encoder or Decoder built by cython found")
            return

        encoder = Encoder()
        encoder.append_tag(0, WIRETYPE_VARINT)
        encoder.append_tag(1, WIRETYPE_VARINT)
        encoder.append_sint64(-2 ** 40)
        encoder.append_tag(2, WIRETYPE_LENGTH_DELIMITED)
        encoder.append_string(to_binary("hello"))
        encoder.append_tag(3, WIRETYPE_VARINT)
        encoder.append_bool(True)
        encoder.append_tag(4, WIRETYPE_FIXED64)
        encoder.append_float(3.14)
        encoder.append_double(0.31415926)
        encoder.append_tag(5, WIRETYPE_VARINT)
        encoder.append_uint32(2 ** 30)
        encoder.append_tag(6, WIRETYPE_VARINT)
        encoder.append_uint64(2 ** 40)
        buffer_size = len(encoder)

        # Decode in exactly the order the values were appended.
        tube = io.BytesIO(encoder.tostring())
        decoder = Decoder(tube)
        # assertEqual replaces the deprecated assertEquals alias, which was
        # removed from unittest in Python 3.12.
        self.assertEqual((0, WIRETYPE_VARINT), decoder.read_field_number_and_wire_type())
        self.assertEqual((1, WIRETYPE_VARINT), decoder.read_field_number_and_wire_type())
        self.assertEqual(-2 ** 40, decoder.read_sint64())
        self.assertEqual((2, WIRETYPE_LENGTH_DELIMITED), decoder.read_field_number_and_wire_type())
        self.assertEqual(to_str("hello"), to_str(decoder.read_string()))
        self.assertEqual((3, WIRETYPE_VARINT), decoder.read_field_number_and_wire_type())
        self.assertEqual(True, decoder.read_bool())
        self.assertEqual((4, WIRETYPE_FIXED64), decoder.read_field_number_and_wire_type())
        self.assertAlmostEqual(3.14, decoder.read_float(), delta=0.001)
        self.assertEqual(0.31415926, decoder.read_double())
        self.assertEqual((5, WIRETYPE_VARINT), decoder.read_field_number_and_wire_type())
        self.assertEqual(2 ** 30, decoder.read_uint32())
        self.assertEqual((6, WIRETYPE_VARINT), decoder.read_field_number_and_wire_type())
        self.assertEqual(2 ** 40, decoder.read_uint64())
        # The decoder must have consumed the whole encoded buffer.
        self.assertEqual(buffer_size, decoder.position())
    def testFilterPushdownThroughProjection(self):
        """Check the SQL compiled for filters applied on top of projections.

        In the first four cases the expected SQL is a single SELECT whose
        WHERE clause is written in terms of the source columns.
        """
        # Case 1: filter on a computed column.
        projected = self.expr[self.expr.id + 1, 'name']
        expr = projected[lambda row: row.id < 10]

        expected_sql = (
            'SELECT t1.`id` + 1 AS `id`, t1.`name` \n'
            'FROM mocked_project.`pyodps_test_expr_table` t1 \n'
            'WHERE (t1.`id` + 1) < 10'
        )
        compiled_sql = ODPSEngine(self.odps).compile(expr, prettify=False)
        self.assertEqual(to_str(expected_sql), to_str(compiled_sql))

        # Case 2: two chained filters after one projection.
        projected = self.expr['name', self.expr.id ** 2]
        expr = projected.filter(lambda row: row.name == 'name1')
        expr = expr.filter(lambda row: row.id < 3)

        expected_sql = (
            "SELECT t1.`name`, CAST(POW(t1.`id`, 2) AS BIGINT) AS `id` \n"
            "FROM mocked_project.`pyodps_test_expr_table` t1 \n"
            "WHERE (t1.`name` == 'name1') AND ((CAST(POW(t1.`id`, 2) AS BIGINT)) < 3)"
        )
        compiled_sql = ODPSEngine(self.odps).compile(expr, prettify=False)
        self.assertEqual(to_str(expected_sql), to_str(compiled_sql))

        # Case 3: projection, filter, second projection, filter.
        expr = self.expr['name', self.expr.id + 1]
        expr = expr.filter(lambda row: row.name == 'name1')
        expr = expr[lambda row: 'tt' + row.name, 'id']
        expr = expr.filter(lambda row: row.id < 3)

        expected_sql = (
            "SELECT CONCAT('tt', t1.`name`) AS `name`, t1.`id` + 1 AS `id` \n"
            "FROM mocked_project.`pyodps_test_expr_table` t1 \n"
            "WHERE (t1.`name` == 'name1') AND ((t1.`id` + 1) < 3)"
        )
        compiled_sql = ODPSEngine(self.odps).compile(expr, prettify=False)
        self.assertEqual(to_str(expected_sql), to_str(compiled_sql))

        # Case 4: filter first, then two projections and a final filter.
        expr = self.expr.filter(self.expr.name == 'name1')
        expr = expr.select('name', lambda row: (row.id + 1) * 2)
        expr = expr[lambda row: 'tt' + row.name, 'id']
        expr = expr.filter(lambda row: row.id < 3)

        expected_sql = (
            "SELECT CONCAT('tt', t1.`name`) AS `name`, (t1.`id` + 1) * 2 AS `id` \n"
            "FROM mocked_project.`pyodps_test_expr_table` t1 \n"
            "WHERE (((t1.`id` + 1) * 2) < 3) AND (t1.`name` == 'name1')"
        )
        compiled_sql = ODPSEngine(self.odps).compile(expr, prettify=False)
        self.assertEqual(to_str(expected_sql), to_str(compiled_sql))

        # Case 5: multi-predicate filter feeding an aggregation; the expected
        # SQL keeps the filter in a sub-query.
        in_range = self.expr.id.between(2, 6)
        has_pyodps = self.expr.name.lower().contains('pyodps', regex=False)
        expr = self.expr.filter(in_range, has_pyodps).name.nunique()

        expected_sql = (
            "SELECT COUNT(DISTINCT t2.`name`) AS `name_nunique` \n"
            "FROM (\n"
            "  SELECT t1.`id`, t1.`name` \n"
            "  FROM mocked_project.`pyodps_test_expr_table` t1 \n"
            "  WHERE ((t1.`id` >= 2) AND (t1.`id` <= 6)) AND INSTR(TOLOWER(t1.`name`), 'pyodps') > 0 \n"
            ") t2"
        )
        compiled_sql = ODPSEngine(self.odps).compile(expr, prettify=False)
        self.assertEqual(to_str(expected_sql), to_str(compiled_sql))
    def testProjectPrune(self):
        """Check pruning and compiled SQL for plain projections.

        Covers a column selection, a projection containing scalar constants,
        and a projection built from a builtin function call.
        """
        # Plain column selection: the projection sits directly on the source.
        expr = self.expr.select('name', 'id')
        new_expr = ColumnPruning(expr.to_dag()).prune()
        self.assertIsInstance(new_expr, ProjectCollectionExpr)
        self.assertIsNotNone(new_expr.input._source_data)

        expected = 'SELECT t1.`name`, t1.`id` \n' \
                   'FROM mocked_project.`pyodps_test_expr_table` t1'
        # Normalise both sides with to_str, as every other compile assertion
        # here does, so the comparison does not depend on the string type the
        # engine returns.
        self.assertEqual(to_str(expected), to_str(ODPSEngine(self.odps).compile(expr, prettify=False)))

        # Projection mixing constants with a real column.
        expr = self.expr[Scalar(3).rename('const'),
                         NullScalar('string').rename('string_const'),
                         self.expr.id]
        expected = 'SELECT 3 AS `const`, CAST(NULL AS STRING) AS `string_const`, t1.`id` \n' \
                   'FROM mocked_project.`pyodps_test_expr_table` t1'
        self.assertEqual(to_str(expected), to_str(ODPSEngine(self.odps).compile(expr, prettify=False)))

        # Projection built from a builtin function taking the table name.
        expr = self.expr.select(pt=BuiltinFunction('max_pt', args=(self.expr._source_data.name,)))
        expected = "SELECT max_pt('pyodps_test_expr_table') AS `pt` \n" \
                   "FROM mocked_project.`pyodps_test_expr_table` t1"
        self.assertEqual(to_str(expected), to_str(ODPSEngine(self.odps).compile(expr, prettify=False)))
    def testArrayReadWriteTable(self):
        """Write plain lists (not Record objects) to a table and read them back."""
        test_table_name = tn("pyodps_t_tmp_read_write_table")
        schema = Schema.from_lists(["id", "name", "right"], ["bigint", "string", "boolean"])

        # Start from a clean slate.
        self.odps.delete_table(test_table_name, if_exists=True)
        self.assertFalse(self.odps.exist_table(test_table_name))

        table = self.odps.create_table(test_table_name, schema)
        data = [
            [111, "aaa", True],
            [222, "bbb", False],
            [333, "ccc", True],
            [444, "中文", False],
        ]
        length = len(data)

        # Expected rows, with the string column normalised through to_str.
        texted_data = [[row[0], to_str(row[1]), row[2]] for row in data]

        # The raw lists are handed to write_table directly.
        self.odps.write_table(table, 0, data)

        all_rows = [rec.values for rec in self.odps.read_table(table, length)]
        self.assertSequenceEqual(texted_data, all_rows)

        every_other = [rec.values for rec in self.odps.read_table(table, length, step=2)]
        self.assertSequenceEqual(texted_data[::2], every_other)

        head_rows = [rec.values for rec in table.head(length)]
        self.assertSequenceEqual(texted_data, head_rows)

        self.odps.delete_table(test_table_name)
        self.assertFalse(self.odps.exist_table(test_table_name))
Example #29
0
    def testFileResource(self):
        """Exercise the file-like API of a file resource in every open mode.

        Covers 'r', 'w', 'r+', 'a', 'a+', 'w+' and 'rb', checking which
        operations raise IOError in each mode, the content read back after
        each write, and the `_need_commit` / `_opened` flags.
        """
        resource_name = tn('pyodps_t_tmp_file_resource')

        # Remove a leftover resource from an earlier run, if there is one.
        try:
            self.odps.delete_resource(resource_name)
        except errors.ODPSError:
            pass

        resource = self.odps.create_resource(resource_name,
                                             'file',
                                             file_obj=FILE_CONTENT)
        self.assertIsInstance(resource, FileResource)

        # Read-only text mode: writes are rejected, reads and seeks work.
        with resource.open(mode='r') as fp:
            self.assertRaises(IOError, lambda: fp.write('sss'))
            self.assertRaises(IOError, lambda: fp.writelines(['sss\n']))

            self.assertIsInstance(fp.read(), six.text_type)

            fp.seek(0, compat.SEEK_END)
            size = fp.tell()
            fp.seek(0)
            self.assertEqual(fp._size, size)

            self.assertEqual(to_str(fp.read()), to_str(FILE_CONTENT))
            fp.seek(1)
            self.assertEqual(to_str(fp.read()), to_str(FILE_CONTENT[1:]))

            fp.seek(0)
            self.assertEqual(to_str(fp.readline()),
                             to_str(FILE_CONTENT.split('\n', 1)[0] + '\n'))

            fp.seek(0)
            add_newline = lambda s: s if s.endswith('\n') else s + '\n'
            self.assertEqual(
                [to_str(add_newline(l)) for l in fp],
                [to_str(add_newline(l)) for l in FILE_CONTENT.splitlines()])

            self.assertFalse(fp._need_commit)
            self.assertTrue(fp._opened)

        # Leaving the `with` block closes the handle.
        self.assertFalse(fp._opened)
        self.assertIsNone(fp._fp)

        # Write-only mode: reads are rejected, writes mark the handle dirty.
        with resource.open(mode='w') as fp:
            self.assertRaises(IOError, fp.read)
            self.assertRaises(IOError, fp.readline)
            self.assertRaises(IOError, fp.readlines)

            fp.writelines([OVERWRITE_FILE_CONTENT] * 2)

            self.assertTrue(fp._need_commit)

            size = fp._size

        # Read/write mode: sees what was just written, then overwrites it.
        with resource.open(mode='r+') as fp:
            self.assertEqual(to_str(fp.read()),
                             to_str(OVERWRITE_FILE_CONTENT * 2))

            self.assertEqual(size, fp._size)

            fp.seek(0)
            fp.write(FILE_CONTENT)
            fp.truncate()

            self.assertTrue(fp._need_commit)

        # Append mode: reads are rejected, writes go after existing content.
        with resource.open(mode='a') as fp:
            self.assertRaises(IOError, fp.read)
            self.assertRaises(IOError, fp.readline)
            self.assertRaises(IOError, fp.readlines)

            fp.write(OVERWRITE_FILE_CONTENT)

            self.assertTrue(fp._need_commit)

        # Append/read mode: reads everything, then cuts down to one character.
        with resource.open(mode='a+') as fp:
            self.assertEqual(to_str(fp.read()),
                             to_str(FILE_CONTENT + OVERWRITE_FILE_CONTENT))
            fp.seek(1)
            fp.truncate()
            self.assertTrue(fp._need_commit)

        # Explicit open()/close() without a `with` block; try/finally makes
        # sure the handle is closed even when the assertion fails.
        fp = resource.open(mode='r')
        try:
            self.assertEqual(to_str(fp.read()), FILE_CONTENT[0])
        finally:
            fp.close()

        # 'w+' starts empty and accepts writes.
        with resource.open(mode='w+') as fp:
            self.assertEqual(len(fp.read()), 0)
            fp.write(FILE_CONTENT)

        with resource.open(mode='r+') as fp:
            self.assertEqual(to_str(fp.read()), FILE_CONTENT)

        # After update(), binary mode returns the new content as bytes.
        resource.update(file_obj='update')
        with resource.open(mode='rb') as fp:
            self.assertIsInstance(fp.read(), six.binary_type)
            fp.seek(0)
            self.assertEqual(to_str(fp.read()), to_str('update'))

        self.odps.delete_resource(resource_name)
Example #30
0
 def _gen_random_string(self, max_length=15):
     """Return a random string of 1 to ``max_length`` characters.

     Each character is ``letters[i]`` for a random ``i`` in 0..51, so
     ``letters`` is assumed to hold at least 52 entries (TODO confirm at
     its definition). The result is passed through ``to_str``.

     :param max_length: inclusive upper bound on the length; previously
         ignored in favour of a hard-coded 15. ``random.randint`` raises
         ValueError when it is smaller than 1.
     """
     length = random.randint(1, max_length)
     chars = [letters[random.randint(0, 51)] for _ in range(length)]
     return to_str(''.join(chars))
Example #31
0
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import zipfile

from odps.tests.core import TestBase, to_str, tn
from odps.compat import unittest, six
from odps import compat
from odps.models import Resource, FileResource, TableResource, VolumeArchiveResource, \
    VolumeFileResource, Schema
from odps import errors, types

# Multi-line sample text used by the tests as file content; the literal is
# passed through to_str (imported from odps.tests.core) before use.
FILE_CONTENT = to_str("""
Proudly swept the rain by the cliffs
As it glided through the trees
Still following ever the bud
The ahihi lehua of the vale
""")
# A second, longer sample text, distinct from FILE_CONTENT, so tests can tell
# which of the two payloads ended up in a file.
OVERWRITE_FILE_CONTENT = to_str("""
Farewell to thee, farewell to thee
The charming one who dwells in the shaded bowers
One fond embrace,
'Ere I depart
Until we meet again
Sweet memories come back to me
Bringing fresh remembrances
Of the past
Dearest one, yes, you are mine own
From you, true love shall never depart
""")
    def testElement(self):
        """Execute element-wise column operations through the engine and check
        every output column against the same computation done in plain Python.

        Row layout relied on below: index 0 is ``name`` (nullable here),
        index 1 is ``id`` and index 2 is ``fid``.
        """
        rows = self._gen_data(5, nullable_field='name')

        # Each expression is built from a fresh ``self.expr`` attribute access,
        # in the same order as the output columns.
        name_is_null = self.expr.name.isnull().rename('name1')
        name_not_null = self.expr.name.notnull().rename('name2')
        name_filled = self.expr.name.fillna('test').rename('name3')
        id_in_literals = self.expr.id.isin([1, 2, 3]).rename('id1')
        id_in_fids = self.expr.id.isin(self.expr.fid.astype('int')).rename('id2')
        id_not_in_literals = self.expr.id.notin([1, 2, 3]).rename('id3')
        id_not_in_fids = self.expr.id.notin(self.expr.fid.astype('int')).rename('id4')
        id_between = self.expr.id.between(self.expr.fid, 3).rename('id5')
        name_switched = self.expr.name.fillna('test').switch(
            'test', 'test' + self.expr.name.fillna('test'),
            'test2', 'test2' + self.expr.name.fillna('test'),
            default=self.expr.name).rename('name4')
        id_bucket = self.expr.id.cut(
            [100, 200, 300],
            labels=['xsmall', 'small', 'large', 'xlarge'],
            include_under=True, include_over=True).rename('id6')

        projected = self.expr[[
            name_is_null, name_not_null, name_filled,
            id_in_literals, id_in_fids, id_not_in_literals, id_not_in_fids,
            id_between, name_switched, id_bucket,
        ]]

        output = self._get_result(self.engine.execute(projected))

        def column(index):
            # Values of one output column, in row order.
            return [record[index] for record in output]

        self.assertEqual(len(rows), len(output))

        # name1 / name2: compare counts of null and non-null names.
        expected_nulls = sum(1 for row in rows if row[0] is None)
        actual_nulls = sum(1 for record in output if record[0])
        self.assertEqual(expected_nulls, actual_nulls)

        expected_non_nulls = sum(1 for row in rows if row[0] is not None)
        actual_non_nulls = sum(1 for record in output if record[1])
        self.assertEqual(expected_non_nulls, actual_non_nulls)

        # name3: nulls replaced by 'test'.
        expected_filled = [('test' if row[0] is None else row[0]) for row in rows]
        self.assertEqual(expected_filled, column(2))

        # id1: membership in a literal list.
        expected_in_literals = [(row[1] in (1, 2, 3)) for row in rows]
        self.assertEqual(expected_in_literals, column(3))

        # id2: membership in the integer-cast fid column.
        fids = [int(row[2]) for row in rows]
        expected_in_fids = [(row[1] in fids) for row in rows]
        self.assertEqual(expected_in_fids, column(4))

        # id3 / id4: the negated membership checks.
        expected_not_in_literals = [(row[1] not in (1, 2, 3)) for row in rows]
        self.assertEqual(expected_not_in_literals, column(5))

        expected_not_in_fids = [(row[1] not in fids) for row in rows]
        self.assertEqual(expected_not_in_fids, column(6))

        # id5: fid <= id <= 3.
        expected_between = [(row[2] <= row[1] <= 3) for row in rows]
        self.assertEqual(expected_between, column(7))

        # name4: a null name is filled with 'test' and then switched to
        # 'testtest'; any other name is expected to come back unchanged.
        expected_switched = [to_str('testtest' if row[0] is None else row[0]) for row in rows]
        self.assertEqual(expected_switched, [to_str(value) for value in column(8)])

        def bucket_label(value):
            # Python-side equivalent of the cut() bins with open-ended extremes.
            if value <= 100:
                return 'xsmall'
            if value <= 200:
                return 'small'
            if value <= 300:
                return 'large'
            return 'xlarge'

        expected_buckets = [to_str(bucket_label(row[1])) for row in rows]
        self.assertEqual(expected_buckets, [to_str(value) for value in column(9)])
Example #33
0
 def _gen_random_string(self, max_length=15):
     """Build a random letter string whose length is drawn from 1..max_length.

     :param max_length: inclusive upper bound on the generated length
     :return: the joined letters, passed through ``to_str``
     """
     def pick_letter():
         # One random entry of ``letters`` (defined elsewhere), index 0..51.
         return letters[random.randint(0, 51)]

     # The length is drawn before any letter, matching the original draw order.
     length = random.randint(1, max_length)
     chars = []
     for _ in range(length):
         chars.append(pick_letter())
     return to_str(''.join(chars))