Ejemplo n.º 1
0
 def testRecordSetAndGetByName(self):
     """Assign every field of a Record by column name and read it back.

     The schema has eight columns (``col0`` .. ``col7``), one for each of
     bigint, double, string, datetime, boolean, decimal, array<string> and
     map<string,bigint>. The test checks both ``r.values`` as a whole and
     each individual name-based lookup.
     """
     s = Schema.from_lists(['col%s' % i for i in range(8)], [
         'bigint', 'double', 'string', 'datetime', 'boolean', 'decimal',
         'array<string>', 'map<string,bigint>'
     ])
     r = Record(schema=s)
     r['col0'] = 1
     r['col1'] = 1.2
     r['col2'] = 'abc'
     r['col3'] = datetime(2016, 1, 1)
     r['col4'] = True
     r['col5'] = _decimal.Decimal('1.111')
     r['col6'] = ['a', 'b']
     r['col7'] = OrderedDict({'a': 1})
     self.assertSequenceEqual(r.values, [
         1, 1.2, 'abc',
         datetime(2016, 1, 1), True,
         _decimal.Decimal('1.111'), ['a', 'b'],
         OrderedDict({'a': 1})
     ])
     # assertEqual is used instead of the deprecated assertEquals alias,
     # which no longer exists in unittest from Python 3.12 onwards.
     self.assertEqual(1, r['col0'])
     self.assertEqual(1.2, r['col1'])
     self.assertEqual('abc', r['col2'])
     self.assertEqual(datetime(2016, 1, 1), r['col3'])
     self.assertEqual(True, r['col4'])
     self.assertEqual(_decimal.Decimal('1.111'), r['col5'])
     self.assertEqual(['a', 'b'], r['col6'])
     self.assertEqual(OrderedDict({'a': 1}), r['col7'])
Ejemplo n.º 2
0
    def testRecordSetAndGetByIndex(self):
        """Assign every field of a Record by position and read it back.

        Also checks that ``build_snapshot`` leaves ``_snapshot`` unset when
        ``options.force_py`` is on and set otherwise, and that slicing a
        record returns the corresponding list of values.
        """
        s = Schema.from_lists(['col%s' % i for i in range(8)], [
            'bigint', 'double', 'string', 'datetime', 'boolean', 'decimal',
            'array<string>', 'map<string,bigint>'
        ])
        s.build_snapshot()
        if options.force_py:
            self.assertIsNone(s._snapshot)
        else:
            self.assertIsNotNone(s._snapshot)

        r = Record(schema=s)
        r[0] = 1
        r[1] = 1.2
        r[2] = 'abc'
        r[3] = datetime(2016, 1, 1)
        r[4] = True
        r[5] = _decimal.Decimal('1.111')
        r[6] = ['a', 'b']
        r[7] = OrderedDict({'a': 1})
        self.assertSequenceEqual(r.values, [
            1, 1.2, 'abc',
            datetime(2016, 1, 1), True,
            _decimal.Decimal('1.111'), ['a', 'b'],
            OrderedDict({'a': 1})
        ])
        # assertEqual is used instead of the deprecated assertEquals alias,
        # which no longer exists in unittest from Python 3.12 onwards.
        self.assertEqual(1, r[0])
        self.assertEqual(1.2, r[1])
        self.assertEqual('abc', r[2])
        self.assertEqual(datetime(2016, 1, 1), r[3])
        self.assertEqual(True, r[4])
        self.assertEqual(_decimal.Decimal('1.111'), r[5])
        self.assertEqual(['a', 'b'], r[6])
        self.assertEqual(OrderedDict({'a': 1}), r[7])
        # Slice access returns the leading values as a list.
        self.assertEqual([1, 1.2], r[:2])
Ejemplo n.º 3
0
 def _gen_data(self):
     return [
         ('hello world', 2**63-1, math.pi, datetime(2015, 9, 19, 2, 11, 25, 33000),
          True, Decimal('3.14'), ['simple', 'easy'], OrderedDict({'s': 1})),
         ('goodbye', 222222, math.e, datetime(2020, 3, 10), False, Decimal('2.555555'),
          ['true', None], OrderedDict({'true': 1})),
         ('c'*300, -2**63+1, -2.222, datetime(1999, 5, 25, 3, 10), True, Decimal(22222),
          ['false'], OrderedDict({'false': 0})),
     ]
Ejemplo n.º 4
0
    def testPandasPersistODPS2(self):
        """Persist a one-row pandas-backed DataFrame and check column types.

        The frame has one column per numpy dtype (int8, int16, int32, int64,
        float32, float64). After persisting, the table schema is expected to
        list tinyint, smallint, int, bigint, float and double in that order.
        """
        import pandas as pd
        import numpy as np

        one_row = (1, )

        # Columns are added in the same order the random values are drawn,
        # so the column order in the frame matches expected_types below.
        columns = OrderedDict()
        columns['data_int8'] = np.random.randint(0, 10, one_row, dtype=np.int8)
        columns['data_int16'] = np.random.randint(0, 10, one_row, dtype=np.int16)
        columns['data_int32'] = np.random.randint(0, 10, one_row, dtype=np.int32)
        columns['data_int64'] = np.random.randint(0, 10, one_row, dtype=np.int64)
        columns['data_float32'] = np.random.random(one_row).astype(np.float32)
        columns['data_float64'] = np.random.random(one_row).astype(np.float64)

        pandas_frame = pd.DataFrame(columns)
        df = DataFrame(pandas_frame)
        tmp_table_name = tn('pyodps_test_mixed_persist_odps2_types')

        # Start from a clean slate, then write the frame out.
        self.odps.delete_table(tmp_table_name, if_exists=True)
        persist_options = dict(lifecycle=1, drop_table=True, odps=self.odps)
        df.persist(tmp_table_name, **persist_options)

        persisted = self.odps.get_table(tmp_table_name)
        expected_types = [
            odps_types.tinyint,
            odps_types.smallint,
            odps_types.int_,
            odps_types.bigint,
            odps_types.float_,
            odps_types.double,
        ]
        self.assertEqual(expected_types, persisted.schema.types)
Ejemplo n.º 5
0
    def _gen_random_map(self, random_map_type):
        """Build an OrderedDict of random entries for the given map type.

        ``random_map_type`` is passed through ``types.validate_data_type``;
        the result's ``key_type`` and ``value_type`` drive two calls to
        ``self._gen_random_array``, each asked for the same randomly chosen
        number of items (between 100 and 500 inclusive). The two arrays are
        zipped into the returned OrderedDict.
        """
        entry_count = random.randint(100, 500)
        map_type = types.validate_data_type(random_map_type)

        # Keys are generated before values, each with the same requested size.
        keys = self._gen_random_array(map_type.key_type, entry_count)
        values = self._gen_random_array(map_type.value_type, entry_count)

        return OrderedDict(zip(keys, values))
Ejemplo n.º 6
0
    def testCreateTableDDL(self):
        """Check DDL text produced for a created table and for an external one.

        First creates a partitioned table and inspects ``get_ddl()`` with and
        without ``if_not_exists``; then compares the statement generated by
        ``Table.gen_create_table_sql`` for an external table against a fixed
        expected string.
        """
        from odps.compat import OrderedDict
        from odps.models import Table

        table_name = tn('pyodps_t_tmp_table_ddl')
        column_names, column_types = ['id', 'name'], ['bigint', 'string']
        partition_names, partition_types = ['ds'], ['string']
        schema = Schema.from_lists(
            column_names, column_types, partition_names, partition_types)
        self.odps.delete_table(table_name, if_exists=True)
        table = self.odps.create_table(table_name, schema, lifecycle=10)

        # Default DDL: neither external nor conditional, and it mentions
        # every name in the table schema.
        plain_ddl = table.get_ddl()
        for unexpected in ('EXTERNAL', 'NOT EXISTS'):
            self.assertNotIn(unexpected, plain_ddl)
        for col in table.schema.names:
            self.assertIn(col, plain_ddl)

        conditional_ddl = table.get_ddl(if_not_exists=True)
        self.assertIn('NOT EXISTS', conditional_ddl)

        # Ordered so the properties appear in a fixed order in the output.
        serde_properties = OrderedDict()
        serde_properties['name1'] = 'value1'
        serde_properties['name2'] = 'value2'
        external_ddl = Table.gen_create_table_sql(
            'test_external_table',
            schema,
            comment='TEST_COMMENT',
            storage_handler='com.aliyun.odps.CsvStorageHandler',
            serde_properties=serde_properties,
            location='oss://mock_endpoint/mock_bucket/mock_path/',
        )
        expected_ddl = textwrap.dedent("""
        CREATE EXTERNAL TABLE `test_external_table` (
          `id` BIGINT,
          `name` STRING
        )
        COMMENT 'TEST_COMMENT'
        PARTITIONED BY (
          `ds` STRING
        )
        STORED BY 'com.aliyun.odps.CsvStorageHandler'
        WITH SERDEPROPERTIES (
          'name1' = 'value1',
          'name2' = 'value2'
        )
        LOCATION 'oss://mock_endpoint/mock_bucket/mock_path/'
        """).strip()
        self.assertEqual(external_ddl, expected_ddl)
Ejemplo n.º 7
0
    def testPivotTable(self):
        """Execute several pivot_table expressions against a small table.

        Writes five rows into a temporary table, then runs four pivots
        through ``self.engine``: rows only, rows with two aggregations,
        rows + columns with a fill value, and rows + columns with a mix of a
        custom aggregator class and a named aggregation. The table is dropped
        afterwards regardless of the outcome.
        """
        rows = [
            ['name1', 1, 1.0, True],
            ['name1', 1, 5.0, True],
            ['name1', 2, 2.0, True],
            ['name2', 1, 3.0, False],
            ['name2', 3, 4.0, False],
        ]

        table_name = tn('pyodps_test_mixed_engine_pivot_table')
        self.odps.delete_table(table_name, if_exists=True)
        schema = Schema.from_lists(
            ['name', 'id', 'fid', 'ismale'],
            ['string', 'bigint', 'double', 'boolean'])
        table = self.odps.create_table(name=table_name, schema=schema)
        expr = DataFrame(table)
        try:
            self.odps.write_table(table, 0, rows)

            # 1) Rows only: expected values are the per-name means of fid.
            by_name = expr.pivot_table(rows='name', values='fid')
            res = self.engine.execute(by_name)
            result = self._get_result(res)

            expected = [['name1', 8.0 / 3], ['name2', 3.5]]
            self.assertEqual(sorted(result), sorted(expected))

            # 2) Two aggregations: one output column per aggregation.
            by_name_multi = expr.pivot_table(
                rows='name', values='fid', aggfunc=['mean', 'sum'])
            res = self.engine.execute(by_name_multi)
            result = self._get_result(res)

            expected = [['name1', 8.0 / 3, 8.0], ['name2', 3.5, 7.0]]
            self.assertEqual(res.schema.names, ['name', 'fid_mean', 'fid_sum'])
            self.assertEqual(sorted(result), sorted(expected))

            # 3) Pivot on the name column; missing cells take fill_value.
            by_id = expr.pivot_table(
                rows='id', values='fid', columns='name', fill_value=0)
            res = self.engine.execute(by_id.distinct())
            result = self._get_result(res)

            expected = [
                [1, 3.0, 3.0],
                [2, 2.0, 0],
                [3, 0, 4.0],
            ]

            self.assertEqual(res.schema.names,
                             ['id', 'name1_fid_mean', 'name2_fid_mean'])
            self.assertEqual(result, expected)

            class Agg(object):
                # Aggregator that sums values in a one-slot list buffer.
                def buffer(self):
                    return [0]

                def __call__(self, buffer, val):
                    buffer[0] += val

                def merge(self, buffer, pbuffer):
                    buffer[0] += pbuffer[0]

                def getvalue(self, buffer):
                    return buffer[0]

            # 4) Custom aggregator alongside a named one; the dict keys
            #    show up as suffixes in the output column names.
            aggfuncs = OrderedDict()
            aggfuncs['my_sum'] = Agg
            aggfuncs['mean'] = 'mean'
            by_id_custom = expr.pivot_table(
                rows='id',
                values='fid',
                columns='name',
                fill_value=0,
                aggfunc=aggfuncs)
            res = self.engine.execute(by_id_custom)
            result = self._get_result(res)

            expected = [
                [1, 6.0, 3.0, 3.0, 3.0],
                [2, 2.0, 0, 2.0, 0],
                [3, 0, 4.0, 0, 4.0],
            ]

            expected_names = [
                'id',
                'name1_fid_my_sum', 'name2_fid_my_sum',
                'name1_fid_mean', 'name2_fid_mean',
            ]
            self.assertEqual(res.schema.names, expected_names)
            self.assertEqual(result, expected)
        finally:
            table.drop()
Ejemplo n.º 8
0
    def testPivotTable(self):
        """Check schema names/types of pivot_table expressions.

        Exercises single and multiple ``values``/``rows``, a list of
        aggregation names, a custom aggregator class (alone and inside an
        ordered mapping), and finally a pivot with ``columns``, which is
        expected to be a ``DynamicMixin`` instance.
        """
        from odps.df.expr.dynamic import DynamicMixin

        # Single value, single row key.
        single = self.expr.pivot_table(values='int8', rows='float32')
        self.assertNotIsInstance(single, DynamicMixin)
        self.assertEqual(single.schema.names, ['float32', 'int8_mean'])

        # Two values, two row keys.
        two_values = self.expr.pivot_table(
            values=('int16', 'int32'), rows=['float32', 'int8'])
        self.assertEqual(two_values.schema.names,
                         ['float32', 'int8', 'int16_mean', 'int32_mean'])

        # Two values with two aggregations.
        two_aggs = self.expr.pivot_table(
            values=('int16', 'int32'),
            rows=['string', 'boolean'],
            aggfunc=['mean', 'sum'])
        expected_names = [
            'string', 'boolean',
            'int16_mean', 'int32_mean',
            'int16_sum', 'int32_sum',
        ]
        expected_types = [
            types.string, types.boolean,
            types.float64, types.float64,
            types.int16, types.int32,
        ]
        self.assertEqual(two_aggs.schema.names, expected_names)
        self.assertEqual(two_aggs.schema.types, expected_types)

        @output(['my_mean'], ['float'])
        class Aggregator(object):
            # Running mean kept as [sum, count].
            def buffer(self):
                return [0.0, 0]

            def __call__(self, buffer, val):
                buffer[0] += val
                buffer[1] += 1

            def merge(self, buffer, pbuffer):
                buffer[0] += pbuffer[0]
                buffer[1] += pbuffer[1]

            def getvalue(self, buffer):
                # Guard against dividing by an empty count.
                if buffer[1] == 0:
                    return 0.0
                return buffer[0] / buffer[1]

        # Custom aggregator class passed directly.
        custom = self.expr.pivot_table(
            values='int16', rows='string', aggfunc=Aggregator)
        self.assertEqual(custom.schema.names, ['string', 'int16_my_mean'])
        self.assertEqual(custom.schema.types, [types.string, types.float64])

        # Custom aggregators keyed by name; the keys become column suffixes.
        aggfunc = OrderedDict()
        aggfunc['my_agg'] = Aggregator
        aggfunc['my_agg2'] = Aggregator

        named = self.expr.pivot_table(
            values='int16', rows='string', aggfunc=aggfunc)
        self.assertEqual(named.schema.names,
                         ['string', 'int16_my_agg', 'int16_my_agg2'])
        self.assertEqual(named.schema.types,
                         [types.string, types.float64, types.float64])

        # Pivoting on a column yields a dynamic expression.
        with_columns = self.expr.pivot_table(
            values='int16', columns='boolean', rows='string')
        self.assertIsInstance(with_columns, DynamicMixin)
Ejemplo n.º 9
0
    from odps.sqlalchemy_odps import update_test_setting

    _ONE_ROW_COMPLEX_CONTENTS = [
        True,
        127,
        32767,
        2147483647,
        9223372036854775807,
        0.5,
        0.25,
        'a string',
        pd.Timestamp(1970, 1, 1, 8),
        b'123',
        [1, 2],
        OrderedDict({
            1: 2,
            3: 4
        }),
        OrderedDict({
            "a": 1,
            "b": 2
        }),
        decimal.Decimal('0.1'),
    ]
except ImportError:
    dependency_installed = False
else:
    dependency_installed = True


def create_one_row(o):
    table = 'one_row'