def setUpClass(cls):
    """Prepare the class-level fixtures used by the pandas conversion tests.

    Sets three class attributes, in this order:

    * ``cls.data`` - a list of two sample rows (tuples), one value per
      column ``f1`` .. ``f15``.
    * ``cls.data_type`` - the ``DataTypes.ROW`` schema naming and typing
      those fifteen columns.
    * ``cls.pdf`` - the result of ``cls.create_pandas_data_frame()``, which
      is invoked last (presumably because it reads the two attributes
      above - confirm against its definition).
    """
    super(PandasConversionTestBase, cls).setUpClass()

    # date/time/datetime objects are immutable, so both rows can share them.
    sample_date = datetime.date(2014, 9, 13)
    sample_time = datetime.time(hour=1, minute=0, second=1)
    sample_ts = datetime.datetime(1970, 1, 1, 0, 0, 0, 123000)

    def new_words():
        # A fresh list per row, so the two rows never alias a mutable value.
        return ['hello', '中文']

    def new_nested_row():
        # Likewise a fresh Row (with its own inner list) for every sample row.
        return Row(a=1, b='hello', c=sample_ts, d=[1, 2])

    first_row = (
        1, 1, 1, 1, True, 1.1, 1.2, 'hello', bytearray(b"aaa"),
        decimal.Decimal('1000000000000000000.01'),
        sample_date, sample_time, sample_ts,
        new_words(), new_nested_row(),
    )
    second_row = (
        1, 2, 2, 2, False, 2.1, 2.2, 'world', bytearray(b"bbb"),
        decimal.Decimal('1000000000000000000.02'),
        sample_date, sample_time, sample_ts,
        new_words(), new_nested_row(),
    )
    cls.data = [first_row, second_row]

    # Schema of the nested row stored in column f15.
    nested_columns = (
        ("a", DataTypes.INT()),
        ("b", DataTypes.STRING()),
        ("c", DataTypes.TIMESTAMP(3)),
        ("d", DataTypes.ARRAY(DataTypes.INT())),
    )
    nested_row_type = DataTypes.ROW(
        [DataTypes.FIELD(name, column_type) for name, column_type in nested_columns])

    # Column name -> type, listed in the same order as the values in each row.
    columns = (
        ("f1", DataTypes.TINYINT()),
        ("f2", DataTypes.SMALLINT()),
        ("f3", DataTypes.INT()),
        ("f4", DataTypes.BIGINT()),
        ("f5", DataTypes.BOOLEAN()),
        ("f6", DataTypes.FLOAT()),
        ("f7", DataTypes.DOUBLE()),
        ("f8", DataTypes.STRING()),
        ("f9", DataTypes.BYTES()),
        ("f10", DataTypes.DECIMAL(38, 18)),
        ("f11", DataTypes.DATE()),
        ("f12", DataTypes.TIME()),
        ("f13", DataTypes.TIMESTAMP(3)),
        ("f14", DataTypes.ARRAY(DataTypes.STRING())),
        ("f15", nested_row_type),
    )
    # False is passed positionally as the second ROW argument
    # (NOTE(review): this is the nullable flag in PyFlink's DataTypes.ROW -
    # confirm against the installed version).
    cls.data_type = DataTypes.ROW(
        [DataTypes.FIELD(name, column_type) for name, column_type in columns],
        False)

    # Must run after data and data_type have been assigned, as in the original order.
    cls.pdf = cls.create_pandas_data_frame()
Beispiel #2
0
def _create_orc_basic_row_and_data() -> Tuple[RowType, RowTypeInfo, List[Row]]:
    """Build a matching schema, type info and sample data for basic ORC columns.

    Returns:
        A tuple ``(row_type, row_type_info, data)``: ``row_type`` is the
        ``DataTypes.ROW`` schema, ``row_type_info`` is the ``Types.ROW_NAMED``
        description of the same ten columns, and ``data`` is a list holding
        a single ``Row`` with one value per column.
    """
    # One ordered list of column names drives the schema, the type info and
    # the sample row, so the three always line up.
    column_names = (
        'char', 'varchar', 'bytes', 'boolean', 'decimal',
        'int', 'bigint', 'double', 'date', 'timestamp',
    )

    table_types = (
        DataTypes.CHAR(10),
        DataTypes.VARCHAR(10),
        DataTypes.BYTES(),
        DataTypes.BOOLEAN(),
        DataTypes.DECIMAL(2, 0),
        DataTypes.INT(),
        DataTypes.BIGINT(),
        DataTypes.DOUBLE(),
        DataTypes.DATE().bridged_to('java.sql.Date'),
        DataTypes.TIMESTAMP(3).bridged_to('java.sql.Timestamp'),
    )
    row_type = DataTypes.ROW([
        DataTypes.FIELD(name, table_type)
        for name, table_type in zip(column_names, table_types)
    ])

    stream_types = [
        Types.STRING(),
        Types.STRING(),
        Types.PRIMITIVE_ARRAY(Types.BYTE()),
        Types.BOOLEAN(),
        Types.BIG_DEC(),
        Types.INT(),
        Types.LONG(),
        Types.DOUBLE(),
        Types.SQL_DATE(),
        Types.SQL_TIMESTAMP(),
    ]
    row_type_info = Types.ROW_NAMED(list(column_names), stream_types)

    sample_values = (
        'char',
        'varchar',
        b'varbinary',
        True,
        # NOTE(review): the value has a fractional part although the column is
        # DECIMAL(2, 0) - presumably deliberate to exercise rounding; confirm.
        Decimal(1.5),
        2147483647,            # largest signed 32-bit integer
        -9223372036854775808,  # smallest signed 64-bit integer
        2e-308,
        date(1970, 1, 1),
        datetime(1970, 1, 2, 3, 4, 5, 600000),
    )
    # Keyword order follows column_names, exactly as if written out by hand.
    sample_row = Row(**dict(zip(column_names, sample_values)))

    return row_type, row_type_info, [sample_row]
Beispiel #3
0
    def test_basic_type(self):
        """Basic data types must be unchanged by a to-Java / from-Java round trip."""
        original_types = [
            DataTypes.STRING(),
            DataTypes.BOOLEAN(),
            DataTypes.BYTES(),
            DataTypes.TINYINT(),
            DataTypes.SMALLINT(),
            DataTypes.INT(),
            DataTypes.BIGINT(),
            DataTypes.FLOAT(),
            DataTypes.DOUBLE(),
            DataTypes.DATE(),
            DataTypes.TIME(),
            DataTypes.TIMESTAMP(3),
        ]

        # Two separate passes: convert the whole batch with _to_java_type
        # first, then feed every result back through _from_java_type.
        as_java = list(map(_to_java_type, original_types))
        round_tripped = list(map(_from_java_type, as_java))

        self.assertEqual(original_types, round_tripped)