Beispiel #1
0
def from_proto(field_type):
    """
    Builds the :class:`Coder` matching a field type given in its protocol representation.

    Type names registered in ``_type_name_mappings`` resolve directly to the mapped coder.
    Row, array and map types are resolved recursively from their nested types.

    :param field_type: the protocol representation of the field type
    :return: :class:`Coder`
    :raises ValueError: if the type name of ``field_type`` is not handled here
    """
    kind = field_type.type_name

    # Types that need no extra parameters are served straight from the lookup table.
    mapped = _type_name_mappings.get(kind)
    if mapped is not None:
        return mapped

    if kind == type_name.ROW:
        field_coders = [from_proto(f.type) for f in field_type.row_schema.fields]
        return RowCoder(field_coders)

    if kind == type_name.TIMESTAMP:
        return TimestampCoder(field_type.timestamp_info.precision)

    if kind == type_name.LOCAL_ZONED_TIMESTAMP:
        # The zone name comes from the "table.exec.timezone" experiment of the pipeline options.
        zone_name = pipeline_options.view_as(DebugOptions).lookup_experiment(
            "table.exec.timezone")
        tz = pytz.timezone(zone_name)
        return LocalZonedTimestampCoder(field_type.local_zoned_timestamp_info.precision, tz)

    if kind == type_name.ARRAY:
        element_coder = from_proto(field_type.collection_element_type)
        return ArrayCoder(element_coder)

    if kind == type_name.MAP:
        map_info = field_type.map_info
        return MapCoder(from_proto(map_info.key_type), from_proto(map_info.value_type))

    if kind == type_name.DECIMAL:
        decimal_info = field_type.decimal_info
        return DecimalCoder(decimal_info.precision, decimal_info.scale)

    raise ValueError("field_type %s is not supported." % field_type)
Beispiel #2
0
    def _pickle_from_runner_api_parameter(schema_proto, unused_components,
                                          unused_context):
        """
        Builds an :class:`ArrowCoder` from the protocol representation of a schema.

        The fields of ``schema_proto`` are converted into a :class:`RowType`, from which the
        arrow schema is derived. The time zone handed to the coder is read from the
        "table.exec.timezone" experiment of the pipeline options.

        :param schema_proto: the protocol representation of the schema
        :param unused_components: not used
        :param unused_context: not used
        :return: :class:`ArrowCoder`
        :raises ValueError: if a field has a type that is not handled here
        """
        type_names = flink_fn_execution_pb2.Schema.TypeName

        # Types whose data type is constructed from the nullability flag alone.
        nullable_only_types = {
            type_names.TINYINT: TinyIntType,
            type_names.SMALLINT: SmallIntType,
            type_names.INT: IntType,
            type_names.BIGINT: BigIntType,
            type_names.BOOLEAN: BooleanType,
            type_names.FLOAT: FloatType,
            type_names.DOUBLE: DoubleType,
            type_names.DATE: DateType,
        }

        def _to_data_type(field):
            # Converts the type of a single schema field into the corresponding data type.
            proto_type = field.type
            kind = proto_type.type_name

            simple_type = nullable_only_types.get(kind)
            if simple_type is not None:
                return simple_type(proto_type.nullable)

            if kind == type_names.VARCHAR:
                return VarCharType(0x7fffffff, proto_type.nullable)
            if kind == type_names.VARBINARY:
                return VarBinaryType(0x7fffffff, proto_type.nullable)
            if kind == type_names.DECIMAL:
                decimal_info = proto_type.decimal_info
                return DecimalType(decimal_info.precision, decimal_info.scale,
                                   proto_type.nullable)
            if kind == type_names.TIME:
                return TimeType(proto_type.time_info.precision, proto_type.nullable)
            if kind == type_names.LOCAL_ZONED_TIMESTAMP:
                return LocalZonedTimestampType(
                    proto_type.local_zoned_timestamp_info.precision, proto_type.nullable)
            if kind == type_names.TIMESTAMP:
                return TimestampType(proto_type.timestamp_info.precision, proto_type.nullable)

            raise ValueError("field_type %s is not supported." % proto_type)

        def _to_row_type(row_schema):
            # Converts every field of the schema into a named row field.
            row_fields = []
            for schema_field in row_schema.fields:
                row_fields.append(RowField(schema_field.name, _to_data_type(schema_field)))
            return RowType(row_fields)

        def _to_arrow_schema(row_type):
            # Pairs each field name with its data type to build the arrow fields.
            arrow_fields = []
            for name, data_type in zip(row_type.field_names(), row_type.field_types()):
                arrow_fields.append(
                    pa.field(name, to_arrow_type(data_type), data_type._nullable))
            return pa.schema(arrow_fields)

        timezone = pytz.timezone(
            pipeline_options.view_as(DebugOptions).lookup_experiment(
                "table.exec.timezone"))
        row_type = _to_row_type(schema_proto)
        arrow_schema = _to_arrow_schema(row_type)
        return ArrowCoder(arrow_schema, row_type, timezone)