Example #1
0
 def decode_from_stream(self, in_stream, nested):
     """Decode a single row from ``in_stream`` and return it as a ``Row``.

     The raw decoding is delegated to ``_decode_one_row_from_stream``, which
     yields the row-kind value together with the field values. The result
     is wrapped in a ``Row`` that carries this coder's ``field_names`` and
     the decoded ``RowKind``.

     :param in_stream: the input stream to read the row from.
     :param nested: forwarded unchanged to ``_decode_one_row_from_stream``.
     :return: the decoded ``Row``.
     """
     decoded = self._decode_one_row_from_stream(in_stream, nested)
     kind_code, values = decoded

     result = Row(*values)
     result.set_field_names(self.field_names)
     result.set_row_kind(RowKind(kind_code))
     return result
Example #2
0
def convert_to_python_obj(data, type_info):
    """Convert ``data`` into a Python object according to ``type_info``.

    Dispatch rules, in order:

    * ``Types.PICKLED_BYTE_ARRAY()``: ``data`` is unpickled directly.
    * ``ExternalTypeInfo``: the call recurses with the wrapped
      ``_type_info``.
    * anything else: ``data`` is handed to the JVM helper
      ``PythonBridgeUtils.getPickledBytesFromJavaObject`` and the bytes it
      returns are converted with ``pickled_bytes_to_python_converter``.
      For ``RowTypeInfo`` / ``TupleTypeInfo`` the first element of the
      result is the row kind (little-endian) and the remaining elements are
      the per-field payloads; an empty payload becomes ``None``.

    NOTE: ``pickle.loads`` can execute arbitrary code contained in the
    payload, so ``data`` must originate from a trusted source.

    :param data: the value to convert.
    :param type_info: the type information describing ``data``.
    :return: a ``Row`` for ``RowTypeInfo``, a ``tuple`` for
        ``TupleTypeInfo``, otherwise whatever the converter produces.
    """
    if type_info == Types.PICKLED_BYTE_ARRAY():
        return pickle.loads(data)

    if isinstance(type_info, ExternalTypeInfo):
        return convert_to_python_obj(data, type_info._type_info)

    bridge = get_gateway().jvm.PythonBridgeUtils
    pickle_bytes = bridge.getPickledBytesFromJavaObject(
        data, type_info.get_java_type_info())

    is_row = isinstance(type_info, RowTypeInfo)
    if not (is_row or isinstance(type_info, TupleTypeInfo)):
        return pickled_bytes_to_python_converter(pickle_bytes, type_info)

    # Element 0 holds the row kind; the field payloads start at index 1.
    raw_fields = list(pickle_bytes[1:])
    fields = [
        None if len(raw) == 0
        else pickled_bytes_to_python_converter(raw, field_type)
        for raw, field_type in zip(raw_fields, type_info.get_field_types())
    ]

    if is_row:
        kind = RowKind(int.from_bytes(pickle_bytes[0], 'little'))
        return Row.of_kind(kind, *fields)
    return tuple(fields)
Example #3
0
    def to_internal(self, value) -> IN:
        """Build a new ``Row`` from an external ``(row_kind, fields)`` value.

        ``value[1]`` is iterated and each item is converted by the field
        data converter at the same position; ``value[0]`` is passed to
        ``RowKind``. The resulting row also receives this converter's
        field names.

        :param value: ``None`` or an indexable object whose element 0 is the
            row-kind value and whose element 1 is the iterable of fields.
        :return: ``None`` when ``value`` is ``None``, otherwise a fresh
            ``Row``.
        """
        if value is None:
            return None

        result = Row()

        converted = []
        for position, item in enumerate(value[1]):
            converter = self._field_data_converters[position]
            converted.append(converter.to_internal(item))

        result._values = converted
        result.set_field_names(self._field_names)
        result.set_row_kind(RowKind(value[0]))
        return result
Example #4
0
    def to_internal(self, value) -> IN:
        """Fill the shared ``_reuse_row`` from a ``(row_kind, fields)`` value.

        ``value[1]`` is iterated and each item is converted by the field
        data converter at the same position; ``value[0]`` is passed to
        ``RowKind``. No new ``Row`` is allocated: the same ``_reuse_row``
        instance is overwritten and returned on every call, so a caller
        that keeps the result will see it change on the next call.

        :param value: ``None`` or an indexable object whose element 0 is the
            row-kind value and whose element 1 is the iterable of fields.
        :return: ``None`` when ``value`` is ``None``, otherwise
            ``self._reuse_row``.
        """
        if value is None:
            return None

        converted = []
        for position, item in enumerate(value[1]):
            converter = self._field_data_converters[position]
            converted.append(converter.to_internal(item))

        shared = self._reuse_row
        shared._values = converted
        shared.set_row_kind(RowKind(value[0]))
        return shared
Example #5
0
def pickled_bytes_to_python_converter(data, field_type):
    """Convert pickled payload ``data`` into a Python value of ``field_type``.

    For ``RowTypeInfo`` the payload is a sequence: element 0 is the row kind
    (little-endian bytes) and the remaining elements are per-field payloads,
    each converted recursively. For every other type the payload is
    unpickled first and then post-processed depending on ``field_type``:

    * ``SQL_TIME``: the integer is split into hours, minutes, seconds and
      microseconds (it is divided by ``10**6`` first) and returned as a
      ``datetime.time``.
    * ``SQL_DATE``: passed to ``field_type.from_internal_type``.
    * ``SQL_TIMESTAMP``: ``timestamp()`` of the unpickled value is scaled by
      ``10**6``, truncated to ``int`` and passed to ``from_internal_type``.
    * ``FLOAT``: the unpickled value is parsed with ``ast.literal_eval``
      before ``from_internal_type``.
    * array and list types: each element is converted recursively.
    * ``MapTypeInfo``: the unpickled value holds keys at index 0 and values
      at index 1; both are converted recursively into a ``dict``.
    * otherwise: passed to ``field_type.from_internal_type``.

    NOTE: ``pickle.loads`` can execute arbitrary code contained in the
    payload, so ``data`` must originate from a trusted source.

    :param data: the pickled payload (or sequence of payloads for rows).
    :param field_type: the type information describing the payload.
    :return: the converted Python value.
    """
    if isinstance(field_type, RowTypeInfo):
        kind = RowKind(int.from_bytes(data[0], 'little'))
        nested_payloads = list(data[1:])
        values = [
            pickled_bytes_to_python_converter(raw, raw_type)
            for raw, raw_type in zip(nested_payloads,
                                     field_type.get_field_types())
        ]
        return Row.of_kind(kind, *values)

    payload = pickle.loads(data)

    if field_type == Types.SQL_TIME():
        total_seconds, micros = divmod(payload, 10**6)
        total_minutes, secs = divmod(total_seconds, 60)
        hrs, mins = divmod(total_minutes, 60)
        return datetime.time(hrs, mins, secs, micros)

    if field_type == Types.SQL_DATE():
        return field_type.from_internal_type(payload)

    if field_type == Types.SQL_TIMESTAMP():
        micros_since_epoch = int(payload.timestamp() * 10**6)
        return field_type.from_internal_type(micros_since_epoch)

    if field_type == Types.FLOAT():
        return field_type.from_internal_type(ast.literal_eval(payload))

    array_types = (BasicArrayTypeInfo, PrimitiveArrayTypeInfo,
                   ObjectArrayTypeInfo)
    if isinstance(field_type, array_types):
        item_type = field_type._element_type
        return [pickled_bytes_to_python_converter(raw, item_type)
                for raw in payload]

    if isinstance(field_type, MapTypeInfo):
        k_type = field_type._key_type_info
        v_type = field_type._value_type_info
        return {
            pickled_bytes_to_python_converter(raw_key, k_type):
                pickled_bytes_to_python_converter(raw_value, v_type)
            for raw_key, raw_value in zip(payload[0], payload[1])
        }

    if isinstance(field_type, ListTypeInfo):
        item_type = field_type.elem_type
        return [pickled_bytes_to_python_converter(raw, item_type)
                for raw in payload]

    return field_type.from_internal_type(payload)
Example #6
0
    def decode_from_stream(self, in_stream, length=0) -> Row:
        """Decode one ``Row`` from ``in_stream``.

        A mask is read first. Its leading ``ROW_KIND_BIT_SIZE`` entries
        encode the row kind, least-significant bit first; the entries after
        them are per-field null flags. A flagged field becomes ``None``
        without reading from the stream, every other field is decoded by
        the field coder at the same position.

        :param in_stream: the input stream to read from.
        :param length: not used by this method.
        :return: the decoded ``Row`` with field names and row kind set.
        """
        mask = self._mask_utils.read_mask(in_stream)

        values = []
        for position in range(self._field_count):
            if mask[position + ROW_KIND_BIT_SIZE]:
                values.append(None)
            else:
                coder = self._field_coders[position]
                values.append(coder.decode_from_stream(in_stream))

        # Reassemble the row kind from its bits (bit i has weight 2 ** i).
        kind_code = sum(int(mask[bit]) << bit
                        for bit in range(ROW_KIND_BIT_SIZE))

        result = Row(*values)
        result.set_field_names(self._field_names)
        result.set_row_kind(RowKind(kind_code))
        return result