Beispiel #1
0
 def __init__(self, schema: Schema, spec: PartitionSpec, physical_partition_no: int):
     """Initialize the instance from a schema, a partition spec and a
     physical partition number.

     :param schema: the full schema; stored unchanged and also used to look
       up the partition keys
     :param spec: partition spec whose ``partition_by`` keys are looked up
       in ``schema``
     :param physical_partition_no: stored unchanged on the instance
     """
     self._orig_schema = schema
     keys = spec.partition_by
     # position of each partition key, as reported by ``schema.index_of_key``
     self._key_index = [schema.index_of_key(name) for name in keys]
     # presumably the sub-schema holding only the partition keys
     # -- TODO confirm against ``Schema.extract``
     self._schema = schema.extract(keys)
     self._physical_partition_no = physical_partition_no
     # placeholders: the framework is expected to overwrite these later
     self._partition_no = self._slice_no = 0
     self._row: List[Any] = []
Beispiel #2
0
    def get_partitioner(self, schema: Schema) -> SchemaedDataPartitioner:
        """Build a :class:`~triad.utils.pyarrow.SchemaedDataPartitioner` for
        the given dataframe schema

        The partitioner receives ``schema.pa_schema``, the positions of this
        spec's ``partition_by`` keys inside ``schema``, no sizer, and this
        spec's row and size limits.

        :param schema: schema of the dataframe this partition spec is applied to
        :return: the constructed SchemaedDataPartitioner
        """
        key_positions = [schema.index_of_key(name) for name in self.partition_by]
        arrow_schema = schema.pa_schema
        limits = {"row_limit": self._row_limit, "size_limit": self._size_limit}
        return SchemaedDataPartitioner(
            arrow_schema, key_positions, sizer=None, **limits
        )