Code example #1
File: store_hdf5.py  Project: adamczykm/static-frame
    def read(
        self,
        label: tp.Optional[str] = None,
        *,
        config: tp.Optional[StoreConfig] = None,
        container_type: tp.Type[Frame] = Frame,
    ) -> Frame:
        '''
        Args:
            {dtypes}
        '''
        import tables

        if config is None:
            config = StoreConfig()  # get default
        if config.dtypes:
            raise NotImplementedError(
                'using config.dtypes on HDF5 not yet supported')

        index_depth = config.index_depth
        columns_depth = config.columns_depth

        index_arrays = []
        columns_labels = []

        with tables.open_file(self._fp, mode='r') as file:
            table = file.get_node(f'/{label}')
            colnames = table.cols._v_colnames

            def blocks() -> tp.Iterator[np.ndarray]:
                for col_idx, colname in enumerate(colnames):

                    # can also do: table.read(field=colname)
                    array = table.col(colname)

                    if array.dtype.kind in DTYPE_STR_KIND:
                        array = array.astype(str)
                    array.flags.writeable = False

                    if col_idx < index_depth:
                        index_arrays.append(array)
                        continue
                    # only store column labels for those yielded
                    columns_labels.append(colname)
                    yield array

            if config.consolidate_blocks:
                data = TypeBlocks.from_blocks(
                    TypeBlocks.consolidate_blocks(blocks()))
            else:
                data = TypeBlocks.from_blocks(blocks())

        return container_type._from_data_index_arrays_column_labels(
            data=data,
            index_depth=index_depth,
            index_arrays=index_arrays,
            columns_depth=columns_depth,
            columns_labels=columns_labels,
            name=tp.cast(tp.Hashable, label)  # not sure why this is necessary
        )
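A minimal usage sketch for the reader above, assuming StoreHDF5 and StoreConfig are importable as in this module's source tree; the file path and label are hypothetical:

    from static_frame.core.store import StoreConfig
    from static_frame.core.store_hdf5 import StoreHDF5

    store = StoreHDF5('data.h5')  # hypothetical file previously written by this Store
    config = StoreConfig(index_depth=1, columns_depth=1)
    f = store.read('frame_a', config=config)  # returns an immutable Frame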
Code example #2
    def read_many(self,
            labels: tp.Iterable[tp.Hashable],
            *,
            config: StoreConfigMapInitializer = None,
            container_type: tp.Type[Frame] = Frame,
            ) -> tp.Iterator[Frame]:
        import tables
        config_map = StoreConfigMap.from_initializer(config)

        with tables.open_file(self._fp, mode='r') as file:
            for label in labels:
                c = config_map[label]
                label_encoded = config_map.default.label_encode(label)

                index_depth = c.index_depth
                index_constructors = c.index_constructors
                columns_depth = c.columns_depth
                columns_constructors = c.columns_constructors
                consolidate_blocks = c.consolidate_blocks
                if c.dtypes:
                    raise NotImplementedError('using config.dtypes on HDF5 not yet supported')

                index_arrays = []
                columns_labels = []

                table = file.get_node(f'/{label_encoded}')
                colnames = table.cols._v_colnames

                def blocks() -> tp.Iterator[np.ndarray]:
                    for col_idx, colname in enumerate(colnames):
                        # can also do: table.read(field=colname)
                        array = table.col(colname)
                        if array.dtype.kind in DTYPE_STR_KINDS:
                            array = array.astype(str)
                        array.flags.writeable = False

                        if col_idx < index_depth:
                            index_arrays.append(array)
                            continue
                        # only store column labels for those yielded
                        columns_labels.append(colname)
                        yield array

                if consolidate_blocks:
                    data = TypeBlocks.from_blocks(TypeBlocks.consolidate_blocks(blocks()))
                else:
                    data = TypeBlocks.from_blocks(blocks())

                # this will own_data in subsequent constructor call
                yield container_type._from_data_index_arrays_column_labels(
                        data=data,
                        index_depth=index_depth,
                        index_arrays=index_arrays,
                        index_constructors=index_constructors,
                        columns_depth=columns_depth,
                        columns_labels=columns_labels,
                        columns_constructors=columns_constructors,
                        name=label,
                        )
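A hedged sketch of driving the batch reader; labels and per-label configuration are hypothetical, and a plain dict mapping label to StoreConfig is assumed to be an accepted StoreConfigMapInitializer, as suggested by StoreConfigMap.from_initializer above:

    from static_frame.core.store import StoreConfig
    from static_frame.core.store_hdf5 import StoreHDF5

    store = StoreHDF5('data.h5')  # hypothetical file
    config = {
        'frame_a': StoreConfig(index_depth=1),
        'frame_b': StoreConfig(index_depth=2, consolidate_blocks=True),
    }
    for f in store.read_many(('frame_a', 'frame_b'), config=config):
        print(f.name, f.shape)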
Code example #3
    def to_type_blocks(self) -> TypeBlocks:
        '''
        Provide a correctly typed TypeBlocks representation.
        '''
        depth_count = self.depth
        if depth_count == 0:
            return TypeBlocks.from_zero_size_shape()

        return TypeBlocks.from_blocks(
            self.values_at_depth(d) for d in range(depth_count))
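TypeBlocks.from_blocks is the recurring constructor in these examples: it consumes an iterator of NumPy arrays, one per column (or per 2D block). A minimal sketch with hypothetical data, following the convention above of making each array immutable before yielding it:

    import numpy as np
    from static_frame.core.type_blocks import TypeBlocks

    def blocks():
        for values in ((1, 2, 3), ('a', 'b', 'c')):
            array = np.array(values)
            array.flags.writeable = False  # match the convention above: immutable arrays
            yield array

    tb = TypeBlocks.from_blocks(blocks())
    print(tb.shape)  # (3, 2): two columns, each retaining its own dtype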
Code example #4
File: index_level.py  Project: admdev8/static-frame
    def to_type_blocks(self) -> TypeBlocks:
        '''
        Provide a correctly typed TypeBlocks representation.
        '''
        try:
            depth_count = self.depth
        except StopIteration:
            # assume we have no depth or length
            return TypeBlocks.from_zero_size_shape()

        return TypeBlocks.from_blocks(
            self.values_at_depth(d) for d in range(depth_count))
Code example #5
    def _index_decode(
        *,
        archive: Archive,
        metadata: tp.Dict[str, tp.Any],
        key_template_values: str,
        key_types: str,
        depth: int,
        cls_index: tp.Type['IndexBase'],
        name: NameType,
    ) -> tp.Optional['IndexBase']:
        '''Build index or columns.
        '''
        from static_frame.core.type_blocks import TypeBlocks

        if key_template_values.format(0) not in archive.labels:
            index = None
        elif depth == 1:
            index = cls_index(
                archive.read_array(key_template_values.format(0)),
                name=name,
            )
        else:
            index_tb = TypeBlocks.from_blocks(
                archive.read_array(key_template_values.format(i))
                for i in range(depth))
            index_constructors = [
                ContainerMap.str_to_cls(name) for name in metadata[key_types]
            ]
            index = cls_index._from_type_blocks(
                index_tb,  # type: ignore
                name=name,
                index_constructors=index_constructors,
            )
        return index
Code example #6
            def blocks() -> tp.Iterator[np.ndarray]:
                type_blocks = []
                previous_f: tp.Optional[Frame] = None
                block_compatible = True
                reblock_compatible = True

                for f in frames:
                    if len(f.columns) != len(columns) or (f.columns != columns).any():
                        f = f.reindex(columns=columns, fill_value=fill_value)

                    type_blocks.append(f._blocks)
                    # column size is all the same by this point
                    if previous_f is not None:  # after the first
                        if block_compatible:
                            block_compatible &= f._blocks.block_compatible(
                                previous_f._blocks,
                                axis=1)  # only compare columns
                        if reblock_compatible:
                            reblock_compatible &= f._blocks.reblock_compatible(
                                previous_f._blocks)
                    previous_f = f

                yield from TypeBlocks.vstack_blocks_to_blocks(
                    type_blocks=type_blocks,
                    block_compatible=block_compatible,
                    reblock_compatible=reblock_compatible,
                )
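This excerpt appears to be the inner blocks() generator of a vertical concatenation routine; the public entry point for that operation is Frame.from_concat. A hedged sketch of that API (data is hypothetical):

    import static_frame as sf

    f1 = sf.Frame.from_records(((1, 'a'), (2, 'b')), columns=('x', 'y'))
    f2 = sf.Frame.from_records(((3, 'c'),), columns=('x', 'y'))
    # stack rows; index=sf.IndexAutoFactory avoids index label collisions
    f3 = sf.Frame.from_concat((f1, f2), axis=0, index=sf.IndexAutoFactory)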
Code example #7
File: pivot.py  Project: vishalbelsare/static-frame
def pivot_items(
    blocks: TypeBlocks,
    group_fields_iloc: tp.Iterable[tp.Hashable],
    group_depth: int,
    data_field_iloc: tp.Hashable,
    func_single: UFunc,
) -> tp.Iterator[tp.Tuple[tp.Hashable, tp.Any]]:
    '''
    Specialized generator of pairs for when we have only one data_field and one function.
    '''
    group_key = group_fields_iloc if group_depth > 1 else group_fields_iloc[0]  #type: ignore

    for label, _, sub in blocks.group(axis=0, key=group_key):
        # label = group if take_group else group[0]
        # will always be first
        values = sub._extract_array_column(data_field_iloc)
        yield label, func_single(values)
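A hedged sketch of consuming this generator directly, assuming pivot_items is importable from static_frame.core.pivot as the file header suggests; the Frame, field positions, and aggregation function are hypothetical, and the private Frame._blocks is passed in just as pivot_core does below:

    import numpy as np
    from static_frame import Frame
    from static_frame.core.pivot import pivot_items

    f = Frame.from_records(
        (('a', 1), ('a', 2), ('b', 5)),
        columns=('group', 'value'),
    )
    # group rows by column 0; aggregate column 1 with np.sum
    pairs = pivot_items(
        blocks=f._blocks,
        group_fields_iloc=[0],
        group_depth=1,
        data_field_iloc=1,
        func_single=np.sum,
    )
    print(dict(pairs))  # {'a': 3, 'b': 5}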
Code example #8
File: pivot.py  Project: vishalbelsare/static-frame
def pivot_records_items(
    blocks: TypeBlocks,
    group_fields_iloc: tp.Iterable[tp.Hashable],
    group_depth: int,
    data_fields_iloc: tp.Iterable[tp.Hashable],
    func_single: tp.Optional[UFunc],
    func_map: tp.Sequence[tp.Tuple[tp.Hashable, UFunc]],
) -> tp.Iterator[tp.Tuple[tp.Hashable, tp.Sequence[tp.Any]]]:
    '''
    Given a Frame and pivot parameters, perform the group-by on the group_fields and, within each group, apply the aggregation function(s).
    '''
    # NOTE: this delivers results as (label, row) pairs for use in a Frame.from_records_items constructor
    # take_group_index = group_depth > 1
    # columns_loc_to_iloc = frame.columns._loc_to_iloc

    group_key = group_fields_iloc if group_depth > 1 else group_fields_iloc[0]  #type: ignore
    record_size = len(data_fields_iloc) * (1 if func_single else len(func_map))
    record: tp.List[tp.Any]

    for label, _, part in blocks.group(axis=0, key=group_key):
        # label = group_index if take_group_index else group_index[0]
        record = [None] * record_size  # this size can be pre-allocated
        pos = 0

        if func_single:
            for column_key in data_fields_iloc:
                values = part._extract_array_column(column_key)
                record[pos] = func_single(values)
                pos += 1
        else:
            for column_key in data_fields_iloc:
                values = part._extract_array_column(column_key)
                for _, func in func_map:
                    record[pos] = func(values)
                    pos += 1

        yield label, record
Code example #9
def pivot_records_items_to_frame(
    *,
    blocks: TypeBlocks,
    group_fields_iloc: tp.Iterable[tp.Hashable],
    group_depth: int,
    data_fields_iloc: tp.Iterable[tp.Hashable],
    func_single: tp.Optional[UFunc],
    func_map: tp.Sequence[tp.Tuple[tp.Hashable, UFunc]],
    func_no: bool,
    kind: str,
    columns_constructor: IndexConstructor,
    columns: tp.List[tp.Hashable],
    index_constructor: IndexConstructor,
    dtypes: tp.Tuple[tp.Optional[np.dtype]],
    frame_cls: tp.Type['Frame'],
) -> 'Frame':
    '''
    Given a Frame and pivot parameters, perform the group-by on the group_fields and, within each group, apply the aggregation function(s).
    '''
    group_key = group_fields_iloc if group_depth > 1 else group_fields_iloc[0]  #type: ignore
    record_size = len(data_fields_iloc) * (1 if (func_single or func_no) else len(func_map))

    index_labels = []
    arrays: tp.List[tp.List[tp.Any]] = [list() for _ in range(record_size)]

    for label, _, part in blocks.group(axis=0, key=group_key, kind=kind):
        index_labels.append(label)
        if func_no:
            if len(part) != 1:
                raise RuntimeError(
                    'pivot requires aggregation of values; provide a `func` argument.'
                )
            for i, column_key in enumerate(data_fields_iloc):
                arrays[i].append(part._extract(0, column_key))
        elif func_single:
            for i, column_key in enumerate(data_fields_iloc):
                arrays[i].append(
                    func_single(part._extract_array_column(column_key)))
        else:
            i = 0
            for column_key in data_fields_iloc:
                values = part._extract_array_column(column_key)
                for _, func in func_map:
                    arrays[i].append(func(values))
                    i += 1

    def gen() -> tp.Iterator[np.ndarray]:
        for b, dtype in zip(arrays, dtypes):
            if dtype is None:
                array, _ = iterable_to_array_1d(b)
            else:
                array = np.array(b, dtype=dtype)
            array.flags.writeable = False
            yield array

    tb = TypeBlocks.from_blocks(gen())
    return frame_cls(
        tb,
        index=index_constructor(index_labels),
        columns=columns_constructor(columns),
        own_data=True,
        own_index=True,
        own_columns=True,
    )
Code example #10
def pivot_core(
    *,
    frame: 'Frame',
    index_fields: tp.List[tp.Hashable],
    columns_fields: tp.List[tp.Hashable],
    data_fields: tp.List[tp.Hashable],
    func_fields: tp.Tuple[tp.Hashable, ...],
    func_single: tp.Optional[UFunc],
    func_map: tp.Sequence[tp.Tuple[tp.Hashable, UFunc]],
    fill_value: object = np.nan,
    index_constructor: IndexConstructor = None,
    kind: str = DEFAULT_FAST_SORT_KIND,
) -> 'Frame':
    '''Core implementation of Frame.pivot(). The Frame has already been reduced to just relevant columns, and all fields groups are normalized as lists of hashables.
    '''
    from static_frame.core.series import Series
    from static_frame.core.frame import Frame

    func_no = func_single is None and func_map == ()

    data_fields_len = len(data_fields)
    index_depth = len(index_fields)

    # all are lists of hashables; get converted to lists of integers
    columns_loc_to_iloc = frame.columns._loc_to_iloc
    index_fields_iloc: tp.Sequence[int] = columns_loc_to_iloc(index_fields)  #type: ignore
    data_fields_iloc: tp.Sequence[int] = columns_loc_to_iloc(data_fields)  #type: ignore
    columns_fields_iloc: tp.Sequence[int] = columns_loc_to_iloc(columns_fields)  #type: ignore

    # For data fields, we add the field name, not the field values, to the columns.
    columns_name = tuple(columns_fields)
    if data_fields_len > 1 or not columns_fields:
        # if no columns_fields, have to add values label
        columns_name = tuple(chain(columns_fields, ('values', )))
    if len(func_map) > 1:
        columns_name = columns_name + ('func', )

    columns_depth = len(columns_name)

    if columns_depth == 1:
        columns_name = columns_name[0]  # type: ignore
        columns_constructor = partial(frame._COLUMNS_CONSTRUCTOR,
                                      name=columns_name)
    else:
        columns_constructor = partial(
            frame._COLUMNS_HIERARCHY_CONSTRUCTOR.from_labels,
            depth_reference=columns_depth,
            name=columns_name)

    dtype_map = frame.dtypes  # returns a Series
    if func_no:
        dtypes_per_data_fields = tuple(dtype_map[field]
                                       for field in data_fields)
        if data_fields_len == 1:
            dtype_single = dtype_map[data_fields[0]]
    else:
        dtypes_per_data_fields = tuple(
            pivot_records_dtypes(
                dtype_map=dtype_map,
                data_fields=data_fields,
                func_single=func_single,
                func_map=func_map,
            ))
        if func_single and data_fields_len == 1:
            dtype_single = ufunc_dtype_to_dtype(func_single,
                                                dtype_map[data_fields[0]])

    fill_value_dtype = dtype_from_element(fill_value)

    #---------------------------------------------------------------------------
    # First major branch: if we are only grouping by index fields. This can be done in a single group-by operation on those fields. The final index is not known until the group-by is performed.

    if not columns_fields:  # group by is only index_fields
        columns = data_fields if (func_no or func_single) else tuple(
            product(data_fields, func_fields))
        # NOTE: at this time we do not automatically give back an IndexHierarchy when index_depth is == 1, as the order of the resultant values may not be hierarchable.
        name_index = index_fields[0] if index_depth == 1 else tuple(
            index_fields)
        if index_constructor:
            index_constructor = partial(index_constructor, name=name_index)
        else:
            index_constructor = partial(Index, name=name_index)

        if len(columns) == 1:
            # length of columns is equal to length of datafields, func_map not needed
            f = pivot_items_to_frame(
                blocks=frame._blocks,
                group_fields_iloc=index_fields_iloc,
                group_depth=index_depth,
                data_field_iloc=data_fields_iloc[0],
                func_single=func_single,
                frame_cls=frame.__class__,
                name=columns[0],
                dtype=dtype_single,
                index_constructor=index_constructor,
                columns_constructor=columns_constructor,
                kind=kind,
            )
        else:
            f = pivot_records_items_to_frame(
                blocks=frame._blocks,
                group_fields_iloc=index_fields_iloc,
                group_depth=index_depth,
                data_fields_iloc=data_fields_iloc,
                func_single=func_single,
                func_map=func_map,
                func_no=func_no,
                kind=kind,
                columns_constructor=columns_constructor,
                columns=columns,
                index_constructor=index_constructor,
                dtypes=dtypes_per_data_fields,
                frame_cls=frame.__class__,
            )
        columns_final = (f.columns.rename(columns_name) if columns_depth == 1
                         else columns_constructor(f.columns))
        return f.relabel(columns=columns_final)  #type: ignore

    #---------------------------------------------------------------------------
    # Second major branch: we are grouping by index and columns fields. This is done with an outer and inner group-by. The index is calculated ahead of time.

    # avoid doing a multi-column-style selection if not needed
    if len(columns_fields) == 1:
        retuple_group_label = True
    else:
        retuple_group_label = False

    columns_loc_to_iloc = frame.columns._loc_to_iloc
    # group by on 1 or more columns fields
    # NOTE: explored doing one group on index and columns that insert into pre-allocated arrays, but that proved slower than this approach
    group_key = columns_fields_iloc if len(columns_fields_iloc) > 1 else columns_fields_iloc[0]

    index_outer = pivot_outer_index(
        frame=frame,
        index_fields=index_fields,
        index_depth=index_depth,
        index_constructor=index_constructor,
    )

    # collect subframes based on an index of tuples and columns of tuples (if depth > 1)
    sub_blocks = []
    sub_columns_collected: tp.List[tp.Hashable] = []

    for group, _, sub in frame._blocks.group(axis=0, key=group_key, kind=kind):
        # derive the column fields represented by this group
        sub_columns = extrapolate_column_fields(
            columns_fields,
            group if not retuple_group_label else (group, ),
            data_fields,
            func_fields,
        )
        sub_columns_collected.extend(sub_columns)

        sub_frame: Frame
        # if sub_columns length is 1, that means that we only need to extract one column out of the sub blocks
        if len(sub_columns) == 1:
            sub_blocks.append(
                pivot_items_to_block(
                    blocks=sub,
                    group_fields_iloc=index_fields_iloc,
                    group_depth=index_depth,
                    data_field_iloc=data_fields_iloc[0],
                    func_single=func_single,
                    dtype=dtype_single,
                    index_outer=index_outer,
                    fill_value=fill_value,
                    fill_value_dtype=fill_value_dtype,
                    kind=kind,
                ))
        else:
            sub_blocks.extend(
                pivot_records_items_to_blocks(
                    blocks=sub,
                    group_fields_iloc=index_fields_iloc,
                    group_depth=index_depth,
                    data_fields_iloc=data_fields_iloc,
                    func_single=func_single,
                    func_map=func_map,
                    func_no=func_no,
                    fill_value=fill_value,
                    fill_value_dtype=fill_value_dtype,
                    index_outer=index_outer,
                    dtypes=dtypes_per_data_fields,
                    kind=kind,
                ))

    tb = TypeBlocks.from_blocks(sub_blocks)
    return frame.__class__(
        tb,
        index=index_outer,
        columns=columns_constructor(sub_columns_collected),
        own_data=True,
        own_index=True,
        own_columns=True,
    )
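For orientation, the public entry point that dispatches into pivot_core is Frame.pivot. A hedged sketch (column names and data are hypothetical):

    import numpy as np
    from static_frame import Frame

    f = Frame.from_records(
        (('a', 'x', 1), ('a', 'y', 2), ('b', 'x', 3), ('b', 'y', 4)),
        columns=('idx', 'col', 'val'),
    )
    p = f.pivot(
        index_fields='idx',
        columns_fields='col',
        data_fields='val',
        func=np.sum,
    )
    # p has index ('a', 'b') and columns ('x', 'y'), holding summed values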
Code example #11
def pivot_items_to_frame(
    *,
    blocks: TypeBlocks,
    group_fields_iloc: tp.Iterable[tp.Hashable],
    group_depth: int,
    data_field_iloc: tp.Hashable,
    func_single: tp.Optional[UFunc],
    frame_cls: tp.Type['Frame'],
    name: NameType,
    dtype: np.dtype,
    index_constructor: IndexConstructor,
    columns_constructor: IndexConstructor,
    kind: str,
) -> 'Frame':
    '''
    Specialized handler for when we have only one data_field and one function.
    This version returns a Frame.
    '''

    from static_frame.core.series import Series
    group_key = group_fields_iloc if group_depth > 1 else group_fields_iloc[0]  #type: ignore

    if func_single:
        labels = []
        values = []
        for label, _, v in blocks.group_extract(
                axis=0,
                key=group_key,
                extract=data_field_iloc,
                kind=kind,
        ):
            labels.append(label)
            values.append(func_single(v))

        if dtype is None:
            array, _ = iterable_to_array_1d(values, count=len(values))
        else:
            array = np.array(values, dtype=dtype)
        array.flags.writeable = False
        index = index_constructor(labels)
        return frame_cls.from_elements(
            array,
            index=index,
            own_index=True,
            columns=(name, ),
            columns_constructor=columns_constructor,
        )
    # func_no scenario
    if group_depth == 1:
        index = index_constructor(blocks._extract_array_column(group_key))
    else:
        index = index_constructor(
            tuple(label)
            for label in blocks._extract_array(column_key=group_key))

    array = blocks._extract_array_column(data_field_iloc)
    return frame_cls.from_elements(
        array,
        index=index,
        own_index=True,
        columns=(name, ),
        columns_constructor=columns_constructor,
    )
Code example #12
def pivot_items_to_block(
    *,
    blocks: TypeBlocks,
    group_fields_iloc: tp.Iterable[tp.Hashable],
    group_depth: int,
    data_field_iloc: tp.Hashable,
    func_single: tp.Optional[UFunc],
    dtype: tp.Optional[np.dtype],
    fill_value: tp.Any,
    fill_value_dtype: np.dtype,
    index_outer: 'IndexBase',
    kind: str,
) -> np.ndarray:
    '''
    Specialized handler for when we have only one data_field and one function.
    This version returns a single immutable array aligned to index_outer.
    '''
    from static_frame.core.series import Series
    group_key = group_fields_iloc if group_depth > 1 else group_fields_iloc[0]  #type: ignore

    if func_single and dtype is not None:
        array = np.full(
            len(index_outer),
            fill_value,
            dtype=resolve_dtype(dtype, fill_value_dtype),
        )
        for label, _, values in blocks.group_extract(
                axis=0,
                key=group_key,
                extract=data_field_iloc,
                kind=kind,
        ):
            array[index_outer._loc_to_iloc(label)] = func_single(values)
        array.flags.writeable = False
        return array

    if func_single and dtype is None:

        def gen() -> tp.Iterator[tp.Tuple[int, tp.Any]]:
            for label, _, values in blocks.group_extract(
                    axis=0,
                    key=group_key,
                    extract=data_field_iloc,
                    kind=kind,
            ):
                yield index_outer._loc_to_iloc(label), func_single(values)

        post = Series.from_items(gen())
        if len(post) == len(index_outer):
            array = np.empty(len(index_outer), dtype=post.dtype)
        else:
            array = np.full(
                len(index_outer),
                fill_value,
                dtype=resolve_dtype(post.dtype, fill_value_dtype),
            )
        array[post.index.values] = post.values
        array.flags.writeable = False
        return array

    # func_no scenario as no mapping here
    if group_depth == 1:
        labels = [
            index_outer._loc_to_iloc(label)
            for label in blocks._extract_array_column(group_key)
        ]
    else:
        # NOTE: might replace _extract_array_column with an iterator of tuples
        labels = [
            index_outer._loc_to_iloc(tuple(label))
            for label in blocks._extract_array(column_key=group_key)
        ]

    values = blocks._extract_array_column(data_field_iloc)
    if len(values) == len(index_outer):
        array = np.empty(len(index_outer), dtype=dtype)
    else:
        array = np.full(
            len(index_outer),
            fill_value,
            dtype=resolve_dtype(values.dtype, fill_value_dtype),
        )
    array[labels] = values
    array.flags.writeable = False
    return array
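The fill logic above hinges on resolve_dtype, which finds a dtype wide enough to hold both the computed values and the fill value. A minimal sketch of that behavior, assuming resolve_dtype is importable from static_frame.core.util as used in this module:

    import numpy as np
    from static_frame.core.util import resolve_dtype

    # an integer result array that must also hold a float fill value (e.g. NaN)
    print(resolve_dtype(np.dtype(np.int64), np.dtype(np.float64)))  # float64
    # incompatible kinds fall back to object
    print(resolve_dtype(np.dtype(np.int64), np.dtype('<U1')))  # object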
Code example #13
def pivot_records_items_to_blocks(
    *,
    blocks: TypeBlocks,
    group_fields_iloc: tp.Iterable[tp.Hashable],
    group_depth: int,
    data_fields_iloc: tp.Iterable[tp.Hashable],
    func_single: tp.Optional[UFunc],
    func_map: tp.Sequence[tp.Tuple[tp.Hashable, UFunc]],
    func_no: bool,
    fill_value: tp.Any,
    fill_value_dtype: np.dtype,
    index_outer: 'IndexBase',
    dtypes: tp.Tuple[tp.Optional[np.dtype]],
    kind: str,
) -> tp.List[np.ndarray]:
    '''
    Given a Frame and pivot parameters, perform the group-by on the group_fields and, within each group, apply the aggregation function(s).
    '''
    # NOTE: this delivers results as (label, row) pairs for use in a Frame.from_records_items constructor

    group_key = group_fields_iloc if group_depth > 1 else group_fields_iloc[0]  #type: ignore
    arrays: tp.List[tp.Union[tp.List[tp.Any], np.ndarray]] = []
    for dtype in dtypes:
        if dtype is None:
            # we can use fill_value here, as either it will be completely replaced (and not affect dtype evaluation) or be needed (and already there)
            arrays.append([fill_value] * len(index_outer))
        else:
            arrays.append(np.empty(len(index_outer), dtype=dtype))

    # try to use the dtype specified; fill values at end if necessary
    # collect all possible ilocs, and remove as observed; if any remain, we have fill targets
    iloc_not_found: tp.Set[int] = set(range(len(index_outer)))
    # each group forms a row, each label a value in the index
    for label, _, part in blocks.group(axis=0, key=group_key, kind=kind):
        iloc: int = index_outer._loc_to_iloc(label)  #type: ignore
        iloc_not_found.remove(iloc)
        if func_no:
            if len(part) != 1:
                raise RuntimeError(
                    'pivot requires aggregation of values; provide a `func` argument.'
                )
            for arrays_key, column_key in enumerate(data_fields_iloc):
                # this is equivalent to extracting a row, but doing so would force a type consolidation
                arrays[arrays_key][iloc] = part._extract(0, column_key)
        elif func_single:
            for arrays_key, column_key in enumerate(data_fields_iloc):
                arrays[arrays_key][iloc] = func_single(
                    part._extract_array_column(column_key))
        else:
            arrays_key = 0
            for column_key in data_fields_iloc:
                values = part._extract_array_column(column_key)
                for _, func in func_map:
                    arrays[arrays_key][iloc] = func(values)
                    arrays_key += 1

    if iloc_not_found:
        # we did not fill all arrays and have values that need to be filled
        # order does not matter
        fill_targets = list(iloc_not_found)
        # mutate in place then make immutable
        for arrays_key in range(len(arrays)):  #pylint: disable=C0200
            array = arrays[arrays_key]
            if array.__class__ is not np.ndarray:  # a list
                array, _ = iterable_to_array_1d(array, count=len(index_outer))
                arrays[arrays_key] = array  # restore new array
            else:
                dtype_resolved = resolve_dtype(
                    array.dtype, fill_value_dtype)  # type: ignore
                if array.dtype != dtype_resolved:  # type: ignore
                    array = array.astype(dtype_resolved)  #type: ignore
                    array[fill_targets] = fill_value
                    arrays[arrays_key] = array  # re-assign new array
            array.flags.writeable = False  # type: ignore
    else:
        for arrays_key in range(len(arrays)):  #pylint: disable=C0200
            array = arrays[arrays_key]
            if array.__class__ is not np.ndarray:  # a list
                array, _ = iterable_to_array_1d(array, count=len(index_outer))
                arrays[arrays_key] = array  # re-assign new array
            array.flags.writeable = False
    return arrays
Code example #14
    def _from_archive(
        cls,
        *,
        constructor: tp.Type['Frame'],
        fp: PathSpecifier,
        memory_map: bool = False,
    ) -> tp.Tuple['Frame', Archive]:
        '''
        Create a :obj:`Frame` from an npz file.
        '''
        from static_frame.core.type_blocks import TypeBlocks

        archive = cls._ARCHIVE_CLS(
            fp,
            writeable=False,
            memory_map=memory_map,
        )
        metadata = archive.read_metadata()

        # JSON will bring back tuple `name` attributes as lists; these must be converted to tuples to be hashable. Alternatives (like storing repr and using literal_eval) are slower than JSON.
        name, name_index, name_columns = (list_to_tuple(n)
                                          for n in metadata[Label.KEY_NAMES])

        block_count, depth_index, depth_columns = metadata[Label.KEY_DEPTHS]
        cls_index, cls_columns = (ContainerMap.str_to_cls(name)
                                  for name in metadata[Label.KEY_TYPES])

        index = ArchiveIndexConverter._index_decode(
            archive=archive,
            metadata=metadata,
            key_template_values=Label.FILE_TEMPLATE_VALUES_INDEX,
            key_types=Label.KEY_TYPES_INDEX,
            depth=depth_index,
            cls_index=cls_index,
            name=name_index,
        )

        columns = ArchiveIndexConverter._index_decode(
            archive=archive,
            metadata=metadata,
            key_template_values=Label.FILE_TEMPLATE_VALUES_COLUMNS,
            key_types=Label.KEY_TYPES_COLUMNS,
            depth=depth_columns,
            cls_index=cls_columns,
            name=name_columns,
        )

        tb = TypeBlocks.from_blocks(
            archive.read_array(Label.FILE_TEMPLATE_BLOCKS.format(i))
            for i in range(block_count))

        f = constructor(
            tb,
            own_data=True,
            index=index,
            own_index=False if index is None else True,
            columns=columns,
            own_columns=False if columns is None else True,
            name=name,
        )

        return f, archive
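A hedged sketch of the public round-trip that drives this constructor; the file path is hypothetical, and Frame.to_npz / Frame.from_npz are assumed to be the public wrappers over this archive machinery:

    from static_frame import Frame

    f1 = Frame.from_records((('a', 1), ('b', 2)), columns=('k', 'v'), name='f1')
    f1.to_npz('/tmp/f1.npz')
    f2 = Frame.from_npz('/tmp/f1.npz')
    assert f1.equals(f2, compare_name=True, compare_dtype=True)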
Code example #15
File: pivot.py  Project: vishalbelsare/static-frame
def pivot_core(
    *,
    frame: 'Frame',
    index_fields: tp.List[tp.Hashable],
    columns_fields: tp.List[tp.Hashable],
    data_fields: tp.List[tp.Hashable],
    func_fields: tp.Tuple[tp.Hashable, ...],
    func_single: tp.Optional[UFunc],
    func_map: tp.Sequence[tp.Tuple[tp.Hashable, UFunc]],
    fill_value: object = np.nan,
    index_constructor: IndexConstructor = None,
) -> 'Frame':
    '''Core implementation of Frame.pivot(). The Frame has already been reduced to just relevant columns, and all fields groups are normalized as lists of hashables.
    '''
    from static_frame.core.series import Series
    from static_frame.core.frame import Frame

    data_fields_len = len(data_fields)
    index_depth = len(index_fields)

    # all are lists of hashables; get converted to lists of integers
    columns_loc_to_iloc = frame.columns._loc_to_iloc
    index_fields_iloc: tp.Sequence[int] = columns_loc_to_iloc(index_fields)  #type: ignore
    data_fields_iloc: tp.Sequence[int] = columns_loc_to_iloc(data_fields)  #type: ignore
    columns_fields_iloc: tp.Sequence[int] = columns_loc_to_iloc(columns_fields)  #type: ignore

    # For data fields, we add the field name, not the field values, to the columns.
    columns_name = tuple(columns_fields)
    if data_fields_len > 1 or not columns_fields:
        # if no columns_fields, have to add values label
        columns_name = tuple(chain(columns_fields, ('values', )))  # chain the fields list itself, not its unpacked elements
    if len(func_map) > 1:
        columns_name = columns_name + ('func', )

    columns_depth = len(columns_name)
    if columns_depth == 1:
        columns_name = columns_name[0]  # type: ignore
        columns_constructor = partial(frame._COLUMNS_CONSTRUCTOR,
                                      name=columns_name)
    else:
        columns_constructor = partial(
            frame._COLUMNS_HIERARCHY_CONSTRUCTOR.from_labels,
            depth_reference=columns_depth,
            name=columns_name)

    dtype_map = frame.dtypes
    dtypes_per_data_fields = tuple(
        pivot_records_dtypes(
            dtype_map=dtype_map,
            data_fields=data_fields,
            func_single=func_single,
            func_map=func_map,
        ))
    if func_single and data_fields_len == 1:
        dtype_single = ufunc_dtype_to_dtype(func_single,
                                            dtype_map[data_fields[0]])

    #---------------------------------------------------------------------------
    # first major branch: if we are only grouping by index fields

    if not columns_fields:  # group by is only index_fields
        columns = data_fields if func_single else tuple(
            product(data_fields, func_fields))

        # NOTE: at this time we do not automatically give back an IndexHierarchy when index_depth is == 1, as the order of the resultant values may not be hierarchable.
        name_index = index_fields[0] if index_depth == 1 else tuple(
            index_fields)
        if index_constructor:
            index_constructor = partial(index_constructor, name=name_index)
        else:
            index_constructor = partial(Index, name=name_index)

        if len(columns) == 1:
            # assert len(data_fields) == 1
            f = frame.from_series(
                Series.from_items(
                    pivot_items(
                        blocks=frame._blocks,
                        group_fields_iloc=index_fields_iloc,
                        group_depth=index_depth,
                        data_field_iloc=data_fields_iloc[0],
                        func_single=func_single,
                    ),
                    name=columns[0],
                    index_constructor=index_constructor,
                    dtype=dtype_single,
                ),
                columns_constructor=columns_constructor,
            )
        else:
            f = frame.from_records_items(
                pivot_records_items(
                    blocks=frame._blocks,
                    group_fields_iloc=index_fields_iloc,
                    group_depth=index_depth,
                    data_fields_iloc=data_fields_iloc,
                    func_single=func_single,
                    func_map=func_map,
                ),
                columns_constructor=columns_constructor,
                columns=columns,
                index_constructor=index_constructor,
                dtypes=dtypes_per_data_fields,
            )

        # have to rename columns if derived in from_concat
        columns_final = (f.columns.rename(columns_name) if columns_depth == 1
                         else columns_constructor(f.columns))
        return f.relabel(columns=columns_final)  #type: ignore

    #---------------------------------------------------------------------------
    # second major branch: we are grouping by index and columns fields

    # avoid doing a multi-column-style selection if not needed
    if len(columns_fields) == 1:
        # columns_group = columns_fields[0]
        retuple_group_label = True
    else:
        # columns_group = columns_fields
        retuple_group_label = False

    columns_loc_to_iloc = frame.columns._loc_to_iloc
    # group by on 1 or more columns fields
    # NOTE: explored doing one group on index and columns that insert into pre-allocated arrays, but that proved slower than this approach
    group_key = columns_fields_iloc if len(columns_fields_iloc) > 1 else columns_fields_iloc[0]

    index_outer = pivot_outer_index(
        frame=frame,
        index_fields=index_fields,
        index_depth=index_depth,
        index_constructor=index_constructor,
    )

    # collect subframes based on an index of tuples and columns of tuples (if depth > 1)
    sub_blocks = []
    sub_columns_collected: tp.List[tp.Hashable] = []

    # for group, sub in frame.iter_group_items(columns_group):
    for group, _, sub in frame._blocks.group(axis=0, key=group_key):
        # derive the column fields represented by this group
        sub_columns = extrapolate_column_fields(
            columns_fields, group if not retuple_group_label else (group, ),
            data_fields, func_fields)
        sub_columns_collected.extend(sub_columns)

        # sub is TypeBlocks unique value in columns_group; this may or may not have unique index fields; if not, it needs to be aggregated
        if index_depth == 1:
            sub_index_labels = sub._extract_array_column(index_fields_iloc[0])
            sub_index_labels_unique = ufunc_unique(sub_index_labels)
        else:  # match to an index of tuples; the order might not be the same as IH
            # NOTE: might be able to keep arrays and concat below
            sub_index_labels = tuple(
                zip(*(sub._extract_array_column(columns_loc_to_iloc(f))
                      for f in index_fields)))
            sub_index_labels_unique = set(sub_index_labels)

        sub_frame: tp.Union[Frame, Series]

        # if sub_index_labels are not unique we need to aggregate
        if len(sub_index_labels_unique) != len(sub_index_labels):
            # if sub_columns length is 1, that means that we only need to extract one column out of the sub Frame
            if len(sub_columns) == 1:
                assert len(data_fields) == 1
                # NOTE: grouping on index_fields; can pre-process array_to_groups_and_locations
                sub_frame = Series.from_items(
                    pivot_items(
                        blocks=sub,
                        group_fields_iloc=index_fields_iloc,
                        group_depth=index_depth,
                        data_field_iloc=data_fields_iloc[0],
                        func_single=func_single,
                    ),
                    dtype=dtype_single,
                )
            else:
                sub_frame = Frame.from_records_items(
                    pivot_records_items(
                        blocks=sub,
                        group_fields_iloc=index_fields_iloc,
                        group_depth=index_depth,
                        data_fields_iloc=data_fields_iloc,
                        func_single=func_single,
                        func_map=func_map,
                    ),
                    dtypes=dtypes_per_data_fields,
                )
        else:
            # we have unique values per index item, but may not have a complete index
            if func_single:
                # NOTE: should apply function even with func_single
                if len(data_fields) == 1:
                    sub_frame = Frame(
                        sub._extract_array_column(data_fields_iloc[0]),
                        index=sub_index_labels,
                        index_constructor=index_constructor,
                        own_data=True,
                    )
                else:
                    sub_frame = Frame(
                        sub._extract(row_key=None, column_key=data_fields_iloc),
                        index=sub_index_labels,
                        index_constructor=index_constructor,
                        own_data=True,
                    )
            else:

                def blocks() -> tp.Iterator[np.ndarray]:
                    for field in data_fields_iloc:
                        for _, func in func_map:
                            yield sub._extract_array_column(field)

                sub_frame = Frame(
                    TypeBlocks.from_blocks(blocks()),
                    index=sub_index_labels,
                    own_data=True,
                )

        sub_frame = sub_frame.reindex(
            index_outer,
            own_index=True,
            fill_value=fill_value,
        )
        if sub_frame.ndim == 1:
            sub_blocks.append(sub_frame.values)
        else:
            sub_blocks.extend(sub_frame._blocks._blocks)  # type: ignore

    tb = TypeBlocks.from_blocks(sub_blocks)
    return frame.__class__(
        tb,
        index=index_outer,
        columns=columns_constructor(sub_columns_collected),
        own_data=True,
        own_index=True,
        own_columns=True,
    )