Code example #1
Score: 0
File: pivot.py  Project: vishalbelsare/static-frame
def pivot_items(
    blocks: TypeBlocks,
    group_fields_iloc: tp.Iterable[tp.Hashable],
    group_depth: int,
    data_field_iloc: tp.Hashable,
    func_single: UFunc,
) -> tp.Iterator[tp.Tuple[tp.Hashable, tp.Any]]:
    '''
    Specialized generator of (label, value) pairs for the case of exactly one
    data field and one aggregation function.
    '''
    # With a single grouping field, key on that field alone so group labels
    # are scalars rather than one-element tuples.
    if group_depth > 1:
        group_key = group_fields_iloc
    else:
        group_key = group_fields_iloc[0]  #type: ignore

    for label, _, sub in blocks.group(axis=0, key=group_key):
        column_values = sub._extract_array_column(data_field_iloc)
        yield label, func_single(column_values)
Code example #2
Score: 0
File: pivot.py  Project: vishalbelsare/static-frame
def pivot_records_items(
    blocks: TypeBlocks, group_fields_iloc: tp.Iterable[tp.Hashable],
    group_depth: int, data_fields_iloc: tp.Iterable[tp.Hashable],
    func_single: tp.Optional[UFunc],
    func_map: tp.Sequence[tp.Tuple[tp.Hashable, UFunc]]
) -> tp.Iterator[tp.Tuple[tp.Hashable, tp.Sequence[tp.Any]]]:
    '''
    Group on the group fields and, within each group, aggregate each data field.
    Yields (label, record) pairs suitable for a Frame.from_records_items
    constructor.
    '''
    # With a single grouping field, key on that field alone so group labels
    # are scalars rather than one-element tuples.
    if group_depth > 1:
        group_key = group_fields_iloc
    else:
        group_key = group_fields_iloc[0]  #type: ignore

    # One slot per data field; with func_map, one slot per field per function.
    per_field = 1 if func_single else len(func_map)
    record_size = len(data_fields_iloc) * per_field

    for label, _, part in blocks.group(axis=0, key=group_key):
        # Pre-size the record so positions can be filled in order.
        record: tp.List[tp.Any] = [None] * record_size
        pos = 0
        for column_key in data_fields_iloc:
            values = part._extract_array_column(column_key)
            if func_single:
                record[pos] = func_single(values)
                pos += 1
            else:
                for _, func in func_map:
                    record[pos] = func(values)
                    pos += 1
        yield label, record
Code example #3
Score: 0
def pivot_records_items_to_frame(
    *,
    blocks: TypeBlocks,
    group_fields_iloc: tp.Iterable[tp.Hashable],
    group_depth: int,
    data_fields_iloc: tp.Iterable[tp.Hashable],
    func_single: tp.Optional[UFunc],
    func_map: tp.Sequence[tp.Tuple[tp.Hashable, UFunc]],
    func_no: bool,
    kind: str,
    columns_constructor: IndexConstructor,
    columns: tp.List[tp.Hashable],
    index_constructor: IndexConstructor,
    dtypes: tp.Tuple[tp.Optional[np.dtype]],
    frame_cls: tp.Type['Frame'],
) -> 'Frame':
    '''
    Group on the group fields and, within each group, aggregate each data field,
    assembling the results column-wise into a new Frame.
    '''
    # With a single grouping field, key on that field alone so group labels
    # are scalars rather than one-element tuples.
    if group_depth > 1:
        group_key = group_fields_iloc
    else:
        group_key = group_fields_iloc[0]  #type: ignore

    # One output column per data field; with func_map, one per field per function.
    fields_per_column = 1 if (func_single or func_no) else len(func_map)
    record_size = len(data_fields_iloc) * fields_per_column

    index_labels = []
    # One accumulator list per output column.
    arrays: tp.List[tp.List[tp.Any]] = [[] for _ in range(record_size)]

    for label, _, part in blocks.group(axis=0, key=group_key, kind=kind):
        index_labels.append(label)
        if func_no:
            # No aggregation: every group must consist of exactly one row.
            if len(part) != 1:
                raise RuntimeError(
                    'pivot requires aggregation of values; provide a `func` argument.'
                )
            for pos, column_key in enumerate(data_fields_iloc):
                arrays[pos].append(part._extract(0, column_key))
        elif func_single:
            for pos, column_key in enumerate(data_fields_iloc):
                values = part._extract_array_column(column_key)
                arrays[pos].append(func_single(values))
        else:
            pos = 0
            for column_key in data_fields_iloc:
                values = part._extract_array_column(column_key)
                for _, func in func_map:
                    arrays[pos].append(func(values))
                    pos += 1

    def column_arrays() -> tp.Iterator[np.ndarray]:
        # Realize each accumulator as an immutable 1D array, discovering the
        # dtype from the data when none was supplied.
        for accumulated, dtype in zip(arrays, dtypes):
            if dtype is None:
                array, _ = iterable_to_array_1d(accumulated)
            else:
                array = np.array(accumulated, dtype=dtype)
            array.flags.writeable = False
            yield array

    return frame_cls(
        TypeBlocks.from_blocks(column_arrays()),
        index=index_constructor(index_labels),
        columns=columns_constructor(columns),
        own_data=True,
        own_index=True,
        own_columns=True,
    )
Code example #4
Score: 0
def pivot_records_items_to_blocks(
    *,
    blocks: TypeBlocks,
    group_fields_iloc: tp.Iterable[tp.Hashable],
    group_depth: int,
    data_fields_iloc: tp.Iterable[tp.Hashable],
    func_single: tp.Optional[UFunc],
    func_map: tp.Sequence[tp.Tuple[tp.Hashable, UFunc]],
    func_no: bool,
    fill_value: tp.Any,
    fill_value_dtype: np.dtype,
    index_outer: 'IndexBase',
    dtypes: tp.Tuple[tp.Optional[np.dtype]],
    kind: str,
) -> tp.List[np.ndarray]:
    '''
    Group on the group fields and, within each group, aggregate each data field;
    return one immutable 1D array per output column, each aligned to
    ``index_outer``. Positions of ``index_outer`` labels for which no group is
    observed are filled with ``fill_value``.
    '''
    group_key = group_fields_iloc if group_depth > 1 else group_fields_iloc[
        0]  #type: ignore

    # Pre-allocate one container per output column: a list (pre-filled with
    # fill_value) when the dtype must be discovered from the data, an
    # uninitialized array when the dtype is known up front.
    arrays: tp.List[tp.Union[tp.List[tp.Any], np.ndarray]] = []
    for dtype in dtypes:
        if dtype is None:
            # fill_value is either completely replaced (and does not affect
            # dtype discovery) or genuinely needed (and already in place)
            arrays.append([fill_value] * len(index_outer))
        else:
            arrays.append(np.empty(len(index_outer), dtype=dtype))

    # Collect all possible ilocs and remove each as observed; any that remain
    # after grouping are fill targets.
    iloc_not_found: tp.Set[int] = set(range(len(index_outer)))
    # Each group forms a row; each label maps to a position in index_outer.
    for label, _, part in blocks.group(axis=0, key=group_key, kind=kind):
        iloc: int = index_outer._loc_to_iloc(label)  #type: ignore
        iloc_not_found.remove(iloc)
        if func_no:
            # No aggregation: every group must consist of exactly one row.
            if len(part) != 1:
                raise RuntimeError(
                    'pivot requires aggregation of values; provide a `func` argument.'
                )
            for arrays_key, column_key in enumerate(data_fields_iloc):
                # equivalent to extracting a row, but doing so would force a
                # type consolidation
                arrays[arrays_key][iloc] = part._extract(0, column_key)
        elif func_single:
            for arrays_key, column_key in enumerate(data_fields_iloc):
                arrays[arrays_key][iloc] = func_single(
                    part._extract_array_column(column_key))
        else:
            arrays_key = 0
            for column_key in data_fields_iloc:
                values = part._extract_array_column(column_key)
                for _, func in func_map:
                    arrays[arrays_key][iloc] = func(values)
                    arrays_key += 1

    fill_targets = list(iloc_not_found)  # order does not matter
    for arrays_key in range(len(arrays)):  #pylint: disable=C0200
        array = arrays[arrays_key]
        if array.__class__ is not np.ndarray:  # a list
            # list containers already carry fill_value at unobserved positions
            array, _ = iterable_to_array_1d(array, count=len(index_outer))
            arrays[arrays_key] = array  # re-assign new array
        elif fill_targets:
            # np.empty left unobserved positions undefined: they must always be
            # assigned fill_value, widening the dtype first when fill_value does
            # not fit. (Previously the fill was skipped whenever no dtype change
            # was needed, leaving uninitialized memory at those positions.)
            dtype_resolved = resolve_dtype(
                array.dtype, fill_value_dtype)  # type: ignore
            if array.dtype != dtype_resolved:  # type: ignore
                array = array.astype(dtype_resolved)  #type: ignore
                arrays[arrays_key] = array  # re-assign new array
            array[fill_targets] = fill_value
        array.flags.writeable = False  # type: ignore
    return arrays