Example #1
0
    def loc_searchsorted(
        self,
        values: tp.Any,
        *,
        side_left: bool = True,
        fill_value: tp.Any = np.nan,
    ) -> tp.Union[tp.Hashable, tp.Iterable[tp.Hashable]]:
        '''
        {doc}

        Args:
            {values}
            {side_left}
            {fill_value}
        '''
        # Positions from the iloc-based search; a position equal to len(self)
        # marks a value that falls beyond the last label.
        positions = self.iloc_searchsorted(values, side_left=side_left)
        size = self.__len__()

        if positions.ndim == 0:  # a single element was searched
            if positions == size:
                return fill_value  #type: ignore [no-any-return]
            return self.values[positions]  #type: ignore [no-any-return]

        out_of_range = positions == size
        if not out_of_range.any():
            return self.values[positions]  #type: ignore [no-any-return]

        # Choose a dtype wide enough to hold both labels and the fill value.
        result = np.empty(
            len(positions),
            dtype=resolve_dtype(self.dtype, dtype_from_element(fill_value)),
        )
        positions[out_of_range] = 0  # redirect invalid positions to a valid index
        result[:] = self.values[positions]
        result[out_of_range] = fill_value
        result.flags.writeable = False
        return result  #type: ignore [no-any-return]
Example #2
0
def pivot_index_map(
    *,
    index_src: IndexBase,
    depth_level: DepthLevelSpecifier,
    dtypes_src: tp.Optional[tp.Sequence[np.dtype]],
) -> PivotIndexMap:
    '''
    Partition the depths of ``index_src`` into target depths (moved to the
    expanded axis) and group depths (unique combinations that remain on the
    contracted axis), and build the mapping structures needed for a pivot.

    Args:
        dtypes_src: must be of length equal to axis
    '''
    # We always move levels from one axis to another; after application, the
    # expanded axis is always hierarchical, while the contracted axis may or
    # may not be. Unique target labels are added to labels on the expand axis;
    # unique group labels become the new contract axis.

    target_select = np.full(index_src.depth, False)
    target_select[depth_level] = True
    group_select = ~target_select

    target_arrays = []
    group_arrays = []
    for depth, is_target in enumerate(target_select):
        dest = target_arrays if is_target else group_arrays
        dest.append(index_src.values_at_depth(depth))

    target_depth = len(target_arrays)
    group_depth = len(group_arrays)
    group_to_dtype: tp.Dict[tp.Optional[tp.Hashable], np.dtype] = {}
    targets_unique: tp.Iterable[tp.Hashable]

    if not group_arrays:
        # No grouping remains: a single None group maps each target tuple to
        # its ordinal position.
        group_to_target_map = {
            None: {t: pos for pos, t in enumerate(zip(*target_arrays))}
        }
        targets_unique = list(group_to_target_map[None])
        if dtypes_src is not None:
            group_to_dtype[None] = resolve_dtype_iter(dtypes_src)
    else:
        group_to_target_map = defaultdict(dict)
        targets_unique = dict()  # a dict used as an ordered set

        dtype_iter = dtypes_src if dtypes_src is not None else repeat(None)
        paired = zip(zip(*group_arrays), zip(*target_arrays), dtype_iter)
        for pos, (group, target, dtype) in enumerate(paired):
            if group_depth == 1:
                group = group[0]  # unwrap single-depth group tuples
            # targets are transferred labels; groups are the new columns
            group_to_target_map[group][target] = pos
            targets_unique[target] = None  #type: ignore

            if dtypes_src is not None:
                if group in group_to_dtype:
                    group_to_dtype[group] = resolve_dtype(
                        group_to_dtype[group], dtype)
                else:
                    group_to_dtype[group] = dtype

    return PivotIndexMap(  #pylint: disable=E1120
        targets_unique=targets_unique,
        target_depth=target_depth,
        target_select=target_select,
        group_to_target_map=group_to_target_map,  #type: ignore
        group_depth=group_depth,
        group_select=group_select,
        group_to_dtype=group_to_dtype)
Example #3
0
def pivot_records_items_to_blocks(
    *,
    blocks: TypeBlocks,
    group_fields_iloc: tp.Iterable[tp.Hashable],
    group_depth: int,
    data_fields_iloc: tp.Iterable[tp.Hashable],
    func_single: tp.Optional[UFunc],
    func_map: tp.Sequence[tp.Tuple[tp.Hashable, UFunc]],
    func_no: bool,
    fill_value: tp.Any,
    fill_value_dtype: np.dtype,
    index_outer: 'IndexBase',
    dtypes: tp.Tuple[tp.Optional[np.dtype]],
    kind: str,
) -> tp.List[np.ndarray]:
    '''
    Given a Frame and pivot parameters, perform the group-by on the group_fields
    and, within each group, aggregate each data field into per-column arrays
    aligned to ``index_outer``.

    Args:
        dtypes: per-output-column dtypes; ``None`` entries defer dtype
            discovery until array creation.
    Returns:
        A list of immutable 1D arrays, one per output column.
    '''
    # NOTE: this delivers results by label, row for use in a Frame.from_records_items constructor

    group_key = group_fields_iloc if group_depth > 1 else group_fields_iloc[
        0]  #type: ignore
    arrays: tp.List[tp.Union[tp.List[tp.Any], np.ndarray]] = []
    for dtype in dtypes:
        if dtype is None:
            # we can use fill_value here, as either it will be completely replaced (and not effect dtype evaluation) or be needed (and already there)
            arrays.append([fill_value] * len(index_outer))
        else:
            arrays.append(np.empty(len(index_outer), dtype=dtype))

    # try to use the dtype specified; fill values at end if necessary
    # collect all possible ilocs, and remove as observed; if any remain, we have fill targets
    iloc_not_found: tp.Set[int] = set(range(len(index_outer)))
    # each group forms a row, each label a value in the index
    for label, _, part in blocks.group(axis=0, key=group_key, kind=kind):
        iloc: int = index_outer._loc_to_iloc(label)  #type: ignore
        iloc_not_found.remove(iloc)
        if func_no:
            if len(part) != 1:
                raise RuntimeError(
                    'pivot requires aggregation of values; provide a `func` argument.'
                )
            for arrays_key, column_key in enumerate(data_fields_iloc):
                # this is equivalent to extracting a row, but doing so would force a type consolidation
                arrays[arrays_key][iloc] = part._extract(0, column_key)
        elif func_single:
            for arrays_key, column_key in enumerate(data_fields_iloc):
                arrays[arrays_key][iloc] = func_single(
                    part._extract_array_column(column_key))
        else:
            arrays_key = 0
            for column_key in data_fields_iloc:
                values = part._extract_array_column(column_key)
                for _, func in func_map:
                    arrays[arrays_key][iloc] = func(values)
                    arrays_key += 1

    if iloc_not_found:
        # we did not fill all arrays and have values that need to be filled
        # order does not matter
        fill_targets = list(iloc_not_found)
        # mutate in place then make immutable
        for arrays_key in range(len(arrays)):  #pylint: disable=C0200
            array = arrays[arrays_key]
            if array.__class__ is not np.ndarray:  # a list, already pre-filled with fill_value
                array, _ = iterable_to_array_1d(array, count=len(index_outer))
                arrays[arrays_key] = array  # restore new array
            else:
                # arrays created via np.empty hold arbitrary values at any
                # unfilled positions, so fill_value must ALWAYS be assigned to
                # the fill targets, not only when an astype is required
                dtype_resolved = resolve_dtype(
                    array.dtype, fill_value_dtype)  # type: ignore
                if array.dtype != dtype_resolved:  # type: ignore
                    array = array.astype(dtype_resolved)  #type: ignore
                array[fill_targets] = fill_value
                arrays[arrays_key] = array  # re-assign new array
            array.flags.writeable = False  # type: ignore
    else:
        for arrays_key in range(len(arrays)):  #pylint: disable=C0200
            array = arrays[arrays_key]
            if array.__class__ is not np.ndarray:  # a list
                array, _ = iterable_to_array_1d(array, count=len(index_outer))
                arrays[arrays_key] = array  # re-assign new array
            array.flags.writeable = False
    return arrays
Example #4
0
def pivot_items_to_block(
    *,
    blocks: TypeBlocks,
    group_fields_iloc: tp.Iterable[tp.Hashable],
    group_depth: int,
    data_field_iloc: tp.Hashable,
    func_single: tp.Optional[UFunc],
    dtype: tp.Optional[np.dtype],
    fill_value: tp.Any,
    fill_value_dtype: np.dtype,
    index_outer: 'IndexBase',
    kind: str,
) -> np.ndarray:
    '''
    Specialized generator of pairs for when we have only one data_field and one function.
    '''
    from static_frame.core.series import Series

    if group_depth > 1:
        group_key = group_fields_iloc
    else:
        group_key = group_fields_iloc[0]  #type: ignore

    if func_single and dtype is not None:
        # dtype known up front: pre-fill with fill_value, then overwrite per group
        result = np.full(
            len(index_outer),
            fill_value,
            dtype=resolve_dtype(dtype, fill_value_dtype),
        )
        group_iter = blocks.group_extract(
            axis=0,
            key=group_key,
            extract=data_field_iloc,
            kind=kind,
        )
        for label, _, group_values in group_iter:
            result[index_outer._loc_to_iloc(label)] = func_single(group_values)
        result.flags.writeable = False
        return result

    if func_single and dtype is None:
        # dtype unknown: collect (iloc, aggregate) pairs and let Series discover it
        def items() -> tp.Iterator[tp.Tuple[int, tp.Any]]:
            group_iter = blocks.group_extract(
                axis=0,
                key=group_key,
                extract=data_field_iloc,
                kind=kind,
            )
            for label, _, group_values in group_iter:
                yield index_outer._loc_to_iloc(label), func_single(group_values)

        collected = Series.from_items(items())
        if len(collected) == len(index_outer):
            result = np.empty(len(index_outer), dtype=collected.dtype)
        else:
            result = np.full(
                len(index_outer),
                fill_value,
                dtype=resolve_dtype(collected.dtype, fill_value_dtype),
            )
        result[collected.index.values] = collected.values
        result.flags.writeable = False
        return result

    # func_no scenario as no mapping here
    if group_depth == 1:
        target_ilocs = [
            index_outer._loc_to_iloc(label)
            for label in blocks._extract_array_column(group_key)
        ]
    else:
        # NOTE: might replace _extract_array_column with an iterator of tuples
        target_ilocs = [
            index_outer._loc_to_iloc(tuple(label))
            for label in blocks._extract_array(column_key=group_key)
        ]

    column_values = blocks._extract_array_column(data_field_iloc)
    if len(column_values) == len(index_outer):
        result = np.empty(len(index_outer), dtype=dtype)
    else:
        result = np.full(
            len(index_outer),
            fill_value,
            dtype=resolve_dtype(column_values.dtype, fill_value_dtype),
        )
    result[target_ilocs] = column_values
    result.flags.writeable = False
    return result
Example #5
0
    def test_resolve_dtype(self, dtype_pair: tp.Tuple[np.dtype,
                                                      np.dtype]) -> None:
        # Resolving any pair of dtypes must always yield a concrete np.dtype.
        # assertIsInstance gives an informative message on failure, unlike
        # assertTrue(isinstance(...)).
        resolved = resolve_dtype(*dtype_pair)
        self.assertIsInstance(resolved, np.dtype)