Code example #1
0
File: index.py  Project: ForeverWintr/static-frame
    def _extract_labels(mapping: tp.Optional[tp.Dict[tp.Hashable, int]],
                        labels: tp.Iterable[tp.Hashable],
                        dtype: tp.Optional[np.dtype] = None) -> np.ndarray:
        '''Derive the immutable labels array that caches the mapping keys.

        If ``labels`` is already an ndarray it is reused after immutable
        filtering; otherwise an array is built from ``labels``, or — when
        ``labels`` is an expired generator — from the mapping keys.

        This method is overridden in the derived class.

        Args:
            mapping: Can be None if loc_is_iloc.
            labels: might be an expired Generator, but if it is an immutable
                ndarray, we can use it without a copy.
        '''
        if isinstance(labels, np.ndarray):
            # reuse the caller's array directly; only validate an explicit dtype
            if dtype is not None and dtype != labels.dtype:
                raise ErrorInitIndex('invalid label dtype for this Index')
            return immutable_filter(labels)

        # a sized iterable can be consumed directly; a generator may already be
        # expired, so fall back to the mapping keys in that case
        source = labels if hasattr(labels, '__len__') else mapping
        if len(source) == 0:  #type: ignore
            return EMPTY_ARRAY
        # resolving the dtype is expensive; pass it through when available
        array, _ = iterable_to_array_1d(source, dtype=dtype)  #type: ignore
        # all arrays returned here are immutable
        return array
Code example #2
0
    def test_iterable_to_array_a(self, array: np.ndarray) -> None:
        '''Round-trip: converting an array's Python values back through
        iterable_to_array_1d preserves those values, with an inferred dtype
        and with an explicitly given object dtype.'''
        source = array.tolist()
        # first with inferred dtype, then explicitly giving object dtype
        for dtype in (None, util.DTYPE_OBJECT):
            result, _ = util.iterable_to_array_1d(source, dtype=dtype)
            self.assertAlmostEqualValues(result, source)
Code example #3
0
    def to_series_values(
        self,
        values: tp.Iterator[tp.Any],
        *,
        dtype: DtypeSpecifier,
        name: NameType = None,
        index_constructor: tp.Optional[IndexConstructor] = None,
        axis: int = 0,
    ) -> 'Series':
        '''Realize ``values`` as a 1D array and wrap it in a Series whose
        index aligns with the source container.

        Args:
            dtype: dtype passed through to array construction.
            name: optional name for the resulting Series.
            index_constructor: optional callable applied to the derived index.
            axis: for 2D containers, axis 0 selects the columns as the index.
        '''
        from static_frame.core.series import Series

        # for a 2D container on axis 0 the columns become the index; the
        # resulting Series cannot own that index
        use_columns = self._container._NDIM == 2 and axis == 0
        if use_columns:
            index = self._container._columns  #type: ignore
        else:
            index = self._container._index
        own_index = not use_columns

        if index_constructor is not None:
            index = index_constructor(index)

        # PERF: passing count here permits faster generator realization
        array, _ = iterable_to_array_1d(values, count=index.shape[0], dtype=dtype)
        return Series(array, name=name, index=index, own_index=own_index)
Code example #4
0
 def gen() -> tp.Iterator[np.ndarray]:
     '''Yield one immutable 1D array per (values, dtype) pair in the
     enclosing ``arrays`` / ``dtypes`` sequences.'''
     for values, dtype in zip(arrays, dtypes):
         if dtype is not None:
             array = np.array(values, dtype=dtype)
         else:
             # no dtype given: let the helper discover it
             array, _ = iterable_to_array_1d(values)
         array.flags.writeable = False
         yield array
Code example #5
0
 def to_index_labels(
     self,
     values: tp.Iterator[tp.Hashable],  #pylint: disable=function-redefined
     dtype: DtypeSpecifier = None,
     name: NameType = None,
     index_constructor: tp.Optional[IndexConstructor] = None,
 ) -> np.ndarray:
     '''Realize ``values`` into a 1D labels array sized to the container.

     The ``name`` argument exists only for interface compatibility and is
     ignored; ``index_constructor`` is not supported by this interface.
     '''
     if index_constructor is not None:
         raise RuntimeError(
             'index_constructor not supported with this interface')
     # PERF: passing count here permits faster generator realization
     row_count = self._container.shape[0]
     array, _ = iterable_to_array_1d(values, count=row_count, dtype=dtype)
     return array
Code example #6
0
    def to_index(labels: tp.Iterable[tp.Hashable],
            *,
            default_constructor: tp.Type[IndexBase],
            name: NameType = None,
            ) -> IndexBase:
        '''Create and return an ``Index`` whose class is selected from the
        resolved ``dtype`` of ``labels``.
        '''
        from static_frame.core.index_datetime import dtype_to_index_cls

        # NOTE: identity class check (not isinstance) so ndarray subclasses
        # are also converted
        if labels.__class__ is not np.ndarray:
            # we can assume that this is 1D; returns an immutable array
            labels, _ = iterable_to_array_1d(labels)

        cls = dtype_to_index_cls(
                static=default_constructor.STATIC,
                dtype=labels.dtype)  #type: ignore
        return cls(labels, name=name)
Code example #7
0
def array_from_value_iter(
        key: tp.Hashable,
        idx: int,
        get_value_iter: tp.Callable[[tp.Hashable], tp.Iterator[tp.Any]],
        get_col_dtype: tp.Optional[tp.Callable[[int], np.dtype]],
        row_count: int,
        ) -> np.ndarray:
    '''
    Return a single immutable 1D array for one column of values.

    Args:
        key: hashable used to look up the value iterator via ``get_value_iter``.
        idx: integer position used to look up a dtype via ``get_col_dtype``.
        get_value_iter: callable returning a fresh iterator of values for ``key``.
        get_col_dtype: optional callable mapping ``idx`` to a dtype; it may
            itself return None, meaning the dtype was not specified.
        row_count: expected number of values, passed to ``np.fromiter``.
    '''
    # a dtype of None means not specified: discover it from the values
    dtype = None if get_col_dtype is None else get_col_dtype(idx)

    # NOTE: np.fromiter with an explicit dtype has been shown faster than
    # iterable_to_array_1d in some performance tests
    if dtype is not None:
        try:
            array = np.fromiter(
                    get_value_iter(key),
                    count=row_count,
                    dtype=dtype)
            array.flags.writeable = False
            return array
        except (ValueError, TypeError):
            # the dtype may not be compatible with the values (e.g. ValueError:
            # cannot convert float NaN to integer); fall back to full discovery
            pass
    # returns an immutable array
    array, _ = iterable_to_array_1d(get_value_iter(key), dtype=dtype)
    return array
Code example #8
0
    def iloc_searchsorted(self,
            values: tp.Any,
            *,
            side_left: bool = True,
            ) -> tp.Union[tp.Hashable, tp.Iterable[tp.Hashable]]:
        '''
        {doc}

        Args:
            {values}
            {side_left}
        '''
        # normalize sized, non-string inputs to a 1D array; scalars and
        # strings pass through to searchsorted unchanged
        is_sized = hasattr(values, '__len__') and not isinstance(values, str)
        if is_sized and values.__class__ is not np.ndarray:
            values, _ = iterable_to_array_1d(values)
        side = 'left' if side_left else 'right'
        return np.searchsorted(self.values, values, side)  #type: ignore [no-any-return]
Code example #9
0
File: index.py  Project: adamczykm/static-frame
    def _extract_labels(
            mapping,
            labels,
            dtype: tp.Optional[np.dtype] = None
            ) -> np.ndarray:
        '''Derive the immutable labels array that caches the mapping keys.

        If ``labels`` is already an ndarray it is reused after immutable
        filtering; otherwise an array is built from ``labels``, or — when
        ``labels`` is an expired generator — from the mapping keys.

        This method is overridden in the derived class.

        Args:
            labels: might be an expired Generator, but if it is an immutable
                ndarray, we can use it without a copy.
        '''
        if isinstance(labels, np.ndarray):
            # reuse the caller's array; only validate an explicit dtype
            if dtype is not None and dtype != labels.dtype:
                raise RuntimeError('invalid label dtype for this Index')
            return immutable_filter(labels)

        if hasattr(labels, '__len__'):
            # a sized iterable (not a generator, not an array); resolving the
            # dtype is expensive, so pass it through when available
            labels, _ = iterable_to_array_1d(labels, dtype=dtype)
        elif not mapping:
            labels = EMPTY_ARRAY
        else:
            # labels may be an expired generator: rebuild from the mapping,
            # placing each key at its mapped integer position
            labels = np.empty(len(mapping), dtype=dtype if dtype else object)
            for key, pos in mapping.items():
                labels[pos] = key

        labels.flags.writeable = False
        return labels
Code example #10
0
def pivot_items_to_frame(
    *,
    blocks: TypeBlocks,
    group_fields_iloc: tp.Iterable[tp.Hashable],
    group_depth: int,
    data_field_iloc: tp.Hashable,
    func_single: tp.Optional[UFunc],
    frame_cls: tp.Type['Frame'],
    name: NameType,
    dtype: np.dtype,
    index_constructor: IndexConstructor,
    columns_constructor: IndexConstructor,
    kind: str,
) -> 'Frame':
    '''
    Specialized handling for the case of a single data field and at most one
    function; returns a single-column Frame.

    Args:
        blocks: TypeBlocks to group and extract from.
        group_depth: when greater than 1, all of ``group_fields_iloc`` form
            the group key; otherwise only the first field is used.
        func_single: optional aggregation function; when falsy, values are
            extracted directly without aggregation.
        dtype: when None, the dtype is discovered from the aggregated values.
        kind: sort kind forwarded to the grouping routine.
    '''
    # NOTE: removed an unused function-scope import of Series; nothing in
    # this body references it
    group_key = (group_fields_iloc if group_depth > 1
            else group_fields_iloc[0])  #type: ignore

    if func_single:
        labels = []
        values = []
        for label, _, v in blocks.group_extract(
                axis=0,
                key=group_key,
                extract=data_field_iloc,
                kind=kind,
        ):
            labels.append(label)
            values.append(func_single(v))

        if dtype is None:
            # PERF: passing count permits faster realization
            array, _ = iterable_to_array_1d(values, count=len(values))
        else:
            array = np.array(values, dtype=dtype)
        array.flags.writeable = False
        index = index_constructor(labels)
        return frame_cls.from_elements(
            array,
            index=index,
            own_index=True,
            columns=(name, ),
            columns_constructor=columns_constructor,
        )
    # func_no scenario: extract the data column directly, no aggregation
    if group_depth == 1:
        index = index_constructor(blocks._extract_array_column(group_key))
    else:
        index = index_constructor(
            tuple(label)
            for label in blocks._extract_array(column_key=group_key))

    array = blocks._extract_array_column(data_field_iloc)
    return frame_cls.from_elements(
        array,
        index=index,
        own_index=True,
        columns=(name, ),
        columns_constructor=columns_constructor,
    )
Code example #11
0
def pivot_records_items_to_blocks(
    *,
    blocks: TypeBlocks,
    group_fields_iloc: tp.Iterable[tp.Hashable],
    group_depth: int,
    data_fields_iloc: tp.Iterable[tp.Hashable],
    func_single: tp.Optional[UFunc],
    func_map: tp.Sequence[tp.Tuple[tp.Hashable, UFunc]],
    func_no: bool,
    fill_value: tp.Any,
    fill_value_dtype: np.dtype,
    index_outer: 'IndexBase',
    dtypes: tp.Tuple[tp.Optional[np.dtype]],
    kind: str,
) -> tp.List[np.ndarray]:
    '''
    Group ``blocks`` on the group fields and, within each group, reduce each
    data field to one value, producing one immutable 1D array per output
    column aligned to ``index_outer``.

    Args:
        group_depth: when greater than 1, all of ``group_fields_iloc`` form
            the group key; otherwise only the first field is used.
        func_single: single aggregation function applied to every data field.
        func_map: sequence of (label, func) pairs; each func yields one output
            column per data field.
        func_no: when True, no aggregation is performed; each group must then
            contain exactly one row.
        fill_value: value assigned to positions of ``index_outer`` not
            observed in any group.
        fill_value_dtype: dtype of ``fill_value``, used to resolve array
            dtypes when filling is required.
        dtypes: per-output-column dtype; None means discover from the values.
        kind: sort kind forwarded to the grouping routine.
    '''
    # NOTE: this delivers results by label, row for use in a Frame.from_records_items constructor

    group_key = group_fields_iloc if group_depth > 1 else group_fields_iloc[
        0]  #type: ignore
    arrays: tp.List[tp.Union[tp.List[tp.Any], np.ndarray]] = []
    for dtype in dtypes:
        if dtype is None:
            # we can use fill_value here, as either it will be completely replaced (and not affect dtype evaluation) or be needed (and already there)
            arrays.append([fill_value] * len(index_outer))
        else:
            arrays.append(np.empty(len(index_outer), dtype=dtype))

    # try to use the dtype specified; fill values at end if necessary
    # collect all possible ilocs, and remove as observed; if any remain, we have fill targets
    iloc_not_found: tp.Set[int] = set(range(len(index_outer)))
    # each group forms a row, each label a value in the index
    for label, _, part in blocks.group(axis=0, key=group_key, kind=kind):
        iloc: int = index_outer._loc_to_iloc(label)  #type: ignore
        iloc_not_found.remove(iloc)
        if func_no:
            # each group must be a single row that is copied over unchanged
            if len(part) != 1:
                raise RuntimeError(
                    'pivot requires aggregation of values; provide a `func` argument.'
                )
            for arrays_key, column_key in enumerate(data_fields_iloc):
                # this is equivalent to extracting a row, but doing so would force a type consolidation
                arrays[arrays_key][iloc] = part._extract(0, column_key)
        elif func_single:
            # one output column per data field
            for arrays_key, column_key in enumerate(data_fields_iloc):
                arrays[arrays_key][iloc] = func_single(
                    part._extract_array_column(column_key))
        else:
            # func_map: one output column per (data field, func) pair
            arrays_key = 0
            for column_key in data_fields_iloc:
                values = part._extract_array_column(column_key)
                for _, func in func_map:
                    arrays[arrays_key][iloc] = func(values)
                    arrays_key += 1

    if iloc_not_found:
        # we did not fill all arrays and have values that need to be filled
        # order does not matter
        fill_targets = list(iloc_not_found)
        # mutate in place then make immutable
        for arrays_key in range(len(arrays)):  #pylint: disable=C0200
            array = arrays[arrays_key]
            if not array.__class__ is np.ndarray:  # a list
                # list columns were pre-filled with fill_value, so only
                # conversion to an array is needed here
                array, _ = iterable_to_array_1d(array, count=len(index_outer))
                arrays[arrays_key] = array  # restore new array
            else:
                # NOTE(review): when array.dtype already equals dtype_resolved,
                # fill_targets are never assigned and np.empty slots stay
                # uninitialized — confirm upstream guarantees this cannot occur
                dtype_resolved = resolve_dtype(
                    array.dtype, fill_value_dtype)  # type: ignore
                if array.dtype != dtype_resolved:  # type: ignore
                    array = array.astype(dtype_resolved)  #type: ignore
                    array[fill_targets] = fill_value
                    arrays[arrays_key] = array  # re-assign new array
            array.flags.writeable = False  # type: ignore
    else:
        # every position was observed; just convert lists and freeze all arrays
        for arrays_key in range(len(arrays)):  #pylint: disable=C0200
            array = arrays[arrays_key]
            if not array.__class__ is np.ndarray:  # a list
                array, _ = iterable_to_array_1d(array, count=len(index_outer))
                arrays[arrays_key] = array  # re-assign new array
            array.flags.writeable = False
    return arrays
Code example #12
0
 def test_iterable_to_array_b(self, labels: tp.Iterable[tp.Any]) -> None:
     '''iterable_to_array_1d must return an ndarray preserving the input values.'''
     array, _ = util.iterable_to_array_1d(labels)
     self.assertAlmostEqualValues(array, labels)
     self.assertTrue(isinstance(array, np.ndarray))