Exemple #1
0
    def from_pandas(cls,
                    value,
                    *,
                    own_data: bool = False,
                    own_index: bool = False) -> 'Series':
        '''Build a Series from the values and index labels of a Pandas Series.

        Args:
            value: Pandas Series; its ``.values`` and ``.index.values`` arrays are read.
            own_data: If True, the underlying NumPy data array will be made immutable and used without a copy.
            own_index: If True, the underlying NumPy index label array will be made immutable and used without a copy.

        Returns:
            :py:class:`static_frame.Series`
        '''
        def claim(array, own: bool):
            # An owned array is flagged read-only in place and used as-is;
            # any other array is handed to immutable_filter.
            if not own:
                return immutable_filter(array)
            array.flags.writeable = False
            return array

        values = claim(value.values, own_data)
        labels = claim(value.index.values, own_index)

        # both arrays are read-only at this point
        return cls(values, index=labels)
Exemple #2
0
    def __init__(self,
                 iterable: tp.Union[np.ndarray, tp.List[object]],
                 *,
                 own_iterable: bool = False) -> None:
        '''
        Args:
            iterable: an object-dtype ndarray, or a list (or other listable iterable) of values.
            own_iterable: flag iterable as ownable by this instance.

        Raises:
            NotImplementedError: if an ndarray is given whose dtype is not object.
        '''

        self._dtype = object  # only object arrays are supported

        if isinstance(iterable, np.ndarray):
            # Validate before taking ownership: a rejected array that belongs
            # to the caller must not be left flagged read-only as a side effect.
            if iterable.dtype != self._dtype:
                raise NotImplementedError('only object arrays are supported')
            if own_iterable:
                self._array = iterable
                self._array.flags.writeable = False
            else:
                self._array = immutable_filter(iterable)
            self._recache = False
            self._array_mutable = None
        else:  # assume it is a list or listable
            self._array = None
            self._recache = True
            if own_iterable:
                # the caller hands over the object; it is stored without a copy
                self._array_mutable = iterable
            else:
                # always call list to get new object, or realize a generator
                self._array_mutable = list(iterable)
    def __init__(self, iterable, *, dtype=object, own_iterable=False):
        '''
        Args:
            iterable: an ndarray, or else a list or other iterable of values.
            dtype: stored as ``_dtype``; an ndarray argument is asserted to have this dtype.
            own_iterable: flag iterable as ownable by this instance.
        '''
        self._dtype = dtype

        if not isinstance(iterable, np.ndarray):
            # No array is stored; values are kept in _array_mutable and
            # _recache is set.
            self._array = None
            self._recache = True
            # unless owned, call list to get a new object or realize a generator
            self._array_mutable = iterable if own_iterable else list(iterable)
            return

        if own_iterable:
            # use the caller's array directly, flagged read-only
            self._array = iterable
            iterable.flags.writeable = False
        else:
            self._array = immutable_filter(iterable)
        assert self._array.dtype == self._dtype
        self._recache = False
        self._array_mutable = None
Exemple #4
0
    def from_pandas(cls,
            value,
            *,
            own_data: bool = False) -> 'Series':
        '''Build a Series from a Pandas Series, carrying over its values, index and name.

        Args:
            value: Pandas Series.
            {own_data}
            {own_index}

        Returns:
            :py:class:`static_frame.Series`
        '''
        if not own_data:
            data = immutable_filter(value.values)
        else:
            # take the Pandas-held array as-is, flagged read-only in place
            data = value.values
            data.flags.writeable = False

        # the index is built here by IndexBase.from_pandas, so it is passed as owned
        index = IndexBase.from_pandas(value.index)
        return cls(data, index=index, name=value.name, own_index=True)
Exemple #5
0
    def _extract_labels(mapping: tp.Optional[tp.Dict[tp.Hashable, int]],
                        labels: tp.Iterable[tp.Hashable],
                        dtype: tp.Optional[np.dtype] = None) -> np.ndarray:
        '''Produce the labels array, a cached sequence of the mapping keys.

        An ndarray given as ``labels`` is returned after immutable filtering. Any other sized ``labels`` is converted to a 1D array. If ``labels`` has no length (e.g., a generator, possibly already consumed), the array is built from ``mapping`` instead.

        This method is overridden in the derived class.

        Args:
            mapping: Can be None if loc_is_iloc.
            labels: might be an expired Generator, but if it is an immutable ndarray, we can use it without a copy.
            dtype: if given, an ndarray ``labels`` must have this dtype; otherwise it is forwarded to the array conversion.

        Raises:
            ErrorInitIndex: if ``labels`` is an ndarray whose dtype differs from a given ``dtype``.
        '''
        # pre-fetching labels for faster get_item construction
        if isinstance(labels, np.ndarray):
            mismatch = dtype is not None and dtype != labels.dtype
            if mismatch:
                raise ErrorInitIndex('invalid label dtype for this Index')
            return immutable_filter(labels)

        # Sized labels are the source; without a length, labels may be an
        # expired generator, so the mapping must be used.
        source = labels if hasattr(labels, '__len__') else mapping

        if len(source) == 0:  # type: ignore
            return EMPTY_ARRAY

        # resolving the dtype is expensive, pass if possible
        array, _ = iterable_to_array_1d(source, dtype=dtype)  # type: ignore
        # all arrays are immutable
        # assert array.flags.writeable == False
        return array
Exemple #6
0
 def _extract_positions(mapping, positions):
     '''Return a read-only positions array.

     Args:
         mapping: sized object; its length sets the number of generated positions.
         positions: either None or an ndarray; an ndarray is immutable-filtered and returned.
     '''
     if not isinstance(positions, np.ndarray):
         # nothing usable supplied: generate 0..len(mapping)-1 and freeze it
         generated = np.arange(len(mapping))
         generated.flags.writeable = False
         return generated
     # an np array can be handled directly
     return immutable_filter(positions)
Exemple #7
0
    def _extract_positions(
            mapping,
            positions: tp.Iterable[int]):
        '''Return the positions array.

        Args:
            mapping: sized object; its length is the size requested from PositionsAllocator.
            positions: either None or an ndarray; an ndarray is immutable-filtered and returned.
        '''
        if not isinstance(positions, np.ndarray):
            return PositionsAllocator.get(len(mapping))
        # an np array can be handled directly
        return immutable_filter(positions)
Exemple #8
0
    def _extract_labels(
            mapping,
            labels,
            dtype: tp.Optional[np.dtype] = None
            ) -> np.ndarray:
        '''Derive labels, a cache of the mapping keys in a sequence type (either an ndarray or a list).

        If the labels passed at instantiation are an ndarray, they are used after immutable filtering. Otherwise, an ndarray is created from the sized labels or, failing that, from the mapping keys.

        This method is overridden in the derived class.

        Args:
            mapping: label-to-position mapping; used only when labels has no length.
            labels: might be an expired Generator, but if it is an immutable ndarray, we can use it without a copy.
            dtype: if given, an ndarray ``labels`` must have this dtype, and any newly created array uses it.

        Returns:
            A read-only ndarray of labels.

        Raises:
            RuntimeError: if ``labels`` is an ndarray whose dtype differs from a given ``dtype``.
        '''
        # pre-fetching labels for faster get_item construction
        if isinstance(labels, np.ndarray):
            if dtype is not None and dtype != labels.dtype:
                raise RuntimeError('invalid label dtype for this Index')
            return immutable_filter(labels)

        if hasattr(labels, '__len__'): # not a generator, not an array
            # resolving the dtype is expensive, pass if possible
            labels, _ = iterable_to_array_1d(labels, dtype=dtype)

        else: # labels may be an expired generator, must use the mapping

            # NOTE: building directly from mapping.keys() (np.array / np.fromiter) was tried and did not work; labels are instead placed at their mapped positions.

            labels_len = len(mapping)
            if labels_len == 0:
                labels = EMPTY_ARRAY
            else:
                # Compare to None explicitly rather than testing truthiness:
                # np.dtype instances may evaluate as falsy (len() of a
                # non-structured dtype is 0), which would silently discard a
                # supplied dtype in favor of object.
                labels = np.empty(labels_len,
                        dtype=object if dtype is None else dtype)
                for k, v in mapping.items():
                    labels[v] = k

        labels.flags.writeable = False
        return labels
Exemple #9
0
    def _extract_labels(
            mapping,
            labels,
            dtype=None) -> np.ndarray:
        '''Derive labels, a cache of the mapping keys in a sequence type (either an ndarray or a list).

        If the labels passed at instantiation are an ndarray, they are used after immutable filtering. Otherwise, an ndarray is created from the sized labels or, failing that, from the mapping keys.

        This method is overridden in the derived class.

        Args:
            mapping: label-to-position mapping; used only when labels has no length.
            labels: might be an expired Generator, but if it is an immutable ndarray, we can use it without a copy.
            dtype: if given, an ndarray ``labels`` must have this dtype, and any newly created array uses it.

        Returns:
            A read-only ndarray of labels.

        Raises:
            RuntimeError: if ``labels`` is an ndarray whose dtype differs from a given ``dtype``.
        '''
        # pre-fetching labels for faster get_item construction
        if isinstance(labels, np.ndarray): # if an np array can handle directly
            if dtype is not None and dtype != labels.dtype:
                raise RuntimeError('invalid label dtype for this Index')
            return immutable_filter(labels)

        if hasattr(labels, '__len__'): # not a generator, not an array
            if not len(labels):
                return EMPTY_ARRAY # already immutable

            if isinstance(labels[0], tuple):
                # tuples are stored as single object elements: allocate a 1D
                # object array first, then assign into it
                assert dtype is None or dtype == object
                array = np.empty(len(labels), object)
                array[:] = labels
                labels = array # rename
            else:
                labels = np.array(labels, dtype)
        else: # labels may be an expired generator
            # until all Python dictionaries are ordered, we cannot just take keys()
            # labels = np.array(tuple(mapping.keys()))
            # Compare to None explicitly rather than testing truthiness:
            # np.dtype instances may evaluate as falsy (len() of a
            # non-structured dtype is 0), which would silently discard a
            # supplied dtype in favor of object.
            labels = np.empty(len(mapping),
                    dtype=object if dtype is None else dtype)
            # place each key at its mapped position; avoids a temporary list
            for k, v in mapping.items():
                labels[v] = k

        labels.flags.writeable = False
        return labels
Exemple #10
0
    def __init__(self,
                 values: SeriesInitializer,
                 *,
                 index: IndexInitializer = None,
                 dtype: DtypeSpecifier = None,
                 own_index: bool = False) -> None:
        '''Initialize the Series: assign a read-only ``values`` array, then an index, then check that their lengths agree.

        Args:
            values: an ndarray, a dict, a sized iterable (other than str), a generator-like object (having ``__next__``), or a single element that is repeated to fill the index.
            index: index initializer. If None or empty, an integer Index over ``range(len(self.values))`` is created. Must not be given when ``values`` is a dict.
            dtype: dtype used when creating the values array. When ``values`` is an ndarray, it must be None or equal to the array's dtype.
            own_index: if True, ``index`` is stored as given, without further checks or copying.

        Raises:
            Exception: if ``values`` is a dict and ``index`` is given; if ``values`` is an ndarray and ``dtype`` disagrees with its dtype; if ``index`` has a false ``STATIC`` attribute; or if the final lengths of values and index differ.
        '''
        #-----------------------------------------------------------------------
        # values assignment

        # set to a callable taking the index length when values creation must wait for the index
        values_constructor = None  # if deferred

        # expose .values directly as it is immutable
        if not isinstance(values, np.ndarray):
            if isinstance(values, dict):
                # not sure if we should sort; not sure what to do if index is provided
                if index is not None:
                    raise Exception(
                        'cannot create a Series from a dictionary when an index is defined'
                    )
                # filled with the dict keys while values_gen() is consumed below
                index = []

                def values_gen():
                    for k, v in _dict_to_sorted_items(values):
                        # populate index as side effect of iterating values
                        index.append(k)
                        yield v

                if dtype and dtype != object:
                    # fromiter does not work with object types
                    self.values = np.fromiter(values_gen(),
                                              dtype=dtype,
                                              count=len(values))
                else:
                    self.values = np.array(tuple(values_gen()), dtype=dtype)
                self.values.flags.writeable = False

            # NOTE: not sure if we need to check __iter__ here
            # sized iterable with a dtype other than object or str: fromiter can be given a count
            elif (dtype and dtype != object and dtype != str
                  and hasattr(values, '__iter__')
                  and hasattr(values, '__len__')):
                self.values = np.fromiter(values,
                                          dtype=dtype,
                                          count=len(values))
                self.values.flags.writeable = False
            # any other sized object except a str (a str falls through to the single-item case)
            elif hasattr(values, '__len__') and not isinstance(values, str):
                self.values = np.array(values, dtype=dtype)
                self.values.flags.writeable = False
            elif hasattr(values, '__next__'):  # a generator-like
                # realize into a tuple first, then build the array
                self.values = np.array(tuple(values), dtype=dtype)
                self.values.flags.writeable = False
            else:  # it must be a single item
                # we cannot create the values until we realize the index, which might be hierarchical and not have final size equal to length
                def values_constructor(shape):
                    self.values = np.full(shape, values, dtype=dtype)
                    self.values.flags.writeable = False
        else:  # is numpy
            if dtype is not None and dtype != values.dtype:
                # what to do here?
                raise Exception(
                    'when supplying values via array, the dtype argument is not necessary; if provided, it must agree with the dtype of the array'
                )
            if values.shape == ():  # handle special case of NP element

                # zero-dimensional array: deferred like a single item, repeated to the index length
                def values_constructor(shape):
                    self.values = np.repeat(values, shape)
                    self.values.flags.writeable = False
            else:
                self.values = immutable_filter(values)

        #-----------------------------------------------------------------------
        # index assignment
        # NOTE: this generally must be done after values assignment, as from_items needs a values generator to be exhausted before looking to values

        # NOTE(review): when values creation was deferred (single item or zero-dimensional array) and index is None or empty, self.values does not appear to be assigned yet when len(self.values) is evaluated here -- confirm intended behavior.
        if index is None or (hasattr(index, '__len__') and len(index) == 0):
            # create an integer index
            self._index = Index(range(len(self.values)), loc_is_iloc=True)
        elif own_index:
            # caller grants ownership: store as given
            self._index = index
        elif hasattr(index, 'STATIC'):
            # an object exposing STATIC is stored directly, but only if STATIC is true
            if index.STATIC:
                self._index = index
            else:
                raise Exception(
                    'non-static index cannot be assigned to Series')
        else:  # let index handle instantiation
            if isinstance(index, (Index, IndexHierarchy)):
                # call with the class of the passed-in index, in case it is hierarchical
                self._index = index.__class__(index)
            else:
                self._index = Index(index)

        # length of the realized index; despite the name, an integer rather than a tuple
        shape = self._index.__len__()

        if values_constructor:
            values_constructor(shape)  # updates self.values

        if len(self.values) != shape:
            raise Exception('values and index do not match length')
Exemple #11
0
 def _extract_positions(
         size: int, positions: tp.Optional[tp.Sequence[int]]) -> np.ndarray:
     '''Return the positions array.

     Args:
         size: the size requested from PositionsAllocator when no ndarray is supplied.
         positions: either None or an ndarray; an ndarray is immutable-filtered and returned.
     '''
     if not isinstance(positions, np.ndarray):
         return PositionsAllocator.get(size)
     return immutable_filter(positions)