def from_pandas(cls, value, *, own_data: bool = False, own_index: bool = False) -> 'Series':
    '''Build a Series from a Pandas Series.

    Args:
        value: the Pandas Series to convert.
        own_data: when True, the NumPy array backing ``value`` is flagged read-only in place and used directly; otherwise it is passed through ``immutable_filter``.
        own_index: when True, the NumPy array of index labels is flagged read-only in place and used directly; otherwise it is passed through ``immutable_filter``.

    Returns:
        :py:class:`static_frame.Series`
    '''
    # values: either take ownership of the array or hand it to the filter
    array = value.values
    if own_data:
        array.flags.writeable = False
    else:
        array = immutable_filter(array)

    # index labels: same ownership choice, made independently
    labels = value.index.values
    if own_index:
        labels.flags.writeable = False
    else:
        labels = immutable_filter(labels)

    return cls(array, index=labels)
def __init__(self,
        iterable: tp.Union[np.ndarray, tp.List[object]],
        *,
        own_iterable: bool = False) -> None:
    '''Store either an immutable object array or a mutable list of items.

    Args:
        iterable: an object-dtype ndarray, or a list (or other iterable) of items.
        own_iterable: flag iterable as ownable by this instance. An owned ndarray is flagged read-only in place rather than passed through ``immutable_filter``; an owned non-array iterable is stored as given rather than realized with ``list()``.

    Raises:
        NotImplementedError: if an ndarray with a dtype other than object is given.
    '''
    self._dtype = object # only object arrays are supported

    if isinstance(iterable, np.ndarray):
        # Validate first, so that a rejected array is neither frozen in place
        # nor needlessly passed through immutable_filter.
        if iterable.dtype != self._dtype:
            raise NotImplementedError('only object arrays are supported')
        if own_iterable:
            iterable.flags.writeable = False
            self._array = iterable
        else:
            self._array = immutable_filter(iterable)
        self._recache = False
        self._array_mutable = None
    else: # assume it is a list or listable
        self._array = None
        self._recache = True
        if own_iterable:
            # NOTE(review): stored without conversion; presumably callers only
            # pass a real list when claiming ownership -- confirm.
            self._array_mutable = iterable
        else:
            # call list to get a new object, or to realize a generator
            self._array_mutable = list(iterable)
def __init__(self, iterable, *, dtype=object, own_iterable=False):
    '''Store either an immutable array of the given dtype or a mutable list of items.

    Args:
        iterable: an ndarray whose dtype equals ``dtype``, or a list (or other iterable) of items.
        dtype: the dtype an ndarray argument is required to have.
        own_iterable: flag iterable as ownable by this instance. An owned ndarray is flagged read-only in place rather than passed through ``immutable_filter``; an owned non-array iterable is stored as given rather than realized with ``list()``.

    Raises:
        AssertionError: if an ndarray is given whose dtype does not equal ``dtype``.
    '''
    self._dtype = dtype

    if isinstance(iterable, np.ndarray):
        # Explicit raise rather than an assert statement: asserts are removed
        # under `python -O`, which would let a mismatched array through. The
        # exception type is kept as AssertionError for existing callers.
        # Checking before the freeze/copy leaves a rejected array untouched.
        if not (iterable.dtype == self._dtype):
            raise AssertionError('array dtype does not match the specified dtype')
        if own_iterable:
            iterable.flags.writeable = False
            self._array = iterable
        else:
            self._array = immutable_filter(iterable)
        self._recache = False
        self._array_mutable = None
    else:
        self._array = None
        self._recache = True
        if own_iterable:
            # NOTE(review): stored without conversion; presumably callers only
            # pass a real list when claiming ownership -- confirm.
            self._array_mutable = iterable
        else:
            # call list to get a new object, or to realize a generator
            self._array_mutable = list(iterable)
def from_pandas(cls, value, *, own_data: bool = False) -> 'Series':
    '''Given a Pandas Series, return a Series.

    Args:
        value: Pandas Series.
        {own_data}
        {own_index}

    Returns:
        :py:class:`static_frame.Series`
    '''
    # NOTE(review): the brace placeholders above are presumably filled in by a
    # docstring-templating decorator outside this view; left as found.
    array = value.values
    if own_data:
        # take the array as-is, only flagging it read-only
        array.flags.writeable = False
    else:
        array = immutable_filter(array)

    # the index is converted by IndexBase, so the new Series can own it
    index = IndexBase.from_pandas(value.index)
    return cls(array, index=index, name=value.name, own_index=True)
def _extract_labels(mapping: tp.Optional[tp.Dict[tp.Hashable, int]],
        labels: tp.Iterable[tp.Hashable],
        dtype: tp.Optional[np.dtype] = None) -> np.ndarray:
    '''Produce the array of labels that is cached alongside the mapping.

    An ndarray of labels is returned after immutable filtering. Any other sized container of labels is converted to a 1D array. When labels has no length (for example, a generator that may already be exhausted), the keys of the mapping are converted instead. This method is overridden in the derived class.

    Args:
        mapping: Can be None if loc_is_iloc.
        labels: an ndarray, a sized container, or an unsized iterable of labels.
        dtype: if given, an ndarray of labels must have this dtype; it is also forwarded to the array conversion.

    Raises:
        ErrorInitIndex: if labels is an ndarray whose dtype differs from a given dtype.
    '''
    if isinstance(labels, np.ndarray):
        if dtype is not None and dtype != labels.dtype:
            raise ErrorInitIndex('invalid label dtype for this Index')
        return immutable_filter(labels)

    # Sized labels are read directly; unsized labels may be an expired
    # generator, so the mapping is the only reliable source.
    source = labels if hasattr(labels, '__len__') else mapping

    if len(source) == 0: #type: ignore
        return EMPTY_ARRAY

    # resolving the dtype is expensive, so it is passed along when known
    array, _ = iterable_to_array_1d(source, dtype=dtype) #type: ignore
    return array
def _extract_positions(mapping, positions):
    '''Return an immutable array of integer positions.

    Args:
        mapping: sized container; its length sets how many positions are generated when positions is not an ndarray.
        positions: an ndarray of positions, or any other value (such as None) to request generated positions.
    '''
    if not isinstance(positions, np.ndarray):
        # nothing usable was supplied: build 0..len(mapping)-1 and freeze it
        generated = np.arange(len(mapping))
        generated.flags.writeable = False
        return generated
    # an array was supplied and can be handled directly
    return immutable_filter(positions)
def _extract_positions(
        mapping,
        positions: tp.Iterable[int]):
    '''Return an array of integer positions.

    Args:
        mapping: sized container; its length is the size requested from PositionsAllocator when positions is not an ndarray.
        positions: an ndarray of positions, or any other value (such as None) to request allocated positions.
    '''
    if not isinstance(positions, np.ndarray):
        return PositionsAllocator.get(len(mapping))
    # an array was supplied and can be handled directly
    return immutable_filter(positions)
def _extract_labels(
        mapping,
        labels,
        dtype: tp.Optional[np.dtype] = None
        ) -> np.ndarray:
    '''Derive labels, a cache of the mapping keys in an ndarray.

    If the labels passed at instantiation are an ndarray, they are used after immutable filtering. If they are any other sized container, they are converted to a 1D array. Otherwise, the mapping keys are written into a new array at the positions the mapping assigns them. This method is overridden in the derived class.

    Args:
        mapping: dictionary of label to integer position; only read when labels has no length.
        labels: might be an expired Generator, but if it is an immutable ndarray, we can use it without a copy.
        dtype: if given, an ndarray of labels must have this dtype; it is also used for any array created here.

    Returns:
        An ndarray of labels flagged as not writeable.

    Raises:
        RuntimeError: if labels is an ndarray whose dtype differs from a given dtype.
    '''
    # pre-fetching labels for faster get_item construction
    if isinstance(labels, np.ndarray):
        if dtype is not None and dtype != labels.dtype:
            raise RuntimeError('invalid label dtype for this Index')
        return immutable_filter(labels)

    if hasattr(labels, '__len__'): # not a generator, not an array
        # resolving the dtype is expensive, pass if possible
        labels, _ = iterable_to_array_1d(labels, dtype=dtype)
    else:
        # labels may be an expired generator, must use the mapping
        labels_len = len(mapping)
        if labels_len == 0:
            labels = EMPTY_ARRAY
        else:
            # Test against None explicitly: np.dtype defines __len__, and on
            # older NumPy releases that made dtype instances falsy, so a
            # truthiness test silently replaced the requested dtype with object.
            labels = np.empty(labels_len,
                    dtype=object if dtype is None else dtype)
            # place each key at the position recorded for it in the mapping
            for k, v in mapping.items():
                labels[v] = k

    labels.flags.writeable = False
    return labels
def _extract_labels(
        mapping,
        labels,
        dtype=None) -> np.ndarray:
    '''Derive labels, a cache of the mapping keys in an ndarray.

    If the labels passed at instantiation are an ndarray, they are used after immutable filtering. If they are any other sized container, an array is built from them. Otherwise, the mapping keys are written into a new array at the positions the mapping assigns them. This method is overridden in the derived class.

    Args:
        mapping: dictionary of label to integer position; only read when labels has no length.
        labels: might be an expired Generator, but if it is an immutable ndarray, we can use it without a copy.
        dtype: if given, an ndarray of labels must have this dtype; it is also used for any array created here.

    Returns:
        An ndarray of labels; arrays created here are flagged as not writeable.

    Raises:
        RuntimeError: if labels is an ndarray whose dtype differs from a given dtype.
        AssertionError: if the labels are tuples and a dtype other than object is given.
    '''
    # pre-fetching labels for faster get_item construction
    if isinstance(labels, np.ndarray): # if an np array can handle directly
        if dtype is not None and dtype != labels.dtype:
            raise RuntimeError('invalid label dtype for this Index')
        return immutable_filter(labels)

    if hasattr(labels, '__len__'): # not a generator, not an array
        if not len(labels):
            return EMPTY_ARRAY # already immutable

        if isinstance(labels[0], tuple):
            # Explicit raise rather than an assert statement, so the check
            # survives `python -O`; the exception type is unchanged.
            if not (dtype is None or dtype == object):
                raise AssertionError('tuple labels require an object dtype')
            # assign into a pre-sized 1D object array so that each tuple is
            # stored as a single element
            array = np.empty(len(labels), object)
            array[:] = labels
            labels = array # rename
        else:
            labels = np.array(labels, dtype)
    else: # labels may be an expired generator
        # until all Python dictionaries are ordered, we cannot just take keys()
        # Test against None explicitly: np.dtype defines __len__, and on older
        # NumPy releases that made dtype instances falsy, so a truthiness test
        # silently replaced the requested dtype with object.
        labels = np.empty(len(mapping),
                dtype=object if dtype is None else dtype)
        # place each key at the position recorded for it in the mapping
        for k, v in mapping.items():
            labels[v] = k

    labels.flags.writeable = False
    return labels
def __init__(self,
        values: SeriesInitializer,
        *,
        index: IndexInitializer = None,
        dtype: DtypeSpecifier = None,
        own_index: bool = False
        ) -> None:
    '''Initialize the Series by assigning ``self.values`` and ``self._index``.

    Values are assigned first; the index is assigned afterwards. When values is a single item or a zero-dimensional ndarray, creation of ``self.values`` is deferred until the length of the index is known.

    Args:
        values: one of the following.
            A dict: values are taken in the order given by ``_dict_to_sorted_items`` and the keys become the index.
            An object with ``__len__`` (other than a str): converted to an array.
            An object with ``__next__``: realized as a tuple, then converted to an array.
            An ndarray: a zero-dimensional array is repeated to the index length; any other array is passed through ``immutable_filter``.
            Anything else: treated as a single item used to fill an array of the index length.
        index: index initializer. If None or a sized object of length zero, an integer ``Index`` over ``range(len(self.values))`` is created.
        dtype: dtype for arrays created here. When values is an ndarray, a dtype that is not None must equal the dtype of the array.
        own_index: if True, a non-empty index is assigned to ``self._index`` without conversion.

    Raises:
        Exception: if values is a dict and index is not None; if values is an ndarray and dtype is given but differs from the dtype of the array; if index has a false ``STATIC`` attribute; or if the final lengths of values and index differ.
    '''
    #-----------------------------------------------------------------------
    # values assignment

    values_constructor = None # if deferred

    # expose .values directly as it is immutable
    if not isinstance(values, np.ndarray):
        if isinstance(values, dict):
            # not sure if we should sort; not sure what to do if index is provided
            if index is not None:
                raise Exception(
                        'cannot create a Series from a dictionary when an index is defined'
                        )
            index = []
            def values_gen():
                for k, v in _dict_to_sorted_items(values):
                    # populate index as side effect of iterating values
                    index.append(k)
                    yield v

            if dtype and dtype != object:
                # fromiter does not work with object types
                self.values = np.fromiter(values_gen(), dtype=dtype, count=len(values))
            else:
                self.values = np.array(tuple(values_gen()), dtype=dtype)
            self.values.flags.writeable = False

        # NOTE: not sure if we need to check __iter__ here
        elif (dtype and dtype != object and dtype != str
                and hasattr(values, '__iter__')
                and hasattr(values, '__len__')):
            # sized iterable with a dtype that is neither object nor str: fill directly from the iterator
            self.values = np.fromiter(values, dtype=dtype, count=len(values))
            self.values.flags.writeable = False
        elif hasattr(values, '__len__') and not isinstance(values, str):
            self.values = np.array(values, dtype=dtype)
            self.values.flags.writeable = False
        elif hasattr(values, '__next__'): # a generator-like
            self.values = np.array(tuple(values), dtype=dtype)
            self.values.flags.writeable = False
        else: # it must be a single item
            # we cannot create the values until we realize the index, which might be hierarchical and not have final size equal to length
            def values_constructor(shape):
                self.values = np.full(shape, values, dtype=dtype)
                self.values.flags.writeable = False
    else: # is numpy
        if dtype is not None and dtype != values.dtype:
            # what to do here?
            raise Exception(
                    'when supplying values via array, the dtype argument is not necessary; if provided, it must agree with the dtype of the array'
                    )
        if values.shape == (): # handle special case of NP element
            def values_constructor(shape):
                self.values = np.repeat(values, shape)
                self.values.flags.writeable = False
        else:
            self.values = immutable_filter(values)

    #-----------------------------------------------------------------------
    # index assignment
    # NOTE: this generally must be done after values assignment, as from_items needs a values generator to be exhausted before looking to values

    if index is None or (hasattr(index, '__len__') and len(index) == 0):
        # create an integer index
        # NOTE(review): when values construction was deferred (single item or zero-dimensional array), self.values has not been assigned in this method at this point, so len(self.values) looks like it would fail -- confirm whether an index is required in that case
        self._index = Index(range(len(self.values)), loc_is_iloc=True)
    elif own_index:
        # NOTE(review): index is assigned without conversion; for dict values this is the plain list built above -- confirm callers never combine a dict with own_index
        self._index = index
    elif hasattr(index, 'STATIC'):
        if index.STATIC:
            self._index = index
        else:
            raise Exception(
                    'non-static index cannot be assigned to Series')
    else: # let index handle instantiation
        if isinstance(index, (Index, IndexHierarchy)):
            # call with the class of the passed-in index, in case it is hierarchical
            self._index = index.__class__(index)
        else:
            self._index = Index(index)

    shape = self._index.__len__()

    if values_constructor:
        values_constructor(shape) # updates self.values

    if len(self.values) != shape:
        raise Exception('values and index do not match length')
def _extract_positions(
        size: int,
        positions: tp.Optional[tp.Sequence[int]]) -> np.ndarray:
    '''Return an array of integer positions.

    Args:
        size: the number of positions requested from PositionsAllocator when positions is not an ndarray.
        positions: an ndarray of positions, or any other value (such as None) to request allocated positions.
    '''
    if not isinstance(positions, np.ndarray):
        return PositionsAllocator.get(size)
    # an array was supplied: make sure it is immutable
    return immutable_filter(positions)