def _prep_index(self, data, index, columns):
    N, K = data.shape
    if index is None:
        index = com._default_index(N)
    if columns is None:
        columns = com._default_index(K)

    if len(columns) != K:
        raise ValueError('Column length mismatch: {columns} vs. {K}'
                         .format(columns=len(columns), K=K))
    if len(index) != N:
        raise ValueError('Index length mismatch: {index} vs. {N}'
                         .format(index=len(index), N=N))
    return index, columns

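# Illustrative usage (added sketch, not part of the original source): _prep_index
# substitutes a default integer index for any axis that is not supplied and raises
# ValueError when a supplied axis does not match the data's shape. The same checks
# are observable through the public DataFrame constructor, though the exact error
# wording varies by pandas version.
import numpy as np
import pandas as pd

_data = np.arange(6).reshape(3, 2)
pd.DataFrame(_data)  # index defaults to 0..2, columns to 0..1
try:
    pd.DataFrame(_data, columns=['a', 'b', 'c'])  # 3 labels for 2 columns
except ValueError as exc:
    print(exc)  # column length mismatch
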
def __new__(cls, data, index=None, dtype=None, name=None, copy=False):
    if isinstance(data, Series):
        if index is None:
            index = data.index
    elif isinstance(data, dict):
        if index is None:
            index = Index(sorted(data.keys()))
        data = [data[idx] for idx in index]

    # Create array, do *not* copy data by default, infer type
    try:
        subarr = np.array(data, dtype=dtype, copy=copy)
    except ValueError:
        if dtype:
            raise
        subarr = np.array(data, dtype=object)

    if subarr.ndim == 0:
        if isinstance(data, list):  # pragma: no cover
            subarr = np.array(data, dtype=object)
        elif index is not None:
            value = data

            # If we create an empty array using a string to infer
            # the dtype, NumPy will only allocate one character per entry
            # so this is kind of bad. Alternately we could use np.repeat
            # instead of np.empty (but then you still don't want things
            # coming out as np.str_!)
            if isinstance(value, basestring) and dtype is None:
                dtype = np.object_

            if dtype is None:
                subarr = np.empty(len(index), dtype=type(value))
            else:
                subarr = np.empty(len(index), dtype=dtype)
            subarr.fill(value)
        else:
            return subarr.item()
    elif subarr.ndim > 1:
        raise Exception('Data must be 1-dimensional')

    if index is None:
        index = _default_index(len(subarr))

    # This is to prevent mixed-type Series getting all casted to
    # NumPy string type, e.g. NaN --> '-1#IND'.
    if issubclass(subarr.dtype.type, basestring):
        subarr = np.array(data, dtype=object, copy=copy)

    # Change the class of the array to be the subclass type.
    subarr = subarr.view(cls)
    subarr.index = index
    subarr.name = name

    if subarr.index.is_all_dates():
        subarr = subarr.view(TimeSeries)

    return subarr

def _get_concat_axis(self):
    """
    Return index to be used along concatenation axis.
    """
    if self._is_series:
        if self.axis == 0:
            indexes = [x.index for x in self.objs]
        elif self.ignore_index:
            idx = com._default_index(len(self.objs))
            return idx
        elif self.keys is None:
            names = [None] * len(self.objs)
            num = 0
            has_names = False
            for i, x in enumerate(self.objs):
                if not isinstance(x, Series):
                    raise TypeError("Cannot concatenate type 'Series' "
                                    "with object of type "
                                    "%r" % type(x).__name__)
                if x.name is not None:
                    names[i] = x.name
                    has_names = True
                else:
                    names[i] = num
                    num += 1
            if has_names:
                return Index(names)
            else:
                return com._default_index(len(self.objs))
        else:
            return _ensure_index(self.keys)
    else:
        indexes = [x._data.axes[self.axis] for x in self.objs]

    if self.ignore_index:
        idx = com._default_index(sum(len(i) for i in indexes))
        return idx

    if self.keys is None:
        concat_axis = _concat_indexes(indexes)
    else:
        concat_axis = _make_concat_multiindex(indexes, self.keys,
                                              self.levels, self.names)

    self._maybe_check_integrity(concat_axis)

    return concat_axis

def _get_concat_axis(self):
    """
    Return index to be used along concatenation axis.
    """
    if self._is_series:
        if self.axis == 0:
            indexes = [x.index for x in self.objs]
        elif self.ignore_index:
            idx = com._default_index(len(self.objs))
            return idx
        elif self.keys is None:
            names = [None] * len(self.objs)
            num = 0
            has_names = False
            for i, x in enumerate(self.objs):
                if not isinstance(x, Series):
                    raise TypeError("Cannot concatenate type 'Series' "
                                    "with object of type {type!r}".format(
                                        type=type(x).__name__))
                if x.name is not None:
                    names[i] = x.name
                    has_names = True
                else:
                    names[i] = num
                    num += 1
            if has_names:
                return Index(names)
            else:
                return com._default_index(len(self.objs))
        else:
            return _ensure_index(self.keys)
    else:
        indexes = [x._data.axes[self.axis] for x in self.objs]

    if self.ignore_index:
        idx = com._default_index(sum(len(i) for i in indexes))
        return idx

    if self.keys is None:
        concat_axis = _concat_indexes(indexes)
    else:
        concat_axis = _make_concat_multiindex(indexes, self.keys,
                                              self.levels, self.names)

    self._maybe_check_integrity(concat_axis)

    return concat_axis

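# Illustrative usage (added sketch, not part of the original source): for Series
# concatenated along axis=1, _get_concat_axis builds the column labels from the
# Series names, giving unnamed entries counter-based integer labels; ignore_index
# discards labels entirely, and keys become the labels of the concatenation axis.
import pandas as pd

s_named = pd.Series([1, 2], name='x')
s_unnamed = pd.Series([3, 4])
pd.concat([s_named, s_unnamed], axis=1)             # columns built from names
pd.concat([s_named, s_unnamed], ignore_index=True)  # default integer index 0..3
pd.concat([s_named, s_unnamed], keys=['a', 'b'])    # keys form the outer index level
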
def __init__(self, data, index=None, columns=None, **kwargs):
    if len(data.shape) > 2:
        raise ValueError("Only two dimensional data supported")

    if len(data.shape) == 1 and isinstance(data, pd.Series):
        data = data.to_frame()
    elif len(data.shape) == 1:
        data = data.reshape(-1, 1)

    self.empty = False
    N, K = data.shape

    if index is None:
        self._index = _default_index(N)
    else:
        # assert len(index) == N
        self._index = _ensure_index(index)

    if columns is None:
        self._columns = _default_index(K)
    else:
        # assert len(columns) == K
        self._columns = _ensure_index(columns)

    if not sparse.isspmatrix_csr(data):
        try:
            self._init_values(data, kwargs)
        except TypeError:
            raise TypeError(traceback.format_exc() +
                            "\nThe error described above occurred while "
                            "converting data to sparse matrix.")
    else:
        self._init_csr(data)

    # register indexers
    self.ndim = 2
    self.iloc = _CsrILocationIndexer(self, 'iloc')
    self.loc = _CsrLocIndexer(self, 'loc')

def _init_matrix(self, data, axes, dtype=None, copy=False):
    values = self._prep_ndarray(self, data, copy=copy)

    if dtype is not None:
        try:
            values = values.astype(dtype)
        except Exception:
            raise ValueError('failed to cast to %s' % dtype)

    shape = values.shape
    fixed_axes = []
    for i, ax in enumerate(axes):
        if ax is None:
            ax = _default_index(shape[i])
        else:
            ax = _ensure_index(ax)
        fixed_axes.append(ax)

    return create_block_manager_from_blocks([values], fixed_axes)

def _init_matrix(self, data, axes, dtype=None, copy=False):
    values = _prep_ndarray(data, copy=copy)

    if dtype is not None:
        try:
            values = values.astype(dtype)
        except Exception:
            raise ValueError('failed to cast to %s' % dtype)

    shape = values.shape
    fixed_axes = []
    for i, ax in enumerate(axes):
        if ax is None:
            ax = _default_index(shape[i])
        else:
            ax = _ensure_index(ax)
        fixed_axes.append(ax)

    items = fixed_axes[0]
    block = make_block(values, items, items)
    return BlockManager([block], fixed_axes)

def __init__(self, data=None, index=None, sparse_index=None, kind='block',
             fill_value=None, name=None, dtype=None, copy=False,
             fastpath=False):

    # we are called internally, so short-circuit
    if fastpath:

        # data is an ndarray, index is defined
        if not isinstance(data, SingleBlockManager):
            data = SingleBlockManager(data, index, fastpath=True)
        if copy:
            data = data.copy()

    else:

        if data is None:
            data = []

        if isinstance(data, Series) and name is None:
            name = data.name

        is_sparse_array = isinstance(data, SparseArray)
        if fill_value is None:
            if is_sparse_array:
                fill_value = data.fill_value
            else:
                fill_value = np.nan

        if is_sparse_array:
            if isinstance(data, SparseSeries) and index is None:
                index = data.index.view()
            elif index is not None:
                assert (len(index) == len(data))

            sparse_index = data.sp_index
            data = np.asarray(data)

        elif isinstance(data, SparseSeries):
            if index is None:
                index = data.index.view()

            # extract the SingleBlockManager
            data = data._data

        elif isinstance(data, (Series, dict)):
            if index is None:
                index = data.index.view()

            data = Series(data)
            data, sparse_index = make_sparse(data, kind=kind,
                                             fill_value=fill_value)

        elif isinstance(data, (tuple, list, np.ndarray)):
            # array-like
            if sparse_index is None:
                data, sparse_index = make_sparse(data, kind=kind,
                                                 fill_value=fill_value)
            else:
                assert (len(data) == sparse_index.npoints)

        elif isinstance(data, SingleBlockManager):
            if dtype is not None:
                data = data.astype(dtype)
            if index is None:
                index = data.index.view()
            else:
                data = data.reindex(index, copy=False)

        else:
            length = len(index)

            if data == fill_value or (isnull(data) and isnull(fill_value)):
                if kind == 'block':
                    sparse_index = BlockIndex(length, [], [])
                else:
                    sparse_index = IntIndex(length, [])
                data = np.array([])

            else:
                if kind == 'block':
                    locs, lens = ([0], [length]) if length else ([], [])
                    sparse_index = BlockIndex(length, locs, lens)
                else:
                    sparse_index = IntIndex(length, index)
                v = data
                data = np.empty(length)
                data.fill(v)

    if index is None:
        index = com._default_index(sparse_index.length)
    index = _ensure_index(index)

    # create/copy the manager
    if isinstance(data, SingleBlockManager):
        if copy:
            data = data.copy()
    else:
        # create a sparse array
        if not isinstance(data, SparseArray):
            data = SparseArray(data, sparse_index=sparse_index,
                               fill_value=fill_value, dtype=dtype,
                               copy=copy)

        data = SingleBlockManager(data, index)

    generic.NDFrame.__init__(self, data)

    self.index = index
    self.name = name

def __init__(self, data=None, index=None, sparse_index=None, kind='block',
             fill_value=None, name=None, dtype=None, copy=False,
             fastpath=False):

    # we are called internally, so short-circuit
    if fastpath:

        # data is an ndarray, index is defined
        if not isinstance(data, SingleBlockManager):
            data = SingleBlockManager(data, index, fastpath=True)
        if copy:
            data = data.copy()

    else:

        if data is None:
            data = []

        if isinstance(data, Series) and name is None:
            name = data.name

        if isinstance(data, SparseArray):
            if index is not None:
                assert (len(index) == len(data))
            sparse_index = data.sp_index
            if fill_value is None:
                fill_value = data.fill_value

            data = np.asarray(data)

        elif isinstance(data, SparseSeries):
            if index is None:
                index = data.index.view()
            if fill_value is None:
                fill_value = data.fill_value

            # extract the SingleBlockManager
            data = data._data

        elif isinstance(data, (Series, dict)):
            if index is None:
                index = data.index.view()

            data = Series(data)
            res = make_sparse(data, kind=kind, fill_value=fill_value)
            data, sparse_index, fill_value = res

        elif isinstance(data, (tuple, list, np.ndarray)):
            # array-like
            if sparse_index is None:
                res = make_sparse(data, kind=kind, fill_value=fill_value)
                data, sparse_index, fill_value = res
            else:
                assert (len(data) == sparse_index.npoints)

        elif isinstance(data, SingleBlockManager):
            if dtype is not None:
                data = data.astype(dtype)
            if index is None:
                index = data.index.view()
            else:
                data = data.reindex(index, copy=False)

        else:
            length = len(index)

            if data == fill_value or (isnull(data) and isnull(fill_value)):
                if kind == 'block':
                    sparse_index = BlockIndex(length, [], [])
                else:
                    sparse_index = IntIndex(length, [])
                data = np.array([])

            else:
                if kind == 'block':
                    locs, lens = ([0], [length]) if length else ([], [])
                    sparse_index = BlockIndex(length, locs, lens)
                else:
                    sparse_index = IntIndex(length, index)
                v = data
                data = np.empty(length)
                data.fill(v)

    if index is None:
        index = com._default_index(sparse_index.length)
    index = _ensure_index(index)

    # create/copy the manager
    if isinstance(data, SingleBlockManager):
        if copy:
            data = data.copy()
    else:
        # create a sparse array
        if not isinstance(data, SparseArray):
            data = SparseArray(data, sparse_index=sparse_index,
                               fill_value=fill_value, dtype=dtype,
                               copy=copy)

        data = SingleBlockManager(data, index)

    generic.NDFrame.__init__(self, data)

    self.index = index
    self.name = name

def __new__(cls, data, index=None, sparse_index=None, kind='block',
            fill_value=None, name=None, copy=False):

    is_sparse_array = isinstance(data, SparseArray)
    if fill_value is None:
        if is_sparse_array:
            fill_value = data.fill_value
        else:
            fill_value = nan

    if is_sparse_array:
        if isinstance(data, SparseSeries) and index is None:
            index = data.index
        elif index is not None:
            assert(len(index) == len(data))

        sparse_index = data.sp_index
        values = np.asarray(data)
    elif isinstance(data, (Series, dict)):
        if index is None:
            index = data.index

        data = Series(data)
        values, sparse_index = make_sparse(data, kind=kind,
                                           fill_value=fill_value)
    elif isinstance(data, (tuple, list, np.ndarray)):
        # array-like
        if sparse_index is None:
            values, sparse_index = make_sparse(data, kind=kind,
                                               fill_value=fill_value)
        else:
            values = data
            assert(len(values) == sparse_index.npoints)
    else:
        if index is None:
            raise Exception('must pass index!')

        length = len(index)

        if data == fill_value or (isnull(data) and isnull(fill_value)):
            if kind == 'block':
                sparse_index = BlockIndex(length, [], [])
            else:
                sparse_index = IntIndex(length, [])
            values = np.array([])
        else:
            if kind == 'block':
                locs, lens = ([0], [length]) if length else ([], [])
                sparse_index = BlockIndex(length, locs, lens)
            else:
                sparse_index = IntIndex(length, index)
            values = np.empty(length)
            values.fill(data)

    if index is None:
        index = com._default_index(sparse_index.length)
    index = _ensure_index(index)

    # Create array, do *not* copy data by default
    if copy:
        subarr = np.array(values, dtype=np.float64, copy=True)
    else:
        subarr = np.asarray(values, dtype=np.float64)

    if index.is_all_dates:
        cls = SparseTimeSeries

    # Change the class of the array to be the subclass type.
    output = subarr.view(cls)
    output.sp_index = sparse_index
    output.fill_value = np.float64(fill_value)
    output.index = index
    output.name = name
    return output

def __init__(self, data=None, index=None, sparse_index=None, kind='block',
             fill_value=None, name=None, dtype=None, copy=False,
             fastpath=False):

    # we are called internally, so short-circuit
    if fastpath:

        # data is an ndarray, index is defined
        if not isinstance(data, SingleBlockManager):
            data = SingleBlockManager(data, index, fastpath=True)
        if copy:
            data = data.copy()

    else:

        if data is None:
            data = []

        if isinstance(data, Series) and name is None:
            name = data.name

        if isinstance(data, SparseArray):
            if index is not None:
                assert (len(index) == len(data))
            sparse_index = data.sp_index
            if fill_value is None:
                fill_value = data.fill_value

            data = np.asarray(data)

        elif isinstance(data, SparseSeries):
            if index is None:
                index = data.index.view()
            if fill_value is None:
                fill_value = data.fill_value

            # extract the SingleBlockManager
            data = data._data

        elif isinstance(data, (Series, dict)):
            data = Series(data, index=index)
            index = data.index.view()

            res = make_sparse(data, kind=kind, fill_value=fill_value)
            data, sparse_index, fill_value = res

        elif isinstance(data, (tuple, list, np.ndarray)):
            # array-like
            if sparse_index is None:
                res = make_sparse(data, kind=kind, fill_value=fill_value)
                data, sparse_index, fill_value = res
            else:
                assert (len(data) == sparse_index.npoints)

        elif isinstance(data, SingleBlockManager):
            if dtype is not None:
                data = data.astype(dtype)
            if index is None:
                index = data.index.view()
            elif not data.index.equals(index) or copy:  # pragma: no cover
                # GH#19275 SingleBlockManager input should only be called
                # internally
                raise AssertionError('Cannot pass both SingleBlockManager '
                                     '`data` argument and a different '
                                     '`index` argument. `copy` must '
                                     'be False.')

        else:
            length = len(index)

            if data == fill_value or (isna(data) and isna(fill_value)):
                if kind == 'block':
                    sparse_index = BlockIndex(length, [], [])
                else:
                    sparse_index = IntIndex(length, [])
                data = np.array([])

            else:
                if kind == 'block':
                    locs, lens = ([0], [length]) if length else ([], [])
                    sparse_index = BlockIndex(length, locs, lens)
                else:
                    sparse_index = IntIndex(length, index)
                v = data
                data = np.empty(length)
                data.fill(v)

    if index is None:
        index = com._default_index(sparse_index.length)
    index = _ensure_index(index)

    # create/copy the manager
    if isinstance(data, SingleBlockManager):
        if copy:
            data = data.copy()
    else:
        # create a sparse array
        if not isinstance(data, SparseArray):
            data = SparseArray(data, sparse_index=sparse_index,
                               fill_value=fill_value, dtype=dtype,
                               copy=copy)

        data = SingleBlockManager(data, index)

    generic.NDFrame.__init__(self, data)

    self.index = index
    self.name = name

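# Illustrative usage (added sketch, not part of the original source; SparseSeries
# exists only in older pandas releases). The constructor accepts dense array-likes,
# dicts/Series, existing SparseArray/SparseSeries objects, or a scalar together with
# an explicit index.
import numpy as np
import pandas as pd

pd.SparseSeries([1.0, np.nan, np.nan, 3.0])                  # fill_value defaults to NaN
pd.SparseSeries([0, 0, 1, 2], fill_value=0, kind='integer')  # store only non-fill points
pd.SparseSeries(5.0, index=range(4))                         # scalar broadcast over index
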
def __init__(self, data, index=None, columns=None, **kwargs):
    """Init SparseFrame

    Parameters
    ----------
    data: sparse.csr_matrix | np.ndarray | pandas.DataFrame
        Data to initialize matrix with. Can be one of above types, or
        anything accepted by sparse.csr_matrix along with the correct
        kwargs.
    index: pd.Index or array-like
        Index to use for resulting frame. Will default to RangeIndex if
        input data has no indexing information and no index provided.
    columns : pd.Index or array-like
        Column labels to use for resulting frame. Defaults like in index.
    """
    if len(data.shape) > 2:
        raise ValueError("Only two dimensional data supported")

    if len(data.shape) == 1 and isinstance(data, pd.Series):
        data = data.to_frame()
    elif len(data.shape) == 1:
        data = data.reshape(-1, 1)

    self.empty = False
    N, K = data.shape

    if index is None:
        self._index = _default_index(N)
    elif len(index) != N and data.size:
        if columns is not None:
            implied_axis_1 = len(columns)
        else:
            implied_axis_1 = data.shape[1]
        raise ValueError('Shape of passed values is {}, '
                         'indices imply {}'.format(
                             data.shape, (len(index), implied_axis_1)))
    else:
        self._index = _ensure_index(index)

    if columns is None:
        self._columns = _default_index(K)
    elif len(columns) != K and data.size:
        if index is not None:
            implied_axis_0 = len(index)
        else:
            implied_axis_0 = data.shape[0]
        raise ValueError('Shape of passed values is {}, '
                         'indices imply {}'.format(
                             data.shape, (implied_axis_0, len(columns))))
    else:
        self._columns = _ensure_index(columns)

    if not sparse.isspmatrix_csr(data):
        try:
            self._init_values(data,
                              init_index=index is None,
                              init_columns=columns is None,
                              **kwargs)
        except TypeError:
            raise TypeError(traceback.format_exc() +
                            "\nThe error described above occurred while "
                            "converting data to sparse matrix.")
    else:
        self.empty = True if _is_empty(data) else False
        self._init_csr(data)

    self.ndim = 2

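# Illustrative usage (added sketch, not part of the original source; assumes the
# SparseFrame class defined above is importable). Mismatched label lengths are now
# rejected up front instead of being silently accepted.
import numpy as np
import scipy.sparse as sparse

csr = sparse.csr_matrix(np.eye(3))
SparseFrame(csr, index=list('abc'), columns=list('xyz'))  # shapes agree: OK
try:
    SparseFrame(csr, index=list('ab'))  # 2 labels for 3 rows
except ValueError as exc:
    print(exc)  # shape of passed values vs. implied indices
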
def reset_index(self, drop=False):
    if not drop:
        raise NotImplementedError("drop=False is not supported.")
    new_idx = _default_index(len(self))
    return SparseFrame(self.data,
                       index=new_idx,
                       columns=self.columns)

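# Illustrative usage (added sketch, not part of the original source; assumes the
# SparseFrame above). reset_index only supports drop=True and simply re-labels the
# rows with a fresh default integer index.
import numpy as np
import scipy.sparse as sparse

sf = SparseFrame(sparse.csr_matrix(np.eye(3)), index=list('abc'))
sf.reset_index(drop=True)   # index becomes 0..2
# sf.reset_index()          # drop=False raises NotImplementedError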