def _init_spmatrix(self, data, index, columns, dtype=None, fill_value=None):
    """
    Init self from a scipy.sparse matrix.

    Parameters
    ----------
    data : scipy.sparse matrix
        Converted to COO form with ``data.tocoo()`` before use.
    index, columns
        Resolved together with ``data`` by ``self._prep_index``.
    dtype : optional
        Forwarded unchanged to ``self._init_dict``.
    fill_value : optional
        Fill value given to every SparseSeries built here.

    Returns
    -------
    The result of ``self._init_dict(sdict, index, columns, dtype)``.
    """
    index, columns = self._prep_index(data, index, columns)
    data = data.tocoo()
    N = len(index)

    # Construct a dict of SparseSeries
    sdict = {}
    values = Series(data.data, index=data.row, copy=False)
    for col, rowvals in values.groupby(data.col):
        # get_blocks expects int32 row indices in sorted order.  Sort the
        # whole group by its row labels so the values are permuted together
        # with their rows; sorting only the extracted row array would leave
        # the values in storage order and attach them to the wrong rows
        # whenever the COO entries of a column are not already row-sorted.
        rowvals = rowvals.sort_index()
        rows = rowvals.index.values.astype(np.int32)
        blocs, blens = get_blocks(rows)

        sdict[columns[col]] = SparseSeries(
            rowvals.values, index=index, fill_value=fill_value,
            sparse_index=BlockIndex(N, blocs, blens))

    # Add any columns that were empty and thus not grouped on above
    sdict.update({
        column: SparseSeries(index=index, fill_value=fill_value,
                             sparse_index=BlockIndex(N, [], []))
        for column in columns
        if column not in sdict
    })

    return self._init_dict(sdict, index, columns, dtype)
def __init__(self, data=None, index=None, columns=None,
             default_kind='block', default_fill_value=None):
    """
    Build the column -> SparseSeries mapping from ``data``.

    Parameters
    ----------
    data : dict, numpy.ndarray, list, DataFrame or None
        dict and DataFrame input is handled by ``self._init_dict`` (a
        DataFrame contributes its own index and columns); ndarray and list
        input is handled by ``self._init_matrix``; with None, one
        SparseSeries built from ``np.nan`` is created per label in
        ``columns``.
    index, columns : optional
        Axis labels; empty ``Index`` objects are used when ``data`` is None
        and they are not given.
    default_kind : str, default 'block'
        Stored as ``self.default_kind``.
    default_fill_value : optional
        Stored as ``self.default_fill_value``; ``np.nan`` when None.

    Raises
    ------
    TypeError
        If ``data`` is none of the supported types.
    """
    if default_fill_value is None:
        default_fill_value = np.nan

    self.default_kind = default_kind
    self.default_fill_value = default_fill_value

    if isinstance(data, dict):
        sdict, columns, index = self._init_dict(data, index, columns)
    elif isinstance(data, (np.ndarray, list)):
        sdict, columns, index = self._init_matrix(data, index, columns)
    elif isinstance(data, DataFrame):
        sdict, columns, index = self._init_dict(data, data.index,
                                                data.columns)
    elif data is None:
        sdict = {}

        if index is None:
            index = Index([])
        else:
            index = _ensure_index(index)

        if columns is None:
            columns = Index([])
        else:
            # Normalise before iterating so a one-shot iterable is not
            # exhausted by the loop and then stored empty, and so the
            # stored columns are index-like as in the other branches.
            columns = _ensure_index(columns)
            for c in columns:
                sdict[c] = SparseSeries(np.nan, index=index,
                                        kind=self.default_kind,
                                        fill_value=self.default_fill_value)
    else:
        # Previously this fell through and failed later with an
        # UnboundLocalError on ``sdict``; report the real problem instead.
        raise TypeError('%s cannot be constructed from data of type %s'
                        % (type(self).__name__, type(data).__name__))

    self._series = sdict
    self.columns = columns
    self.index = index
def shift(self, periods, freq=None, **kwds):
    """
    Analogous to DataFrame.shift

    ``freq`` and ``kwds`` are resolved to an offset by ``_resolve_offset``.
    With no offset each column is shifted by ``periods`` and the index is
    kept; with an offset the index itself is shifted and every column's
    sparse values are re-attached to the new index.
    """
    from pandas.core.series import _resolve_offset

    offset = _resolve_offset(freq, kwds)

    if offset is not None:
        # Move the labels; the stored sparse data is reused as-is.
        new_index = self.index.shift(periods, offset)
        shifted = dict(
            (name, SparseSeries(column.sp_values, index=new_index,
                                sparse_index=column.sp_index,
                                fill_value=column.fill_value))
            for name, column in self.iteritems())
    else:
        # Move the data; the labels stay where they are.
        new_index = self.index
        shifted = dict((name, column.shift(periods))
                       for name, column in self.iteritems())

    return SparseDataFrame(shifted, index=new_index, columns=self.columns,
                           default_fill_value=self.default_fill_value,
                           default_kind=self.default_kind)
def _unpickle_sparse_frame_compat(self, state):
    """
    original pickle format

    ``state`` is unpacked as ``(series, cols, idx, fv, kind)`` where
    ``series`` maps each column label to a ``(sp_index, sp_values)`` pair.
    """
    series, cols, idx, fv, kind = state

    # Axis labels that are not already Index objects are decoded with
    # _unpickle_array.
    if isinstance(cols, Index):
        columns = cols
    else:  # pragma: no cover
        from pandas.io.pickle import _unpickle_array
        columns = _unpickle_array(cols)

    if isinstance(idx, Index):
        index = idx
    else:  # pragma: no cover
        from pandas.io.pickle import _unpickle_array
        index = _unpickle_array(idx)

    frame = DataFrame()
    for name, (sp_index, sp_values) in compat.iteritems(series):
        frame[name] = SparseSeries(sp_values, sparse_index=sp_index,
                                   fill_value=fv)

    self._data = to_manager(frame, columns, index)
    self._default_fill_value = fv
    self._default_kind = kind
def shift(self, periods, offset=None, timeRule=None):
    """
    Analogous to DataFrame.shift

    ``timeRule`` is only consulted when no ``offset`` is given, in which
    case it is converted with ``datetools.getOffset``.  With no offset each
    column is shifted by ``periods`` and the index is kept; with an offset
    the index itself is shifted and every column's sparse values are
    re-attached to the new index.
    """
    if offset is None and timeRule is not None:
        offset = datetools.getOffset(timeRule)

    if offset is not None:
        # Move the labels; the stored sparse data is reused as-is.
        new_index = self.index.shift(periods, offset)
        shifted = dict(
            (name, SparseSeries(column.sp_values, index=new_index,
                                sparse_index=column.sp_index,
                                fill_value=column.fill_value))
            for name, column in self.iteritems())
    else:
        # Move the data; the labels stay where they are.
        new_index = self.index
        shifted = dict((name, column.shift(periods))
                       for name, column in self.iteritems())

    return SparseDataFrame(shifted, index=new_index, columns=self.columns,
                           default_fill_value=self.default_fill_value,
                           default_kind=self.default_kind)
def __setstate__(self, state):
    """
    Restore the frame from pickled ``state``.

    ``state`` is unpacked as ``(series, cols, idx, fv, kind)`` where
    ``series`` maps each column label to a ``(sp_index, sp_values)`` pair;
    ``cols`` and ``idx`` are decoded with ``_unpickle_array``.
    """
    series, cols, idx, fv, kind = state

    columns = _unpickle_array(cols)
    index = _unpickle_array(idx)

    # Every column is rebuilt with the shared fill value ``fv``.
    rebuilt = dict(
        (name, SparseSeries(sp_values, sparse_index=sp_index, fill_value=fv))
        for name, (sp_index, sp_values) in series.iteritems())

    self._series = rebuilt
    self.index = index
    self.columns = columns
    self.default_fill_value = fv
    self.default_kind = kind
def _set_item(self, key, value):
    """
    Store ``value`` under ``key`` as a column of this frame.

    A Series is reindexed to ``self.index`` and, unless it already is a
    SparseSeries, converted to one.  Anything else (other iterables and
    scalars alike) is handed straight to the SparseSeries constructor.
    ``key`` is registered via ``self._insert_column`` when it is not yet in
    ``self.columns``.
    """
    def to_sparse(x):
        # Build a column with this frame's index and sparse defaults.
        return SparseSeries(x, index=self.index,
                            fill_value=self.default_fill_value,
                            kind=self.default_kind)

    if hasattr(value, '__iter__') and isinstance(value, Series):
        column = value.reindex(self.index)
        if not isinstance(value, SparseSeries):
            column = to_sparse(column)
    else:
        # Non-Series iterable, or scalar
        column = to_sparse(value)

    self._series[key] = column

    if key not in self.columns:
        self._insert_column(key)
def _init_dict(self, data, index, columns, dtype=None):
    """
    Turn a mapping of column label -> values into SparseSeries columns.

    Parameters
    ----------
    data : mapping exposing ``iteritems`` (and ``keys`` when ``columns``
        is None)
    index : optional
        Obtained from ``extract_index(data)`` when None.
    columns : optional
        When given, entries of ``data`` whose key is not in ``columns`` are
        dropped; when None, the sorted keys of ``data`` are used.
    dtype : optional
        Accepted but not used in this method.

    Returns
    -------
    (sdict, columns, index)
    """
    # pre-filter out columns if we passed it
    if columns is None:
        columns = Index(_try_sort(data.keys()))
    else:
        columns = _ensure_index(columns)
        wanted = {}
        for label, values in data.iteritems():
            if label in columns:
                wanted[label] = values
        data = wanted

    if index is None:
        index = extract_index(data)

    def to_sparse(x):
        return SparseSeries(x, index=index, kind=self.default_kind,
                            fill_value=self.default_fill_value, copy=True)

    sdict = {}
    for label, values in data.iteritems():
        if not isinstance(values, Series):
            if isinstance(values, dict):
                # Lay the dict out along the index, NaN where a key is absent
                values = [values.get(i, nan) for i in index]
            values = to_sparse(values)
        else:
            # Force alignment, no copy necessary
            if not values.index.equals(index):
                values = values.reindex(index)
            if not isinstance(values, SparseSeries):
                values = to_sparse(values)
        sdict[label] = values

    # TODO: figure out how to handle this case, all nan's?
    # add in any other columns we want to have (completeness)
    nan_vec = np.empty(len(index))
    nan_vec.fill(nan)
    for label in columns:
        if label in sdict:
            continue
        sdict[label] = to_sparse(nan_vec)

    return sdict, columns, index
def __setstate__(self, state):
    """
    Restore the frame from pickled ``state``.

    ``state`` is unpacked as ``(series, cols, idx, fv, kind)`` where
    ``series`` maps each column label to a ``(sp_index, sp_values)`` pair.
    ``cols`` and ``idx`` are used directly when they are already Index
    objects and decoded with ``_unpickle_array`` otherwise.
    """
    series, cols, idx, fv, kind = state

    if isinstance(cols, Index):
        columns = cols
    else:  # pragma: no cover
        columns = _unpickle_array(cols)

    if isinstance(idx, Index):
        index = idx
    else:  # pragma: no cover
        index = _unpickle_array(idx)

    # Every column is rebuilt with the shared fill value ``fv``.
    rebuilt = {}
    for name, (sp_index, sp_values) in compat.iteritems(series):
        rebuilt[name] = SparseSeries(sp_values, sparse_index=sp_index,
                                     fill_value=fv)

    self._series = rebuilt
    self.index = index
    self.columns = columns
    self.default_fill_value = fv
    self.default_kind = kind