Esempio n. 1
0
    def _init_spmatrix(self,
                       data,
                       index,
                       columns,
                       dtype=None,
                       fill_value=None):
        """Init self from a scipy.sparse matrix.

        The matrix is converted to COO format and split column by column
        into SparseSeries, which are then handed to ``_init_dict``.

        Parameters
        ----------
        data : scipy.sparse matrix
            Any object exposing ``tocoo()``.
        index, columns
            Passed through ``self._prep_index`` together with ``data``.
        dtype : optional
            Forwarded unchanged to ``_init_dict``.
        fill_value : optional
            Fill value given to every SparseSeries built here.

        Returns
        -------
        Whatever ``self._init_dict(sdict, index, columns, dtype)`` returns.
        """
        index, columns = self._prep_index(data, index, columns)
        data = data.tocoo()
        N = len(index)

        # Construct a dict of SparseSeries
        sdict = {}
        values = Series(data.data, index=data.row, copy=False)
        for col, rowvals in values.groupby(data.col):
            # get_blocks expects int32 row indices in sorted order.  Sort the
            # values together with their row labels: COO entries are not
            # required to be row-ordered, and sorting the row array alone
            # would pair the values with the wrong rows.
            rowvals = rowvals.sort_index()
            rows = rowvals.index.values.astype(np.int32)
            blocs, blens = get_blocks(rows)

            sdict[columns[col]] = SparseSeries(rowvals.values,
                                               index=index,
                                               fill_value=fill_value,
                                               sparse_index=BlockIndex(
                                                   N, blocs, blens))

        # Add any columns that were empty and thus not grouped on above
        sdict.update({
            column: SparseSeries(index=index,
                                 fill_value=fill_value,
                                 sparse_index=BlockIndex(N, [], []))
            for column in columns if column not in sdict
        })

        return self._init_dict(sdict, index, columns, dtype)
Esempio n. 2
0
    def __init__(self, data=None, index=None, columns=None,
                 default_kind='block', default_fill_value=None):
        """Build the frame from a dict, ndarray/list, DataFrame or nothing.

        Parameters
        ----------
        data : dict, ndarray, list, DataFrame or None
            dict and DataFrame input goes through ``_init_dict`` (a
            DataFrame uses its own index and columns); ndarray and list
            input goes through ``_init_matrix``; None creates an empty
            frame, with one NaN-initialised column per entry of
            ``columns`` if that is given.
        index, columns : optional
            Row and column labels.
        default_kind : default 'block'
            Stored on the instance and used as ``kind`` for the columns
            created here.
        default_fill_value : optional
            Stored on the instance; None is replaced by ``np.nan``.

        Raises
        ------
        TypeError
            If ``data`` is none of the supported types.
        """
        if default_fill_value is None:
            default_fill_value = np.nan

        self.default_kind = default_kind
        self.default_fill_value = default_fill_value

        if isinstance(data, dict):
            sdict, columns, index = self._init_dict(data, index, columns)
        elif isinstance(data, (np.ndarray, list)):
            sdict, columns, index = self._init_matrix(data, index, columns)
        elif isinstance(data, DataFrame):
            sdict, columns, index = self._init_dict(data, data.index,
                                                    data.columns)
        elif data is None:
            sdict = {}

            if index is None:
                index = Index([])
            else:
                index = _ensure_index(index)

            if columns is None:
                columns = Index([])
            else:
                for c in columns:
                    sdict[c] = SparseSeries(np.nan, index=index,
                                            kind=self.default_kind,
                                            fill_value=self.default_fill_value)
        else:
            # Without this branch an unsupported input left ``sdict``
            # unbound and surfaced as an UnboundLocalError further down.
            raise TypeError('unsupported type for data: %r' % (type(data),))

        self._series = sdict
        self.columns = columns
        self.index = index
Esempio n. 3
0
    def shift(self, periods, freq=None, **kwds):
        """
        Shift the frame by ``periods``; analogous to DataFrame.shift.

        ``freq`` and ``kwds`` are resolved to an offset with
        ``_resolve_offset``.  Without an offset every column is shifted
        through its own ``shift(periods)`` and the index is reused.  With an
        offset the index is shifted and every column is rebuilt over the new
        index from its existing sparse values, sparse index and fill value.
        """
        from pandas.core.series import _resolve_offset

        offset = _resolve_offset(freq, kwds)

        if offset is None:
            new_index = self.index
            shifted = dict((name, column.shift(periods))
                           for name, column in self.iteritems())
        else:
            new_index = self.index.shift(periods, offset)
            shifted = dict(
                (name, SparseSeries(column.sp_values,
                                    index=new_index,
                                    sparse_index=column.sp_index,
                                    fill_value=column.fill_value))
                for name, column in self.iteritems())

        return SparseDataFrame(shifted,
                               index=new_index,
                               columns=self.columns,
                               default_fill_value=self.default_fill_value,
                               default_kind=self.default_kind)
Esempio n. 4
0
    def _unpickle_sparse_frame_compat(self, state):
        """Restore the frame from the original pickle format.

        ``state`` is a 5-tuple ``(series, cols, idx, fv, kind)``.  ``series``
        maps each column to a ``(sp_index, sp_values)`` pair; ``cols`` and
        ``idx`` are used as-is when they are Index objects and are otherwise
        passed through ``_unpickle_array``.
        """
        packed, raw_columns, raw_index, fill, kind = state

        def _as_index(raw):
            # Index objects are used directly; anything else is decoded.
            if not isinstance(raw, Index):  # pragma: no cover
                from pandas.io.pickle import _unpickle_array
                return _unpickle_array(raw)
            return raw

        columns = _as_index(raw_columns)
        index = _as_index(raw_index)

        frame = DataFrame()
        for name, (sp_idx, sp_vals) in compat.iteritems(packed):
            frame[name] = SparseSeries(sp_vals,
                                       sparse_index=sp_idx,
                                       fill_value=fill)

        self._data = to_manager(frame, columns, index)
        self._default_fill_value = fill
        self._default_kind = kind
Esempio n. 5
0
    def shift(self, periods, offset=None, timeRule=None):
        """
        Shift the frame by ``periods``; analogous to DataFrame.shift.

        When only ``timeRule`` is given it is turned into an offset with
        ``datetools.getOffset``.  Without an offset every column is shifted
        through its own ``shift(periods)`` and the index is reused.  With an
        offset the index is shifted and every column is rebuilt over the new
        index from its existing sparse values, sparse index and fill value.
        """
        if offset is None and timeRule is not None:
            offset = datetools.getOffset(timeRule)

        if offset is None:
            new_index = self.index

            def moved(column):
                return column.shift(periods)
        else:
            new_index = self.index.shift(periods, offset)

            def moved(column):
                return SparseSeries(column.sp_values,
                                    index=new_index,
                                    sparse_index=column.sp_index,
                                    fill_value=column.fill_value)

        shifted = {}
        for name, column in self.iteritems():
            shifted[name] = moved(column)

        return SparseDataFrame(shifted,
                               index=new_index,
                               columns=self.columns,
                               default_fill_value=self.default_fill_value,
                               default_kind=self.default_kind)
Esempio n. 6
0
    def __setstate__(self, state):
        """Restore the frame from its pickled state.

        ``state`` is a 5-tuple ``(series, cols, idx, fv, kind)``.  ``cols``
        and ``idx`` are decoded with ``_unpickle_array``; ``series`` maps
        each column to a ``(sp_index, sp_values)`` pair that is turned back
        into a SparseSeries using ``fv`` as its fill value.
        """
        packed, raw_columns, raw_index, fill, kind = state
        columns = _unpickle_array(raw_columns)
        index = _unpickle_array(raw_index)

        rebuilt = dict(
            (name, SparseSeries(sp_vals, sparse_index=sp_idx,
                                fill_value=fill))
            for name, (sp_idx, sp_vals) in packed.iteritems())

        self._series = rebuilt
        self.index = index
        self.columns = columns
        self.default_fill_value = fill
        self.default_kind = kind
Esempio n. 7
0
    def _set_item(self, key, value):
        """Store ``value`` as the column ``key``.

        A Series is first reindexed to ``self.index`` and, unless it is
        already a SparseSeries, converted to one.  Any other value (iterable
        or scalar) is passed straight to the SparseSeries constructor with
        the frame's index, default fill value and default kind.  A key not
        yet in ``self.columns`` is registered through ``_insert_column``.
        """
        def to_sparse(obj):
            return SparseSeries(obj, index=self.index,
                                fill_value=self.default_fill_value,
                                kind=self.default_kind)

        if hasattr(value, '__iter__') and isinstance(value, Series):
            column = value.reindex(self.index)
            if not isinstance(value, SparseSeries):
                column = to_sparse(column)
        else:
            # Non-Series iterables and scalars take the same path.
            column = to_sparse(value)

        self._series[key] = column

        if key not in self.columns:
            self._insert_column(key)
Esempio n. 8
0
    def _init_dict(self, data, index, columns, dtype=None):
        """Turn a mapping of column -> values into SparseSeries columns.

        If ``columns`` is given, ``data`` is first restricted to those keys;
        otherwise the columns are the keys of ``data`` passed through
        ``_try_sort``.  A missing ``index`` is derived with
        ``extract_index``.  Requested columns absent from ``data`` are
        filled with an all-NaN vector.  ``dtype`` is accepted but not used
        here.

        Returns
        -------
        (sdict, columns, index) where ``sdict`` maps each column to its
        series.
        """
        if columns is None:
            columns = Index(_try_sort(data.keys()))
        else:
            # pre-filter out columns if we passed it
            columns = _ensure_index(columns)
            data = dict((k, v) for k, v in data.iteritems() if k in columns)

        if index is None:
            index = extract_index(data)

        def make_sparse(values):
            return SparseSeries(values,
                                index=index,
                                kind=self.default_kind,
                                fill_value=self.default_fill_value,
                                copy=True)

        sdict = {}
        for name, col in data.iteritems():
            if isinstance(col, Series):
                # Align to the target index before any conversion
                if not col.index.equals(index):
                    col = col.reindex(index)

                if not isinstance(col, SparseSeries):
                    col = make_sparse(col)
            elif isinstance(col, dict):
                # Labels missing from the dict become NaN
                col = make_sparse([col.get(label, nan) for label in index])
            else:
                col = make_sparse(col)
            sdict[name] = col

        # TODO: figure out how to handle this case, all nan's?
        # add in any other columns we want to have (completeness)
        all_nan = np.empty(len(index))
        all_nan.fill(nan)
        for name in columns:
            if name not in sdict:
                sdict[name] = make_sparse(all_nan)

        return sdict, columns, index
Esempio n. 9
0
    def __setstate__(self, state):
        """Restore the frame from its pickled state.

        ``state`` is a 5-tuple ``(series, cols, idx, fv, kind)``.  ``cols``
        and ``idx`` are kept when they are already Index objects and decoded
        with ``_unpickle_array`` otherwise.  ``series`` maps each column to a
        ``(sp_index, sp_values)`` pair that is turned back into a
        SparseSeries using ``fv`` as its fill value.
        """
        packed, raw_columns, raw_index, fill, kind = state

        if isinstance(raw_columns, Index):
            columns = raw_columns
        else:  # pragma: no cover
            columns = _unpickle_array(raw_columns)

        if isinstance(raw_index, Index):
            index = raw_index
        else:  # pragma: no cover
            index = _unpickle_array(raw_index)

        rebuilt = dict(
            (name, SparseSeries(sp_vals, sparse_index=sp_idx,
                                fill_value=fill))
            for name, (sp_idx, sp_vals) in compat.iteritems(packed))

        self._series = rebuilt
        self.index = index
        self.columns = columns
        self.default_fill_value = fill
        self.default_kind = kind