Esempio n. 1
0
def stack_sparse_frame(frame):
    """
    Only makes sense when fill_value is NaN
    """
    lengths = [s.sp_index.npoints for _, s in frame.iteritems()]
    nobs = sum(lengths)

    # this is pretty fast
    minor_labels = np.repeat(np.arange(len(frame.columns)), lengths)

    inds_to_concat = []
    vals_to_concat = []
    for _, series in frame.iteritems():
        if not np.isnan(series.fill_value):
            raise Exception('This routine assumes NaN fill value')

        int_index = series.sp_index.to_int_index()
        inds_to_concat.append(int_index.indices)
        vals_to_concat.append(series.sp_values)

    major_labels = np.concatenate(inds_to_concat)
    stacked_values = np.concatenate(vals_to_concat)
    index = MultiIndex(levels=[frame.index, frame.columns],
                       labels=[major_labels, minor_labels])

    lp = LongPanel(stacked_values.reshape((nobs, 1)), index=index,
                   columns=['foo'])
    return lp.sortlevel(level=0)
Esempio n. 2
0
def stack_sparse_frame(frame):
    """
    Only makes sense when fill_value is NaN
    """
    lengths = [s.sp_index.npoints for _, s in frame.iteritems()]
    nobs = sum(lengths)

    # this is pretty fast
    minor_labels = np.repeat(np.arange(len(frame.columns)), lengths)

    inds_to_concat = []
    vals_to_concat = []
    for _, series in frame.iteritems():
        if not np.isnan(series.fill_value):
            raise Exception('This routine assumes NaN fill value')

        int_index = series.sp_index.to_int_index()
        inds_to_concat.append(int_index.indices)
        vals_to_concat.append(series.sp_values)

    major_labels = np.concatenate(inds_to_concat)
    stacked_values = np.concatenate(vals_to_concat)
    index = MultiIndex(levels=[frame.index, frame.columns],
                       labels=[major_labels, minor_labels])

    lp = LongPanel(stacked_values.reshape((nobs, 1)),
                   index=index,
                   columns=['foo'])
    return lp.sortlevel(level=0)
Esempio n. 3
0
    def to_long(self, filter_observations=True):
        """
        Convert SparsePanel to (dense) LongPanel

        Returns
        -------
        lp : LongPanel
        """
        if not filter_observations:
            raise Exception('filter_observations=False not supported for '
                            'SparsePanel.to_long')

        I, N, K = self.shape
        counts = np.zeros(N * K, dtype=int)

        d_values = {}
        d_indexer = {}

        for item in self.items:
            frame = self[item]

            values, major, minor = _stack_sparse_info(frame)

            # values are stacked column-major
            indexer = minor * N + major
            counts.put(indexer, counts.take(indexer) + 1) # cuteness

            d_values[item] = values
            d_indexer[item] = indexer

        # have full set of observations for each item
        mask = counts == I

        # for each item, take mask values at index locations for those sparse
        # values, and use that to select values
        values = np.column_stack([d_values[item][mask.take(d_indexer[item])]
                                  for item in self.items])

        inds, = mask.nonzero()

        # still column major
        major_labels = inds % N
        minor_labels = inds // N

        index = MultiIndex(levels=[self.major_axis, self.minor_axis],
                           labels=[major_labels, minor_labels])

        lp = LongPanel(values, index=index, columns=self.items)
        return lp.sortlevel(level=0)