def stack_sparse_frame(frame):
    """
    Stack the columns of a sparse frame into a one-column LongPanel.

    Only makes sense when fill_value is NaN: just the explicitly stored
    points of each column are carried over to the result.

    Parameters
    ----------
    frame : sparse frame
        Must provide ``iteritems()``, ``index`` and ``columns``; each column
        must expose ``sp_index``, ``sp_values`` and ``fill_value``.

    Returns
    -------
    lp : LongPanel
        Single column named 'foo', indexed by (frame.index, frame.columns)
        and sorted on level 0.

    Raises
    ------
    Exception
        If any column has a fill value that is not NaN.
    """
    column_series = [series for _, series in frame.iteritems()]

    # Number of stored points per column, and overall.
    points_per_column = [series.sp_index.npoints for series in column_series]
    total_points = sum(points_per_column)

    # Column position, repeated once for every stored point in that column.
    minor_labels = np.repeat(np.arange(len(frame.columns)),
                             points_per_column)

    index_chunks = []
    value_chunks = []
    for series in column_series:
        if not np.isnan(series.fill_value):
            raise Exception('This routine assumes NaN fill value')

        # Integer row positions of the stored points of this column.
        index_chunks.append(series.sp_index.to_int_index().indices)
        value_chunks.append(series.sp_values)

    major_labels = np.concatenate(index_chunks)
    flat_values = np.concatenate(value_chunks)

    stacked_index = MultiIndex(levels=[frame.index, frame.columns],
                               labels=[major_labels, minor_labels])
    panel = LongPanel(flat_values.reshape((total_points, 1)),
                      index=stacked_index, columns=['foo'])
    return panel.sortlevel(level=0)
def to_long(self, filter_observations=True):
    """
    Convert SparsePanel to (dense) LongPanel

    Only (major, minor) locations that have a stored value in every item
    are kept in the result.

    Parameters
    ----------
    filter_observations : boolean, default True
        Must be true; the unfiltered conversion is not supported.

    Returns
    -------
    lp : LongPanel

    Raises
    ------
    Exception
        If filter_observations is false.
    """
    if not filter_observations:
        raise Exception('filter_observations=False not supported for '
                        'SparsePanel.to_long')

    n_items, n_major, n_minor = self.shape

    # One counter per (major, minor) cell, laid out column-major.
    hits = np.zeros(n_major * n_minor, dtype=int)

    stacked = {}
    for item in self.items:
        item_values, major, minor = _stack_sparse_info(self[item])

        # Flat column-major position of every stored value of this item.
        flat_pos = minor * n_major + major
        hits.put(flat_pos, hits.take(flat_pos) + 1)

        stacked[item] = (item_values, flat_pos)

    # Cells for which every item has a stored observation.
    complete = hits == n_items

    # For each item, keep only the stored values that sit on complete cells.
    selected = []
    for item in self.items:
        item_values, flat_pos = stacked[item]
        selected.append(item_values[complete.take(flat_pos)])
    dense_values = np.column_stack(selected)

    kept, = complete.nonzero()

    # Positions are still column-major: quotient -> minor, remainder -> major.
    minor_labels, major_labels = divmod(kept, n_major)

    long_index = MultiIndex(levels=[self.major_axis, self.minor_axis],
                            labels=[major_labels, minor_labels])
    panel = LongPanel(dense_values, index=long_index, columns=self.items)
    return panel.sortlevel(level=0)