def test_fromRecords_toRecords(self):
    """Round-trip LongPanel through structured array / DataFrame / dict inputs."""
    # structured array with two object-dtype key fields and two float fields
    K = 10

    recs = np.zeros(K, dtype='O,O,f8,f8')
    # list(...) so the * 2 repetition also works on Python 3, where
    # range() returns a lazy object that cannot be multiplied
    recs['f0'] = list(range(K // 2)) * 2
    recs['f1'] = np.arange(K) / (K // 2)
    recs['f2'] = np.arange(K) * 2
    recs['f3'] = np.arange(K)

    lp = LongPanel.fromRecords(recs, 'f0', 'f1')
    self.assertEqual(len(lp.items), 2)

    lp = LongPanel.fromRecords(recs, 'f0', 'f1', exclude=['f2'])
    self.assertEqual(len(lp.items), 1)

    # toRecords adds the two index fields back on top of the items
    torecs = lp.toRecords()
    self.assertEqual(len(torecs.dtype.names), len(lp.items) + 2)

    # DataFrame
    df = DataFrame.from_records(recs)
    lp = LongPanel.fromRecords(df, 'f0', 'f1', exclude=['f2'])
    self.assertEqual(len(lp.items), 1)

    # dict of arrays
    series = DataFrame.from_records(recs)._series
    lp = LongPanel.fromRecords(series, 'f0', 'f1', exclude=['f2'])
    self.assertEqual(len(lp.items), 1)
    # exclusion must not mutate the input dict
    # (assertTrue replaces the deprecated assert_ alias)
    self.assertTrue('f2' in series)

    # non-structured input is rejected
    self.assertRaises(Exception, LongPanel.fromRecords,
                      np.zeros((3, 3)), 0, 1)
def _unstack_vector(self, vec, index=None):
    """Reshape a stacked 1-d vector back into wide (major x minor) form.

    Parameters
    ----------
    vec : ndarray
        Stacked observations, one per entry of `index`.
    index : MultiIndex, optional
        Defaults to the transformed y index (``self._y_trans.index``).

    Returns
    -------
    The widened 'dummy' item of the intermediate LongPanel.
    """
    idx = self._y_trans.index if index is None else index

    # a single-column LongPanel lets to_wide do the pivoting for us
    as_column = vec.reshape((len(vec), 1))
    stacked = LongPanel(as_column, index=idx, columns=['dummy'])
    return stacked.to_wide()['dummy']
def stack_sparse_frame(frame):
    """Stack a sparse frame's stored values into a single-item LongPanel.

    Only makes sense when every column's fill_value is NaN; any other
    fill value raises.
    """
    # number of stored (non-fill) points contributed by each column
    npoints = [s.sp_index.npoints for _, s in frame.iteritems()]
    nobs = sum(npoints)

    # each column contributes a consecutive run of its own minor label;
    # this is pretty fast
    minor_labels = np.repeat(np.arange(len(frame.columns)), npoints)

    index_pieces = []
    value_pieces = []
    for _, series in frame.iteritems():
        if not np.isnan(series.fill_value):
            raise Exception('This routine assumes NaN fill value')

        int_index = series.sp_index.to_int_index()
        index_pieces.append(int_index.indices)
        value_pieces.append(series.sp_values)

    major_labels = np.concatenate(index_pieces)
    stacked_values = np.concatenate(value_pieces)

    index = MultiIndex(levels=[frame.index, frame.columns],
                       labels=[major_labels, minor_labels])

    lp = LongPanel(stacked_values.reshape((nobs, 1)), index=index,
                   columns=['foo'])
    return lp.sortlevel(level=0)
def test_fromRecords_toRecords(self):
    """Round-trip LongPanel through structured array / DataFrame / dict inputs."""
    # structured array with two object-dtype key fields and two float fields
    K = 10

    recs = np.zeros(K, dtype="O,O,f8,f8")
    # use // (floor division): under Python 3 true division K / 2 is a
    # float, making range(K / 2) a TypeError and changing the f1 values;
    # list(...) so the * 2 repetition works with Python 3's lazy range
    recs["f0"] = list(range(K // 2)) * 2
    recs["f1"] = np.arange(K) / (K // 2)
    recs["f2"] = np.arange(K) * 2
    recs["f3"] = np.arange(K)

    lp = LongPanel.fromRecords(recs, "f0", "f1")
    self.assertEqual(len(lp.items), 2)

    lp = LongPanel.fromRecords(recs, "f0", "f1", exclude=["f2"])
    self.assertEqual(len(lp.items), 1)

    # toRecords adds the two index fields back on top of the items
    torecs = lp.toRecords()
    self.assertEqual(len(torecs.dtype.names), len(lp.items) + 2)

    # DataFrame
    df = DataFrame.from_records(recs)
    lp = LongPanel.fromRecords(df, "f0", "f1", exclude=["f2"])
    self.assertEqual(len(lp.items), 1)

    # dict of arrays
    series = DataFrame.from_records(recs)._series
    lp = LongPanel.fromRecords(series, "f0", "f1", exclude=["f2"])
    self.assertEqual(len(lp.items), 1)
    # exclusion must not mutate the input dict
    # (assertTrue replaces the deprecated assert_ alias)
    self.assertTrue("f2" in series)

    # non-structured input is rejected
    self.assertRaises(Exception, LongPanel.fromRecords,
                      np.zeros((3, 3)), 0, 1)
def to_long(self, filter_observations=True):
    """
    Convert SparsePanel to (dense) LongPanel

    Only (major, minor) cells observed in *every* item are kept
    (``counts == I`` below), so the result is fully dense.

    Parameters
    ----------
    filter_observations : bool, default True
        Must be True; passing False raises, as unfiltered conversion is
        not supported here.

    Returns
    -------
    lp : LongPanel
    """
    if not filter_observations:
        raise Exception('filter_observations=False not supported for '
                        'SparsePanel.to_long')

    I, N, K = self.shape
    # one observation counter per (minor, major) cell, laid out column-major
    counts = np.zeros(N * K, dtype=int)

    # per-item stacked values and their flat cell positions
    d_values = {}
    d_indexer = {}

    for item in self.items:
        frame = self[item]

        values, major, minor = _stack_sparse_info(frame)

        # values are stacked column-major
        indexer = minor * N + major
        # bump the count for every cell this item populates
        counts.put(indexer, counts.take(indexer) + 1)  # cuteness

        d_values[item] = values
        d_indexer[item] = indexer

    # have full set of observations for each item
    mask = counts == I

    # for each item, take mask values at index locations for those sparse
    # values, and use that to select values
    values = np.column_stack([d_values[item][mask.take(d_indexer[item])]
                              for item in self.items])

    inds, = mask.nonzero()

    # still column major: recover (major, minor) from the flat position
    major_labels = inds % N
    minor_labels = inds // N

    index = MultiIndex(levels=[self.major_axis, self.minor_axis],
                       labels=[major_labels, minor_labels])

    lp = LongPanel(values, index=index, columns=self.items)
    return lp.sortlevel(level=0)
def _var_beta_panel(y, x, beta, xx, rmse, cluster_axis, nw_lags, nobs, df,
                    nw_overlap):
    """Covariance matrix of the panel-regression coefficient estimates.

    With no clustering: the classic (X'X)^-1 * rmse^2, or a Newey-West
    HAC sandwich when ``nw_lags`` is given.  With ``cluster_axis`` set,
    residual cross-products are aggregated per major-axis group (minor
    axis if ``cluster_axis == 1``) before the sandwich is formed.
    """
    from pandas.core.frame import group_agg
    from pandas.core.panel import LongPanel

    xx_inv = math.inv(xx)

    yv = y.values

    if cluster_axis is None:
        if nw_lags is None:
            # homoskedastic OLS covariance
            return xx_inv * (rmse ** 2)
        else:
            # Newey-West adjustment of the x'eps "meat" matrix
            resid = yv - np.dot(x.values, beta)
            m = (x.values.T * resid).T

            xeps = math.newey_west(m, nw_lags, nobs, df, nw_overlap)

            return np.dot(xx_inv, np.dot(xeps, xx_inv))
    else:
        Xb = np.dot(x.values, beta).reshape((len(x.values), 1))
        resid = LongPanel(yv[:, None] - Xb, index=y.index,
                          columns=['resid'])

        if cluster_axis == 1:
            # cluster on the minor axis by swapping axes first
            x = x.swapaxes()
            resid = resid.swapaxes()

        # sum x * resid within each major-axis group
        m = group_agg(x.values * resid.values, x.index._bounds,
                      lambda x: np.sum(x, axis=0))

        if nw_lags is None:
            nw_lags = 0

        # accumulate the (possibly HAC-adjusted) per-group contributions
        xox = 0
        for i in range(len(x.major_axis)):
            xox += math.newey_west(m[i : i + 1], nw_lags, nobs, df,
                                   nw_overlap)

        return np.dot(xx_inv, np.dot(xox, xx_inv))
def test_factors(self):
    """Smoke test: fromRecords on a structured array with factor columns."""
    # structured array: two key fields, two numeric fields, two factors
    K = 10
    recs = np.zeros(K, dtype='O,O,f8,f8,O,O')

    field_values = {
        'f0': ['one'] * 5 + ['two'] * 5,
        'f1': ['A', 'B', 'C', 'D', 'E'] * 2,
        'f2': np.arange(K) * 2,
        'f3': np.arange(K),
        'f4': ['A', 'B', 'C', 'D', 'E'] * 2,
        'f5': ['foo', 'bar'] * 5,
    }
    for name, values in field_values.items():
        recs[name] = values

    lp = LongPanel.fromRecords(recs, 'f0', 'f1')
def test_factors(self):
    """Smoke test: fromRecords on a structured array with factor columns."""
    # structured array: two key fields, two numeric fields, two factors
    K = 10
    recs = np.zeros(K, dtype="O,O,f8,f8,O,O")

    field_values = {
        "f0": ["one"] * 5 + ["two"] * 5,
        "f1": ["A", "B", "C", "D", "E"] * 2,
        "f2": np.arange(K) * 2,
        "f3": np.arange(K),
        "f4": ["A", "B", "C", "D", "E"] * 2,
        "f5": ["foo", "bar"] * 5,
    }
    for name, values in field_values.items():
        recs[name] = values

    lp = LongPanel.fromRecords(recs, "f0", "f1")
def _var_beta_panel(y, x, beta, xx, rmse, cluster_axis, nw_lags, nobs, df,
                    nw_overlap):
    """Covariance matrix of the panel-regression coefficient estimates.

    No clustering: (X'X)^-1 * rmse^2, or a Newey-West HAC sandwich when
    ``nw_lags`` is supplied.  With ``cluster_axis`` set, residual
    cross-products are aggregated per major-axis group (minor axis when
    ``cluster_axis == 1``) before the sandwich is assembled.
    """
    from pandas.core.frame import group_agg
    from pandas.core.panel import LongPanel

    xx_inv = math.inv(xx)

    yv = y.values

    if cluster_axis is None:
        if nw_lags is None:
            # homoskedastic OLS covariance
            return xx_inv * (rmse**2)
        else:
            # Newey-West adjustment of the x'eps "meat" matrix
            resid = yv - np.dot(x.values, beta)
            m = (x.values.T * resid).T

            xeps = math.newey_west(m, nw_lags, nobs, df, nw_overlap)

            return np.dot(xx_inv, np.dot(xeps, xx_inv))
    else:
        Xb = np.dot(x.values, beta).reshape((len(x.values), 1))
        resid = LongPanel(yv[:, None] - Xb, index=y.index,
                          columns=['resid'])

        if cluster_axis == 1:
            # cluster on the minor axis by swapping axes first
            x = x.swapaxes()
            resid = resid.swapaxes()

        # sum x * resid within each major-axis group
        m = group_agg(x.values * resid.values, x.index._bounds,
                      lambda x: np.sum(x, axis=0))

        if nw_lags is None:
            nw_lags = 0

        # accumulate the (possibly HAC-adjusted) per-group contributions
        xox = 0
        for i in range(len(x.major_axis)):
            xox += math.newey_west(m[i:i + 1], nw_lags, nobs, df,
                                   nw_overlap)

        return np.dot(xx_inv, np.dot(xox, xx_inv))
def _convertDummies(dummies, mapping):
    """Rebuild a dummy-variable LongPanel with cleaned-up item names.

    With a (truthy) ``mapping``, each item is renamed to
    ``mapping[int(item)]``; otherwise items are stringified, floats via
    '%g' so e.g. 1.0 becomes '1'.
    """
    if mapping:
        # renames the dummies if a conversion dict is provided
        renamed = [mapping[int(item)] for item in dummies.items]
    else:
        renamed = []
        for item in dummies.items:
            if isinstance(item, float):
                renamed.append('%g' % item)
            else:
                renamed.append(str(item))

    return LongPanel(dummies.values, index=dummies.index,
                     columns=renamed)