Esempio n. 1
0
    def test_fromRecords_toRecords(self):
        """
        Exercise LongPanel.fromRecords / toRecords with three kinds of input:
        a structured array, a DataFrame and a dict of arrays.
        """
        # structured array
        K = 10

        recs = np.zeros(K, dtype='O,O,f8,f8')
        # Materialise the range as a list before repeating it: a lazy range
        # object (Python 3) does not support ``* 2``.
        recs['f0'] = list(range(K // 2)) * 2
        recs['f1'] = np.arange(K) / (K // 2)
        recs['f2'] = np.arange(K) * 2
        recs['f3'] = np.arange(K)

        # 'f0' and 'f1' are handed over as the two label fields; the two
        # remaining fields are expected to show up as items
        lp = LongPanel.fromRecords(recs, 'f0', 'f1')
        self.assertEqual(len(lp.items), 2)

        lp = LongPanel.fromRecords(recs, 'f0', 'f1', exclude=['f2'])
        self.assertEqual(len(lp.items), 1)

        # the record array is expected to carry the items plus two more fields
        torecs = lp.toRecords()
        self.assertEqual(len(torecs.dtype.names), len(lp.items) + 2)

        # DataFrame
        df = DataFrame.from_records(recs)
        lp = LongPanel.fromRecords(df, 'f0', 'f1', exclude=['f2'])
        self.assertEqual(len(lp.items), 1)

        # dict of arrays
        series = DataFrame.from_records(recs)._series
        lp = LongPanel.fromRecords(series, 'f0', 'f1', exclude=['f2'])
        self.assertEqual(len(lp.items), 1)
        # the excluded field must still be present in the caller's mapping
        # (assertTrue replaces the deprecated ``assert_`` alias)
        self.assertTrue('f2' in series)

        # a plain 2-D array is not an accepted input
        self.assertRaises(Exception, LongPanel.fromRecords, np.zeros((3, 3)),
                          0, 1)
Esempio n. 2
0
File: plm.py Progetto: ara818/pandas
    def _unstack_vector(self, vec, index=None):
        """
        Turn a flat vector into wide form by way of a one-column LongPanel.

        Parameters
        ----------
        vec : array-like with ``reshape`` and a length
        index : optional
            Index for the temporary LongPanel. When None,
            ``self._y_trans.index`` is used.

        Returns
        -------
        The 'dummy' entry of the temporary panel's ``to_wide()`` result.
        """
        if index is None:
            index = self._y_trans.index

        # hand the vector over as a single (len(vec), 1) column
        as_column = vec.reshape((len(vec), 1))
        long_form = LongPanel(as_column, index=index, columns=['dummy'])

        wide_form = long_form.to_wide()
        return wide_form['dummy']
Esempio n. 3
0
    def test_fromRecords_toRecords(self):
        """
        Exercise LongPanel.fromRecords / toRecords with three kinds of input:
        a structured array, a DataFrame and a dict of arrays.
        """
        # structured array
        K = 10

        recs = np.zeros(K, dtype='O,O,f8,f8')
        # Materialise the range as a list before repeating it: a lazy range
        # object (Python 3) does not support ``* 2``.
        recs['f0'] = list(range(K // 2)) * 2
        recs['f1'] = np.arange(K) / (K // 2)
        recs['f2'] = np.arange(K) * 2
        recs['f3'] = np.arange(K)

        # 'f0' and 'f1' are handed over as the two label fields; the two
        # remaining fields are expected to show up as items
        lp = LongPanel.fromRecords(recs, 'f0', 'f1')
        self.assertEqual(len(lp.items), 2)

        lp = LongPanel.fromRecords(recs, 'f0', 'f1', exclude=['f2'])
        self.assertEqual(len(lp.items), 1)

        # the record array is expected to carry the items plus two more fields
        torecs = lp.toRecords()
        self.assertEqual(len(torecs.dtype.names), len(lp.items) + 2)

        # DataFrame
        df = DataFrame.from_records(recs)
        lp = LongPanel.fromRecords(df, 'f0', 'f1', exclude=['f2'])
        self.assertEqual(len(lp.items), 1)

        # dict of arrays
        series = DataFrame.from_records(recs)._series
        lp = LongPanel.fromRecords(series, 'f0', 'f1', exclude=['f2'])
        self.assertEqual(len(lp.items), 1)
        # the excluded field must still be present in the caller's mapping
        # (assertTrue replaces the deprecated ``assert_`` alias)
        self.assertTrue('f2' in series)

        # a plain 2-D array is not an accepted input
        self.assertRaises(Exception, LongPanel.fromRecords, np.zeros((3, 3)),
                          0, 1)
Esempio n. 4
0
def stack_sparse_frame(frame):
    """
    Stack the ``sp_values`` of every column of a sparse frame into a
    single-column ('foo') LongPanel, sorted on index level 0.

    Only makes sense when fill_value is NaN

    Raises
    ------
    Exception
        If any column has a fill_value that is not NaN.
    """
    # how many points each column contributes, and how many overall
    points_per_column = []
    for _, column in frame.iteritems():
        points_per_column.append(column.sp_index.npoints)
    total_points = sum(points_per_column)

    # this is pretty fast
    column_codes = np.arange(len(frame.columns))
    minor_labels = np.repeat(column_codes, points_per_column)

    row_positions = []
    stored_values = []
    for _, column in frame.iteritems():
        if np.isnan(column.fill_value):
            row_positions.append(column.sp_index.to_int_index().indices)
            stored_values.append(column.sp_values)
        else:
            raise Exception('This routine assumes NaN fill value')

    major_labels = np.concatenate(row_positions)
    stacked_values = np.concatenate(stored_values)
    index = MultiIndex(levels=[frame.index, frame.columns],
                       labels=[major_labels, minor_labels])

    # values go in as one (total_points, 1) column
    as_column = stacked_values.reshape((total_points, 1))
    panel = LongPanel(as_column, index=index, columns=['foo'])
    return panel.sortlevel(level=0)
Esempio n. 5
0
    def test_fromRecords_toRecords(self):
        """
        Exercise LongPanel.fromRecords / toRecords with three kinds of input:
        a structured array, a DataFrame and a dict of arrays.
        """
        # structured array
        K = 10

        recs = np.zeros(K, dtype="O,O,f8,f8")
        # Use floor division so range() always receives an int, and
        # materialise the range as a list before repeating it: a lazy range
        # object (Python 3) does not support ``* 2``.
        recs["f0"] = list(range(K // 2)) * 2
        recs["f1"] = np.arange(K) / (K // 2)
        recs["f2"] = np.arange(K) * 2
        recs["f3"] = np.arange(K)

        # "f0" and "f1" are handed over as the two label fields; the two
        # remaining fields are expected to show up as items
        lp = LongPanel.fromRecords(recs, "f0", "f1")
        self.assertEqual(len(lp.items), 2)

        lp = LongPanel.fromRecords(recs, "f0", "f1", exclude=["f2"])
        self.assertEqual(len(lp.items), 1)

        # the record array is expected to carry the items plus two more fields
        torecs = lp.toRecords()
        self.assertEqual(len(torecs.dtype.names), len(lp.items) + 2)

        # DataFrame
        df = DataFrame.from_records(recs)
        lp = LongPanel.fromRecords(df, "f0", "f1", exclude=["f2"])
        self.assertEqual(len(lp.items), 1)

        # dict of arrays
        series = DataFrame.from_records(recs)._series
        lp = LongPanel.fromRecords(series, "f0", "f1", exclude=["f2"])
        self.assertEqual(len(lp.items), 1)
        # the excluded field must still be present in the caller's mapping
        # (assertTrue replaces the deprecated ``assert_`` alias)
        self.assertTrue("f2" in series)

        # a plain 2-D array is not an accepted input
        self.assertRaises(Exception, LongPanel.fromRecords, np.zeros((3, 3)),
                          0, 1)
Esempio n. 6
0
def stack_sparse_frame(frame):
    """
    Stack the ``sp_values`` of every column of a sparse frame into a
    single-column ('foo') LongPanel, sorted on index level 0.

    Only makes sense when fill_value is NaN

    Raises
    ------
    Exception
        If any column has a fill_value that is not NaN.
    """
    # per-column point counts drive both the total size and the minor labels
    npoints_by_column = []
    for _, col in frame.iteritems():
        npoints_by_column.append(col.sp_index.npoints)
    n_total = sum(npoints_by_column)

    # this is pretty fast
    minor_labels = np.repeat(np.arange(len(frame.columns)),
                             npoints_by_column)

    index_chunks = []
    value_chunks = []
    for _, col in frame.iteritems():
        if np.isnan(col.fill_value):
            dense_index = col.sp_index.to_int_index()
            index_chunks.append(dense_index.indices)
            value_chunks.append(col.sp_values)
        else:
            raise Exception('This routine assumes NaN fill value')

    major_labels = np.concatenate(index_chunks)
    stacked_values = np.concatenate(value_chunks)
    index = MultiIndex(levels=[frame.index, frame.columns],
                       labels=[major_labels, minor_labels])

    # values go in as one (n_total, 1) column
    single_column = stacked_values.reshape((n_total, 1))
    stacked_panel = LongPanel(single_column, index=index, columns=['foo'])
    return stacked_panel.sortlevel(level=0)
Esempio n. 7
0
    def _unstack_vector(self, vec, index=None):
        """
        Turn a flat vector into wide form by way of a one-column LongPanel.

        Parameters
        ----------
        vec : array-like with ``reshape`` and a length
        index : optional
            Index for the temporary LongPanel. When None,
            ``self._y_trans.index`` is used.

        Returns
        -------
        The 'dummy' entry of the temporary panel's ``to_wide()`` result.
        """
        if index is None:
            index = self._y_trans.index

        # hand the vector over as a single (len(vec), 1) column
        n_rows = len(vec)
        column_values = vec.reshape((n_rows, 1))
        tmp_panel = LongPanel(column_values, index=index, columns=['dummy'])

        return tmp_panel.to_wide()['dummy']
Esempio n. 8
0
    def to_long(self, filter_observations=True):
        """
        Convert SparsePanel to (dense) LongPanel

        Only positions that were reported by exactly ``I`` item frames are
        kept, where ``I`` is the first entry of ``self.shape`` (presumably
        the number of items, i.e. positions observed for every item).

        Parameters
        ----------
        filter_observations : bool, default True
            Must be true; a false value is rejected.

        Returns
        -------
        lp : LongPanel
            Built with ``self.items`` as columns and sorted on index level 0.

        Raises
        ------
        Exception
            If ``filter_observations`` is false.
        """
        if not filter_observations:
            raise Exception('filter_observations=False not supported for '
                            'SparsePanel.to_long')

        I, N, K = self.shape
        hits = np.zeros(N * K, dtype=int)

        values_by_item = {}
        positions_by_item = {}

        for item in self.items:
            item_values, major, minor = _stack_sparse_info(self[item])

            # values are stacked column-major, so flatten (major, minor)
            # into a single column-major position
            flat_positions = minor * N + major
            # bump the hit count at every position this item reported
            hits.put(flat_positions, hits.take(flat_positions) + 1)

            values_by_item[item] = item_values
            positions_by_item[item] = flat_positions

        # have full set of observations for each item
        complete = hits == I

        # for each item, look up the mask at that item's positions and use
        # the result to pick the values that survive
        kept_columns = []
        for item in self.items:
            keep = complete.take(positions_by_item[item])
            kept_columns.append(values_by_item[item][keep])
        values = np.column_stack(kept_columns)

        inds = complete.nonzero()[0]

        # still column major: undo the flattening
        minor_labels = inds // N
        major_labels = inds % N

        index = MultiIndex(levels=[self.major_axis, self.minor_axis],
                           labels=[major_labels, minor_labels])

        long_panel = LongPanel(values, index=index, columns=self.items)
        return long_panel.sortlevel(level=0)
Esempio n. 9
0
File: plm.py Progetto: ara818/pandas
def _var_beta_panel(y, x, beta, xx, rmse, cluster_axis,
                   nw_lags, nobs, df, nw_overlap):
    """
    Compute the variance estimate of ``beta`` for a panel regression.

    Three cases, chosen by ``cluster_axis`` and ``nw_lags``:

    * no clustering, no lags: ``inv(xx) * rmse ** 2``
    * no clustering, with lags: ``inv(xx) . NW(x * resid) . inv(xx)``
    * clustering: ``x * resid`` is summed within the groups given by
      ``x.index._bounds`` and one Newey-West term per group is accumulated
      between the two ``inv(xx)`` factors.

    The module-level ``math`` used here has to provide ``inv`` and
    ``newey_west``.

    Parameters
    ----------
    y, x : objects exposing ``.values`` (``y`` also ``.index``)
    beta : coefficient vector
    xx : matrix passed to ``math.inv``
    rmse : scalar, used only in the plain case
    cluster_axis : None or axis selector; the value 1 swaps the axes of
        ``x`` and the residuals before grouping
    nw_lags : None or lag count; None means 0 when clustering
    nobs, df, nw_overlap : forwarded unchanged to ``math.newey_west``
    """
    from pandas.core.frame import group_agg
    from pandas.core.panel import LongPanel

    xx_inv = math.inv(xx)

    yv = y.values

    if cluster_axis is None:
        if nw_lags is None:
            return xx_inv * (rmse ** 2)

        residuals = yv - np.dot(x.values, beta)
        moments = (x.values.T * residuals).T

        xeps = math.newey_west(moments, nw_lags, nobs, df, nw_overlap)

        return np.dot(xx_inv, np.dot(xeps, xx_inv))

    # clustered case: residuals are wrapped in a LongPanel so that they can
    # be axis-swapped together with x
    fitted = np.dot(x.values, beta).reshape((len(x.values), 1))
    resid = LongPanel(yv[:, None] - fitted, index=y.index,
                      columns=['resid'])

    if cluster_axis == 1:
        x = x.swapaxes()
        resid = resid.swapaxes()

    m = group_agg(x.values * resid.values, x.index._bounds,
                  lambda block: np.sum(block, axis=0))

    if nw_lags is None:
        nw_lags = 0

    xox = 0
    n_groups = len(x.major_axis)
    for start in range(n_groups):
        # one-row slice keeps the group sum two-dimensional
        group_row = m[start : start + 1]
        xox += math.newey_west(group_row, nw_lags, nobs, df, nw_overlap)

    return np.dot(xx_inv, np.dot(xox, xx_inv))
Esempio n. 10
0
    def test_factors(self):
        """
        Build a LongPanel from records that mix float and object (string)
        fields; the call only has to complete, its result is not inspected.
        """
        # structured array
        n_rows = 10
        letters = ['A', 'B', 'C', 'D', 'E'] * 2
        counter = np.arange(n_rows)

        recs = np.zeros(n_rows, dtype='O,O,f8,f8,O,O')
        recs['f0'] = ['one'] * 5 + ['two'] * 5
        recs['f1'] = letters
        recs['f2'] = counter * 2
        recs['f3'] = counter
        recs['f4'] = letters
        recs['f5'] = ['foo', 'bar'] * 5

        LongPanel.fromRecords(recs, 'f0', 'f1')
Esempio n. 11
0
    def test_factors(self):
        """
        Build a LongPanel from records that mix float and object (string)
        fields; the call only has to complete, its result is not inspected.
        """
        # structured array
        n_rows = 10
        letters = ['A', 'B', 'C', 'D', 'E'] * 2
        counter = np.arange(n_rows)

        recs = np.zeros(n_rows, dtype='O,O,f8,f8,O,O')
        recs['f0'] = ['one'] * 5 + ['two'] * 5
        recs['f1'] = letters
        recs['f2'] = counter * 2
        recs['f3'] = counter
        recs['f4'] = letters
        recs['f5'] = ['foo', 'bar'] * 5

        LongPanel.fromRecords(recs, 'f0', 'f1')
Esempio n. 12
0
    def test_factors(self):
        """
        Build a LongPanel from records that mix float and object (string)
        fields; the call only has to complete, its result is not inspected.
        """
        # structured array
        n_rows = 10
        letters = ["A", "B", "C", "D", "E"] * 2
        counter = np.arange(n_rows)

        recs = np.zeros(n_rows, dtype="O,O,f8,f8,O,O")
        recs["f0"] = ["one"] * 5 + ["two"] * 5
        recs["f1"] = letters
        recs["f2"] = counter * 2
        recs["f3"] = counter
        recs["f4"] = letters
        recs["f5"] = ["foo", "bar"] * 5

        LongPanel.fromRecords(recs, "f0", "f1")
Esempio n. 13
0
def _var_beta_panel(y, x, beta, xx, rmse, cluster_axis, nw_lags, nobs, df,
                    nw_overlap):
    """
    Compute the variance estimate of ``beta`` for a panel regression.

    Three cases, chosen by ``cluster_axis`` and ``nw_lags``:

    * no clustering, no lags: ``inv(xx) * rmse**2``
    * no clustering, with lags: ``inv(xx) . NW(x * resid) . inv(xx)``
    * clustering: ``x * resid`` is summed within the groups given by
      ``x.index._bounds`` and one Newey-West term per group is accumulated
      between the two ``inv(xx)`` factors.

    The module-level ``math`` used here has to provide ``inv`` and
    ``newey_west``.

    Parameters
    ----------
    y, x : objects exposing ``.values`` (``y`` also ``.index``)
    beta : coefficient vector
    xx : matrix passed to ``math.inv``
    rmse : scalar, used only in the plain case
    cluster_axis : None or axis selector; the value 1 swaps the axes of
        ``x`` and the residuals before grouping
    nw_lags : None or lag count; None means 0 when clustering
    nobs, df, nw_overlap : forwarded unchanged to ``math.newey_west``
    """
    from pandas.core.frame import group_agg
    from pandas.core.panel import LongPanel

    xx_inv = math.inv(xx)

    yv = y.values

    if cluster_axis is None:
        if nw_lags is None:
            return xx_inv * (rmse**2)

        residuals = yv - np.dot(x.values, beta)
        moments = (x.values.T * residuals).T

        xeps = math.newey_west(moments, nw_lags, nobs, df, nw_overlap)

        return np.dot(xx_inv, np.dot(xeps, xx_inv))

    # clustered case: residuals are wrapped in a LongPanel so that they can
    # be axis-swapped together with x
    fitted = np.dot(x.values, beta).reshape((len(x.values), 1))
    resid = LongPanel(yv[:, None] - fitted, index=y.index, columns=['resid'])

    if cluster_axis == 1:
        x = x.swapaxes()
        resid = resid.swapaxes()

    m = group_agg(x.values * resid.values, x.index._bounds,
                  lambda block: np.sum(block, axis=0))

    if nw_lags is None:
        nw_lags = 0

    xox = 0
    n_groups = len(x.major_axis)
    for start in range(n_groups):
        # one-row slice keeps the group sum two-dimensional
        group_row = m[start:start + 1]
        xox += math.newey_west(group_row, nw_lags, nobs, df, nw_overlap)

    return np.dot(xx_inv, np.dot(xox, xx_inv))
Esempio n. 14
0
def _convertDummies(dummies, mapping):
    """
    Return a LongPanel with the same values and index as ``dummies`` but
    with cleaned-up item names.

    Parameters
    ----------
    dummies : LongPanel of generated dummy variables
    mapping : dict-like or None
        When truthy, each item is renamed to ``mapping[int(item)]``.
        Otherwise floats are formatted with ``'%g'`` and everything else
        with ``str``.
    """
    def _clean_name(item):
        # renames the dummies if a conversion dict is provided
        if mapping:
            return mapping[int(item)]
        if isinstance(item, float):
            return '%g' % item
        return str(item)

    new_items = [_clean_name(item) for item in dummies.items]

    return LongPanel(dummies.values, index=dummies.index, columns=new_items)