def _read_panel_table(self, group, where=None):
    """
    Build a wide panel from the rows selected out of ``group.table``.

    Parameters
    ----------
    group : object
        Must expose a ``table`` attribute whose ``_v_attrs`` carries
        ``fields``, ``columns_kind`` and ``index_kind``.
    where : optional
        Handed unchanged to ``Selection`` together with the table.

    Returns
    -------
    The result of ``LongPanel.to_wide()``; when the selection has a
    truthy ``column_filter`` the result is reindexed on its minor axis.
    """
    from pandas.core.panel import _make_long_index

    table = group.table

    # run the selection and keep a handle on what it produced
    selection = Selection(table, where)
    selection.select()
    selected = selection.values

    fields = table._v_attrs.fields
    # convert the stored label columns using the kinds recorded on the table
    col_labels = _maybe_convert(selected['column'],
                                table._v_attrs.columns_kind)
    row_labels = _maybe_convert(selected['index'],
                                table._v_attrs.index_kind)

    # rebuild the long-format panel, sort on the first level, then pivot
    long_index = _make_long_index(np.asarray(row_labels),
                                  np.asarray(col_labels))
    long_panel = LongPanel(selected['values'], index=long_index,
                           columns=fields)
    wide = long_panel.sortlevel(level=0).to_wide()

    if not selection.column_filter:
        return wide

    # keep only the minor-axis entries named by the filter, in sorted order
    kept_minor = sorted(set(wide.minor_axis) & selection.column_filter)
    return wide.reindex(minor=kept_minor)
def _read_panel_table(self, group, where=None):
    """
    Build a wide panel from the rows selected out of ``group.table``.

    When the reconstructed long panel reports itself as not
    ``consistent``, a notice is printed (unless ``self._quiet`` is set)
    and the rows are re-selected through an indexer before pivoting.

    Parameters
    ----------
    group : object
        Must expose a ``table`` attribute whose ``_v_attrs`` carries
        ``fields``, ``columns_kind`` and ``index_kind``.
    where : optional
        Handed unchanged to ``Selection`` together with the table.

    Returns
    -------
    The result of ``LongPanel.to_wide()``; when the selection has a
    truthy ``column_filter`` the result is reindexed on its minor axis.
    """
    from pandas.core.common import _asarray_tuplesafe

    table = group.table

    # run the selection and keep a handle on what it produced
    selection = Selection(table, where)
    selection.select()
    selected = selection.values

    fields = table._v_attrs.fields
    # convert the stored label columns using the kinds recorded on the table
    col_labels = _maybe_convert(selected['column'],
                                table._v_attrs.columns_kind)
    row_labels = _maybe_convert(selected['index'],
                                table._v_attrs.index_kind)

    # rebuild the long-format panel
    long_index = MultiIndex.from_arrays([row_labels, col_labels])
    long_panel = LongPanel(selected['values'], index=long_index,
                           columns=fields)

    if long_panel.consistent:
        long_panel = long_panel.sortlevel(level=0)
    else:
        if not self._quiet:  # pragma: no cover
            print('Duplicate entries in table, taking most recently '
                  'appended')

        # need a better algorithm
        keys = long_index.get_tuple_index()
        key_positions = lib.map_indices_object(keys)
        distinct_keys = _asarray_tuplesafe(lib.fast_unique(keys))
        keep = lib.merge_indexer_object(distinct_keys, key_positions)

        # rebuild the long panel from the rows picked by the indexer
        kept_index = long_index.take(keep)
        kept_values = long_panel.values.take(keep, axis=0)
        long_panel = LongPanel(kept_values, index=kept_index,
                               columns=long_panel.columns)

    wide = long_panel.to_wide()

    if not selection.column_filter:
        return wide

    # keep only the minor-axis entries named by the filter, in sorted order
    kept_minor = sorted(set(wide.minor_axis) & selection.column_filter)
    return wide.reindex(minor=kept_minor)
def _read_long(self, group, where=None):
    """
    Assemble a ``LongPanel`` from the pieces stored under ``group``.

    Three indexes (``items``, ``major_axis``, ``minor_axis``) are read
    via ``self._read_index`` and three arrays (``major_labels``,
    ``minor_labels``, ``values``) via ``_read_array``.

    Parameters
    ----------
    group : object
        Passed as-is to the index and array readers.
    where : optional
        Accepted but not used by this reader.

    Returns
    -------
    LongPanel
        Built from ``values``, indexed by a two-level ``MultiIndex``
        (major, minor) and with ``items`` as its columns.
    """
    from pandas.core.index import MultiIndex

    # read order matches the storage keys listed left to right
    items, major_axis, minor_axis = [
        self._read_index(group, key)
        for key in ('items', 'major_axis', 'minor_axis')
    ]
    major_labels, minor_labels, values = [
        _read_array(group, key)
        for key in ('major_labels', 'minor_labels', 'values')
    ]

    long_index = MultiIndex(levels=[major_axis, minor_axis],
                            labels=[major_labels, minor_labels])
    return LongPanel(values, index=long_index, columns=items)
def _read_panel_table(self, group, where=None):
    """
    Build a wide panel from the rows selected out of ``group.table``.

    When the reconstructed long panel reports itself as not
    ``consistent``, a notice is printed (unless ``self._quiet`` is set)
    and the rows are re-selected through an indexer before pivoting.

    Parameters
    ----------
    group : object
        Must expose a ``table`` attribute whose ``_v_attrs`` carries
        ``fields``, ``columns_kind`` and ``index_kind``.
    where : optional
        Handed unchanged to ``Selection`` together with the table.

    Returns
    -------
    The result of ``LongPanel.to_wide()``; when the selection has a
    truthy ``column_filter`` the result is reindexed on its minor axis.
    """
    from pandas.core.common import _asarray_tuplesafe

    table = group.table

    # run the selection and keep a handle on what it produced
    selection = Selection(table, where)
    selection.select()
    selected = selection.values

    fields = table._v_attrs.fields
    # convert the stored label columns using the kinds recorded on the table
    col_labels = _maybe_convert(selected['column'],
                                table._v_attrs.columns_kind)
    row_labels = _maybe_convert(selected['index'],
                                table._v_attrs.index_kind)

    # rebuild the long-format panel
    long_index = MultiIndex.from_arrays([row_labels, col_labels])
    long_panel = LongPanel(selected['values'], index=long_index,
                           columns=fields)

    if long_panel.consistent:
        long_panel = long_panel.sortlevel(level=0)
    else:
        if not self._quiet:  # pragma: no cover
            print('Duplicate entries in table, taking most recently '
                  'appended')

        # need a better algorithm
        keys = long_index.get_tuple_index()
        key_positions = lib.map_indices_object(keys)
        distinct_keys = _asarray_tuplesafe(lib.fast_unique(keys))
        keep = lib.merge_indexer_object(distinct_keys, key_positions)

        # rebuild the long panel from the rows picked by the indexer
        kept_index = long_index.take(keep)
        kept_values = long_panel.values.take(keep, axis=0)
        long_panel = LongPanel(kept_values, index=kept_index,
                               columns=long_panel.columns)

    wide = long_panel.to_wide()

    if not selection.column_filter:
        return wide

    # keep only the minor-axis entries named by the filter, in sorted order
    kept_minor = sorted(set(wide.minor_axis) & selection.column_filter)
    return wide.reindex(minor=kept_minor)
import pandas pandas.version >= .1 except: raise ImportError, "pandas >= .10 not installed" from pandas import LongPanel import scikits.statsmodels as sm import numpy.lib.recfunctions as nprf data = sm.datasets.grunfeld.Load() # Baltagi doesn't include American Steel endog = data.endog[:-20] fullexog = data.exog[:-20] # fullexog.sort(order=['firm','year']) panel_arr = nprf.append_fields(fullexog, 'investment', endog, float, usemask=False) panel_panda = LongPanel.fromRecords(panel_arr, major_field='year', minor_field='firm') # the most cumbersome way of doing it as far as preprocessing by hand exog = fullexog[['value','capital']].view(float).reshape(-1,2) exog = sm.add_constant(exog) panel = group(fullexog['firm']) year = fullexog['year'] panel_mod = PanelModel(endog, exog, panel, year, xtnames=['firm','year'], equation='invest value capital') # note that equation doesn't actually do anything but name the variables panel_ols = panel_mod.fit(model='pooled') panel_be = panel_mod.fit(model='between', effects='oneway') panel_fe = panel_mod.fit(model='fixed', effects='oneway') panel_bet = panel_mod.fit(model='between', effects='time')
# NOTE(review): this `pass` closes a definition that starts before this
# chunk; its original indentation cannot be recovered from here.
pass

# Demo script: fits several panel-model variants on the Grunfeld dataset
# shipped with statsmodels.  Runs only when the module is executed directly.
if __name__ == "__main__":
    import pandas
    from pandas import LongPanel
    import statsmodels.api as sm
    import numpy.lib.recfunctions as nprf

    data = sm.datasets.grunfeld.load()
    # Baltagi doesn't include American Steel
    # (the last 20 observations are dropped from both endog and exog)
    endog = data.endog[:-20]
    fullexog = data.exog[:-20]
    # fullexog.sort(order=['firm','year'])
    # append endog to the exog records as a float field named 'investment'
    panel_arr = nprf.append_fields(fullexog, 'investment', endog, float,
                                   usemask=False)
    # NOTE(review): panel_panda is not referenced again in this script
    panel_panda = LongPanel.fromRecords(panel_arr, major_field='year',
                                        minor_field='firm')

    # the most cumbersome way of doing it as far as preprocessing by hand
    # presumably fullexog is a structured array, so the two selected fields
    # are viewed as floats and reshaped to two columns -- TODO confirm
    exog = fullexog[['value','capital']].view(float).reshape(-1,2)
    exog = sm.add_constant(exog, prepend=False)
    # `group` and `PanelModel` are not defined in this block; they are
    # expected to come from elsewhere in the module
    panel = group(fullexog['firm'])
    year = fullexog['year']
    panel_mod = PanelModel(endog, exog, panel, year, xtnames=['firm','year'],
                           equation='invest value capital')
    # note that equation doesn't actually do anything but name the variables
    # fit the same model object under four model/effects settings
    panel_ols = panel_mod.fit(model='pooled')
    panel_be = panel_mod.fit(model='between', effects='oneway')
    panel_fe = panel_mod.fit(model='fixed', effects='oneway')
    panel_bet = panel_mod.fit(model='between', effects='time')