def build_erf_aggregates(variables=None, year=2006, unit=1e6):
    """
    Fetch the relevant aggregates from erf data

    Parameters
    ----------
    variables : list of strings, default None
        names of the variables to aggregate; None is passed through to
        ``get_of_values`` unchanged. When a list is given, "wprm" is
        added to the request if missing (on a copy, the caller's list
        is left untouched).
    year : int, default 2006
        year handed to ``DataCollection``
    unit : float, default 1e6
        divisor applied to every weighted column sum

    Returns
    -------
    df : DataFrame
        ``df.ix[0:1]`` of the weighted table. Every column other than
        'ident' and 'wprm' that could be summed holds, on every row,
        its wprm-weighted total divided by `unit`.
    """
    erf = DataCollection(year=year)
    if variables is not None and "wprm" not in variables:
        # Build a new list instead of appending to the caller's one
        variables = list(variables) + ["wprm"]
    print('Fetching aggregates from erf %s data' % str(year))
    df = erf.get_of_values(variables=variables, table="erf_menage")
    df.rename(columns=get_erf2of(), inplace=True)

    # Keep the raw weights before any conversion/multiplication
    wprm = df["wprm"]
    for col in df.columns:
        try:
            df[col] = df[col].astype(np.float64)
        except (TypeError, ValueError):
            # Best effort: columns that cannot be cast stay as they are
            pass

    # Row-wise weighting; this also multiplies 'ident' and 'wprm'
    # themselves, which is why they are excluded from the sums below
    df = df.mul(wprm, axis=0)
    for col in set(df.columns) - set(['ident', 'wprm']):
        try:
            df[col] = df[col].sum() / unit
        except (TypeError, ValueError):
            # Best effort: skip columns that cannot be summed/divided
            pass

    # Aggregate so we only need 1 row
    # NOTE(review): on an integer-labelled index ``.ix[0:1]`` is an
    # inclusive label slice and yields two rows -- confirm what callers
    # expect before changing it.
    return df.ix[0:1]
def build_erf_aggregates(variables=None, year=2006, unit=1e6):
    """
    Fetch the relevant aggregates from erf data

    Parameters
    ----------
    variables : list of strings, default None
        names of the variables to aggregate; None is passed through to
        ``get_of_values`` unchanged. When a list is given, "wprm" is
        added to the request if missing (on a copy, the caller's list
        is left untouched).
    year : int, default 2006
        year handed to ``DataCollection``
    unit : float, default 1e6
        divisor applied to every weighted column sum

    Returns
    -------
    df : DataFrame
        ``df.ix[0:1]`` of the weighted table. Every column other than
        'ident' and 'wprm' that could be summed holds, on every row,
        its wprm-weighted total divided by `unit`.
    """
    erf = DataCollection(year=year)
    if variables is not None and "wprm" not in variables:
        # Build a new list instead of appending to the caller's one
        variables = list(variables) + ["wprm"]
    print('Fetching aggregates from erf %s data' % str(year))
    df = erf.get_of_values(variables=variables, table="erf_menage")
    df.rename(columns=get_erf2of(), inplace=True)

    # Keep the raw weights before any conversion/multiplication
    wprm = df["wprm"]
    for col in df.columns:
        try:
            df[col] = df[col].astype(np.float64)
        except (TypeError, ValueError):
            # Best effort: columns that cannot be cast stay as they are
            pass

    # Row-wise weighting; this also multiplies 'ident' and 'wprm'
    # themselves, which is why they are excluded from the sums below
    df = df.mul(wprm, axis=0)
    for col in set(df.columns) - set(['ident', 'wprm']):
        try:
            df[col] = df[col].sum() / unit
        except (TypeError, ValueError):
            # Best effort: skip columns that cannot be summed/divided
            pass

    # Aggregate so we only need 1 row
    # NOTE(review): on an integer-labelled index ``.ix[0:1]`` is an
    # inclusive label slice and yields two rows -- confirm what callers
    # expect before changing it.
    return df.ix[0:1]
def get_of_values(self, variables=None, table=None):
    """
    Get values

    Parameters
    ----------
    variables : list of strings, default None
                list of variables names, if None return the whole table
    table : string, default None
            name of the table where to get the variables; required in
            practice, since the store key is built by concatenating it
            to the year (None raises TypeError)

    Returns
    -------
    df : DataFrame
         A DataFrame containing the requested variables that exist in
         the table, with columns renamed through ``get_erf2of()``.
         When `variables` is None the whole table is returned as
         stored, without renaming.
    """
    key = str(self.year) + "/" + table
    store = HDFStore(self.hdf5_filename)
    try:
        df = store[key]
    finally:
        # Release the file handle on every path (early return, errors)
        store.close()

    # If no variables read the whole table
    if variables is None:
        return df

    from openfisca_france.data.erf import get_erf2of, get_of2erf

    # Translate the requested names that have an entry in of2erf
    of2erf = get_of2erf()
    to_be_renamed_variables = set(of2erf.keys()).intersection(variables)
    renamed_variables = [of2erf[name] for name in to_be_renamed_variables]
    if renamed_variables:
        variables = list(
            set(variables).difference(to_be_renamed_variables)
        ) + renamed_variables

    # Silently drop requested variables that are absent from the table
    variables = list(set(variables).intersection(df.columns))
    df = df[variables]

    # rename variables according to their name in openfisca.
    # The selected columns are matched against the *keys* of erf2of:
    # indexing the mapping with its own values would raise KeyError.
    erf2of = get_erf2of()
    renaming = dict(
        (name, erf2of[name])
        for name in set(erf2of.keys()).intersection(variables)
    )
    if renaming:
        df = df.rename(columns=renaming)
    return df
def get_of_values(self, variables=None, table=None):
    """
    Get values

    Parameters
    ----------
    variables : list of strings, default None
                list of variables names, if None return the whole table
    table : string, default None
            name of the table where to get the variables; required in
            practice, since the store key is built by concatenating it
            to the year (None raises TypeError)

    Returns
    -------
    df : DataFrame
         A DataFrame containing the requested variables that exist in
         the table, with columns renamed through ``get_erf2of()``.
         When `variables` is None the whole table is returned as
         stored, without renaming.
    """
    key = str(self.year) + "/" + table
    store = HDFStore(self.hdf5_filename)
    try:
        df = store[key]
    finally:
        # Release the file handle on every path (early return, errors)
        store.close()

    # If no variables read the whole table
    if variables is None:
        return df

    from openfisca_france.data.erf import get_erf2of, get_of2erf

    # Translate the requested names that have an entry in of2erf
    of2erf = get_of2erf()
    to_be_renamed_variables = set(of2erf.keys()).intersection(variables)
    renamed_variables = [of2erf[name] for name in to_be_renamed_variables]
    if renamed_variables:
        variables = list(
            set(variables).difference(to_be_renamed_variables)
        ) + renamed_variables

    # Silently drop requested variables that are absent from the table
    variables = list(set(variables).intersection(df.columns))
    df = df[variables]

    # rename variables according to their name in openfisca.
    # The selected columns are matched against the *keys* of erf2of:
    # indexing the mapping with its own values would raise KeyError.
    erf2of = get_erf2of()
    renaming = dict(
        (name, erf2of[name])
        for name in set(erf2of.keys()).intersection(variables)
    )
    if renaming:
        df = df.rename(columns=renaming)
    return df