Ejemplo n.º 1
0
def build_erf_aggregates(variables=None, year=2006, unit=1e6):
    """
    Fetch the relevant aggregates from erf data

    The "erf_menage" table is read through ``DataCollection.get_of_values``,
    its columns are renamed with the ``get_erf2of()`` mapping, every column
    is multiplied row-wise by the "wprm" column, and each column other than
    "ident" and "wprm" is replaced by its total divided by ``unit``.

    Parameters
    ----------
    variables : list of strings, default None
        Names of the variables to fetch; None is passed through unchanged
        to ``get_of_values``. "wprm" is added to the request when missing.
        The caller's list is never modified.
    year : int, default 2006
        Year handed to ``DataCollection``.
    unit : float, default 1e6
        Divisor applied to every weighted total.

    Returns
    -------
    DataFrame
        The leading rows (``.ix[0:1]``) of the aggregated frame.
    """

    erf = DataCollection(year=year)
    if variables is not None and "wprm" not in variables:
        # Build a new list instead of appending, so the caller's argument
        # is left untouched.
        variables = list(variables) + ["wprm"]
    print('Fetching aggregates from erf %s data' % str(year))
    df = erf.get_of_values(variables=variables, table="erf_menage")

    erf2of = get_erf2of()
    df.rename(columns=erf2of, inplace=True)

    # Keep a handle on the weights before the columns are converted.
    wprm = df["wprm"]
    for col in df.columns:
        try:
            df[col] = df[col].astype(np.float64)
        except (TypeError, ValueError):
            # Best effort: columns that cannot be cast to float are kept
            # as they are.
            pass

    df = df.mul(wprm, axis=0)
    for col in set(df.columns) - set(['ident', 'wprm']):
        try:
            # The scalar total is broadcast to every row of the column.
            df[col] = df[col].sum() / unit
        except (TypeError, ValueError):
            # Best effort: columns that cannot be summed and scaled are
            # left untouched.
            pass

    # Every aggregated column now holds the same value on each row, so a
    # leading slice carries all the information.
    # NOTE(review): `.ix` only exists in legacy pandas; confirm the target
    # version before replacing it.
    return df.ix[0:1]
Ejemplo n.º 2
0
def build_erf_aggregates(variables=None, year=2006, unit=1e6):
    """
    Fetch the relevant aggregates from erf data

    The "erf_menage" table is read through ``DataCollection.get_of_values``,
    its columns are renamed with the ``get_erf2of()`` mapping, every column
    is multiplied row-wise by the "wprm" column, and each column other than
    "ident" and "wprm" is replaced by its total divided by ``unit``.

    Parameters
    ----------
    variables : list of strings, default None
        Names of the variables to fetch; None is passed through unchanged
        to ``get_of_values``. "wprm" is added to the request when missing.
        The caller's list is never modified.
    year : int, default 2006
        Year handed to ``DataCollection``.
    unit : float, default 1e6
        Divisor applied to every weighted total.

    Returns
    -------
    DataFrame
        The leading rows (``.ix[0:1]``) of the aggregated frame.
    """

    erf = DataCollection(year=year)
    if variables is not None and "wprm" not in variables:
        # Build a new list instead of appending, so the caller's argument
        # is left untouched.
        variables = list(variables) + ["wprm"]
    print('Fetching aggregates from erf %s data' % str(year))
    df = erf.get_of_values(variables=variables, table="erf_menage")

    erf2of = get_erf2of()
    df.rename(columns=erf2of, inplace=True)

    # Keep a handle on the weights before the columns are converted.
    wprm = df["wprm"]
    for col in df.columns:
        try:
            df[col] = df[col].astype(np.float64)
        except (TypeError, ValueError):
            # Best effort: columns that cannot be cast to float are kept
            # as they are.
            pass

    df = df.mul(wprm, axis=0)
    for col in set(df.columns) - set(['ident', 'wprm']):
        try:
            # The scalar total is broadcast to every row of the column.
            df[col] = df[col].sum() / unit
        except (TypeError, ValueError):
            # Best effort: columns that cannot be summed and scaled are
            # left untouched.
            pass

    # Every aggregated column now holds the same value on each row, so a
    # leading slice carries all the information.
    # NOTE(review): `.ix` only exists in legacy pandas; confirm the target
    # version before replacing it.
    return df.ix[0:1]
Ejemplo n.º 3
0
    def get_of_values(self, variables=None, table=None):
        """
        Get values

        Reads ``<year>/<table>`` from the HDF5 store at
        ``self.hdf5_filename`` and returns either the whole table or a
        subset of its columns. The store is closed before returning.

        Parameters
        ----------
        variables : list of strings, default None
                  list of variables names, if None return the whole table.
                  Names that are keys of ``get_of2erf()`` are translated
                  through that mapping before the lookup; names that are
                  not columns of the table are silently dropped.
        table : string, default None
                name of the table where to get the variables. It is
                concatenated into the store key, so leaving it to None
                raises a TypeError.
        Returns
        -------
        df : DataFrame
             A DataFrame containing the variables
        """

        store = HDFStore(self.hdf5_filename)
        try:
            df = store[str(self.year) + "/" + table]
        finally:
            # Release the file handle whether or not the read succeeded.
            store.close()

        # If no variables read the whole table
        if variables is None:
            return df

        from openfisca_france.data.erf import get_erf2of, get_of2erf
        of2erf = get_of2erf()
        to_be_renamed_variables = set(of2erf.keys()).intersection(variables)
        renamed_variables = [
            of2erf[variable] for variable in to_be_renamed_variables]

        if renamed_variables:
            # Swap the translated names in for the ones they replace.
            variables = list(
                set(variables).difference(
                    to_be_renamed_variables)) + renamed_variables

        # TODO: guess the table from the requested variables when `table`
        # is None (not implemented).

        # Keep only the requested names that are actual columns.
        variables = list(set(variables).intersection(df.columns))
        df = df[variables]

        # rename variables according to their name in openfisca
        # NOTE(review): the candidates are taken from the *values* of
        # erf2of but are then used as *keys* of erf2of; confirm whether
        # `erf2of.keys()` was intended here.
        erf2of = get_erf2of()
        to_be_renamed_variables = set(erf2of.values()).intersection(variables)
        for var in to_be_renamed_variables:
            df.rename(columns={var: erf2of[var]}, inplace=True)
        return df
    def get_of_values(self, variables=None, table=None):
        """
        Get values

        Reads ``<year>/<table>`` from the HDF5 store at
        ``self.hdf5_filename`` and returns either the whole table or a
        subset of its columns. The store is closed before returning.

        Parameters
        ----------
        variables : list of strings, default None
                  list of variables names, if None return the whole table.
                  Names that are keys of ``get_of2erf()`` are translated
                  through that mapping before the lookup; names that are
                  not columns of the table are silently dropped.
        table : string, default None
                name of the table where to get the variables. It is
                concatenated into the store key, so leaving it to None
                raises a TypeError.
        Returns
        -------
        df : DataFrame
             A DataFrame containing the variables
        """

        store = HDFStore(self.hdf5_filename)
        try:
            df = store[str(self.year) + "/" + table]
        finally:
            # Release the file handle whether or not the read succeeded.
            store.close()

        # If no variables read the whole table
        if variables is None:
            return df

        from openfisca_france.data.erf import get_erf2of, get_of2erf
        of2erf = get_of2erf()
        to_be_renamed_variables = set(of2erf.keys()).intersection(variables)
        renamed_variables = [
            of2erf[variable] for variable in to_be_renamed_variables]

        if renamed_variables:
            # Swap the translated names in for the ones they replace.
            variables = list(
                set(variables).difference(
                    to_be_renamed_variables)) + renamed_variables

        # TODO: guess the table from the requested variables when `table`
        # is None (not implemented).

        # Keep only the requested names that are actual columns.
        variables = list(set(variables).intersection(df.columns))
        df = df[variables]

        # rename variables according to their name in openfisca
        # NOTE(review): the candidates are taken from the *values* of
        # erf2of but are then used as *keys* of erf2of; confirm whether
        # `erf2of.keys()` was intended here.
        erf2of = get_erf2of()
        to_be_renamed_variables = set(erf2of.values()).intersection(variables)
        for var in to_be_renamed_variables:
            df.rename(columns={var: erf2of[var]}, inplace=True)
        return df