Python Aggregates Examples, openfisca_qt.plugins.survey.aggregates.Aggregates Python Examples

Example #1

0

Show file

File: aggregates_all_years.py Project: Iliato/openfisca-qt

def build_aggregates():

    writer = None
    years = range(2006,2010)
    for year in years:
        yr = str(year)
#        fname = "Agg_%s.%s" %(str(yr), "xls")
        simu = SurveySimulation()
        simu.set_config(year = yr)
        simu.set_param()
        simu.set_survey()
        inflator = get_loyer_inflator(year)
        simu.inflate_survey({'loyer' : inflator})
        simu.compute()

        agg = Aggregates()
        agg.set_simulation(simu)
        agg.compute()

        if writer is None:
            writer = ExcelWriter(str(fname_all))
        agg.aggr_frame.to_excel(writer, yr, index= False, header= True, float_format="%.2f")
        print agg.aggr_frame.to_string()
        del simu
        del agg
        import gc
        gc.collect()


    writer.save()

Example #2

0

Show file

def build_aggregates():
    #    writer = None
    years = range(2009, 2010)
    for year in years:
        yr = str(year)
        #        fname = "Agg_%s.%s" %(str(yr), "xls")
        simu = SurveySimulation()
        simu.set_config(year=yr)
        simu.set_param()
        simu.set_survey()
        simu.compute()
        variables = ["garext", "ci_garext", "inthab", "ppe_brute", "rni"]
        x = simu.aggregated_by_entity("men", variables, all_output_vars=False)
        #        df = x[0]
        #        print df["ci_garext"].describe()
        agg = Aggregates()
        agg.set_simulation(simu)
        agg.show_default = False
        agg.show_real = False
        agg.show_diff = False
        agg.varlist = var_list  # ERROR: var_list is undefined.
        agg.compute()
        cols = agg.aggr_frame.columns[:4]
        print agg.aggr_frame[cols].to_string()
        #        if writer is None:
        #            writer = ExcelWriter(str(fname_all))
        #        agg.aggr_frame.to_excel(writer, yr, index= False, header= True)
        del simu
        del agg
        import gc
        gc.collect()

Example #3

0

Show file

File: chunk.py Project: Iliato/openfisca-qt

def test_chunk():
    print "debut"
    writer = None
    years = range(2011,2012)
    filename = destination_dir+'output3.h5'
    store = HDFStore(filename)
    for year in years:
        yr = str(year)
#        fname = "Agg_%s.%s" %(str(yr), "xls")
        simu = SurveySimulation()
        simu.set_config(year = yr)
        simu.set_param()
        import time

        tps = {}
        for nb_chunk in range(1,5):
            deb_chunk = time.clock()
            simu.set_config(survey_filename='C:\\Til\\output\\to_run_leg.h5', num_table=3, chunks_count=nb_chunk ,
                            print_missing=False)
            simu.compute()
            tps[nb_chunk] = time.clock() - deb_chunk

            voir = simu.output_table.table3['foy']
            print len(voir)
            pdb.set_trace()
            agg3 = Aggregates()
            agg3.set_simulation(simu)
            agg3.compute()
            df1 = agg3.aggr_frame
            print df1.to_string()

    print tps
    store.close()

Example #4

0

Show file

File: example.py Project: Jiss83/openfisca-qt

def survey_case(year):
#        fname = "Agg_%s.%s" %(str(yr), "xls")
    simulation = SurveySimulation()
    simulation.set_config(year = year)
    simulation.set_param()

#    Ignore this
#    inflator = get_loyer_inflator(year)
#    simulation.inflate_survey({'loyer' : inflator})

    simulation.compute()


# Compute aggregates
    agg = Aggregates()
    agg.set_simulation(simulation)
    agg.compute()

    df1 = agg.aggr_frame
    print df1.to_string()

#    Saving aggregates
#    if writer is None:
#        writer = ExcelWriter(str(fname)
#    agg.aggr_frame.to_excel(writer, yr, index= False, header= True)


# Displaying a pivot table
    from openfisca_qt.plugins.survey.distribution import OpenfiscaPivotTable
    pivot_table = OpenfiscaPivotTable()
    pivot_table.set_simulation(simulation)
    df2 = pivot_table.get_table(by = 'so', vars = ['nivvie'])
    print df2.to_string()

Example #5

0

Show file

File: euromod.py Project: Iliato/openfisca-qt

def build_aggregates():
#    writer = None
    years = range(2009,2010)
    for year in years:
        yr = str(year)
#        fname = "Agg_%s.%s" %(str(yr), "xls")
        simu = SurveySimulation()
        simu.set_config(year = yr)
        simu.set_param()
        simu.set_survey()
        simu.compute()
        variables = ["garext", "ci_garext", "inthab", "ppe_brute", "rni"]
        x = simu.aggregated_by_entity("men", variables, all_output_vars = False)
#        df = x[0]
#        print df["ci_garext"].describe()
        agg = Aggregates()
        agg.set_simulation(simu)
        agg.show_default = False
        agg.show_real = False
        agg.show_diff = False
        agg.varlist = var_list  # ERROR: var_list is undefined.
        agg.compute()
        cols = agg.aggr_frame.columns[:4]
        print agg.aggr_frame[cols].to_string()
#        if writer is None:
#            writer = ExcelWriter(str(fname_all))
#        agg.aggr_frame.to_excel(writer, yr, index= False, header= True)
        del simu
        del agg
        import gc
        gc.collect()

Example #6

0

Show file

File: example_test.py Project: Iliato/openfisca-qt

def survey_case(year = 2006):
    yr = str(year)
#        fname = "Agg_%s.%s" %(str(yr), "xls")
    simulation = SurveySimulation()
    survey_filename = os.path.join(model.DATA_DIR, 'sources', 'test.h5')
    simulation.set_config(year=yr, survey_filename=survey_filename)
    simulation.set_param()


#    Ignore this
#    inflator = get_loyer_inflator(year)
#    simulation.inflate_survey({'loyer' : inflator})

    simulation.compute()
    simul_out_df = simulation.output_table.table
    simul_in_df = simulation.input_table.table
    print simul_out_df.loc[:,['af', 'af_base', 'af_forf', 'af_majo', 'af_nbenf']].describe()
    print 'input vars'
    print simul_in_df.columns
    print 'output vars'
    print simul_out_df.columns

#     check_inputs_enumcols(simulation)

# Compute aggregates
    agg = Aggregates()
    agg.set_simulation(simulation)
    agg.compute()
    df1 = agg.aggr_frame
    print df1.columns

    print df1.to_string()

#    Saving aggregates
#    if writer is None:
#        writer = ExcelWriter(str(fname)
#    agg.aggr_frame.to_excel(writer, yr, index= False, header= True)


# Displaying a pivot table
    from openfisca_qt.plugins.survey.distribution import OpenfiscaPivotTable
    pivot_table = OpenfiscaPivotTable()
    pivot_table.set_simulation(simulation)
    df2 = pivot_table.get_table(by ='so', vars=['nivvie'])
    print df2.to_string()
    return df1

Example #7

0

Show file

File: recap.py Project: Iliato/openfisca-qt

    def _generate_aggregates(self):
        dfs = list()
        dfs_erf = list()
        years = self.years
        for year in years:
            # Running a standard SurveySimulation to get OF aggregates
            simulation = SurveySimulation()
            survey_filename = self.survey_filename
            simulation.set_config(year=year, survey_filename=survey_filename)
            simulation.set_param()
            simulation.compute()
            agg = Aggregates()
            agg.set_simulation(simulation)
            agg.compute()
            df = agg.aggr_frame
            df['year'] = year
            label_by_name = dict(
                (name, column.label)
                for name, column in simulation.output_table.column_by_name.iteritems()
                )
            #colonnes = simulation.output_table.table.columns
            dfs.append(df)
            variables = agg.varlist
            labels_variables = [
                label_by_name[variable]
                for variable in variables
                ]
            del simulation, agg, df
            # simulation.save_content(name, filename)

            gc.collect()

            # ERFS
            temp = (build_erf_aggregates(variables=variables, year= year))
            temp.rename(columns = label_by_name, inplace = True)
            temp = temp.T
            temp.reset_index(inplace = True)
            temp['year'] = year
            dfs_erf.append(temp)
            del temp
            gc.collect()

        self.labels_variables = labels_variables
        self.aggregates_of_dataframe = dfs
        self.aggregates_erfs_dataframe = dfs_erf

Example #8

0

Show file

File: debugger.py Project: Iliato/openfisca-qt

    def show_aggregates(self):
        from openfisca_france.data.erf.aggregates import build_erf_aggregates

        assert self.simulation is not None, 'simulation attribute is None'
        assert self.variable is not None, 'variable attribute is None'
        variable = self.variable
        of_aggregates = Aggregates()
        of_aggregates.set_simulation(self.simulation)
        of_aggregates.compute()

        temp = (build_erf_aggregates(variables=[variable], year= self.simulation.datesim.year))

        selection = of_aggregates.aggr_frame["Mesure"] == self.simulation.io_column_by_name[variable].label
        print of_aggregates.aggr_frame[selection]

        print temp
        # TODO: clean this
        return

Example #9

0

Show file

File: test_multitable.py Project: Iliato/openfisca-qt

def survey_case_3_tables():
    year = 2006
    yr = str(year)
    simulation = SurveySimulation()


    survey_input = HDFStore(survey3_test)
#     convert_to_3_tables(year=year, survey_file=survey_file, output_file=survey3_file)
    df_men = survey_input['survey_2006/men']
    df_foy = survey_input['survey_2006/foy']
    df_fam = survey_input['survey_2006/fam']
    df_fam['alr'] = 0;
    survey_input['survey_2006/fam'] = df_fam

    simulation.num_table = 3

    simulation.set_config(year = yr, survey_filename=survey3_test)
    simulation.set_param()

    simulation.compute()

# Compute aggregates
    agg = Aggregates()
    agg.set_simulation(simulation)
    agg.compute()

    df1 = agg.aggr_frame
    print df1.to_string()

# #    Saving aggregates
#     if writer is None:
#         writer = ExcelWriter(str(fname)
#     agg.aggr_frame.to_excel(writer, yr, index= False, header= True)


# Displaying a pivot table
    from openfisca_qt.plugins.survey.distribution import OpenfiscaPivotTable
    pivot_table = OpenfiscaPivotTable()
    pivot_table.set_simulation(simulation)
    df2 = pivot_table.get_table(by ='so', vars=['nivvie'])
    print df2.to_string()

Example #10

0

Show file

def test_chunk():
    print "debut"
    writer = None
    years = range(2011, 2012)
    filename = destination_dir + 'output3.h5'
    store = HDFStore(filename)
    for year in years:
        yr = str(year)
        #        fname = "Agg_%s.%s" %(str(yr), "xls")
        simu = SurveySimulation()
        simu.set_config(year=yr)
        simu.set_param()
        import time

        tps = {}
        for nb_chunk in range(1, 5):
            deb_chunk = time.clock()
            simu.set_config(survey_filename='C:\\Til\\output\\to_run_leg.h5',
                            num_table=3,
                            chunks_count=nb_chunk,
                            print_missing=False)
            simu.compute()
            tps[nb_chunk] = time.clock() - deb_chunk

            voir = simu.output_table.table3['foy']
            print len(voir)
            pdb.set_trace()
            agg3 = Aggregates()
            agg3.set_simulation(simu)
            agg3.compute()
            df1 = agg3.aggr_frame
            print df1.to_string()

    print tps
    store.close()

Example #11

0

Show file

File: aggregates_all_years.py Project: stanislasrybak/openfisca-qt

def build_aggregates():

    writer = None
    years = range(2006, 2010)
    for year in years:
        yr = str(year)
        #        fname = "Agg_%s.%s" %(str(yr), "xls")
        simu = SurveySimulation()
        simu.set_config(year=yr)
        simu.set_param()
        simu.set_survey()
        inflator = get_loyer_inflator(year)
        simu.inflate_survey({'loyer': inflator})
        simu.compute()

        agg = Aggregates()
        agg.set_simulation(simu)
        agg.compute()

        if writer is None:
            writer = ExcelWriter(str(fname_all))
        agg.aggr_frame.to_excel(writer,
                                yr,
                                index=False,
                                header=True,
                                float_format="%.2f")
        print agg.aggr_frame.to_string()
        del simu
        del agg
        import gc
        gc.collect()

    writer.save()

Example #12

0

Show file

File: example_test.py Project: stanislasrybak/openfisca-qt

def survey_case(year=2006):
    yr = str(year)
    #        fname = "Agg_%s.%s" %(str(yr), "xls")
    simulation = SurveySimulation()
    survey_filename = os.path.join(model.DATA_DIR, 'sources', 'test.h5')
    simulation.set_config(year=yr, survey_filename=survey_filename)
    simulation.set_param()

    #    Ignore this
    #    inflator = get_loyer_inflator(year)
    #    simulation.inflate_survey({'loyer' : inflator})

    simulation.compute()
    simul_out_df = simulation.output_table.table
    simul_in_df = simulation.input_table.table
    print simul_out_df.loc[:,
                           ['af', 'af_base', 'af_forf', 'af_majo', 'af_nbenf'
                            ]].describe()
    print 'input vars'
    print simul_in_df.columns
    print 'output vars'
    print simul_out_df.columns

    #     check_inputs_enumcols(simulation)

    # Compute aggregates
    agg = Aggregates()
    agg.set_simulation(simulation)
    agg.compute()
    df1 = agg.aggr_frame
    print df1.columns

    print df1.to_string()

    #    Saving aggregates
    #    if writer is None:
    #        writer = ExcelWriter(str(fname)
    #    agg.aggr_frame.to_excel(writer, yr, index= False, header= True)

    # Displaying a pivot table
    from openfisca_qt.plugins.survey.distribution import OpenfiscaPivotTable
    pivot_table = OpenfiscaPivotTable()
    pivot_table.set_simulation(simulation)
    df2 = pivot_table.get_table(by='so', vars=['nivvie'])
    print df2.to_string()
    return df1

Example #13

0

Show file

File: recap.py Project: stanislasrybak/openfisca-qt

    def _generate_aggregates(self):
        dfs = list()
        dfs_erf = list()
        years = self.years
        for year in years:
            # Running a standard SurveySimulation to get OF aggregates
            simulation = SurveySimulation()
            survey_filename = self.survey_filename
            simulation.set_config(year=year, survey_filename=survey_filename)
            simulation.set_param()
            simulation.compute()
            agg = Aggregates()
            agg.set_simulation(simulation)
            agg.compute()
            df = agg.aggr_frame
            df['year'] = year
            label_by_name = dict(
                (name, column.label) for name, column in
                simulation.output_table.column_by_name.iteritems())
            #colonnes = simulation.output_table.table.columns
            dfs.append(df)
            variables = agg.varlist
            labels_variables = [
                label_by_name[variable] for variable in variables
            ]
            del simulation, agg, df
            # simulation.save_content(name, filename)

            gc.collect()

            # ERFS
            temp = (build_erf_aggregates(variables=variables, year=year))
            temp.rename(columns=label_by_name, inplace=True)
            temp = temp.T
            temp.reset_index(inplace=True)
            temp['year'] = year
            dfs_erf.append(temp)
            del temp
            gc.collect()

        self.labels_variables = labels_variables
        self.aggregates_of_dataframe = dfs
        self.aggregates_erfs_dataframe = dfs_erf

Example #14

0

Show file

    def show_aggregates(self):
        from openfisca_france.data.erf.aggregates import build_erf_aggregates

        assert self.simulation is not None, 'simulation attribute is None'
        assert self.variable is not None, 'variable attribute is None'
        variable = self.variable
        of_aggregates = Aggregates()
        of_aggregates.set_simulation(self.simulation)
        of_aggregates.compute()

        temp = (build_erf_aggregates(variables=[variable],
                                     year=self.simulation.datesim.year))

        selection = of_aggregates.aggr_frame[
            "Mesure"] == self.simulation.io_column_by_name[variable].label
        print of_aggregates.aggr_frame[selection]

        print temp
        # TODO: clean this
        return

Example #15

0

Show file

def survey_case_3_tables():
    year = 2006
    yr = str(year)
    simulation = SurveySimulation()

    survey_input = HDFStore(survey3_test)
    #     convert_to_3_tables(year=year, survey_file=survey_file, output_file=survey3_file)
    df_men = survey_input['survey_2006/men']
    df_foy = survey_input['survey_2006/foy']
    df_fam = survey_input['survey_2006/fam']
    df_fam['alr'] = 0
    survey_input['survey_2006/fam'] = df_fam

    simulation.num_table = 3

    simulation.set_config(year=yr, survey_filename=survey3_test)
    simulation.set_param()

    simulation.compute()

    # Compute aggregates
    agg = Aggregates()
    agg.set_simulation(simulation)
    agg.compute()

    df1 = agg.aggr_frame
    print df1.to_string()

    # #    Saving aggregates
    #     if writer is None:
    #         writer = ExcelWriter(str(fname)
    #     agg.aggr_frame.to_excel(writer, yr, index= False, header= True)

    # Displaying a pivot table
    from openfisca_qt.plugins.survey.distribution import OpenfiscaPivotTable
    pivot_table = OpenfiscaPivotTable()
    pivot_table.set_simulation(simulation)
    df2 = pivot_table.get_table(by='so', vars=['nivvie'])
    print df2.to_string()

Example #16

0

Show file

File: example_test.py Project: Iliato/openfisca-qt

def test_laurence():
    '''
    Computes the openfisca/real numbers comparaison table in excel worksheet.

    Warning: To add more years you'll have to twitch the code manually.
    Default is years 2006 to 2009 included.
    '''
    def save_as_xls(df, alter_method = True):
        # Saves a datatable under Excel table using XLtable
        if alter_method:
            filename = "C:\desindexation.xls"
            print filename
            writer = ExcelWriter(str(filename))
            df.to_excel(writer)
            writer.save()
        else:
            # XLtable utile pour la mise en couleurs, reliefs, etc. de la table, inutile sinon
            stxl = XLtable(df)
            # <========== HERE TO CHANGE OVERLAY ======>
            wb = xlwt.Workbook()
            ws = wb.add_sheet('resultatstest')
            erfxcel = stxl.place_table(ws)
            try: # I dunno more clever commands
                wb.save("C:\outputtest.xls")
            except:
                n = random.randint(0,100)
                wb.save("C:\outputtest_"+str(n)+".xls")

#===============================================================================
#     from numpy.random import randn
#     mesures = ['cotsoc','af', 'add', 'cotsoc','af', 'add', 'cotsoc','af', 'add',
#                'cotsoc','af', 'add', 'cotsoc','af', 'add', 'cotsoc','af', 'add',
#                'cotsoc','af', 'add', 'cotsoc','af', 'add', 'cotsoc','af', 'add']
#     sources = ['of', 'of', 'of', 'erfs', 'erfs', 'erfs', 'reel', 'reel', 'reel',
#                'of', 'of', 'of', 'erfs', 'erfs', 'erfs', 'reel', 'reel', 'reel',
#                'of', 'of', 'of', 'erfs', 'erfs', 'erfs', 'reel', 'reel', 'reel']
#     year = ['2006', '2006', '2006', '2006', '2006', '2006', '2006', '2006', '2006',
#             '2007', '2007', '2007', '2007', '2007', '2007', '2007', '2007', '2007',
#             '2008', '2008', '2008', '2008', '2008', '2008', '2008', '2008', '2008']
#     ind = zip(*[mesures,sources, year])
# #     print ind
#     from pandas.core.index import MultiIndex
#     ind = MultiIndex.from_tuples(ind, names = ['mesure', 'source', 'year'])
# #     print ind
#     d = pd.DataFrame(randn(27,2), columns = ['Depenses', 'Recettes'], index = ind)
#     d.reset_index(inplace = True, drop = False)
#     d = d.groupby(by = ['mesure', 'source', 'year'], sort = False).sum()
#     print d
#     d_unstacked = d.unstack()
#     print d
#     indtemp1 = d.index.get_level_values(0)
#     indtemp2 = d.index.get_level_values(1)
#     indexi = zip(*[indtemp1, indtemp2])
#     print indexi
#     indexi_bis = []
#     for i in xrange(len(indexi)):
#         if indexi[i] not in indexi_bis:
#             indexi_bis.append(indexi[i])
#     indexi = indexi_bis
#     indexi = MultiIndex.from_tuples(indexi, names = ['Mesure', 'source'])
#     print indexi
#     d_unstacked = d_unstacked.reindex_axis(indexi, axis = 0)
#     print d_unstacked.to_string()
#     save_as_xls(d_unstacked)
#     return
#===============================================================================

    def reshape_tables(dfs, dfs_erf):
        agg = Aggregates()

        # We need this for the columns labels to work

        print 'Resetting index to avoid later trouble on manipulation'
        for d in dfs:
            d.reset_index(inplace = True)
            d.set_index('Mesure', inplace = True, drop = False)
            d.reindex_axis(labels_variables, axis = 0)
            d.reset_index(inplace = True, drop = True)
#             print d.to_string()
        for d in dfs_erf:
            d.reset_index(inplace = True)
            d['Mesure'] = agg.labels['dep']
            d.set_index('index', inplace = True, drop = False)
            d.reindex_axis(agg.labels.values(), axis = 0)
            d.reset_index(inplace = True, drop = True)
#             print d.to_string()

        # Concatening the openfisca tables for =/= years
        temp = pd.concat([dfs[0],dfs[1]], ignore_index = True)
        temp = pd.concat([temp,dfs[2]], ignore_index = True)
        temp = pd.concat([temp,dfs[3]], ignore_index = True)
        del temp[agg.labels['entity']], temp['index']
        gc.collect()

        print 'We split the real aggregates from the of table'
        temp2 = temp[[agg.labels['var'], agg.labels['benef_real'], agg.labels['dep_real'], 'year']]
        del temp[agg.labels['benef_real']], temp[agg.labels['dep_real']]
        temp['source'] = 'of'
        temp2['source'] = 'reel'
        temp2.rename(columns = {agg.labels['benef_real'] : agg.labels['benef'],
                                agg.labels['dep_real'] : agg.labels['dep']},
                     inplace = True)
        temp = pd.concat([temp,temp2], ignore_index = True)

        print 'We add the erf data to the table'
        for df in dfs_erf:
            del df['level_0'], df['Mesure']
            df.rename(columns = {'index' : agg.labels['var'], 1 : agg.labels['dep']}, inplace = True)
        temp3 = pd.concat([dfs_erf[0], dfs_erf[1]], ignore_index = True)
        temp3 = pd.concat([temp3, dfs_erf[2]], ignore_index = True)
        temp3 = pd.concat([temp3, dfs_erf[3]], ignore_index = True)
        temp3['source'] = 'erfs'
        gc.collect()
        temp = pd.concat([temp, temp3], ignore_index = True)
#         print temp.to_string()

        print 'Index manipulation to reshape the output'
        temp.reset_index(drop = True, inplace = True)
        # We set the new index
#         temp.set_index('Mesure', drop = True, inplace = True)
#         temp.set_index('source', drop = True, append = True, inplace = True)
#         temp.set_index('year', drop = False, append = True, inplace = True)
        temp = temp.groupby(by=["Mesure", "source", "year"], sort = False).sum()
        # Tricky, the [mesure, source, year] index is unique so sum() will return the only value
        # Groupby automatically deleted the source, mesure... columns and added them to index
        assert(isinstance(temp, pd.DataFrame))
#         print temp.to_string()

        # We want the years to be in columns, so we use unstack
        temp_unstacked = temp.unstack()
        # Unfortunately, unstack automatically sorts rows and columns, we have to reindex the table :

        ## Reindexing rows
        from pandas.core.index import MultiIndex
        indtemp1 = temp.index.get_level_values(0)
        indtemp2 = temp.index.get_level_values(1)
        indexi = zip(*[indtemp1, indtemp2])
        indexi_bis = []
        for i in xrange(0,len(indexi)):
            if indexi[i] not in indexi_bis:
                indexi_bis.append(indexi[i])
        indexi = indexi_bis
        del indexi_bis
        indexi = MultiIndex.from_tuples(indexi, names = ['Mesure', 'source'])
#         import pdb
#         pdb.set_trace()
        temp_unstacked = temp_unstacked.reindex_axis(indexi, axis = 0) # axis = 0 for rows, 1 for columns

        ## Reindexing columns
        # TODO : still not working
        col_indexi = []
        for col in temp.columns.get_level_values(0).unique():
            for yr in range(2006,2010):
                col_indexi.append((col, str(yr)))
        col_indexi = MultiIndex.from_tuples(col_indexi)
#         print col_indexi
#         print temp_unstacked.columns
        print col_indexi
#         temp_unstacked = temp_unstacked.reindex_axis(col_indexi, axis = 1)

        # Our table is ready to be turned to Excel worksheet !
        print temp_unstacked.to_string()
        temp_unstacked.fillna(0, inplace = True)
        return temp_unstacked



    dfs = []
    dfs_erf = []
    for i in range(2006,2010):
        year = i
        yr = str(i)
        # Running a standard SurveySim to get aggregates
        simulation = SurveySimulation()
        survey_filename = os.path.join(model.DATA_DIR, 'sources', 'test.h5')
        simulation.set_config(year=yr, survey_filename=survey_filename)
        simulation.set_param()
        simulation.compute()
        agg = Aggregates()
        agg.set_simulation(simulation)
        agg.compute()
        df = agg.aggr_frame
        df['year'] = year
        label_by_name = dict(
            (name, column.label)
            for name, column in simulation.output_table.column_by_name.iteritems()
            )
        #colonnes = simulation.output_table.table.columns
        dfs.append(df)
        variables = agg.varlist
        labels_variables = [
            label_by_name[variable]
            for variable in variables
            ]
        del simulation, agg, df
        gc.collect()

        #Getting ERF aggregates from ERF table
        temp = build_erf_aggregates(variables=variables, year= year)
        temp.rename(columns = label_by_name, inplace = True)
        temp = temp.T
        temp.reset_index(inplace = True)
        temp['year'] = year
        dfs_erf.append(temp)
        del temp
        gc.collect()
        print 'Out of data fetching for year ' + str(year)
    print 'Out of data fetching'

    datatest = reshape_tables(dfs, dfs_erf)
    save_as_xls(datatest, alter_method = False)

Example #17

0

Show file

File: test_datatable3.py Project: Iliato/openfisca-qt

def build_aggregates3():

    writer = None
    years = range(2006,2007)
    tot1 = 0
    tot3 = 0
    for year in years:
        yr = str(year)
#        fname = "Agg_%s.%s" %(str(yr), "xls")
        simu = SurveySimulation()
        simu.set_config(year = yr)
        simu.set_param()
        import time


        deb3 = time.clock()
        simu.set_survey(num_table=3)
        simu.compute()
        fin3  = time.clock()

        print "coucou"
        col = simu.survey.column_by_name.get("so")
        print col.entity
        agg3 = Aggregates3()
        agg3.set_simulation(simu)
        agg3.compute()
#        if writer is None:
#            writer = ExcelWriter(str(fname_all))
        fname_all = os.path.join(destination_dir, 'agg3.xlsx')
        agg3.aggr_frame.to_excel(fname_all, yr, index= False, header= True)

        for ent in ['ind','men','foy','fam']:
            dir_name = destination_dir + ent +'.csv'
##            simu.survey.table3[ent].to_csv(dir_name)
#            import pdb
#            pdb.set_trace()
##            com.convert_to_r_dataframe
            simu.output_table.table3[ent][:num_output].to_csv(dir_name)


        deb1 = time.clock()
        simu.set_survey(num_table=1)
        print "prob compute"
        simu.compute()
        fin1  = time.clock()

        dir_name = destination_dir + 'en1' +'.csv'
        print "prob output"
        simu.output_table.table[:num_output].to_csv(dir_name)

        agg = Aggregates()
        print "prob set"
        agg.set_simulation(simu)
        print "prob compute"
        agg.compute()

#        if writer is None:
#            writer = ExcelWriter(str(fname_all))
        fname_all = os.path.join(destination_dir, 'agg1.xlsx')
        print "prob ind"
        agg.aggr_frame.to_excel(fname_all, yr, index= False, header= True)
        del simu
        del agg
        import gc
        gc.collect()
        tot1 += fin1 - deb1
        tot3 += fin3 - deb3
        print "Time to process 1 table :" +str(fin1 - deb1)
        print "Time to process 3 table :" +str(fin3 - deb3)
    print tot1, tot3, tot3- tot1

Example #18

0

Show file

File: test_datatable3.py Project: stanislasrybak/openfisca-qt

def build_aggregates3():

    writer = None
    years = range(2006, 2007)
    tot1 = 0
    tot3 = 0
    for year in years:
        yr = str(year)
        #        fname = "Agg_%s.%s" %(str(yr), "xls")
        simu = SurveySimulation()
        simu.set_config(year=yr)
        simu.set_param()
        import time

        deb3 = time.clock()
        simu.set_survey(num_table=3)
        simu.compute()
        fin3 = time.clock()

        print "coucou"
        col = simu.survey.column_by_name.get("so")
        print col.entity
        agg3 = Aggregates3()
        agg3.set_simulation(simu)
        agg3.compute()
        #        if writer is None:
        #            writer = ExcelWriter(str(fname_all))
        fname_all = os.path.join(destination_dir, 'agg3.xlsx')
        agg3.aggr_frame.to_excel(fname_all, yr, index=False, header=True)

        for ent in ['ind', 'men', 'foy', 'fam']:
            dir_name = destination_dir + ent + '.csv'
            ##            simu.survey.table3[ent].to_csv(dir_name)
            #            import pdb
            #            pdb.set_trace()
            ##            com.convert_to_r_dataframe
            simu.output_table.table3[ent][:num_output].to_csv(dir_name)

        deb1 = time.clock()
        simu.set_survey(num_table=1)
        print "prob compute"
        simu.compute()
        fin1 = time.clock()

        dir_name = destination_dir + 'en1' + '.csv'
        print "prob output"
        simu.output_table.table[:num_output].to_csv(dir_name)

        agg = Aggregates()
        print "prob set"
        agg.set_simulation(simu)
        print "prob compute"
        agg.compute()

        #        if writer is None:
        #            writer = ExcelWriter(str(fname_all))
        fname_all = os.path.join(destination_dir, 'agg1.xlsx')
        print "prob ind"
        agg.aggr_frame.to_excel(fname_all, yr, index=False, header=True)
        del simu
        del agg
        import gc
        gc.collect()
        tot1 += fin1 - deb1
        tot3 += fin3 - deb3
        print "Time to process 1 table :" + str(fin1 - deb1)
        print "Time to process 3 table :" + str(fin3 - deb3)
    print tot1, tot3, tot3 - tot1

Example #19

0

Show file

File: check_num_table.py Project: stanislasrybak/openfisca-qt

def compar_num_table():

    writer = None
    years = range(2006, 2007)
    tot1 = 0
    tot3 = 0
    filename = destination_dir + 'output3.h5'
    store = HDFStore(filename)
    for year in years:
        yr = str(year)
        #        fname = "Agg_%s.%s" %(str(yr), "xls")
        simu = SurveySimulation()
        simu.set_config(year=yr)
        simu.set_param()
        import time

        deb3 = time.clock()
        sous_ech = [
            6000080, 6000080, 6000195, 6000195, 6000288, 6000288, 6000499,
            6000499, 6000531, 6000531, 6000542, 6000542
        ]
        sous_ech = [
            6000191, 6000191, 6000531, 6000614, 6000195, 6000195, 6000499,
            6000499, 6000531, 6000614, 6000531, 6000614, 6000531, 6000531,
            6000195, 6000195, 6000288, 6000288, 6000499, 6000499, 6000531,
            6000542, 6000542, 6000614, 6000191
        ]

        #al
        sous_ech = [6000122, 6000865, 6001256]
        # typ_men
        sous_ech = [6006630, 6006753, 6008508]
        # foy
        sous_ech = [6036028, 6028397, 6019248]

        sous_ech = None
        simu.set_survey(num_table=3, subset=sous_ech)
        simu.compute()

        agg3 = Aggregates()
        for ent in ['ind', 'men', 'foy', 'fam']:
            tab = simu.output_table.table3[ent]
            renam = {}
            renam['wprm_' + ent] = 'wprm'
            tab = tab.rename(columns=renam)
        agg3.set_simulation(simu)
        agg3.compute()

        fin3 = time.clock()

        #        if writer is None:
        #            writer = ExcelWriter(str(fname_all))
        fname_all = os.path.join(destination_dir, 'agg3.xlsx')
        agg3.aggr_frame.to_excel(fname_all, yr, index=False, header=True)

        # export to csv to run compar in R
        for ent in ['ind', 'men', 'foy', 'fam']:
            dir_name = destination_dir + ent + '.csv'
            tab = simu.output_table.table3[ent]
            renam = {}
            renam['wprm_' + ent] = 'wprm'
            if ent == 'ind':
                ident = [
                    "idmen", "quimen", "idfam", "quifam", "idfoy", "quifoy"
                ]
            else:
                ident = ["idmen", "idfam", "idfoy"]
            for nom in ident:
                renam[nom + '_' + ent] = nom
            tab = tab.rename(columns=renam)
            order_var = ident + list(tab.columns - ident)
            tab.sort(['idmen', 'idfam',
                      'idfoy']).ix[:num_output, order_var].to_csv(dir_name)

        deb1 = time.clock()
        simu.set_survey(num_table=1, subset=sous_ech)
        simu.compute()

        agg = Aggregates()
        agg.set_simulation(simu)
        agg.compute()

        fin1 = time.clock()

        # export to csv to run compar in R
        dir_name = destination_dir + 'en1' + '.csv'
        tab = simu.output_table.table
        tab.drop([
            'idfam_fam', 'idfam_foy', 'idfam_men', 'idfoy_fam', 'idfoy_foy',
            'idfoy_men', 'idmen_men', 'idmen_fam', 'idmen_foy', 'wprm_foy',
            'wprm_fam'
        ],
                 axis=1,
                 inplace=True)
        renam = {}
        ent = 'ind'
        renam['wprm_' + ent] = 'wprm'
        ident = [
            "noi", "idmen", "quimen", "idfam", "quifam", "idfoy", "quifoy"
        ]
        for nom in ident:
            renam[nom + '_' + ent] = nom
        tab = tab.rename(columns=renam)
        order_var = ident + list(tab.columns - ident)
        tab.sort(['idmen', 'idfam', 'idfoy']).ix[:num_output,
                                                 order_var].to_csv(dir_name)

        #        if writer is None:
        #            writer = ExcelWriter(str(fname_all))
        fname_all = os.path.join(destination_dir, 'agg1.xlsx')
        agg.aggr_frame.to_excel(fname_all, yr, index=False, header=True)
        del simu
        del agg
        import gc
        gc.collect()
        tot1 += fin1 - deb1
        tot3 += fin3 - deb3
        print "Time to process 1 table :" + str(fin1 - deb1)
        print "Time to process 3 table :" + str(fin3 - deb3)
    print tot1, tot3, tot3 - tot1

Example #20

0

Show file

File: check_num_table.py Project: Iliato/openfisca-qt

def compar_num_table():

    writer = None
    years = range(2006,2007)
    tot1 = 0
    tot3 = 0
    filename = destination_dir+'output3.h5'
    store = HDFStore(filename)
    for year in years:
        yr = str(year)
#        fname = "Agg_%s.%s" %(str(yr), "xls")
        simu = SurveySimulation()
        simu.set_config(year = yr)
        simu.set_param()
        import time


        deb3 = time.clock()
        sous_ech =  [6000080, 6000080, 6000195, 6000195, 6000288, 6000288, 6000499, 6000499, 6000531, 6000531, 6000542, 6000542]
        sous_ech =  [6000191, 6000191, 6000531, 6000614, 6000195, 6000195, 6000499, 6000499, 6000531, 6000614, 6000531,
        6000614, 6000531, 6000531, 6000195, 6000195, 6000288, 6000288, 6000499, 6000499, 6000531, 6000542,
         6000542, 6000614, 6000191]

        #al
        sous_ech =  [6000122, 6000865, 6001256]
        # typ_men
        sous_ech =  [6006630, 6006753, 6008508]
        # foy
        sous_ech =  [6036028, 6028397, 6019248]

        sous_ech = None
        simu.set_survey(num_table=3, subset=sous_ech)
        simu.compute()

        agg3 = Aggregates()
        for ent in ['ind','men','foy','fam']:
            tab = simu.output_table.table3[ent]
            renam={}
            renam['wprm_'+ent] = 'wprm'
            tab = tab.rename(columns=renam)
        agg3.set_simulation(simu)
        agg3.compute()

        fin3  = time.clock()


#        if writer is None:
#            writer = ExcelWriter(str(fname_all))
        fname_all = os.path.join(destination_dir, 'agg3.xlsx')
        agg3.aggr_frame.to_excel(fname_all, yr, index= False, header= True)



        # export to csv to run compar in R
        for ent in ['ind','men','foy','fam']:
            dir_name = destination_dir + ent +'.csv'
            tab = simu.output_table.table3[ent]
            renam ={}
            renam['wprm_'+ent] = 'wprm'
            if ent=='ind':
                ident = ["idmen","quimen","idfam","quifam","idfoy","quifoy"]
            else:
                ident = ["idmen","idfam","idfoy"]
            for nom in ident:
                renam[nom+'_'+ent] = nom
            tab = tab.rename(columns=renam)
            order_var = ident+list(tab.columns - ident)
            tab.sort(['idmen','idfam','idfoy']).ix[:num_output,order_var].to_csv(dir_name)


        deb1 = time.clock()
        simu.set_survey(num_table=1, subset=sous_ech)
        simu.compute()

        agg = Aggregates()
        agg.set_simulation(simu)
        agg.compute()

        fin1  = time.clock()

        # export to csv to run compar in R
        dir_name = destination_dir + 'en1' +'.csv'
        tab = simu.output_table.table
        tab.drop(['idfam_fam','idfam_foy','idfam_men','idfoy_fam','idfoy_foy','idfoy_men','idmen_men','idmen_fam','idmen_foy','wprm_foy','wprm_fam'],
                        axis=1, inplace=True)
        renam ={}
        ent = 'ind'
        renam['wprm_'+ent] = 'wprm'
        ident = ["noi","idmen","quimen","idfam","quifam","idfoy","quifoy"]
        for nom in ident:
            renam[nom+'_'+ent] = nom
        tab = tab.rename(columns=renam)
        order_var = ident+list(tab.columns - ident)
        tab.sort(['idmen','idfam','idfoy']).ix[:num_output,order_var].to_csv(dir_name)


#        if writer is None:
#            writer = ExcelWriter(str(fname_all))
        fname_all = os.path.join(destination_dir, 'agg1.xlsx')
        agg.aggr_frame.to_excel(fname_all, yr, index= False, header= True)
        del simu
        del agg
        import gc
        gc.collect()
        tot1 += fin1 - deb1
        tot3 += fin3 - deb3
        print "Time to process 1 table :" +str(fin1 - deb1)
        print "Time to process 3 table :" +str(fin3 - deb3)
    print tot1, tot3, tot3- tot1

Example #21

0

Show file

File: recap.py Project: stanislasrybak/openfisca-qt

    def _reshape_tables(self):
        """
        TODO _reshape_tables should be cleaned !!!
        """
        dfs = self.aggregates_of_dataframe
        dfs_erf = self.aggregates_erfs_dataframe
        labels_variables = self.labels_variables

        agg = Aggregates()

        # We need this for the columns labels to work

        print 'Resetting index to avoid later trouble on manipulation'
        for d in dfs:
            d.reset_index(inplace=True)
            d.set_index('Mesure', inplace=True, drop=False)
            d.reindex_axis(labels_variables, axis=0)
            d.reset_index(inplace=True, drop=True)
#             print d.to_string()
        for d in dfs_erf:
            d.reset_index(inplace=True)
            d['Mesure'] = agg.labels['dep']
            d.set_index('index', inplace=True, drop=False)
            d.reindex_axis(agg.labels.values(), axis=0)
            d.reset_index(inplace=True, drop=True)
#             print d.to_string()

# Concatening the openfisca tables for =/= years

        temp = dfs[0]
        if len(dfs) != 1:
            for d in dfs[1:]:
                temp = pd.concat([temp, d], ignore_index=True)

        del temp[agg.labels['entity']], temp['index']
        gc.collect()

        print 'We split the real aggregates from the of table'
        temp2 = temp[[
            agg.labels['var'], agg.labels['benef_real'],
            agg.labels['dep_real'], 'year'
        ]]
        del temp[agg.labels['benef_real']], temp[agg.labels['dep_real']]
        sauvegarde = temp.columns.get_level_values(0).unique()
        temp['source'] = 'of'
        temp2['source'] = 'reel'
        temp2.rename(columns={
            agg.labels['benef_real']: agg.labels['benef'],
            agg.labels['dep_real']: agg.labels['dep']
        },
                     inplace=True)
        temp = pd.concat([temp, temp2], ignore_index=True)

        print 'We add the erf data to the table'
        for df in dfs_erf:
            del df['level_0'], df['Mesure']
            df.rename(columns={
                'index': agg.labels['var'],
                1: agg.labels['dep']
            },
                      inplace=True)

        temp3 = dfs_erf[0]
        if len(dfs) != 1:
            for d3 in dfs_erf[1:]:
                temp3 = pd.concat([temp3, d3], ignore_index=True)

        temp3['source'] = 'erfs'
        gc.collect()
        temp = pd.concat([temp, temp3], ignore_index=True)
        #         print temp.to_string()

        print 'Index manipulation to reshape the output'
        temp.reset_index(drop=True, inplace=True)
        # We set the new index
        #         temp.set_index('Mesure', drop = True, inplace = True)
        #         temp.set_index('source', drop = True, append = True, inplace = True)
        #         temp.set_index('year', drop = False, append = True, inplace = True)
        temp = temp.groupby(by=["Mesure", "source", "year"], sort=False).sum()
        # Tricky, the [mesure, source, year] index is unique so sum() will return the only value
        # Groupby automatically deleted the source, mesure... columns and added them to index
        assert (isinstance(temp, pd.DataFrame))
        #         print temp.to_string()

        # We want the years to be in columns, so we use unstack
        temp_unstacked = temp.unstack()
        # Unfortunately, unstack automatically sorts rows and columns, we have to reindex the table :

        ## Reindexing rows
        from pandas.core.index import MultiIndex
        indtemp1 = temp.index.get_level_values(0)
        indtemp2 = temp.index.get_level_values(1)
        indexi = zip(*[indtemp1, indtemp2])
        indexi_bis = []
        for i in xrange(0, len(indexi)):
            if indexi[i] not in indexi_bis:
                indexi_bis.append(indexi[i])
        indexi = indexi_bis
        del indexi_bis
        indexi = MultiIndex.from_tuples(indexi, names=['Mesure', 'source'])
        #         import pdb
        #         pdb.set_trace()
        temp_unstacked = temp_unstacked.reindex_axis(
            indexi, axis=0)  # axis = 0 for rows, 1 for columns

        ## Reindexing columns
        # TODO : still not working
        col_indexi = []
        print temp.columns
        for i in xrange(len(sauvegarde)):
            #         for col in temp.columns.get_level_values(0).unique():
            col = sauvegarde[i]
            for yr in self.years:
                col_indexi.append((col, yr))
        col_indexi = MultiIndex.from_tuples(col_indexi)
        #         print col_indexi
        #         print temp_unstacked.columns
        print col_indexi
        print temp_unstacked.columns
        temp_unstacked = temp_unstacked.reindex_axis(col_indexi, axis=1)

        # Our table is ready to be turned to Excel worksheet !
        #         print temp_unstacked.to_string()
        del temp_unstacked['Mesure'], temp_unstacked['year']
        temp_unstacked.fillna(0, inplace=True)
        return temp_unstacked

Example #22

0

Show file

File: example_test.py Project: stanislasrybak/openfisca-qt

def test_laurence():
    '''
    Computes the openfisca/real numbers comparaison table in excel worksheet.

    Warning: To add more years you'll have to twitch the code manually.
    Default is years 2006 to 2009 included.
    '''
    def save_as_xls(df, alter_method=True):
        # Saves a datatable under Excel table using XLtable
        if alter_method:
            filename = "C:\desindexation.xls"
            print filename
            writer = ExcelWriter(str(filename))
            df.to_excel(writer)
            writer.save()
        else:
            # XLtable utile pour la mise en couleurs, reliefs, etc. de la table, inutile sinon
            stxl = XLtable(df)
            # <========== HERE TO CHANGE OVERLAY ======>
            wb = xlwt.Workbook()
            ws = wb.add_sheet('resultatstest')
            erfxcel = stxl.place_table(ws)
            try:  # I dunno more clever commands
                wb.save("C:\outputtest.xls")
            except:
                n = random.randint(0, 100)
                wb.save("C:\outputtest_" + str(n) + ".xls")

#===============================================================================
#     from numpy.random import randn
#     mesures = ['cotsoc','af', 'add', 'cotsoc','af', 'add', 'cotsoc','af', 'add',
#                'cotsoc','af', 'add', 'cotsoc','af', 'add', 'cotsoc','af', 'add',
#                'cotsoc','af', 'add', 'cotsoc','af', 'add', 'cotsoc','af', 'add']
#     sources = ['of', 'of', 'of', 'erfs', 'erfs', 'erfs', 'reel', 'reel', 'reel',
#                'of', 'of', 'of', 'erfs', 'erfs', 'erfs', 'reel', 'reel', 'reel',
#                'of', 'of', 'of', 'erfs', 'erfs', 'erfs', 'reel', 'reel', 'reel']
#     year = ['2006', '2006', '2006', '2006', '2006', '2006', '2006', '2006', '2006',
#             '2007', '2007', '2007', '2007', '2007', '2007', '2007', '2007', '2007',
#             '2008', '2008', '2008', '2008', '2008', '2008', '2008', '2008', '2008']
#     ind = zip(*[mesures,sources, year])
# #     print ind
#     from pandas.core.index import MultiIndex
#     ind = MultiIndex.from_tuples(ind, names = ['mesure', 'source', 'year'])
# #     print ind
#     d = pd.DataFrame(randn(27,2), columns = ['Depenses', 'Recettes'], index = ind)
#     d.reset_index(inplace = True, drop = False)
#     d = d.groupby(by = ['mesure', 'source', 'year'], sort = False).sum()
#     print d
#     d_unstacked = d.unstack()
#     print d
#     indtemp1 = d.index.get_level_values(0)
#     indtemp2 = d.index.get_level_values(1)
#     indexi = zip(*[indtemp1, indtemp2])
#     print indexi
#     indexi_bis = []
#     for i in xrange(len(indexi)):
#         if indexi[i] not in indexi_bis:
#             indexi_bis.append(indexi[i])
#     indexi = indexi_bis
#     indexi = MultiIndex.from_tuples(indexi, names = ['Mesure', 'source'])
#     print indexi
#     d_unstacked = d_unstacked.reindex_axis(indexi, axis = 0)
#     print d_unstacked.to_string()
#     save_as_xls(d_unstacked)
#     return
#===============================================================================

    def reshape_tables(dfs, dfs_erf):
        agg = Aggregates()

        # We need this for the columns labels to work

        print 'Resetting index to avoid later trouble on manipulation'
        for d in dfs:
            d.reset_index(inplace=True)
            d.set_index('Mesure', inplace=True, drop=False)
            d.reindex_axis(labels_variables, axis=0)
            d.reset_index(inplace=True, drop=True)
#             print d.to_string()
        for d in dfs_erf:
            d.reset_index(inplace=True)
            d['Mesure'] = agg.labels['dep']
            d.set_index('index', inplace=True, drop=False)
            d.reindex_axis(agg.labels.values(), axis=0)
            d.reset_index(inplace=True, drop=True)


#             print d.to_string()

# Concatening the openfisca tables for =/= years
        temp = pd.concat([dfs[0], dfs[1]], ignore_index=True)
        temp = pd.concat([temp, dfs[2]], ignore_index=True)
        temp = pd.concat([temp, dfs[3]], ignore_index=True)
        del temp[agg.labels['entity']], temp['index']
        gc.collect()

        print 'We split the real aggregates from the of table'
        temp2 = temp[[
            agg.labels['var'], agg.labels['benef_real'],
            agg.labels['dep_real'], 'year'
        ]]
        del temp[agg.labels['benef_real']], temp[agg.labels['dep_real']]
        temp['source'] = 'of'
        temp2['source'] = 'reel'
        temp2.rename(columns={
            agg.labels['benef_real']: agg.labels['benef'],
            agg.labels['dep_real']: agg.labels['dep']
        },
                     inplace=True)
        temp = pd.concat([temp, temp2], ignore_index=True)

        print 'We add the erf data to the table'
        for df in dfs_erf:
            del df['level_0'], df['Mesure']
            df.rename(columns={
                'index': agg.labels['var'],
                1: agg.labels['dep']
            },
                      inplace=True)
        temp3 = pd.concat([dfs_erf[0], dfs_erf[1]], ignore_index=True)
        temp3 = pd.concat([temp3, dfs_erf[2]], ignore_index=True)
        temp3 = pd.concat([temp3, dfs_erf[3]], ignore_index=True)
        temp3['source'] = 'erfs'
        gc.collect()
        temp = pd.concat([temp, temp3], ignore_index=True)
        #         print temp.to_string()

        print 'Index manipulation to reshape the output'
        temp.reset_index(drop=True, inplace=True)
        # We set the new index
        #         temp.set_index('Mesure', drop = True, inplace = True)
        #         temp.set_index('source', drop = True, append = True, inplace = True)
        #         temp.set_index('year', drop = False, append = True, inplace = True)
        temp = temp.groupby(by=["Mesure", "source", "year"], sort=False).sum()
        # Tricky, the [mesure, source, year] index is unique so sum() will return the only value
        # Groupby automatically deleted the source, mesure... columns and added them to index
        assert (isinstance(temp, pd.DataFrame))
        #         print temp.to_string()

        # We want the years to be in columns, so we use unstack
        temp_unstacked = temp.unstack()
        # Unfortunately, unstack automatically sorts rows and columns, we have to reindex the table :

        ## Reindexing rows
        from pandas.core.index import MultiIndex
        indtemp1 = temp.index.get_level_values(0)
        indtemp2 = temp.index.get_level_values(1)
        indexi = zip(*[indtemp1, indtemp2])
        indexi_bis = []
        for i in xrange(0, len(indexi)):
            if indexi[i] not in indexi_bis:
                indexi_bis.append(indexi[i])
        indexi = indexi_bis
        del indexi_bis
        indexi = MultiIndex.from_tuples(indexi, names=['Mesure', 'source'])
        #         import pdb
        #         pdb.set_trace()
        temp_unstacked = temp_unstacked.reindex_axis(
            indexi, axis=0)  # axis = 0 for rows, 1 for columns

        ## Reindexing columns
        # TODO : still not working
        col_indexi = []
        for col in temp.columns.get_level_values(0).unique():
            for yr in range(2006, 2010):
                col_indexi.append((col, str(yr)))
        col_indexi = MultiIndex.from_tuples(col_indexi)
        #         print col_indexi
        #         print temp_unstacked.columns
        print col_indexi
        #         temp_unstacked = temp_unstacked.reindex_axis(col_indexi, axis = 1)

        # Our table is ready to be turned to Excel worksheet !
        print temp_unstacked.to_string()
        temp_unstacked.fillna(0, inplace=True)
        return temp_unstacked

    dfs = []
    dfs_erf = []
    for i in range(2006, 2010):
        year = i
        yr = str(i)
        # Running a standard SurveySim to get aggregates
        simulation = SurveySimulation()
        survey_filename = os.path.join(model.DATA_DIR, 'sources', 'test.h5')
        simulation.set_config(year=yr, survey_filename=survey_filename)
        simulation.set_param()
        simulation.compute()
        agg = Aggregates()
        agg.set_simulation(simulation)
        agg.compute()
        df = agg.aggr_frame
        df['year'] = year
        label_by_name = dict(
            (name, column.label) for name, column in
            simulation.output_table.column_by_name.iteritems())
        #colonnes = simulation.output_table.table.columns
        dfs.append(df)
        variables = agg.varlist
        labels_variables = [label_by_name[variable] for variable in variables]
        del simulation, agg, df
        gc.collect()

        #Getting ERF aggregates from ERF table
        temp = build_erf_aggregates(variables=variables, year=year)
        temp.rename(columns=label_by_name, inplace=True)
        temp = temp.T
        temp.reset_index(inplace=True)
        temp['year'] = year
        dfs_erf.append(temp)
        del temp
        gc.collect()
        print 'Out of data fetching for year ' + str(year)
    print 'Out of data fetching'

    datatest = reshape_tables(dfs, dfs_erf)
    save_as_xls(datatest, alter_method=False)