def run(years_calage):
    """Build the data base for every calibration year in ``years_calage``.

    For each year, ``run_all`` is called with the fixed list of survey years,
    the elapsed wall-clock time is logged, and a summary line is printed.

    Parameters
    ----------
    years_calage : iterable of int
        Calibration years to process, one after the other.
    """
    import time
    year_data_list = [1995, 2000, 2005, 2011]
    for year_calage in years_calage:
        start = time.time()
        run_all(year_calage, year_data_list)
        log.info("Finished {}".format(time.time() - start))
        # A single parenthesised argument prints identically with the Python 2
        # print statement and the Python 3 print function.
        print("Base construite pour l'année {} à partir de l'enquête bdf {}".format(
            year_calage, find_nearest_inferior(year_data_list, year_calage)
            ))
def get_inflators_cn_to_cn(target_year):
    '''
        Compute the ageing inflators from the national accounts aggregates.

        For every key of the data-year aggregates, the inflator is the
        target-year aggregate divided by the data-year aggregate.
    '''
    def _aggregates_of(year):
        # Column 'consoCN_COICOP_<year>' of the national accounts table, as a dict.
        return get_cn_aggregates(year)['consoCN_COICOP_{}'.format(year)].to_dict()

    reference_year = find_nearest_inferior(data_years, target_year)
    reference_masses = _aggregates_of(reference_year)
    target_masses = _aggregates_of(target_year)

    return {
        poste: target_masses[poste] / reference_masses[poste]
        for poste in reference_masses
        }
def get_inflators(target_year):
    '''
    Compute the combined ratios: calibration (bdf to cn for the data year)
    multiplied by ageing (cn to cn up to the target year), from the national
    accounts aggregates and the bdf consumption aggregates.

    Returns a dict with one ratio per key of the cn-to-cn inflators.
    '''
    reference_year = find_nearest_inferior(data_years, target_year)
    calibration_ratios = get_inflators_bdf_to_cn(reference_year)
    ageing_ratios = get_inflators_cn_to_cn(target_year)

    return {
        poste: calibration_ratios[poste] * ageing_ratios[poste]
        for poste in ageing_ratios
        }
def run_all(year_calage = 2011, year_data_list = None):
    """Assemble the homogenised household tables for calibration year ``year_calage``.

    The survey year is picked with ``find_nearest_inferior(year_data_list, year_calage)``.
    The four homogenisation builders are run for that year, their outputs are
    read back from the temporary store, sanity-checked and concatenated
    column-wise into a single data frame.

    Parameters
    ----------
    year_calage : int
        Calibration (target) year.
    year_data_list : list of int, optional
        Candidate survey years; defaults to ``[1995, 2000, 2005, 2011]``.
    """
    # A fresh list per call avoids sharing a mutable default between calls.
    if year_data_list is None:
        year_data_list = [1995, 2000, 2005, 2011]

    temporary_store = TemporaryStore.create(file_name = "indirect_taxation_tmp")
    try:
        # Which survey should be used for the calibration?
        year_data = find_nearest_inferior(year_data_list, year_calage)

        # 4 parallel steps homogenising the source data.
        # Consumption expenditures:
        build_depenses_homogenisees(year = year_data)
        build_imputation_loyers_proprietaires(year = year_data)

        # NOTE(review): these keys use year_calage while the builders above are
        # called with year_data -- confirm the store holds them when the two differ.
        depenses = temporary_store["depenses_bdf_{}".format(year_calage)]
        depenses.index = depenses.index.astype(ident_men_dtype)
        depenses_by_grosposte = temporary_store["depenses_by_grosposte_{}".format(year_calage)]
        depenses_by_grosposte.index = depenses_by_grosposte.index.astype(str)

        # Vehicles:
        build_homogeneisation_vehicules(year = year_data)
        # NOTE(review): the test is on year_calage although the key read is
        # built from year_data -- confirm this is intended.
        if year_calage != 1995:
            vehicule = temporary_store['automobile_{}'.format(year_data)]
            vehicule.index = vehicule.index.astype(ident_men_dtype)
        else:
            vehicule = None

        # Socio-demographic variables:
        build_homogeneisation_caracteristiques_sociales(year = year_data)
        menage = temporary_store['donnes_socio_demog_{}'.format(year_data)]
        menage.index = menage.index.astype(ident_men_dtype)

        # Income variables:
        build_homogeneisation_revenus_menages(year = year_data)
        revenus = temporary_store["revenus_{}".format(year_calage)]
        revenus.index = revenus.index.astype(ident_men_dtype)
    finally:
        # Close the store even when one of the reads above fails.
        temporary_store.close()

    # Gather the results of the 4 steps.
    preprocessed_data_frame_by_name = dict(
        revenus = revenus,
        vehicule = vehicule,
        menage = menage,
        depenses = depenses,
        depenses_by_grosposte = depenses_by_grosposte
        )

    for name, preprocessed_data_frame in preprocessed_data_frame_by_name.items():
        if preprocessed_data_frame is None:
            # 'vehicule' is None when year_calage == 1995; there is nothing to
            # validate, and pandas.concat silently drops None entries.
            continue
        assert preprocessed_data_frame.index.name == 'ident_men', \
            'Index is labelled {} instead of ident_men in data frame {} for year {}'.format(
                preprocessed_data_frame.index.name, name, year_data)
        assert len(preprocessed_data_frame) != 0, 'Empty data frame {}'.format(name)
        assert preprocessed_data_frame.index.dtype == numpy.dtype('O'), "index for {} is {}".format(
            name, preprocessed_data_frame.index.dtype)

    data_frame = pandas.concat(
        list(preprocessed_data_frame_by_name.values()),
        axis = 1,
        )

    # Replace missing values by 0 for the variables listed per survey year.
    if year_data == 2005:
        for vehicule_variable in ['veh_tot', 'veh_essence', 'veh_diesel', 'pourcentage_vehicule_essence']:
            data_frame.loc[data_frame[vehicule_variable].isnull(), vehicule_variable] = 0
        for variable in ['age{}'.format(i) for i in range(3, 14)] + ['agecj', 'agfinetu', 'agfinetu_cj', 'nenfhors']:
            data_frame.loc[data_frame[variable].isnull(), variable] = 0
    if year_data == 2011:
        for vehicule_variable in ['veh_tot', 'veh_essence', 'veh_diesel', 'pourcentage_vehicule_essence',
        'rev_disp_loyerimput', 'rev_disponible', 'loyer_impute']:
            data_frame.loc[data_frame[vehicule_variable].isnull(), vehicule_variable] = 0
    # 'ratio_loyer_impute',  'ratio_revenus' To be added

    data_frame.index.name = "ident_men"
    # TODO: Homogenise: either make ident_men exist as a column for every year,
    # or make it the index for every year.

    # Keep only the rows with zeat != 0 (metropolitan households, per the
    # original comment).
    if year_data == 2011:
        data_frame = data_frame.query('zeat != 0')

    try:
        data_frame.reset_index(inplace = True)
    except ValueError as e:
        # Best effort: a failing reset_index is only logged.
        log.info('ignoring reset_index because {}'.format(e))