Example #1
0
    def function(self, simulation, period):
        """Compute the monthly amount of this temporary hiring aid.

        The period is first narrowed to ``period.this_month``. An employee is
        eligible when all of the following hold:

        - the base salary is at most 1.3 times the prorated Smic;
        - the company headcount is below 250 and above 1;
        - the contract started between 2016-01-18 and 2016-12-31 (inclusive);
        - the contract duration code is 0 (open-ended), or 1 (fixed-term) with
          a length of at least 6 months;
        - the contract started after the date 24 months before the period start;
        - the employee is not an apprentice.

        Returns:
            A ``(period, amount)`` tuple. The amount is ``4000 / 24`` multiplied
            by the proration coefficient when eligible, and zero otherwise.
        """
        period = period.this_month
        headcount = simulation.calculate('effectif_entreprise', period)
        is_apprentice = simulation.calculate('apprenti', period)
        contract_type = simulation.calculate('contrat_de_travail_duree', period)
        contract_start = simulation.calculate('contrat_de_travail_debut', period)
        contract_end = simulation.calculate('contrat_de_travail_fin', period)
        proration = simulation.calculate('coefficient_proratisation', period)
        prorated_smic = simulation.calculate('smic_proratise', period)
        base_salary = simulation.calculate('salaire_de_base', period)

        # This aid is temporary. If it is nevertheless renewed and amended for 2017,
        # the dates and the amount should be implemented as XML parameters.

        # Salary up to 1.3 times the Smic.
        salary_cap = 1.3 * prorated_smic
        salary_ok = base_salary <= salary_cap

        # Reserved to small and medium-sized companies.
        is_sme = headcount < 250

        # Cannot be combined with the first-hire aid, which is identical except
        # that it covers every salary level.
        not_first_hire = headcount > 1

        first_start = datetime64("2016-01-18")
        last_start = datetime64("2016-12-31")
        started_in_window = and_(contract_start >= first_start, contract_start <= last_start)

        # A fixed-term contract (code 1) must last at least 6 months;
        # an open-ended contract (code 0) always qualifies.
        contract_length = (contract_end - contract_start).astype('timedelta64[M]')
        long_fixed_term = and_(contract_type == 1, contract_length >= timedelta64(6, 'M'))
        duration_ok = or_(contract_type == 0, long_fixed_term)

        # The aid is only valid for 2 years.
        earliest_start = datetime64(period.offset(-24, 'month').start)
        still_running = earliest_start < contract_start

        # Multiply the flags together, in the same left-to-right order as before.
        eligible = salary_ok
        for condition in (is_sme, not_first_hire, started_in_window, duration_ok,
                          still_running, not_(is_apprentice)):
            eligible = eligible * condition

        # Total over 24 months, at most 500 EUR per quarter.
        total_amount = 4000
        monthly_amount = total_amount / 24

        # For a part-time employee the aid is prorated according to working time.
        # TODO: is multiplying by the proration coefficient enough for part-time work?
        # To be tested.
        return period, eligible * monthly_amount * proration
    def formula_2015_06_09(individu, period, parameters):
        """Compute the monthly hiring aid for a company whose headcount is 1.

        An employee is eligible when all of the following hold:

        - the company headcount equals 1;
        - the employee is not an apprentice;
        - the contract started between 2015-06-09 and 2016-12-31 (inclusive);
        - the contract is open-ended, or fixed-term with a length of at least
          6 months;
        - the contract started after the date 24 months before the period start.

        The amount is ``4000 / 24`` multiplied by the proration coefficient, and
        is cancelled when ``exoneration_cotisations_employeur_jei`` is truthy.
        """
        headcount = individu('effectif_entreprise', period)
        is_apprentice = individu('apprenti', period)
        contract_type = individu('contrat_de_travail_duree', period)
        ContractTypes = contract_type.possible_values
        contract_start = individu('contrat_de_travail_debut', period)
        contract_end = individu('contrat_de_travail_fin', period)
        proration = individu('coefficient_proratisation', period)
        jei_exemption = individu('exoneration_cotisations_employeur_jei', period)

        # This aid is temporary.
        # TODO: if it is nevertheless renewed and amended for 2017, the dates and the
        # amount should be implemented as XML parameters.

        window_opens = datetime64("2015-06-09")
        window_closes = datetime64("2016-12-31")
        started_in_window = and_(contract_start >= window_opens, contract_start <= window_closes)

        # A fixed-term contract must last at least 6 months. Initially the condition
        # was a contract of at least 12 months, for applications submitted until
        # January 26.
        is_open_ended = contract_type == ContractTypes.cdi
        is_fixed_term = contract_type == ContractTypes.cdd
        contract_length = (contract_end - contract_start).astype('timedelta64[M]')
        long_enough = contract_length >= timedelta64(6, 'M')
        duration_ok = or_(is_open_ended, and_(is_fixed_term, long_enough))

        earliest_start = datetime64(period.offset(-24, 'month').start)
        still_running = earliest_start < contract_start

        single_employee = headcount == 1
        eligible = single_employee * not_(is_apprentice) * started_in_window * duration_ok * still_running

        # Total over 24 months, at most 500 EUR per quarter.
        total_amount = 4000
        monthly_amount = total_amount / 24

        # Cannot be combined with the Jeune Entreprise Innovante (JEI) scheme.
        not_combined = not_(jei_exemption)

        # TODO: how should the "first employee" condition be implemented? The headcount
        # is not enough when a first contract was terminated.
        # Condition: the company has not concluded an employment contract with an
        # employee, beyond the trial period, in the 12 months before the new hire.

        # For a part-time employee the aid is prorated according to working time.
        # TODO: is multiplying by the proration coefficient enough for part-time work?
        # To be tested.
        return eligible * monthly_amount * proration * not_combined
Example #3
0
    def function(self, simulation, period):
        """Flag the individuals who satisfy the three conditions checked here.

        The period is narrowed to ``period.this_month``. The flag is true when
        the activity code equals 1, ``ass_precondition_remplie`` is true and
        ``chomage_net`` is zero for that month.

        Returns:
            A ``(period, flag)`` tuple.
        """
        period = period.this_month

        # The activity code is 1 for a job seeker.
        activity = simulation.calculate('activite', period)

        # Tells whether the person worked 5 years over the last 10 years.
        work_history_ok = simulation.calculate('ass_precondition_remplie', period)

        unemployment_benefit = simulation.calculate('chomage_net', period)

        job_seeker_with_history = and_(activity == 1, work_history_ok)
        no_benefit_received = unemployment_benefit == 0
        return period, and_(job_seeker_with_history, no_benefit_received)
Example #4
0
    def function(self, simulation, period):
        """Compute the monthly hiring aid for a company whose headcount is 1.

        The period is narrowed to ``period.this_month``. An employee is eligible
        when the company headcount equals 1, the employee is not an apprentice,
        the contract started between 2015-06-09 and 2016-12-31 (inclusive) and
        after the date 24 months before the period start, and the contract
        duration code is 0 (open-ended) or 1 (fixed-term) with a length of at
        least 6 months.

        Returns:
            A ``(period, amount)`` tuple. The amount is ``4000 / 24`` multiplied
            by the proration coefficient when eligible, and zero otherwise.
        """
        period = period.this_month
        headcount = simulation.calculate('effectif_entreprise', period)
        is_apprentice = simulation.calculate('apprenti', period)
        contract_type = simulation.calculate('contrat_de_travail_duree', period)
        contract_start = simulation.calculate('contrat_de_travail_debut', period)
        contract_end = simulation.calculate('contrat_de_travail_fin', period)
        proration = simulation.calculate('coefficient_proratisation', period)

        # This aid is temporary.
        # TODO: if it is nevertheless renewed and amended for 2017, the dates and the
        # amount should be implemented as XML parameters.

        started_after_opening = contract_start >= datetime64("2015-06-09")
        started_before_closing = contract_start <= datetime64("2016-12-31")
        started_in_window = and_(started_after_opening, started_before_closing)

        # Code 0 is an open-ended contract, code 1 a fixed-term contract.
        # A fixed-term contract must last at least 6 months. Initially the condition
        # was a contract of at least 12 months, for applications submitted until
        # January 26.
        contract_length = (contract_end - contract_start).astype('timedelta64[M]')
        long_fixed_term = and_(contract_type == 1, contract_length >= timedelta64(6, 'M'))
        duration_ok = or_(contract_type == 0, long_fixed_term)

        earliest_start = datetime64(period.offset(-24, 'month').start)
        still_running = earliest_start < contract_start

        single_employee = headcount == 1
        eligible = single_employee * not_(is_apprentice) * started_in_window * duration_ok * still_running

        # Total over 24 months, at most 500 EUR per quarter.
        total_amount = 4000
        monthly_amount = total_amount / 24

        # TODO: how should the "first employee" condition be implemented? The headcount
        # is not enough when a first contract was terminated.
        # Condition: the company has not concluded an employment contract with an
        # employee, beyond the trial period, in the 12 months before the new hire.

        # For a part-time employee the aid is prorated according to working time.
        # TODO: is multiplying by the proration coefficient enough for part-time work?
        # To be tested.
        return period, eligible * monthly_amount * proration
Example #5
0
    def formula_2009_04(famille, period, parameters):
        '''
        Prime de solidarité active (exceptional, 200 EUR paid once in April 2009).

        In April 2009 a "prime de solidarité active" (Psa) is paid to low-income
        families who received, in January, February or March 2009, the Rmi, the Api
        (or the experimental Rsa, the Cav or the Rma for former Rmi or Api
        recipients), the monthly flat-rate bonus attached to the Rmi or the Api, or
        a housing benefit (provided they have a professional activity and are over
        25 or have at least one dependent child).
        The Psa, an exceptional bonus, amounts to 200 euros per beneficiary household.
        '''
        rmi_params = parameters(period).prestations.minima_sociaux.rmi
        api = famille('api', period)
        rsa = famille('rsa', period)
        nb_children = famille('af_nbenf', period)
        housing_benefit = famille('aide_logement', period)

        # True for the family when at least one parent has the "actif" activity status.
        member_is_active = famille.members('activite', period) == TypesActivite.actif
        has_active_parent = famille.any(member_is_active, role = Famille.PARENT)

        receives_api = api > 0
        receives_rmi = rsa > 0
        # Housing benefit only counts together with a child or an active parent.
        children_or_work = or_(nb_children > 0, has_active_parent)
        receives_qualifying_al = and_(housing_benefit > 0, children_or_work)

        beneficiary = (receives_api + receives_rmi + receives_qualifying_al) > 0
        return beneficiary * rmi_params.psa
Example #6
0
    def function_2009(self, simulation, period):
        """
        Prime de solidarité active (exceptional, 200 EUR paid once in April 2009).

        In April 2009 a "prime de solidarité active" (Psa) is paid to low-income
        families who received, in January, February or March 2009, the Rmi, the Api
        (or the experimental Rsa, the Cav or the Rma for former Rmi or Api
        recipients), the monthly flat-rate bonus attached to the Rmi or the Api, or
        a housing benefit (provided they have a professional activity and are over
        25 or have at least one dependent child).
        The Psa, an exceptional bonus, amounts to 200 euros per beneficiary household.

        Returns a ``(period, amount)`` tuple for the month containing the period start.
        """
        period = period.start.offset("first-of", "month").period("month")
        api = simulation.calculate("api", period)
        rsa = simulation.calculate("rsa", period)
        activity_holder = simulation.compute("activite", period)
        nb_children = simulation.calculate("af_nbenf", period)

        housing_benefit = simulation.calculate("aide_logement", period)
        rmi_params = simulation.legislation_at(period.start).minim.rmi

        activity_by_role = self.split_by_roles(activity_holder, roles=[CHEF, PART])
        receives_api = api > 0
        receives_rmi = rsa > 0

        # Activity code 0 for either adult — presumably "working"; confirm against the enum.
        adult_is_active = or_(activity_by_role[CHEF] == 0, activity_by_role[PART] == 0)
        children_or_work = or_(nb_children > 0, adult_is_active)
        receives_qualifying_al = and_(housing_benefit > 0, children_or_work)

        beneficiary = receives_api + receives_rmi + receives_qualifying_al > 0
        return period, beneficiary * rmi_params.psa
Example #7
0
def _enceinte_fam(self, agem_holder, enceinte_holder):
    """Tell whether the family counts as expecting a child.

    True when the youngest child's age in months, as returned by
    `age_en_mois_benjamin`, lies strictly between -6 and 0, or when the CHEF
    or the PART member is flagged as pregnant.
    """
    children_age_months = self.split_by_roles(agem_holder, roles = ENFS)
    pregnant_by_role = self.split_by_roles(enceinte_holder, roles = [CHEF, PART])

    youngest_age = age_en_mois_benjamin(children_age_months)
    # A negative age presumably denotes an unborn child — TODO confirm.
    implied_pregnancy = and_(youngest_age < 0, youngest_age > -6)
    implied_or_chef = or_(implied_pregnancy, pregnant_by_role[CHEF])
    return or_(implied_or_chef, pregnant_by_role[PART])
Example #8
0
    def function(self, simulation, period):
        """Flag individuals whose activity code is 1 and who meet the ASS precondition.

        The period is narrowed to the month containing its start.

        Returns:
            A ``(period, flag)`` tuple.
        """
        period = period.start.offset('first-of', 'month').period('month')

        activity = simulation.calculate('activite', period)
        precondition_met = simulation.calculate('ass_precondition_remplie', period)

        has_activity_code_one = activity == 1
        return period, and_(has_activity_code_one, precondition_met)
Example #9
0
    def formula(famille, period):
        """Tell whether the family counts as expecting a child.

        The result combines two flags: a parent is flagged as pregnant, or the
        minimum age in months among the children lies strictly between -6 and 0.
        """
        member_pregnant = famille.members('enceinte', period)
        pregnant_parent = famille.any(member_pregnant, role = Famille.PARENT)

        member_age_months = famille.members('age_en_mois', period)
        youngest_child_age = famille.min(member_age_months, role = Famille.ENFANT)

        # A negative age presumably denotes an unborn child — TODO confirm.
        implied_pregnancy = and_(youngest_child_age < 0, youngest_child_age > -6)
        return pregnant_parent + implied_pregnancy
Example #10
0
    def formula(individu, period, parameters):
        """Combine the three conditions checked here into one eligibility value.

        The result is the product of: being unemployed (``TypesActivite.chomeur``)
        with a zero ``chomage_net``, meeting ``ass_precondition_remplie``, and
        having ``age_en_mois`` at most equal to the ``age_max`` parameter.
        """
        ass_params = parameters(period).prestations.minima_sociaux.ass
        age_limit = ass_params.age_max
        under_age_limit = individu('age_en_mois', period) <= age_limit

        is_unemployed = individu('activite', period) == TypesActivite.chomeur
        gets_no_benefit = individu('chomage_net', period) == 0
        unpaid_job_seeker = and_(is_unemployed, gets_no_benefit)

        # Tells whether the individual worked 5 years over the last 10 years.
        precondition_met = individu('ass_precondition_remplie', period)

        return unpaid_job_seeker * precondition_met * under_age_limit
Example #11
0
    def function(self, simulation, period):
        """Tell whether the family counts as expecting a child.

        True when the youngest child's age in months, as returned by
        `age_en_mois_benjamin`, lies strictly between -6 and 0, or when the CHEF
        or the PART member is flagged as pregnant. The period is used unchanged.

        Returns:
            A ``(period, flag)`` tuple.
        """
        age_holder = simulation.compute('age_en_mois', period)
        pregnant_holder = simulation.compute('enceinte', period)

        children_age_months = self.split_by_roles(age_holder, roles = ENFS)
        pregnant_by_role = self.split_by_roles(pregnant_holder, roles = [CHEF, PART])

        youngest_age = age_en_mois_benjamin(children_age_months)
        # A negative age presumably denotes an unborn child — TODO confirm.
        implied_pregnancy = and_(youngest_age < 0, youngest_age > -6)
        implied_or_chef = or_(implied_pregnancy, pregnant_by_role[CHEF])
        return period, or_(implied_or_chef, pregnant_by_role[PART])
Example #12
0
    def function(self, simulation, period):
        """Compute the allowance applied when unemployment benefit was received.

        The period is narrowed to ``period.this_month``. The allowance applies when
        ``chomage_net`` was positive in each of the two preceding months. It is
        the ``abattement_chomage_indemnise`` rate times the ``salaire_imposable``
        summed over ``period.n_2``, reduced by the professional-expenses rate
        and rounded with ``round_``.

        Returns:
            A ``(period, allowance)`` tuple.
        """
        period = period.this_month
        benefit_last_month = simulation.calculate('chomage_net', period.offset(-1))
        benefit_two_months_ago = simulation.calculate('chomage_net', period.offset(-2))
        earned_income = simulation.calculate_add('salaire_imposable', period.n_2)
        allowance_rate = (
            simulation.legislation_at(period.start)
            .prestations.aides_logement.ressources.abattement_chomage_indemnise
            )
        expenses_rate = simulation.legislation_at(period.start).impot_revenu.tspr.abatpro.taux

        on_benefit_both_months = and_(benefit_last_month > 0, benefit_two_months_ago > 0)
        gross_allowance = on_benefit_both_months * allowance_rate * earned_income

        return period, round_((1 - expenses_rate) * gross_allowance)
        def loyer_retenu():
            """Return the retained rent: the lower of the actual and the capped rent.

            Reads `statut_occupation`, `loyer`, `coloc`, `chambre`, `al`,
            `personne_seule`, `al_pac` and `zone_apl` from the enclosing scope.
            """
            # Actual monthly rent, multiplied by 2/3 for furnished housing
            # (occupation status 5).
            is_furnished = statut_occupation == 5
            actual_rent = round(is_furnished * loyer * 2 / 3 + (statut_occupation != 5) * loyer, 2)

            # Rate applied to the rent ceiling.
            ceilings = al.loyers_plafond
            ceiling_rate = (
                and_(not_(coloc), not_(chambre)) * 1 +
                chambre * ceilings.chambre +
                not_(chambre) * coloc * ceilings.colocation
                )

            # Which ceiling category applies: single person, family or couple.
            single_ceiling = or_(personne_seule * (al_pac == 0), chambre)
            family_ceiling = not_(single_ceiling) * (al_pac > 0)
            couple_ceiling = and_(not_(family_ceiling), not_(single_ceiling))

            def zone_ceiling(zone):
                # Ceiling for one zone: L1 for a single person, L2 for a couple,
                # L3 plus L4 for each dependent beyond the first for a family.
                return (
                    single_ceiling * zone.L1 +
                    couple_ceiling * zone.L2 +
                    family_ceiling * (zone.L3 + (al_pac > 1) * (al_pac - 1) * zone.L4)
                    )

            ceiling_z1, ceiling_z2, ceiling_z3 = [
                zone_ceiling(zone) for zone in (ceilings.zone1, ceilings.zone2, ceilings.zone3)
                ]

            # Pick the ceiling of the household's zone, then apply the rate.
            capped_rent = ceiling_z1 * (zone_apl == 1) + ceiling_z2 * (zone_apl == 2) + ceiling_z3 * (zone_apl == 3)
            capped_rent = round(capped_rent * ceiling_rate, 2)

            # Retained rent.
            return min_(actual_rent, capped_rent)
    def function(self, simulation, period):
        """Compute the allowance applied when unemployment benefit was received.

        The period is narrowed to the month containing its start. The allowance
        applies when ``chonet`` was positive in each of the two preceding months.
        It is the ``abattement_chomage_indemnise`` rate times the
        ``salaire_imposable`` of the calendar year two years earlier, reduced by
        the professional-expenses rate and rounded with ``round``.

        Returns:
            A ``(period, allowance)`` tuple.
        """
        period = period.start.offset('first-of', 'month').period('month')
        reference_year = period.start.offset('first-of', 'year').period('year').offset(-2)
        benefit_last_month = simulation.calculate('chonet', period.offset(-1))
        benefit_two_months_ago = simulation.calculate('chonet', period.offset(-2))
        earned_income = simulation.calculate('salaire_imposable', reference_year)
        allowance_rate = simulation.legislation_at(period.start).al.ressources.abattement_chomage_indemnise

        on_benefit_both_months = and_(benefit_last_month > 0, benefit_two_months_ago > 0)
        gross_allowance = on_benefit_both_months * allowance_rate * earned_income

        expenses_params = simulation.legislation_at(period.start).ir.tspr.abatpro
        net_allowance = round((1 - expenses_params.taux) * gross_allowance)

        return period, net_allowance
Example #15
0
    def function(self, simulation, period):
        """Compute the resource base, rounded up to the next multiple of 100.

        The period is narrowed to ``period.this_month``. The flat-rate base
        replaces the default base when all of the following hold:

        - the default base is at most 1015 times the ``smic_h_b`` value read at
          the end of ``period.n_2``;
        - the flat-rate base is positive;
        - the AAH summed over CHEF and PART for the previous month is zero;
        - the household is not a "young" household, i.e. CHEF under 25 (or PART
          under 25 when in a couple) with previous-month salaries below the
          ``dar_8`` (single) or ``dar_9`` (couple) ceiling.

        A floor of ``dar_4`` minus ``dar_5`` for scholarship holders is then
        applied when CHEF is a student.

        Returns:
            A ``(period, resources)`` tuple.
        """
        period = period.this_month
        previous_month = period.offset(-1)
        reference_year_end = period.n_2.stop
        default_base = simulation.calculate('aide_logement_base_ressources_defaut', period)
        flat_rate_base = simulation.calculate(
            'aide_logement_base_ressources_eval_forfaitaire', period)
        en_couple = simulation.calculate('en_couple', period)
        aah_holder = simulation.compute('aah', previous_month)
        aah = self.sum_by_entity(aah_holder, roles = [CHEF, PART])
        age_holder = simulation.compute('age', period)
        age = self.split_by_roles(age_holder, roles = [CHEF, PART])
        hourly_smic_n2 = simulation.legislation_at(reference_year_end).cotsoc.gen.smic_h_b
        salary_holder = simulation.compute('salaire_imposable', period.offset(-1))
        total_salaries = self.sum_by_entity(salary_holder, roles = [CHEF, PART])

        flat_rate_ceiling = 1015 * hourly_smic_n2

        young_ceiling_single = simulation.legislation_at(period.start).prestations.aides_logement.ressources.dar_8
        young_ceiling_couple = simulation.legislation_at(period.start).prestations.aides_logement.ressources.dar_9
        young_ceiling = where(en_couple, young_ceiling_couple, young_ceiling_single)

        # Young households with low salaries are exempt from the flat-rate evaluation.
        has_young_adult = or_(age[CHEF] < 25, and_(en_couple, age[PART] < 25))
        young_exempt = has_young_adult & (total_salaries < young_ceiling)

        use_flat_rate = (
            (default_base <= flat_rate_ceiling) &
            (flat_rate_base > 0) &
            (aah == 0) &
            not_(young_exempt)
            )

        resources = where(use_flat_rate, flat_rate_base, default_base)

        # Resource floors for students.
        # Only the student (and scholarship) status of the applicant matters, not the partner's.
        resource_params = simulation.legislation_at(period.start).prestations.aides_logement.ressources
        student_holder = simulation.compute('etudiant', period)
        scholarship_holder = simulation.compute('boursier', period)
        student = self.split_by_roles(student_holder, roles = [CHEF, PART])
        scholarship = self.split_by_roles(scholarship_holder, roles = [CHEF, PART])
        resource_floor = max_(0, student[CHEF] * resource_params.dar_4 - scholarship[CHEF] * resource_params.dar_5)
        resources = max_(resources, resource_floor)

        # Round up to the next 100 euros.
        return period, ceil(resources / 100) * 100
Example #16
0
def _psa(api, rsa, activite, af_nbenf, al, _P, _option = {"activite" : [CHEF, PART]}):
    '''
    Prime de solidarité active (exceptional, 200 EUR paid once in April 2009)
    '''
    # In April 2009 a "prime de solidarité active" (Psa) is paid to low-income families
    # who received, in January, February or March 2009, the Rmi, the Api (or the
    # experimental Rsa, the Cav or the Rma for former Rmi or Api recipients), the monthly
    # flat-rate bonus attached to the Rmi or the Api, or a housing benefit (provided they
    # have a professional activity and are over 25 or have at least one dependent child).
    # The Psa, an exceptional bonus, amounts to 200 euros per beneficiary household.
    receives_api = api > 0
    receives_rmi = rsa > 0

    # Activity code 0 for either adult — presumably "working"; confirm against the enum.
    adult_is_active = or_(activite[CHEF] == 0, activite[PART] == 0)
    children_or_work = or_(af_nbenf > 0, adult_is_active)
    receives_qualifying_al = and_(al > 0, children_or_work)

    beneficiary = (receives_api + receives_rmi + receives_qualifying_al) > 0

    rmi_params = _P.minim.rmi
    return beneficiary * rmi_params.psa
Example #17
0
def _nbI(self, alt, inv, quifoy):
    """Count, per entity, the members that satisfy all three conditions.

    A member is counted when ``quifoy >= 2``, ``inv`` and ``alt`` are all true.
    Judging by the variable names this targets dependent children who are
    disabled and in shared custody — TODO confirm against the variable
    definitions.

    Args:
        alt: boolean values, one per member.
        inv: boolean values, one per member.
        quifoy: position codes, one per member.

    Returns:
        The result of ``self.sum_by_entity`` applied to the int16 flags.
    """
    # Combine the conditions pairwise. `and_` is called with two operands
    # everywhere else in this file; assuming it is numpy.logical_and (TODO
    # confirm), a third positional argument is the `out` buffer, so passing
    # `alt` there would overwrite it without ever testing it.
    dependent_and_disabled = and_(quifoy >= 2, inv)
    counted = and_(dependent_and_disabled, alt)
    return self.sum_by_entity(counted.astype(int16))
Example #18
0
def test(year=2006, variables = ['af']):
    simulation = SurveySimulation()
    survey_filename = os.path.join(model.DATA_DIR, 'sources', 'test.h5')
    simulation.set_config(year=year, survey_filename=survey_filename)
    simulation.set_param()
    simulation.compute()

#     of_aggregates = Aggregates()
#     of_aggregates.set_simulation(simulation)
#     of_aggregates.compute()
#     print of_aggregates.aggr_frame
#
#     from openfisca_france.data.erf.aggregates import build_erf_aggregates
#     temp = (build_erf_aggregates(variables=variables, year= year))
#     print temp
#     return
    variable= "af"
    debugger = Debugger()
    debugger.set_simulation(simulation)
    debugger.set_variable(variable)
    debugger.show_aggregates()





    def get_all_ancestors(varlist):
        if len(varlist) == 0:
            return []
        else:
            if varlist[0]._parents == set():
                return ([varlist[0]]
                      + get_all_ancestors(varlist[1:]))
            else:
                return ([varlist[0]]
                 + get_all_ancestors(list(varlist[0]._parents))
                  + get_all_ancestors(varlist[1:]))

    # We want to get all ancestors + children + the options that we're going to encounter
    parents = map(lambda x: simulation.output_table.column_by_name.get(x), variables)
    parents = get_all_ancestors(parents)
    options = []
    for varcol in parents:
        options.extend(varcol._option.keys())
    options = list(set(options))
    #print options
    parents = map(lambda x: x.name, parents)
    for var in variables:
        children = set()
        varcol = simulation.output_table.column_by_name.get(var)
        children = children.union(set(map(lambda x: x.name, varcol._children)))
    variables = list(set(parents + list(children)))
    #print variables
    del parents, children
    gc.collect()

    def get_var(variable):
        variables =[variable]
        return simulation.aggregated_by_entity(entity="men", variables=variables,
                                                all_output_vars = False, force_sum=True)[0]

    simu_aggr_tables = get_var(variables[0])
    for var in variables[1:]:
        simu_aggr_tables = simu_aggr_tables.merge(get_var(var)[['idmen', var]], on = 'idmen', how = 'outer')

    # We load the data from erf table in case we have to pick data there
    erf_data = DataCollection(year=year)
    os.system('cls')
    todo = set(variables + ["ident", "wprm"]).union(set(options))
    print 'Variables or equivalents to fetch :'
    print todo

    '''
    Méthode générale pour aller chercher les variables de l'erf/eec
    ( qui n'ont pas forcément le même nom
    et parfois sont les variables utilisées pour créér l'of ):
    1 - essayer le get_of2erf, ça doit marcher pour les variables principales ( au moins les aggrégats
    que l'on compare )
    Si les variables ne sont pas directement dans la table,
    elles ont été calculées à partir d'autres variables de données erf/eec
    donc chercher dans :
    2 - build_survey
    3 - model/model.py qui dira éventuellement dans quel module de model/ chercher
    Le 'print todo' vous indique quelles variables chercher
    ( attention à ne pas inclure les enfants directs )
    L'utilisation du Ctrl-H est profitable !
    '''

    fetch_eec = ['statut','titc','chpub','encadr','prosa','age','naim','naia','noindiv']
    fetch_erf = ['zsali','af','ident','wprm','noi','noindiv','quelfic']
    erf_df = erf_data.get_of_values(variables= fetch_erf, table="erf_indivi")
    eec_df = erf_data.get_of_values(variables= fetch_eec, table="eec_indivi")
    erf_eec_indivi = erf_df.merge(eec_df, on ='noindiv', how = 'inner' )
    assert 'quelfic' in erf_eec_indivi.columns, "quelfic not in erf_indivi columns"
    del eec_df, erf_df

    # We then get the aggregate variables for the menage ( mainly to compare with of )
    print 'Loading data from erf_menage table'
    erf_menage = erf_data.get_of_values(variables= list(todo) + ['quelfic'], table="erf_menage")

    del todo
    gc.collect()
    assert 'ident' in erf_menage.columns, "ident not in erf_menage.columns"

    from openfisca_france.data.erf import get_erf2of
    erf2of = get_erf2of()
    erf_menage.rename(columns = erf2of, inplace = True)

# We get the options from the simulation non aggregated tables:

    # First from the output_table
    # We recreate the noindiv in output_table
    simulation.output_table.table['noindiv'] = 100 * simulation.output_table.table.idmen_ind + simulation.output_table.table.noi_ind
    simulation.output_table.table['noindiv'] = simulation.output_table.table['noindiv'].astype(np.int64)
    s1 = [var for var in set(options).intersection(set(simulation.output_table.table.columns))] + ['idmen_ind', 'quimen_ind', 'noindiv']
    simu_nonaggr_tables = (simulation.output_table.table)[s1]
    simu_nonaggr_tables.rename(columns = {'idmen_ind' : 'idmen', 'quimen_ind':'quimen'}, inplace = True)
    assert 'noindiv' in simu_nonaggr_tables.columns

    # If not found, we dwelve into the input_table
    if (set(s1)- set(['idmen_ind', 'quimen_ind','noindiv'])) < set(options):
        assert 'noindiv' in simulation.input_table.table.columns, "'noindiv' not in simulation.input_table.table.columns"
        s2 = [var for var in (set(options).intersection(set(simulation.input_table.table.columns)) - set(s1))] + ['noindiv']
        #print s2
        temp = simulation.input_table.table[s2]
        simu_nonaggr_tables = simu_nonaggr_tables.merge(temp, on = 'noindiv', how = 'inner', sort = False)

        del s2, temp
    del s1
    gc.collect()

    simu_nonaggr_tables = simu_nonaggr_tables[list(set(options)) + ['idmen', 'quimen','noindiv']]
    #print options, variables
    assert 'idmen' in simu_nonaggr_tables.columns, 'Idmen not in simu_nonaggr_tables columns'

    # Check the idmens that are not common
    erf_menage.rename(columns = {'ident' : 'idmen'}, inplace = True)

    print "\n"
    print 'Checking if idmen is here...'
    print '\n ERF : '
    print 'idmen' in erf_menage.columns
    print "\n Simulation output"
    print 'idmen' in simu_aggr_tables.columns
    print "\n"

    #print 'Dropping duplicates of idmen for both tables...'
    assert not erf_menage["idmen"].duplicated().any(), "Duplicated idmen in erf_menage"
    #erf_menage.drop_duplicates('idmen', inplace = True)
    simu_aggr_tables.drop_duplicates('idmen', inplace = True)
    assert not simu_aggr_tables["idmen"].duplicated().any(), "Duplicated idmen in of"

    print 'Checking mismatching idmen... '
    s1 = set(erf_menage['idmen']) - (set(simu_aggr_tables['idmen']))
    if s1:
        print "idmen that aren't in simu_aggr_tables : %s" %str(len(s1))
        pass
    s2 = (set(simu_aggr_tables['idmen'])) - set(erf_menage['idmen'])
    if s2:
        print "idmen that aren't in erf_menage : %s" %str(len(s2))
        pass
    del s1, s2

    # Restrict to common idmens and merge
    s3 = set(erf_menage['idmen']).intersection(set(simu_aggr_tables['idmen']))
    print "Restricting to %s common idmen... \n" %str(len(s3))
    erf_menage = erf_menage[erf_menage['idmen'].isin(s3)]
    simu_aggr_tables = simu_aggr_tables[simu_aggr_tables['idmen'].isin(s3)]
    del s3
    gc.collect()

    #print erf_menage.columns
    #print simu_aggr_tables.columns

    # Compare differences across of and erf dataframes
    print "Comparing differences between dataframes... \n"
    colcom = (set(erf_menage.columns).intersection(set(simu_aggr_tables.columns))) - set(['idmen','wprm'])
    print 'Common variables: '
    print colcom
    erf_menage.reset_index(inplace = True)
    simu_aggr_tables.reset_index(inplace = True)
    for col in colcom:
        temp = set(erf_menage['idmen'][erf_menage[col] != simu_aggr_tables[col]])
        print "Numbers of idmen that aren't equal on variable %s : %s \n" %(col, str(len(temp)))
        del temp


    # Detect the biggest differences
    bigtable = merge(erf_menage, simu_aggr_tables, on = 'idmen', how = 'inner', suffixes=('_erf','_of'))
    print 'Length of new dataframe is %s' %str(len(bigtable))
    #print bigtable.columns
    bigtable.set_index('idmen', drop = False, inplace = True)

    already_met = []
    options_met = []

    for col in colcom:
        bigtemp = None
        table = bigtable[and_(bigtable[col+'_erf']!=0,bigtable[col+'_of']!=0)]
        table[col] = (table[col+'_erf'] - table[col+'_of']) / table[col+'_erf'] #Difference relative
        table[col] = table[col].apply(lambda x: abs(x))
        print 'Minimum difference between the two tables for %s is %s' %(col, str(table[col].min()))
        print 'Maximum difference between the two tables for %s is %s' %(col, str(table[col].max()))
        print table[col].describe()
        try:
            assert len(table[col]) == len(table['wprm_of']), "PINAGS"
            dec, values = mwp(table[col], np.arange(1,11), table['wprm_of'], 2, return_quantiles=True)
            #print sorted(values)
            dec, values = mwp(table[col], np.arange(1,101), table['wprm_erf'], 2, return_quantiles=True)
            #print sorted(values)[90:]
            del dec, values
            gc.collect()
        except:
            #print 'Weighted percentile method didnt work for %s' %col
            pass
        print "\n"

    # Show the relevant information for the most deviant households
        table.sort(columns = col, ascending = False, inplace = True)
        #print table[col][0:10].to_string()
        if bigtemp is None:
            bigtemp = {'table' : table[[col, col+'_of', col+'_erf', 'idmen']][0:10],
                       'options' : None}
        bigtemp['table'][col+'div'] = bigtemp['table'][col+'_of'] / bigtemp['table'][col+'_erf']
        print bigtemp['table'].to_string()

        '''
        bigtemp is the table which will get filled little by little by the relevant variables.
        Up to the last rows of code 'table' refers to a table of aggregated values,
        while 'options is a table of individual variables.
        The reason we call it in a dictionnary is also because we modify it inside the recursive function 'iter_on parents',
        and it causes an error in Python unless for certain types like dictionnary values.
        '''
        #print "\n"

        # If variable is a Prestation, we show the dependancies
        varcol = simulation.output_table.column_by_name.get(col)
        if isinstance(varcol, Prestation):

            '''
            For the direct children
            '''
            if not varcol._children is None:
                ch_to_fetch = list(varcol._children)
                ch_to_fetch = map(lambda x: x.name, ch_to_fetch)
                ch_fetched = []

                if set(ch_to_fetch) <= set(simu_aggr_tables.columns):
                    print "Variables which need %s to be computed :\n %s \n" %(col, str(ch_to_fetch))
                    for var in ch_to_fetch:
                        if var + '_of' in table.columns:
                            ch_fetched.append(var + '_of')
                        else:
                            ch_fetched.append(var)
                elif set(ch_to_fetch) <= set(simu_aggr_tables.columns).union(erf_menage.columns):
                    print "Variables which need %s to be computed (some missing picked in erf):\n %s \n" %(col, str(ch_to_fetch))
                    for var in ch_to_fetch:
                        if var in simu_aggr_tables.columns:
                            if var + '_of' in table.columns:
                                ch_fetched.append(var + '_of')
                        elif var + '_erf' in table.columns:
                                ch_fetched.append(var + '_erf')
                        else:
                            ch_fetched.append(var)
                else:
                    print "Variables which need %s to be computed (some missing):\n %s \n" %(col, str(ch_to_fetch))
                    for var in ch_to_fetch:

                        if var in simu_aggr_tables.columns:
                            if var + '_of' in table.columns:
                                ch_fetched.append(var + '_of')
                        elif var in erf_menage.columns:
                            if var + '_erf' in table.columns:
                                ch_fetched.append(var + '_erf')

                print table[[col] + ch_fetched][0:10]
                print "\n"
                del ch_to_fetch, ch_fetched

            '''
            For the parents
            '''
            def iter_on_parents(varcol):
                if (varcol._parents == set() and varcol._option == {}) or varcol.name in already_met:
                    return
                else:
                    par_to_fetch = list(varcol._parents)
                    par_to_fetch = map(lambda x: x.name, par_to_fetch)
                    par_fetched = []

                    if set(par_fetched) <= set(simu_aggr_tables.columns):
                        #print "Variables the prestation %s depends of :\n %s \n" %(varcol.name, str(par_fetched))
                        for var in par_fetched:
                            if var + '_of' in table.columns:
                                par_fetched.append(var + '_of')
                            else:
                                par_fetched.append(var)
                    elif set(par_fetched) <= set(simu_aggr_tables.columns).union(erf_menage.columns):
                        #print "Variables the prestation %s depends of (some missing picked in erf):\n %s \n" %(varcol.name,str(par_fetched))
                        for var in par_fetched:
                            if var in simu_aggr_tables.columns:
                                if var + '_of' in table.columns:
                                    par_fetched.append(var + '_of')
                            elif var + '_erf' in table.columns:
                                par_fetched.append(var + '_erf')
                            else:
                                par_fetched.append(var)
                    else:
                        for var in par_fetched:
                            if var in simu_aggr_tables.columns:
                                if var + '_of' in table.columns:
                                    par_fetched.append(var + '_of')
                            elif var in erf_menage.columns:
                                if var + '_erf' in table.columns:
                                    par_fetched.append(var + '_erf')
                        if len(par_fetched) > 0:
                            #print "Variables the prestation %s depends of (some missing):\n %s \n" %(varcol.name, str(par_fetched))
                            pass
                        else:
                            #print "Variables the prestation %s depends of couldn't be found :\n %s \n" %(varcol.name, str(par_fetched))
                            pass

                    if len(par_fetched) > 0:
                        temp = table[[col, 'idmen'] + par_fetched][0:10]
                        bigtemp['table'] = pd.merge(temp, bigtemp['table'], how = 'inner')
                        #print temp.to_string(), "\n"
                    if varcol._option != {} and not set(varcol._option.keys()) < set(options_met):
                        vars_to_fetch = list(set(varcol._option.keys())-set(options_met))
                        #print "and the options to current variable %s for the id's with strongest difference :\n %s \n" %(varcol.name, varcol._option.keys())
                        liste = [i for i in range(0,10)]
                        liste = map(lambda x: table['idmen'].iloc[x], liste)
                        temp = simu_nonaggr_tables[['idmen', 'quimen','noindiv']
                                                  + vars_to_fetch][simu_nonaggr_tables['idmen'].isin(table['idmen'][0:10])]

                        temp_sorted = temp[temp['idmen'] == liste[0]]
                        for i in xrange(1,10):
                            temp_sorted = temp_sorted.append(temp[temp['idmen'] == liste[i]])
                        if bigtemp['options'] is None:
                            bigtemp['options'] = temp_sorted
                            bigtemp['options'] = bigtemp['options'].merge(erf_eec_indivi, on = 'noindiv', how = 'outer')
                        else:
                            bigtemp['options'] = bigtemp['options'].merge(temp_sorted, on = ['noindiv','idmen','quimen'], how = 'outer')
#                         temp_sorted.set_index(['idmen',  'quimen'], drop = True, inplace = True) # If we do that
                        del temp, temp_sorted
                        gc.collect()

                    already_met.append(varcol.name)
                    options_met.extend(varcol._option.keys())
                    for var in varcol._parents:
                        iter_on_parents(var)

            iter_on_parents(varcol)
            # We merge the aggregate table with the option table ( for each individual in entity )
            bigtemp['table'] = bigtemp['table'].merge(bigtemp['options'],
                                                       how = 'left',
                                                        on = 'idmen',
                                                         suffixes = ('(agg)', '(ind)'))

            # Reshaping the table to group by descending error on col, common entities
            bigtemp['table'].sort(columns = ['af','quimen'], ascending = [False,True], inplace = True)
            bigtemp['table'] = bigtemp['table'].groupby(['idmen','quimen'], sort = False).sum()
            print "Table of values for %s dependencies : \n" %col
            print bigtemp['table'].to_string()
            del bigtemp['table'], bigtemp['options']
            gc.collect()
    def get_major_differences(self):
        """
        Build, log and return the table of relative differences between ERF and OpenFisca.

        The household ("menage") data frames are rebuilt through the ``build_*``
        methods, merged on ``idmen`` and restricted to the rows where
        ``self.variable`` is non-zero in both sources.  Two columns are added:
        ``<variable>_rel_diff`` ((of - erf) / erf) and ``<variable>_ratio``
        (of / erf).

        Returns
        -------
        The filtered table, sorted by decreasing relative difference.  It is
        also printed on standard output.
        """
        self.build_columns_to_fetch()
        self.build_erf_data_frames()
        self.build_openfisca_data_frames()
        variable = self.variable
        erf_menages_data_frame = self.erf_menages_data_frame
        of_menages_data_frame = self.of_menages_data_frame
        merged_menage_data_frame = merge(
            erf_menages_data_frame[[variable, 'idmen']],
            of_menages_data_frame[[variable, 'idmen']],
            on = 'idmen',
            how = 'inner',
            suffixes = ('_erf', '_of')
            )
        log.info('Length of merged_menage_data_frameis {}'.format(len(merged_menage_data_frame)))
        merged_menage_data_frame.set_index('idmen', drop = False, inplace = True)

        erf_column = variable + '_erf'
        of_column = variable + '_of'
        rel_diff_column = variable + "_rel_diff"

        # Work on an explicit copy: new columns are added below, and assigning
        # into a boolean-filtered slice is ambiguous for pandas (chained assignment).
        table = merged_menage_data_frame[
            and_(
                merged_menage_data_frame[erf_column] != 0,
                merged_menage_data_frame[of_column] != 0
                )
            ].copy()
        # Relative difference
        table[rel_diff_column] = (table[of_column] - table[erf_column]) / table[erf_column]
        log.info(
            "Minimum difference between the two tables for {} is {}".format(
                variable, str(table[rel_diff_column].min())
                )
            )
        log.info(
            "Maximum difference between the two tables for {} is {}".format(
                variable, str(table[rel_diff_column].max())
                )
            )
        table[variable + '_ratio'] = table[of_column] / table[erf_column]
        log.info(table[rel_diff_column].describe())

        # Weighted deciles / percentiles of the relative difference (best effort).
        # NOTE(review): the merge above only keeps `variable` and 'idmen', so the
        # 'wprm_of' / 'wprm_erf' weight columns look absent here and this section
        # probably always ends in the except branch -- confirm and merge the weights.
        try:
            assert len(table[rel_diff_column]) == len(table['wprm_of']), "PINAGS"
            dec, values = mwp(
                table[rel_diff_column],
                np.arange(1, 11), table['wprm_of'],
                2,
                return_quantiles = True
                )
            log.info(sorted(values))
            dec, values = mwp(
                table[rel_diff_column],
                np.arange(1, 101),
                table['wprm_erf'],
                2,
                return_quantiles = True
                )
            log.info(sorted(values)[90:])
            del dec, values
        except Exception:
            # Only ordinary errors are tolerated; KeyboardInterrupt and SystemExit
            # must keep propagating, which a bare `except:` prevented.
            log.info('Weighted percentile method did not work for {}'.format(rel_diff_column))
        table.sort(columns = rel_diff_column, ascending = False, inplace = True)

        # Parenthesised single-argument form: valid as a Python 2 print statement
        # and as a Python 3 function call.
        print(table.to_string())
        return table
    def formula_2016_01_18(individu, period, parameters):
        """
        Monthly amount of a temporary hiring aid, formula applicable from 2016-01-18.

        The result is the product of an eligibility flag, a flat monthly amount
        (4000 spread over 24 months) and the working-time proration coefficient.
        `parameters` is not used: dates and amounts are hard-coded because the
        aid is temporary; should it be renewed and modified, they would have to
        become legislation parameters.
        """
        effectif = individu('effectif_entreprise', period)
        est_apprenti = individu('apprenti', period)
        type_duree = individu('contrat_de_travail_duree', period)
        DureeContrat = type_duree.possible_values
        debut_contrat = individu('contrat_de_travail_debut', period)
        fin_contrat = individu('contrat_de_travail_fin', period)
        proratisation = individu('coefficient_proratisation', period)
        smic = individu('smic_proratise', period)
        salaire = individu('salaire_de_base', period)
        exoneration_jei = individu('exoneration_cotisations_employeur_jei', period)
        aide_premier = individu('aide_premier_salarie', period)

        # Wage condition: up to 1.3 times the (prorated) Smic.
        salaire_ok = salaire <= (1.3 * smic)

        # Size condition: small and medium companies only.
        effectif_ok = effectif < 250

        # Non-cumulation rules:
        # - the first-hire aid, which is identical except that it covers every wage level;
        # - the "Jeune Entreprise Innovante" (JEI) scheme.
        sans_aide_premier_salarie = aide_premier == 0
        sans_jei = not_(exoneration_jei)
        cumul_ok = and_(sans_aide_premier_salarie, sans_jei)

        # The contract must start within the window during which the aid is open.
        ouverture = datetime64("2016-01-18")
        fermeture = datetime64("2017-06-30")
        contrat_ok = and_(debut_contrat >= ouverture, debut_contrat <= fermeture)

        # Contract length: open-ended contracts always qualify, fixed-term ones
        # only when they last at least 6 months (the threshold used by the code).
        est_cdi = type_duree == DureeContrat.cdi
        est_cdd = type_duree == DureeContrat.cdd
        duree_en_mois = (fin_contrat - debut_contrat).astype('timedelta64[M]')
        cdd_assez_long = and_(est_cdd, duree_en_mois >= timedelta64(6, 'M'))
        duree_ok = or_(est_cdi, cdd_assez_long)

        # The aid is only paid during the 2 years following the start of the contract.
        debut_fenetre = datetime64(period.offset(-24, 'month').start)
        date_ok = debut_fenetre < contrat_de_travail_debut if False else debut_fenetre < debut_contrat

        # Every criterion must hold; apprentices are excluded.
        criteres = (
            salaire_ok,
            effectif_ok,
            cumul_ok,
            contrat_ok,
            duree_ok,
            date_ok,
            not_(est_apprenti),
            )
        eligible = criteres[0]
        for critere in criteres[1:]:
            eligible = eligible * critere

        # Total over 24 months, i.e. at most 500 euros per quarter.
        montant_max = 4000
        mensualite = montant_max / 24

        # For part-time employees the aid is prorated according to working time.
        # TODO: is multiplying by the proration coefficient enough for part-time work?
        # To be tested.
        return eligible * mensualite * proratisation
    def function(self, simulation, period):
        """
        Compute the monthly housing aid ("aide au logement") for tenants.

        The period is first normalised to the month containing its start.  The
        aid is max(0, E - PP), where E is the eligible expense (retained rent
        plus flat-rate charges) and PP the personal contribution, kept only for
        the occupation statuses flagged as tenant (`loca`) and only when it
        reaches the `al.autres.nv_seuil` threshold.  The component for home
        buyers (`al_acc`) is not implemented and is always zero.

        Returns
        -------
        A ``(period, amount)`` tuple, `period` being the normalised month.
        """
        period = period.start.offset('first-of', 'month').period('month')
        concub = simulation.calculate('concub', period)
        aide_logement_base_ressources = simulation.calculate('aide_logement_base_ressources', period)
        statut_occupation_holder = simulation.compute('statut_occupation', period)
        loyer_holder = simulation.compute('loyer', period)
        coloc_holder = simulation.compute('coloc', period)
        logement_chambre_holder = simulation.compute('logement_chambre', period)
        al_pac = simulation.calculate('al_pac', period)
        enceinte_fam = simulation.calculate('enceinte_fam', period)
        zone_apl_famille = simulation.calculate('zone_apl_famille', period)
        nat_imp_holder = simulation.compute('nat_imp', period.start.period(u'year').offset('first-of'))
        # Housing-aid legislation parameters at the start of the period.
        # NOTE: the name `al` is rebound to the computed amount at the very end.
        al = simulation.legislation_at(period.start).al

        # Family-benefit parameters as they were two years before the period.
        pfam_n_2 = simulation.legislation_at(period.start.offset(-2, 'year')).fam

        # The "couple" scale is also used for single pregnant women
        couple = or_(concub, enceinte_fam)
        personne_seule = not_(couple)

        # Occupation status and rent are taken from the CHEF role.
        statut_occupation = self.cast_from_entity_to_roles(statut_occupation_holder)
        statut_occupation = self.filter_role(statut_occupation, role = CHEF)
        loyer = self.cast_from_entity_to_roles(loyer_holder)
        loyer = self.filter_role(loyer, role = CHEF)

        zone_apl = zone_apl_famille
        # Individual variables
        coloc = self.any_by_roles(coloc_holder)
        chambre = self.any_by_roles(logement_chambre_holder)

        # Tax-household ("foyer fiscal") variables
        nat_imp = self.cast_from_entity_to_roles(nat_imp_holder)
        nat_imp = self.any_by_roles(nat_imp)

        # Does not take rooms or residential homes ("logements-foyers") into account.
        # Variables needed in FA
        # al_pac : number of dependants of the household taken into account for the housing aid
        # zone_apl
        # loyer
        # coloc (1 if flat-sharing, 0 otherwise)
        # statut_occupation : occupation status of the dwelling
        # See statut_occupation in model/caracteristiques_socio_demographiques/logement.py

        # Statuses 3 to 5 and 7 are handled as tenants, status 1 as home buyers.
        loca = ((3 <= statut_occupation) & (5 >= statut_occupation)) | (statut_occupation == 7)
        acce = statut_occupation == 1

        # # housing aid for tenants
        # monthly rent, multiplied by 2/3 for furnished dwellings (status 5)
        L1 = round((statut_occupation == 5) * loyer * 2 / 3 + (statut_occupation != 5) * loyer, 2)

        # rate to apply to the rent ceiling
        taux_loyer_plafond = (and_(not_(coloc), not_(chambre)) * 1
                             + chambre * al.loyers_plafond.chambre
                             + not_(chambre) * coloc * al.loyers_plafond.colocation)

        # Mutually exclusive household categories used to pick the rent ceiling.
        loyer_plafond_personne_seule = or_(personne_seule * (al_pac == 0), chambre)
        loyer_plafond_famille = not_(loyer_plafond_personne_seule) * (al_pac > 0)
        loyer_plafond_couple = and_(not_(loyer_plafond_famille), not_(loyer_plafond_personne_seule))

        z1 = al.loyers_plafond.zone1
        z2 = al.loyers_plafond.zone2
        z3 = al.loyers_plafond.zone3

        # Rent ceiling per zone: L1 (single), L2 (couple), L3 (family with one
        # dependant) plus L4 for each dependant beyond the first.
        Lz1 = (
            loyer_plafond_personne_seule * z1.L1 +
            loyer_plafond_couple * z1.L2 +
            loyer_plafond_famille * (z1.L3 + (al_pac > 1) * (al_pac - 1) * z1.L4)
            )
        Lz2 = (
            loyer_plafond_personne_seule * z2.L1 +
            loyer_plafond_couple * z2.L2 +
            loyer_plafond_famille * (z2.L3 + (al_pac > 1) * (al_pac - 1) * z2.L4)
            )
        Lz3 = (
            loyer_plafond_personne_seule * z3.L1 +
            loyer_plafond_couple * z3.L2 +
            loyer_plafond_famille * (z3.L3 + (al_pac > 1) * (al_pac - 1) * z3.L4)
            )

        # Ceiling of the household's own zone, scaled for rooms and flat-sharing.
        L2 = Lz1 * (zone_apl == 1) + Lz2 * (zone_apl == 2) + Lz3 * (zone_apl == 3)
        L2 = round(L2 * taux_loyer_plafond, 2)

        # retained rent: actual rent capped by the ceiling
        L = min_(L1, L2)

        # flat-rate charges
        P_fc = al.forfait_charges
        C = (
            not_(coloc) * (P_fc.fc1 + al_pac * P_fc.fc2) +
            coloc * ((personne_seule * 0.5 + couple) * P_fc.fc1 + al_pac * P_fc.fc2)
            )

        # eligible expense
        E = L + C

        # resources taken into account
        R = aide_logement_base_ressources

        # RO ceiling
        rmi = al.rmi
        R1 = (
            al.R1.taux1 * rmi * personne_seule * (al_pac == 0) +
            al.R1.taux2 * rmi * couple * (al_pac == 0) +
            al.R1.taux3 * rmi * (al_pac == 1) +
            al.R1.taux4 * rmi * (al_pac >= 2) +
            al.R1.taux5 * rmi * (al_pac > 2) * (al_pac - 2)
            )

        bmaf = pfam_n_2.af.bmaf
        R2 = (
            al.R2.taux4 * bmaf * (al_pac >= 2) +
            al.R2.taux5 * bmaf * (al_pac > 2) * (al_pac - 2)
            )

        # Annualised (x 12) and reduced by the al.autres.abat_sal rate.
        Ro = round(12 * (R1 - R2) * (1 - al.autres.abat_sal))

        # Resources above the Ro allowance, floored at zero.
        Rp = max_(0, R - Ro)

        # Personal contribution (minimum part)
        Po = max_(al.pp.taux * E, al.pp.min)

        # Family rate
        TF = (
            al.TF.taux1 * (personne_seule) * (al_pac == 0) +
            al.TF.taux2 * (couple) * (al_pac == 0) +
            al.TF.taux3 * (al_pac == 1) +
            al.TF.taux4 * (al_pac == 2) +
            al.TF.taux5 * (al_pac == 3) +
            al.TF.taux6 * (al_pac >= 4) +
            al.TF.taux7 * (al_pac > 4) * (al_pac - 4)
            )

        # Reference rent (always built from the zone 2 ceilings)
        L_Ref = (
            z2.L1 * (personne_seule) * (al_pac == 0) +
            z2.L2 * (couple) * (al_pac == 0) +
            z2.L3 * (al_pac >= 1) +
            z2.L4 * (al_pac > 1) * (al_pac - 1)
            )

        # Ratio of the retained rent to the reference rent.
        RL = L / L_Ref

        # TODO: hard-coded parameters ??
        # Rent rate: zero up to RL = 0.45, slope taux2 up to 0.75, slope taux3 beyond.
        TL = max_(max_(0, al.TL.taux2 * (RL - 0.45)), al.TL.taux3 * (RL - 0.75) + al.TL.taux2 * (0.75 - 0.45))

        Tp = TF + TL

        # Total personal contribution, then the aid itself (tenants only),
        # dropped when it is below the al.autres.nv_seuil threshold.
        PP = Po + Tp * Rp
        al_loc = max_(0, E - PP) * loca
        al_loc = al_loc * (al_loc >= al.autres.nv_seuil)

        # # TODO: APL for home buyers
        al_acc = 0 * acce
        # # APL (all)

        # From here on `al` is the computed amount, no longer the parameters node.
        al = al_loc + al_acc
        return period, al
def _bracket_indices(positions, brk):
    """Return the (low, high) indices of the sorted `positions` that bracket `brk`."""
    last = len(positions) - 1
    if brk <= positions[0]:
        return 0, 0
    if brk >= positions[-1]:
        return last, last
    # Same rule as a linear scan keeping the last ii such that
    # positions[ii] <= brk < positions[ii + 1], done in one vectorised pass.
    hits = ((positions[:-1] <= brk) & (brk < positions[1:])).nonzero()[0]
    if len(hits) == 0:
        # Typically NaN positions (e.g. a zero total weight): fail loudly
        # instead of silently reusing the indices of a previous breakpoint.
        raise ValueError("cannot bracket breakpoint %r: check the weights" % (brk,))
    low = int(hits[-1])
    return low, low + 1


def _label_by_quantiles(values, quantiles, labels):
    """Return an int array giving, for each of `values`, the label of its x-tile."""
    marks = repeat(0, len(values))
    for i in range(len(quantiles) - 1):
        marks[(values >= quantiles[i]) & (values < quantiles[i + 1])] = labels[i]
    # Make sure the values at or beyond the extreme breakpoints are marked.
    marks[values <= quantiles[0]] = labels[0]
    marks[values >= quantiles[-1]] = labels[-1]
    return marks


def mark_weighted_percentiles(a, labels, weights, method, return_quantiles=False):
    """
    Label every element of `a` with the weighted x-tile it falls in.

    Adapted from http://pastebin.com/KTLip9ee

    Parameters
    ----------
    a : array-like of values (converted with ``asarray``, so lists and
        pandas Series are handled positionally).
    labels : sequence with the names given to the x-tiles; the number of x-tiles
        is its length, the breakpoints being evenly spaced between 0 and 1
        (5 labels means quintiles).
    weights : array-like of weights, ``weights[i]`` going with ``a[i]``.
    method : 1 for the "vanilla" formula of the Wikipedia article,
        2 for the stats.stackexchange one.
    return_quantiles : when True, also return the list of breakpoint values.

    Returns
    -------
    An integer array shaped like `a` holding ``labels[i]`` where ``a[j]`` falls
    in x-tile i, or the tuple ``(array, quantiles)`` when `return_quantiles` is
    True.  When there are fewer values than labels the array is all zeros and
    the quantile list is empty.

    Raises
    ------
    ValueError : unknown `method`, empty `labels`, mismatching lengths, or
        weights for which a breakpoint cannot be bracketed.
    """
    if method not in (1, 2):
        # Previously an unknown method silently returned None.
        raise ValueError("method must be 1 or 2, got %r" % (method,))

    values = asarray(a)
    weights = asarray(weights)
    N = len(values)
    num_categories = len(labels)
    if num_categories == 0:
        raise ValueError("labels must not be empty")
    if len(weights) != N:
        raise ValueError("a and weights must have the same length")

    # Not enough data to fill every x-tile: nothing is labelled.  The return
    # shape still follows `return_quantiles` so that callers can unpack it.
    if N < num_categories:
        ret = repeat(0, N)
        if return_quantiles:
            return ret, []
        return ret

    # Sort the values and apply the same sort to the weights.
    order = argsort(values)
    tmp_a = values[order]
    tmp_weights = weights[order]

    breaks = linspace(0, 1, num_categories + 1)
    cu_weights = cumsum(tmp_weights)

    # Value at each breakpoint: find the two data points that bracket it, then
    # interpolate between them with weights derived from the cumulative sums.
    quantiles = []
    if method == 1:
        # Percentile rank of each explicit data point.
        p_vals = (1.0 / cu_weights[-1]) * (cu_weights - 0.5 * tmp_weights)
        for brk in breaks:
            i_low, i_high = _bracket_indices(p_vals, brk)
            if i_low == i_high:
                v = tmp_a[i_low]
            else:
                # Two brackets: apply the formula as per Wikipedia.
                v = (tmp_a[i_low] +
                    ((brk - p_vals[i_low]) / (p_vals[i_high] - p_vals[i_low])) * (tmp_a[i_high] - tmp_a[i_low]))
            quantiles.append(v)
    else:
        # Formula from the stats.stackexchange.com post.
        s_vals = [0.0]
        for ii in range(1, N):
            s_vals.append(ii * tmp_weights[ii] + (N - 1) * cu_weights[ii - 1])
        s_vals = asarray(s_vals)

        # Normalised s_vals for comparing with the breakpoints.  With a single
        # value the total is 0: skip the division, every break maps to it anyway.
        total = s_vals[-1]
        norm_s_vals = (1.0 / total) * s_vals if total != 0 else s_vals
        for brk in breaks:
            i_low, i_high = _bracket_indices(norm_s_vals, brk)
            if i_low == i_high:
                v = tmp_a[i_low]
            else:
                # Interpolate as in method 1, but using the s_vals instead.
                v = (tmp_a[i_low] +
                    (((brk * s_vals[-1]) - s_vals[i_low]) /
                        (s_vals[i_high] - s_vals[i_low])) * (tmp_a[i_high] - tmp_a[i_low]))
            quantiles.append(v)

    # Now that the weighted breakpoints are set, categorise the elements of a.
    ret = _label_by_quantiles(values, quantiles, labels)

    if return_quantiles:
        return ret, quantiles
    return ret
Example #23
0
def _ass_elig_i(chomeur, ass_precondition_remplie):
    """
    Individual eligibility for the ASS.

    Both flags have to hold: `chomeur` and `ass_precondition_remplie`
    are combined with `and_`.
    """
    eligible = and_(chomeur, ass_precondition_remplie)
    return eligible
Example #24
0
def _nbJ(self, age, inv, quifoy):
    """
    Count, per entity, the people with ``quifoy >= 2`` who are at least 18 and not `inv`.

    The per-person flags are cast to int16 and aggregated with
    ``self.sum_by_entity``.
    """
    # `and_` is only called with two operands: with numpy.logical_and a third
    # positional argument is the `out` buffer, so the former three-argument
    # call overwrote `not_(inv)` instead of taking it into account.
    majeur_celibataire_sans_enfant = and_(and_(quifoy >= 2, age >= 18), not_(inv))
    return self.sum_by_entity(majeur_celibataire_sans_enfant.astype(int16))
Example #25
0
def _nbF(self, age, alt, inv, quifoy):
    """
    Count, per entity, the people with ``quifoy >= 2`` who are under 18 or `inv`, and not `alt`.

    The per-person flags are cast to int16 and aggregated with
    ``self.sum_by_entity``.
    """
    # `and_` is only called with two operands: with numpy.logical_and a third
    # positional argument is the `out` buffer, so the former three-argument
    # call overwrote `not_(alt)` instead of taking it into account.
    mineur_ou_invalide = or_(age < 18, inv)
    enfant_a_charge = and_(and_(quifoy >= 2, mineur_ou_invalide), not_(alt))
    return self.sum_by_entity(enfant_a_charge.astype(int16))
Example #26
0
def _nbG(self, alt, inv, quifoy):
    """
    Count, per entity, the people with ``quifoy >= 2`` who are `inv` and not `alt`.

    The per-person flags are cast to int16 and aggregated with
    ``self.sum_by_entity``.
    """
    # `and_` is only called with two operands: with numpy.logical_and a third
    # positional argument is the `out` buffer, so the former three-argument
    # call overwrote `not_(alt)` instead of taking it into account.
    enfant_a_charge_invalide = and_(and_(quifoy >= 2, inv), not_(alt))
    return self.sum_by_entity(enfant_a_charge_invalide.astype(int16))
Example #27
0
def _nbH(self, age, alt, inv, quifoy):
    """Count, per entity, the members with ``quifoy >= 2``, (``age < 18`` or ``inv``) and ``alt``.

    ``and_`` is applied pairwise on purpose: when ``and_`` is
    ``numpy.logical_and`` (which the array usage here suggests), a third
    positional argument is the ``out`` buffer, not a third condition. The
    former three-argument call therefore never tested ``alt`` and, worse,
    overwrote the caller's ``alt`` array in place.

    Parameters
    ----------
    age : array of ages, compared against 18.
    alt : boolean array; only True entries are kept
        (presumably shared-custody flag -- TODO confirm). Left untouched.
    inv : boolean array; True entries qualify regardless of age.
    quifoy : array of positions; only values >= 2 are kept.

    Returns
    -------
    The result of ``self.sum_by_entity`` applied to the int16 0/1 mask.
    """
    mineur_ou_invalide = or_(age < 18, inv)
    a_charge = and_(quifoy >= 2, mineur_ou_invalide)
    enfant_a_charge_garde_alternee = and_(a_charge, alt)
    return self.sum_by_entity(enfant_a_charge_garde_alternee.astype(int16))
Example #28
0
    def describe_discrepancies(self, fov = 10, descending = True):
        """
        Print a report of the discrepancies between the ERF and the simulated
        values of ``self.variable``.

        ``self.erf_menage`` is inner-joined with ``self.simu_aggr_tables`` on
        ``idmen`` (suffixes ``_erf`` / ``_of``). For ``self.variable``, only rows
        where both values are non-zero are kept, the relative difference
        ``(of - erf) / erf`` is computed, its min / max / ``describe()`` and the
        quantiles returned by ``mwp`` are printed, and the first ``fov`` rows
        after sorting on that difference are displayed. When the variable's
        column is a ``Prestation``, its children, its parents (walked
        recursively) and its options are gathered and printed as well.

        Nothing is returned: all output goes to stdout through ``print``.

        Parameters
        ----------
        fov : int
            Number of leading rows taken from the sorted table at each display
            or merge step (used as ``[0:fov]``).
        descending : bool
            When True the table is sorted with ``ascending = False``, so the
            largest relative differences come first.
        """
        erf_menage = self.erf_menage
        erf_eec_indivi = self.erf_eec_indivi
        simu_aggr_tables = self.simu_aggr_tables
        simu_nonaggr_tables = self.simu_nonaggr_tables

        # Detect the biggest differences
        # Inner join: only households present in both tables survive; shared
        # column names get the '_erf' / '_of' suffixes used below.
        bigtable = merge(erf_menage, simu_aggr_tables, on = 'idmen', how = 'inner', suffixes=('_erf','_of'))
        print 'Length of new dataframe is %s' %str(len(bigtable))
        #print bigtable.columns
        bigtable.set_index('idmen', drop = False, inplace = True)

        # Shared with iter_on_parents below: names of variables and options
        # already visited, so the recursion does not process them twice.
        already_met = []
        options_met = []

        for col in [self.variable]:
            bigtemp = None
            # Keep rows where both the ERF and the simulated value are non-zero
            # (this also avoids dividing by zero on the next line).
            table = bigtable[and_(bigtable[col+'_erf']!=0,bigtable[col+'_of']!=0)]
            # NOTE(review): table is a boolean-mask selection of bigtable; assigning
            # a column to it may trigger pandas' SettingWithCopy warning -- confirm.
            table[col] = (table[col+'_of'] - table[col+'_erf']) / table[col+'_erf'] # relative difference
            # table[col + "_sign"] = table[col].apply(lambda x: x/abs(x))
            # table[col] = table[col].apply(lambda x: abs(x))


            print 'Minimum difference between the two tables for %s is %s' %(col, str(table[col].min()))
            print 'Maximum difference between the two tables for %s is %s' %(col, str(table[col].max()))
            print table[col].describe()
            # print table[col + "_sign"].describe()

            # TODO: do regular percentiles
            # mwp is presumably a weighted-percentile helper (called with values,
            # labels, weights and a method number) -- TODO confirm its contract.
            try:
                assert len(table[col]) == len(table['wprm_of']), "PINAGS"
                dec, values = mwp(table[col], np.arange(1,11), table['wprm_of'], 2, return_quantiles=True)
                print sorted(values)
                dec, values = mwp(table[col], np.arange(1,101), table['wprm_erf'], 2, return_quantiles=True)
                print sorted(values)[90:]
                del dec, values
                gc.collect()
            # NOTE(review): bare except hides every failure (including the assert
            # above and any NameError); only a message is printed.
            except:
                print 'Weighted percentile method didnt work for %s' %col
                pass
            print "\n"

        # Show the relevant information for the most deviant households
            # NOTE(review): DataFrame.sort(columns=...) is an old pandas API;
            # confirm the pandas version this file targets.
            table.sort(columns = col, ascending = not descending, inplace = True)
            #print table[col][0:10].to_string()
            if bigtemp is None:
                bigtemp = {'table' : table[[col, col+'_of', col+'_erf', 'idmen']][0:fov],
                           'options' : None}
            # Ratio simulated / ERF for the fov rows kept above.
            bigtemp['table'][col+'div'] = bigtemp['table'][col+'_of'] / bigtemp['table'][col+'_erf']
            print bigtemp['table'].to_string()

            '''
            bigtemp is the table which will get filled little by little by the relevant variables.
            Up to the last rows of code 'table' refers to a table of aggregated values,
            while 'options is a table of individual variables.
            The reason we call it in a dictionnary is also because we modify it inside the recursive function 'iter_on parents',
            and it causes an error in Python unless for certain types like dictionnary values.
            '''
            #print "\n"

            # If variable is a Prestation, we show the dependancies
            varcol = self.simulation.output_table.column_by_name.get(col)
            if isinstance(varcol, Prestation):

                '''
                For the direct children
                '''
                if not varcol._children is None:
                    ch_to_fetch = list(varcol._children)
                    ch_to_fetch = map(lambda x: x.name, ch_to_fetch)
                    ch_fetched = []

                    # Three cases: every child is a simulated column; every child
                    # is either simulated or in the ERF table; some child is in
                    # neither. Each case builds ch_fetched, the column names
                    # to display next to col.
                    if set(ch_to_fetch) <= set(simu_aggr_tables.columns):
                        print "Variables which need %s to be computed :\n %s \n" %(col, str(ch_to_fetch))
                        for var in ch_to_fetch:
                            if var + '_of' in table.columns:
                                ch_fetched.append(var + '_of')
                            else:
                                ch_fetched.append(var)
                    elif set(ch_to_fetch) <= set(simu_aggr_tables.columns).union(erf_menage.columns):
                        print "Variables which need %s to be computed (some missing picked in erf):\n %s \n" %(col, str(ch_to_fetch))
                        for var in ch_to_fetch:
                            # NOTE(review): a var found in simu_aggr_tables whose
                            # '_of' column is absent from table is silently skipped.
                            if var in simu_aggr_tables.columns:
                                if var + '_of' in table.columns:
                                    ch_fetched.append(var + '_of')
                            elif var + '_erf' in table.columns:
                                    ch_fetched.append(var + '_erf')
                            else:
                                ch_fetched.append(var)
                    else:
                        print "Variables which need %s to be computed (some missing):\n %s \n" %(col, str(ch_to_fetch))
                        for var in ch_to_fetch:

                            if var in simu_aggr_tables.columns:
                                if var + '_of' in table.columns:
                                    ch_fetched.append(var + '_of')
                            elif var in erf_menage.columns:
                                if var + '_erf' in table.columns:
                                    ch_fetched.append(var + '_erf')

                    print table[[col] + ch_fetched][0:fov]
                    print "\n"
                    del ch_to_fetch, ch_fetched

                '''
                For the parents
                '''
                def iter_on_parents(varcol):
                    # Recursive walk over varcol._parents. Side effects: merges
                    # parent columns into bigtemp['table'], accumulates option
                    # columns into bigtemp['options'], and records visited names
                    # in already_met / options_met. Returns None.
                    if (varcol._parents == set() and varcol._option == {}) or varcol.name in already_met:
                        return
                    else:
                        par_to_fetch = list(varcol._parents)
                        par_to_fetch = map(lambda x: x.name, par_to_fetch)
                        par_fetched = []

                        # Same three-way column lookup as for the children above.
                        if set(par_to_fetch) <= set(simu_aggr_tables.columns):
                            #print "Variables the prestation %s depends of :\n %s \n" %(varcol.name, str(par_fetched))
                            for var in par_to_fetch:
                                if var + '_of' in table.columns:
                                    par_fetched.append(var + '_of')
                                else:
                                    par_fetched.append(var)
                        elif set(par_to_fetch) <= set(simu_aggr_tables.columns).union(erf_menage.columns):
                            #print "Variables the prestation %s depends of (some missing picked in erf):\n %s \n" %(varcol.name,str(par_fetched))
                            for var in par_to_fetch:
                                if var in simu_aggr_tables.columns:
                                    if var + '_of' in table.columns:
                                        par_fetched.append(var + '_of')
                                elif var + '_erf' in table.columns:
                                        par_fetched.append(var + '_erf')
                                else:
                                    par_fetched.append(var)
                        else:
                            for var in par_to_fetch:
                                if var in simu_aggr_tables.columns:
                                    if var + '_of' in table.columns:
                                        par_fetched.append(var + '_of')
                                elif var in erf_menage.columns:
                                    if var + '_erf' in table.columns:
                                        par_fetched.append(var + '_erf')
                            # Both branches are no-ops: the diagnostics are commented out.
                            if len(par_fetched) > 0:
                                #print "Variables the prestation %s depends of (some missing):\n %s \n" %(varcol.name, str(par_fetched))
                                pass
                            else:
                                #print "Variables the prestation %s depends of couldn't be found :\n %s \n" %(varcol.name, str(par_fetched))
                                pass

                        if len(par_fetched) > 0:
                            # Attach the parents' values for the fov first households.
                            temp = table[['idmen'] + par_fetched][0:fov]
                            bigtemp['table'] = bigtemp['table'].merge(temp, how = 'inner', on = 'idmen')
                            #print temp.to_string(), "\n"
                        # NOTE(review): '<' is a strict-subset test, so when the option
                        # keys equal options_met exactly this branch still runs, with
                        # an empty vars_to_fetch -- confirm '<=' was not intended.
                        if varcol._option != {} and not set(varcol._option.keys()) < set(options_met):
                            vars_to_fetch = list(set(varcol._option.keys())-set(options_met))
                            #print "and the options to current variable %s for the id's with strongest difference :\n %s \n" %(varcol.name, varcol._option.keys())
                            # idmen of the fov first rows, in table order.
                            # NOTE(review): indexes positions 0..fov-1 unconditionally;
                            # assumes table has at least fov rows -- confirm.
                            liste = [i for i in range(0,fov)]
                            liste = map(lambda x: table['idmen'].iloc[x], liste)
                            temp = simu_nonaggr_tables[['idmen', 'quimen','noindiv']
                                                      + vars_to_fetch][simu_nonaggr_tables['idmen'].isin(table['idmen'][0:fov])]

                            # Rebuild temp household by household so its rows follow
                            # the same idmen order as liste.
                            temp_sorted = temp[temp['idmen'] == liste[0]]
                            for i in xrange(1,fov):
                                temp_sorted = temp_sorted.append(temp[temp['idmen'] == liste[i]])
                            if bigtemp['options'] is None:
                                bigtemp['options'] = temp_sorted
                                bigtemp['options'] = bigtemp['options'].merge(erf_eec_indivi, on = 'noindiv', how = 'outer')
                            else:
                                bigtemp['options'] = bigtemp['options'].merge(temp_sorted, on = ['noindiv','idmen','quimen'], how = 'outer')
    #                         temp_sorted.set_index(['idmen',  'quimen'], drop = True, inplace = True) # If we do that
                            del temp, temp_sorted
                            gc.collect()

                        already_met.append(varcol.name)
                        options_met.extend(varcol._option.keys())
                        for var in varcol._parents:
                            iter_on_parents(var)

                iter_on_parents(varcol)
                # We merge the aggregate table with the option table ( for each individual in entity )
                # NOTE(review): bigtemp['options'] is still None when the walk above
                # never entered the options branch -- confirm this merge copes with it.
                bigtemp['table'] = bigtemp['table'].merge(bigtemp['options'],
                                                           how = 'left',
                                                            on = 'idmen',
                                                             suffixes = ('(agg)', '(ind)'))

                # Reshaping the table to group by descending error on col, common entities
                # NOTE(review): the sort key is the hard-coded column 'af', not col,
                # although the comment above refers to col -- confirm intent.
                bigtemp['table'].sort(columns = ['af','quimen'], ascending = [False,True], inplace = True)
                bigtemp['table'] = bigtemp['table'].groupby(['idmen','quimen'], sort = False).sum()
                print "Table of values for %s dependencies : \n" %col
                print bigtemp['table'].to_string()
                del bigtemp['table'], bigtemp['options']
                gc.collect()