def formula_2015_06_09(self, simulation, period): effectif_entreprise = simulation.calculate('effectif_entreprise', period) apprenti = simulation.calculate('apprenti', period) contrat_de_travail_duree = simulation.calculate( 'contrat_de_travail_duree', period) contrat_de_travail_debut = simulation.calculate( 'contrat_de_travail_debut', period) contrat_de_travail_fin = simulation.calculate('contrat_de_travail_fin', period) coefficient_proratisation = simulation.calculate( 'coefficient_proratisation', period) exoneration_cotisations_employeur_jei = simulation.calculate( 'exoneration_cotisations_employeur_jei', period) # Cette aide est temporaire. # TODO : Si toutefois elle est reconduite et modifiée pour 2017, les dates et le montant seront à # implémenter comme des params xml. eligible_contrat = and_( contrat_de_travail_debut >= datetime64("2015-06-09"), contrat_de_travail_debut <= datetime64("2016-12-31")) # Si CDD, durée du contrat doit être > 1 an eligible_duree = or_( # durée indéterminée contrat_de_travail_duree == 0, # durée déterminée supérieure à 1 an and_( contrat_de_travail_duree == 1, # CDD # > 6 mois (contrat_de_travail_fin - contrat_de_travail_debut ).astype('timedelta64[M]') >= timedelta64(6, 'M') # Initialement, la condition était d'un contrat >= 12 mois, # pour les demandes transmises jusqu'au 26 janvier. )) eligible_date = datetime64(period.offset( -24, 'month').start) < contrat_de_travail_debut eligible = \ (effectif_entreprise == 1) * not_(apprenti) * eligible_contrat * eligible_duree * eligible_date # somme sur 24 mois, à raison de 500 € maximum par trimestre montant_max = 4000 # non cumul avec le dispositif Jeune Entreprise Innovante (JEI) non_cumulee = not_(exoneration_cotisations_employeur_jei) # TODO comment implémenter la condition "premier employé" ? L'effectif est insuffisant en cas de rupture # d'un premier contrat # Condition : l’entreprise n’a pas conclu de contrat de travail avec un salarié, # au-delà de la période d’essai, dans les 12 mois précédant la nouvelle # embauche. # Si le salarié est embauché à temps partiel, # l’aide est proratisée en fonction de sa durée de travail. # TODO cette multiplication par le coefficient de proratisation suffit-elle pour le cas du temps partiel ? # A tester return eligible * (montant_max / 24) * coefficient_proratisation * non_cumulee
def function(self, simulation, period): period = period.this_month effectif_entreprise = simulation.calculate('effectif_entreprise', period) apprenti = simulation.calculate('apprenti', period) contrat_de_travail_duree = simulation.calculate('contrat_de_travail_duree', period) contrat_de_travail_debut = simulation.calculate('contrat_de_travail_debut', period) contrat_de_travail_fin = simulation.calculate('contrat_de_travail_fin', period) coefficient_proratisation = simulation.calculate('coefficient_proratisation', period) smic_proratise = simulation.calculate('smic_proratise', period) salaire_de_base = simulation.calculate('salaire_de_base', period) # Cette aide est temporaire. # Si toutefois elle est reconduite et modifiée pour 2017, les dates et le montant seront à implémenter comme # des params xml. # jusqu’à 1,3 fois le Smic eligible_salaire = salaire_de_base <= (1.3 * smic_proratise) # pour les PME eligible_effectif = effectif_entreprise < 250 # non cumulable avec l'aide pour la première embauche # qui est identique, si ce n'est qu'elle couvre tous les salaires non_cumulee = effectif_entreprise > 1 eligible_contrat = and_( contrat_de_travail_debut >= datetime64("2016-01-18"), contrat_de_travail_debut <= datetime64("2016-12-31") ) # Si CDD, durée du contrat doit être > 1 an eligible_duree = or_( # durée indéterminée contrat_de_travail_duree == 0, # durée déterminée supérieure à 1 an and_( # CDD contrat_de_travail_duree == 1, # > 6 mois (contrat_de_travail_fin - contrat_de_travail_debut).astype('timedelta64[M]') >= timedelta64(6, 'M') ) ) # Valable 2 ans seulement eligible_date = datetime64(period.offset(-24, 'month').start) < contrat_de_travail_debut eligible = ( eligible_salaire * eligible_effectif * non_cumulee * eligible_contrat * eligible_duree * eligible_date * not_(apprenti) ) # somme sur 24 mois, à raison de 500 € maximum par trimestre montant_max = 4000 # Si le salarié est embauché à temps partiel, # l’aide est proratisée en fonction de sa durée de travail. # TODO cette multiplication par le coefficient de proratisation suffit-elle pour le cas du temps partiel ? # A tester return period, eligible * (montant_max / 24) * coefficient_proratisation
def formula(individu, period):
    demandeur_emploi_non_indemnise = and_(
        individu('activite', period) == TypesActivite.chomeur,
        individu('chomage_net', period) == 0)

    # Indique que l'individu a travaillé 5 ans au cours des 10 dernieres années.
    ass_precondition_remplie = individu('ass_precondition_remplie', period)

    return and_(demandeur_emploi_non_indemnise, ass_precondition_remplie)
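Throughout these formulas, and_, or_ and not_ operate element-wise on NumPy arrays; in OpenFisca-France they are conventionally aliases of numpy.logical_and, numpy.logical_or and numpy.logical_not. A minimal standalone sketch of the boolean combination used above, with that aliasing made explicit as an assumption and with invented values:

import numpy as np

# Assumed aliasing, as used implicitly by the formulas in this file.
and_, or_, not_ = np.logical_and, np.logical_or, np.logical_not

# Three simulated individuals: unemployed status, unemployment benefit received, work-history precondition.
est_chomeur = np.array([True, True, False])
chomage_net = np.array([0.0, 250.0, 0.0])
ass_precondition_remplie = np.array([True, True, True])

demandeur_emploi_non_indemnise = and_(est_chomeur, chomage_net == 0)
eligible = and_(demandeur_emploi_non_indemnise, ass_precondition_remplie)
print(eligible)  # only the first individual is eligible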
def formula_2015_06_09(individu, period, parameters): effectif_entreprise = individu('effectif_entreprise', period) apprenti = individu('apprenti', period) contrat_de_travail_duree = individu('contrat_de_travail_duree', period) TypesContratDeTravailDuree = contrat_de_travail_duree.possible_values contrat_de_travail_debut = individu('contrat_de_travail_debut', period) contrat_de_travail_fin = individu('contrat_de_travail_fin', period) coefficient_proratisation = individu('coefficient_proratisation', period) exoneration_cotisations_employeur_jei = individu('exoneration_cotisations_employeur_jei', period) # Cette aide est temporaire. # TODO : Si toutefois elle est reconduite et modifiée pour 2017, les dates et le montant seront à # implémenter comme des params xml. eligible_contrat = and_( contrat_de_travail_debut >= datetime64("2015-06-09"), contrat_de_travail_debut <= datetime64("2016-12-31") ) # Si CDD, durée du contrat doit être > 1 an eligible_duree = or_( # durée indéterminée contrat_de_travail_duree == TypesContratDeTravailDuree.cdi, # durée déterminée supérieure à 1 an and_( contrat_de_travail_duree == TypesContratDeTravailDuree.cdd, # > 6 mois (contrat_de_travail_fin - contrat_de_travail_debut).astype('timedelta64[M]') >= timedelta64(6, 'M') # Initialement, la condition était d'un contrat >= 12 mois, # pour les demandes transmises jusqu'au 26 janvier. ) ) eligible_date = datetime64(period.offset(-24, 'month').start) < contrat_de_travail_debut eligible = \ (effectif_entreprise == 1) * not_(apprenti) * eligible_contrat * eligible_duree * eligible_date # somme sur 24 mois, à raison de 500 € maximum par trimestre montant_max = 4000 # non cumul avec le dispositif Jeune Entreprise Innovante (JEI) non_cumulee = not_(exoneration_cotisations_employeur_jei) # TODO comment implémenter la condition "premier employé" ? L'effectif est insuffisant en cas de rupture # d'un premier contrat # Condition : l’entreprise n’a pas conclu de contrat de travail avec un salarié, # au-delà de la période d’essai, dans les 12 mois précédant la nouvelle # embauche. # Si le salarié est embauché à temps partiel, # l’aide est proratisée en fonction de sa durée de travail. # TODO cette multiplication par le coefficient de proratisation suffit-elle pour le cas du temps partiel ? # A tester return eligible * (montant_max / 24) * coefficient_proratisation * non_cumulee
def function(self, simulation, period):
    period = period.this_month
    # 1 si demandeur d'emploi
    activite = simulation.calculate('activite', period)
    # Indique que l'individu a travaillé 5 ans au cours des 10 dernieres années.
    ass_precondition_remplie = simulation.calculate('ass_precondition_remplie', period)
    are_perceived_this_month = simulation.calculate('chomage_net', period)

    return period, and_(and_(activite == 1, ass_precondition_remplie), are_perceived_this_month == 0)
def formula_2017_01_01(individu, period):
    aah_eligible = individu('aah', period.offset(-1)) > 0

    demandeur_emploi_non_indemnise = and_(
        individu('activite', period) == TypesActivite.chomeur,
        individu('chomage_net', period) == 0)

    # Indique que l'individu a travaillé 5 ans au cours des 10 dernieres années.
    ass_precondition_remplie = individu('ass_precondition_remplie', period)

    return and_(
        not_(aah_eligible),
        and_(demandeur_emploi_non_indemnise, ass_precondition_remplie))
def function(self, simulation, period): period = period.this_month effectif_entreprise = simulation.calculate('effectif_entreprise', period) apprenti = simulation.calculate('apprenti', period) contrat_de_travail_duree = simulation.calculate('contrat_de_travail_duree', period) contrat_de_travail_debut = simulation.calculate('contrat_de_travail_debut', period) contrat_de_travail_fin = simulation.calculate('contrat_de_travail_fin', period) coefficient_proratisation = simulation.calculate('coefficient_proratisation', period) # Cette aide est temporaire. # TODO : Si toutefois elle est reconduite et modifiée pour 2017, les dates et le montant seront à # implémenter comme des params xml. eligible_contrat = and_( contrat_de_travail_debut >= datetime64("2015-06-09"), contrat_de_travail_debut <= datetime64("2016-12-31") ) # Si CDD, durée du contrat doit être > 1 an eligible_duree = or_( # durée indéterminée contrat_de_travail_duree == 0, # durée déterminée supérieure à 1 an and_( contrat_de_travail_duree == 1, # CDD # > 6 mois (contrat_de_travail_fin - contrat_de_travail_debut).astype('timedelta64[M]') >= timedelta64(6, 'M') # Initialement, la condition était d'un contrat >= 12 mois, # pour les demandes transmises jusqu'au 26 janvier. ) ) eligible_date = datetime64(period.offset(-24, 'month').start) < contrat_de_travail_debut eligible = \ (effectif_entreprise == 1) * not_(apprenti) * eligible_contrat * eligible_duree * eligible_date # somme sur 24 mois, à raison de 500 € maximum par trimestre montant_max = 4000 # TODO comment implémenter la condition "premier employé" ? L'effectif est insuffisant en cas de rupture # d'un premier contrat # Condition : l’entreprise n’a pas conclu de contrat de travail avec un salarié, # au-delà de la période d’essai, dans les 12 mois précédant la nouvelle # embauche. # Si le salarié est embauché à temps partiel, # l’aide est proratisée en fonction de sa durée de travail. # TODO cette multiplication par le coefficient de proratisation suffit-elle pour le cas du temps partiel ? # A tester return period, eligible * (montant_max / 24) * coefficient_proratisation
def formula_2009_04(famille, period, parameters):
    '''
    Prime de solidarité active (exceptionnelle, 200€ versés une fois en avril 2009)

    Versement en avril 2009 d’une prime de solidarité active (Psa) aux familles modestes qui ont bénéficié
    en janvier, février ou mars 2009 du Rmi, de l’Api (du Rsa expérimental, du Cav ou du Rma pour les
    ex-bénéficiaires du Rmi ou de l’Api), de la prime forfaitaire mensuelle au titre du Rmi ou de l’Api
    ou enfin d’une aide au logement (à condition d’exercer une activité professionnelle et d’être âgé
    de plus de 25 ans ou d’avoir au moins un enfant à charge).
    La Psa, prime exceptionnelle, s’élève à 200 euros par foyer bénéficiaire.
    '''
    P = parameters(period).prestations.minima_sociaux.rmi
    api = famille('api', period)
    rsa = famille('rsa', period)
    af_nbenf = famille('af_nbenf', period)
    aide_logement = famille('aide_logement', period)

    personne_en_activite_i = (famille.members('activite', period) == TypesActivite.actif)
    parent_en_activite = famille.any(personne_en_activite_i, role = Famille.PARENT)

    dummy_api = api > 0
    dummy_rmi = rsa > 0
    dummy_al = and_(aide_logement > 0, or_(af_nbenf > 0, parent_en_activite))

    condition = (dummy_api + dummy_rmi + dummy_al > 0)
    psa = condition * P.psa

    return psa
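The `dummy_api + dummy_rmi + dummy_al > 0` test works because, on NumPy boolean arrays, `+` behaves as an element-wise OR, so the prime is due as soon as at least one of the three conditions holds. A small standalone illustration of that pattern, with invented values:

import numpy as np

dummy_api = np.array([True, False, False])
dummy_rmi = np.array([False, False, True])
dummy_al = np.array([False, False, True])

# "+" on boolean arrays is an element-wise OR, and "> 0" leaves the mask unchanged.
condition = (dummy_api + dummy_rmi + dummy_al) > 0
print(condition)          # True for families meeting at least one condition
print(condition * 200.0)  # flat 200 € amount for eligible families, 0 otherwise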
def formula_2009_04(famille, period, parameters):
    '''
    Prime de solidarité active (exceptionnelle, 200€ versés une fois en avril 2009)

    Versement en avril 2009 d’une prime de solidarité active (Psa) aux familles modestes qui ont bénéficié
    en janvier, février ou mars 2009 du Rmi, de l’Api (du Rsa expérimental, du Cav ou du Rma pour les
    ex-bénéficiaires du Rmi ou de l’Api), de la prime forfaitaire mensuelle au titre du Rmi ou de l’Api
    ou enfin d’une aide au logement (à condition d’exercer une activité professionnelle et d’être âgé
    de plus de 25 ans ou d’avoir au moins un enfant à charge).
    La Psa, prime exceptionnelle, s’élève à 200 euros par foyer bénéficiaire.
    '''
    P = parameters(period).prestations.minima_sociaux.rmi
    api = famille('api', period)
    rsa = famille('rsa', period)
    af_nbenf = famille('af_nbenf', period)
    aide_logement = famille('aide_logement', period)

    personne_en_activite_i = (famille.members('activite', period) == TypesActivite.actif)
    parent_en_activite = famille.any(personne_en_activite_i, role=Famille.PARENT)

    dummy_api = api > 0
    dummy_rmi = rsa > 0
    dummy_al = and_(aide_logement > 0, or_(af_nbenf > 0, parent_en_activite))

    condition = (dummy_api + dummy_rmi + dummy_al > 0)
    psa = condition * P.psa

    return psa
def formula_2017_09_01(individu, period, parameters):
    '''
    Reference : https://www.legifrance.gouv.fr/eli/decret/2017/5/5/ETSD1708117D/jo/article_2
    '''
    age_max = parameters(period).prestations.minima_sociaux.ass.age_max
    sous_age_limite = individu('age_en_mois', period) <= age_max

    aah_eligible = individu('aah', period) > 0

    demandeur_emploi_non_indemnise = and_(
        individu('activite', period) == TypesActivite.chomeur,
        individu('chomage_net', period) == 0)

    eligible_cumul_ass = individu('ass_eligibilite_cumul_individu', period)
    demandeur_emploi_non_indemnise_et_cumul_accepte = or_(
        demandeur_emploi_non_indemnise,
        not_(demandeur_emploi_non_indemnise) * eligible_cumul_ass)

    # Indique que l'individu a travaillé 5 ans au cours des 10 dernieres années.
    ass_precondition_remplie = individu('ass_precondition_remplie', period)

    return (
        not_(aah_eligible)
        * demandeur_emploi_non_indemnise_et_cumul_accepte
        * ass_precondition_remplie
        * sous_age_limite
        )
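For boolean inputs, the expression `or_(demandeur_emploi_non_indemnise, not_(demandeur_emploi_non_indemnise) * eligible_cumul_ass)` evaluates to a plain OR of the two conditions. A quick standalone check of that boolean algebra, using NumPy only and made-up arrays:

import numpy as np

A = np.array([True, True, False, False])   # demandeur_emploi_non_indemnise
B = np.array([True, False, True, False])   # eligible_cumul_ass

# A or ((not A) and B) simplifies to A or B.
expression = np.logical_or(A, np.logical_not(A) * B)
print(np.array_equal(expression, np.logical_or(A, B)))  # True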
def function_2009(self, simulation, period): """ Prime de solidarité active (exceptionnelle, 200€ versés une fois en avril 2009) Versement en avril 2009 d’une prime de solidarité active (Psa) aux familles modestes qui ont bénéficié en janvier, février ou mars 2009 du Rmi, de l’Api (du Rsa expérimental, du Cav ou du Rma pour les ex-bénéficiaires du Rmi ou de l’Api), de la prime forfaitaire mensuelle au titre du Rmi ou de l’Api ou enfin d’une aide au logement (à condition d’exercer une activité professionnelle et d’être âgé de plus de 25 ans ou d’avoir au moins un enfant à charge). La Psa, prime exceptionnelle, s’élève à 200 euros par foyer bénéficiaire. """ period = period.start.offset("first-of", "month").period("month") api = simulation.calculate("api", period) rsa = simulation.calculate("rsa", period) activite_holder = simulation.compute("activite", period) af_nbenf = simulation.calculate("af_nbenf", period) aide_logement = simulation.calculate("aide_logement", period) P = simulation.legislation_at(period.start).minim.rmi activite = self.split_by_roles(activite_holder, roles=[CHEF, PART]) dummy_api = api > 0 dummy_rmi = rsa > 0 dummy_al = and_(aide_logement > 0, or_(af_nbenf > 0, or_(activite[CHEF] == 0, activite[PART] == 0))) condition = dummy_api + dummy_rmi + dummy_al > 0 psa = condition * P.psa return period, psa
def function_2009(self, simulation, period): ''' Prime de solidarité active (exceptionnelle, 200€ versés une fois en avril 2009) Versement en avril 2009 d’une prime de solidarité active (Psa) aux familles modestes qui ont bénéficié en janvier, février ou mars 2009 du Rmi, de l’Api (du Rsa expérimental, du Cav ou du Rma pour les ex-bénéficiaires du Rmi ou de l’Api), de la prime forfaitaire mensuelle au titre du Rmi ou de l’Api ou enfin d’une aide au logement (à condition d’exercer une activité professionnelle et d’être âgé de plus de 25 ans ou d’avoir au moins un enfant à charge). La Psa, prime exceptionnelle, s’élève à 200 euros par foyer bénéficiaire. ''' period = period.start.offset('first-of', 'year').offset(3, 'month').period('month') api = simulation.calculate('api', period) rsa = simulation.calculate('rsa', period) activite_holder = simulation.compute('activite', period) af_nbenf = simulation.calculate('af_nbenf', period) aide_logement = simulation.calculate('aide_logement', period) P = simulation.legislation_at(period.start).minim.rmi activite = self.split_by_roles(activite_holder, roles = [CHEF, PART]) dummy_api = api > 0 dummy_rmi = rsa > 0 dummy_al = and_(aide_logement > 0, or_(af_nbenf > 0, or_(activite[CHEF] == 0, activite[PART] == 0))) condition = (dummy_api + dummy_rmi + dummy_al > 0) psa = condition * P.psa return period, psa
def _enceinte_fam(self, agem_holder, enceinte_holder):
    agem_enf = self.split_by_roles(agem_holder, roles = ENFS)
    enceinte = self.split_by_roles(enceinte_holder, roles = [CHEF, PART])

    benjamin = age_en_mois_benjamin(agem_enf)
    enceinte_compat = and_(benjamin < 0, benjamin > -6)
    return or_(or_(enceinte_compat, enceinte[CHEF]), enceinte[PART])
def function(self, simulation, period):
    period = period.start.offset('first-of', 'month').period('month')
    activite = simulation.calculate('activite', period)
    ass_precondition_remplie = simulation.calculate('ass_precondition_remplie', period)

    return period, and_(activite == 1, ass_precondition_remplie)
def _psa(api, rsa, activite, af_nbenf, al, _P, _option={"activite": [CHEF, PART]}):
    '''
    Prime de solidarité active (exceptionnelle, 200€ versés une fois en avril 2009)
    '''
    # Versement en avril 2009 d’une prime de solidarité active (Psa) aux familles modestes qui ont bénéficié
    # en janvier, février ou mars 2009 du Rmi, de l’Api (du Rsa expérimental, du Cav ou du Rma pour les
    # ex-bénéficiaires du Rmi ou de l’Api), de la prime forfaitaire mensuelle au titre du Rmi ou de l’Api
    # ou enfin d’une aide au logement (à condition d’exercer une activité professionnelle et d’être âgé
    # de plus de 25 ans ou d’avoir au moins un enfant à charge).
    # La Psa, prime exceptionnelle, s’élève à 200 euros par foyer bénéficiaire.
    dummy_api = api > 0
    dummy_rmi = rsa > 0
    dummy_al = and_(
        al > 0,
        or_(af_nbenf > 0, or_(activite[CHEF] == 0, activite[PART] == 0)))

    condition = (dummy_api + dummy_rmi + dummy_al > 0)
    P = _P.minim.rmi
    psa = condition * P.psa

    return psa
def function(self, simulation, period):
    period = period.start.offset('first-of', 'month').period('year')
    activite = simulation.calculate('activite', period)
    ass_precondition_remplie = simulation.calculate('ass_precondition_remplie', period)

    return period, and_(activite == 1, ass_precondition_remplie)
def formula(famille, period, parameters):
    mois_precedent = period.offset(-1)
    last_day_reference_year = period.n_2.stop

    base_ressources_defaut = famille('aide_logement_base_ressources_defaut', period)
    base_ressources_eval_forfaitaire = famille('aide_logement_base_ressources_eval_forfaitaire', period)
    en_couple = famille('en_couple', period)

    aah_i = famille.members('aah', mois_precedent)
    aah = famille.sum(aah_i, role=Famille.PARENT)

    age_demandeur = famille.demandeur('age', period)
    age_conjoint = famille.conjoint('age', period)
    smic_horaire_brut_n2 = parameters(last_day_reference_year).cotsoc.gen.smic_h_b

    salaire_imposable_i = famille.members('salaire_imposable', period.offset(-1))
    somme_salaires = famille.sum(salaire_imposable_i, role=Famille.PARENT)

    plafond_eval_forfaitaire = 1015 * smic_horaire_brut_n2

    plafond_salaire_jeune_isole = parameters(period).prestations.aides_logement.ressources.dar_8
    plafond_salaire_jeune_couple = parameters(period).prestations.aides_logement.ressources.dar_9
    plafond_salaire_jeune = where(en_couple, plafond_salaire_jeune_couple, plafond_salaire_jeune_isole)

    neutral_jeune = or_(age_demandeur < 25, and_(en_couple, age_conjoint < 25))
    neutral_jeune &= somme_salaires < plafond_salaire_jeune

    eval_forfaitaire = base_ressources_defaut <= plafond_eval_forfaitaire
    eval_forfaitaire &= base_ressources_eval_forfaitaire > 0
    eval_forfaitaire &= aah == 0
    eval_forfaitaire &= not_(neutral_jeune)

    ressources = where(eval_forfaitaire, base_ressources_eval_forfaitaire, base_ressources_defaut)

    # Planchers de ressources pour étudiants
    # Seul le statut étudiant (et boursier) du demandeur importe, pas celui du conjoint
    Pr = parameters(period).prestations.aides_logement.ressources
    demandeur_etudiant = famille.demandeur('etudiant', period)
    demandeur_boursier = famille.demandeur('boursier', period)
    montant_plancher_ressources = max_(0, demandeur_etudiant * Pr.dar_4 - demandeur_boursier * Pr.dar_5)
    ressources = max_(ressources, montant_plancher_ressources)

    # Arrondi au centime, pour éviter qu'une petite imprécision liée à la recombinaison d'une valeur
    # annuelle éclatée ne fasse monter d'un cran l'arrondi au 100€ supérieur.
    ressources = round_(ressources * 100) / 100

    # Arrondi aux 100 euros supérieurs
    ressources = ceil(ressources / 100) * 100

    return ressources
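The final two lines implement a two-step rounding: first to the cent, to absorb floating-point noise from recombining annualised amounts, then up to the next multiple of 100 €. A standalone NumPy sketch of that step, with invented amounts:

import numpy as np

ressources = np.array([12300.000000000002, 845.0, 12350.0])

# Round to the cent first, so a value that equals a multiple of 100 up to floating-point
# noise is not pushed into the next 100 € bracket.
ressources = np.round(ressources * 100) / 100

# Then round up to the next multiple of 100 €.
ressources = np.ceil(ressources / 100) * 100
print(ressources)  # 12300, 900 and 12400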
def formula(famille, period):
    enceinte_i = famille.members('enceinte', period)
    parent_enceinte = famille.any(enceinte_i, role = Famille.PARENT)

    age_en_mois_i = famille.members('age_en_mois', period)
    age_en_mois_enfant = famille.min(age_en_mois_i, role = Famille.ENFANT)
    enceinte_compat = and_(age_en_mois_enfant < 0, age_en_mois_enfant > -6)

    return parent_enceinte + enceinte_compat
def formula(famille, period):
    enceinte_i = famille.members('enceinte', period)
    parent_enceinte = famille.any(enceinte_i, role=Famille.PARENT)

    age_en_mois_i = famille.members('age_en_mois', period)
    age_en_mois_enfant = famille.min(age_en_mois_i, role=Famille.ENFANT)
    enceinte_compat = and_(age_en_mois_enfant < 0, age_en_mois_enfant > -6)

    return parent_enceinte + enceinte_compat
def get_major_differences(self): self.build_columns_to_fetch() self.build_erf_data_frames() self.build_openfisca_data_frames() variable = self.variable erf_menages_data_frame = self.erf_menages_data_frame of_menages_data_frame = self.of_menages_data_frame merged_menage_data_frame = merge( erf_menages_data_frame[[variable, 'idmen']], of_menages_data_frame[[variable, 'idmen']], on='idmen', how='inner', suffixes=('_erf', '_of')) log.info('Length of merged_menage_data_frameis {}'.format( len(merged_menage_data_frame))) merged_menage_data_frame.set_index('idmen', drop=False, inplace=True) table = merged_menage_data_frame[and_( merged_menage_data_frame[variable + '_erf'] != 0, merged_menage_data_frame[variable + '_of'] != 0)] table[variable + "_rel_diff"] = (table[variable + '_of'] - table[variable + '_erf']) \ / table[variable + '_erf'] # Difference relative log.info( "Minimum difference between the two tables for {} is {}".format( variable, str(table[variable + "_rel_diff"].min()))) log.info( "Maximum difference between the two tables for {} is {}".format( variable, str(table[variable + "_rel_diff"].max()))) table[variable + '_ratio'] = (table[variable + '_of'] / table[variable + '_erf']) log.info(table[variable + "_rel_diff"].describe()) try: assert len(table[variable + "_rel_diff"]) == len( table['wprm_of']), "PINAGS" dec, values = mwp(table[variable + "_rel_diff"], np.arange(1, 11), table['wprm_of'], 2, return_quantiles=True) log.info(sorted(values)) dec, values = mwp(table[variable + "_rel_diff"], np.arange(1, 101), table['wprm_erf'], 2, return_quantiles=True) log.info(sorted(values)[90:]) del dec, values except: log.info('Weighted percentile method did not work for {}'.format( variable + "_rel_diff")) pass table.sort(columns=variable + "_rel_diff", ascending=False, inplace=True) print table.to_string() return table
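get_major_differences merges household-level aggregates from the two sources and inspects their relative difference. A compact standalone sketch of the same merge-and-compare pattern written against current pandas (frame and column names are invented for the example):

import pandas as pd

erf = pd.DataFrame({'idmen': [1, 2, 3], 'af': [100.0, 250.0, 0.0]})
of = pd.DataFrame({'idmen': [1, 2, 3], 'af': [110.0, 250.0, 40.0]})

merged = erf.merge(of, on='idmen', how='inner', suffixes=('_erf', '_of'))

# Keep households where both sources report a non-zero amount, then compute the relative gap.
both_nonzero = (merged['af_erf'] != 0) & (merged['af_of'] != 0)
table = merged[both_nonzero].copy()
table['af_rel_diff'] = (table['af_of'] - table['af_erf']) / table['af_erf']

print(table.sort_values('af_rel_diff', ascending=False))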
def function(self, simulation, period): period = period.this_month mois_precedent = period.offset(-1) last_day_reference_year = period.n_2.stop base_ressources_defaut = simulation.calculate( 'aide_logement_base_ressources_defaut', period) base_ressources_eval_forfaitaire = simulation.calculate( 'aide_logement_base_ressources_eval_forfaitaire', period) en_couple = simulation.calculate('en_couple', period) aah_holder = simulation.compute('aah', mois_precedent) aah = self.sum_by_entity(aah_holder, roles=[CHEF, PART]) age_holder = simulation.compute('age', period) age = self.split_by_roles(age_holder, roles=[CHEF, PART]) smic_horaire_brut_n2 = simulation.legislation_at( last_day_reference_year).cotsoc.gen.smic_h_b salaire_imposable_holder = simulation.compute('salaire_imposable', period.offset(-1)) somme_salaires = self.sum_by_entity(salaire_imposable_holder, roles=[CHEF, PART]) plafond_eval_forfaitaire = 1015 * smic_horaire_brut_n2 plafond_salaire_jeune_isole = simulation.legislation_at( period.start).prestations.aides_logement.ressources.dar_8 plafond_salaire_jeune_couple = simulation.legislation_at( period.start).prestations.aides_logement.ressources.dar_9 plafond_salaire_jeune = where(en_couple, plafond_salaire_jeune_couple, plafond_salaire_jeune_isole) neutral_jeune = or_(age[CHEF] < 25, and_(en_couple, age[PART] < 25)) neutral_jeune &= somme_salaires < plafond_salaire_jeune eval_forfaitaire = base_ressources_defaut <= plafond_eval_forfaitaire eval_forfaitaire &= base_ressources_eval_forfaitaire > 0 eval_forfaitaire &= aah == 0 eval_forfaitaire &= not_(neutral_jeune) ressources = where(eval_forfaitaire, base_ressources_eval_forfaitaire, base_ressources_defaut) # Planchers de ressources pour étudiants # Seul le statut étudiant (et boursier) du demandeur importe, pas celui du conjoint Pr = simulation.legislation_at( period.start).prestations.aides_logement.ressources etudiant_holder = simulation.compute('etudiant', period) boursier_holder = simulation.compute('boursier', period) etudiant = self.split_by_roles(etudiant_holder, roles=[CHEF, PART]) boursier = self.split_by_roles(boursier_holder, roles=[CHEF, PART]) montant_plancher_ressources = max_( 0, etudiant[CHEF] * Pr.dar_4 - boursier[CHEF] * Pr.dar_5) ressources = max_(ressources, montant_plancher_ressources) # Arrondi aux 100 euros supérieurs ressources = ceil(ressources / 100) * 100 return period, ressources
def function(famille, period):
    enceinte_i = famille.members('enceinte', period)
    parent_enceinte = famille.any(enceinte_i, role=Famille.PARENT)

    age_en_mois_i = famille.members('age_en_mois', period)
    age_en_mois_benjamin = famille.min(age_en_mois_i, role=Famille.ENFANT)
    enceinte_compat = and_(age_en_mois_benjamin < 0, age_en_mois_benjamin > -6)

    return parent_enceinte + enceinte_compat
def formula(individu, period, parameters):
    age_max = parameters(period).prestations.minima_sociaux.ass.age_max
    sous_age_limite = individu('age_en_mois', period) <= age_max

    demandeur_emploi_non_indemnise = and_(
        individu('activite', period) == TypesActivite.chomeur,
        individu('chomage_net', period) == 0)

    # Indique que l'individu a travaillé 5 ans au cours des 10 dernieres années.
    ass_precondition_remplie = individu('ass_precondition_remplie', period)

    return demandeur_emploi_non_indemnise * ass_precondition_remplie * sous_age_limite
def function(self, simulation, period):
    period = period
    agem_holder = simulation.compute('agem', period)
    enceinte_holder = simulation.compute('enceinte', period)

    agem_enf = self.split_by_roles(agem_holder, roles = ENFS)
    enceinte = self.split_by_roles(enceinte_holder, roles = [CHEF, PART])

    benjamin = age_en_mois_benjamin(agem_enf)
    enceinte_compat = and_(benjamin < 0, benjamin > -6)
    return period, or_(or_(enceinte_compat, enceinte[CHEF]), enceinte[PART])
def function(self, simulation, period):
    period = period
    age_en_mois_holder = simulation.compute('age_en_mois', period)
    enceinte_holder = simulation.compute('enceinte', period)

    age_en_mois_enf = self.split_by_roles(age_en_mois_holder, roles = ENFS)
    enceinte = self.split_by_roles(enceinte_holder, roles = [CHEF, PART])

    benjamin = age_en_mois_benjamin(age_en_mois_enf)
    enceinte_compat = and_(benjamin < 0, benjamin > -6)
    return period, or_(or_(enceinte_compat, enceinte[CHEF]), enceinte[PART])
def function(self, simulation, period):
    period = period.this_month
    chomage_net_m_1 = simulation.calculate('chomage_net', period.offset(-1))
    chomage_net_m_2 = simulation.calculate('chomage_net', period.offset(-2))
    revenus_activite_pro = simulation.calculate_add('salaire_imposable', period.n_2)
    taux_abattement = simulation.legislation_at(period.start).prestations.aides_logement.ressources.abattement_chomage_indemnise
    taux_frais_pro = simulation.legislation_at(period.start).impot_revenu.tspr.abatpro.taux

    abattement = and_(chomage_net_m_1 > 0, chomage_net_m_2 > 0) * taux_abattement * revenus_activite_pro
    abattement = round_((1 - taux_frais_pro) * abattement)

    return period, abattement
def loyer_retenu(): # loyer mensuel réel, multiplié par 2/3 pour les meublés L1 = round((statut_occupation == 5) * loyer * 2 / 3 + (statut_occupation != 5) * loyer, 2) # taux à appliquer sur le loyer plafond taux_loyer_plafond = (and_(not_(coloc), not_(chambre)) * 1 + chambre * al.loyers_plafond.chambre + not_(chambre) * coloc * al.loyers_plafond.colocation) loyer_plafond_personne_seule = or_(personne_seule * (al_pac == 0), chambre) loyer_plafond_famille = not_(loyer_plafond_personne_seule) * (al_pac > 0) loyer_plafond_couple = and_(not_(loyer_plafond_famille), not_(loyer_plafond_personne_seule)) z1 = al.loyers_plafond.zone1 z2 = al.loyers_plafond.zone2 z3 = al.loyers_plafond.zone3 Lz1 = ( loyer_plafond_personne_seule * z1.L1 + loyer_plafond_couple * z1.L2 + loyer_plafond_famille * (z1.L3 + (al_pac > 1) * (al_pac - 1) * z1.L4) ) Lz2 = ( loyer_plafond_personne_seule * z2.L1 + loyer_plafond_couple * z2.L2 + loyer_plafond_famille * (z2.L3 + (al_pac > 1) * (al_pac - 1) * z2.L4) ) Lz3 = ( loyer_plafond_personne_seule * z3.L1 + loyer_plafond_couple * z3.L2 + loyer_plafond_famille * (z3.L3 + (al_pac > 1) * (al_pac - 1) * z3.L4) ) L2 = Lz1 * (zone_apl == 1) + Lz2 * (zone_apl == 2) + Lz3 * (zone_apl == 3) L2 = round(L2 * taux_loyer_plafond, 2) # loyer retenu L = min_(L1, L2) return L
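In loyer_retenu, the zone-specific rent ceiling is selected by multiplying each candidate by an indicator (`(zone_apl == 1)`, and so on). The same selection can be expressed with numpy.select; the sketch below is only an illustration of that equivalence on invented values, not a proposed change to the formula:

import numpy as np

zone_apl = np.array([1, 2, 3, 2])
Lz1 = np.array([290.0, 290.0, 290.0, 290.0])
Lz2 = np.array([255.0, 255.0, 255.0, 255.0])
Lz3 = np.array([240.0, 240.0, 240.0, 240.0])

# Indicator-multiplication form, as in the formula above:
L2_mult = Lz1 * (zone_apl == 1) + Lz2 * (zone_apl == 2) + Lz3 * (zone_apl == 3)

# The same selection written with numpy.select:
L2_select = np.select([zone_apl == 1, zone_apl == 2, zone_apl == 3], [Lz1, Lz2, Lz3])

print(np.array_equal(L2_mult, L2_select))  # True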
def formula_2017_09_01(individu, period):
    '''
    Reference : https://www.legifrance.gouv.fr/eli/decret/2017/5/5/ETSD1708117D/jo/article_2
    '''
    aah_eligible = individu('aah', period) > 0

    demandeur_emploi_non_indemnise = and_(
        individu('activite', period) == TypesActivite.chomeur,
        individu('chomage_net', period) == 0)

    eligible_cumul_ass = individu('ass_eligibilite_cumul_individu', period)
    demandeur_emploi_non_indemnise_et_cumul_accepte = or_(
        demandeur_emploi_non_indemnise,
        not_(demandeur_emploi_non_indemnise) * eligible_cumul_ass)

    # Indique que l'individu a travaillé 5 ans au cours des 10 dernieres années.
    ass_precondition_remplie = individu('ass_precondition_remplie', period)

    return and_(
        not_(aah_eligible),
        and_(demandeur_emploi_non_indemnise_et_cumul_accepte, ass_precondition_remplie))
def function(self, simulation, period):
    period = period.start.offset('first-of', 'month').period('month')
    two_years_ago = period.start.offset('first-of', 'year').period('year').offset(-2)
    chomage_net_m_1 = simulation.calculate('chonet', period.offset(-1))
    chomage_net_m_2 = simulation.calculate('chonet', period.offset(-2))
    revenus_activite_pro = simulation.calculate('salaire_imposable', two_years_ago)
    taux_abattement = simulation.legislation_at(period.start).al.ressources.abattement_chomage_indemnise

    abattement = and_(chomage_net_m_1 > 0, chomage_net_m_2 > 0) * taux_abattement * revenus_activite_pro

    params_abattement_frais_pro = simulation.legislation_at(period.start).ir.tspr.abatpro
    abattement = round((1 - params_abattement_frais_pro.taux) * abattement)

    return period, abattement
def weighted_quantiles(data, labels, weights, return_quantiles=False):
    num_categories = len(labels)
    breaks = linspace(0, 1, num_categories + 1)
    quantiles = [
        weighted.quantile_1D(data, weights, mybreak)
        for mybreak in breaks[1:]
        ]
    ret = zeros(len(data))
    for i in range(0, len(quantiles) - 1):
        lower = quantiles[i]
        upper = quantiles[i + 1]
        ret[and_(data >= lower, data < upper)] = labels[i]

    if return_quantiles:
        return ret + 1, quantiles
    else:
        return ret + 1
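weighted_quantiles delegates the quantile computation to weighted.quantile_1D. To make the underlying idea explicit without that dependency, here is an independent sketch that builds a weighted empirical CDF by hand and reads quantiles off it; it illustrates the concept only and does not reproduce the library's exact interpolation rule:

import numpy as np

def weighted_quantile_sketch(data, weights, probs):
    order = np.argsort(data)
    data_sorted = np.asarray(data)[order]
    weights_sorted = np.asarray(weights)[order]
    # Cumulative share of total weight reached at each sorted observation.
    cdf = np.cumsum(weights_sorted) / np.sum(weights_sorted)
    # For each requested probability, take the first observation whose CDF reaches it.
    idx = np.searchsorted(cdf, probs, side='left')
    return data_sorted[np.clip(idx, 0, len(data_sorted) - 1)]

data = np.array([10.0, 20.0, 30.0, 40.0])
weights = np.array([1.0, 1.0, 1.0, 7.0])
print(weighted_quantile_sketch(data, weights, [0.1, 0.5, 0.9]))  # 10, 40, 40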
def function(self, simulation, period): period = period.this_month mois_precedent = period.offset(-1) last_day_reference_year = period.n_2.stop base_ressources_defaut = simulation.calculate('aide_logement_base_ressources_defaut', period) base_ressources_eval_forfaitaire = simulation.calculate( 'aide_logement_base_ressources_eval_forfaitaire', period) en_couple = simulation.calculate('en_couple', period) aah_holder = simulation.compute('aah', mois_precedent) aah = self.sum_by_entity(aah_holder, roles = [CHEF, PART]) age_holder = simulation.compute('age', period) age = self.split_by_roles(age_holder, roles = [CHEF, PART]) smic_horaire_brut_n2 = simulation.legislation_at(last_day_reference_year).cotsoc.gen.smic_h_b salaire_imposable_holder = simulation.compute('salaire_imposable', period.offset(-1)) somme_salaires = self.sum_by_entity(salaire_imposable_holder, roles = [CHEF, PART]) plafond_eval_forfaitaire = 1015 * smic_horaire_brut_n2 plafond_salaire_jeune_isole = simulation.legislation_at(period.start).prestations.aides_logement.ressources.dar_8 plafond_salaire_jeune_couple = simulation.legislation_at(period.start).prestations.aides_logement.ressources.dar_9 plafond_salaire_jeune = where(en_couple, plafond_salaire_jeune_couple, plafond_salaire_jeune_isole) neutral_jeune = or_(age[CHEF] < 25, and_(en_couple, age[PART] < 25)) neutral_jeune &= somme_salaires < plafond_salaire_jeune eval_forfaitaire = base_ressources_defaut <= plafond_eval_forfaitaire eval_forfaitaire &= base_ressources_eval_forfaitaire > 0 eval_forfaitaire &= aah == 0 eval_forfaitaire &= not_(neutral_jeune) ressources = where(eval_forfaitaire, base_ressources_eval_forfaitaire, base_ressources_defaut) # Planchers de ressources pour étudiants # Seul le statut étudiant (et boursier) du demandeur importe, pas celui du conjoint Pr = simulation.legislation_at(period.start).prestations.aides_logement.ressources etudiant_holder = simulation.compute('etudiant', period) boursier_holder = simulation.compute('boursier', period) etudiant = self.split_by_roles(etudiant_holder, roles = [CHEF, PART]) boursier = self.split_by_roles(boursier_holder, roles = [CHEF, PART]) montant_plancher_ressources = max_(0, etudiant[CHEF] * Pr.dar_4 - boursier[CHEF] * Pr.dar_5) ressources = max_(ressources, montant_plancher_ressources) # Arrondi aux 100 euros supérieurs ressources = ceil(ressources / 100) * 100 return period, ressources
def function(self, simulation, period):
    period = period.this_month
    chomage_net_m_1 = simulation.calculate('chomage_net', period.offset(-1))
    chomage_net_m_2 = simulation.calculate('chomage_net', period.offset(-2))
    revenus_activite_pro = simulation.calculate('salaire_imposable', period.n_2)
    taux_abattement = simulation.legislation_at(period.start).al.ressources.abattement_chomage_indemnise
    taux_frais_pro = simulation.legislation_at(period.start).ir.tspr.abatpro.taux

    abattement = and_(chomage_net_m_1 > 0, chomage_net_m_2 > 0) * taux_abattement * revenus_activite_pro
    abattement = round_((1 - taux_frais_pro) * abattement)

    return period, abattement
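The abatement is only granted when net unemployment benefit was received in both of the two previous months; the formula encodes this by multiplying the amount by a boolean mask. A standalone NumPy illustration of that pattern (the rates and amounts below are invented, not the legal parameters):

import numpy as np

chomage_net_m_1 = np.array([900.0, 0.0, 400.0])
chomage_net_m_2 = np.array([900.0, 900.0, 400.0])
revenus_activite_pro = np.array([20000.0, 20000.0, 15000.0])

taux_abattement = 0.30   # illustrative rate only
taux_frais_pro = 0.10    # illustrative rate only

# The boolean mask zeroes the abatement for anyone not indemnified over both months.
mask = np.logical_and(chomage_net_m_1 > 0, chomage_net_m_2 > 0)
abattement = mask * taux_abattement * revenus_activite_pro
abattement = np.round((1 - taux_frais_pro) * abattement)
print(abattement)  # 5400, 0 and 4050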
def _psa(api, rsa, activite, af_nbenf, al, _P, _option = {"activite" : [CHEF, PART]}):
    '''
    Prime de solidarité active (exceptionnelle, 200€ versés une fois en avril 2009)
    '''
    # Versement en avril 2009 d’une prime de solidarité active (Psa) aux familles modestes qui ont bénéficié
    # en janvier, février ou mars 2009 du Rmi, de l’Api (du Rsa expérimental, du Cav ou du Rma pour les
    # ex-bénéficiaires du Rmi ou de l’Api), de la prime forfaitaire mensuelle au titre du Rmi ou de l’Api
    # ou enfin d’une aide au logement (à condition d’exercer une activité professionnelle et d’être âgé
    # de plus de 25 ans ou d’avoir au moins un enfant à charge).
    # La Psa, prime exceptionnelle, s’élève à 200 euros par foyer bénéficiaire.
    dummy_api = api > 0
    dummy_rmi = rsa > 0
    dummy_al = and_(al > 0, or_(af_nbenf > 0, or_(activite[CHEF] == 0, activite[PART] == 0)))

    condition = (dummy_api + dummy_rmi + dummy_al > 0)
    P = _P.minim.rmi
    psa = condition * P.psa

    return psa
def weightedcalcs_quantiles(data, labels, weights, return_quantiles=False):
    calc = wc.Calculator("weights")
    num_categories = len(labels)
    breaks = linspace(0, 1, num_categories + 1)
    data_frame = pd.DataFrame({
        'weights': weights,
        'data': data,
        })
    quantiles = [
        calc.quantile(data_frame, 'data', mybreak)
        for mybreak in breaks[1:]
        ]
    ret = zeros(len(data))
    for i in range(0, len(quantiles) - 1):
        lower = quantiles[i]
        upper = quantiles[i + 1]
        ret[and_(data > lower, data <= upper)] = labels[i]

    if return_quantiles:
        return ret + 1, quantiles
    else:
        return ret + 1
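The two helpers above bin observations with opposite boundary conventions: weighted_quantiles uses closed-left intervals (`>= lower`, `< upper`) while weightedcalcs_quantiles uses closed-right ones (`> lower`, `<= upper`). A tiny NumPy check showing where an observation equal to a break point lands under each rule (synthetic values):

import numpy as np

data = np.array([1.0, 2.0, 2.0, 3.0])
lower, upper = 1.0, 2.0

closed_left = np.logical_and(data >= lower, data < upper)    # rule used in weighted_quantiles
closed_right = np.logical_and(data > lower, data <= upper)   # rule used in weightedcalcs_quantiles

print(closed_left)   # only the value equal to the lower break is captured
print(closed_right)  # only the values equal to the upper break are captured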
def formula_2016_01_18(self, simulation, period): effectif_entreprise = simulation.calculate('effectif_entreprise', period) apprenti = simulation.calculate('apprenti', period) contrat_de_travail_duree = simulation.calculate( 'contrat_de_travail_duree', period) contrat_de_travail_debut = simulation.calculate( 'contrat_de_travail_debut', period) contrat_de_travail_fin = simulation.calculate('contrat_de_travail_fin', period) coefficient_proratisation = simulation.calculate( 'coefficient_proratisation', period) smic_proratise = simulation.calculate('smic_proratise', period) salaire_de_base = simulation.calculate('salaire_de_base', period) exoneration_cotisations_employeur_jei = simulation.calculate( 'exoneration_cotisations_employeur_jei', period) aide_premier_salarie = simulation.calculate('aide_premier_salarie', period) # Cette aide est temporaire. # Si toutefois elle est reconduite et modifiée, les dates et le montant seront à implémenter comme # des params xml. # jusqu’à 1,3 fois le Smic eligible_salaire = salaire_de_base <= (1.3 * smic_proratise) # pour les PME eligible_effectif = effectif_entreprise < 250 non_cumulee = and_( # non cumulable avec l'aide pour la première embauche # qui est identique, si ce n'est qu'elle couvre tous les salaires aide_premier_salarie == 0, # non cumul avec le dispositif Jeune Entreprise Innovante (JEI) not_(exoneration_cotisations_employeur_jei)) eligible_contrat = and_( contrat_de_travail_debut >= datetime64("2016-01-18"), contrat_de_travail_debut <= datetime64("2017-06-30")) # Si CDD, durée du contrat doit être > 1 an eligible_duree = or_( # durée indéterminée contrat_de_travail_duree == 0, # durée déterminée supérieure à 1 an and_( # CDD contrat_de_travail_duree == 1, # > 6 mois (contrat_de_travail_fin - contrat_de_travail_debut ).astype('timedelta64[M]') >= timedelta64(6, 'M'))) # Valable 2 ans seulement eligible_date = datetime64(period.offset( -24, 'month').start) < contrat_de_travail_debut eligible = (eligible_salaire * eligible_effectif * non_cumulee * eligible_contrat * eligible_duree * eligible_date * not_(apprenti)) # somme sur 24 mois, à raison de 500 € maximum par trimestre montant_max = 4000 # Si le salarié est embauché à temps partiel, # l’aide est proratisée en fonction de sa durée de travail. # TODO cette multiplication par le coefficient de proratisation suffit-elle pour le cas du temps partiel ? # A tester return eligible * (montant_max / 24) * coefficient_proratisation
def describe_discrepancies(self, fov = 10, descending = True): """ Describe discrpancies Parameters ---------- fov : descending : """ erf_menage = self.erf_menage erf_eec_indivi = self.erf_eec_indivi simu_aggr_tables = self.simu_aggr_tables simu_nonaggr_tables = self.simu_nonaggr_tables # Detect the biggest differences bigtable = merge(erf_menage, simu_aggr_tables, on = 'idmen', how = 'inner', suffixes=('_erf','_of')) print 'Length of new dataframe is %s' %str(len(bigtable)) #print bigtable.columns bigtable.set_index('idmen', drop = False, inplace = True) already_met = [] options_met = [] for col in [self.variable]: bigtemp = None table = bigtable[and_(bigtable[col+'_erf']!=0,bigtable[col+'_of']!=0)] table[col] = (table[col+'_of'] - table[col+'_erf']) / table[col+'_erf'] #Difference relative # table[col + "_sign"] = table[col].apply(lambda x: x/abs(x)) # table[col] = table[col].apply(lambda x: abs(x)) print 'Minimum difference between the two tables for %s is %s' %(col, str(table[col].min())) print 'Maximum difference between the two tables for %s is %s' %(col, str(table[col].max())) print table[col].describe() # print table[col + "_sign"].describe() # TODO: do regular percentiles try: assert len(table[col]) == len(table['wprm_of']), "PINAGS" dec, values = mwp(table[col], np.arange(1,11), table['wprm_of'], 2, return_quantiles=True) print sorted(values) dec, values = mwp(table[col], np.arange(1,101), table['wprm_erf'], 2, return_quantiles=True) print sorted(values)[90:] del dec, values gc.collect() except: print 'Weighted percentile method didnt work for %s' %col pass print "\n" # Show the relevant information for the most deviant households table.sort(columns = col, ascending = not descending, inplace = True) #print table[col][0:10].to_string() if bigtemp is None: bigtemp = {'table' : table[[col, col+'_of', col+'_erf', 'idmen']][0:fov], 'options' : None} bigtemp['table'][col+'div'] = bigtemp['table'][col+'_of'] / bigtemp['table'][col+'_erf'] print bigtemp['table'].to_string() ''' bigtemp is the table which will get filled little by little by the relevant variables. Up to the last rows of code 'table' refers to a table of aggregated values, while 'options is a table of individual variables. The reason we call it in a dictionnary is also because we modify it inside the recursive function 'iter_on parents', and it causes an error in Python unless for certain types like dictionnary values. 
''' #print "\n" # If variable is a Prestation, we show the dependancies varcol = self.simulation.output_table.column_by_name.get(col) if isinstance(varcol, Prestation): ''' For the direct children ''' if not varcol._children is None: ch_to_fetch = list(varcol._children) ch_to_fetch = map(lambda x: x.name, ch_to_fetch) ch_fetched = [] if set(ch_to_fetch) <= set(simu_aggr_tables.columns): print "Variables which need %s to be computed :\n %s \n" %(col, str(ch_to_fetch)) for var in ch_to_fetch: if var + '_of' in table.columns: ch_fetched.append(var + '_of') else: ch_fetched.append(var) elif set(ch_to_fetch) <= set(simu_aggr_tables.columns).union(erf_menage.columns): print "Variables which need %s to be computed (some missing picked in erf):\n %s \n" %(col, str(ch_to_fetch)) for var in ch_to_fetch: if var in simu_aggr_tables.columns: if var + '_of' in table.columns: ch_fetched.append(var + '_of') elif var + '_erf' in table.columns: ch_fetched.append(var + '_erf') else: ch_fetched.append(var) else: print "Variables which need %s to be computed (some missing):\n %s \n" %(col, str(ch_to_fetch)) for var in ch_to_fetch: if var in simu_aggr_tables.columns: if var + '_of' in table.columns: ch_fetched.append(var + '_of') elif var in erf_menage.columns: if var + '_erf' in table.columns: ch_fetched.append(var + '_erf') print table[[col] + ch_fetched][0:fov] print "\n" del ch_to_fetch, ch_fetched ''' For the parents ''' def iter_on_parents(varcol): if (varcol._parents == set() and varcol._option == {}) or varcol.name in already_met: return else: par_to_fetch = list(varcol._parents) par_to_fetch = map(lambda x: x.name, par_to_fetch) par_fetched = [] if set(par_to_fetch) <= set(simu_aggr_tables.columns): #print "Variables the prestation %s depends of :\n %s \n" %(varcol.name, str(par_fetched)) for var in par_to_fetch: if var + '_of' in table.columns: par_fetched.append(var + '_of') else: par_fetched.append(var) elif set(par_to_fetch) <= set(simu_aggr_tables.columns).union(erf_menage.columns): #print "Variables the prestation %s depends of (some missing picked in erf):\n %s \n" %(varcol.name,str(par_fetched)) for var in par_to_fetch: if var in simu_aggr_tables.columns: if var + '_of' in table.columns: par_fetched.append(var + '_of') elif var + '_erf' in table.columns: par_fetched.append(var + '_erf') else: par_fetched.append(var) else: for var in par_to_fetch: if var in simu_aggr_tables.columns: if var + '_of' in table.columns: par_fetched.append(var + '_of') elif var in erf_menage.columns: if var + '_erf' in table.columns: par_fetched.append(var + '_erf') if len(par_fetched) > 0: #print "Variables the prestation %s depends of (some missing):\n %s \n" %(varcol.name, str(par_fetched)) pass else: #print "Variables the prestation %s depends of couldn't be found :\n %s \n" %(varcol.name, str(par_fetched)) pass if len(par_fetched) > 0: temp = table[['idmen'] + par_fetched][0:fov] bigtemp['table'] = bigtemp['table'].merge(temp, how = 'inner', on = 'idmen') #print temp.to_string(), "\n" if varcol._option != {} and not set(varcol._option.keys()) < set(options_met): vars_to_fetch = list(set(varcol._option.keys())-set(options_met)) #print "and the options to current variable %s for the id's with strongest difference :\n %s \n" %(varcol.name, varcol._option.keys()) liste = [i for i in range(0,fov)] liste = map(lambda x: table['idmen'].iloc[x], liste) temp = simu_nonaggr_tables[['idmen', 'quimen','noindiv'] + vars_to_fetch][simu_nonaggr_tables['idmen'].isin(table['idmen'][0:fov])] temp_sorted = temp[temp['idmen'] == 
liste[0]] for i in xrange(1,fov): temp_sorted = temp_sorted.append(temp[temp['idmen'] == liste[i]]) if bigtemp['options'] is None: bigtemp['options'] = temp_sorted bigtemp['options'] = bigtemp['options'].merge(erf_eec_indivi, on = 'noindiv', how = 'outer') else: bigtemp['options'] = bigtemp['options'].merge(temp_sorted, on = ['noindiv','idmen','quimen'], how = 'outer') # temp_sorted.set_index(['idmen', 'quimen'], drop = True, inplace = True) # If we do that del temp, temp_sorted gc.collect() already_met.append(varcol.name) options_met.extend(varcol._option.keys()) for var in varcol._parents: iter_on_parents(var) iter_on_parents(varcol) # We merge the aggregate table with the option table ( for each individual in entity ) bigtemp['table'] = bigtemp['table'].merge(bigtemp['options'], how = 'left', on = 'idmen', suffixes = ('(agg)', '(ind)')) # Reshaping the table to group by descending error on col, common entities bigtemp['table'].sort(columns = ['af','quimen'], ascending = [False,True], inplace = True) bigtemp['table'] = bigtemp['table'].groupby(['idmen','quimen'], sort = False).sum() print "Table of values for %s dependencies : \n" %col print bigtemp['table'].to_string() del bigtemp['table'], bigtemp['options'] gc.collect()
def test(year=2006, variables = ['af']): simulation = SurveySimulation() survey_filename = os.path.join(model.DATA_DIR, 'sources', 'test.h5') simulation.set_config(year=year, survey_filename=survey_filename) simulation.set_param() simulation.compute() # of_aggregates = Aggregates() # of_aggregates.set_simulation(simulation) # of_aggregates.compute() # print of_aggregates.aggr_frame # # from openfisca_france.data.erf.aggregates import build_erf_aggregates # temp = (build_erf_aggregates(variables=variables, year= year)) # print temp # return variable= "af" debugger = Debugger() debugger.set_simulation(simulation) debugger.set_variable(variable) debugger.show_aggregates() def get_all_ancestors(varlist): if len(varlist) == 0: return [] else: if varlist[0]._parents == set(): return ([varlist[0]] + get_all_ancestors(varlist[1:])) else: return ([varlist[0]] + get_all_ancestors(list(varlist[0]._parents)) + get_all_ancestors(varlist[1:])) # We want to get all ancestors + children + the options that we're going to encounter parents = map(lambda x: simulation.output_table.column_by_name.get(x), variables) parents = get_all_ancestors(parents) options = [] for varcol in parents: options.extend(varcol._option.keys()) options = list(set(options)) #print options parents = map(lambda x: x.name, parents) for var in variables: children = set() varcol = simulation.output_table.column_by_name.get(var) children = children.union(set(map(lambda x: x.name, varcol._children))) variables = list(set(parents + list(children))) #print variables del parents, children gc.collect() def get_var(variable): variables =[variable] return simulation.aggregated_by_entity(entity="men", variables=variables, all_output_vars = False, force_sum=True)[0] simu_aggr_tables = get_var(variables[0]) for var in variables[1:]: simu_aggr_tables = simu_aggr_tables.merge(get_var(var)[['idmen', var]], on = 'idmen', how = 'outer') # We load the data from erf table in case we have to pick data there erf_data = DataCollection(year=year) os.system('cls') todo = set(variables + ["ident", "wprm"]).union(set(options)) print 'Variables or equivalents to fetch :' print todo ''' Méthode générale pour aller chercher les variables de l'erf/eec ( qui n'ont pas forcément le même nom et parfois sont les variables utilisées pour créér l'of ): 1 - essayer le get_of2erf, ça doit marcher pour les variables principales ( au moins les aggrégats que l'on compare ) Si les variables ne sont pas directement dans la table, elles ont été calculées à partir d'autres variables de données erf/eec donc chercher dans : 2 - build_survey 3 - model/model.py qui dira éventuellement dans quel module de model/ chercher Le 'print todo' vous indique quelles variables chercher ( attention à ne pas inclure les enfants directs ) L'utilisation du Ctrl-H est profitable ! 
''' fetch_eec = ['statut','titc','chpub','encadr','prosa','age','naim','naia','noindiv'] fetch_erf = ['zsali','af','ident','wprm','noi','noindiv','quelfic'] erf_df = erf_data.get_of_values(variables= fetch_erf, table="erf_indivi") eec_df = erf_data.get_of_values(variables= fetch_eec, table="eec_indivi") erf_eec_indivi = erf_df.merge(eec_df, on ='noindiv', how = 'inner' ) assert 'quelfic' in erf_eec_indivi.columns, "quelfic not in erf_indivi columns" del eec_df, erf_df # We then get the aggregate variables for the menage ( mainly to compare with of ) print 'Loading data from erf_menage table' erf_menage = erf_data.get_of_values(variables= list(todo) + ['quelfic'], table="erf_menage") del todo gc.collect() assert 'ident' in erf_menage.columns, "ident not in erf_menage.columns" from openfisca_france.data.erf import get_erf2of erf2of = get_erf2of() erf_menage.rename(columns = erf2of, inplace = True) # We get the options from the simulation non aggregated tables: # First from the output_table # We recreate the noindiv in output_table simulation.output_table.table['noindiv'] = 100 * simulation.output_table.table.idmen_ind + simulation.output_table.table.noi_ind simulation.output_table.table['noindiv'] = simulation.output_table.table['noindiv'].astype(np.int64) s1 = [var for var in set(options).intersection(set(simulation.output_table.table.columns))] + ['idmen_ind', 'quimen_ind', 'noindiv'] simu_nonaggr_tables = (simulation.output_table.table)[s1] simu_nonaggr_tables.rename(columns = {'idmen_ind' : 'idmen', 'quimen_ind':'quimen'}, inplace = True) assert 'noindiv' in simu_nonaggr_tables.columns # If not found, we dwelve into the input_table if (set(s1)- set(['idmen_ind', 'quimen_ind','noindiv'])) < set(options): assert 'noindiv' in simulation.input_table.table.columns, "'noindiv' not in simulation.input_table.table.columns" s2 = [var for var in (set(options).intersection(set(simulation.input_table.table.columns)) - set(s1))] + ['noindiv'] #print s2 temp = simulation.input_table.table[s2] simu_nonaggr_tables = simu_nonaggr_tables.merge(temp, on = 'noindiv', how = 'inner', sort = False) del s2, temp del s1 gc.collect() simu_nonaggr_tables = simu_nonaggr_tables[list(set(options)) + ['idmen', 'quimen','noindiv']] #print options, variables assert 'idmen' in simu_nonaggr_tables.columns, 'Idmen not in simu_nonaggr_tables columns' # Check the idmens that are not common erf_menage.rename(columns = {'ident' : 'idmen'}, inplace = True) print "\n" print 'Checking if idmen is here...' print '\n ERF : ' print 'idmen' in erf_menage.columns print "\n Simulation output" print 'idmen' in simu_aggr_tables.columns print "\n" #print 'Dropping duplicates of idmen for both tables...' assert not erf_menage["idmen"].duplicated().any(), "Duplicated idmen in erf_menage" #erf_menage.drop_duplicates('idmen', inplace = True) simu_aggr_tables.drop_duplicates('idmen', inplace = True) assert not simu_aggr_tables["idmen"].duplicated().any(), "Duplicated idmen in of" print 'Checking mismatching idmen... ' s1 = set(erf_menage['idmen']) - (set(simu_aggr_tables['idmen'])) if s1: print "idmen that aren't in simu_aggr_tables : %s" %str(len(s1)) pass s2 = (set(simu_aggr_tables['idmen'])) - set(erf_menage['idmen']) if s2: print "idmen that aren't in erf_menage : %s" %str(len(s2)) pass del s1, s2 # Restrict to common idmens and merge s3 = set(erf_menage['idmen']).intersection(set(simu_aggr_tables['idmen'])) print "Restricting to %s common idmen... 
\n" %str(len(s3)) erf_menage = erf_menage[erf_menage['idmen'].isin(s3)] simu_aggr_tables = simu_aggr_tables[simu_aggr_tables['idmen'].isin(s3)] del s3 gc.collect() #print erf_menage.columns #print simu_aggr_tables.columns # Compare differences across of and erf dataframes print "Comparing differences between dataframes... \n" colcom = (set(erf_menage.columns).intersection(set(simu_aggr_tables.columns))) - set(['idmen','wprm']) print 'Common variables: ' print colcom erf_menage.reset_index(inplace = True) simu_aggr_tables.reset_index(inplace = True) for col in colcom: temp = set(erf_menage['idmen'][erf_menage[col] != simu_aggr_tables[col]]) print "Numbers of idmen that aren't equal on variable %s : %s \n" %(col, str(len(temp))) del temp # Detect the biggest differences bigtable = merge(erf_menage, simu_aggr_tables, on = 'idmen', how = 'inner', suffixes=('_erf','_of')) print 'Length of new dataframe is %s' %str(len(bigtable)) #print bigtable.columns bigtable.set_index('idmen', drop = False, inplace = True) already_met = [] options_met = [] for col in colcom: bigtemp = None table = bigtable[and_(bigtable[col+'_erf']!=0,bigtable[col+'_of']!=0)] table[col] = (table[col+'_erf'] - table[col+'_of']) / table[col+'_erf'] #Difference relative table[col] = table[col].apply(lambda x: abs(x)) print 'Minimum difference between the two tables for %s is %s' %(col, str(table[col].min())) print 'Maximum difference between the two tables for %s is %s' %(col, str(table[col].max())) print table[col].describe() try: assert len(table[col]) == len(table['wprm_of']), "PINAGS" dec, values = mwp(table[col], np.arange(1,11), table['wprm_of'], 2, return_quantiles=True) #print sorted(values) dec, values = mwp(table[col], np.arange(1,101), table['wprm_erf'], 2, return_quantiles=True) #print sorted(values)[90:] del dec, values gc.collect() except: #print 'Weighted percentile method didnt work for %s' %col pass print "\n" # Show the relevant information for the most deviant households table.sort(columns = col, ascending = False, inplace = True) #print table[col][0:10].to_string() if bigtemp is None: bigtemp = {'table' : table[[col, col+'_of', col+'_erf', 'idmen']][0:10], 'options' : None} bigtemp['table'][col+'div'] = bigtemp['table'][col+'_of'] / bigtemp['table'][col+'_erf'] print bigtemp['table'].to_string() ''' bigtemp is the table which will get filled little by little by the relevant variables. Up to the last rows of code 'table' refers to a table of aggregated values, while 'options is a table of individual variables. The reason we call it in a dictionnary is also because we modify it inside the recursive function 'iter_on parents', and it causes an error in Python unless for certain types like dictionnary values. 
''' #print "\n" # If variable is a Prestation, we show the dependancies varcol = simulation.output_table.column_by_name.get(col) if isinstance(varcol, Prestation): ''' For the direct children ''' if not varcol._children is None: ch_to_fetch = list(varcol._children) ch_to_fetch = map(lambda x: x.name, ch_to_fetch) ch_fetched = [] if set(ch_to_fetch) <= set(simu_aggr_tables.columns): print "Variables which need %s to be computed :\n %s \n" %(col, str(ch_to_fetch)) for var in ch_to_fetch: if var + '_of' in table.columns: ch_fetched.append(var + '_of') else: ch_fetched.append(var) elif set(ch_to_fetch) <= set(simu_aggr_tables.columns).union(erf_menage.columns): print "Variables which need %s to be computed (some missing picked in erf):\n %s \n" %(col, str(ch_to_fetch)) for var in ch_to_fetch: if var in simu_aggr_tables.columns: if var + '_of' in table.columns: ch_fetched.append(var + '_of') elif var + '_erf' in table.columns: ch_fetched.append(var + '_erf') else: ch_fetched.append(var) else: print "Variables which need %s to be computed (some missing):\n %s \n" %(col, str(ch_to_fetch)) for var in ch_to_fetch: if var in simu_aggr_tables.columns: if var + '_of' in table.columns: ch_fetched.append(var + '_of') elif var in erf_menage.columns: if var + '_erf' in table.columns: ch_fetched.append(var + '_erf') print table[[col] + ch_fetched][0:10] print "\n" del ch_to_fetch, ch_fetched ''' For the parents ''' def iter_on_parents(varcol): if (varcol._parents == set() and varcol._option == {}) or varcol.name in already_met: return else: par_to_fetch = list(varcol._parents) par_to_fetch = map(lambda x: x.name, par_to_fetch) par_fetched = [] if set(par_fetched) <= set(simu_aggr_tables.columns): #print "Variables the prestation %s depends of :\n %s \n" %(varcol.name, str(par_fetched)) for var in par_fetched: if var + '_of' in table.columns: par_fetched.append(var + '_of') else: par_fetched.append(var) elif set(par_fetched) <= set(simu_aggr_tables.columns).union(erf_menage.columns): #print "Variables the prestation %s depends of (some missing picked in erf):\n %s \n" %(varcol.name,str(par_fetched)) for var in par_fetched: if var in simu_aggr_tables.columns: if var + '_of' in table.columns: par_fetched.append(var + '_of') elif var + '_erf' in table.columns: par_fetched.append(var + '_erf') else: par_fetched.append(var) else: for var in par_fetched: if var in simu_aggr_tables.columns: if var + '_of' in table.columns: par_fetched.append(var + '_of') elif var in erf_menage.columns: if var + '_erf' in table.columns: par_fetched.append(var + '_erf') if len(par_fetched) > 0: #print "Variables the prestation %s depends of (some missing):\n %s \n" %(varcol.name, str(par_fetched)) pass else: #print "Variables the prestation %s depends of couldn't be found :\n %s \n" %(varcol.name, str(par_fetched)) pass if len(par_fetched) > 0: temp = table[[col, 'idmen'] + par_fetched][0:10] bigtemp['table'] = pd.merge(temp, bigtemp['table'], how = 'inner') #print temp.to_string(), "\n" if varcol._option != {} and not set(varcol._option.keys()) < set(options_met): vars_to_fetch = list(set(varcol._option.keys())-set(options_met)) #print "and the options to current variable %s for the id's with strongest difference :\n %s \n" %(varcol.name, varcol._option.keys()) liste = [i for i in range(0,10)] liste = map(lambda x: table['idmen'].iloc[x], liste) temp = simu_nonaggr_tables[['idmen', 'quimen','noindiv'] + vars_to_fetch][simu_nonaggr_tables['idmen'].isin(table['idmen'][0:10])] temp_sorted = temp[temp['idmen'] == liste[0]] for i in 
xrange(1,10): temp_sorted = temp_sorted.append(temp[temp['idmen'] == liste[i]]) if bigtemp['options'] is None: bigtemp['options'] = temp_sorted bigtemp['options'] = bigtemp['options'].merge(erf_eec_indivi, on = 'noindiv', how = 'outer') else: bigtemp['options'] = bigtemp['options'].merge(temp_sorted, on = ['noindiv','idmen','quimen'], how = 'outer') # temp_sorted.set_index(['idmen', 'quimen'], drop = True, inplace = True) # If we do that del temp, temp_sorted gc.collect() already_met.append(varcol.name) options_met.extend(varcol._option.keys()) for var in varcol._parents: iter_on_parents(var) iter_on_parents(varcol) # We merge the aggregate table with the option table ( for each individual in entity ) bigtemp['table'] = bigtemp['table'].merge(bigtemp['options'], how = 'left', on = 'idmen', suffixes = ('(agg)', '(ind)')) # Reshaping the table to group by descending error on col, common entities bigtemp['table'].sort(columns = ['af','quimen'], ascending = [False,True], inplace = True) bigtemp['table'] = bigtemp['table'].groupby(['idmen','quimen'], sort = False).sum() print "Table of values for %s dependencies : \n" %col print bigtemp['table'].to_string() del bigtemp['table'], bigtemp['options'] gc.collect()
def formula_2016_01_18(individu, period, parameters):
    effectif_entreprise = individu('effectif_entreprise', period)
    apprenti = individu('apprenti', period)
    contrat_de_travail_duree = individu('contrat_de_travail_duree', period)
    TypesContratDeTravailDuree = contrat_de_travail_duree.possible_values
    contrat_de_travail_debut = individu('contrat_de_travail_debut', period)
    contrat_de_travail_fin = individu('contrat_de_travail_fin', period)
    coefficient_proratisation = individu('coefficient_proratisation', period)
    smic_proratise = individu('smic_proratise', period)
    salaire_de_base = individu('salaire_de_base', period)
    exoneration_cotisations_employeur_jei = individu('exoneration_cotisations_employeur_jei', period)
    aide_premier_salarie = individu('aide_premier_salarie', period)

    # Cette aide est temporaire.
    # Si toutefois elle est reconduite et modifiée, les dates et le montant seront à implémenter comme
    # des params xml.

    # jusqu'à 1,3 fois le Smic
    eligible_salaire = salaire_de_base <= (1.3 * smic_proratise)

    # pour les PME
    eligible_effectif = effectif_entreprise < 250

    non_cumulee = and_(
        # non cumulable avec l'aide pour la première embauche
        # qui est identique, si ce n'est qu'elle couvre tous les salaires
        aide_premier_salarie == 0,
        # non cumul avec le dispositif Jeune Entreprise Innovante (JEI)
        not_(exoneration_cotisations_employeur_jei)
        )

    eligible_contrat = and_(
        contrat_de_travail_debut >= datetime64("2016-01-18"),
        contrat_de_travail_debut <= datetime64("2017-06-30")
        )

    # Si CDD, la durée du contrat doit être d'au moins 6 mois
    eligible_duree = or_(
        # durée indéterminée
        contrat_de_travail_duree == TypesContratDeTravailDuree.cdi,
        # durée déterminée d'au moins 6 mois
        and_(
            # CDD
            contrat_de_travail_duree == TypesContratDeTravailDuree.cdd,
            # >= 6 mois
            (contrat_de_travail_fin - contrat_de_travail_debut).astype('timedelta64[M]') >= timedelta64(6, 'M')
            )
        )

    # Valable 2 ans seulement
    eligible_date = datetime64(period.offset(-24, 'month').start) < contrat_de_travail_debut

    eligible = (
        eligible_salaire
        * eligible_effectif
        * non_cumulee
        * eligible_contrat
        * eligible_duree
        * eligible_date
        * not_(apprenti)
        )

    # somme sur 24 mois, à raison de 500 € maximum par trimestre
    montant_max = 4000

    # Si le salarié est embauché à temps partiel,
    # l'aide est proratisée en fonction de sa durée de travail.
    # TODO cette multiplication par le coefficient de proratisation suffit-elle pour le cas du temps partiel ?
    # A tester
    return eligible * (montant_max / 24) * coefficient_proratisation
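The CDD test above compares the contract span against six calendar months with NumPy datetime arithmetic. Below is a small self-contained sketch of the same month-level comparison; it truncates dates to month precision before subtracting rather than casting the day-level difference as the formula does, and the dates are purely illustrative.

import numpy as np

# Illustrative contract dates only (no legal meaning)
debut = np.array(['2016-02-15', '2016-03-20'], dtype='datetime64[D]')
fin = np.array(['2016-09-01', '2016-06-30'], dtype='datetime64[D]')

# Truncating to month precision before subtracting yields whole calendar months,
# directly comparable with a 6-month threshold as in eligible_duree above
duree_mois = fin.astype('datetime64[M]') - debut.astype('datetime64[M]')
print(duree_mois)                            # 7 and 3 months
print(duree_mois >= np.timedelta64(6, 'M'))  # [ True False]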
def function(self, simulation, period): period = period.start.offset('first-of', 'month').period('month') concub = simulation.calculate('concub', period) aide_logement_base_ressources = simulation.calculate('aide_logement_base_ressources', period) statut_occupation_holder = simulation.compute('statut_occupation', period) loyer_holder = simulation.compute('loyer', period) coloc_holder = simulation.compute('coloc', period) logement_chambre_holder = simulation.compute('logement_chambre', period) al_pac = simulation.calculate('al_pac', period) enceinte_fam = simulation.calculate('enceinte_fam', period) zone_apl_famille = simulation.calculate('zone_apl_famille', period) nat_imp_holder = simulation.compute('nat_imp', period.start.period(u'year').offset('first-of')) al = simulation.legislation_at(period.start).al pfam_n_2 = simulation.legislation_at(period.start.offset(-2, 'year')).fam # le barème "couple" est utilisé pour les femmes enceintes isolées couple = or_(concub, enceinte_fam) personne_seule = not_(couple) statut_occupation = self.cast_from_entity_to_roles(statut_occupation_holder) statut_occupation = self.filter_role(statut_occupation, role = CHEF) loyer = self.cast_from_entity_to_roles(loyer_holder) loyer = self.filter_role(loyer, role = CHEF) zone_apl = zone_apl_famille # Variables individuelles coloc = self.any_by_roles(coloc_holder) chambre = self.any_by_roles(logement_chambre_holder) # Variables du foyer fiscal nat_imp = self.cast_from_entity_to_roles(nat_imp_holder) nat_imp = self.any_by_roles(nat_imp) # ne prend pas en compte les chambres ni les logements-foyers. # variables nécéssaires dans FA # al_pac : nb de personne à charge du ménage prise en compte pour les AL # zone_apl # loyer # coloc (1 si colocation, 0 sinon) # statut_occupation : statut d'occupation du logement # Voir statut_occupation dans model/caracteristiques_socio_demographiques/logement.py loca = ((3 <= statut_occupation) & (5 >= statut_occupation)) | (statut_occupation == 7) acce = statut_occupation == 1 # # aides au logement pour les locataires # loyer mensuel, multiplié par 2/3 pour les meublés L1 = round((statut_occupation == 5) * loyer * 2 / 3 + (statut_occupation != 5) * loyer, 2) # taux à appliquer sur le loyer plafond taux_loyer_plafond = (and_(not_(coloc), not_(chambre)) * 1 + chambre * al.loyers_plafond.chambre + not_(chambre) * coloc * al.loyers_plafond.colocation) loyer_plafond_personne_seule = or_(personne_seule * (al_pac == 0), chambre) loyer_plafond_famille = not_(loyer_plafond_personne_seule) * (al_pac > 0) loyer_plafond_couple = and_(not_(loyer_plafond_famille), not_(loyer_plafond_personne_seule)) z1 = al.loyers_plafond.zone1 z2 = al.loyers_plafond.zone2 z3 = al.loyers_plafond.zone3 Lz1 = ( loyer_plafond_personne_seule * z1.L1 + loyer_plafond_couple * z1.L2 + loyer_plafond_famille * (z1.L3 + (al_pac > 1) * (al_pac - 1) * z1.L4) ) Lz2 = ( loyer_plafond_personne_seule * z2.L1 + loyer_plafond_couple * z2.L2 + loyer_plafond_famille * (z2.L3 + (al_pac > 1) * (al_pac - 1) * z2.L4) ) Lz3 = ( loyer_plafond_personne_seule * z3.L1 + loyer_plafond_couple * z3.L2 + loyer_plafond_famille * (z3.L3 + (al_pac > 1) * (al_pac - 1) * z3.L4) ) L2 = Lz1 * (zone_apl == 1) + Lz2 * (zone_apl == 2) + Lz3 * (zone_apl == 3) L2 = round(L2 * taux_loyer_plafond, 2) # loyer retenu L = min_(L1, L2) # forfait de charges P_fc = al.forfait_charges C = ( not_(coloc) * (P_fc.fc1 + al_pac * P_fc.fc2) + coloc * ((personne_seule * 0.5 + couple) * P_fc.fc1 + al_pac * P_fc.fc2) ) # dépense éligible E = L + C # ressources prises en compte 
R = aide_logement_base_ressources # Plafond RO rmi = al.rmi R1 = ( al.R1.taux1 * rmi * personne_seule * (al_pac == 0) + al.R1.taux2 * rmi * couple * (al_pac == 0) + al.R1.taux3 * rmi * (al_pac == 1) + al.R1.taux4 * rmi * (al_pac >= 2) + al.R1.taux5 * rmi * (al_pac > 2) * (al_pac - 2) ) bmaf = pfam_n_2.af.bmaf R2 = ( al.R2.taux4 * bmaf * (al_pac >= 2) + al.R2.taux5 * bmaf * (al_pac > 2) * (al_pac - 2) ) Ro = round(12 * (R1 - R2) * (1 - al.autres.abat_sal)) Rp = max_(0, R - Ro) # Participation personnelle Po = max_(al.pp.taux * E, al.pp.min) # Taux de famille TF = ( al.TF.taux1 * (personne_seule) * (al_pac == 0) + al.TF.taux2 * (couple) * (al_pac == 0) + al.TF.taux3 * (al_pac == 1) + al.TF.taux4 * (al_pac == 2) + al.TF.taux5 * (al_pac == 3) + al.TF.taux6 * (al_pac >= 4) + al.TF.taux7 * (al_pac > 4) * (al_pac - 4) ) # Loyer de référence L_Ref = ( z2.L1 * (personne_seule) * (al_pac == 0) + z2.L2 * (couple) * (al_pac == 0) + z2.L3 * (al_pac >= 1) + z2.L4 * (al_pac > 1) * (al_pac - 1) ) RL = L / L_Ref # TODO: paramètres en dur ?? TL = max_(max_(0, al.TL.taux2 * (RL - 0.45)), al.TL.taux3 * (RL - 0.75) + al.TL.taux2 * (0.75 - 0.45)) Tp = TF + TL PP = Po + Tp * Rp al_loc = max_(0, E - PP) * loca al_loc = al_loc * (al_loc >= al.autres.nv_seuil) # # TODO: APL pour les accédants à la propriété al_acc = 0 * acce # # APL (tous) al = al_loc + al_acc return period, al
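For the rental branch, the computation above reduces to a short chain once the capped rent L, the charge lump sum C and the resource floor Ro are known: E = L + C, Rp = max(0, R - Ro), PP = Po + Tp * Rp and al_loc = max(0, E - PP). A toy numeric sketch follows; every rate and amount in it is a placeholder for illustration, not a legislation parameter.

from numpy import maximum as max_  # mirrors the max_ helper used in the formula

# Placeholder inputs (illustrative only)
L = 350.0      # loyer retenu: min(actual rent, zone ceiling)
C = 50.0       # forfait de charges
R = 9000.0     # aide_logement_base_ressources
Ro = 4000.0    # resource floor derived from the RMI / BMAF scales

E = L + C                    # depense eligible
Rp = max_(0.0, R - Ro)       # resources above the floor
Po = max_(0.085 * E, 35.0)   # minimal personal participation (placeholder rate and floor)
Tp = 0.03 + 0.02             # taux de famille + taux loyer (placeholders)
PP = Po + Tp * Rp            # total personal participation
al_loc = max_(0.0, E - PP)   # monthly benefit before the payment threshold
print(round(float(al_loc), 2))  # -> 115.0 with these toy numbers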
def mark_weighted_percentiles(a, labels, weights, method, return_quantiles=False): # from http://pastebin.com/KTLip9ee # a is an input array of values. # weights is an input array of weights, so weights[i] goes with a[i] # labels are the names you want to give to the xtiles # method refers to which weighted algorithm. # 1 for wikipedia, 2 for the stackexchange post. # The code outputs an array the same shape as 'a', but with # labels[i] inserted into spot j if a[j] falls in x-tile i. # The number of xtiles requested is inferred from the length of 'labels'. # First method, "vanilla" weights from Wikipedia article. if method == 1: # Sort the values and apply the same sort to the weights. N = len(a) sort_indx = argsort(a) tmp_a = a[sort_indx].copy() tmp_weights = weights[sort_indx].copy() # 'labels' stores the name of the x-tiles the user wants, # and it is assumed to be linearly spaced between 0 and 1 # so 5 labels implies quintiles, for example. num_categories = len(labels) breaks = linspace(0, 1, num_categories + 1) # Compute the percentile values at each explicit data point in a. cu_weights = cumsum(tmp_weights) p_vals = (1.0 / cu_weights[-1]) * (cu_weights - 0.5 * tmp_weights) # Set up the output array. ret = repeat(0, len(a)) if(len(a) < num_categories): return ret # Set up the array for the values at the breakpoints. quantiles = [] # Find the two indices that bracket the breakpoint percentiles. # then do interpolation on the two a_vals for those indices, using # interp-weights that involve the cumulative sum of weights. for brk in breaks: if brk <= p_vals[0]: i_low = 0 i_high = 0 elif brk >= p_vals[-1]: i_low = N - 1 i_high = N - 1 else: for ii in range(N - 1): if (p_vals[ii] <= brk) and (brk < p_vals[ii + 1]): i_low = ii i_high = ii + 1 if i_low == i_high: v = tmp_a[i_low] else: # If there are two brackets, then apply the formula as per Wikipedia. v = (tmp_a[i_low] + ((brk - p_vals[i_low]) / (p_vals[i_high] - p_vals[i_low])) * (tmp_a[i_high] - tmp_a[i_low])) # Append the result. quantiles.append(v) # Now that the weighted breakpoints are set, just categorize # the elements of a with logical indexing. for i in range(0, len(quantiles) - 1): lower = quantiles[i] upper = quantiles[i + 1] ret[and_(a >= lower, a < upper)] = labels[i] # make sure upper and lower indices are marked ret[a <= quantiles[0]] = labels[0] ret[a >= quantiles[-1]] = labels[-1] return ret # The stats.stackexchange suggestion. elif method == 2: N = len(a) sort_indx = argsort(a) tmp_a = a[sort_indx].copy() tmp_weights = weights[sort_indx].copy() num_categories = len(labels) breaks = linspace(0, 1, num_categories + 1) cu_weights = cumsum(tmp_weights) # Formula from stats.stackexchange.com post. s_vals = [0.0] for ii in range(1, N): s_vals.append(ii * tmp_weights[ii] + (N - 1) * cu_weights[ii - 1]) s_vals = asarray(s_vals) # Normalized s_vals for comapring with the breakpoint. norm_s_vals = (1.0 / s_vals[-1]) * s_vals # Set up the output variable. ret = repeat(0, N) if(N < num_categories): return ret # Set up space for the values at the breakpoints. quantiles = [] # Find the two indices that bracket the breakpoint percentiles. # then do interpolation on the two a_vals for those indices, using # interp-weights that involve the cumulative sum of weights. 
for brk in breaks: if brk <= norm_s_vals[0]: i_low = 0 i_high = 0 elif brk >= norm_s_vals[-1]: i_low = N - 1 i_high = N - 1 else: for ii in range(N - 1): if (norm_s_vals[ii] <= brk) and (brk < norm_s_vals[ii + 1]): i_low = ii i_high = ii + 1 if i_low == i_high: v = tmp_a[i_low] else: # Interpolate as in the method 1 method, but using the s_vals instead. v = (tmp_a[i_low] + (((brk * s_vals[-1]) - s_vals[i_low]) / (s_vals[i_high] - s_vals[i_low])) * (tmp_a[i_high] - tmp_a[i_low])) quantiles.append(v) # Now that the weighted breakpoints are set, just categorize # the elements of a as usual. for i in range(0, len(quantiles) - 1): lower = quantiles[i] upper = quantiles[i + 1] ret[and_(a >= lower, a < upper)] = labels[i] # make sure upper and lower indices are marked ret[a <= quantiles[0]] = labels[0] ret[a >= quantiles[-1]] = labels[-1] if return_quantiles: return ret, quantiles else: return ret
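A minimal usage sketch of the function above, in the same spirit as the mwp calls in the diagnostic code (weighted deciles, method 2). The values and weights are invented for illustration, and mark_weighted_percentiles is assumed to be in scope as defined above.

import numpy as np

values = np.array([0.02, 0.15, 0.40, 0.05, 0.80, 0.10, 0.30, 0.60, 0.01, 0.25])
weights = np.array([1.0, 2.0, 1.5, 1.0, 0.5, 2.0, 1.0, 1.5, 1.0, 1.0])

# Ten labels -> weighted deciles; method 2 is the stats.stackexchange variant
deciles, quantiles = mark_weighted_percentiles(
    values, np.arange(1, 11), weights, 2, return_quantiles=True)
print(deciles)            # decile label assigned to each observation
print(sorted(quantiles))  # the 11 weighted breakpoints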
def describe_discrepancies(self, fov=10, descending=True): """ Describe discrpancies Parameters ---------- fov : descending : """ erf_menage = self.erf_menage erf_eec_indivi = self.erf_eec_indivi simu_aggr_tables = self.simu_aggr_tables simu_nonaggr_tables = self.simu_nonaggr_tables # Detect the biggest differences bigtable = merge(erf_menage, simu_aggr_tables, on='idmen', how='inner', suffixes=('_erf', '_of')) print 'Length of new dataframe is %s' % str(len(bigtable)) #print bigtable.columns bigtable.set_index('idmen', drop=False, inplace=True) already_met = [] options_met = [] for col in [self.variable]: bigtemp = None table = bigtable[and_(bigtable[col + '_erf'] != 0, bigtable[col + '_of'] != 0)] table[col] = (table[col + '_of'] - table[col + '_erf'] ) / table[col + '_erf'] #Difference relative # table[col + "_sign"] = table[col].apply(lambda x: x/abs(x)) # table[col] = table[col].apply(lambda x: abs(x)) print 'Minimum difference between the two tables for %s is %s' % ( col, str(table[col].min())) print 'Maximum difference between the two tables for %s is %s' % ( col, str(table[col].max())) print table[col].describe() # print table[col + "_sign"].describe() # TODO: do regular percentiles try: assert len(table[col]) == len(table['wprm_of']), "PINAGS" dec, values = mwp(table[col], np.arange(1, 11), table['wprm_of'], 2, return_quantiles=True) print sorted(values) dec, values = mwp(table[col], np.arange(1, 101), table['wprm_erf'], 2, return_quantiles=True) print sorted(values)[90:] del dec, values gc.collect() except: print 'Weighted percentile method didnt work for %s' % col pass print "\n" # Show the relevant information for the most deviant households table.sort(columns=col, ascending=not descending, inplace=True) #print table[col][0:10].to_string() if bigtemp is None: bigtemp = { 'table': table[[col, col + '_of', col + '_erf', 'idmen']][0:fov], 'options': None } bigtemp['table'][col + 'div'] = bigtemp['table'][ col + '_of'] / bigtemp['table'][col + '_erf'] print bigtemp['table'].to_string() ''' bigtemp is the table which will get filled little by little by the relevant variables. Up to the last rows of code 'table' refers to a table of aggregated values, while 'options is a table of individual variables. The reason we call it in a dictionnary is also because we modify it inside the recursive function 'iter_on parents', and it causes an error in Python unless for certain types like dictionnary values. 
''' #print "\n" # If variable is a Prestation, we show the dependancies varcol = self.simulation.output_table.column_by_name.get(col) if isinstance(varcol, Prestation): ''' For the direct children ''' if not varcol._children is None: ch_to_fetch = list(varcol._children) ch_to_fetch = map(lambda x: x.name, ch_to_fetch) ch_fetched = [] if set(ch_to_fetch) <= set(simu_aggr_tables.columns): print "Variables which need %s to be computed :\n %s \n" % ( col, str(ch_to_fetch)) for var in ch_to_fetch: if var + '_of' in table.columns: ch_fetched.append(var + '_of') else: ch_fetched.append(var) elif set(ch_to_fetch) <= set( simu_aggr_tables.columns).union( erf_menage.columns): print "Variables which need %s to be computed (some missing picked in erf):\n %s \n" % ( col, str(ch_to_fetch)) for var in ch_to_fetch: if var in simu_aggr_tables.columns: if var + '_of' in table.columns: ch_fetched.append(var + '_of') elif var + '_erf' in table.columns: ch_fetched.append(var + '_erf') else: ch_fetched.append(var) else: print "Variables which need %s to be computed (some missing):\n %s \n" % ( col, str(ch_to_fetch)) for var in ch_to_fetch: if var in simu_aggr_tables.columns: if var + '_of' in table.columns: ch_fetched.append(var + '_of') elif var in erf_menage.columns: if var + '_erf' in table.columns: ch_fetched.append(var + '_erf') print table[[col] + ch_fetched][0:fov] print "\n" del ch_to_fetch, ch_fetched ''' For the parents ''' def iter_on_parents(varcol): if (varcol._parents == set() and varcol._option == {}) or varcol.name in already_met: return else: par_to_fetch = list(varcol._parents) par_to_fetch = map(lambda x: x.name, par_to_fetch) par_fetched = [] if set(par_to_fetch) <= set(simu_aggr_tables.columns): #print "Variables the prestation %s depends of :\n %s \n" %(varcol.name, str(par_fetched)) for var in par_to_fetch: if var + '_of' in table.columns: par_fetched.append(var + '_of') else: par_fetched.append(var) elif set(par_to_fetch) <= set( simu_aggr_tables.columns).union( erf_menage.columns): #print "Variables the prestation %s depends of (some missing picked in erf):\n %s \n" %(varcol.name,str(par_fetched)) for var in par_to_fetch: if var in simu_aggr_tables.columns: if var + '_of' in table.columns: par_fetched.append(var + '_of') elif var + '_erf' in table.columns: par_fetched.append(var + '_erf') else: par_fetched.append(var) else: for var in par_to_fetch: if var in simu_aggr_tables.columns: if var + '_of' in table.columns: par_fetched.append(var + '_of') elif var in erf_menage.columns: if var + '_erf' in table.columns: par_fetched.append(var + '_erf') if len(par_fetched) > 0: #print "Variables the prestation %s depends of (some missing):\n %s \n" %(varcol.name, str(par_fetched)) pass else: #print "Variables the prestation %s depends of couldn't be found :\n %s \n" %(varcol.name, str(par_fetched)) pass if len(par_fetched) > 0: temp = table[['idmen'] + par_fetched][0:fov] bigtemp['table'] = bigtemp['table'].merge( temp, how='inner', on='idmen') #print temp.to_string(), "\n" if varcol._option != {} and not set( varcol._option.keys()) < set(options_met): vars_to_fetch = list( set(varcol._option.keys()) - set(options_met)) #print "and the options to current variable %s for the id's with strongest difference :\n %s \n" %(varcol.name, varcol._option.keys()) liste = [i for i in range(0, fov)] liste = map(lambda x: table['idmen'].iloc[x], liste) temp = simu_nonaggr_tables[ ['idmen', 'quimen', 'noindiv'] + vars_to_fetch][ simu_nonaggr_tables['idmen'].isin( table['idmen'][0:fov])] temp_sorted = 
temp[temp['idmen'] == liste[0]] for i in xrange(1, fov): temp_sorted = temp_sorted.append( temp[temp['idmen'] == liste[i]]) if bigtemp['options'] is None: bigtemp['options'] = temp_sorted bigtemp['options'] = bigtemp['options'].merge( erf_eec_indivi, on='noindiv', how='outer') else: bigtemp['options'] = bigtemp['options'].merge( temp_sorted, on=['noindiv', 'idmen', 'quimen'], how='outer') # temp_sorted.set_index(['idmen', 'quimen'], drop = True, inplace = True) # If we do that del temp, temp_sorted gc.collect() already_met.append(varcol.name) options_met.extend(varcol._option.keys()) for var in varcol._parents: iter_on_parents(var) iter_on_parents(varcol) # We merge the aggregate table with the option table ( for each individual in entity ) bigtemp['table'] = bigtemp['table'].merge(bigtemp['options'], how='left', on='idmen', suffixes=('(agg)', '(ind)')) # Reshaping the table to group by descending error on col, common entities bigtemp['table'].sort(columns=['af', 'quimen'], ascending=[False, True], inplace=True) bigtemp['table'] = bigtemp['table'].groupby( ['idmen', 'quimen'], sort=False).sum() print "Table of values for %s dependencies : \n" % col print bigtemp['table'].to_string() del bigtemp['table'], bigtemp['options'] gc.collect()
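iter_on_parents walks the Prestation dependency graph recursively, using the shared already_met / options_met lists so that each column and option is handled only once. The sketch below isolates that traversal pattern from the dataframe plumbing; the Node class and names are hypothetical stand-ins, not OpenFisca API.

class Node(object):
    # Hypothetical stand-in for a Prestation column: a name plus its parents
    def __init__(self, name, parents=()):
        self.name = name
        self._parents = set(parents)

def walk_parents(node, already_met=None):
    """Visit each ancestor with parents exactly once, depth-first."""
    if already_met is None:
        already_met = []
    if not node._parents or node.name in already_met:
        return already_met
    already_met.append(node.name)
    for parent in node._parents:
        walk_parents(parent, already_met)
    return already_met

# Toy dependency graph: af depends on af_base, which depends on smic and bmaf
smic = Node('smic')
bmaf = Node('bmaf')
af_base = Node('af_base', parents=[smic, bmaf])
af = Node('af', parents=[af_base])
print(walk_parents(af))  # ['af', 'af_base'] (leaves without parents are skipped, as in iter_on_parents)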
def _nbH(self, age, alt, inv, quifoy):
    enfant_a_charge_garde_alternee = and_(quifoy >= 2, or_(age < 18, inv), alt)
    return self.sum_by_entity(enfant_a_charge_garde_alternee.astype(int16))
def plot_choice_transition_probs(folder, after_correct=False, col_bar='general', sim_agent=False, prop_wrong=None, period=None, plot_figs=True, show_values=False, sv_fig=True, displacement=0): """ Plots choice-transition probabilities once the agent has trained in an environment for those trials at the end of each block where there is no obvious higher stimulus and after a correct previous choice. (criteria: v=max(accumulated_stimuli) < percentile_10(v)). Casuistry: end of nch block (vertical axis) against end of repetition block (horizontal axis). Inside each subplot, for current choice (row) at trial t, probability of the other choices at trial t+1. Figure is saved in folder. Args: folder: path, folder from where to extarct data and save results. after_correct: bool, if True, choice transition probabilities for trials after a correct trial are plotted. col_bar: str, determines whether to plot colorbar and how. If 'general', colobar common for all plots is added. If 'specific', colorbar for each subplot is added. If None (default), no colorbar. sim_agent: if True, a simulated agent is generated prop_wrong: if stim_agent==True, proportion of wrong choices for it period: limits number of steps selected for calculation of bias plot_figs: if True, figures are plotted show_values: if True, total counts for each condition shown on top of normal plot sv_fig: if True, figure is saved in folder. """ # TODO: split creation of matrix from plotting of matrix # trans_mat, counts_mat, choices, col_bar, n_blcks_nch, # n_blcks_trh, blck_tr_hist_id, blck_n_ch_id, show values, # (title, period: for suptitle) # Data loading/Putting together and loading. if os.path.exists(folder + '/bhvr_data_all.npz'): data = np.load(folder + '/bhvr_data_all.npz') else: data = pl.put_together_files(folder) if data: # print('Number of trials: ', str(data['gt'].shape[0])) if period is None: period = [0, data['gt'].shape[0]] elif isinstance(period, int): period = [data['gt'].shape[0] - period, data['gt'].shape[0]] # We select necessary data ground_truth = data['gt'][period[0]:period[1]] choices = np.unique(ground_truth) # print('GT: \n', choices, np.unique(ground_truth, return_counts=True)) if not sim_agent: choices_agent = data['choice'][period[0]:period[1]] # print('CHOICES: \n', choices_agent, # np.unique(choices_agent, return_counts=True)) performance = data['performance'][period[0]:period[1]] else: choices_agent = ground_truth.copy() size = (int(choices_agent.shape[0] * prop_wrong), ) indx = np.random.choice(np.arange(choices_agent.shape[0], ), size=size, replace=False) choices_agent[indx] = np.random.choice(choices, size=size) performance = choices_agent == ground_truth stim = data['stimulus'][period[0]:period[1]] # Manage possible situation when active choices remain constant. try: block_n_ch = data['nch'][period[0]:period[1]] except KeyError: block_n_ch = np.full(ground_truth.shape, len(choices)) block_tr_hist = data['curr_block'][period[0]:period[1]] # Get transition blocks # blck_tr_hist_id = np.unique(block_tr_hist) # Get number of choices blocks # blck_n_ch_id = np.unique(block_n_ch) # Percentile selection of highest stimuli. end_ind_nch = len(choices) + 1 evidence = np.abs( np.max(stim[:, 1:end_ind_nch], axis=1) - np.mean(stim[:, 1:end_ind_nch], axis=1)) evidence = np.append(evidence[1:], max(evidence)) # shift back evidence percetile_10 = np.percentile(evidence, 10) # Select between after error or after correct for bias discrimation. 
titles = ['after error', 'after correct'] title = titles[after_correct] extra_condition = and_(performance == after_correct, evidence <= percetile_10) # Matrix to fill for all causistry. trans_mat, counts_mat = hf.compute_transition_probs_mat( choices_agent, choices, block_n_ch, block_tr_hist, extra_condition=extra_condition) if plot_figs: f, ax = get_transition_probs_figure(trans_mat, counts_mat, col_bar='general', title=title + ' choice', end_title=', period= ' + str(period), show_values=True, displacement=displacement) if sv_fig: f.savefig(folder + '/choice_transition_matrix_' + title + '.png') return trans_mat else: print('No data in: ', folder) return None
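The trial-selection step above keeps only low-evidence trials: evidence is the gap between the strongest and the mean stimulus, shifted back one trial, and the final mask combines the 10th-percentile cut with the after-correct (or after-error) condition. A standalone sketch with random toy data follows; array names and sizes are placeholders.

import numpy as np

rng = np.random.default_rng(0)
n_trials, n_ch = 200, 4
stim = rng.normal(size=(n_trials, n_ch + 1))   # column 0 assumed to be fixation
performance = rng.integers(0, 2, size=n_trials).astype(bool)

end_ind_nch = n_ch + 1
evidence = np.abs(stim[:, 1:end_ind_nch].max(axis=1)
                  - stim[:, 1:end_ind_nch].mean(axis=1))
evidence = np.append(evidence[1:], evidence.max())   # shift back one trial
low_evidence = evidence <= np.percentile(evidence, 10)

after_correct = True
mask = np.logical_and(performance == after_correct, low_evidence)
print(mask.sum(), 'trials selected out of', n_trials)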
def _nbF(self, age, alt, inv, quifoy):
    enfant_a_charge = and_(quifoy >= 2, or_(age < 18, inv), not_(alt))
    return self.sum_by_entity(enfant_a_charge.astype(int16))
def _nbG(self, alt, inv, quifoy):
    enfant_a_charge_invalide = and_(quifoy >= 2, inv, not_(alt))
    return self.sum_by_entity(enfant_a_charge_invalide.astype(int16))
def test(year=2006, variables=['af']): simulation = SurveySimulation() survey_filename = os.path.join(model.DATA_DIR, 'sources', 'test.h5') simulation.set_config(year=year, survey_filename=survey_filename) simulation.set_param() simulation.compute() # of_aggregates = Aggregates() # of_aggregates.set_simulation(simulation) # of_aggregates.compute() # print of_aggregates.aggr_frame # # from openfisca_france.data.erf.aggregates import build_erf_aggregates # temp = (build_erf_aggregates(variables=variables, year= year)) # print temp # return variable = "af" debugger = Debugger() debugger.set_simulation(simulation) debugger.set_variable(variable) debugger.show_aggregates() def get_all_ancestors(varlist): if len(varlist) == 0: return [] else: if varlist[0]._parents == set(): return ([varlist[0]] + get_all_ancestors(varlist[1:])) else: return ([varlist[0]] + get_all_ancestors(list(varlist[0]._parents)) + get_all_ancestors(varlist[1:])) # We want to get all ancestors + children + the options that we're going to encounter parents = map(lambda x: simulation.output_table.column_by_name.get(x), variables) parents = get_all_ancestors(parents) options = [] for varcol in parents: options.extend(varcol._option.keys()) options = list(set(options)) #print options parents = map(lambda x: x.name, parents) for var in variables: children = set() varcol = simulation.output_table.column_by_name.get(var) children = children.union(set(map(lambda x: x.name, varcol._children))) variables = list(set(parents + list(children))) #print variables del parents, children gc.collect() def get_var(variable): variables = [variable] return simulation.aggregated_by_entity(entity="men", variables=variables, all_output_vars=False, force_sum=True)[0] simu_aggr_tables = get_var(variables[0]) for var in variables[1:]: simu_aggr_tables = simu_aggr_tables.merge(get_var(var)[['idmen', var]], on='idmen', how='outer') # We load the data from erf table in case we have to pick data there erf_data = DataCollection(year=year) os.system('cls') todo = set(variables + ["ident", "wprm"]).union(set(options)) print 'Variables or equivalents to fetch :' print todo ''' Méthode générale pour aller chercher les variables de l'erf/eec ( qui n'ont pas forcément le même nom et parfois sont les variables utilisées pour créér l'of ): 1 - essayer le get_of2erf, ça doit marcher pour les variables principales ( au moins les aggrégats que l'on compare ) Si les variables ne sont pas directement dans la table, elles ont été calculées à partir d'autres variables de données erf/eec donc chercher dans : 2 - build_survey 3 - model/model.py qui dira éventuellement dans quel module de model/ chercher Le 'print todo' vous indique quelles variables chercher ( attention à ne pas inclure les enfants directs ) L'utilisation du Ctrl-H est profitable ! 
''' fetch_eec = [ 'statut', 'titc', 'chpub', 'encadr', 'prosa', 'age', 'naim', 'naia', 'noindiv' ] fetch_erf = ['zsali', 'af', 'ident', 'wprm', 'noi', 'noindiv', 'quelfic'] erf_df = erf_data.get_of_values(variables=fetch_erf, table="erf_indivi") eec_df = erf_data.get_of_values(variables=fetch_eec, table="eec_indivi") erf_eec_indivi = erf_df.merge(eec_df, on='noindiv', how='inner') assert 'quelfic' in erf_eec_indivi.columns, "quelfic not in erf_indivi columns" del eec_df, erf_df # We then get the aggregate variables for the menage ( mainly to compare with of ) print 'Loading data from erf_menage table' erf_menage = erf_data.get_of_values(variables=list(todo) + ['quelfic'], table="erf_menage") del todo gc.collect() assert 'ident' in erf_menage.columns, "ident not in erf_menage.columns" from openfisca_france.data.erf import get_erf2of erf2of = get_erf2of() erf_menage.rename(columns=erf2of, inplace=True) # We get the options from the simulation non aggregated tables: # First from the output_table # We recreate the noindiv in output_table simulation.output_table.table[ 'noindiv'] = 100 * simulation.output_table.table.idmen_ind + simulation.output_table.table.noi_ind simulation.output_table.table['noindiv'] = simulation.output_table.table[ 'noindiv'].astype(np.int64) s1 = [ var for var in set(options).intersection( set(simulation.output_table.table.columns)) ] + ['idmen_ind', 'quimen_ind', 'noindiv'] simu_nonaggr_tables = (simulation.output_table.table)[s1] simu_nonaggr_tables.rename(columns={ 'idmen_ind': 'idmen', 'quimen_ind': 'quimen' }, inplace=True) assert 'noindiv' in simu_nonaggr_tables.columns # If not found, we dwelve into the input_table if (set(s1) - set(['idmen_ind', 'quimen_ind', 'noindiv'])) < set(options): assert 'noindiv' in simulation.input_table.table.columns, "'noindiv' not in simulation.input_table.table.columns" s2 = [ var for var in (set(options).intersection( set(simulation.input_table.table.columns)) - set(s1)) ] + ['noindiv'] #print s2 temp = simulation.input_table.table[s2] simu_nonaggr_tables = simu_nonaggr_tables.merge(temp, on='noindiv', how='inner', sort=False) del s2, temp del s1 gc.collect() simu_nonaggr_tables = simu_nonaggr_tables[list(set(options)) + ['idmen', 'quimen', 'noindiv']] #print options, variables assert 'idmen' in simu_nonaggr_tables.columns, 'Idmen not in simu_nonaggr_tables columns' # Check the idmens that are not common erf_menage.rename(columns={'ident': 'idmen'}, inplace=True) print "\n" print 'Checking if idmen is here...' print '\n ERF : ' print 'idmen' in erf_menage.columns print "\n Simulation output" print 'idmen' in simu_aggr_tables.columns print "\n" #print 'Dropping duplicates of idmen for both tables...' assert not erf_menage["idmen"].duplicated().any( ), "Duplicated idmen in erf_menage" #erf_menage.drop_duplicates('idmen', inplace = True) simu_aggr_tables.drop_duplicates('idmen', inplace=True) assert not simu_aggr_tables["idmen"].duplicated().any( ), "Duplicated idmen in of" print 'Checking mismatching idmen... ' s1 = set(erf_menage['idmen']) - (set(simu_aggr_tables['idmen'])) if s1: print "idmen that aren't in simu_aggr_tables : %s" % str(len(s1)) pass s2 = (set(simu_aggr_tables['idmen'])) - set(erf_menage['idmen']) if s2: print "idmen that aren't in erf_menage : %s" % str(len(s2)) pass del s1, s2 # Restrict to common idmens and merge s3 = set(erf_menage['idmen']).intersection(set(simu_aggr_tables['idmen'])) print "Restricting to %s common idmen... 
\n" % str(len(s3)) erf_menage = erf_menage[erf_menage['idmen'].isin(s3)] simu_aggr_tables = simu_aggr_tables[simu_aggr_tables['idmen'].isin(s3)] del s3 gc.collect() #print erf_menage.columns #print simu_aggr_tables.columns # Compare differences across of and erf dataframes print "Comparing differences between dataframes... \n" colcom = (set(erf_menage.columns).intersection( set(simu_aggr_tables.columns))) - set(['idmen', 'wprm']) print 'Common variables: ' print colcom erf_menage.reset_index(inplace=True) simu_aggr_tables.reset_index(inplace=True) for col in colcom: temp = set( erf_menage['idmen'][erf_menage[col] != simu_aggr_tables[col]]) print "Numbers of idmen that aren't equal on variable %s : %s \n" % ( col, str(len(temp))) del temp # Detect the biggest differences bigtable = merge(erf_menage, simu_aggr_tables, on='idmen', how='inner', suffixes=('_erf', '_of')) print 'Length of new dataframe is %s' % str(len(bigtable)) #print bigtable.columns bigtable.set_index('idmen', drop=False, inplace=True) already_met = [] options_met = [] for col in colcom: bigtemp = None table = bigtable[and_(bigtable[col + '_erf'] != 0, bigtable[col + '_of'] != 0)] table[col] = (table[col + '_erf'] - table[col + '_of'] ) / table[col + '_erf'] #Difference relative table[col] = table[col].apply(lambda x: abs(x)) print 'Minimum difference between the two tables for %s is %s' % ( col, str(table[col].min())) print 'Maximum difference between the two tables for %s is %s' % ( col, str(table[col].max())) print table[col].describe() try: assert len(table[col]) == len(table['wprm_of']), "PINAGS" dec, values = mwp(table[col], np.arange(1, 11), table['wprm_of'], 2, return_quantiles=True) #print sorted(values) dec, values = mwp(table[col], np.arange(1, 101), table['wprm_erf'], 2, return_quantiles=True) #print sorted(values)[90:] del dec, values gc.collect() except: #print 'Weighted percentile method didnt work for %s' %col pass print "\n" # Show the relevant information for the most deviant households table.sort(columns=col, ascending=False, inplace=True) #print table[col][0:10].to_string() if bigtemp is None: bigtemp = { 'table': table[[col, col + '_of', col + '_erf', 'idmen']][0:10], 'options': None } bigtemp['table'][col + 'div'] = bigtemp['table'][ col + '_of'] / bigtemp['table'][col + '_erf'] print bigtemp['table'].to_string() ''' bigtemp is the table which will get filled little by little by the relevant variables. Up to the last rows of code 'table' refers to a table of aggregated values, while 'options is a table of individual variables. The reason we call it in a dictionnary is also because we modify it inside the recursive function 'iter_on parents', and it causes an error in Python unless for certain types like dictionnary values. 
''' #print "\n" # If variable is a Prestation, we show the dependancies varcol = simulation.output_table.column_by_name.get(col) if isinstance(varcol, Prestation): ''' For the direct children ''' if not varcol._children is None: ch_to_fetch = list(varcol._children) ch_to_fetch = map(lambda x: x.name, ch_to_fetch) ch_fetched = [] if set(ch_to_fetch) <= set(simu_aggr_tables.columns): print "Variables which need %s to be computed :\n %s \n" % ( col, str(ch_to_fetch)) for var in ch_to_fetch: if var + '_of' in table.columns: ch_fetched.append(var + '_of') else: ch_fetched.append(var) elif set(ch_to_fetch) <= set(simu_aggr_tables.columns).union( erf_menage.columns): print "Variables which need %s to be computed (some missing picked in erf):\n %s \n" % ( col, str(ch_to_fetch)) for var in ch_to_fetch: if var in simu_aggr_tables.columns: if var + '_of' in table.columns: ch_fetched.append(var + '_of') elif var + '_erf' in table.columns: ch_fetched.append(var + '_erf') else: ch_fetched.append(var) else: print "Variables which need %s to be computed (some missing):\n %s \n" % ( col, str(ch_to_fetch)) for var in ch_to_fetch: if var in simu_aggr_tables.columns: if var + '_of' in table.columns: ch_fetched.append(var + '_of') elif var in erf_menage.columns: if var + '_erf' in table.columns: ch_fetched.append(var + '_erf') print table[[col] + ch_fetched][0:10] print "\n" del ch_to_fetch, ch_fetched ''' For the parents ''' def iter_on_parents(varcol): if (varcol._parents == set() and varcol._option == {}) or varcol.name in already_met: return else: par_to_fetch = list(varcol._parents) par_to_fetch = map(lambda x: x.name, par_to_fetch) par_fetched = [] if set(par_fetched) <= set(simu_aggr_tables.columns): #print "Variables the prestation %s depends of :\n %s \n" %(varcol.name, str(par_fetched)) for var in par_fetched: if var + '_of' in table.columns: par_fetched.append(var + '_of') else: par_fetched.append(var) elif set(par_fetched) <= set( simu_aggr_tables.columns).union( erf_menage.columns): #print "Variables the prestation %s depends of (some missing picked in erf):\n %s \n" %(varcol.name,str(par_fetched)) for var in par_fetched: if var in simu_aggr_tables.columns: if var + '_of' in table.columns: par_fetched.append(var + '_of') elif var + '_erf' in table.columns: par_fetched.append(var + '_erf') else: par_fetched.append(var) else: for var in par_fetched: if var in simu_aggr_tables.columns: if var + '_of' in table.columns: par_fetched.append(var + '_of') elif var in erf_menage.columns: if var + '_erf' in table.columns: par_fetched.append(var + '_erf') if len(par_fetched) > 0: #print "Variables the prestation %s depends of (some missing):\n %s \n" %(varcol.name, str(par_fetched)) pass else: #print "Variables the prestation %s depends of couldn't be found :\n %s \n" %(varcol.name, str(par_fetched)) pass if len(par_fetched) > 0: temp = table[[col, 'idmen'] + par_fetched][0:10] bigtemp['table'] = pd.merge(temp, bigtemp['table'], how='inner') #print temp.to_string(), "\n" if varcol._option != {} and not set( varcol._option.keys()) < set(options_met): vars_to_fetch = list( set(varcol._option.keys()) - set(options_met)) #print "and the options to current variable %s for the id's with strongest difference :\n %s \n" %(varcol.name, varcol._option.keys()) liste = [i for i in range(0, 10)] liste = map(lambda x: table['idmen'].iloc[x], liste) temp = simu_nonaggr_tables[ ['idmen', 'quimen', 'noindiv'] + vars_to_fetch][simu_nonaggr_tables['idmen'].isin( table['idmen'][0:10])] temp_sorted = temp[temp['idmen'] == 
liste[0]] for i in xrange(1, 10): temp_sorted = temp_sorted.append( temp[temp['idmen'] == liste[i]]) if bigtemp['options'] is None: bigtemp['options'] = temp_sorted bigtemp['options'] = bigtemp['options'].merge( erf_eec_indivi, on='noindiv', how='outer') else: bigtemp['options'] = bigtemp['options'].merge( temp_sorted, on=['noindiv', 'idmen', 'quimen'], how='outer') # temp_sorted.set_index(['idmen', 'quimen'], drop = True, inplace = True) # If we do that del temp, temp_sorted gc.collect() already_met.append(varcol.name) options_met.extend(varcol._option.keys()) for var in varcol._parents: iter_on_parents(var) iter_on_parents(varcol) # We merge the aggregate table with the option table ( for each individual in entity ) bigtemp['table'] = bigtemp['table'].merge(bigtemp['options'], how='left', on='idmen', suffixes=('(agg)', '(ind)')) # Reshaping the table to group by descending error on col, common entities bigtemp['table'].sort(columns=['af', 'quimen'], ascending=[False, True], inplace=True) bigtemp['table'] = bigtemp['table'].groupby(['idmen', 'quimen'], sort=False).sum() print "Table of values for %s dependencies : \n" % col print bigtemp['table'].to_string() del bigtemp['table'], bigtemp['options'] gc.collect()
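Both this script and describe_discrepancies repeat the same lookup: for a variable name, prefer its '_of' column in the merged table, fall back to '_erf', and otherwise use the bare name. A small helper capturing that pattern is sketched below; the function name is hypothetical and it simplifies the original branching, which also checks which source table the variable comes from.

def resolve_column(var, table_columns):
    """Return the merged-table column to display for var, preferring
    the OpenFisca ('_of') value, then the ERF ('_erf') value."""
    for candidate in (var + '_of', var + '_erf', var):
        if candidate in table_columns:
            return candidate
    return None

# Example with a toy set of merged columns
columns = ['af_of', 'af_erf', 'zsali_erf', 'idmen']
print([resolve_column(v, columns) for v in ['af', 'zsali', 'wprm']])
# -> ['af_of', 'zsali_erf', None]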
def get_major_differences(self):
    self.build_columns_to_fetch()
    self.build_erf_data_frames()
    self.build_openfisca_data_frames()
    variable = self.variable
    erf_menages_data_frame = self.erf_menages_data_frame
    of_menages_data_frame = self.of_menages_data_frame
    merged_menage_data_frame = merge(
        erf_menages_data_frame[[variable, 'idmen']],
        of_menages_data_frame[[variable, 'idmen']],
        on = 'idmen',
        how = 'inner',
        suffixes = ('_erf', '_of')
        )
    log.info('Length of merged_menage_data_frame is {}'.format(len(merged_menage_data_frame)))
    merged_menage_data_frame.set_index('idmen', drop = False, inplace = True)
    table = merged_menage_data_frame[
        and_(
            merged_menage_data_frame[variable + '_erf'] != 0,
            merged_menage_data_frame[variable + '_of'] != 0
            )
        ]
    table[variable + "_rel_diff"] = (table[variable + '_of'] - table[variable + '_erf']) \
        / table[variable + '_erf']  # Relative difference
    log.info(
        "Minimum difference between the two tables for {} is {}".format(
            variable, str(table[variable + "_rel_diff"].min())
            )
        )
    log.info(
        "Maximum difference between the two tables for {} is {}".format(
            variable, str(table[variable + "_rel_diff"].max())
            )
        )
    table[variable + '_ratio'] = (
        table[variable + '_of'] / table[variable + '_erf']
        )
    log.info(table[variable + "_rel_diff"].describe())
    try:
        assert len(table[variable + "_rel_diff"]) == len(table['wprm_of']), "PINAGS"
        dec, values = mwp(
            table[variable + "_rel_diff"],
            np.arange(1, 11),
            table['wprm_of'],
            2,
            return_quantiles = True
            )
        log.info(sorted(values))
        dec, values = mwp(
            table[variable + "_rel_diff"],
            np.arange(1, 101),
            table['wprm_erf'],
            2,
            return_quantiles = True
            )
        log.info(sorted(values)[90:])
        del dec, values
    except:
        log.info('Weighted percentile method did not work for {}'.format(variable + "_rel_diff"))
        pass
    table.sort(columns = variable + "_rel_diff", ascending = False, inplace = True)
    print table.to_string()
    return table
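The core of get_major_differences can be reproduced on any pair of household tables: merge on idmen, drop rows where either side is zero, then inspect the relative difference and the ratio. A self-contained toy example follows (made-up amounts; it uses the current pandas sort_values where the code above relies on the older DataFrame.sort API).

import pandas as pd

erf = pd.DataFrame({'idmen': [1, 2, 3, 4], 'af': [1200.0, 0.0, 800.0, 500.0]})
of = pd.DataFrame({'idmen': [1, 2, 3, 4], 'af': [1250.0, 300.0, 800.0, 450.0]})

merged = erf.merge(of, on='idmen', how='inner', suffixes=('_erf', '_of'))
both_nonzero = (merged['af_erf'] != 0) & (merged['af_of'] != 0)
table = merged[both_nonzero].copy()

table['af_rel_diff'] = (table['af_of'] - table['af_erf']) / table['af_erf']
table['af_ratio'] = table['af_of'] / table['af_erf']
print(table.sort_values('af_rel_diff', ascending=False))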
def _ass_elig_i(chomeur, ass_precondition_remplie):
    '''
    Individual eligibility for the ASS (allocation de solidarité spécifique)
    '''
    return and_(chomeur, ass_precondition_remplie)
def plot_glm_weights(weights_ac, weights_ae, tags_mat, step, per, num_tr_back=3, axs=None, linewidth=0.5, nch=None, **kwargs): ''' Plotting function for GLM weights. For each list of tags in tags_mat, a figure is created. For eah tag in each list, a subplot is generated. plot_opts = {'legend': False, 'lw': .5, 'label': '', 'alpha': 1, 'N': 0, 'compared_averages': False, 'num_tr_tm': None} ''' plot_opts = { 'legend': False, 'lw': .5, 'label': '', 'alpha': 1, 'N': 0, 'compared_averages': False, 'num_tr_tm': num_tr_back } # TODO: num_tr_back is passed twice (?) plot_opts.update(kwargs) weights = [weights_ac, weights_ae] regressors = [afterc_cols, aftere_cols] l_styl = ['-', '--'] titles = ['a_c', 'a_e'] figs = [] for ind_tag, tags in enumerate(tags_mat): if axs is None: f, ax = plt.subplots(nrows=len(tags) // 2, ncols=2, sharex=True, sharey=True) figs.append(f) ax = ax.flatten() else: ax = axs[ind_tag] for ind_cond, cond in enumerate(zip(weights, regressors, l_styl)): l_st = cond[2] # different line style for a_c and a_e weights_tmp = np.array(cond[0]).squeeze() # weights rgr_tmp = np.array(cond[1]) # labels for data contained in weights for ind_t, tag in enumerate(tags): t_contr = and_(tag != 'evidence', tag != 'intercept') num_tr_tm = num_tr_back + 1 if t_contr else 2 if plot_opts['num_tr_tm']: num_tr_tm = plot_opts['num_tr_tm'] for ind_tr in range(1, num_tr_tm + 1): t_tmp = tag + str(ind_tr) if t_contr else tag if t_tmp in rgr_tmp: # Plot tunning # label given (for tunning legend) only when: lbl = (plot_opts['label'] + 'trial lag ' + t_tmp[-1] + ' ' + titles[ind_cond]) if ind_t == 0 and \ plot_opts['legend'] else '' color = COLORES[ind_tr - 1] # for the case of only averages compared for nchs. if plot_opts['compared_averages']: color = COLORS[int(plot_opts['N'])] lbl = 'N=' + str(plot_opts['N']) \ + ', lag= ' + str(ind_tr) + ' , ' \ + titles[ind_cond] lbl = lbl if ind_t == 0 else '' alpha = 1 - (1 / (num_tr_tm + 1)) * (ind_tr - 1) plot_opts.update({'alpha': alpha}) ax[ind_t].plot(np.arange(weights_tmp.shape[0]) * step + per / 2, weights_tmp[:, rgr_tmp == t_tmp], color=color, linestyle=l_st, label=lbl, linewidth=plot_opts['lw'], alpha=plot_opts['alpha']) if lbl != '': ax[ind_t].legend() ax[ind_t].set_title(tag) # ax[0].legend() return figs
def mark_weighted_percentiles(a, labels, weights, method, return_quantiles=False): """ Args: a: labels: weights: method: return_quantiles: (Default value = False) Returns: """ # from http://pastebin.com/KTLip9ee # a is an input array of values. # weights is an input array of weights, so weights[i] goes with a[i] # labels are the names you want to give to the xtiles # method refers to which weighted algorithm. # 1 for wikipedia, 2 for the stackexchange post. # The code outputs an array the same shape as 'a', but with # labels[i] inserted into spot j if a[j] falls in x-tile i. # The number of xtiles requested is inferred from the length of 'labels'. # First method, "vanilla" weights from Wikipedia article. if method == 1: # Sort the values and apply the same sort to the weights. N = len(a) sort_indx = argsort(a) tmp_a = a[sort_indx].copy() tmp_weights = weights[sort_indx].copy() # 'labels' stores the name of the x-tiles the user wants, # and it is assumed to be linearly spaced between 0 and 1 # so 5 labels implies quintiles, for example. num_categories = len(labels) breaks = linspace(0, 1, num_categories + 1) # Compute the percentile values at each explicit data point in a. cu_weights = cumsum(tmp_weights) p_vals = (1.0 / cu_weights[-1]) * (cu_weights - 0.5 * tmp_weights) # Set up the output array. ret = repeat(0, len(a)) if len(a) < num_categories: return ret # Set up the array for the values at the breakpoints. quantiles = [] # Find the two indices that bracket the breakpoint percentiles. # then do interpolation on the two a_vals for those indices, using # interp-weights that involve the cumulative sum of weights. for brk in breaks: if brk <= p_vals[0]: i_low = 0 i_high = 0 elif brk >= p_vals[-1]: i_low = N - 1 i_high = N - 1 else: for ii in range(N - 1): if (p_vals[ii] <= brk) and (brk < p_vals[ii + 1]): i_low = ii i_high = ii + 1 if i_low == i_high: v = tmp_a[i_low] else: # If there are two brackets, then apply the formula as per Wikipedia. v = (tmp_a[i_low] + ((brk - p_vals[i_low]) / (p_vals[i_high] - p_vals[i_low])) * (tmp_a[i_high] - tmp_a[i_low])) # Append the result. quantiles.append(v) # Now that the weighted breakpoints are set, just categorize # the elements of a with logical indexing. for i in range(0, len(quantiles) - 1): lower = quantiles[i] upper = quantiles[i + 1] ret[and_(a >= lower, a < upper)] = labels[i] # make sure upper and lower indices are marked ret[a <= quantiles[0]] = labels[0] ret[a >= quantiles[-1]] = labels[-1] return ret # The stats.stackexchange suggestion. elif method == 2: N = len(a) sort_indx = argsort(a) tmp_a = a[sort_indx].copy() tmp_weights = weights[sort_indx].copy() num_categories = len(labels) breaks = linspace(0, 1, num_categories + 1) cu_weights = cumsum(tmp_weights) # Formula from stats.stackexchange.com post. s_vals = [0.0] for ii in range(1, N): s_vals.append(ii * tmp_weights[ii] + (N - 1) * cu_weights[ii - 1]) s_vals = asarray(s_vals) # Normalized s_vals for comapring with the breakpoint. norm_s_vals = (1.0 / s_vals[-1]) * s_vals # Set up the output variable. ret = repeat(0, N) if N < num_categories: return ret # Set up space for the values at the breakpoints. quantiles = [] # Find the two indices that bracket the breakpoint percentiles. # then do interpolation on the two a_vals for those indices, using # interp-weights that involve the cumulative sum of weights. 
for brk in breaks: if brk <= norm_s_vals[0]: i_low = 0 i_high = 0 elif brk >= norm_s_vals[-1]: i_low = N - 1 i_high = N - 1 else: for ii in range(N - 1): if (norm_s_vals[ii] <= brk) and (brk < norm_s_vals[ii + 1]): i_low = ii i_high = ii + 1 if i_low == i_high: v = tmp_a[i_low] else: # Interpolate as in the method 1 method, but using the s_vals instead. v = (tmp_a[i_low] + (((brk * s_vals[-1]) - s_vals[i_low]) / (s_vals[i_high] - s_vals[i_low])) * (tmp_a[i_high] - tmp_a[i_low])) quantiles.append(v) # Now that the weighted breakpoints are set, just categorize # the elements of a as usual. for i in range(0, len(quantiles) - 1): lower = quantiles[i] upper = quantiles[i + 1] ret[and_(a >= lower, a < upper)] = labels[i] # make sure upper and lower indices are marked ret[a <= quantiles[0]] = labels[0] ret[a >= quantiles[-1]] = labels[-1] if return_quantiles: return ret, quantiles else: return ret
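With uniform weights the method-2 breakpoints reduce to ordinary linear-interpolation percentiles, which gives a quick sanity check of the implementation above. A sketch with random toy data, assuming mark_weighted_percentiles is in scope:

import numpy as np

rng = np.random.default_rng(1)
a = rng.normal(size=1000)
w = np.ones_like(a)

# Four labels -> weighted quartiles; compare breakpoints with plain percentiles
_, quantiles = mark_weighted_percentiles(a, np.arange(1, 5), w, 2, return_quantiles=True)
plain = np.percentile(a, [0, 25, 50, 75, 100])
print(np.round(sorted(quantiles), 3))
print(np.round(plain, 3))  # the two rows should closely agree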
def _nbJ(self, age, inv, quifoy):
    majeur_celibataire_sans_enfant = and_(quifoy >= 2, age >= 18, not_(inv))
    return self.sum_by_entity(majeur_celibataire_sans_enfant.astype(int16))
def _nbI(self, alt, inv, quifoy):
    enfant_a_charge_garde_alternee_invalide = and_(quifoy >= 2, inv, alt)
    return self.sum_by_entity(enfant_a_charge_garde_alternee_invalide.astype(int16))
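The five counters above (_nbF, _nbG, _nbH, _nbI, _nbJ) share one recipe: build an individual-level boolean with and_/or_/not_ and sum it over the fiscal household with sum_by_entity. The sketch below reproduces that reduction with plain NumPy; the toy arrays are invented and np.bincount merely stands in for sum_by_entity.

import numpy as np
from numpy import logical_and as and_, logical_or as or_, logical_not as not_

# Toy individual-level data: fiscal household id, role, age, invalidity, alternating custody
idfoy = np.array([0, 0, 0, 1, 1, 2])
quifoy = np.array([0, 1, 2, 0, 2, 0])  # 0/1 = declarants, >= 2 = dependants
age = np.array([40, 38, 10, 45, 20, 30])
inv = np.array([False, False, False, False, True, False])
alt = np.array([False, False, False, False, False, False])

# Same individual-level test as _nbF: dependant, minor or invalid, not in alternating custody
enfant_a_charge = and_(quifoy >= 2, or_(age < 18, inv), not_(alt))

# sum_by_entity reduces to one value per foyer; bincount is the NumPy equivalent here
nbF = np.bincount(idfoy, weights=enfant_a_charge.astype(np.int16), minlength=3)
print(nbF)  # [1. 1. 0.]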