Beispiel #1
0
    "P6920": "pension, contributing, pre",
    "P6920S1": "pension, contribution amount",
    "P6940": "pension, contributors, pre",
    "P6990": "seguro de riesgos laborales, pre",
    "P7500S2A1": "pension, receipts"
    # new name, old income variable
}

# Read only the survey columns listed in colDict and rename them to the
# labels colDict assigns.
people_csv = ("data/enph-2017/recip-" + str(cl.subsample)
              + "/Caracteristicas_generales_personas.csv")
ppl = pd.read_csv(people_csv, usecols=list(colDict.keys()))
ppl = ppl.rename(columns=colDict)

# Apply each correction in c.corrections, in order, then make columns numeric.
for corr in c.corrections:
    ppl = corr.correct(ppl)

ppl = c.all_columns_to_numbers(ppl)


def _contributing_indicator(code):
    """Map code 1 to 1, code 2 to 0, and anything else to NaN."""
    if code == 1:
        return 1
    if code == 2:
        return 0
    return np.nan


def _contributor_indicator(matching_code):
    """Build a mapper for the "contributors" codes.

    The returned function yields 1 when the code equals `matching_code`,
    0 for any other code strictly between 0 and 4, and NaN otherwise.
    """
    def indicator(code):
        if code == matching_code:
            return 1
        if 0 < code < 4:
            return 0
        return np.nan
    return indicator


contributing_pre = ppl["pension, contributing, pre"]
contributors_pre = ppl["pension, contributors, pre"]

ppl["pension, contributing (if not pensioned)"] = (
    contributing_pre.apply(_contributing_indicator))

# Receiving a pension: either the "contributing" code is 3,
# or positive pension receipts are recorded.
coded_as_pensioner = contributing_pre == 3
has_pension_receipts = ppl["pension, receipts"] > 0
ppl["pension, receiving"] = (
    (coded_as_pensioner | has_pension_receipts).astype('int'))

ppl["pension, contributor(s) (if not pensioned) = split"] = (
    contributors_pre.apply(_contributor_indicator(1)))

ppl["pension, contributor(s) (if not pensioned) = self"] = (
    contributors_pre.apply(_contributor_indicator(2)))
# Exploratory selections: each line picks one column alongside "_x" / "_y"
# suffixed columns. Nothing is assigned, so these only show output when run
# interactively.
# NOTE(review): `dfc` is defined outside this view; presumably it is a merge
# result (hence the "_x"/"_y" suffixes) -- confirm.
dfc[["min vat frac", "min vat frac_x", "min vat frac_y"]]
# NOTE(review): pairs "min vat frac" with the "max ..." columns; possibly
# "max vat frac" was intended -- confirm.
dfc[["min vat frac", "max vat frac_x", "max vat frac_y"]]
dfc[["min vat", "min vat_x", "min vat_y"]]
# NOTE(review): same pattern as above ("min vat" next to "max vat_x/_y");
# possibly "max vat" was intended -- confirm.
dfc[["min vat", "max vat_x", "max vat_y"]]

# Describe the benefit columns (in-kind, then cash) of the processed data,
# for people aged 18 or more.
processed_adults = data.people[data.people["age"] >= 18]
benefit_columns = data.cols_benefit_in_kind + data.cols_benefit_cash
util.describeWithMissing(processed_adults[benefit_columns])

# Describe the corresponding columns of the raw (full-sample) survey file,
# again for people aged 18 or more.
raw_benefit_names = {
    "P1668S1A4": "familias en accion",
    "P1668S3A4": "familias en su tierra",
    "P1668S4A4": "jovenes en accion",
    "P1668S2A4": "programa de adultos mayores",
    "P1668S5A4": "transferencias por victimizacion",
}

raw = pd.read_csv(
    "data/enph-2017/recip-100/Caracteristicas_generales_personas.csv")
raw = raw.rename(columns={"P6040": "age", **raw_benefit_names})
raw = c.all_columns_to_numbers(raw)

raw_adults = raw[raw["age"] >= 18]
util.describeWithMissing(raw_adults[list(raw_benefit_names.values())])

util.describeWithMissing(data.people[data.people["age"] >= 18][[
    "income, year : benefit : familias en accion, in-kind",
    "income, year : benefit : familias en su tierra, in-kind",
    "income, year : benefit : jovenes en accion, in-kind",
    "income, year : benefit : programa de adultos mayores, in-kind",
Beispiel #3
0
# From the raw ENPH person-level data,
# creates a data set that's a little friendlier.

if True:
    import python.build.output_io as oio
    import python.build.people.files as files
    import python.common.common as cl
    import python.common.misc as c

# Step 1: gather the person-level files for the current subsample.
ppl = cl.collect_files(files.files, subsample=cl.subsample)

# Step 2: make the columns numeric, leaving "non-beca sources" alone.
# PITFALL : that column is a space-separated list of ints.
ppl = c.all_columns_to_numbers(ppl, skip_columns=["non-beca sources"])

# Step 3: store the result as the 'people_0' stage.
oio.saveStage(cl.subsample, ppl, 'people_0')
Beispiel #4
0
            [
                Correction.Change_Column_Type(colname, str),
                Correction.Replace_In_Column(
                    colname,
                    {
                        ' ': np.nan
                        # 'nan's are created from the cast to type str
                        ,
                        "nan": np.nan
                    })
            ] for colname in
            ["where-got", "coicop", "per month", "how-got", "value"]
        ]))):
    purchases = c.correct(purchases)

purchases = com.all_columns_to_numbers(purchases)
purchases = defs.drop_if_coicop_or_value_invalid(purchases)
purchases = defs.drop_absurdly_big_expenditures(purchases)

# Drop every row whose value in the named column satisfies the predicate.
# The drops are applied one after another, in the order listed.
for column, is_unwanted in (
        ("value", lambda v: v <= 0),       # no non-positive values
        ("per month", lambda x: x == 11),  # no "never" frequencies
        ("quantity", lambda x: x <= 0),    # no non-positive quantities
):
    purchases = Correction.Drop_Row_If_Column_Satisfies_Predicate(
        column, is_unwanted).correct(purchases)

# These only make sense once the relevant columns are numbers.
for c in (  # how-got=1 -> is-purchase=1, nan -> nan, otherwise -> 0