DROP = groups + ["$p$_{{{}}}".format(group) for group in groups] few = T.shape[0] / 15. Table = Table[Table['$N$'] > few] return Table.drop(DROP, 1) def topcode(var, Nstd=3, drop=False): if drop: var[var > var.mean() + Nstd * var.std()] = np.nan else: var[var > var.mean() + Nstd * var.std()] = var.mean() + Nstd * var.std() return var if True: #~ Make DataFrame D = full_data(DIR=DATADIR) D['livestock_val_m'] = D.filter( regex='^asset_val_(cows|smallanimals|chickens|ducks|poultry)_m').sum( axis=1) D['livestock_val'] = D.filter( regex='^asset_val_(cows|smallanimals|chickens|ducks|poultry)').sum( axis=1) - D['livestock_val_m'] A = asset_vars(D, year=2013)[0] D['Asset Tot'] = A['Total'] D["Cash Savings"] = D.filter(regex="^savings_.*_b$").sum(axis=1) C = consumption_data(D)[0].ix[2013] food = [ 'c_cereals', 'c_maize', 'c_sorghum', 'c_millet', 'c_potato', 'c_sweetpotato', 'c_rice', 'c_bread', 'c_beans', 'c_oil', 'c_salt',
import statsmodels.api as sm
from matplotlib import pyplot as plt
import sys
sys.path.append("../../data")
from TUP import full_data, regressions, reg_table, df_to_orgtbl

"""
Note that topcoding has a large effect on the distribution here, and we see
only a small (presumably non-random) portion of actual income for each household.
"""


# Top-Code or censor outliers?
def topcode(var, Nstd=3, drop=False):
    """Top-code or censor values far above the mean of ``var``.

    Entries greater than ``var.mean() + Nstd * var.std()`` are overwritten
    with NaN when ``drop`` is true, otherwise with that cut-off value.

    ``var`` is modified in place through boolean-mask assignment and the same
    object is returned.  (Presumably a pandas Series -- it must provide
    ``mean``, ``std`` and mask assignment.)
    """
    # Compute the cut-off once so the mask and the replacement value are
    # guaranteed to use the same threshold.
    ceiling = var.mean() + Nstd * var.std()
    var[var > ceiling] = np.nan if drop else ceiling
    return var


D = full_data(balance=[])
keep = D.index

# NOTE(review): ``.ix`` is removed in pandas >= 1.0; left as-is because
# ``.loc`` treats missing labels differently -- confirm the pandas version.
I_file = '../../data/Endline/sections_8_17.csv'
I = pd.read_csv(I_file).rename(columns={"id":"HH"}).set_index("HH", drop=True).ix[keep]

#~Getting non-agriculture income data is easy
I = I.filter(regex="^s16")
# Raw strings: ``\d`` in a plain literal is an invalid escape sequence.
# Stripping the trailing letter gives the "c" and "d" columns matching names
# so that ``mul`` lines them up column by column.
Imonths = I.filter(regex=r"s16_\dc").rename(columns=lambda x: x[:-1])
Ipermonth = I.filter(regex=r"s16_\dd").rename(columns=lambda x: x[:-1])
Income_12m = Imonths.mul(Ipermonth).sum(axis=1)
Iyear = I.filter(regex=r"s16_\de").rename(columns=lambda x: x[:-1]).sum(axis=1)

A_file = "../../data/Endline/Agriculture_cleaned.csv"
A = pd.read_csv(A_file).rename(columns={"id":"HH"}).set_index("HH",drop=False).ix[keep]
# Median reported price for each (harvest type, price unit) pair.
unit_prices = A.groupby(["harvest_type", "harvest_price_unit"])["harvest_price"].median()
pval = round(pval,3) Table.ix[var,'$p$_{{{}}}'.format(group)]+= pval nstar=sum(pval<threshold for threshold in p_stars) if nstar: Table.ix[var,'$\Delta${}'.format(group)]+="^{{{}}}".format("*"*nstar) DROP=groups+["$p$_{{{}}}".format(group) for group in groups] few=T.shape[0]/15. Table = Table[Table['$N$']>few] return Table.drop(DROP,1) def topcode(var, Nstd=3, drop=False): if drop: var[var>var.mean()+Nstd*var.std()] = np.nan else: var[var>var.mean()+Nstd*var.std()] = var.mean()+Nstd*var.std() return var if True: #~ Make DataFrame D = full_data(DIR=DATADIR) D['livestock_val_m'] = D.filter(regex='^asset_val_(cows|smallanimals|chickens|ducks|poultry)_m').sum(axis=1) D['livestock_val'] = D.filter(regex='^asset_val_(cows|smallanimals|chickens|ducks|poultry)').sum(axis=1) - D['livestock_val_m'] A = asset_vars(D,year=2013)[0] D['Asset Tot'] = A['Total'] D["Cash Savings"] = D.filter(regex="^savings_.*_b$").sum(axis=1) C = consumption_data(D)[0].ix[2013] food = ['c_cereals', 'c_maize', 'c_sorghum', 'c_millet', 'c_potato', 'c_sweetpotato', 'c_rice', 'c_bread', 'c_beans', 'c_oil', 'c_salt', 'c_sugar', 'c_meat', 'c_livestock', 'c_poultry', 'c_fish', 'c_egg', 'c_nuts', 'c_milk', 'c_vegetables', 'c_fruit', 'c_tea', 'c_spices', 'c_alcohol', 'c_otherfood'] month = ['c_fuel', 'c_medicine', 'c_airtime', 'c_cosmetics', 'c_soap', 'c_transport', 'c_entertainment', 'c_childcare', 'c_tobacco', 'c_batteries', 'c_church', 'c_othermonth'] year = ['c_clothesfootwear', 'c_womensclothes', 'c_childrensclothes', 'c_shoes', 'c_homeimprovement', 'c_utensils', 'c_furniture', 'c_textiles', 'c_ceremonies', 'c_funerals', 'c_charities', 'c_dowry', 'c_other'] C["Food"] = C[[item for item in food if item in C]].sum(axis=1).replace(0,np.nan) C["Month"] = C[[item for item in month if item in C]].sum(axis=1).replace(0,np.nan) C["Year"] = C[[item for item in year if item in C]].sum(axis=1).replace(0,np.nan) C["Tot"] = C[["Food","Month","Year"]].sum(axis=1)
Table['Diff.'] = map(str,Table['TUP']-Table['CTL']) Table['$p$-val'] = 0 Table['$N$']=(T>0).sum() Table.drop('TUP', inplace=True) for var in T: if var in ('$N$','group','TUP'): continue pval = ttest_ind(treat[var].dropna(), control[var].dropna())[1] pval = round(pval,3) Table.ix[var,'$p$-val']+= pval for threshold in (.1, .05, .01): if pval < threshold: Table.ix[var,'Diff.']+="*" return Table if True: #~ Make DataFrame D = full_data(balance=[]) D['livestock_val_m'] = D.filter(regex='^asset_val_(cows|smallanimals|chickens|ducks|poultry)_m').sum(axis=1) D['livestock_val'] = D.filter(regex='^asset_val_(cows|smallanimals|chickens|ducks|poultry)').sum(axis=1) - D['livestock_val_m'] A = asset_vars(D,year=2013)[0] D['Asset Tot'] = A['Total'] D['Asset Prod'] = A['Productive'] D["Cash Savings"] = D.filter(regex="^savings_.*_b$").sum(axis=1) D["Land Access (fedan)"] = D.filter(regex="^land_.*_b$").sum(axis=1) C = consumption_data(D)[0].ix[2013] food = ['c_cereals', 'c_maize', 'c_sorghum', 'c_millet', 'c_potato', 'c_sweetpotato', 'c_rice', 'c_bread', 'c_beans', 'c_oil', 'c_salt', 'c_sugar', 'c_meat', 'c_livestock', 'c_poultry', 'c_fish', 'c_egg', 'c_nuts', 'c_milk', 'c_vegetables', 'c_fruit', 'c_tea', 'c_spices', 'c_alcohol', 'c_otherfood'] month = ['c_fuel', 'c_medicine', 'c_airtime', 'c_cosmetics', 'c_soap', 'c_transport', 'c_entertainment', 'c_childcare', 'c_tobacco', 'c_batteries', 'c_church', 'c_othermonth'] year = ['c_clothesfootwear', 'c_womensclothes', 'c_childrensclothes', 'c_shoes', 'c_homeimprovement', 'c_utensils', 'c_furniture', 'c_textiles', 'c_ceremonies', 'c_funerals', 'c_charities', 'c_dowry', 'c_other'] C["Food"] = C[[item for item in food if item in C]].sum(axis=1).replace(0,np.nan) C["Month"] = C[[item for item in month if item in C]].sum(axis=1).replace(0,np.nan) C["Year"] = C[[item for item in year if item in C]].sum(axis=1).replace(0,np.nan)