def topcode(var, Nstd=3, drop=False):
    """Top-code the upper tail of ``var`` in place and return it.

    Values strictly greater than ``var.mean() + Nstd * var.std()`` are
    replaced by that ceiling, or by NaN when ``drop`` is true.

    Parameters
    ----------
    var : object supporting ``.mean()``, ``.std()`` and boolean-mask
        assignment (presumably a pandas Series -- confirm against callers).
    Nstd : number of standard deviations above the mean at which the
        ceiling sits.
    drop : if true, outliers are set to NaN instead of being capped.

    Returns
    -------
    The same object that was passed in; it is modified in place.
    """
    # Compute the ceiling once, before any value is overwritten.
    ceiling = var.mean() + Nstd * var.std()
    var[var > ceiling] = np.nan if drop else ceiling
    return var


if True:  #~ Make DataFrame
    D = full_data(DIR=DATADIR)

    # Livestock value: every livestock asset column minus the "_m" ones.
    D['livestock_val_m'] = D.filter(
        regex='^asset_val_(cows|smallanimals|chickens|ducks|poultry)_m'
    ).sum(axis=1)
    D['livestock_val'] = D.filter(
        regex='^asset_val_(cows|smallanimals|chickens|ducks|poultry)'
    ).sum(axis=1) - D['livestock_val_m']

    A = asset_vars(D, year=2013)[0]
    D['Asset Tot'] = A['Total']
    D["Cash Savings"] = D.filter(regex="^savings_.*_b$").sum(axis=1)

    # `.ix` no longer exists in current pandas; `.loc` does the same label
    # lookup (assumes 2013 is a label of the outer index level, which is
    # how `.ix` resolved it -- confirm against consumption_data).
    C = consumption_data(D)[0].loc[2013]

    food = ['c_cereals', 'c_maize', 'c_sorghum', 'c_millet', 'c_potato',
            'c_sweetpotato', 'c_rice', 'c_bread', 'c_beans', 'c_oil',
            'c_salt', 'c_sugar', 'c_meat', 'c_livestock', 'c_poultry',
            'c_fish', 'c_egg', 'c_nuts', 'c_milk', 'c_vegetables',
            'c_fruit', 'c_tea', 'c_spices', 'c_alcohol', 'c_otherfood']
    month = ['c_fuel', 'c_medicine', 'c_airtime', 'c_cosmetics', 'c_soap',
             'c_transport', 'c_entertainment', 'c_childcare', 'c_tobacco',
             'c_batteries', 'c_church', 'c_othermonth']
    year = ['c_clothesfootwear', 'c_womensclothes', 'c_childrensclothes',
            'c_shoes', 'c_homeimprovement', 'c_utensils', 'c_furniture',
            'c_textiles', 'c_ceremonies', 'c_funerals', 'c_charities',
            'c_dowry', 'c_other']

    # Group totals over whichever items are actually present in C; a total
    # of exactly zero is recorded as missing.
    for label, items in (("Food", food), ("Month", month), ("Year", year)):
        present = [item for item in items if item in C]
        C[label] = C[present].sum(axis=1).replace(0, np.nan)
    C["Tot"] = C[["Food", "Month", "Year"]].sum(axis=1)

    D["Daily Exp"] = C["Tot"]
    D["Daily Food"] = C["Food"]

    # Drop every column of D whose name contains one of these substrings.
    drop_vars = ['c_milk', 'c_alcohol', 'c_spices', 'c_entertainment',
                 'c_otherfood', 'asset_val_house', 'asset_val_plough']
    unwanted = [item for item in D if any(var in item for var in drop_vars)]
    D.drop(unwanted, axis=1, inplace=True)

    # Column name -> display label: strip the "c_" / "asset_val_" prefix and
    # the "_b" suffix, then capitalize.  The asset labels previously kept
    # the "_b" suffix, unlike the consumption labels.
    consumption = {c: c[2:-2].capitalize()
                   for c in D.filter(regex='^c_.*_b$').columns}
    assets = {a: a[10:-2].capitalize()
              for a in D.filter(regex='^asset_val.*_b$').columns}
if True: #~ Make DataFrame D = full_data(DIR=DATADIR) D['livestock_val_m'] = D.filter( regex='^asset_val_(cows|smallanimals|chickens|ducks|poultry)_m').sum( axis=1) D['livestock_val'] = D.filter( regex='^asset_val_(cows|smallanimals|chickens|ducks|poultry)').sum( axis=1) - D['livestock_val_m'] A = asset_vars(D, year=2013)[0] D['Asset Tot'] = A['Total'] D["Cash Savings"] = D.filter(regex="^savings_.*_b$").sum(axis=1) C = consumption_data(D)[0].ix[2013] food = [ 'c_cereals', 'c_maize', 'c_sorghum', 'c_millet', 'c_potato', 'c_sweetpotato', 'c_rice', 'c_bread', 'c_beans', 'c_oil', 'c_salt', 'c_sugar', 'c_meat', 'c_livestock', 'c_poultry', 'c_fish', 'c_egg', 'c_nuts', 'c_milk', 'c_vegetables', 'c_fruit', 'c_tea', 'c_spices', 'c_alcohol', 'c_otherfood' ] month = [ 'c_fuel', 'c_medicine', 'c_airtime', 'c_cosmetics', 'c_soap', 'c_transport', 'c_entertainment', 'c_childcare', 'c_tobacco', 'c_batteries', 'c_church', 'c_othermonth' ] year = [ 'c_clothesfootwear', 'c_womensclothes', 'c_childrensclothes', 'c_shoes', 'c_homeimprovement', 'c_utensils', 'c_furniture',
food = [ 'cereals', 'maize', 'sorghum', 'millet', 'potato', 'sweetpotato', 'rice', 'bread', 'beans', 'oil', 'salt', 'sugar', 'meat', 'livestock', 'poultry', 'fish', 'egg', 'nuts', 'milk', 'vegetables', 'fruit', 'tea', 'spices', 'alcohol', 'otherfood' ] month = [ 'fuel', 'medicine', 'airtime', 'cosmetics', 'soap', 'transport', 'entertainment', 'childcare', 'tobacco', 'batteries', 'church', 'othermonth' ] ConsumptionItems = food + ['airtime', 'fuel'] mobile = True D = full_data(DIR=DATADIR) C, HH, T = consumption_data(D, WRITE=False, include2016=False) HH['log HHSIZE'] = HH["hh_size"].apply(np.log) HH = HH.drop("hh_size", 1) y, z = C.replace(0, np.nan).apply(np.log).sort_index( level=[0, 1, 2])[ConsumptionItems].copy(), HH.sort_index( level=[0, 1, 2]).copy() y.index.names, z.index.names = ['j', 't', 'mkt'], ['j', 't', 'mkt'] keep = pd.notnull(y.index.get_level_values("mkt")) y, z = y.loc[keep, :].align(z, join="left", axis=0) b, ce, d, sed = nd.estimate_reduced_form(y, z, return_se=True, VERBOSE=True) ce = ce.dropna(how='all') print("Getting Loglambdas") bphi, logL = nd.get_loglambdas(ce, TEST="warn") try: xrange logL.to_pickle(DATADIR + "ss-lambdas.df")
InSample = {1: 0, 2: np.nan, 3: 1} if True: #~ Make DataFrame D = full_data(DIR=DATADIR) D["In14"] = D["merge_midline"].apply(lambda x: InSample.get(x)) D["In15"] = D["merge_endline"].apply(lambda x: InSample.get(x)) A = asset_vars(D, year=2013)[0].apply(winsorize) D['Asset Tot'] = A['Total'] D['Asset Prod'] = A['Productive'] D["Cash Savings"] = D.filter(regex="^savings_.*_b$").sum(axis=1) D["Land Access (fedan)"] = D.filter(regex="^land_.*_b$").sum(axis=1) #~C = consumption_data(D)[0].ix[2014] C = consumption_data(D)[0].reorder_levels([1, 2, 0]).sort_index() food = [ 'cereals', 'maize', 'sorghum', 'millet', 'potato', 'sweetpotato', 'rice', 'bread', 'beans', 'oil', 'salt', 'sugar', 'meat', 'livestock', 'poultry', 'fish', 'egg', 'nuts', 'milk', 'vegetables', 'fruit', 'tea', 'spices', 'alcohol', 'otherfood' ] month = [ 'fuel', 'medicine', 'airtime', 'cosmetics', 'soap', 'transport', 'entertainment', 'childcare', 'tobacco', 'batteries', 'church', 'othermonth' ] year = [ 'clothesfootwear', 'womensclothes', 'childrensclothes', 'shoes', 'homeimprovement', 'utensils', 'furniture', 'textiles', 'ceremonies', 'funerals', 'charities', 'dowry', 'other'
# Estimate the reduced-form demand system on log consumption and pickle the
# resulting log-lambdas.
import sys

import numpy as np
import pandas as pd
import cfe.estimation as nd
import statsmodels.api as sm

DATADIR = "../../data/"
sys.path.append("../../data")  # must precede the TUP import below
from TUP import full_data, consumption_data, regressions, reg_table, df_to_orgtbl, mobile_data

food = ['cereals', 'maize', 'sorghum', 'millet', 'potato', 'sweetpotato',
        'rice', 'bread', 'beans', 'oil', 'salt', 'sugar', 'meat',
        'livestock', 'poultry', 'fish', 'egg', 'nuts', 'milk', 'vegetables',
        'fruit', 'tea', 'spices', 'alcohol', 'otherfood']
month = ['fuel', 'medicine', 'airtime', 'cosmetics', 'soap', 'transport',
         'entertainment', 'childcare', 'tobacco', 'batteries', 'church',
         'othermonth']
ConsumptionItems = food + ['airtime', 'fuel']
mobile = True

D = full_data(DIR=DATADIR)
C, HH, T = consumption_data(D, WRITE=False, include2016=False)

# Replace household size by its log.
HH['log HHSIZE'] = HH["hh_size"].apply(np.log)
HH = HH.drop("hh_size", axis=1)

# y: log expenditures (zeros become NaN before the log); z: household
# characteristics.  Both are sorted on their first three index levels.
y = C.replace(0, np.nan).apply(np.log).sort_index(level=[0, 1, 2])[ConsumptionItems].copy()
z = HH.sort_index(level=[0, 1, 2]).copy()
y.index.names = ['j', 't', 'mkt']
z.index.names = ['j', 't', 'mkt']

# Keep only rows with a non-null market, then align z to y's index.
keep = pd.notnull(y.index.get_level_values("mkt"))
y, z = y.loc[keep, :].align(z, join="left", axis=0)

b, ce, d, sed = nd.estimate_reduced_form(y, z, return_se=True, VERBOSE=True)
ce = ce.dropna(how='all')

print("Getting Loglambdas")
bphi, logL = nd.get_loglambdas(ce, TEST="warn")

# Python 2 and Python 3 runs write to different files.  Test the interpreter
# version directly so that a NameError raised while pickling is not mistaken
# for "running under Python 3".
if sys.version_info[0] < 3:
    logL.to_pickle(DATADIR + "ss-lambdas.df")
else:
    logL.to_pickle(DATADIR + "ss-lambdas3.df")
# Build the mobile-survey consumption panel, attach the log-lambdas and set
# up the outcome/control lists.
import sys

DATADIR = "../../data/"
sys.path.append("../../data")  # must precede the TUP import below

import numpy as np
import pandas as pd
import statsmodels.api as sm
from TUP import full_data, consumption_data, regressions, reg_table, df_to_orgtbl, mobile_data

ITEMS = ["beans", "sugar", "fish", "nuts", "vegetables", "airtime", "fuel"]

D = full_data(DIR=DATADIR)
HH, T = consumption_data(D, WRITE=True)[1:]  #"csv")
M, C, mHH = mobile_data(DIR=DATADIR + "Mobile/")

try:
    logL = pd.read_pickle(DATADIR + "ss-lambdas_mobile.df")
except EnvironmentError:
    raise IOError("Need to run SS-lambdas.py")

logL.index.names = ["HH", "Year", "Location"]
# A name must be hashable; the previous one-element list was not.
logL.name = "loglambda"

# Raw strings keep the LaTeX backslashes without invalid-escape warnings;
# the runtime value of the label is unchanged.
C = C.join(logL, how="left").rename(columns={"loglambda": r"$\log\lambda_{it}$"})
# sort_index() replaces the removed sortlevel().
C = C.reorder_levels([1, 0, 2]).sort_index()

# Keep only rows whose Location index level is not null.
keep = pd.notnull(C.index.get_level_values("Location"))
C = C.loc[keep, :]

# Make aggregate variables
C["Tot"] = C.filter(ITEMS).sum(axis=1).replace(0, np.nan)
C["logTot"] = C["Tot"].apply(np.log)
C = C.join(T, how="left", lsuffix="_")
C['const'] = 1.

Outcomes = ["Tot", "logTot", r"$\log\lambda_{it}$"]
Controls = ['const', 'TUP', 'CSH']
return -1*winsorize(-1*Ser, **kwargs) InSample = {1: 0, 2: np.nan, 3: 1} if True: #~ Make DataFrame D = full_data(DIR=DATADIR) D["In14"] = D["merge_midline"].apply(lambda x: InSample.get(x)) D["In15"] = D["merge_endline"].apply(lambda x: InSample.get(x)) A = asset_vars(D,year=2013)[0].apply(winsorize) D['Asset Tot'] = A['Total'] D['Asset Prod'] = A['Productive'] D["Cash Savings"] = D.filter(regex="^savings_.*_b$").sum(axis=1) D["Land Access (fedan)"] = D.filter(regex="^land_.*_b$").sum(axis=1) #~C = consumption_data(D)[0].ix[2014] C = consumption_data(D)[0].reorder_levels([1,2,0]).sortlevel() food = ['cereals', 'maize', 'sorghum', 'millet', 'potato', 'sweetpotato', 'rice', 'bread', 'beans', 'oil', 'salt', 'sugar', 'meat', 'livestock', 'poultry', 'fish', 'egg', 'nuts', 'milk', 'vegetables', 'fruit', 'tea', 'spices', 'alcohol', 'otherfood'] month = ['fuel', 'medicine', 'airtime', 'cosmetics', 'soap', 'transport', 'entertainment', 'childcare', 'tobacco', 'batteries', 'church', 'othermonth'] year = ['clothesfootwear', 'womensclothes', 'childrensclothes', 'shoes', 'homeimprovement', 'utensils', 'furniture', 'textiles', 'ceremonies', 'funerals', 'charities', 'dowry', 'other'] C["Food"] = C[[item for item in food if item in C]].sum(axis=1).replace(0,np.nan) C["Month"] = C[[item for item in month if item in C]].sum(axis=1).replace(0,np.nan) C["Year"] = C[[item for item in year if item in C]].sum(axis=1).replace(0,np.nan) C["Tot"] = C[["Food","Month","Year"]].sum(axis=1) D["Daily Exp"] = C["Tot"].loc[2013].groupby(level="HH").first() D["Daily Food"] = C["Food"].loc[2013].groupby(level="HH").first() drop_vars = ['c_milk', 'c_alcohol', 'c_spices', 'c_entertainment', 'c_otherfood', 'asset_val_house', 'asset_val_plough'] D.drop([item for item in D if any(var in item for var in drop_vars)], 1, inplace=True) consumption = dict([(c,c[2:-2].capitalize()) for c in D.filter(regex='^c_.*_b$').columns]) assets = dict([(a,a[10:-2].capitalize()) for a in 
D.filter(regex='^asset_val.*_b$').columns])
# Build the mobile-survey consumption panel, attach the log-lambdas and run
# the regressions of each outcome on the controls.
import sys

DATADIR = "../../data/"
sys.path.append("../../data")  # must precede the TUP import below

import numpy as np
import pandas as pd
import statsmodels.api as sm
from TUP import full_data, consumption_data, regressions, reg_table, df_to_orgtbl, mobile_data

ITEMS = ["beans", "sugar", "fish", "nuts", "vegetables", "airtime", "fuel"]

D = full_data(DIR=DATADIR)
HH, T = consumption_data(D, WRITE=True)[1:]  #"csv")
M, C, mHH = mobile_data(DIR=DATADIR + "Mobile/")

try:
    logL = pd.read_pickle(DATADIR + "ss-lambdas_mobile.df")
except EnvironmentError:
    raise IOError("Need to run SS-lambdas.py")

logL.index.names = ["HH", "Year", "Location"]
# A name must be hashable; the previous one-element list was not.
logL.name = "loglambda"

# Raw strings keep the LaTeX backslashes without invalid-escape warnings;
# the runtime value of the label is unchanged.
C = C.join(logL, how="left").rename(columns={"loglambda": r"$\log\lambda_{it}$"})
# sort_index() replaces the removed sortlevel().
C = C.reorder_levels([1, 0, 2]).sort_index()

# Keep only rows whose Location index level is not null.
keep = pd.notnull(C.index.get_level_values("Location"))
C = C.loc[keep, :]

# Make aggregate variables
C["Tot"] = C.filter(ITEMS).sum(axis=1).replace(0, np.nan)
C["logTot"] = C["Tot"].apply(np.log)
C = C.join(T, how="left", lsuffix="_")
C['const'] = 1.

Outcomes = ["Tot", "logTot", r"$\log\lambda_{it}$"]
Controls = ['const', 'TUP', 'CSH']

regs = regressions(C, outcomes=Outcomes, controls=Controls, Baseline=2013)
import sys DATADIR = "../../data/" sys.path.append("../../data") import numpy as np import pandas as pd import statsmodels.api as sm from TUP import full_data, consumption_data, regressions, reg_table, df_to_orgtbl, mobile_data food = ['cereals', 'maize', 'sorghum', 'millet', 'potato', 'sweetpotato', 'rice', 'bread', 'beans', 'oil', 'salt', 'sugar', 'meat', 'livestock', 'poultry', 'fish', 'egg', 'nuts', 'milk', 'vegetables', 'fruit', 'tea', 'spices', 'alcohol', 'otherfood'] month = ['fuel', 'medicine', 'airtime', 'cosmetics', 'soap', 'transport', 'entertainment', 'childcare', 'tobacco', 'batteries', 'church', 'othermonth'] year = ['clothesfootwear', 'womensclothes', 'childrensclothes', 'shoes', 'homeimprovement', 'utensils', 'furniture', 'textiles', 'ceremonies', 'funerals', 'charities', 'dowry', 'other'] D = full_data(DIR=DATADIR) C, HH, T = consumption_data(D,WRITE=True) #"csv") #logL = pd.read_pickle(DATADIR + "ss-lambdas.df") #logL.index.names=["HH","Year","Location"] #C = C.join(logL,how="left").rename(columns={"loglambda":"$\log\lambda_{it}$"}) C = C.reorder_levels([1,2,0]).sortlevel() keep = pd.notnull(C.index.get_level_values("Location")) C = C.loc[keep,:] # Make aggregate variables C["Food"] = C.filter(items=food).sum(axis=1).replace(0,np.nan) C["Month"] = C.filter(items=food).sum(axis=1) C["Year"] = C.filter(items=food).sum(axis=1) C["Tot"] = C[["Food","Month","Year"]].sum(axis=1).replace(0,np.nan) def align_indices(df1,df2): """ Reorder levels of df2 to match that of df1 Must have same index.names