Esempio n. 1
0
def topcode(var, Nstd=3, drop=False):
    """Top-code values more than ``Nstd`` standard deviations above the mean.

    The cutoff is ``var.mean() + Nstd * var.std()``, evaluated on ``var``
    before anything is changed.  Entries strictly greater than the cutoff are
    overwritten with NaN when ``drop`` is true, otherwise with the cutoff
    itself.

    ``var`` is modified in place and the same object is returned.
    """
    ceiling = var.mean() + Nstd * var.std()
    outliers = var > ceiling
    var[outliers] = np.nan if drop else ceiling
    return var

if True: #~ Make DataFrame
    D = full_data(DIR=DATADIR)

    # 'livestock_val_m' sums the livestock asset_val_* columns whose name
    # continues with "_m"; 'livestock_val' is the sum over every livestock
    # asset_val_* column minus that "_m" total.
    D['livestock_val_m'] = D.filter(
        regex='^asset_val_(cows|smallanimals|chickens|ducks|poultry)_m').sum(axis=1)
    D['livestock_val'] = D.filter(
        regex='^asset_val_(cows|smallanimals|chickens|ducks|poultry)').sum(axis=1) - D['livestock_val_m']

    A = asset_vars(D,year=2013)[0]
    D['Asset Tot'] = A['Total']
    D["Cash Savings"] = D.filter(regex="^savings_.*_b$").sum(axis=1)

    # `.ix` was removed in pandas 1.0; `.loc` performs the label-based lookup.
    # NOTE(review): assumes 2013 is a label of the outermost index level of the
    # consumption frame -- confirm against consumption_data.
    # `.copy()` so the aggregate columns added below are written to an
    # independent frame rather than to a slice of the original.
    C = consumption_data(D)[0].loc[2013].copy()
    food = [
        'c_cereals', 'c_maize', 'c_sorghum', 'c_millet', 'c_potato',
        'c_sweetpotato', 'c_rice', 'c_bread', 'c_beans', 'c_oil', 'c_salt',
        'c_sugar', 'c_meat', 'c_livestock', 'c_poultry', 'c_fish', 'c_egg',
        'c_nuts', 'c_milk', 'c_vegetables', 'c_fruit', 'c_tea', 'c_spices',
        'c_alcohol', 'c_otherfood',
    ]
    month = [
        'c_fuel', 'c_medicine', 'c_airtime', 'c_cosmetics', 'c_soap',
        'c_transport', 'c_entertainment', 'c_childcare', 'c_tobacco',
        'c_batteries', 'c_church', 'c_othermonth',
    ]
    year = [
        'c_clothesfootwear', 'c_womensclothes', 'c_childrensclothes',
        'c_shoes', 'c_homeimprovement', 'c_utensils', 'c_furniture',
        'c_textiles', 'c_ceremonies', 'c_funerals', 'c_charities', 'c_dowry',
        'c_other',
    ]
    # Group totals use only the listed items that exist as columns of C;
    # a total of exactly 0 is recorded as missing.
    C["Food"]  = C[[item for item in food  if item in C]].sum(axis=1).replace(0,np.nan)
    C["Month"] = C[[item for item in month if item in C]].sum(axis=1).replace(0,np.nan)
    C["Year"]  = C[[item for item in year  if item in C]].sum(axis=1).replace(0,np.nan)
    C["Tot"]   = C[["Food","Month","Year"]].sum(axis=1)
    D["Daily Exp"] = C["Tot"]
    D["Daily Food"] = C["Food"]

    # Drop every column of D whose name contains one of these substrings.
    # `axis` is passed by keyword: the positional form was removed in pandas 2.0.
    drop_vars = ['c_milk', 'c_alcohol', 'c_spices', 'c_entertainment', 'c_otherfood', 'asset_val_house', 'asset_val_plough']
    D.drop([item for item in D if any(var in item for var in drop_vars)], axis=1, inplace=True)

# Display labels for the "_b" columns of D: strip the leading prefix ("c_" or
# "asset_val_") and the trailing "_b", then capitalize.  The asset labels
# previously kept the "_b" suffix (e.g. "Cows_b"), unlike the consumption
# labels built on the line above.
consumption = {c: c[2:-2].capitalize() for c in D.filter(regex='^c_.*_b$').columns}
assets = {a: a[10:-2].capitalize() for a in D.filter(regex='^asset_val.*_b$').columns}
Esempio n. 2
0

if True:  #~ Make DataFrame
    D = full_data(DIR=DATADIR)

    # 'livestock_val_m' sums the livestock asset_val_* columns whose name
    # continues with "_m"; 'livestock_val' is the sum over every livestock
    # asset_val_* column minus that "_m" total.
    D['livestock_val_m'] = D.filter(
        regex='^asset_val_(cows|smallanimals|chickens|ducks|poultry)_m').sum(
            axis=1)
    D['livestock_val'] = D.filter(
        regex='^asset_val_(cows|smallanimals|chickens|ducks|poultry)').sum(
            axis=1) - D['livestock_val_m']

    A = asset_vars(D, year=2013)[0]
    D['Asset Tot'] = A['Total']
    D["Cash Savings"] = D.filter(regex="^savings_.*_b$").sum(axis=1)
    # `.ix` was removed in pandas 1.0; `.loc` performs the label-based lookup.
    # NOTE(review): assumes 2013 is a label of the outermost index level of the
    # consumption frame -- confirm against consumption_data.
    # `.copy()` gives an independent frame that is safe to add columns to.
    C = consumption_data(D)[0].loc[2013].copy()
    # Column names of the consumption items, by group.
    food = [
        'c_cereals', 'c_maize', 'c_sorghum', 'c_millet', 'c_potato',
        'c_sweetpotato', 'c_rice', 'c_bread', 'c_beans', 'c_oil', 'c_salt',
        'c_sugar', 'c_meat', 'c_livestock', 'c_poultry', 'c_fish', 'c_egg',
        'c_nuts', 'c_milk', 'c_vegetables', 'c_fruit', 'c_tea', 'c_spices',
        'c_alcohol', 'c_otherfood'
    ]
    month = [
        'c_fuel', 'c_medicine', 'c_airtime', 'c_cosmetics', 'c_soap',
        'c_transport', 'c_entertainment', 'c_childcare', 'c_tobacco',
        'c_batteries', 'c_church', 'c_othermonth'
    ]
    year = [
        'c_clothesfootwear', 'c_womensclothes', 'c_childrensclothes',
        'c_shoes', 'c_homeimprovement', 'c_utensils', 'c_furniture',
Esempio n. 3
0
# Consumption item names, by group.
food = [
    'cereals', 'maize', 'sorghum', 'millet', 'potato', 'sweetpotato', 'rice',
    'bread', 'beans', 'oil', 'salt', 'sugar', 'meat', 'livestock', 'poultry',
    'fish', 'egg', 'nuts', 'milk', 'vegetables', 'fruit', 'tea', 'spices',
    'alcohol', 'otherfood'
]
month = [
    'fuel', 'medicine', 'airtime', 'cosmetics', 'soap', 'transport',
    'entertainment', 'childcare', 'tobacco', 'batteries', 'church',
    'othermonth'
]
# Columns of the consumption frame that enter the estimation.
ConsumptionItems = food + ['airtime', 'fuel']
mobile = True

D = full_data(DIR=DATADIR)
C, HH, T = consumption_data(D, WRITE=False, include2016=False)
HH['log HHSIZE'] = HH["hh_size"].apply(np.log)
# `axis` is passed by keyword: the positional form was removed in pandas 2.0.
HH = HH.drop("hh_size", axis=1)

# y: log of the selected items, with zeros treated as missing before the log.
# z: the household frame.  Both are sorted on their three index levels.
y = C.replace(0, np.nan).apply(np.log).sort_index(
    level=[0, 1, 2])[ConsumptionItems].copy()
z = HH.sort_index(level=[0, 1, 2]).copy()
y.index.names, z.index.names = ['j', 't', 'mkt'], ['j', 't', 'mkt']

# Keep only rows with a non-null 'mkt' index value, then align z to y's rows.
keep = pd.notnull(y.index.get_level_values("mkt"))
y, z = y.loc[keep, :].align(z, join="left", axis=0)
b, ce, d, sed = nd.estimate_reduced_form(y, z, return_se=True, VERBOSE=True)
ce = ce.dropna(how='all')
print("Getting Loglambdas")
bphi, logL = nd.get_loglambdas(ce, TEST="warn")
try:
    xrange
    logL.to_pickle(DATADIR + "ss-lambdas.df")
Esempio n. 4
0

# Lookup table applied to the merge_* columns to build the In14/In15 columns.
InSample = {1: 0, 2: np.nan, 3: 1}

if True:  #~ Make DataFrame
    D = full_data(DIR=DATADIR)
    # Codes missing from InSample come back as None from dict.get.
    for flag, merge_col in (("In14", "merge_midline"),
                            ("In15", "merge_endline")):
        D[flag] = D[merge_col].apply(InSample.get)

    # Asset table for 2013, with winsorize applied to each column.
    A = asset_vars(D, year=2013)[0]
    A = A.apply(winsorize)
    D['Asset Tot'], D['Asset Prod'] = A['Total'], A['Productive']

    # Row-wise sums over the "_b" savings and land columns.
    savings_b = D.filter(regex="^savings_.*_b$")
    D["Cash Savings"] = savings_b.sum(axis=1)
    land_b = D.filter(regex="^land_.*_b$")
    D["Land Access (fedan)"] = land_b.sum(axis=1)

    #~C = consumption_data(D)[0].ix[2014]
    C = consumption_data(D)[0]
    C = C.reorder_levels([1, 2, 0])
    C = C.sort_index()

    # Consumption item names, by group.
    food = [
        'cereals', 'maize', 'sorghum', 'millet', 'potato',
        'sweetpotato', 'rice', 'bread', 'beans', 'oil',
        'salt', 'sugar', 'meat', 'livestock', 'poultry',
        'fish', 'egg', 'nuts', 'milk', 'vegetables',
        'fruit', 'tea', 'spices', 'alcohol', 'otherfood',
    ]
    month = [
        'fuel', 'medicine', 'airtime', 'cosmetics',
        'soap', 'transport', 'entertainment', 'childcare',
        'tobacco', 'batteries', 'church', 'othermonth',
    ]
    year = [
        'clothesfootwear', 'womensclothes', 'childrensclothes', 'shoes',
        'homeimprovement', 'utensils', 'furniture', 'textiles', 'ceremonies',
        'funerals', 'charities', 'dowry', 'other'
Esempio n. 5
0
import numpy as np
import pandas as pd
import cfe.estimation as nd
import statsmodels.api as sm
import sys
DATADIR = "../../data/"
sys.path.append("../../data")
from TUP import full_data, consumption_data, regressions, reg_table, df_to_orgtbl, mobile_data
# Consumption item names, by group.
food = [
    'cereals', 'maize', 'sorghum', 'millet', 'potato', 'sweetpotato', 'rice',
    'bread', 'beans', 'oil', 'salt', 'sugar', 'meat', 'livestock', 'poultry',
    'fish', 'egg', 'nuts', 'milk', 'vegetables', 'fruit', 'tea', 'spices',
    'alcohol', 'otherfood',
]
month = [
    'fuel', 'medicine', 'airtime', 'cosmetics', 'soap', 'transport',
    'entertainment', 'childcare', 'tobacco', 'batteries', 'church',
    'othermonth',
]
# Columns of the consumption frame that enter the estimation.
ConsumptionItems = food+['airtime','fuel']
mobile=True

D = full_data(DIR=DATADIR)
C, HH, T = consumption_data(D,WRITE=False,include2016=False)
HH['log HHSIZE'] = HH["hh_size"].apply(np.log)
# `axis` is passed by keyword: the positional form was removed in pandas 2.0.
HH = HH.drop("hh_size", axis=1)

# y: log of the selected items, with zeros treated as missing before the log.
# z: the household frame.  Both are sorted on their three index levels.
y = C.replace(0,np.nan).apply(np.log).sort_index(level=[0,1,2])[ConsumptionItems].copy()
z = HH.sort_index(level=[0,1,2]).copy()
y.index.names, z.index.names = ['j','t','mkt'], ['j','t','mkt']

# Keep only rows with a non-null 'mkt' index value, then align z to y's rows.
keep = pd.notnull(y.index.get_level_values("mkt"))
y,z = y.loc[keep,:].align(z,join="left",axis=0)
b,ce,d,sed= nd.estimate_reduced_form(y,z,return_se=True,VERBOSE=True)
ce = ce.dropna(how='all')
print("Getting Loglambdas")
bphi,logL=nd.get_loglambdas(ce,TEST="warn")

# Python 2 runs write "ss-lambdas.df"; Python 3 runs write "ss-lambdas3.df".
# The interpreter version is checked explicitly instead of probing for
# `xrange` inside a try block: that try also wrapped the write itself, so any
# NameError raised while pickling would have been mistaken for "Python 3".
if sys.version_info[0] < 3:
    logL.to_pickle(DATADIR + "ss-lambdas.df")
else:
    logL.to_pickle(DATADIR + "ss-lambdas3.df")
Esempio n. 6
0
import sys
DATADIR = "../../data/"
sys.path.append("../../data")
import numpy as np
import pandas as pd
import statsmodels.api as sm
from TUP import full_data, consumption_data, regressions, reg_table, df_to_orgtbl, mobile_data
# Items summed into the "Tot" aggregate below.
ITEMS = ["beans", "sugar", "fish", "nuts", "vegetables", "airtime", "fuel"]

D = full_data(DIR=DATADIR)
HH, T = consumption_data(D, WRITE=True)[1:]  #"csv")
M, C, mHH = mobile_data(DIR=DATADIR + "Mobile/")
try:
    logL = pd.read_pickle(DATADIR + "ss-lambdas_mobile.df")
except EnvironmentError:
    raise IOError("Need to run SS-lambdas.py")
logL.index.names = ["HH", "Year", "Location"]
# The name must be a plain string: a list is unhashable (pandas rejects it as
# a Series name) and could never match the "loglambda" key renamed below.
logL.name = "loglambda"
# Raw strings keep the LaTeX backslashes without relying on "\l" being an
# unrecognised escape sequence; the resulting text is unchanged.
C = C.join(logL,
           how="left").rename(columns={"loglambda": r"$\log\lambda_{it}$"})
# `sortlevel()` was removed in pandas 1.0; `sort_index()` sorts the MultiIndex.
C = C.reorder_levels([1, 0, 2]).sort_index()
# Keep only rows with a non-null "Location" index value.
keep = pd.notnull(C.index.get_level_values("Location"))
C = C.loc[keep, :]
# Make aggregate variables
C["Tot"] = C.filter(ITEMS).sum(axis=1).replace(0, np.nan)
C["logTot"] = C["Tot"].apply(np.log)
C = C.join(T, how="left", lsuffix="_")
C['const'] = 1.

Outcomes = ["Tot", "logTot", r"$\log\lambda_{it}$"]
Controls = ['const', 'TUP', 'CSH']
Esempio n. 7
0
        return -1*winsorize(-1*Ser, **kwargs)

# Lookup table applied to the merge_* columns to build the In14/In15 columns.
InSample = {1: 0, 2: np.nan, 3: 1}

if True: #~ Make DataFrame
    D = full_data(DIR=DATADIR)
    D["In14"] = D["merge_midline"].apply(lambda x: InSample.get(x))
    D["In15"] = D["merge_endline"].apply(lambda x: InSample.get(x))

    # Asset table for 2013, with winsorize applied to each column.
    A = asset_vars(D,year=2013)[0].apply(winsorize)
    D['Asset Tot'] = A['Total']
    D['Asset Prod'] = A['Productive']
    D["Cash Savings"] = D.filter(regex="^savings_.*_b$").sum(axis=1)
    D["Land Access (fedan)"] = D.filter(regex="^land_.*_b$").sum(axis=1)
    #~C = consumption_data(D)[0].ix[2014]
    # `sortlevel()` was removed in pandas 1.0; `sort_index()` sorts the
    # reordered MultiIndex.
    C = consumption_data(D)[0].reorder_levels([1,2,0]).sort_index()
    food = [
        'cereals', 'maize', 'sorghum', 'millet', 'potato', 'sweetpotato',
        'rice', 'bread', 'beans', 'oil', 'salt', 'sugar', 'meat', 'livestock',
        'poultry', 'fish', 'egg', 'nuts', 'milk', 'vegetables', 'fruit', 'tea',
        'spices', 'alcohol', 'otherfood',
    ]
    month = [
        'fuel', 'medicine', 'airtime', 'cosmetics', 'soap', 'transport',
        'entertainment', 'childcare', 'tobacco', 'batteries', 'church',
        'othermonth',
    ]
    year = [
        'clothesfootwear', 'womensclothes', 'childrensclothes', 'shoes',
        'homeimprovement', 'utensils', 'furniture', 'textiles', 'ceremonies',
        'funerals', 'charities', 'dowry', 'other',
    ]
    # Group totals use only the listed items that exist as columns of C;
    # a total of exactly 0 is recorded as missing.
    C["Food"]  = C[[item for item in food  if item in C]].sum(axis=1).replace(0,np.nan)
    C["Month"] = C[[item for item in month if item in C]].sum(axis=1).replace(0,np.nan)
    C["Year"]  = C[[item for item in year  if item in C]].sum(axis=1).replace(0,np.nan)
    C["Tot"]   = C[["Food","Month","Year"]].sum(axis=1)
    # 2013 rows only, reduced to the first entry per "HH" index value.
    D["Daily Exp"] = C["Tot"].loc[2013].groupby(level="HH").first()
    D["Daily Food"] = C["Food"].loc[2013].groupby(level="HH").first()

    # Drop every column of D whose name contains one of these substrings.
    # `axis` is passed by keyword: the positional form was removed in pandas 2.0.
    drop_vars = ['c_milk', 'c_alcohol', 'c_spices', 'c_entertainment', 'c_otherfood', 'asset_val_house', 'asset_val_plough']
    D.drop([item for item in D if any(var in item for var in drop_vars)], axis=1, inplace=True)

# Map each "_b" column of D to a display label: cut the leading prefix
# (2 characters for "c_", 10 for "asset_val_") and the last two characters,
# then capitalize what is left.
consumption = {
    col: col[2:-2].capitalize()
    for col in D.filter(regex='^c_.*_b$').columns
}
assets = {
    col: col[10:-2].capitalize()
    for col in D.filter(regex='^asset_val.*_b$').columns
}
Esempio n. 8
0
import sys
DATADIR = "../../data/"
sys.path.append("../../data")
import numpy as np
import pandas as pd
import statsmodels.api as sm
from TUP import full_data, consumption_data, regressions, reg_table, df_to_orgtbl, mobile_data
# Items summed into the "Tot" aggregate below.
ITEMS = ["beans", "sugar", "fish", "nuts", "vegetables", "airtime", "fuel"]

D = full_data(DIR=DATADIR)
HH, T = consumption_data(D,WRITE=True)[1:] #"csv")
M, C,mHH= mobile_data(DIR = DATADIR+"Mobile/")
try:
    logL = pd.read_pickle(DATADIR+"ss-lambdas_mobile.df")
except EnvironmentError:
    raise IOError("Need to run SS-lambdas.py")
logL.index.names=["HH","Year","Location"]
# The name must be a plain string: a list is unhashable (pandas rejects it as
# a Series name) and could never match the "loglambda" key renamed below.
logL.name = "loglambda"
# Raw strings keep the LaTeX backslashes without relying on "\l" being an
# unrecognised escape sequence; the resulting text is unchanged.
C    = C.join(logL,how="left").rename(columns={"loglambda":r"$\log\lambda_{it}$"})
# `sortlevel()` was removed in pandas 1.0; `sort_index()` sorts the MultiIndex.
C    = C.reorder_levels([1,0,2]).sort_index()
# Keep only rows with a non-null "Location" index value.
keep = pd.notnull(C.index.get_level_values("Location"))
C    = C.loc[keep,:]
# Make aggregate variables
C["Tot"]    = C.filter(ITEMS).sum(axis=1).replace(0,np.nan)
C["logTot"] = C["Tot"].apply(np.log)
C           = C.join(T, how="left",lsuffix="_")
C['const']  = 1.

Outcomes =["Tot",  "logTot", r"$\log\lambda_{it}$"]
Controls= ['const', 'TUP', 'CSH']

regs = regressions(C,outcomes=Outcomes, controls=Controls, Baseline=2013)
Esempio n. 9
0
import sys
DATADIR = "../../data/"
sys.path.append("../../data")
import numpy as np
import pandas as pd
import statsmodels.api as sm
from TUP import full_data, consumption_data, regressions, reg_table, df_to_orgtbl, mobile_data
# Consumption item names, by group.
food = [
    'cereals', 'maize', 'sorghum', 'millet', 'potato', 'sweetpotato', 'rice',
    'bread', 'beans', 'oil', 'salt', 'sugar', 'meat', 'livestock', 'poultry',
    'fish', 'egg', 'nuts', 'milk', 'vegetables', 'fruit', 'tea', 'spices',
    'alcohol', 'otherfood',
]
month = [
    'fuel', 'medicine', 'airtime', 'cosmetics', 'soap', 'transport',
    'entertainment', 'childcare', 'tobacco', 'batteries', 'church',
    'othermonth',
]
year = [
    'clothesfootwear', 'womensclothes', 'childrensclothes', 'shoes',
    'homeimprovement', 'utensils', 'furniture', 'textiles', 'ceremonies',
    'funerals', 'charities', 'dowry', 'other',
]

D = full_data(DIR=DATADIR)
C, HH, T = consumption_data(D,WRITE=True) #"csv")
#logL = pd.read_pickle(DATADIR + "ss-lambdas.df")
#logL.index.names=["HH","Year","Location"]
#C = C.join(logL,how="left").rename(columns={"loglambda":"$\log\lambda_{it}$"})
# `sortlevel()` was removed in pandas 1.0; `sort_index()` sorts the MultiIndex.
C = C.reorder_levels([1,2,0]).sort_index()
# Keep only rows with a non-null "Location" index value.
keep = pd.notnull(C.index.get_level_values("Location"))
C = C.loc[keep,:]

# Make aggregate variables
# Each group total sums its own item list.  "Month" and "Year" previously
# re-summed the food list, which made "Tot" three times the food total.
C["Food"]   = C.filter(items=food).sum(axis=1).replace(0,np.nan)
C["Month"]  = C.filter(items=month).sum(axis=1)
C["Year"]   = C.filter(items=year).sum(axis=1)
C["Tot"]    = C[["Food","Month","Year"]].sum(axis=1).replace(0,np.nan)

def align_indices(df1,df2):
   """
   Reorder levels of df2 to match that of df1
   Must have same index.names