#%% +Wealth
# Disabled wealth merge, kept as an inert string literal.
""" wealth = pd.read_stata('totalWEALTH.dta') wealth = wealth[["HHID","totW"]] wealth.rename(columns={'HHID':'hh'}, inplace=True) data = pd.merge(data, wealth, on='hh', how='inner') """

#%% Income:
# Labor & business income (source note: in US dollars).
lab_inc = pd.read_csv('income_hhsec_2009.csv', header=0, na_values='nan')
lab_inc[["wage_total", "bs_profit", "other_inc"]] = remove_outliers(
    lab_inc[["wage_total", "bs_profit", "other_inc"]],
    lq=0.001,
    hq=0.999,
)

# Agricultural income (source note: in UG Shillings).
# NOTE(review): the two notes above imply mixed currencies are summed below
# -- confirm that one of the inputs has already been converted.
ag_inc = pd.read_csv('income_agsec_09.csv', header=0, na_values='nan')
#ag_inc["hh"] = ag_inc["hh"].astype(int)

# Outer join keeps households that appear in only one of the two files.
inc = lab_inc.merge(ag_inc, on="hh", how="outer")
# Discard the 1st and 6th columns by position.
# NOTE(review): positional drop -- depends on the exact column layout of
# both CSV files; verify which columns these are.
inc = inc.drop(columns=inc.columns[[0, 5]])
inc = remove_outliers(inc, lq=0.001, hq=0.999)

# Household income: wages + business profit + agricultural income.
inc["inctotal"] = inc.loc[
    :, ["wage_total", "bs_profit", "total_agrls"]
].sum(axis=1)
# Same total plus the "other" column.
# NOTE(review): the labor file exposes "other_inc"; check that a column
# literally named "other" exists at this point.
inc["inctotal_trans"] = inc.loc[
    :, ["wage_total", "bs_profit", "other", "total_agrls"]
].sum(axis=1)
# Value columns that are rescaled below.
money_cols = [
    'animal_value_p_sell', 'chem_fert', 'cons_value_p_sell',
    'food_prod_value_p_sell', 'gift_value_p_sell', 'k', 'm', 'org_fert',
    'pesticides', 'seed_cost', 'seeds_value_p_sell', 'sell_value_p_sell',
    'stored_value_p_sell', 'total2_value_p_sell', 'trans_cost', 'y',
]
# Row-wise division by the `inflation` column, then by `dollars`
# (presumably the shilling-per-dollar rate -- defined outside this section).
data[money_cols] = data[money_cols].div(data.inflation, axis=0) / dollars

# Summary statistics before and after trimming the production variables.
prod_cols = ['y', 'k', 'A', 'm', 'l']
sumdata1 = data[prod_cols].describe()
# One value of 166000 dollars on y. Therefore, remove 1% on both tails.
data[prod_cols] = remove_outliers(data[prod_cols], lq=0.01, hq=0.99)
sumdata2 = data[prod_cols].describe()

# Log transforms: each variable is shifted by |min| before taking the log;
# log(0) = -inf is mapped to NaN. Rows that are NaN in the source column
# stay NaN in the log column through index alignment on assignment.
variables = ['k', 'm', 'l', 'A', 'y', 'y_over_A']
for var in variables:
    shifted = data[var].dropna() + np.abs(np.min(data[var]))
    data['ln' + var] = np.log(shifted).replace(-np.inf, np.nan)

data.to_csv(
    'C:/Users/rodri/OneDrive/Documentos/IDEA/Phd tesi/data/agric_data10.csv',
    index=False)
sumowncrops = ag5a[["cons_value","own_value"]].describe()/dollars ag5a["cons_value2"] = ag5a["own_value"]/2 ag5a.drop(["own_value"], axis=1) ag5a["total_value2"] = ag5a.loc[:,["sells_value_2","gift_value","cons_value2","food_prod_value","animal_value", "seeds_value", "stored_value"]].sum(axis=1) """
# (The line above is the tail of a triple-quoted block opened earlier in the
# file; it is inert text, not executed code.)

# Merge datasets -------------------------------------------
# Outer joins on 'hh' keep every household present in any of the tables.
agrica = pd.merge(ag2a, ag2b, on='hh', how='outer')
agrica = pd.merge(agrica, ag3a, on='hh', how='outer')
agrica = pd.merge(agrica, ag4a, on='hh', how='outer')
agrica = pd.merge(agrica, ag5a, on='hh', how='outer')

# Trim outliers with lq=0, hq=0.999.
# NOTE(review): presumably quantile bounds, i.e. only the top 0.1% is
# trimmed (not both sides) -- confirm against remove_outliers.
# 'hh' is moved into the index first, presumably so the household id is not
# treated as a data column by remove_outliers.
agrica.set_index("hh", inplace=True)
agrica = remove_outliers(agrica, lq=0, hq=0.999)

# Summary divided by `dollars`, to eyeball whether magnitudes make sense.
summarya1 = agrica.describe() / dollars
agrica.reset_index(inplace=True)
# Drop the intermediate tables now that they are merged.
del ag2a, ag2b, ag3a, ag4a, ag5a, prices

#agrica = pd.merge(agrica, basic, on='hh', how='outer')
# Costs are stored with a negative sign so they can be summed into profit.
agrica["cost_agra"] = -agrica.loc[:, ["fet_lab_c", "seeds_c", "trans_cost"]].sum(
    axis=1)
# Profit = output value + rents + (negative) costs.
agrica["profit_agra"] = agrica.loc[:, [
    "total2_value_p_sell", "rent_owner", "rent_noowner", "cost_agra"
]].sum(axis=1)
# Every exact zero in the frame (any column) becomes NaN.
agrica = agrica.replace(0, np.nan)
# Collapse the other-income records to one row per household.
other = other.groupby(by="hh").sum()
# Expose the household id as a regular column and drop the 'hh'-named index:
# pandas refuses to merge on a key that is both an index level and a column
# label, so only the column may remain.
other["hh"] = np.array(other.index.values)
other = other.reset_index(drop=True)
summaryo = other.describe() / dollars

# extra-expenditures ---------------------------------------
# No questionnaire section on extra expenditures.

#%% Merge datasets
# Outer joins keep households that report only some of the income sources.
income_gsec = pd.merge(lab99, bus12, on="hh", how="outer")
income_gsec = pd.merge(income_gsec, other, on="hh", how="outer")
# Remove the two helper wage columns and the intermediate objects.
del income_gsec["wage1"], income_gsec["wage2"]
del bus12, dollars, other, lab99, summarybus, summaryo, summaryw

# `dollars` was deleted with the temporaries above; it is redefined here.
dollars = 2586.89
income_cols = ["wage_total", "bs_profit", "other_inc"]
income_gsec[income_cols] = remove_outliers(
    income_gsec[income_cols], lq=0.005, hq=0.995)
# Divide all three income columns by `dollars`.
# NOTE(review): "other_inc" used to be left unconverted, with the converted
# values written to a stray column named "". That column is no longer
# created, so readers of income_hhsec.csv that select columns by position
# need re-checking.
income_gsec[income_cols] = income_gsec[income_cols] / dollars

sumlab = income_gsec[income_cols].describe()
print(sumlab.to_latex())
# Written once to the current working directory ...
income_gsec.to_csv('income_hhsec.csv')

#%% SAVING
# ... and once more into the project folder.
os.chdir('/Users/gabi/Dropbox/2019.1/Development/PS1/UG_2013_14_GGSB/')
income_gsec.to_csv('income_hhsec.csv')
# Merge datasets -------------------------------------------
# Plot-level tables are joined on (HHID, plotID); the right joins keep every
# row of ag5a and then of ag10.
agrica = (
    pd.merge(ag3a, ag4a, on=['HHID', 'plotID'], how='outer')
    .merge(ag5a, on=['HHID', 'plotID'], how='right')
    .merge(ag10, on='HHID', how='right')
)
# Index round-trip: leaves HHID/plotID as the leading columns with a fresh
# default index.
agrica = agrica.set_index(['HHID', 'plotID']).reset_index()
# keep=False discards *every* row whose (HHID, plotID) pair is repeated.
agrica = agrica.drop_duplicates(subset=['HHID', 'plotID'], keep=False)

del (ag3a, ag4a, ag5a, ag5acrop, conversion_kg, count_bigger, count_equal,
     count_smaller, crop_count, crop_sum, p, prices, prices_usd, priceslist,
     q, quant, values_ag5a)

# Input/output columns summarised before and after trimming.
trim_cols = [
    'org_fert', 'chem_fert', 'pesticides', 'hhlabor', 'hired_labor',
    'area_planted', 'seed_cost', 'trans_cost', 'total_value_p_sell',
    'total2_value_p_sell', 'farm_capital',
]
sumagrica = agrica[trim_cols].describe()
# crop in production and planting coincide so we can eliminate one of them
# (in importing 2agsec4 do not import crop)

# lq=0, hq=0.99 -- presumably upper-tail trimming only; see remove_outliers.
agrica[trim_cols] = remove_outliers(agrica[trim_cols], lq=0, hq=0.99)
sumagrica2 = agrica[trim_cols].describe()

#%% computing productivity levels
agrica['season'] = 1
agrica['k'] = agrica['farm_capital']
# Intermediates: fertiliser, pesticides and seed; missing entries count as 0.
agrica['m'] = (
    agrica['org_fert'].fillna(0)
    + agrica['chem_fert'].fillna(0)
    + agrica['pesticides'].fillna(0)
    + agrica['seed_cost'].fillna(0)
)
# Labor: household plus hired; missing entries count as 0.
agrica['l'] = agrica['hhlabor'].fillna(0) + agrica['hired_labor'].fillna(0)
agrica['A'] = agrica['area_planted']
agrica['y'] = agrica['total2_value_p_sell']
agrica['y_over_A'] = agrica['y'] / agrica['A']
# Own-produced consumption including durables.
data["ctotal_dur_own"] = data.loc[:, ["ctotal_own", "cdur_own"]].sum(axis=1)

# Consumption aggregates that are trimmed and rescaled below.
cons_cols = [
    "ctotal", "ctotal_dur", "ctotal_gift", "ctotal_dur_gift",
    "ctotal_nogift", "ctotal_dur_nogift", "ctotal_own", "ctotal_dur_own",
    "cfood", "cnodur", "cdur",
]

# Explicit copy: cdata_short is modified below, and assigning into a bare
# column selection of the still-live `data` frame triggers pandas'
# SettingWithCopyWarning. `data` itself is left untouched.
cdata_short = data[["hh"] + cons_cols].copy()

cdata_short[cons_cols] = remove_outliers(
    cdata_short[cons_cols], lq=0.005, hq=0.995)
# Divide by `dollars` (defined outside this section).
cdata_short[cons_cols] = cdata_short[cons_cols] / dollars
sumc = cdata_short.describe()
# Farm assets: keep only the household id and the asset value.
w_farm = ii[["hh", "farm_asset"]]
del ii, ag10

#%% LIVESTOCK ASSETS ???

#%% HH ASSETS
# Sum h14q5 per household; the single resulting column is named hh_asset.
c14 = (
    pd.read_stata('GSEC14A.dta')[["HHID", "h14q2", "h14q5"]]
    .groupby(by="HHID")[["h14q5"]]
    .sum()
    .fillna(0)
    .rename(columns={"h14q5": "hh_asset"})
)
# Household id as a regular column named like the key used in w_farm.
c14["hh"] = np.array(c14.index.values)

#%% MERGING DATA
# Inner join: only households present in both asset tables are kept.
wealth = pd.merge(w_farm, c14, on="hh", how="inner")[
    ["hh", "farm_asset", "hh_asset"]
]
wealth["wtotal"] = wealth[["farm_asset", "hh_asset"]].sum(axis=1)

# Trim, then divide by `dollars` (defined outside this section).
wealth[["farm_asset", "hh_asset", "wtotal"]] = remove_outliers(
    wealth[["farm_asset", "hh_asset", "wtotal"]],
    lq=0.001,
    hq=0.999,
)
wealth[["farm_asset", "hh_asset", "wtotal"]] = (
    wealth[["farm_asset", "hh_asset", "wtotal"]] / dollars
)
# Written once to the current working directory ...
wealth.to_csv("wealth.csv")

#%% SAVING
# ... and once more into the project folder.
os.chdir('/Users/gabi/Dropbox/2019.1/Development/PS1/UG_2013_14_GGSB/')
wealth.to_csv("wealth.csv")
]] = agrica[[
    'org_fert', 'chem_fert', 'seed_cost', 'trans_cost', 'pesticides',
    'total2_value_p_sell', 'sell_value_p_sell', 'cons_value_p_sell',
    'gift_value_p_sell', 'food_prod_value_p_sell', 'animal_value_p_sell',
    'seeds_value_p_sell', 'stored_value_p_sell', 'farm_capital'
]] / dollars
# (The statement above begins before this section; its right-hand side
# divides the listed value columns by `dollars`.)

# Remove outliers with lq=0, hq=0.975.
# NOTE(review): an earlier note here said "top 0.5%", which does not match
# hq=0.975 (presumably the top 2.5%) -- confirm the intended cut-off.
agrica[[
    'org_fert', 'chem_fert', 'seed_cost', 'trans_cost', 'pesticides',
    'total2_value_p_sell', 'farm_capital', 'area_planted', 'hhlabor',
    'hired_labor'
]] = remove_outliers(agrica[[
    'org_fert', 'chem_fert', 'seed_cost', 'trans_cost', 'pesticides',
    'total2_value_p_sell', 'farm_capital', 'area_planted', 'hhlabor',
    'hired_labor'
]], lq=0, hq=0.975)

# computing productivity levels
agrica['season'] = 1
agrica['k'] = agrica['farm_capital']
# Intermediates: fertiliser, pesticides and seed; missing entries count as 0.
agrica['m'] = agrica['org_fert'].fillna(0) + agrica['chem_fert'].fillna(
    0) + agrica['pesticides'].fillna(0) + agrica['seed_cost'].fillna(0)
# Labor: household plus hired; missing entries count as 0.
agrica['l'] = agrica['hhlabor'].fillna(0) + agrica['hired_labor'].fillna(0)
agrica['A'] = agrica['area_planted']
# Output is net of transport cost; NaN in either term propagates to y.
agrica['y'] = agrica['total2_value_p_sell'] - agrica['trans_cost']
# Division by a zero area yields +/-inf, which is mapped to NaN.
agrica['y_over_A'] = (agrica['y'] / agrica['A']).replace([-np.inf, np.inf],
                                                         np.nan)
sumowncrops = ag5a[["cons_value","own_value"]].describe()/dollars ag5a["cons_value2"] = ag5a["own_value"]/2 ag5a.drop(["own_value"], axis=1) ag5a["total_value2"] = ag5a.loc[:,["sells_value_2","gift_value","cons_value2","food_prod_value","animal_value", "seeds_value", "stored_value"]].sum(axis=1) """
# (The line above is the tail of a triple-quoted block opened earlier in the
# file; it is inert text, not executed code.)

# Merge datasets -------------------------------------------
# Outer joins on 'hh' keep every household present in any of the tables.
agrica = pd.merge(ag2a, ag2b, on='hh', how='outer')
agrica = pd.merge(agrica, ag3a, on='hh', how='outer')
agrica = pd.merge(agrica, ag4a, on='hh', how='outer')
agrica = pd.merge(agrica, ag5a, on='hh', how='outer')

# Trim outliers with lq=0.005, hq=0.995.
# NOTE(review): presumably quantile bounds, i.e. 0.5% on each side (an
# earlier note said 0.1%) -- confirm against remove_outliers.
# 'hh' is moved into the index first, presumably so the household id is not
# treated as a data column by remove_outliers.
agrica.set_index("hh", inplace=True)
agrica = remove_outliers(agrica, lq=0.005, hq=0.995)

# Summary divided by `dollars`, to eyeball whether magnitudes make sense.
summarya1 = agrica.describe() / dollars
agrica.reset_index(inplace=True)
# Drop the intermediate tables now that they are merged.
del ag2a, ag2b, ag3a, ag4a, ag5a, prices

#agrica = pd.merge(agrica, basic, on='hh', how='outer')
# Costs are stored with a negative sign so they can be summed into profit.
agrica["cost_agra"] = -agrica.loc[:, ["fet_lab_c", "seeds_c", "trans_cost"]].sum(
    axis=1)
# Profit = output value + rents + (negative) costs.
agrica["profit_agra"] = agrica.loc[:, [
    "total2_value_p_sell", "rent_owner", "rent_noowner", "cost_agra"
]].sum(axis=1)
# Every exact zero in the frame (any column) becomes NaN.
agrica = agrica.replace(0, np.nan)
# Merge datasets -------------------------------------------
# Chain of outer joins on 'hh': a household present in any table is kept.
agrica = (
    pd.merge(ag2a, ag2b, on='hh', how='outer')
    .merge(ag3a, on='hh', how='outer')
    .merge(ag4a, on='hh', how='outer')
    .merge(ag5a, on='hh', how='outer')
)

# Trim outliers with lq=0.01, hq=0.999 while 'hh' sits in the index.
# NOTE(review): presumably quantile bounds (bottom 1%, top 0.1%), which is
# not the symmetric 0.1% an earlier note described -- confirm against
# remove_outliers.
agrica = remove_outliers(agrica.set_index("hh"), lq=0.01, hq=0.999)

# Summary divided by `dollars`, to eyeball whether magnitudes make sense.
summarya1 = agrica.describe() / dollars
agrica = agrica.reset_index()
# The merged inputs are no longer needed.
del ag2a, ag2b, ag3a, ag4a, ag5a, prices

#agrica = pd.merge(agrica, basic, on='hh', how='outer')
# Costs carry a negative sign so that profit is a plain row sum.
agrica["cost_agra"] = -(
    agrica.loc[:, ["fet_lab_c", "seeds_c", "trans_cost"]].sum(axis=1)
)
agrica["profit_agra"] = agrica.loc[
    :, ["total2_value_p_sell", "rent_owner", "rent_noowner", "cost_agra"]
].sum(axis=1)
# Every exact zero in the frame (any column) becomes NaN.
agrica = agrica.replace(0, np.nan)

# Household id with its agricultural profit.
agA = agrica[["hh", "profit_agra"]]