def compute_probable_emergence(export_dataset, prox_cutoff, style='average'):
    """
    Compute probable product emergence and probable + persistent emergence.

    Parameters
    ----------
    export_dataset : object
        Must provide ``to_dynamic_productlevelexportsystem()``.
    prox_cutoff : object
        Passed unchanged to ``compute_probable_improbable_emergence``.
    style : object, optional
        Passed unchanged to ``compute_probable_improbable_emergence``.
        Previously this name was read without being defined in the function,
        raising ``NameError`` unless a module-level ``style`` existed. The
        default 'average' is the value ``compute_regression_dataset`` uses.

    Returns
    -------
    tuple
        ``(Mc_ProbableProducts, Mc_ProbablePersistent)``, each the result of
        ``from_dict_of_series_to`` with series names 'ProbableProducts' and
        'ProbablePersistent' respectively.
    """
    #-Export System-#
    dynples = export_dataset.to_dynamic_productlevelexportsystem()
    dynples = dynples.dynamic_global_panel()
    dynples.rca_matrices(complete_data=True)
    dynples.mcp_matrices()
    dynples.proximity_matrices()

    #-Compute Product Changes-#
    # NOTE(review): the both/new/die aggregates were computed here but never
    # used, so they have been dropped. The call itself is retained in case it
    # populates state on dynples -- confirm and remove if it does not.
    dynples.compute_product_changes()

    #-Compute Probable Emergence-#
    Mc_ProbableProducts, Mc_ImProbableProducts = dynples.compute_probable_improbable_emergence(
        prox_cutoff=prox_cutoff, style=style)
    Mc_ProbableProducts = from_dict_of_series_to(
        Mc_ProbableProducts, series_name='ProbableProducts')

    #-Compute Probable + Persistence-#
    Mcp_ProbableProducts, Mcp_ImProbableProducts = dynples.compute_probable_improbable_emergence(
        prox_cutoff=prox_cutoff, style=style, output='reduced')
    Mc_ProbablePersistent = from_dict_of_series_to(
        compute_persistence(dynples.mcp, Mcp_ProbableProducts, output="summary"),
        series_name="ProbablePersistent")

    return Mc_ProbableProducts, Mc_ProbablePersistent
def productspace_5yearavg_improbableemergence(Mcp_ImProbableProducts, GDPGrowth, window=(5,5), verbose=False):
    """
    Tabulate GDP growth in the years surrounding improbable emergence events.

    Construct a table:
                        Y(-5), Y(-4) ... Y(0) ... Y(4), Y(5)
        <year, country>             GDPGrowth

    An event is a (year, country) pair for which at least one row of
    ``from_dict_to_dataframe(Mcp_ImProbableProducts)`` has
    ``'ImProbProducts' > 0``. Each event yields exactly one output row.

    Parameters
    ----------
    Mcp_ImProbableProducts : Dict(pd.DataFrame(index=(country), columns=(productcode)))
    GDPGrowth : pd.Series(index=(country, year))
    window : (years_before, years_after)
        Number of years reported before and after the event year.
    verbose : unused; kept for interface compatibility.

    Returns
    -------
    pd.DataFrame indexed by ('year', 'country') with one column per offset,
    named 'Y(<offset>)'. Cells are NaN when the year lies outside 1962-2012
    or when GDPGrowth has no entry for (country, year).
    """
    # Year bounds outside of which no value is reported
    first_year, last_year = 1962, 2012

    #-Find Improbable Product Emergence Events-#
    # Only the distinct (year, country) pairs matter downstream, so collect
    # them in a set instead of summing duplicate rows.
    events = set()
    for idx, s in from_dict_to_dataframe(Mcp_ImProbableProducts).iterrows():
        year, country, _ = idx
        if s['ImProbProducts'] > 0:
            events.add((year, country))
    events = sorted(events)

    #-Find GDPGrowth n Years before and n years after-#
    # Labels follow the requested window rather than a hard-coded 5/5 list.
    offsets = range(-window[0], window[1] + 1)
    cols = ['Y(%s)' % offset for offset in offsets]

    rows = []
    for year, country in events:
        row = []
        # Exactly one cell per offset keeps every value under its own label.
        # The previous loop started one year early and skipped event years,
        # which shifted values under the wrong column.
        for offset in offsets:
            yr = year + offset
            if yr < first_year or yr > last_year:
                row.append(np.nan)
                continue
            try:
                # .loc replaces the removed .ix indexer; a missing
                # (country, year) key is reported as NaN.
                gdpgrowth = GDPGrowth.loc[(country, yr)]
            except KeyError:
                gdpgrowth = np.nan
            row.append(gdpgrowth)
        rows.append(row)

    # Supplying names lets from_tuples build an empty index when no events exist
    index = pd.MultiIndex.from_tuples(events, names=['year', 'country'])
    df = pd.DataFrame(rows, index=index, columns=cols)
    df.sort_index(inplace=True)
    return df
def compute_probable_emergence(export_dataset, prox_cutoff, style='average'):
    """
    Compute probable product emergence and probable + persistent emergence.

    Parameters
    ----------
    export_dataset : object
        Must provide ``to_dynamic_productlevelexportsystem()``.
    prox_cutoff : object
        Passed unchanged to ``compute_probable_improbable_emergence``.
    style : object, optional
        Passed unchanged to ``compute_probable_improbable_emergence``.
        Previously this name was read without being defined in the function,
        raising ``NameError`` unless a module-level ``style`` existed. The
        default 'average' is the value ``compute_regression_dataset`` uses.

    Returns
    -------
    tuple
        ``(Mc_ProbableProducts, Mc_ProbablePersistent)``, each the result of
        ``from_dict_of_series_to`` with series names 'ProbableProducts' and
        'ProbablePersistent' respectively.
    """
    #-Export System-#
    dynples = export_dataset.to_dynamic_productlevelexportsystem()
    dynples = dynples.dynamic_global_panel()
    dynples.rca_matrices(complete_data=True)
    dynples.mcp_matrices()
    dynples.proximity_matrices()

    #-Compute Product Changes-#
    # NOTE(review): the both/new/die aggregates were computed here but never
    # used, so they have been dropped. The call itself is retained in case it
    # populates state on dynples -- confirm and remove if it does not.
    dynples.compute_product_changes()

    #-Compute Probable Emergence-#
    Mc_ProbableProducts, Mc_ImProbableProducts = dynples.compute_probable_improbable_emergence(
        prox_cutoff=prox_cutoff, style=style)
    Mc_ProbableProducts = from_dict_of_series_to(
        Mc_ProbableProducts, series_name='ProbableProducts')

    #-Compute Probable + Persistence-#
    Mcp_ProbableProducts, Mcp_ImProbableProducts = dynples.compute_probable_improbable_emergence(
        prox_cutoff=prox_cutoff, style=style, output='reduced')
    Mc_ProbablePersistent = from_dict_of_series_to(
        compute_persistence(dynples.mcp, Mcp_ProbableProducts, output="summary"),
        series_name="ProbablePersistent")

    return Mc_ProbableProducts, Mc_ProbablePersistent
def compute_regression_dataset(export_dataset):
    """
    Assemble the country-year regression dataset (helper function).

    Builds a dynamic product-level export system from ``export_dataset``,
    derives product-change, emergence, persistence, centrality and diffusion
    measures from it, and merges them with WDI series and the trade
    liberalisation table read from ``WAZIARG_DIR``.

    Returns the merged DataFrame with its index rebuilt as a
    ('country', 'year') MultiIndex.
    """
    def _totals_by_country_year(changes):
        # Sum one output of compute_product_changes within (country, to_year)
        frame = reindex_dynamic_dataframe(from_dict_to_dataframe(changes))
        return frame.groupby(level=['country', 'to_year']).sum()

    def _labelled(data, label):
        # Convert through from_dict_of_series_to under the given series name
        return from_dict_of_series_to(data, series_name=label)

    #-Export System-#
    system = export_dataset.to_dynamic_productlevelexportsystem().dynamic_global_panel()
    system.rca_matrices(complete_data=True)
    system.mcp_matrices()
    system.proximity_matrices()

    #-Product Changes-#
    both_raw, new_raw, die_raw = system.compute_product_changes()
    both_totals = _totals_by_country_year(both_raw)
    new_totals = _totals_by_country_year(new_raw)
    die_totals = _totals_by_country_year(die_raw)

    #-Probable and Improbable Emergence-#
    probable_raw, improbable_raw = system.compute_probable_improbable_emergence(
        prox_cutoff='median', style='average')
    probable = _labelled(probable_raw, 'ProbableProducts')
    improbable = _labelled(improbable_raw, 'ImprobableProducts')

    #-Probable\Improbable + Persistence-#
    probable_reduced, improbable_reduced = system.compute_probable_improbable_emergence(
        prox_cutoff='median', style='average', output='reduced')
    probable_persistent = _labelled(
        compute_persistence(system.mcp, probable_reduced, output="summary"),
        "ProbablePersistent")
    improbable_persistent = _labelled(
        compute_persistence(system.mcp, improbable_reduced, output="summary"),
        "ImProbablePersistent")

    #-Persistence of All New Products (kept as a check)-#
    new_persistent = _labelled(
        compute_persistence(system.mcp,
                            reindex_dynamic_dict(new_raw, base='finish'),
                            output="summary"),
        "NewPersistent")

    #-Centrality-#
    centrality = _labelled(
        system.compute_average_centrality(sum_not_mean=True), 'AvgCentrality')

    #-Diffusion Properties-#
    prox_avg_raw, prox_var_raw, prox_width_raw = compute_diffusion_properties_nx(
        system.mcp, system.proximity)
    prox_avg = _labelled(prox_avg_raw, 'AvgProx')
    prox_var = _labelled(prox_var_raw, 'VarProx')
    prox_width = _labelled(prox_width_raw, 'WidthProx')

    #-WDI Series (fetched, and later merged, in this order)-#
    wdi = WDI(WDI_SOURCE_DIR)
    wdi_codes = (
        'NY.GDP.MKTP.CD',        # GDP
        'NY.GDP.PCAP.CD',        # GDPPC
        'NY.GDP.PCAP.KD',        # GDPPCConst
        'NY.GDP.PCAP.PP.CD',     # GDPPCPPP - Only Available 1990 (USE PENN)
        'NY.GDP.PCAP.PP.KD',     # GDPPCPPPConst - Only Available 1990 (Use PENN)
        'NY.GDP.MKTP.KD.ZG',     # GDPGrowth
        'NY.GDP.PCAP.KD.ZG',     # GDPPCGrowth
        'NY.GNP.ATLS.CD',        # GNIAtlas
        'NY.GNP.MKTP.PP.CD',     # GNIPPP
        'NY.GNP.PCAP.CD',        # GNIPCAtlas
        'TT.PRI.MRCH.XD.WD',     # NetBarterToT
        'IS.AIR.DPRT',           # Infrastructure: AirDepartures
        'IS.RRS.TOTL.KM',        # Infrastructure: RailLinesKm
        'EG.USE.ELEC.KH.PC',     # Infrastructure: ElectricityUsePC
        'SP.POP.TOTL',           # Others: Population
        'AG.LND.TOTL.K2',        # Others: LandArea
    )
    wdi_tables = [wdi.series_long(code) for code in wdi_codes]

    #-Trade Liberalisation Data (with TradeLib Stats added)-#
    liberalisation = tradelib_stats(pd.read_csv(WAZIARG_DIR+'trade_lib_wacziarg.csv'))
    liberalisation = liberalisation.set_index(keys=['iso3c', 'year'])

    #-Outer-join the export-system measures (Country-Year Data)-#
    df = centrality
    for measure in (probable, improbable, probable_persistent,
                    improbable_persistent, prox_avg, prox_var, prox_width):
        df = df.join(measure, how='outer')

    #-Single Index for Merging-#
    df.index = pd.Index(df.index)
    left_merged = wdi_tables + [both_totals, new_totals, die_totals,
                                new_persistent, liberalisation]
    for table in left_merged:
        table.index = pd.Index(table.index)
        df = df.merge(table, how='left', left_index=True, right_index=True)

    #-Restore MultiIndex-#
    df.index = pd.MultiIndex.from_tuples(df.index, names=['country', 'year'])
    return df
def productspace_5yearavg_improbableemergence(Mcp_ImProbableProducts, GDPGrowth, window=(5, 5), verbose=False):
    """
    Tabulate GDP growth in the years surrounding improbable emergence events.

    Construct a table:
                        Y(-5), Y(-4) ... Y(0) ... Y(4), Y(5)
        <year, country>             GDPGrowth

    An event is a (year, country) pair for which at least one row of
    ``from_dict_to_dataframe(Mcp_ImProbableProducts)`` has
    ``'ImProbProducts' > 0``. Each event yields exactly one output row.

    Parameters
    ----------
    Mcp_ImProbableProducts : Dict(pd.DataFrame(index=(country), columns=(productcode)))
    GDPGrowth : pd.Series(index=(country, year))
    window : (years_before, years_after)
        Number of years reported before and after the event year.
    verbose : unused; kept for interface compatibility.

    Returns
    -------
    pd.DataFrame indexed by ('year', 'country') with one column per offset,
    named 'Y(<offset>)'. Cells are NaN when the year lies outside 1962-2012
    or when GDPGrowth has no entry for (country, year).
    """
    # Year bounds outside of which no value is reported
    first_year, last_year = 1962, 2012

    #-Find Improbable Product Emergence Events-#
    # Only the distinct (year, country) pairs matter downstream, so collect
    # them in a set instead of summing duplicate rows.
    events = set()
    for idx, s in from_dict_to_dataframe(Mcp_ImProbableProducts).iterrows():
        year, country, _ = idx
        if s['ImProbProducts'] > 0:
            events.add((year, country))
    events = sorted(events)

    #-Find GDPGrowth n Years before and n years after-#
    # Labels follow the requested window rather than a hard-coded 5/5 list.
    offsets = range(-window[0], window[1] + 1)
    cols = ['Y(%s)' % offset for offset in offsets]

    rows = []
    for year, country in events:
        row = []
        # Exactly one cell per offset keeps every value under its own label.
        # The previous loop started one year early and skipped event years,
        # which shifted values under the wrong column.
        for offset in offsets:
            yr = year + offset
            if yr < first_year or yr > last_year:
                row.append(np.nan)
                continue
            try:
                # .loc replaces the removed .ix indexer; a missing
                # (country, year) key is reported as NaN.
                gdpgrowth = GDPGrowth.loc[(country, yr)]
            except KeyError:
                gdpgrowth = np.nan
            row.append(gdpgrowth)
        rows.append(row)

    # Supplying names lets from_tuples build an empty index when no events exist
    index = pd.MultiIndex.from_tuples(events, names=['year', 'country'])
    df = pd.DataFrame(rows, index=index, columns=cols)
    df.sort_index(inplace=True)
    return df
def compute_regression_dataset(export_dataset):
    """
    Assemble the country-year regression dataset (helper function).

    Builds a dynamic product-level export system from ``export_dataset``,
    derives product-change, emergence, persistence, centrality and diffusion
    measures from it, and merges them with WDI series and the trade
    liberalisation table read from ``WAZIARG_DIR``.

    Returns the merged DataFrame with its index rebuilt as a
    ('country', 'year') MultiIndex.
    """
    def _totals_by_country_year(changes):
        # Sum one output of compute_product_changes within (country, to_year)
        frame = reindex_dynamic_dataframe(from_dict_to_dataframe(changes))
        return frame.groupby(level=['country', 'to_year']).sum()

    def _labelled(data, label):
        # Convert through from_dict_of_series_to under the given series name
        return from_dict_of_series_to(data, series_name=label)

    #-Export System-#
    system = export_dataset.to_dynamic_productlevelexportsystem().dynamic_global_panel()
    system.rca_matrices(complete_data=True)
    system.mcp_matrices()
    system.proximity_matrices()

    #-Product Changes-#
    both_raw, new_raw, die_raw = system.compute_product_changes()
    both_totals = _totals_by_country_year(both_raw)
    new_totals = _totals_by_country_year(new_raw)
    die_totals = _totals_by_country_year(die_raw)

    #-Probable and Improbable Emergence-#
    probable_raw, improbable_raw = system.compute_probable_improbable_emergence(
        prox_cutoff='median', style='average')
    probable = _labelled(probable_raw, 'ProbableProducts')
    improbable = _labelled(improbable_raw, 'ImprobableProducts')

    #-Probable\Improbable + Persistence-#
    probable_reduced, improbable_reduced = system.compute_probable_improbable_emergence(
        prox_cutoff='median', style='average', output='reduced')
    probable_persistent = _labelled(
        compute_persistence(system.mcp, probable_reduced, output="summary"),
        "ProbablePersistent")
    improbable_persistent = _labelled(
        compute_persistence(system.mcp, improbable_reduced, output="summary"),
        "ImProbablePersistent")

    #-Persistence of All New Products (kept as a check)-#
    new_persistent = _labelled(
        compute_persistence(system.mcp,
                            reindex_dynamic_dict(new_raw, base='finish'),
                            output="summary"),
        "NewPersistent")

    #-Centrality-#
    centrality = _labelled(
        system.compute_average_centrality(sum_not_mean=True), 'AvgCentrality')

    #-Diffusion Properties-#
    prox_avg_raw, prox_var_raw, prox_width_raw = compute_diffusion_properties_nx(
        system.mcp, system.proximity)
    prox_avg = _labelled(prox_avg_raw, 'AvgProx')
    prox_var = _labelled(prox_var_raw, 'VarProx')
    prox_width = _labelled(prox_width_raw, 'WidthProx')

    #-WDI Series (fetched, and later merged, in this order)-#
    wdi = WDI(WDI_SOURCE_DIR)
    wdi_codes = (
        'NY.GDP.MKTP.CD',        # GDP
        'NY.GDP.PCAP.CD',        # GDPPC
        'NY.GDP.PCAP.KD',        # GDPPCConst
        'NY.GDP.PCAP.PP.CD',     # GDPPCPPP - Only Available 1990 (USE PENN)
        'NY.GDP.PCAP.PP.KD',     # GDPPCPPPConst - Only Available 1990 (Use PENN)
        'NY.GDP.MKTP.KD.ZG',     # GDPGrowth
        'NY.GDP.PCAP.KD.ZG',     # GDPPCGrowth
        'NY.GNP.ATLS.CD',        # GNIAtlas
        'NY.GNP.MKTP.PP.CD',     # GNIPPP
        'NY.GNP.PCAP.CD',        # GNIPCAtlas
        'TT.PRI.MRCH.XD.WD',     # NetBarterToT
        'IS.AIR.DPRT',           # Infrastructure: AirDepartures
        'IS.RRS.TOTL.KM',        # Infrastructure: RailLinesKm
        'EG.USE.ELEC.KH.PC',     # Infrastructure: ElectricityUsePC
        'SP.POP.TOTL',           # Others: Population
        'AG.LND.TOTL.K2',        # Others: LandArea
    )
    wdi_tables = [wdi.series_long(code) for code in wdi_codes]

    #-Trade Liberalisation Data (with TradeLib Stats added)-#
    liberalisation = tradelib_stats(pd.read_csv(WAZIARG_DIR + 'trade_lib_wacziarg.csv'))
    liberalisation = liberalisation.set_index(keys=['iso3c', 'year'])

    #-Outer-join the export-system measures (Country-Year Data)-#
    df = centrality
    for measure in (probable, improbable, probable_persistent,
                    improbable_persistent, prox_avg, prox_var, prox_width):
        df = df.join(measure, how='outer')

    #-Single Index for Merging-#
    df.index = pd.Index(df.index)
    left_merged = wdi_tables + [both_totals, new_totals, die_totals,
                                new_persistent, liberalisation]
    for table in left_merged:
        table.index = pd.Index(table.index)
        df = df.merge(table, how='left', left_index=True, right_index=True)

    #-Restore MultiIndex-#
    df.index = pd.MultiIndex.from_tuples(df.index, names=['country', 'year'])
    return df