def compute_probable_emergence(export_dataset, prox_cutoff, style='average'):
    """
    Compute probable product emergence and its persistent subset.

    Parameters
    ----------
    export_dataset : object
        Must provide ``to_dynamic_productlevelexportsystem()``.
    prox_cutoff
        Forwarded unchanged to
        ``dynples.compute_probable_improbable_emergence``.
    style : optional
        Forwarded unchanged to
        ``dynples.compute_probable_improbable_emergence``. This name used to
        be read as an unbound free variable inside the function; it is now
        an explicit keyword. The default 'average' is the value that
        ``compute_regression_dataset`` passes to the same method.
        NOTE(review): if callers relied on a module-level ``style`` global
        with a different value, pass it explicitly.

    Returns
    -------
    tuple
        ``(Mc_ProbableProducts, Mc_ProbablePersistent)``, each converted by
        ``from_dict_of_series_to`` with series names 'ProbableProducts' and
        'ProbablePersistent' respectively.
    """
    #-Export System-#
    dynples = export_dataset.to_dynamic_productlevelexportsystem()
    dynples = dynples.dynamic_global_panel()
    dynples.rca_matrices(complete_data=True)
    dynples.mcp_matrices()
    dynples.proximity_matrices()

    #-Compute Product Changes-#
    # NOTE(review): none of the product-change totals below feed the return
    # value. They are kept because the helpers may have side effects that
    # cannot be ruled out from this function alone -- TODO confirm and drop.
    Mcp_BothYears, Mcp_NewProducts, Mcp_DieProducts = dynples.compute_product_changes()
    df_BothYears = reindex_dynamic_dataframe(from_dict_to_dataframe(Mcp_BothYears))
    Mc_BothYears = df_BothYears.groupby(level=['country', 'to_year']).sum()
    df_NewProducts = reindex_dynamic_dataframe(from_dict_to_dataframe(Mcp_NewProducts))
    Mc_NewProducts = df_NewProducts.groupby(level=['country', 'to_year']).sum()
    df_DieProducts = reindex_dynamic_dataframe(from_dict_to_dataframe(Mcp_DieProducts))
    Mc_DieProducts = df_DieProducts.groupby(level=['country', 'to_year']).sum()

    #-Compute Probable Emergence-#
    Mc_ProbableProducts, Mc_ImProbableProducts = dynples.compute_probable_improbable_emergence(
        prox_cutoff=prox_cutoff, style=style)
    Mc_ProbableProducts = from_dict_of_series_to(
        Mc_ProbableProducts, series_name='ProbableProducts')

    #-Compute Probable + Persistence-#
    # Same computation with output='reduced'; only the probable half is used.
    Mcp_ProbableProducts, Mcp_ImProbableProducts = dynples.compute_probable_improbable_emergence(
        prox_cutoff=prox_cutoff, style=style, output='reduced')
    persistence = compute_persistence(dynples.mcp, Mcp_ProbableProducts, output="summary")
    Mc_ProbablePersistent = from_dict_of_series_to(
        persistence, series_name="ProbablePersistent")

    return Mc_ProbableProducts, Mc_ProbablePersistent
def productspace_5yearavg_improbableemergence(Mcp_ImProbableProducts, GDPGrowth, window=(5,5), verbose=False):
    """
    Tabulate GDP growth in the years around each improbable emergence event.

    An event is a (year, country) pair for which at least one row of
    ``from_dict_to_dataframe(Mcp_ImProbableProducts)`` has a positive
    'ImProbProducts' value. The result has one row per distinct event and
    one column per year offset:

                         Y(-5) ... Y(-1)  Y(0)  Y(1) ... Y(5)
        (year, country)             GDPGrowth

    Parameters
    ----------
    Mcp_ImProbableProducts : Dict(pd.DataFrame(index=(country), columns=(productcode)))
    GDPGrowth : pd.Series(index=(country, year))
    window : tuple(int, int)
        (years before the event, years after the event). The column labels
        are generated from this value; the default (5, 5) yields
        'Y(-5)' ... 'Y(0)' ... 'Y(5)'.
    verbose : bool
        Unused; kept so the signature stays compatible.

    Returns
    -------
    pd.DataFrame
        Indexed by a sorted ('year', 'country') MultiIndex. Cells are NaN
        for years outside 1962-2012 and for (country, year) keys missing
        from ``GDPGrowth``. An empty table (same columns and index names)
        is returned when there are no events.
    """
    # Years outside this span are filled with NaN without any lookup.
    first_year, last_year = 1962, 2012
    years_before, years_after = window

    #-Find Improbable Product Emergence Events-#
    # A dict is used as an ordered set so repeated (year, country) pairs,
    # one per product, collapse to a single event.
    events = {}
    for idx, s in from_dict_to_dataframe(Mcp_ImProbableProducts).iterrows():
        year, country, productcode = idx
        if s['ImProbProducts'] > 0:
            events[(year, country)] = True
    events = list(events)

    #-Find GDPGrowth n Years before and n years after
    # Exactly one cell is produced per offset, so every row lines up with
    # the column labels (the event year itself lands in 'Y(0)').
    offsets = range(-years_before, years_after + 1)
    cols = ['Y(%s)' % offset for offset in offsets]

    rows = []
    for year, country in events:
        row = []
        for offset in offsets:
            yr = year + offset
            if yr < first_year or yr > last_year:
                row.append(np.nan)
                continue
            try:
                # .loc replaces the removed .ix indexer; only a missing key
                # is treated as "no data" so real errors are not hidden.
                gdpgrowth = GDPGrowth.loc[(country, yr)]
            except (KeyError, IndexError):
                gdpgrowth = np.nan
            row.append(gdpgrowth)
        rows.append(row)

    index_names = ['year', 'country']
    if events:
        index = pd.MultiIndex.from_tuples(events, names=index_names)
    else:
        # from_tuples cannot infer the number of levels from an empty list.
        index = pd.MultiIndex.from_arrays([[], []], names=index_names)
    df = pd.DataFrame(rows, index=index, columns=cols)
    df.sort_index(inplace=True)
    return df
Example #3
0
def compute_probable_emergence(export_dataset, prox_cutoff, style='average'):
    """
    Compute probable product emergence and its persistent subset.

    Parameters
    ----------
    export_dataset : object
        Must provide ``to_dynamic_productlevelexportsystem()``.
    prox_cutoff
        Forwarded unchanged to
        ``dynples.compute_probable_improbable_emergence``.
    style : optional
        Forwarded unchanged to
        ``dynples.compute_probable_improbable_emergence``. This name used to
        be read as an unbound free variable inside the function; it is now
        an explicit keyword. The default 'average' is the value that
        ``compute_regression_dataset`` passes to the same method.
        NOTE(review): if callers relied on a module-level ``style`` global
        with a different value, pass it explicitly.

    Returns
    -------
    tuple
        ``(Mc_ProbableProducts, Mc_ProbablePersistent)``, each converted by
        ``from_dict_of_series_to`` with series names 'ProbableProducts' and
        'ProbablePersistent' respectively.
    """
    #-Export System-#
    dynples = export_dataset.to_dynamic_productlevelexportsystem()
    dynples = dynples.dynamic_global_panel()
    dynples.rca_matrices(complete_data=True)
    dynples.mcp_matrices()
    dynples.proximity_matrices()

    #-Compute Product Changes-#
    # NOTE(review): none of the product-change totals below feed the return
    # value. They are kept because the helpers may have side effects that
    # cannot be ruled out from this function alone -- TODO confirm and drop.
    Mcp_BothYears, Mcp_NewProducts, Mcp_DieProducts = dynples.compute_product_changes(
    )
    df_BothYears = reindex_dynamic_dataframe(
        from_dict_to_dataframe(Mcp_BothYears))
    Mc_BothYears = df_BothYears.groupby(level=['country', 'to_year']).sum()
    df_NewProducts = reindex_dynamic_dataframe(
        from_dict_to_dataframe(Mcp_NewProducts))
    Mc_NewProducts = df_NewProducts.groupby(level=['country', 'to_year']).sum()
    df_DieProducts = reindex_dynamic_dataframe(
        from_dict_to_dataframe(Mcp_DieProducts))
    Mc_DieProducts = df_DieProducts.groupby(level=['country', 'to_year']).sum()

    #-Compute Probable Emergence-#
    Mc_ProbableProducts, Mc_ImProbableProducts = dynples.compute_probable_improbable_emergence(
        prox_cutoff=prox_cutoff, style=style)
    Mc_ProbableProducts = from_dict_of_series_to(
        Mc_ProbableProducts, series_name='ProbableProducts')

    #-Compute Probable + Persistence-#
    # Same computation with output='reduced'; only the probable half is used.
    Mcp_ProbableProducts, Mcp_ImProbableProducts = dynples.compute_probable_improbable_emergence(
        prox_cutoff=prox_cutoff, style=style, output='reduced')
    persistence = compute_persistence(dynples.mcp,
                                      Mcp_ProbableProducts,
                                      output="summary")
    Mc_ProbablePersistent = from_dict_of_series_to(
        persistence, series_name="ProbablePersistent")

    return Mc_ProbableProducts, Mc_ProbablePersistent
def compute_regression_dataset(export_dataset):
    """
    Assemble the country-year regression table from an export dataset.

    Product-space measures derived from ``export_dataset`` are outer-joined
    onto the average centrality table. WDI indicator series, product change
    totals and the trade liberalisation table are then left-merged on the
    index, in that order.

    Returns
    -------
    pd.DataFrame
        Indexed by a ('country', 'year') MultiIndex.
    """
    #-Export System-#
    system = export_dataset.to_dynamic_productlevelexportsystem()
    system = system.dynamic_global_panel()
    system.rca_matrices(complete_data=True)
    system.mcp_matrices()
    system.proximity_matrices()

    #-Product Changes, summed per (country, to_year)-#
    group_levels = ['country', 'to_year']
    both_mcp, new_mcp, die_mcp = system.compute_product_changes()
    both_frame = reindex_dynamic_dataframe(from_dict_to_dataframe(both_mcp))
    both_totals = both_frame.groupby(level=group_levels).sum()
    new_frame = reindex_dynamic_dataframe(from_dict_to_dataframe(new_mcp))
    new_totals = new_frame.groupby(level=group_levels).sum()
    die_frame = reindex_dynamic_dataframe(from_dict_to_dataframe(die_mcp))
    die_totals = die_frame.groupby(level=group_levels).sum()

    #-Probable and Improbable Emergence-#
    emergence_opts = {'prox_cutoff': 'median', 'style': 'average'}
    probable, improbable = system.compute_probable_improbable_emergence(**emergence_opts)
    probable = from_dict_of_series_to(probable, series_name='ProbableProducts')
    improbable = from_dict_of_series_to(improbable, series_name='ImprobableProducts')

    #-Probable\Improbable + Persistence-#
    probable_mcp, improbable_mcp = system.compute_probable_improbable_emergence(
        output='reduced', **emergence_opts)
    probable_summary = compute_persistence(system.mcp, probable_mcp, output="summary")
    probable_persistent = from_dict_of_series_to(
        probable_summary, series_name="ProbablePersistent")
    improbable_summary = compute_persistence(system.mcp, improbable_mcp, output="summary")
    improbable_persistent = from_dict_of_series_to(
        improbable_summary, series_name="ImProbablePersistent")

    #-Persistence of all new products, as a check-#
    new_at_finish = reindex_dynamic_dict(new_mcp, base='finish')
    new_summary = compute_persistence(system.mcp, new_at_finish, output="summary")
    new_persistent = from_dict_of_series_to(new_summary, series_name="NewPersistent")

    #-Centrality-#
    centrality = system.compute_average_centrality(sum_not_mean=True)
    centrality = from_dict_of_series_to(centrality, series_name='AvgCentrality')

    #-Diffusion Properties-#
    prox_avg, prox_var, prox_width = compute_diffusion_properties_nx(
        system.mcp, system.proximity)
    prox_avg = from_dict_of_series_to(prox_avg, series_name='AvgProx')
    prox_var = from_dict_of_series_to(prox_var, series_name='VarProx')
    prox_width = from_dict_of_series_to(prox_width, series_name='WidthProx')

    #-WDI series, fetched and later merged in this order-#
    wdi = WDI(WDI_SOURCE_DIR)
    indicator_codes = [
        'NY.GDP.MKTP.CD',       # GDP
        'NY.GDP.PCAP.CD',       # GDPPC
        'NY.GDP.PCAP.KD',       # GDPPCConst
        'NY.GDP.PCAP.PP.CD',    # GDPPCPPP       Only Available 1990 (USE PENN)
        'NY.GDP.PCAP.PP.KD',    # GDPPCPPPConst  Only Available 1990 (Use PENN)
        'NY.GDP.MKTP.KD.ZG',    # GDPGrowth
        'NY.GDP.PCAP.KD.ZG',    # GDPPCGrowth
        'NY.GNP.ATLS.CD',       # GNIAtlas
        'NY.GNP.MKTP.PP.CD',    # GNIPPP
        'NY.GNP.PCAP.CD',       # GNIPCAtlas
        'TT.PRI.MRCH.XD.WD',    # NetBarterToT
        #-Infrastructure-#
        'IS.AIR.DPRT',          # AirDepartures
        'IS.RRS.TOTL.KM',       # RailLinesKm
        'EG.USE.ELEC.KH.PC',    # ElectricityUsePC
        #-Others-#
        'SP.POP.TOTL',          # Population
        'AG.LND.TOTL.K2',       # LandArea
    ]
    wdi_tables = [wdi.series_long(code) for code in indicator_codes]

    #-Trade Liberalisation Data (with TradeLib stats added)-#
    trade_lib = tradelib_stats(pd.read_csv(WAZIARG_DIR+'trade_lib_wacziarg.csv'))
    trade_lib = trade_lib.set_index(keys=['iso3c', 'year'])

    #-Outer-join the product-space measures-#
    table = centrality
    for part in (probable, improbable, probable_persistent, improbable_persistent,
                 prox_avg, prox_var, prox_width):
        table = table.join(part, how='outer')

    #-Single Index for Merging-#
    table.index = pd.Index(table.index)
    left_parts = wdi_tables + [both_totals, new_totals, die_totals, new_persistent, trade_lib]
    for part in left_parts:
        part.index = pd.Index(part.index)
        table = table.merge(part, how='left', left_index=True, right_index=True)

    #-Restore MultiIndex-#
    table.index = pd.MultiIndex.from_tuples(table.index, names=['country', 'year'])
    return table
Example #5
0
def productspace_5yearavg_improbableemergence(Mcp_ImProbableProducts,
                                              GDPGrowth,
                                              window=(5, 5),
                                              verbose=False):
    """
    Tabulate GDP growth in the years around each improbable emergence event.

    An event is a (year, country) pair for which at least one row of
    ``from_dict_to_dataframe(Mcp_ImProbableProducts)`` has a positive
    'ImProbProducts' value. The result has one row per distinct event and
    one column per year offset:

                         Y(-5) ... Y(-1)  Y(0)  Y(1) ... Y(5)
        (year, country)             GDPGrowth

    Parameters
    ----------
    Mcp_ImProbableProducts : Dict(pd.DataFrame(index=(country), columns=(productcode)))
    GDPGrowth : pd.Series(index=(country, year))
    window : tuple(int, int)
        (years before the event, years after the event). The column labels
        are generated from this value; the default (5, 5) yields
        'Y(-5)' ... 'Y(0)' ... 'Y(5)'.
    verbose : bool
        Unused; kept so the signature stays compatible.

    Returns
    -------
    pd.DataFrame
        Indexed by a sorted ('year', 'country') MultiIndex. Cells are NaN
        for years outside 1962-2012 and for (country, year) keys missing
        from ``GDPGrowth``. An empty table (same columns and index names)
        is returned when there are no events.
    """
    # Years outside this span are filled with NaN without any lookup.
    first_year, last_year = 1962, 2012
    years_before, years_after = window

    #-Find Improbable Product Emergence Events-#
    # A dict is used as an ordered set so repeated (year, country) pairs,
    # one per product, collapse to a single event.
    events = {}
    for idx, s in from_dict_to_dataframe(Mcp_ImProbableProducts).iterrows():
        year, country, productcode = idx
        if s['ImProbProducts'] > 0:
            events[(year, country)] = True
    events = list(events)

    #-Find GDPGrowth n Years before and n years after
    # Exactly one cell is produced per offset, so every row lines up with
    # the column labels (the event year itself lands in 'Y(0)').
    offsets = range(-years_before, years_after + 1)
    cols = ['Y(%s)' % offset for offset in offsets]

    rows = []
    for year, country in events:
        row = []
        for offset in offsets:
            yr = year + offset
            if yr < first_year or yr > last_year:
                row.append(np.nan)
                continue
            try:
                # .loc replaces the removed .ix indexer; only a missing key
                # is treated as "no data" so real errors are not hidden.
                gdpgrowth = GDPGrowth.loc[(country, yr)]
            except (KeyError, IndexError):
                gdpgrowth = np.nan
            row.append(gdpgrowth)
        rows.append(row)

    index_names = ['year', 'country']
    if events:
        index = pd.MultiIndex.from_tuples(events, names=index_names)
    else:
        # from_tuples cannot infer the number of levels from an empty list.
        index = pd.MultiIndex.from_arrays([[], []], names=index_names)
    df = pd.DataFrame(rows, index=index, columns=cols)
    df.sort_index(inplace=True)
    return df
Example #6
0
def compute_regression_dataset(export_dataset):
    """
    Assemble the country-year regression table from an export dataset.

    Product-space measures derived from ``export_dataset`` are outer-joined
    onto the average centrality table. WDI indicator series, product change
    totals and the trade liberalisation table are then left-merged on the
    index, in that order.

    Returns
    -------
    pd.DataFrame
        Indexed by a ('country', 'year') MultiIndex.
    """
    #-Export System-#
    system = export_dataset.to_dynamic_productlevelexportsystem()
    system = system.dynamic_global_panel()
    system.rca_matrices(complete_data=True)
    system.mcp_matrices()
    system.proximity_matrices()

    #-Product Changes, summed per (country, to_year)-#
    group_levels = ['country', 'to_year']
    both_mcp, new_mcp, die_mcp = system.compute_product_changes()
    both_frame = reindex_dynamic_dataframe(from_dict_to_dataframe(both_mcp))
    both_totals = both_frame.groupby(level=group_levels).sum()
    new_frame = reindex_dynamic_dataframe(from_dict_to_dataframe(new_mcp))
    new_totals = new_frame.groupby(level=group_levels).sum()
    die_frame = reindex_dynamic_dataframe(from_dict_to_dataframe(die_mcp))
    die_totals = die_frame.groupby(level=group_levels).sum()

    #-Probable and Improbable Emergence-#
    emergence_opts = {'prox_cutoff': 'median', 'style': 'average'}
    probable, improbable = system.compute_probable_improbable_emergence(
        **emergence_opts)
    probable = from_dict_of_series_to(probable,
                                      series_name='ProbableProducts')
    improbable = from_dict_of_series_to(improbable,
                                        series_name='ImprobableProducts')

    #-Probable\Improbable + Persistence-#
    probable_mcp, improbable_mcp = system.compute_probable_improbable_emergence(
        output='reduced', **emergence_opts)
    probable_summary = compute_persistence(system.mcp,
                                           probable_mcp,
                                           output="summary")
    probable_persistent = from_dict_of_series_to(
        probable_summary, series_name="ProbablePersistent")
    improbable_summary = compute_persistence(system.mcp,
                                             improbable_mcp,
                                             output="summary")
    improbable_persistent = from_dict_of_series_to(
        improbable_summary, series_name="ImProbablePersistent")

    #-Persistence of all new products, as a check-#
    new_at_finish = reindex_dynamic_dict(new_mcp, base='finish')
    new_summary = compute_persistence(system.mcp,
                                      new_at_finish,
                                      output="summary")
    new_persistent = from_dict_of_series_to(new_summary,
                                            series_name="NewPersistent")

    #-Centrality-#
    centrality = system.compute_average_centrality(sum_not_mean=True)
    centrality = from_dict_of_series_to(centrality,
                                        series_name='AvgCentrality')

    #-Diffusion Properties-#
    prox_avg, prox_var, prox_width = compute_diffusion_properties_nx(
        system.mcp, system.proximity)
    prox_avg = from_dict_of_series_to(prox_avg, series_name='AvgProx')
    prox_var = from_dict_of_series_to(prox_var, series_name='VarProx')
    prox_width = from_dict_of_series_to(prox_width, series_name='WidthProx')

    #-WDI series, fetched and later merged in this order-#
    wdi = WDI(WDI_SOURCE_DIR)
    indicator_codes = [
        'NY.GDP.MKTP.CD',  # GDP
        'NY.GDP.PCAP.CD',  # GDPPC
        'NY.GDP.PCAP.KD',  # GDPPCConst
        'NY.GDP.PCAP.PP.CD',  # GDPPCPPP       Only Available 1990 (USE PENN)
        'NY.GDP.PCAP.PP.KD',  # GDPPCPPPConst  Only Available 1990 (Use PENN)
        'NY.GDP.MKTP.KD.ZG',  # GDPGrowth
        'NY.GDP.PCAP.KD.ZG',  # GDPPCGrowth
        'NY.GNP.ATLS.CD',  # GNIAtlas
        'NY.GNP.MKTP.PP.CD',  # GNIPPP
        'NY.GNP.PCAP.CD',  # GNIPCAtlas
        'TT.PRI.MRCH.XD.WD',  # NetBarterToT
        #-Infrastructure-#
        'IS.AIR.DPRT',  # AirDepartures
        'IS.RRS.TOTL.KM',  # RailLinesKm
        'EG.USE.ELEC.KH.PC',  # ElectricityUsePC
        #-Others-#
        'SP.POP.TOTL',  # Population
        'AG.LND.TOTL.K2',  # LandArea
    ]
    wdi_tables = [wdi.series_long(code) for code in indicator_codes]

    #-Trade Liberalisation Data (with TradeLib stats added)-#
    trade_lib = tradelib_stats(
        pd.read_csv(WAZIARG_DIR + 'trade_lib_wacziarg.csv'))
    trade_lib = trade_lib.set_index(keys=['iso3c', 'year'])

    #-Outer-join the product-space measures-#
    table = centrality
    for part in (probable, improbable, probable_persistent,
                 improbable_persistent, prox_avg, prox_var, prox_width):
        table = table.join(part, how='outer')

    #-Single Index for Merging-#
    table.index = pd.Index(table.index)
    left_parts = wdi_tables + [
        both_totals, new_totals, die_totals, new_persistent, trade_lib
    ]
    for part in left_parts:
        part.index = pd.Index(part.index)
        table = table.merge(part,
                            how='left',
                            left_index=True,
                            right_index=True)

    #-Restore MultiIndex-#
    table.index = pd.MultiIndex.from_tuples(table.index,
                                            names=['country', 'year'])
    return table