예제 #1
0
def mis_inv(ref_ai, cb_ai, inv):
    """Log statistics on US targets with Chinese or country-less investors.

    Adds an ``inv_country_missing`` indicator column to ``inv`` in place,
    aggregates the US-target transactions per target company, reports the
    resulting counts through ``add_acc_message``, writes an ``agg_inv``
    summary to ``data/china_us_val.csv`` and finally reports the mean
    ``investment_value_median`` of the rows whose ``investment_value`` is
    missing, split by target region (USA / China) and deal type (VC / MA).

    Args:
        ref_ai: frame with a ``COMPANY_ID`` column; only the number of
            distinct values is reported.
        cb_ai: frame with a ``Crunchbase_ID`` column; only the number of
            distinct values is reported.
        inv: transaction-level pandas DataFrame; gains the
            ``inv_country_missing`` column.

    Returns:
        None.
    """
    def _positive_flag(total):
        # 1 when the per-company total is strictly positive, otherwise 0
        return 1 if total > 0 else 0

    def _mean_imputed_value(region, deal_kind):
        # mean investment_value_median over the rows of this region and deal
        # type that have no investment_value
        rows = ((inv['Target_Region'] == region)
                & (inv['investment_value'].isna())
                & (inv['MA'] == deal_kind))
        return inv.loc[rows, 'investment_value_median'].mean()

    n_refinitiv = len(ref_ai["COMPANY_ID"].unique())
    n_crunchbase = len(cb_ai["Crunchbase_ID"].unique())
    add_acc_message(f'We dropped mis_ref becuase we could not match them to Refinitiv out of '
                    f'{n_refinitiv} companies in Refinitiv. In Crunchbase there '
                    f'{n_crunchbase}')

    # Flag every transaction whose investor country is not recorded.
    country_unknown = inv['investor_country'].isna()
    inv.loc[country_unknown, 'inv_country_missing'] = 1
    inv.loc[~country_unknown, 'inv_country_missing'] = 0

    # One row per US target company with summed flags and investment value.
    us_rows = inv.loc[inv['Target_Region'] == 'USA']
    per_target = pd.pivot_table(
        us_rows,
        values=['inv_country_missing', 'China_inv', 'investment_value'],
        index=['Crunchbase_ID_target'],
        aggfunc=np.sum,
    )

    # Company-level dummies: at least one Chinese transaction / at least one
    # transaction with an unknown investor country.
    per_target['China_dum'] = per_target['China_inv'].apply(_positive_flag)
    per_target['Miss_inv_country_dum'] = per_target['inv_country_missing'].apply(_positive_flag)

    # 'Both': the company has unknown-country AND Chinese investors.
    in_both = (per_target['Miss_inv_country_dum'] == 1) & (per_target['China_dum'] == 1)
    per_target.loc[in_both, 'Both'] = 1
    per_target.loc[per_target['Both'].isna(), 'Both'] = 0

    # 'Neither': the company has no unknown-country and no Chinese investors.
    in_neither = (per_target['Miss_inv_country_dum'] == 0) & (per_target['China_dum'] == 0)
    per_target.loc[in_neither, 'Neither'] = 1
    per_target.loc[per_target['Neither'].isna(), 'Neither'] = 0

    # Constant column so that summing it counts the companies.
    per_target['all'] = 1
    sum_miss = per_target[['China_dum', 'Miss_inv_country_dum', 'Both', 'Neither', 'all']].sum()
    add_acc_message(f'US companies at least one transaction involving Chinese investors or Investors from missing countries'
                    f' {sum_miss}')

    # Investment value summed by the China / unknown-country dummies.
    value_summary = agg_inv('sum', per_target, ['China_dum', 'Miss_inv_country_dum', 'all'],
                            ['Both', 'Neither'], ['investment_value'])
    value_summary.to_csv('data/china_us_val.csv')

    # Mean imputed deal size where the disclosed value is missing.
    us_vc_mean = _mean_imputed_value('USA', 'VC')
    china_vc_mean = _mean_imputed_value('China', 'VC')
    us_ma_mean = _mean_imputed_value('USA', 'MA')
    china_ma_mean = _mean_imputed_value('China', 'MA')
    add_acc_message(f'Averages deal sizes with imputed missing values: US VC deal {us_vc_mean}, average China VC deal '
                    f'{china_vc_mean} average US MA deal {us_ma_mean}, average China MA deal {china_ma_mean}')
    return
예제 #2
0
def average_deal(inv):
    """Export and print summary counts for investments into US targets.

    Writes two CSV files, ``data/comp.csv`` and ``data/round_data.csv``, with
    the counts returned by ``agg_inv`` and prints the number of distinct US
    rounds and of distinct US target companies: overall, with Chinese
    investment, and with at least one investor of undisclosed country.

    Args:
        inv: transaction-level pandas DataFrame. Columns referenced here
            (directly or by name in the ``agg_inv`` calls):
            ``Target_Region``, ``MA``, ``China_inv``, ``inv_country_missing``,
            ``Crunchbase_ID_target``, ``round_id``, ``investor_country``,
            ``investment_value`` and ``year``.

    Returns:
        None.
    """
    is_us = inv['Target_Region'] == 'USA'

    # Companies and rounds split by deal type, Chinese involvement and the
    # missing-investor-country flag. A fresh slice is passed to each call.
    comp_data = agg_inv('count', inv.loc[is_us], ['MA', 'China_inv'],
                        ['inv_country_missing'], ['Crunchbase_ID_target'])
    round_data = agg_inv('count', inv.loc[is_us], ['MA', 'China_inv'],
                         ['inv_country_missing'], ['round_id'])
    comp_data.to_csv('data/comp.csv')
    round_data.to_csv('data/round_data.csv')

    # Number of distinct US deals.
    us_inv_rnd = inv.loc[is_us, 'round_id'].nunique()
    print(f"Number of unique US deals {us_inv_rnd}")

    # Number of distinct US companies that received investment.
    us_targets = inv.loc[is_us, 'Crunchbase_ID_target'].nunique()
    print(f'Over the period {us_targets} American companies received private equity investment')

    # The comparison must be parenthesised: `&` binds tighter than `==`, so
    # without the parentheses the mask is AND-ed with the raw column first.
    from_china = is_us & (inv['China_inv'] == 1)
    china_targets = inv.loc[from_china, 'Crunchbase_ID_target'].nunique()
    print(f'Over the period {china_targets} American companies received private equity investment  from China')

    undisclosed = is_us & inv['investor_country'].isna()
    undisclosed_targets = inv.loc[undisclosed, 'Crunchbase_ID_target'].nunique()
    print(f'Over the period {undisclosed_targets} American companies received private equity investment from at least one investor with '
          f'undisclosed country.')

    # NOTE(review): the result of this aggregation is not used; the call is
    # kept in case agg_inv has side effects - confirm and remove if it does not.
    agg_inv('sum', inv.loc[is_us & inv['investment_value'].notna()],
            ['Target_Region', 'MA'], ['year'], ['investment_value'])

    return
예제 #3
0
 def test_agg_inv_sum(self):
     """agg_inv('sum') totals investment_value per (Target_Region, CB) row and year column."""
     # Row keys of the aggregated table, in the order used by the totals below.
     row_keys = [('CHN', '1'), ('CHN', '2'), ('USA', '3'), ('USA', '4')]
     # Expected investment_value total for each row key, per year.
     totals_by_year = {
         2013: (10, 20, 0, 0),
         2014: (0, 50, 0, 0),
         2015: (0, 0, 30, 40),
     }
     expected = {
         ('investment_value', year): dict(zip(row_keys, totals))
         for year, totals in totals_by_year.items()
     }

     result = agg_inv("sum", self.inv_for_agg, ["Target_Region", "CB"],
                      ["year"], ["investment_value"])

     self.assertEqual(result.to_dict(), expected)
예제 #4
0
def add_tests(inv):
    """Build supplementary deal tables and write them to ``data/add_test.xlsx``.

    Steps, in order:
      1. average disclosed deal value per (Target_Region, MA) and year;
      2. count and share of rounds without an ``investment_value``, plus
         several coverage messages sent through ``add_acc_message``;
      3. round counts, ``investment_value_median`` totals and
         ``investment_value`` totals for VC rounds into US targets, split
         first by ``China_inv`` and then by a new ``inv_unknown`` flag;
      4. the same three measures for eight further target countries.

    Side effects: the ``missing_inv_values`` and ``inv_unknown`` columns are
    added to the caller's ``inv``. The country-code replacements near the end
    produce new frames bound to the local name only.

    Args:
        inv: transaction-level pandas DataFrame.

    Returns:
        None.
    """
    def _by_year(how, frame, row_keys, value_col):
        # Every table here is pivoted on 'year'; new list objects are built
        # for each call, exactly as separate list literals would be.
        return agg_inv(how, frame, list(row_keys), ['year'], [value_col])

    region_and_deal = ('Target_Region', 'MA')
    value_known = inv['investment_value'].notna()
    value_missing = inv['investment_value'].isna()

    # --- average disclosed deal value: total value / number of rounds -------
    value_total = _by_year('sum', inv.loc[value_known], region_and_deal, 'investment_value')
    round_count = _by_year('count', inv.loc[value_known], region_and_deal, 'round_id')
    # zero counts become 1 so the division cannot divide by zero
    round_count = round_count.replace(0, 1)
    # positional division; the totals table is overwritten to keep its labels
    value_total[:] = value_total.values / round_count.values
    avg_deal_value = value_total

    # --- rounds without a disclosed value ------------------------------------
    inv.loc[value_missing, 'missing_inv_values'] = 1
    inv.loc[value_known, 'missing_inv_values'] = 0
    missing_count = _by_year('count', inv.loc[value_missing], region_and_deal, 'round_id')
    overall_count = _by_year('count', inv, region_and_deal, 'round_id')
    missing_share = missing_count / overall_count
    add_acc_message(f"Average share of missing investment values {missing_share}")

    is_ma = inv['MA'] == 'MA'
    is_vc = inv['MA'] == 'VC'

    # M&A transactions looked at separately
    miss_ma_inv = inv.loc[(inv['missing_inv_values'] == 1) & is_ma, 'missing_inv_values'].sum()
    tot_ma = len(inv.loc[is_ma])
    add_acc_message(f"Average share of missing investment values in MA transactsion is  {miss_ma_inv/tot_ma}, the number"
                    f" of missing is {miss_ma_inv} out of {tot_ma}")

    # coverage messages: rows overall, then VC (distinct rounds and rows), then MA
    all_rows = inv['round_id'].count()
    all_rows_missing = inv.loc[value_missing, 'round_id'].count()
    add_acc_message(
        f"We cover {all_rows} deals, out which {all_rows_missing}"
        f" miss investment value")

    vc_ids = inv.loc[is_vc, 'round_id']
    vc_ids_missing = inv.loc[value_missing & is_vc, 'round_id']
    add_acc_message(f"We cover {vc_ids.nunique()} VC rounds, out which"
                    f" {vc_ids_missing.nunique()}"
                    f" miss investment value")
    add_acc_message(f"We cover {vc_ids.count()} VC rounds/Company combinations, out which"
                    f" {vc_ids_missing.count()}"
                    f" miss investment value")

    ma_rows = inv.loc[is_ma, 'round_id'].count()
    ma_rows_missing = inv.loc[value_missing & is_ma, 'round_id'].count()
    add_acc_message(f"We cover {ma_rows} MA deals, out which"
                    f" {ma_rows_missing}"
                    f" miss investment value")

    # --- VC rounds into US targets, split by the China_inv flag --------------
    us_vc = is_vc & (inv['Target_Region'] == 'USA')
    by_china = ('investment_type', 'China_inv')
    ch_us_count = _by_year('count', inv.loc[us_vc], by_china, 'round_id')
    ch_us_median_val = _by_year('sum', inv.loc[us_vc], by_china, 'investment_value_median')
    ch_us_disclosed_val = _by_year('sum', inv.loc[us_vc], by_china, 'investment_value')

    # --- same tables, split by whether the investor country is unknown -------
    investor_unknown = inv['investor_country'].isna()
    inv.loc[investor_unknown, 'inv_unknown'] = 1
    inv.loc[~investor_unknown, 'inv_unknown'] = 0
    by_unknown = ('investment_type', 'inv_unknown')
    ukn_us_count = _by_year('count', inv.loc[us_vc], by_unknown, 'round_id')
    ukn_us_median_val = _by_year('sum', inv.loc[us_vc], by_unknown, 'investment_value_median')
    ukn_us_disclosed_val = _by_year('sum', inv.loc[us_vc], by_unknown, 'investment_value')

    # --- eight further target countries --------------------------------------
    # Harmonise the country codes one replacement at a time, across the whole
    # frame; each call returns a new frame.
    code_changes = (
        ('GBR', 'UK'),
        ('CA', 'CAN'),
        ('IS', 'ISR'),
        ('IN', 'IND'),
        ('DE', 'DEU'),
        ('SG', 'SGP'),
        ('JP', 'JPN'),
        ('FR', 'FRA'),
    )
    for old_code, new_code in code_changes:
        inv = inv.replace(old_code, new_code)

    next_eight = ['UK', 'CAN', 'ISR', 'IND', 'SGP', 'JPN', 'FRA', 'DEU']
    others_countries = inv.loc[inv['target_country'].isin(next_eight)]
    by_country = ('target_country', 'MA')
    others_count = _by_year('count', others_countries, by_country, 'round_id')
    others_median_val = _by_year('sum', others_countries, by_country, 'investment_value_median')
    others_disclosed_val = _by_year('sum', others_countries, by_country, 'investment_value')

    # --- one sheet per table, written in this order ---------------------------
    sheets = (
        ('Average_deal_value', avg_deal_value),
        ('Share_of_missing_values', missing_share),
        ('Total_missing_value_count', missing_count),
        ('Count_CH_to_US', ch_us_count),
        ('MedVal_CH_to_US', ch_us_median_val),
        ('DiscVal_CH_to_US', ch_us_disclosed_val),
        ('Count_UKN_to_US', ukn_us_count),
        ('MedVal_UKN_to_US', ukn_us_median_val),
        ('DiscVal_UKN_to_US', ukn_us_disclosed_val),
        ('Others_Count', others_count),
        ('Others_Med', others_median_val),
        ('Others_Val', others_disclosed_val),
    )
    with pd.ExcelWriter('data/add_test.xlsx') as writer:
        for sheet_name, table in sheets:
            table.to_excel(writer, sheet_name=sheet_name)
    return