Beispiel #1
0
def combine_industry_and_acquisition():
    '''
    匹配合并并购表和行业表
    '''
    company_industry_tables = re.excel_table_to_OrderedDict_bySheetName(file='company_industry_filter_sample.xls',
                                                                        by_name=u'sub_data')
    M_A_subsample_tables = re.excel_table_to_OrderedDict_bySheetName(file='merger_and_acquisition_filter_sample.xls',
                                                                     by_name=u'data')

    print('len of company_industry_tables: %d' % len(company_industry_tables))
    print('len of M_A_subsample_tables: %d' % len(M_A_subsample_tables))

    result = []
    for i in range(len(M_A_subsample_tables)):
        row_M_A = M_A_subsample_tables[i]
        for j in range(len(company_industry_tables)):
            row_C_I = company_industry_tables[j]
            if row_M_A['Symbol'] == row_C_I['Stkcd']:
                row_M_A.update(row_C_I)
                print(row_M_A)
                result.append(row_M_A)
                break;

    we.write_excel('M&A_industry_combine.xls', 'combine_data', result)
    print(len(result))
Beispiel #2
0
def filter_ceo_data():
    tables = []
    for i in range(1, 10):
        # table = re.excel_table_byname(file='CG_Director_new.xls', by_name=u'CEO' + str(i))
        table = re.excel_table_to_OrderedDict_bySheetName(file='CG_Director_ALL.xls', by_name=u'CEO' + str(i))
        print(len(table))
        tables.extend(table)
    print('len of tables: %d' % len(tables))
    return tables

    result = []
    CMList = ['10', '11', '13']
    GMList = ['30', '31', '32', '37']
    for item in tables:
        zhiwei = str(item['D0201a'])
        isCM = str(zhiwei[0:2])
        isGM1 = str(zhiwei[2:4])
        isGM2 = str(zhiwei[4:6])
        # if isCM == '11' or '30' in isGM or '32' in isGM or '37' in isGM:
        #     if '副' not in item['D0201b']:
        #         print(item)
        #         result.append(item)
        if isCM in CMList or isGM1 in GMList or isGM2 in GMList:
            # print(item)
            result.append(item)

    print(len(result))
    return result
Beispiel #3
0
def add_ceoAge_to_GMTenure():
    table = re.excel_table_to_OrderedDict_bySheetName(file='GM_Tenure_with_Param.xls', by_name=u'data')
    CEOAge = RWTXT.txt_to_dict_list('CG_Director_ALL.txt')
    CEOAge.sort(key=itemgetter('Stkcd'), reverse=True)
    lenCEO = len(CEOAge)
    print(len(CEOAge))
    CEOAge1 = CEOAge[0:int(lenCEO / 2)]
    CEOAge2 = CEOAge[int(lenCEO / 2):int(lenCEO)]
    for item in table:
        app = OrderedDict()

        startT = time.clock()
        Ceo_Age = []
        if item['GM_Name'] != '':
            Ceo_Age = common_lib.Group_Search_dictList(CEOAge, item['StockId'], item['Year'], item['GM_Name'])
        if len(Ceo_Age) > 0:
            app['CEOAge'] = Ceo_Age[0]['D0401b']
            app['CEOSex'] = 1 if Ceo_Age[0]['D0301b'] == '男' else 0
        else:
            app['CEOAge'] = ''
            app['CEOSex'] = ''
        print(Ceo_Age)
        endT = time.clock()
        print('Running time:%s Seconds' % ((endT - startT)))
        item.update(app)
    print(table[0])
    print(len(table))
    we.write_excel('GM_Tenure_with_Param(WithCeoAge_V2).xls', 'data', table)
Beispiel #4
0
def write_txt():
    start = time.clock()
    print(start)
    tables = []
    for i in range(1, 10):
        # table = re.excel_table_byname(file='CG_Director_new.xls', by_name=u'CEO' + str(i))
        table = re.excel_table_to_OrderedDict_bySheetName(
            file='CG_Director_ALL.xls', by_name=u'CEO' + str(i))
        print(len(table))
        tables.extend(table)
    print('len of tables: %d' % len(tables))
    tables.sort(key=itemgetter('Stkcd'))
    end = time.clock()
    print(end)
    print('read excel Running time:%s Seconds' % ((end - start)))

    startX = time.clock()
    output = open('CG_Director_ALL.txt', 'w', encoding='utf-8')
    titles = ''
    titleList = list(tables[0].keys())
    titles = '|'.join(titleList)
    output.writelines(titles)
    for item in tables:
        row = list(OrderedDict(item).values())
        print(row)
        row = [str(i) if type(i) == float else i for i in row]
        # print(row)
        data = '|'.join(row)
        output.write('\n')
        output.write(data)
    output.close()
    endX = time.clock()
    print('write txt Running time:%s Seconds' % ((endX - startX)))
Beispiel #5
0
def add_ceoAge_to_GMTenure_V3():
    GM_Table = re.excel_table_to_OrderedDict_bySheetName(file='GM_Tenure_with_Param.xls', by_name=u'data')
    mongoClient = mdb.connect_to_db()
    CG_Director_List = mdb.Get_CG_Director_ALL(mongoClient)

    for item in GM_Table:
        app = OrderedDict()

        startT = time.clock()
        Ceo_Age = []
        if item['GM_Name'] != '':
            Ceo_Age = mdb.Search_CG_Director_ALL(CG_Director_List, item['StockId'], item['Year'], item['GM_Name'])
            # Ceo_Age=CG_Director_List.find({'Stkcd': item['StockId'], 'Reptdt': item['Year']+'-12-31', 'D0101b': item['GM_Name']})
        if len(Ceo_Age) > 0:
            app['CEOAge'] = Ceo_Age[0]['D0401b']
            app['CEOSex'] = 1 if Ceo_Age[0]['D0301b'] == '男' else 0
        else:
            app['CEOAge'] = ''
            app['CEOSex'] = ''
        print(Ceo_Age)
        endT = time.clock()
        print('Running time:%s Seconds' % ((endT - startT)))

        item.update(app)
    print(GM_Table[5])
    print(len(GM_Table))
Beispiel #6
0
def sort_ceo_final_data():
    table = re.excel_table_to_OrderedDict_bySheetName(file='CEO_final_V2.xls', by_name=u'data')
    result = sorted(table, key=itemgetter('Stkcd', 'Reptdt'))
    for item in result:
        print(item)

    we.write_excel('CEO_final_V2.xls', 'data', result)
    print(len(result))
Beispiel #7
0
def add_Assets0101_to_GMTenure():
    table = re.excel_table_to_OrderedDict_bySheetName(file='GM_Tenure_with_Param(WithCeoAge_VFinal).xls',
                                                      by_name=u'data')
    print(len(table))

    FirmAssets = re.excel_table_to_OrderedDict_bySheetName(file='company_assets_Filter(01-01).xls', by_name=u'data')
    print(len(FirmAssets))

    for item in table:
        app = OrderedDict()
        asset = [x for x in FirmAssets if
                 x['Stkcd'] == item['StockId'] and str(x['Accper']).split('-')[0] == item['Year']]
        print(asset)
        # app['TotalAsset'] = asset[0]['A001000000']
        app['TotalAsset(01-01)'] = float(max(asset, key=itemgetter('A001000000'))['A001000000'])

        item.update(app)
    print(table[0])
    print(len(table))
    we.write_excel('GM_Tenure_with_Param(WithCeoAge_VFinal).xls', 'data', table)
Beispiel #8
0
def filter_merge_acquisition():
    '''
    过滤并购数据:
    BusinessID [业务编码]:包含A=资产、B=股权,
    TradingPositionID [上市公司交易地位编码]:S3101=买方、S3106=既是买方又是标的方
    '''
    tables = re.excel_table_to_OrderedDict_bySheetName(file='merger_and_acquisition.xls', by_name=u'STK_MA_TRADINGMAIN')
    tables.extend(
        re.excel_table_to_OrderedDict_bySheetName(file='merger_and_acquisition.xls', by_name=u'STK_MA_TRADINGMAIN1'))
    print(len(tables))
    result = []
    for i in range(len(tables)):
        row = tables[i]
        if 'A' in row['BusinessID'] or 'B' in row['BusinessID']:
            if 'S3101' in row['TradingPositionID'] or 'S3106' in row['TradingPositionID']:
                print(row)
                result.append(row)

    we.write_excel('merger_and_acquisition_filter_sample.xls', 'data', result)
    print(len(result))
Beispiel #9
0
def add_ceoAge_to_GMTenure_V2():
    GM_Table = re.excel_table_to_OrderedDict_bySheetName(file='GM_Tenure_with_Param.xls', by_name=u'data')
    CEOAge = filter_ceo_data()
    CEOAge.sort(key=itemgetter('Stkcd'))
    lenCEO = len(CEOAge)
    print(lenCEO)
    CEOAge1 = CEOAge[0:int(lenCEO / 2)]
    CEOAge2 = CEOAge[int(lenCEO / 2):int(lenCEO)]

    for item in GM_Table:
        app = OrderedDict()

        startT = time.clock()

        # Ceo_Age = [x for x in CEOAge1 if x['Stkcd'] == item['StockId'] \
        #            and str(x['Reptdt']).split('-')[0] == item['Year'] \
        #            and x['D0101b'] == item['GM_Name']]
        # if len(Ceo_Age) > 0:
        #     app['CEOAge'] = Ceo_Age[0]['D0401b']
        #     app['CEOSex'] = 1 if Ceo_Age[0]['D0301b'] == '男' else 0
        # else:
        #     Ceo_Age = [x for x in CEOAge2 if x['Stkcd'] == item['StockId'] \
        #                and str(x['Reptdt']).split('-')[0] == item['Year'] \
        #                and x['D0101b'] == item['GM_Name']]
        #     if len(Ceo_Age) > 0:
        #         app['CEOAge'] = Ceo_Age[0]['D0401b']
        #         app['CEOSex'] = 1 if Ceo_Age[0]['D0301b'] == '男' else 0
        #     else:
        #         app['CEOAge'] = ''
        #         app['CEOSex'] = ''

        if item['StockId'] == '600002':
            a = 1
            b = 2

        Ceo_Age = []
        if item['GM_Name'] != '':
            Ceo_Age = common_lib.Binary_Search_dictList(CEOAge, item['StockId'], item['Year'], item['GM_Name'])
        if len(Ceo_Age) > 0:
            app['CEOAge'] = Ceo_Age[0]['D0401b']
            app['CEOSex'] = 1 if Ceo_Age[0]['D0301b'] == '男' else 0
        else:
            app['CEOAge'] = ''
            app['CEOSex'] = ''

        print(Ceo_Age)
        endT = time.clock()
        print('Running time:%s Seconds' % ((endT - startT)))

        item.update(app)
    print(GM_Table[0])
    print(len(GM_Table))
    we.write_excel('GM_Tenure_with_Param(WithCeoAge).xls', 'data', GM_Table)
Beispiel #10
0
def add_M_A_to_GMTenure():
    table = re.excel_table_to_OrderedDict_bySheetName(file='GM_Tenure_with_Param(WithCeoAge_VFinal).xls',
                                                      by_name=u'data')
    print(len(table))

    MACount = re.excel_table_to_OrderedDict_bySheetName(file='M&A_TimeCount.xls', by_name=u'data')
    print(len(MACount))

    for item in table:
        app = OrderedDict()
        count = [x for x in MACount if x['StockId'] == item['StockId'] and x['Year'] == item['Year']]
        print(count)

        if len(count) > 0:
            app['MATimes'] = count[0]['Times']
            app['MATotalExpenseValue'] = count[0]['TotalExpenseValue']
        else:
            app['MATimes'] = ''
            app['MATotalExpenseValue'] = ''

        item.update(app)
    print(table[0])
    print(len(table))
    we.write_excel('GM_Tenure_with_Param(WithCeoAge_VFinal2).xls', 'data', table)
Beispiel #11
0
def filter_company_industry():
    '''
    过滤公司行业数据:
    剔除B股、ST股、金融行业(行业代码J开头)
    '''
    tables = re.excel_table_to_OrderedDict_bySheetName(file='company-industry.xls', by_name=u'CG_Co')
    print(len(tables))
    result = []
    for i in range(len(tables)):
        row = tables[i]
        if 'B' not in row['Stktype'] and 'ST' not in row['Stknme'] and 'J' not in row['Nnindcd']:
            print(row)
            result.append(row)

    we.write_excel('company_industry_filter_sample.xls', 'sub_data', result)
    print(len(result))
Beispiel #12
0
def merge_M_A_by_SameTime():
    table = re.excel_table_to_OrderedDict_bySheetName(file='M&A_MergeTime.xls', by_name=u'SucceedAndNonRelevance')
    print(len(table))

    result = []
    for Symbol, items1 in groupby(table, key=itemgetter('Symbol')):
        for FirstDeclareDate, items2 in groupby(items1, key=itemgetter('FirstDeclareDate')):
            app = OrderedDict()
            app['StockId'] = Symbol
            app['FirstDeclareDate'] = FirstDeclareDate
            app['Year'] = str(FirstDeclareDate).split('-')[0]
            ExpenseValue = 0
            for i in items2:
                ExpenseValue += float(i['ExpenseValue'])
            app['ExpenseValue'] = str(ExpenseValue)
            print(app)
            result.append(app)
    we.write_excel('M&A_MergeTime.xls', 'data', result)
Beispiel #13
0
def calc_M_A_Times():
    table = re.excel_table_to_OrderedDict_bySheetName(file='M&A_MergeTime.xls', by_name=u'data')
    print(len(table))

    result = []
    for StockId, items1 in groupby(table, key=itemgetter('StockId')):
        for Year, items2 in groupby(items1, key=itemgetter('Year')):
            app = OrderedDict()
            app['StockId'] = StockId
            app['Year'] = Year
            times = 0
            TotalExpenseValue = 0
            for i in items2:
                times += 1
                TotalExpenseValue += float(i['ExpenseValue'])
            app['Times'] = str(times)
            app['TotalExpenseValue'] = str(TotalExpenseValue)
            print(app)
            result.append(app)
    we.write_excel('M&A_TimeCount.xls', 'data', result)
Beispiel #14
0
def group_M_A_by_industry():
    table = re.excel_table_to_OrderedDict_bySheetName(file='M&A_industry_combine_except2017.xls', by_name=u'data')
    result1 = []
    # Sort by the desired field first
    table.sort(key=itemgetter('Nnindcd'))
    # Iterate in groups
    for Nnindcd, items in groupby(table, key=itemgetter('Nnindcd')):
        app = OrderedDict()
        app['IndustryId'] = Nnindcd
        print(Nnindcd)
        lenItem = 0
        industry_name = ''
        for i in items:
            lenItem += 1
            industry_name = dict(i)['Nnindnme']
            # print(' ', i)
        app['Times'] = lenItem
        app['IndustryName'] = industry_name
        result1.append(app)
        print(' ', lenItem, '---' + industry_name)

    we.write_excel('M&A_groupby_industry.xls', 'data', result1)
Beispiel #15
0
def group_M_A_by_year():
    table = re.excel_table_to_OrderedDict_bySheetName(file='M&A_industry_combine_except2017.xls', by_name=u'data')
    for item in table:
        item['FirstDeclareYear'] = str(item['FirstDeclareDate']).split('-')[0]

    result = []
    # Sort by the desired field first
    table.sort(key=itemgetter('FirstDeclareYear'))
    # Iterate in groups
    for FirstDeclareYear, items in groupby(table, key=itemgetter('FirstDeclareYear')):
        app = {}
        app['Year'] = FirstDeclareYear
        print(FirstDeclareYear)
        lenItem = 0
        for i in items:
            lenItem += 1
            # print(' ', i)
        app['Times'] = lenItem
        result.append(app)
        print(' ', lenItem)

    we.write_excel('M&A_groupby_year.xls', 'data', result)
Beispiel #16
0
def deal_change_tenure_times():
    table = re.excel_table_to_OrderedDict_bySheetName(file='GM_Tenure_VFinal.xls', by_name=u'data')
    print(len(table))

    ceo_change_times = []
    tenure_one_year = []
    tenure_under_two_year = []
    for StockId, items1 in groupby(table, key=itemgetter('StockId')):
        change_time = 0
        for GM_Name, items2 in groupby(items1, key=itemgetter('GM_Name')):
            change_time += 1
            temp_list = []
            for i in items2:
                temp_list.append(OrderedDict(i))
            if GM_Name != '' and len(temp_list) == 1:
                tenure_one_year.extend(temp_list)
            if GM_Name != '' and len(temp_list) <= 2:
                tenure_under_two_year.extend(temp_list)
        app_change_time = OrderedDict()
        app_change_time['StockId'] = StockId
        app_change_time['ChangeTime'] = change_time - 1
        ceo_change_times.append(app_change_time)

    print(len(ceo_change_times))
    print(len(tenure_one_year))
    print(len(tenure_under_two_year))

    tenure_without_one_year = [i for i in table if i not in tenure_one_year]
    tenure_without_under_two_year = [i for i in table if i not in tenure_under_two_year]
    print(len(tenure_without_one_year))
    print(len(tenure_without_under_two_year))

    we.write_excel('CEO_Change_Time.xls', 'data', ceo_change_times)
    we.write_excel('GM_Tenure_OneYear.xls', 'data', tenure_one_year)
    we.write_excel('GM_Tenure_UnderTwoYear.xls', 'data', tenure_under_two_year)
    we.write_excel('GM_Tenure_Without_OneYear.xls', 'data', tenure_without_one_year)
    we.write_excel('GM_Tenure_Without_UnderTwoYear.xls', 'data', tenure_without_under_two_year)
Beispiel #17
0
def add_ceoAge_to_GMTenure_V4():
    table = re.excel_table_to_OrderedDict_bySheetName(file='GM_Tenure_with_Param.xls', by_name=u'data')
    CEOAge = RWTXT.txt_to_dict_list('CG_Director_Age.txt')
    print(len(CEOAge))
    for item in table:
        app = OrderedDict()

        startT = time.clock()
        Ceo_Age = []
        if item['GM_Name'] != '':
            Ceo_Age = [x for x in CEOAge if x['StockId'] == item['StockId'] and x['Name'] == item['GM_Name']]
        if len(Ceo_Age) > 0:
            app['CEOAge'] = str(int(item['Year']) - int(Ceo_Age[0]['BirthYear']))
            app['CEOSex'] = Ceo_Age[0]['Sex']
        else:
            app['CEOAge'] = ''
            app['CEOSex'] = ''
        print(Ceo_Age)
        endT = time.clock()
        print('Running time:%s Seconds' % ((endT - startT)))
        item.update(app)
    print(table[0])
    print(len(table))
    we.write_excel('GM_Tenure_with_Param(WithCeoAge_VFinal3).xls', 'data', table)
def filter_data():
    tables = re.excel_table_to_OrderedDict_bySheetName(
        file='GM_Tenure_with_Param(WithCeoAge_VFinal2).xls', by_name=u'data')
    tables = [i for i in tables if i['CashRatio'] != '' and i['GM_Name'] != '']
    return tables