def cacluate_needed_data(symb1, symb2, csv_file_full_path):
    """Refresh the calculated-data CSV for one symbol pair.

    The pair frame is built from the input data directory, sorted by
    ``DATE`` ascending and passed through
    ``pair_trade.calculate_spread_zscore``. If ``csv_file_full_path``
    already exists, the fresh frame is combined with the stored rows via
    ``combine_first`` (fresh values win, stored values only fill gaps) and
    reduced to a fixed column set. In both cases the frame is then sorted
    by ``DATE`` descending, handed to ``pair_back_test.set_corr_and_coint``
    and written to ``csv_file_full_path``.

    Args:
        symb1: First symbol of the pair; also used to build column names.
        symb2: Second symbol of the pair; also used to build column names.
        csv_file_full_path: Path of the CSV to merge with and overwrite.

    Returns:
        None.
    """
    fresh = pair_trade.create_pairs_dataframe(setting.get_input_data_dir(),
                                              symb1, symb2)
    fresh = pair_trade.calculate_spread_zscore(
        fresh.sort_values('DATE', ascending=True), symb1, symb2)

    # No stored CSV yet: nothing to merge, write the fresh frame as-is.
    if not ft.is_file_exists(csv_file_full_path):
        newest_first = fresh.sort_values('DATE', ascending=False)
        # Return value is ignored, so this presumably fills the frame in
        # place -- TODO confirm against pair_back_test.
        pair_back_test.set_corr_and_coint(newest_first, symb1, symb2)
        ft.write_csv(newest_first, csv_file_full_path)
        return

    stored = ft.read_csv(csv_file_full_path)
    stored['DATE'] = pd.to_datetime(stored['DATE'])
    stored = stored.sort_values('DATE', ascending=True)
    stored.index = stored['DATE']

    # Index of the most recent stored row; forwarded to set_corr_and_coint.
    last_stored_date = stored.tail(1).index

    kept_columns = [
        'OPEN_' + symb1,
        'CLOSE_' + symb1,
        'OPEN_' + symb2,
        'CLOSE_' + symb2,
        'saya_divide',
        'saya_divide_mean',
        'saya_divide_std',
        'saya_divide_sigma',
        'deviation_rate(%)',
        'CORR_3M',
        'COINT_3M',
        'CORR_1Y',
        'COINT_1Y',
    ]
    merged = fresh.combine_first(stored)
    merged = merged.loc[:, kept_columns]

    # 'DATE' is no longer a column here; sorting relies on the index
    # being named DATE.
    merged = merged.sort_values('DATE', ascending=False)
    pair_back_test.set_corr_and_coint(merged, symb1, symb2, last_stored_date)
    ft.write_csv(merged, csv_file_full_path)
def generate_list():
    """Write the list of target symbols to the target-stock list file.

    The symbols returned by ``file_util.getAllTargetSymbols`` for the input
    data directory are placed in a one-column frame named ``CODE`` and
    saved, without the index, to the path given by
    ``setting.get_target_stock_data_list_file_path()``.
    """
    input_dir = setting.get_input_data_dir()
    code_frame = pd.DataFrame(data=file_util.getAllTargetSymbols(input_dir),
                              columns=["CODE"])
    file_util.write_csv_without_index(
        code_frame, setting.get_target_stock_data_list_file_path())
def main(targetYear=None, targetMonth=None):
    """Recalculate pair data for every pair on the watching-input sheet.

    Reads symbol codes from columns 3 and 7 of the watching-input sheet
    (rows 4 to the last row), cleans the watching-list output directory,
    and for each usable pair:

    * computes correlation and cointegration values via ``trade_util``,
    * computes the spread z-score via ``pairs_main.calculate_spread_zscore``,
    * writes ``<symb1>_<symb2>.csv`` (newest date first) to the
      watching-list directory.

    Finally the per-pair summary rows are sorted and passed to
    ``pairs_main.output_report``.

    Args:
        targetYear: Not used by this function; kept for caller
            compatibility.
        targetMonth: Not used by this function; kept for caller
            compatibility.

    Returns:
        None.
    """
    print('Watching List data caculate main start!')
    file_name = os.path.join(setting.get_root_dir(), excel_file_name)
    workbook = openpyxl.load_workbook(file_name, data_only=True)
    sheet = workbook[sheet_name_Watching_Input]
    record_list = []
    symbols_corr_list = []

    ft.clean_target_dir(os.path.join(setting.get_watching_list_file_dir()))
    for i in range(4, sheet.max_row + 1):
        raw_code1 = sheet.cell(row=i, column=3).value
        raw_code2 = sheet.cell(row=i, column=7).value
        # Test the raw cell values: str(None) is the non-empty string
        # "None", so a None check made after str() can never be true.
        if raw_code1 is None or raw_code2 is None:
            continue

        record = WatchingRecord()
        record.code1 = str(raw_code1)
        record.code2 = str(raw_code2)
        record_list.append(record)

    for record in record_list:
        symb1 = record.code1
        symb2 = record.code2

        # Empty codes and cells holding the literal text "None" are still
        # skipped, exactly as before.
        if not symb1 or not symb2 or "None" in (symb1, symb2):
            continue

        _pairs = pairs_main.create_pairs_dataframe(
            setting.get_input_data_dir(), symb1, symb2)
        corr_3m, corr_1y = trade_util.check_corr(_pairs, symb1, symb2)
        coint_3m, coint_1y = trade_util.check_cointegration(
            _pairs, symb1, symb2)
        symbols_corr_list.append(
            [symb1, symb2, corr_3m, corr_1y, coint_3m, coint_1y])

        # The z-score is calculated on ascending dates; the CSV is written
        # newest-first.
        _pairs = _pairs.sort_values('DATE', ascending=True)
        _pairs = pairs_main.calculate_spread_zscore(_pairs, symb1, symb2)
        _pairs = _pairs.sort_values('DATE', ascending=False)
        file_util.write_csv(
            _pairs,
            os.path.join(setting.get_watching_list_file_dir(),
                         symb1 + '_' + symb2 + '.csv'))

    # itemgetter(3) selects corr_1y (the CORR_1Y column), highest first.
    # NOTE(review): the previous comment said "sort by 3 month corr", which
    # would be index 2 -- confirm which ordering is intended.
    corr_data = sorted(symbols_corr_list, key=itemgetter(3), reverse=True)
    corr_data = pd.DataFrame(
        columns=['SYM_A', 'SYM_B', 'CORR_3M', 'CORR_1Y', 'COINT_3M',
                 'COINT_1Y'],
        data=corr_data)

    pairs_main.output_report(corr_data, False,
                             setting.get_watching_list_file_dir(),
                             setting.watching_corr_result_file_name)

    print('Watching List data caculate main end!')
Esempio n. 4
0
def generate_caculated_data_csv(symbols,
                                caculated_csv_path=caculated_csv_dir,
                                startdate=None,
                                enddate=None,
                                mode='insert'):
    """Create or update the calculated-data CSV of every unordered symbol pair.

    Each pair ``(symb1, symb2)`` is processed once, in the order it is first
    met while iterating ``symbols`` x ``symbols``. The pair frame is
    optionally limited to ``[startdate, enddate]``, run through
    ``pairs_util.calculate_spread_zscore`` and, if
    ``<symb1>_<symb2>.csv`` already exists in ``caculated_csv_path``,
    combined with the stored rows (``combine_first``: fresh values win,
    stored values fill gaps) and reduced to a fixed column set. The result
    is sorted by ``DATE`` descending, passed to ``set_corr_and_coint`` and
    written back to that CSV.

    Args:
        symbols: Sized collection of symbol strings (``len()`` is taken and
            the symbols are concatenated into file and column names).
        caculated_csv_path: Directory holding the per-pair CSV files.
        startdate: Optional lower bound; formatted with
            ``datetime.datetime.strftime`` and compared to the frame index.
        enddate: Optional upper bound, handled like ``startdate``.
        mode: ``'create'`` cleans ``caculated_csv_path`` via
            ``ft.clean_target_dir``; any other value calls
            ``ft.create_target_dir`` instead.

    Returns:
        None.
    """
    if mode == 'create':
        ft.clean_target_dir(caculated_csv_path)
    else:
        ft.create_target_dir(caculated_csv_path)

    # The formatted bounds do not depend on the pair, so build them once.
    start_date = None
    if startdate is not None:
        start_date = datetime.datetime.strftime(startdate, '%Y-%m-%d')
    end_date = None
    if enddate is not None:
        end_date = datetime.datetime.strftime(enddate, '%Y-%m-%d')

    # frozenset keys are order-insensitive and, unlike concatenated strings,
    # cannot collide for different pairs ("1"+"23" vs "12"+"3").
    seen_pairs = set()
    total = len(symbols)

    for position, symb1 in enumerate(symbols, start=1):
        print('Processing {0}/{1} {2}...'.format(position, total, symb1))
        for symb2 in symbols:
            pair_key = frozenset((symb1, symb2))
            if symb1 == symb2 or pair_key in seen_pairs:
                continue
            seen_pairs.add(pair_key)

            _pairs = pairs_util.create_pairs_dataframe(
                setting.get_input_data_dir(), symb1, symb2)

            if start_date is not None:
                _pairs = _pairs[_pairs.index >= start_date]
            if end_date is not None:
                _pairs = _pairs[_pairs.index <= end_date]

            result_write_csv = os.path.join(caculated_csv_path,
                                            symb1 + '_' + symb2 + '.csv')
            _pairs = _pairs.sort_values('DATE', ascending=True)
            _pairs = pairs_util.calculate_spread_zscore(_pairs, symb1, symb2)

            if ft.is_file_exists(result_write_csv):
                csv_pairs = ft.read_csv(result_write_csv)
                csv_pairs['DATE'] = pd.to_datetime(csv_pairs['DATE'])
                csv_pairs = csv_pairs.sort_values('DATE', ascending=True)
                csv_pairs.index = csv_pairs['DATE']

                # Index of the most recent stored row; forwarded to
                # set_corr_and_coint.
                last_row_date = csv_pairs.tail(1).index

                _pairs = _pairs.combine_first(csv_pairs)
                _pairs = _pairs.loc[:, [
                    'OPEN_' + symb1, 'CLOSE_' + symb1, 'OPEN_' +
                    symb2, 'CLOSE_' + symb2, 'saya_divide', 'saya_divide_mean',
                    'saya_divide_std', 'saya_divide_sigma',
                    'deviation_rate(%)', 'CORR_3M', 'COINT_3M', 'CORR_1Y',
                    'COINT_1Y'
                ]]

                _pairs = _pairs.sort_values('DATE', ascending=False)
                set_corr_and_coint(_pairs, symb1, symb2, last_row_date)
            else:
                _pairs = _pairs.sort_values('DATE', ascending=False)
                set_corr_and_coint(_pairs, symb1, symb2)

            ft.write_csv(_pairs, result_write_csv)
Esempio n. 5
0
def _export_pair_if_available(symb1, symb2):
    """Analyse one pair, write its CSV and return its summary row.

    Returns ``[symb1, symb2, corr_3m, corr_1y, coint_3m, coint_1y]``, or
    ``None`` (and writes nothing) when ``is_available_pari_data`` rejects
    the pair.
    """
    _pairs = create_pairs_dataframe(setting.get_input_data_dir(), symb1,
                                    symb2)
    corr_3m, corr_1y = trade_util.check_corr(_pairs, symb1, symb2)
    coint_3m, coint_1y = trade_util.check_cointegration(_pairs, symb1, symb2)

    if not is_available_pari_data(_pairs, symb1, symb2, corr_3m, corr_1y,
                                  coint_3m, coint_1y):
        return None

    # The z-score is calculated on ascending dates; the CSV is written
    # newest-first.
    _pairs = _pairs.sort_values('DATE', ascending=True)
    _pairs = calculate_spread_zscore(_pairs, symb1, symb2)
    _pairs = _pairs.sort_values('DATE', ascending=False)

    file_util.write_csv(
        _pairs,
        os.path.join(setting.get_result_dir(), symb1 + '_' + symb2 + '.csv'))
    return [symb1, symb2, corr_3m, corr_1y, coint_3m, coint_1y]


def main(args):
    """Run the pair analysis and produce the correlation report.

    The result directory is cleaned first. In fast mode (``args[1]`` is
    ``'fast'`` or ``'FAST'``) only the pairs listed in the current report
    file (columns ``SYM_A`` / ``SYM_B``) are processed; otherwise every
    unordered pair of target symbols is processed once. Each accepted pair
    gets a ``<symb1>_<symb2>.csv`` in the result directory, and the
    collected summary rows are sorted and passed to ``output_report``.

    Args:
        args: Argument list, indexed like ``sys.argv`` (``args[1]`` is the
            optional mode flag).

    Returns:
        None.
    """
    start_time = datetime.now()
    print('maint start ' + strftime("%Y-%m-%d %H:%M:%S"))

    isFastCaculateMode = len(args) >= 2 and args[1] in ('fast', 'FAST')
    if isFastCaculateMode:
        print('FAST CACULATE MODE')

    file_util.clean_target_dir(setting.get_result_dir())

    # get all target stock symbols
    symbols = file_util.getAllTargetSymbols(setting.get_input_data_dir())

    print('Total symbols size:' + str(len(symbols)))
    symbols_corr_list = []

    if isFastCaculateMode:
        _pais = file_util.read_csv(setting.get_currenty_report_file())

        for position, (_, row) in enumerate(_pais.iterrows(), start=1):
            symb1 = str(int(row.SYM_A))
            symb2 = str(int(row.SYM_B))
            print('Processing {0}/{1} {2} - {3}...'.format(
                position, len(_pais), symb1, symb2))

            summary_row = _export_pair_if_available(symb1, symb2)
            if summary_row is not None:
                symbols_corr_list.append(summary_row)
    else:
        # frozenset keys are order-insensitive and, unlike concatenated
        # strings, cannot collide for different pairs ("1"+"23" vs "12"+"3").
        seen_pairs = set()
        for position, symb1 in enumerate(symbols, start=1):
            print('Processing {0}/{1} {2}...'.format(position, len(symbols),
                                                     symb1))
            for symb2 in symbols:
                pair_key = frozenset((symb1, symb2))
                if symb1 == symb2 or pair_key in seen_pairs:
                    continue
                seen_pairs.add(pair_key)

                summary_row = _export_pair_if_available(symb1, symb2)
                if summary_row is not None:
                    symbols_corr_list.append(summary_row)

    # itemgetter(3) selects corr_1y (the CORR_1Y column), highest first.
    # NOTE(review): the previous comment said "sort by 3 month corr", which
    # would be index 2 -- confirm which ordering is intended.
    corr_data = sorted(symbols_corr_list, key=itemgetter(3), reverse=True)
    corr_data = pd.DataFrame(
        columns=['SYM_A', 'SYM_B', 'CORR_3M', 'CORR_1Y', 'COINT_3M',
                 'COINT_1Y'],
        data=corr_data)

    output_report(corr_data, isFastCaculateMode, setting.get_result_dir(),
                  setting.corr_result_file_name)

    process_time = datetime.now() - start_time
    print('main end!' + strftime("%Y-%m-%d %H:%M:%S"))
    print('Time cost:{0}'.format(process_time))