def main(targetYear=None, targetMonth=None):
    """Build per-pair CSV files and a correlation report for the watching list.

    Symbol pairs are read from the ``sheet_name_Watching_Input`` sheet of the
    workbook ``excel_file_name`` (rows 4 and below, columns 3 and 7).  For
    every pair the correlation and cointegration figures are collected, the
    spread/z-score frame is written to ``<symb1>_<symb2>.csv`` in the
    watching-list directory, and finally ``pairs_main.output_report`` is
    called with the collected figures.

    Args:
        targetYear: Not read by this function.
        targetMonth: Not read by this function.
    """
    print('Watching List data caculate main start!')
    file_name = os.path.join(setting.get_root_dir(), excel_file_name)
    workbook = openpyxl.load_workbook(file_name, data_only=True)
    sheet = workbook[sheet_name_Watching_Input]
    record_list = []
    symbols_corr_list = []

    ft.clean_target_dir(os.path.join(setting.get_watching_list_file_dir()))
    for i in range(4, sheet.max_row + 1):
        raw_code1 = sheet.cell(row=i, column=3).value
        raw_code2 = sheet.cell(row=i, column=7).value
        # Test the raw cell values: str(None) is the text "None", so a
        # None check made after the conversion can never succeed.
        if raw_code1 is None or raw_code2 is None:
            continue

        record = WatchingRecord()
        record.code1 = str(raw_code1)
        record.code2 = str(raw_code2)
        record_list.append(record)

    for record in record_list:
        symb1 = record.code1
        symb2 = record.code2

        # Skip empty codes and cells holding the literal text "None".
        if not symb1 or not symb2 or symb1 == "None" or symb2 == "None":
            continue

        _pairs = pairs_main.create_pairs_dataframe(
            setting.get_input_data_dir(), symb1, symb2)
        corr_3m, corr_1y = trade_util.check_corr(_pairs, symb1, symb2)
        coint_3m, coint_1y = trade_util.check_cointegration(
            _pairs, symb1, symb2)
        symbols_corr_list.append(
            [symb1, symb2, corr_3m, corr_1y, coint_3m, coint_1y])

        # The z-score helper is fed ascending dates; the CSV is written
        # with the newest row first.
        _pairs = _pairs.sort_values('DATE', ascending=True)
        _pairs = pairs_main.calculate_spread_zscore(_pairs, symb1, symb2)
        _pairs = _pairs.sort_values('DATE', ascending=False)
        file_util.write_csv(
            _pairs,
            os.path.join(setting.get_watching_list_file_dir(),
                         symb1 + '_' + symb2 + '.csv'))

    # Element 3 of each row is the corr_1y slot, sorted descending.
    # NOTE(review): the previous comment said "sort by 3 month corr", which
    # would be element 2 -- confirm which key is intended.
    corr_data = sorted(symbols_corr_list, key=itemgetter(3), reverse=True)
    corr_data = pd.DataFrame(
        columns=[
            'SYM_A', 'SYM_B', 'CORR_3M', 'CORR_1Y', 'COINT_3M', 'COINT_1Y'
        ],
        data=corr_data)

    pairs_main.output_report(corr_data, False,
                             setting.get_watching_list_file_dir(),
                             setting.watching_corr_result_file_name)

    print('Watching List data caculate main end!')
def outputAnalysisCsvFile(dataList):
    """Write the analysis result and the active configuration for this run.

    Both files go into ``<analysis_output_csv_file_dir>/<runDateTime>``,
    which is passed to ``ft.clean_target_dir`` first:

    * ``trade_analysis_<runDateTime>.csv`` -- ``dataList`` written through
      ``ft.write_csv_without_index``.
    * ``config_<runDateTime>.txt`` -- ``str(vars(config))``.

    Args:
        dataList: Data handed unchanged to ``ft.write_csv_without_index``.
    """
    targetDir = os.path.join(analysis_output_csv_file_dir, runDateTime)
    ft.clean_target_dir(targetDir)
    ft.write_csv_without_index(
        dataList,
        os.path.join(targetDir, 'trade_analysis_' + runDateTime + '.csv'))

    config_path = os.path.join(targetDir, 'config_' + runDateTime + '.txt')
    # The context manager closes the handle even when write() raises.
    with open(config_path, "w") as configFile:
        configFile.write(str(vars(config)))
def generate_portfolio_csv_file(caculated_csv_path=caculated_csv_dir,
                                portfolio_csv_path=portfolio_csv_dir):
    """Run signal generation for every calculated pair CSV.

    Each name returned by ``ft.getAllTargetSymbols(caculated_csv_path)`` is
    split on ``'_'``; the first two pieces are passed as the symbols to
    ``pairs_util.signal_generate`` together with the data read from
    ``<name>.csv``.

    Args:
        caculated_csv_path: Directory holding the calculated pair CSV files.
        portfolio_csv_path: Directory passed to ``ft.clean_target_dir`` up
            front and then handed to ``pairs_util.signal_generate``.
    """
    timestamp_format = "%Y-%m-%d %H:%M:%S"
    print('generate_portfolio_csv_file start ' + strftime(timestamp_format))
    ft.clean_target_dir(portfolio_csv_path)

    pair_names = ft.getAllTargetSymbols(caculated_csv_path)
    for position, pair_name in enumerate(pair_names, start=1):
        print('Processing {0}/{1}...'.format(position, len(pair_names)))
        name_parts = pair_name.split('_')
        source_csv = os.path.join(caculated_csv_path, pair_name + '.csv')
        pairs_data = ft.read_csv(source_csv)
        pairs_util.signal_generate(pairs_data, name_parts[0], name_parts[1],
                                   portfolio_csv_path)

    print('generate_portfolio_csv_file end ' + strftime(timestamp_format))
def generate_backtest_report(caculated_csv_path=caculated_csv_dir,
                             portfolio_csv_path=portfolio_csv_dir,
                             backtest_report_path=backtest_report_dir,
                             startdate=start_date,
                             enddate=end_date):
    """Produce a day report for each non-weekend day in a date range.

    Args:
        caculated_csv_path: Directory whose pair names are listed through
            ``ft.getAllTargetSymbols`` and passed to ``generate_day_report``.
        portfolio_csv_path: Not read by this function.
        backtest_report_path: Not read by this function; the module-level
            ``backtest_report_dir`` and ``backtest_report_each_day_dir`` are
            the directories that get cleaned.
            NOTE(review): confirm whether this parameter was meant to be
            used instead.
        startdate: First day of the range, as a ``'%Y-%m-%d'`` string.
        enddate: Last value handed to ``tu.date_span``, same format.
    """
    timestamp_format = "%Y-%m-%d %H:%M:%S"
    print('generate_backtest_report start ' + strftime(timestamp_format))
    print('start_date: {0} enddate: {1}'.format(startdate, enddate))

    ft.clean_target_dir(backtest_report_dir)
    ft.clean_target_dir(backtest_report_each_day_dir)

    first_day = datetime.strptime(startdate, '%Y-%m-%d')
    last_day = datetime.strptime(enddate, '%Y-%m-%d')

    pair_names = ft.getAllTargetSymbols(caculated_csv_path)

    for current_day in tu.date_span(first_day, last_day):
        print(current_day)

        # weekday() of 5 or above is skipped (Saturday/Sunday when
        # date_span yields date or datetime objects).
        if current_day.weekday() >= 5:
            continue
        generate_day_report(current_day, pair_names)

    print('generate_backtest_report end ' + strftime(timestamp_format))
def generate_caculated_data_csv(symbols,
                                caculated_csv_path=caculated_csv_dir,
                                startdate=None,
                                enddate=None,
                                mode='insert'):
    """Write the spread/z-score CSV for every unordered pair of symbols.

    For each pair the frame from ``pairs_util.create_pairs_dataframe`` is
    optionally restricted to ``[startdate, enddate]``, passed through
    ``pairs_util.calculate_spread_zscore`` and written to
    ``<symb1>_<symb2>.csv``.  When that file already exists, its rows are
    merged in with ``combine_first`` (freshly calculated values take
    precedence) before writing.

    Args:
        symbols: Sequence of symbol strings; every unordered pair of
            distinct symbols is processed once.
        caculated_csv_path: Output directory.
        startdate: Optional lower bound; formatted with
            ``datetime.datetime.strftime`` and compared to the frame index.
        enddate: Optional upper bound, handled like ``startdate``.
        mode: ``'create'`` passes the output directory to
            ``ft.clean_target_dir``; any other value passes it to
            ``ft.create_target_dir`` instead.
    """
    if mode == 'create':
        ft.clean_target_dir(caculated_csv_path)
    else:
        ft.create_target_dir(caculated_csv_path)

    # Unordered pairs already handled.  A frozenset key keeps the two
    # symbols distinct; concatenating them ("1" + "23" == "12" + "3") could
    # make two different pairs look identical and skip one of them.
    processed_pairs = set()

    output_columns_suffix = [
        'saya_divide', 'saya_divide_mean', 'saya_divide_std',
        'saya_divide_sigma', 'deviation_rate(%)', 'CORR_3M', 'COINT_3M',
        'CORR_1Y', 'COINT_1Y'
    ]

    for index1, symb1 in enumerate(symbols, start=1):
        print('Processing {0}/{1} {2}...'.format(index1, len(symbols), symb1))
        for symb2 in symbols:
            pair_key = frozenset((symb1, symb2))
            if symb1 == symb2 or pair_key in processed_pairs:
                continue
            processed_pairs.add(pair_key)

            _pairs = pairs_util.create_pairs_dataframe(
                setting.get_input_data_dir(), symb1, symb2)

            if startdate is not None:
                start_date = datetime.datetime.strftime(startdate, '%Y-%m-%d')
                _pairs = _pairs[_pairs.index >= start_date]
            if enddate is not None:
                end_date = datetime.datetime.strftime(enddate, '%Y-%m-%d')
                _pairs = _pairs[_pairs.index <= end_date]

            result_write_csv = os.path.join(caculated_csv_path,
                                            symb1 + '_' + symb2 + '.csv')
            _pairs = _pairs.sort_values('DATE', ascending=True)
            _pairs = pairs_util.calculate_spread_zscore(_pairs, symb1, symb2)

            if ft.is_file_exists(result_write_csv):
                csv_pairs = ft.read_csv(result_write_csv)
                csv_pairs['DATE'] = pd.to_datetime(csv_pairs['DATE'])
                csv_pairs = csv_pairs.sort_values('DATE', ascending=True)
                csv_pairs.index = csv_pairs['DATE']

                # Index of the newest row already on disk; forwarded to
                # set_corr_and_coint as its fourth argument.
                last_row_date = csv_pairs.tail(1).index

                # Fresh values win; rows/cells missing from the new frame
                # are filled from the existing file.
                _pairs = _pairs.combine_first(csv_pairs)
                _pairs = _pairs.loc[:, [
                    'OPEN_' + symb1, 'CLOSE_' + symb1, 'OPEN_' + symb2,
                    'CLOSE_' + symb2
                ] + output_columns_suffix]

                _pairs = _pairs.sort_values('DATE', ascending=False)
                set_corr_and_coint(_pairs, symb1, symb2, last_row_date)
            else:
                _pairs = _pairs.sort_values('DATE', ascending=False)
                set_corr_and_coint(_pairs, symb1, symb2)

            ft.write_csv(_pairs, result_write_csv)
def pre_process():
    """Clean the analysis output directory when the config asks for it.

    Does nothing unless ``config.clear_all_analysis_data`` is truthy.
    """
    if not config.clear_all_analysis_data:
        return

    message = "Clear all analysis old data under {}".format(
        analysis_output_csv_file_dir)
    _logger.info(message)
    ft.clean_target_dir(analysis_output_csv_file_dir)
def main(targetYear=None, targetMonth=None):
    """Calculate dry-run data for recent history trades and open positions.

    Trade pairs are read from two sheets of the workbook ``excel_file_name``.
    Each pair occupies two rows starting at row 4: the sell code is in
    column 6 of the first row, the buy code in column 6 of the second.

    * History sheet: rows without a year (column 27) are skipped; the month
      comes from column 28.
    * Open-position sheet: reading stops at the first empty sell code; these
      records get ``kbn = 'open'``.

    History records whose month started more than two months ago are
    skipped.  For the remaining records ``cacluate_needed_data`` is called
    with the two codes in ascending order and the target CSV path.

    Args:
        targetYear: Not read by this function.
        targetMonth: Not read by this function.
    """
    print('Dry Run Positin / History Aalysis main start!')
    workbook_path = os.path.join(setting.get_root_dir(), excel_file_name)
    workbook = openpyxl.load_workbook(workbook_path, data_only=True)
    history_sheet = workbook[sheet_name_history]
    trade_records = []

    ft.clean_target_dir(setting.get_dryrun_trade_open_dir())

    # Closed trades from the history sheet.
    for row in range(4, history_sheet.max_row + 1, 2):
        entry = TradeRecord()
        year_value = history_sheet.cell(row=row, column=27).value
        if year_value is None:
            continue
        entry.year = str(year_value)
        entry.month = str(history_sheet.cell(row=row, column=28).value)
        entry.sellCode = str(history_sheet.cell(row=row, column=6).value)
        entry.buyCode = str(history_sheet.cell(row=row + 1, column=6).value)
        trade_records.append(entry)

    # Currently open positions.
    open_position_sheet = workbook[sheet_name_open_position]
    for row in range(4, open_position_sheet.max_row + 1, 2):
        sell_value = open_position_sheet.cell(row=row, column=6).value
        if sell_value is None:
            break

        entry = TradeRecord()
        entry.sellCode = str(open_position_sheet.cell(row=row, column=6).value)
        entry.buyCode = str(
            open_position_sheet.cell(row=row + 1, column=6).value)
        entry.kbn = 'open'
        trade_records.append(entry)

    for entry in trade_records:
        # The smaller code always comes first in the file name.
        symb1, symb2 = sorted((entry.sellCode, entry.buyCode))
        csv_name = symb1 + '_' + symb2 + '.csv'

        if entry.kbn == 'open':
            ft.create_target_dir(
                os.path.join(setting.get_dryrun_trade_dir(), entry.kbn))
            csv_file_full_path = os.path.join(setting.get_dryrun_trade_dir(),
                                              entry.kbn, csv_name)
        else:
            month_start = datetime.datetime(year=int(entry.year),
                                            month=int(entry.month),
                                            day=1)
            cutoff = month_start + relativedelta(months=2)
            if datetime.datetime.now() > cutoff:
                continue

            ft.create_target_dir(
                os.path.join(setting.get_dryrun_trade_history_dir(),
                             entry.year))
            ft.create_target_dir(
                os.path.join(setting.get_dryrun_trade_history_dir(),
                             entry.year, entry.month))
            csv_file_full_path = os.path.join(
                setting.get_dryrun_trade_history_dir(), entry.year,
                entry.month, csv_name)

        cacluate_needed_data(symb1, symb2, csv_file_full_path)

    print('Dry Run Positin / History Aalysis main end!')
def generate_input_stock_data(target_date):
    """Merge one day's downloaded prices into the per-symbol CSV files.

    For every symbol listed in the target stock list, the matching row of
    the downloaded CSV is prepended to the symbol's full-history CSV, which
    is then rewritten.  A second CSV limited to rows newer than
    ``INPUT_TARGET_DATA_YEAR_SPAN`` years (plus one day) before
    ``target_date`` is written to the generated-input directory.  Symbols
    without a row in the download are skipped.  Afterwards
    ``generate_tracking_file(target_date)`` is called.

    If the download CSV for ``target_date`` does not exist, only a message
    is printed.

    Args:
        target_date: Date string in ``'%Y%m%d'`` format.

    Raises:
        ValueError: If ``target_date`` does not match ``'%Y%m%d'``; this is
            raised before the generated-input directory is cleaned.
    """
    target_download_csv_file_path = setting.get_target_download_csv_file_path(target_date)

    if not file_util.is_file_exists(target_download_csv_file_path):
        print('No target download CSV file exists. file=' + target_download_csv_file_path)
        return

    print('Generating input target stock data start.')
    target_stock_data_list = file_util.read_csv(setting.get_target_stock_data_list_file_path())

    download_stock_data = file_util.read_csv(target_download_csv_file_path)
    download_stock_data.columns = ['DATE', 'CODE', 'KBN', 'OPEN', 'HIGH', 'LOW', 'CLOSE', 'Volume']

    # Everything derived from target_date is identical for all symbols, so
    # it is computed once, before any file is touched.
    target_date_time = datetime.strptime(target_date, '%Y%m%d')
    insert_date = target_date_time.strftime("%Y-%m-%d")
    target_data_span = target_date_time - relativedelta(years=INPUT_TARGET_DATA_YEAR_SPAN)
    target_data_span = target_data_span - relativedelta(days=1)

    value_columns = ['OPEN', 'HIGH', 'LOW', 'CLOSE', 'Volume']
    output_columns = ['DATE'] + value_columns

    file_util.clean_target_dir(setting.get_generated_input_target_stock_data_dir())

    for index, data_row in target_stock_data_list.iterrows():
        symb = str(data_row['CODE'])
        print('symb:' + symb)

        data_csv_file = os.path.join(setting.get_org_all_stock_data_file_dir(), symb + '.csv')
        symb_df = file_util.read_csv(data_csv_file)

        searched_data = download_stock_data[download_stock_data['CODE'] == data_row['CODE']]
        if searched_data.empty:
            continue

        # Only the first matching download row is used.
        downloaded_row = searched_data.iloc[0]
        insert_value = [[insert_date] + [downloaded_row[column] for column in value_columns]]
        _tmp_df = pd.DataFrame(data=insert_value, columns=output_columns)

        # New row first, so drop_duplicates(keep='first') prefers it over an
        # identical row already in the file.
        symb_df = pd.concat([_tmp_df, symb_df], sort=True)
        symb_df = symb_df.loc[:, output_columns]

        symb_df['DATE'] = pd.to_datetime(symb_df['DATE'])
        symb_df.index = symb_df['DATE']
        symb_df = symb_df.drop_duplicates(keep='first')

        # Strip a trailing all-zero fraction ("1234.0" -> "1234").  The
        # pattern is anchored to the end of the string: an unanchored
        # "\.0" would also rewrite "10.05" to "105".
        for column in value_columns:
            symb_df[column] = symb_df[column].astype(str).replace(r'\.0+$', '', regex=True)

        file_util.write_csv_without_index(symb_df, data_csv_file)

        target_data = symb_df[symb_df.index > target_data_span]
        save_file_path = os.path.join(setting.get_generated_input_target_stock_data_dir(), symb + '.csv')
        file_util.write_csv_without_index(target_data, save_file_path)

    generate_tracking_file(target_date)
    print('Generating input target stock data end.')
# Example #9
# 0
def _collect_pair_result(symb1, symb2, symbols_corr_list):
    """Evaluate one symbol pair; record and export it when it is usable.

    Pairs rejected by ``is_available_pari_data`` are dropped.  Otherwise the
    correlation/cointegration figures are appended to ``symbols_corr_list``
    and the spread/z-score frame is written to ``<symb1>_<symb2>.csv`` in
    the result directory, newest row first.
    """
    _pairs = create_pairs_dataframe(setting.get_input_data_dir(), symb1,
                                    symb2)
    corr_3m, corr_1y = trade_util.check_corr(_pairs, symb1, symb2)
    coint_3m, coint_1y = trade_util.check_cointegration(_pairs, symb1, symb2)

    if not is_available_pari_data(_pairs, symb1, symb2, corr_3m, corr_1y,
                                  coint_3m, coint_1y):
        return

    symbols_corr_list.append(
        [symb1, symb2, corr_3m, corr_1y, coint_3m, coint_1y])

    # The z-score helper is fed ascending dates; the CSV is written with
    # the newest row first.
    _pairs = _pairs.sort_values('DATE', ascending=True)
    _pairs = calculate_spread_zscore(_pairs, symb1, symb2)
    _pairs = _pairs.sort_values('DATE', ascending=False)

    file_util.write_csv(
        _pairs,
        os.path.join(setting.get_result_dir(), symb1 + '_' + symb2 + '.csv'))


def main(args):
    """Calculate pair statistics for all target symbols and write a report.

    In normal mode every unordered pair of symbols found in the input data
    directory is evaluated.  In fast mode only the pairs listed in the file
    returned by ``setting.get_currenty_report_file()`` (columns ``SYM_A`` /
    ``SYM_B``) are re-evaluated.  The collected figures are then passed to
    ``output_report``.

    Args:
        args: Command-line style argument list; fast mode is selected when
            ``args[1]`` is ``'fast'`` or ``'FAST'``.
    """
    start_time = datetime.now()
    print('maint start ' + strftime("%Y-%m-%d %H:%M:%S"))

    isFastCaculateMode = len(args) >= 2 and args[1] in ('fast', 'FAST')
    if isFastCaculateMode:
        print('FAST CACULATE MODE')

    file_util.clean_target_dir(setting.get_result_dir())

    # get all target stock symbols
    symbols = file_util.getAllTargetSymbols(setting.get_input_data_dir())

    print('Total symbols size:' + str(len(symbols)))
    symbols_corr_list = []

    if isFastCaculateMode:
        _pais = file_util.read_csv(setting.get_currenty_report_file())

        for index1, (_, row) in enumerate(_pais.iterrows(), start=1):
            symb1 = str(int(row.SYM_A))
            symb2 = str(int(row.SYM_B))
            print('Processing {0}/{1} {2} - {3}...'.format(
                index1, len(_pais), symb1, symb2))
            _collect_pair_result(symb1, symb2, symbols_corr_list)
    else:
        # Unordered pairs already handled.  A frozenset key keeps the two
        # symbols distinct; concatenating them ("1" + "23" == "12" + "3")
        # could make two different pairs look identical and skip one.
        processed_pairs = set()

        for index1, symb1 in enumerate(symbols, start=1):
            print('Processing {0}/{1} {2}...'.format(index1, len(symbols),
                                                     symb1))
            for symb2 in symbols:
                pair_key = frozenset((symb1, symb2))
                if symb1 == symb2 or pair_key in processed_pairs:
                    continue
                processed_pairs.add(pair_key)
                _collect_pair_result(symb1, symb2, symbols_corr_list)

    # Element 3 of each row is the corr_1y slot, sorted descending.
    # NOTE(review): the previous comment said "sort by 3 month corr", which
    # would be element 2 -- confirm which key is intended.
    corr_data = sorted(symbols_corr_list, key=itemgetter(3), reverse=True)
    corr_data = pd.DataFrame(
        columns=[
            'SYM_A', 'SYM_B', 'CORR_3M', 'CORR_1Y', 'COINT_3M', 'COINT_1Y'
        ],
        data=corr_data)

    output_report(corr_data, isFastCaculateMode, setting.get_result_dir(),
                  setting.corr_result_file_name)

    process_time = datetime.now() - start_time
    print('main end!' + strftime("%Y-%m-%d %H:%M:%S"))
    print('Time cost:{0}'.format(process_time))