def create_pairs_dataframe(data_dir, symbol1, symbol2):
    """Load two symbols' CSV files and merge them into one DATE-indexed frame.

    Keeps only the OPEN/CLOSE columns of each symbol (suffixed with the
    symbol code) and drops any row with missing values after the merge.
    """
    frame_a = file_util.read_csv(os.path.join(data_dir, symbol1 + '.csv'))
    frame_b = file_util.read_csv(os.path.join(data_dir, symbol2 + '.csv'))

    frame_a = frame_a.rename(columns={'OPEN': 'OPEN_' + symbol1,
                                      'CLOSE': 'CLOSE_' + symbol1})
    frame_b = frame_b.rename(columns={'OPEN': 'OPEN_' + symbol2,
                                      'CLOSE': 'CLOSE_' + symbol2})

    merged = pd.merge(frame_a, frame_b, on='DATE', how='left')
    merged['DATE'] = pd.to_datetime(merged['DATE'])
    merged.index = merged['DATE']

    # HIGH/LOW/Volume appear twice after the merge (_x/_y); none are needed.
    merged = merged.drop(
        ['DATE', 'HIGH_x', 'HIGH_y', 'LOW_x', 'LOW_y', 'Volume_x', 'Volume_y'],
        axis=1)
    return merged.dropna()
Exemple #2
0
def get_daily_stock_data(code, target_ymd):
    """
    Get target daily stock data by code and date (YYYYmmDD).

    Downloads the day's CSV if it is not cached locally, then returns the
    rows matching *code* with Volume cast to float and the KBN column
    removed. Returns None when no data can be obtained.
    """
    target_download_csv_file_path = setting.get_target_download_csv_file_path(
        target_ymd)
    if (not file_util.is_file_exists(target_download_csv_file_path)):
        data_download.main(target_ymd)

    # Download may still have failed (e.g. non-trading day) -- give up.
    if (not file_util.is_file_exists(target_download_csv_file_path)):
        print("No target data")
        return None
    else:
        download_daily_stock_data = file_util.read_csv(
            target_download_csv_file_path)
        download_daily_stock_data.columns = [
            'DATE', 'CODE', 'KBN', 'OPEN', 'HIGH', 'LOW', 'CLOSE', 'Volume'
        ]
        # .copy() so the assignment below does not write into a view of the
        # full frame (pandas SettingWithCopyWarning).
        searched_data = download_daily_stock_data[
            download_daily_stock_data['CODE'] == code].copy()
        searched_data['Volume'] = searched_data['Volume'].astype(float)
        # drop(columns=...) replaces the positional-axis form drop('KBN', 1),
        # which was deprecated and removed in pandas 2.0.
        searched_data = searched_data.drop(columns='KBN')

        return searched_data
def cacluate_needed_data(symb1, symb2, csv_file_full_path):
    """Calculate spread/z-score data for one pair and persist it to CSV.

    Builds the merged OPEN/CLOSE frame for symb1/symb2, computes the spread
    z-score columns, merges the fresh data over any previously written CSV
    (new values win via combine_first), refreshes the correlation /
    cointegration columns and writes the file back, newest rows first.
    """

    _pairs = pair_trade.create_pairs_dataframe(setting.get_input_data_dir(),
                                               symb1, symb2)
    _pairs = _pairs.sort_values('DATE', ascending=True)
    _pairs = pair_trade.calculate_spread_zscore(_pairs, symb1, symb2)

    if ft.is_file_exists(csv_file_full_path):
        # An earlier run already produced this file: merge fresh data on top.
        csv_pairs = ft.read_csv(csv_file_full_path)
        csv_pairs['DATE'] = pd.to_datetime(csv_pairs['DATE'])
        csv_pairs = csv_pairs.sort_values('DATE', ascending=True)
        csv_pairs.index = csv_pairs['DATE']

        # Date index of the most recent previously-stored row; presumably
        # set_corr_and_coint uses it to limit recomputation to newer rows
        # -- confirm against pair_back_test.set_corr_and_coint.
        last_row_date = csv_pairs.tail(1).index
        # print ('last_row_date {0}'.format(last_row_date))

        # Fresh values take precedence; old rows fill the gaps.
        _pairs = _pairs.combine_first(csv_pairs)
        _pairs = _pairs.loc[:, [
            'OPEN_' + symb1, 'CLOSE_' + symb1, 'OPEN_' + symb2, 'CLOSE_' +
            symb2, 'saya_divide', 'saya_divide_mean', 'saya_divide_std',
            'saya_divide_sigma', 'deviation_rate(%)', 'CORR_3M', 'COINT_3M',
            'CORR_1Y', 'COINT_1Y'
        ]]

        _pairs = _pairs.sort_values('DATE', ascending=False)
        pair_back_test.set_corr_and_coint(_pairs, symb1, symb2, last_row_date)
        ft.write_csv(_pairs, csv_file_full_path)

    else:
        # First run for this pair: write the full recalculated frame.
        _pairs = _pairs.sort_values('DATE', ascending=False)
        pair_back_test.set_corr_and_coint(_pairs, symb1, symb2)
        ft.write_csv(_pairs, csv_file_full_path)
def generate_day_report(target_date, file_name_list):
    """Write a per-day CSV report aggregating all pairs for *target_date*.

    For each calculated pair CSV, takes the row at *target_date*, renames
    the symbol-specific OPEN/CLOSE columns to generic A/B columns, filters
    out pairs failing the correlation / cointegration thresholds, and
    writes the surviving rows as one CSV under
    backtest_report_each_day_dir. Writes nothing when no pair survives.
    """
    day_report_file_name = os.path.join(
        backtest_report_each_day_dir,
        target_date.strftime("%Y-%m-%d") + '.csv')

    report_rows = []

    for file_name in file_name_list:
        # Pair files are named "<symbolA>_<symbolB>.csv".
        _temp = file_name.split('_')
        symb1 = _temp[0]
        symb2 = _temp[1]

        pairs_data = ft.read_csv(
            os.path.join(caculated_csv_dir, file_name + '.csv'))
        pairs_data['DATE'] = pd.to_datetime(pairs_data['DATE'])
        pairs_data.index = pairs_data['DATE']
        search_data = pairs_data[target_date:target_date]
        if search_data.empty:
            # NOTE(review): this aborts the whole report on the first pair
            # lacking a row for target_date (presumably a non-trading day).
            # Confirm that `continue` was not intended here instead.
            return

        search_data1 = search_data.copy(deep=True)
        search_data1.rename(columns={
            'OPEN_' + symb1: 'OPEN_A',
            'CLOSE_' + symb1: 'CLOSE_A',
            'OPEN_' + symb2: 'OPEN_B',
            'CLOSE_' + symb2: 'CLOSE_B'
        },
                            inplace=True)

        search_data1['SYM_A'] = symb1
        search_data1['SYM_B'] = symb2

        search_data1 = search_data1.loc[:, [
            'SYM_A', 'OPEN_A', 'CLOSE_A', 'SYM_B', 'OPEN_B', 'CLOSE_B',
            'saya_divide', 'saya_divide_mean', 'saya_divide_std',
            'saya_divide_sigma', 'deviation_rate(%)', 'CORR_3M', 'COINT_3M',
            'CORR_1Y', 'COINT_1Y'
        ]]

        # Threshold checks read from the renamed copy only; the original
        # mixed search_data/search_data1 reads, but the CORR/COINT columns
        # are identical in both, so behavior is unchanged.
        if search_data1.at[target_date, 'CORR_3M'] < CORR_THRE_SHOLD_THREE_MONTH \
                or search_data1.at[target_date, 'CORR_1Y'] < CORR_THRE_SHOLD_ONE_YEAR:
            continue

        if search_data1.at[target_date, 'COINT_3M'] > COINT_MAX_VAL \
                or search_data1.at[target_date, 'COINT_1Y'] > COINT_MAX_VAL:
            continue

        # Collect rows and concatenate once at the end: DataFrame.append
        # was removed in pandas 2.0.
        report_rows.append(search_data1)

    if report_rows:
        ft.write_csv(pd.concat(report_rows), day_report_file_name)
Exemple #5
0
def get_all_codes():
    """
    Return the list of all target stock codes.

    Reads the target stock list CSV and extracts its CODE column.
    """
    stock_list_path = setting.get_target_stock_data_list_file_path()
    stock_list = file_util.read_csv(stock_list_path)
    return list(stock_list['CODE'])
def generate_portfolio_csv_file(caculated_csv_path=caculated_csv_dir,
                                portfolio_csv_path=portfolio_csv_dir):
    """Regenerate trade-signal portfolio CSVs from the calculated pair data."""
    print('generate_portfolio_csv_file start ' + strftime("%Y-%m-%d %H:%M:%S"))
    ft.clean_target_dir(portfolio_csv_path)

    file_name_list = ft.getAllTargetSymbols(caculated_csv_path)
    total = len(file_name_list)
    for position, file_name in enumerate(file_name_list, start=1):
        print('Processing {0}/{1}...'.format(position, total))
        # Pair files are named "<symbolA>_<symbolB>".
        parts = file_name.split('_')
        pairs_data = ft.read_csv(
            os.path.join(caculated_csv_path, file_name + '.csv'))
        pairs_util.signal_generate(pairs_data, parts[0], parts[1],
                                   portfolio_csv_path)

    print('generate_portfolio_csv_file end ' + strftime("%Y-%m-%d %H:%M:%S"))
def getTargetTradeData(year, month, sellCode, buyCode):
    """Load the trade CSV for one pair, DATE-indexed and sorted ascending.

    The pair file is named with the lexicographically smaller code first.
    Returns None when the file is missing or cannot be read/parsed.
    """
    csvFile = sellCode + "_" + buyCode + '.csv'
    if (sellCode > buyCode):
        csvFile = buyCode + "_" + sellCode + '.csv'
    targetFilePath = os.path.join(setting.get_trade_dir(), year, month,
                                  csvFile)
    if not os.path.exists(targetFilePath):
        _logger.error(
            "No target csv file exists. {0} - {1}...{2}/{3}...{4}".format(
                sellCode, buyCode, year, month, targetFilePath))
        return None

    # Initialize before the try block. The original assigned None on an
    # unreachable line (after `return`), so a failure inside the try raised
    # UnboundLocalError at the final return instead of returning None.
    trade_data = None
    try:
        trade_data = ft.read_csv(targetFilePath)
        trade_data['DATE'] = pd.to_datetime(trade_data['DATE'])
        trade_data.index = trade_data['DATE']
        trade_data = trade_data.sort_index(ascending=True)
    except Exception:
        # Narrowed from a bare `except:` so KeyboardInterrupt/SystemExit
        # still propagate; on any read/parse failure, log and return None
        # rather than a partially-processed frame.
        _logger.error("Error. {0} - {1}...{2}/{3}...{4}".format(
            sellCode, buyCode, year, month, targetFilePath))
        trade_data = None

    return trade_data
def generate_trade_report(history_data_file_full_path):
    """Generate the back-test analysis output from the trade history file.

    Reads the open/close trade history, optionally restricts it to the
    configured back-test window, replays each trade from its open date and
    hands one result row per trade to outputAnalysisCsvFile.
    """
    trade_history_data = ft.read_csv(history_data_file_full_path)
    trade_history_data['open_date'] = pd.to_datetime(
        trade_history_data['open_date'])

    # Optional back-test window filtering on the `close` column.
    if config.backtest_start_ymd is not None:
        trade_history_data = trade_history_data[
            trade_history_data.close > config.backtest_start_ymd]

    if config.backtest_end_ymd is not None:
        trade_history_data = trade_history_data[
            trade_history_data.close < config.backtest_end_ymd]

    result_rows = []

    for index, row in trade_history_data.iterrows():
        year = str(int(row.YEAR))
        month = str(int(row.MONTH))
        sellCode = str(int(row.sellCode))
        buytCode = str(int(row.buyCode))
        _logger.debug("Processing {0}/{1} {2} - {3}...{4}/{5}".format(
            index + 1, len(trade_history_data), sellCode, buytCode, year,
            month))

        trade_data = getTargetTradeData(year, month, sellCode, buytCode)
        if trade_data is None:
            _logger.error("No target data found. {0} - {1}...{2}".format(
                sellCode, buytCode, year + month))
            continue
        # Replay the trade from its open date onwards.
        search_data = trade_data.loc[trade_data['DATE'] >= row.open_date]
        result_row = generate_result(search_data, sellCode, buytCode, row)
        print(result_row)
        # Collect rows and build the frame once at the end:
        # DataFrame.append was removed in pandas 2.0.
        result_rows.append(result_row)

    outputList = pd.DataFrame(result_rows, columns=record_column())
    outputAnalysisCsvFile(outputList)
def generate_caculated_data_csv(symbols,
                                caculated_csv_path=caculated_csv_dir,
                                startdate=None,
                                enddate=None,
                                mode='insert'):
    """Compute spread/z-score CSVs for every unordered pair of *symbols*.

    mode='create' wipes the output directory first; any other mode keeps
    existing CSVs and merges fresh rows into them (combine_first, new
    values win). startdate/enddate (datetime) optionally clip the data
    window before calculation. One CSV is written per pair, named
    "<symb1>_<symb2>.csv", newest rows first.
    """
    # Tracks pairs already processed, in either order.
    symbol_check_dict = {}

    if (mode == 'create'):
        ft.clean_target_dir(caculated_csv_path)
    else:
        ft.create_target_dir(caculated_csv_path)

    index1 = 0
    for symb1 in symbols:
        index1 = index1 + 1
        print('Processing {0}/{1} {2}...'.format(index1, len(symbols), symb1))
        for symb2 in symbols:

            # Skip self-pairs and pairs already handled in either order.
            if (symb1 == symb2 or (symb1 + symb2) in symbol_check_dict
                    or (symb2 + symb1) in symbol_check_dict):
                continue
            symbol_check_dict[symb1 + symb2] = ''

            _pairs = pairs_util.create_pairs_dataframe(
                setting.get_input_data_dir(), symb1, symb2)

            # Optional clipping of the calculation window (index is DATE).
            if startdate is not None:
                start_date = datetime.datetime.strftime(startdate, '%Y-%m-%d')
                _pairs = _pairs[_pairs.index >= start_date]
            if enddate is not None:
                end_date = datetime.datetime.strftime(enddate, '%Y-%m-%d')
                _pairs = _pairs[_pairs.index <= end_date]
            #_pairs = _pairs[(_pairs.index >= startdate) & (_pairs.index <= enddate)]

            result_write_csv = os.path.join(caculated_csv_path,
                                            symb1 + '_' + symb2 + '.csv')
            _pairs = _pairs.sort_values('DATE', ascending=True)
            _pairs = pairs_util.calculate_spread_zscore(_pairs, symb1, symb2)

            if ft.is_file_exists(result_write_csv):
                # Earlier output exists: merge the fresh data over it.
                csv_pairs = ft.read_csv(result_write_csv)
                csv_pairs['DATE'] = pd.to_datetime(csv_pairs['DATE'])
                csv_pairs = csv_pairs.sort_values('DATE', ascending=True)
                csv_pairs.index = csv_pairs['DATE']

                # Date index of the most recent previously-stored row;
                # presumably limits corr/coint recomputation to newer rows
                # -- confirm against set_corr_and_coint.
                last_row_date = csv_pairs.tail(1).index
                # print ('last_row_date {0}'.format(last_row_date))

                _pairs = _pairs.combine_first(csv_pairs)
                result_write_csv = os.path.join(caculated_csv_path,
                                                symb1 + '_' + symb2 + '.csv')

                _pairs = _pairs.loc[:, [
                    'OPEN_' + symb1, 'CLOSE_' + symb1, 'OPEN_' +
                    symb2, 'CLOSE_' + symb2, 'saya_divide', 'saya_divide_mean',
                    'saya_divide_std', 'saya_divide_sigma',
                    'deviation_rate(%)', 'CORR_3M', 'COINT_3M', 'CORR_1Y',
                    'COINT_1Y'
                ]]

                _pairs = _pairs.sort_values('DATE', ascending=False)
                set_corr_and_coint(_pairs, symb1, symb2, last_row_date)
                ft.write_csv(_pairs, result_write_csv)

            else:
                # First run for this pair: write the full frame as-is.
                _pairs = _pairs.sort_values('DATE', ascending=False)
                set_corr_and_coint(_pairs, symb1, symb2)
                ft.write_csv(_pairs, result_write_csv)
Exemple #10
0
                                           'CLOSE_' + symblB]
    else:
        CLOSE_A_1Y_ago = 0
        CLOSE_B_1Y_ago = 0

    two_year_ago = datetime.today() - relativedelta(years=2)
    two_year_data = pairs[pairs.index > two_year_ago]
    date_2y_ago = two_year_ago.strftime('%Y/%m/%d')
    if not two_year_data.empty:
        CLOSE_A_2Y_ago = two_year_data.loc[two_year_data.index[-1],
                                           'CLOSE_' + symblA]
        CLOSE_B_2Y_ago = two_year_data.loc[two_year_data.index[-1],
                                           'CLOSE_' + symblB]
    else:
        CLOSE_A_2Y_ago = 0
        CLOSE_B_2Y_ago = 0

    return date_3m_ago, CLOSE_A_3M_ago, CLOSE_B_3M_ago, date_6m_ago, CLOSE_A_6M_ago, CLOSE_B_6M_ago, date_1y_ago,\
           CLOSE_A_1Y_ago, CLOSE_B_1Y_ago, date_2y_ago, CLOSE_A_2Y_ago, CLOSE_B_2Y_ago


if __name__ == '__main__':
    # Manual smoke check: print historical close-price data for one pair.
    code_a = '9513'
    code_b = '9810'
    pair_csv = os.path.join(setting.get_result_dir(),
                            code_a + '_' + code_b + '.csv')
    pair_df = file_util.read_csv(pair_csv)
    print(get_before_close_price_data(pair_df, code_a, code_b))
def generate_input_stock_data(target_date):
    """Merge one day's downloaded quotes into each symbol's history CSV.

    For every code in the target stock list, prepends the *target_date*
    (YYYYmmDD) row from the daily download CSV to the symbol's master CSV,
    de-duplicates, rewrites the master file, and emits a trimmed input
    file covering the last INPUT_TARGET_DATA_YEAR_SPAN years. Finally
    writes the tracking file for *target_date*.
    """
    target_download_csv_file_path = setting.get_target_download_csv_file_path(target_date)

    if not file_util.is_file_exists(target_download_csv_file_path):
        print('No target download CSV file exists. file=' + target_download_csv_file_path)
        return

    print('Generating input target stock data start.')
    target_stock_data_list = file_util.read_csv(setting.get_target_stock_data_list_file_path())

    download_stock_data = file_util.read_csv(target_download_csv_file_path)
    download_stock_data.columns = ['DATE', 'CODE', 'KBN', 'OPEN', 'HIGH', 'LOW', 'CLOSE', 'Volume']

    file_util.clean_target_dir(setting.get_generated_input_target_stock_data_dir())

    # Loop-invariant: parse the target date once, not per symbol.
    target_date_time = datetime.strptime(target_date, '%Y%m%d')
    insert_date = target_date_time.strftime("%Y-%m-%d")

    for index, data_row in target_stock_data_list.iterrows():
        symb = str(data_row['CODE'])
        print('symb:' + symb)

        data_csv_file = os.path.join(setting.get_org_all_stock_data_file_dir(), symb + '.csv')
        symb_df = file_util.read_csv(data_csv_file)

        searched_data = download_stock_data[download_stock_data['CODE'] == data_row['CODE']]
        if searched_data.empty:
            # No quote for this symbol on target_date.
            continue

        first_row = searched_data.iloc[0]
        insert_value = [[insert_date, first_row['OPEN'], first_row['HIGH'],
                         first_row['LOW'], first_row['CLOSE'], first_row['Volume']]]
        _tmp_df = pd.DataFrame(data=insert_value,
                               columns=['DATE', 'OPEN', 'HIGH', 'LOW', 'CLOSE', 'Volume'])
        # Prepend the new day's row so the newest date comes first.
        symb_df = pd.concat([_tmp_df, symb_df], sort=True)
        symb_df = symb_df.loc[:, ['DATE', 'OPEN', 'HIGH', 'LOW', 'CLOSE', 'Volume']]

        symb_df['DATE'] = pd.to_datetime(symb_df['DATE'])
        symb_df.index = symb_df['DATE']
        # NOTE(review): de-duplicates on whole rows, not by DATE alone --
        # confirm that two different rows for one date should both be kept.
        symb_df = symb_df.drop_duplicates(keep='first')

        # Strip a trailing ".0" from numeric strings. Raw strings fix the
        # invalid '\.' escape in the original patterns (SyntaxWarning on
        # modern Python); the matched pattern is identical.
        for col in ('OPEN', 'HIGH', 'LOW', 'CLOSE', 'Volume'):
            symb_df[col] = symb_df[col].astype(str).replace(r'\.0', '', regex=True)

        file_util.write_csv_without_index(symb_df, data_csv_file)

        # Trim to the configured span (minus one day so the boundary date
        # itself is included by the strict > comparison below).
        target_data_span = target_date_time - relativedelta(years=INPUT_TARGET_DATA_YEAR_SPAN)
        target_data_span = target_data_span - relativedelta(days=1)
        target_data = symb_df[symb_df.index > target_data_span]

        save_file_path = os.path.join(setting.get_generated_input_target_stock_data_dir(), symb + '.csv')
        file_util.write_csv_without_index(target_data, save_file_path)

    generate_tracking_file(target_date)
    print('Generating input target stock data end.')
def output_report(corr_df, isFastCaculateMode, resultDir,
                  corr_result_file_name):
    """Build the final pair-trading report CSV from the correlation frame.

    For every SYM_A/SYM_B row in corr_df, loads the pair's result CSV from
    resultDir, collects latest open/close prices, sigma values, lot sizes,
    back-test statistics (via signal_generate) and historical close prices
    (3M/6M/1Y/2Y ago), then writes the combined table sorted by
    total_profit to resultDir/corr_result_file_name (and also to the
    master dir unless running in fast-calculate mode). Pairs whose CSV is
    missing get zero-filled rows so every list stays aligned with corr_df.
    """

    print('Output Report Processing...')
    timestr = time.strftime("%Y%m%d-%H%M%S")
    # NOTE(review): report_file is currently unused -- the Excel writing
    # code at the bottom of this function is commented out.
    report_file = os.path.join(resultDir, 'report_' + timestr + '.xlsx')

    # corr_df = file_util.read_csv(os.path.join(setting.get_result_dir(), corr_result_file_name))
    master_df = file_util.read_csv(os.path.join(setting.get_master_file_dir()))
    corr_df = trade_util.addMasterInfo(corr_df, master_df)

    corr_df_new = corr_df.copy(deep=True)

    # One accumulator list per output column; each list gets exactly one
    # value appended per corr_df row (zeros on FileNotFoundError), so they
    # all stay the same length as corr_df for the assign() below.
    OPEN_A_list = []
    CLOSE_A_list = []
    OPEN_B_list = []
    CLOSE_B_list = []
    SIGMA = []
    ABS_SIGMA = []
    LAST_DAY_SIGMA = []
    TRADE_A = []
    TRADE_B = []
    DEV_RATE = []
    AXIS_LOT_SIZE = []
    PAIR_LOT_SIZE = []
    LOT_SIZE_DIFF = []

    total_profit_list = []
    average_profit_list = []
    average_plt_list = []
    total_times_list = []
    plus_times_list = []
    minus_times_list = []
    pl_times_list = []
    open_days_list = []
    stop_profit_times_list = []
    stop_loss_times_list = []
    max_day_over_times_list = []

    DATE_3M_ago_list = []
    CLOSE_A_3M_ago_list = []
    CLOSE_B_3M_ago_list = []

    DATE_6M_ago_list = []
    CLOSE_A_6M_ago_list = []
    CLOSE_B_6M_ago_list = []

    DATE_1y_ago_list = []
    CLOSE_A_1y_ago_list = []
    CLOSE_B_1y_ago_list = []

    DATE_2y_ago_list = []
    CLOSE_A_2y_ago_list = []
    CLOSE_B_2y_ago_list = []

    index1 = 0
    for index, row in corr_df.iterrows():
        # print('row.SYM_A:'+str(int(row.SYM_A)))
        symblA = str(int(row.SYM_A))
        symblB = str(int(row.SYM_B))
        # print('symblA=%s symblB=%s' % (symblA,symblB))
        index1 = index1 + 1
        print('Processing {0}/{1} {2} - {3}...'.format(index1, len(corr_df),
                                                       symblA, symblB))

        try:
            # Pair CSVs are sorted newest-first, so row [0] is the latest
            # trading day and [1] the day before.
            _file = os.path.join(resultDir, symblA + '_' + symblB + '.csv')
            _df = file_util.read_csv(_file)

            OPEN_A_list.append(_df['OPEN_' + symblA][0])
            CLOSE_A_list.append(_df['CLOSE_' + symblA][0])
            OPEN_B_list.append(_df['OPEN_' + symblB][0])
            CLOSE_B_list.append(_df['CLOSE_' + symblB][0])
            SIGMA.append(_df['saya_divide_sigma'][0])
            ABS_SIGMA.append(np.abs(_df['saya_divide_sigma'][0]))
            LAST_DAY_SIGMA.append(np.abs(_df['saya_divide_sigma'][1]))

            # Positive sigma: A is rich relative to B -> sell A, buy B.
            if (_df['saya_divide_sigma'][0] > 0):
                TRADE_A.append("SELL")
                TRADE_B.append("BUY")
            else:
                TRADE_A.append("BUY")
                TRADE_B.append("SELL")

            DEV_RATE.append(_df['deviation_rate(%)'][0])

            axis_lot_size, pair_lot_size, lot_size_diff = trade_util.get_lot_size(
                _df['CLOSE_' + symblA][0], _df['CLOSE_' + symblB][0])
            # print(axis_lot_size)
            AXIS_LOT_SIZE.append(axis_lot_size)
            PAIR_LOT_SIZE.append(pair_lot_size)
            LOT_SIZE_DIFF.append(lot_size_diff)

            # Back-test this pair and collect the summary statistics.
            # print(_df)
            total_profit, average_profit, average_pl, total_times, plus_times, minus_times, open_days, stop_profit_times, stop_loss_times, \
            max_day_over_times = signal_generate(_df, symblA, symblB, resultDir)

            total_profit_list.append(total_profit)
            average_profit_list.append(average_profit)
            average_plt_list.append(average_pl)
            total_times_list.append(total_times)
            plus_times_list.append(plus_times)
            minus_times_list.append(minus_times)
            # Win rate in percent; guarded against division by zero.
            if plus_times <= 0 or total_times <= 0:
                pl_times_list.append(0)
            else:
                pl_times_list.append(round(plus_times / total_times * 100, 2))
            open_days_list.append(open_days)

            stop_profit_times_list.append(stop_profit_times)
            stop_loss_times_list.append(stop_loss_times)
            max_day_over_times_list.append(max_day_over_times)

            # Historical close prices at 3 months / 6 months / 1 / 2 years ago.
            date_3m_ago, CLOSE_A_3M_ago, CLOSE_B_3M_ago, date_6m_ago, CLOSE_A_6M_ago, CLOSE_B_6M_ago, date_1y_ago, \
            CLOSE_A_1Y_ago, CLOSE_B_1Y_ago, date_2y_ago, CLOSE_A_2Y_ago, CLOSE_B_2Y_ago = trade_util.get_before_close_price_data(_df, symblA, symblB)

            DATE_3M_ago_list.append(date_3m_ago)
            CLOSE_A_3M_ago_list.append(CLOSE_A_3M_ago)
            CLOSE_B_3M_ago_list.append(CLOSE_B_3M_ago)

            DATE_6M_ago_list.append(date_6m_ago)
            CLOSE_A_6M_ago_list.append(CLOSE_A_6M_ago)
            CLOSE_B_6M_ago_list.append(CLOSE_B_6M_ago)

            DATE_1y_ago_list.append(date_1y_ago)
            CLOSE_A_1y_ago_list.append(CLOSE_A_1Y_ago)
            CLOSE_B_1y_ago_list.append(CLOSE_B_1Y_ago)

            DATE_2y_ago_list.append(date_2y_ago)
            CLOSE_A_2y_ago_list.append(CLOSE_A_2Y_ago)
            CLOSE_B_2y_ago_list.append(CLOSE_B_2Y_ago)

            # path, ext = os.path.splitext(os.path.basename(_file))
            # _df.to_excel(writer, sheet_name=path)

        except FileNotFoundError:
            # Pair CSV missing: append zero placeholders to every list so
            # they remain aligned with corr_df rows.
            OPEN_A_list.append(0)
            CLOSE_A_list.append(0)
            OPEN_B_list.append(0)
            CLOSE_B_list.append(0)
            SIGMA.append(0)
            ABS_SIGMA.append(0)
            LAST_DAY_SIGMA.append(0)
            DEV_RATE.append(0)
            AXIS_LOT_SIZE.append(0)
            PAIR_LOT_SIZE.append(0)
            LOT_SIZE_DIFF.append(0)

            TRADE_A.append("")
            TRADE_B.append("")

            total_profit_list.append(0)
            average_profit_list.append(0)
            average_plt_list.append(0)
            total_times_list.append(0)
            plus_times_list.append(0)
            minus_times_list.append(0)
            pl_times_list.append(0)
            open_days_list.append(0)

            stop_profit_times_list.append(0)
            stop_loss_times_list.append(0)
            max_day_over_times_list.append(0)

            DATE_3M_ago_list.append(0)
            CLOSE_A_3M_ago_list.append(0)
            CLOSE_B_3M_ago_list.append(0)

            DATE_6M_ago_list.append(0)
            CLOSE_A_6M_ago_list.append(0)
            CLOSE_B_6M_ago_list.append(0)

            DATE_1y_ago_list.append(0)
            CLOSE_A_1y_ago_list.append(0)
            CLOSE_B_1y_ago_list.append(0)

            DATE_2y_ago_list.append(0)
            CLOSE_A_2y_ago_list.append(0)
            CLOSE_B_2y_ago_list.append(0)

            continue

    # Attach all collected columns to the report frame at once.
    corr_df_new = corr_df_new.assign(
        OPEN_A=OPEN_A_list,
        CLOSE_A=CLOSE_A_list,
        OPEN_B=OPEN_B_list,
        CLOSE_B=CLOSE_B_list,
        SIGMA=SIGMA,
        ABS_SIGMA=ABS_SIGMA,
        LAST_DAY_SIGMA=LAST_DAY_SIGMA,
        TRADE_A=TRADE_A,
        TRADE_B=TRADE_B,
        DEV_RATE=DEV_RATE,
        AXIS_LOT_SIZE=AXIS_LOT_SIZE,
        PAIR_LOT_SIZE=PAIR_LOT_SIZE,
        LOT_SIZE_DIFF=LOT_SIZE_DIFF,
        total_profit=total_profit_list,
        average_profit=average_profit_list,
        average_pl=average_plt_list,
        total_times=total_times_list,
        plus_times=plus_times_list,
        minus_times=minus_times_list,
        pl_times=pl_times_list,
        open_days=open_days_list,
        stop_profit_times=stop_profit_times_list,
        stop_loss_times=stop_loss_times_list,
        max_day_over_times=max_day_over_times_list,
        DATE_3M_ago=DATE_3M_ago_list,
        CLOSE_A_3M_ago=CLOSE_A_3M_ago_list,
        CLOSE_B_3M_ago=CLOSE_B_3M_ago_list,
        DATE_6M_ago=DATE_6M_ago_list,
        CLOSE_A_6M_ago=CLOSE_A_6M_ago_list,
        CLOSE_B_6M_ago=CLOSE_B_6M_ago_list,
        DATE_1y_ago=DATE_1y_ago_list,
        CLOSE_A_1y_ago=CLOSE_A_1y_ago_list,
        CLOSE_B_1y_ago=CLOSE_B_1y_ago_list,
        DATE_2y_ago=DATE_2y_ago_list,
        CLOSE_A_2y_ago=CLOSE_A_2y_ago_list,
        CLOSE_B_2y_ago=CLOSE_B_2y_ago_list)

    # print(corr_df_new)
    # corr_df_new['ABS_SIGMA'] = np.abs(corr_df_new['SIGMA'])
    # Best-performing pairs first.
    corr_df_new = corr_df_new.sort_values('total_profit', ascending=False)

    # Final column order of the report.
    corr_df_new = corr_df_new.loc[:, [
        'SYM_A',
        'SYM_A_NAME',
        'SYM_A_INDUSTRY',
        'OPEN_A',
        'CLOSE_A',
        'AXIS_LOT_SIZE',
        'TRADE_A',
        'SYM_B',
        'SYM_B_NAME',
        'SYM_B_INDUSTRY',
        'OPEN_B',
        'CLOSE_B',
        'PAIR_LOT_SIZE',
        'TRADE_B',
        'CORR_3M',
        'CORR_1Y',
        'COINT_3M',
        'COINT_1Y',
        'SIGMA',
        'ABS_SIGMA',
        'LAST_DAY_SIGMA',
        'DEV_RATE',
        'LOT_SIZE_DIFF',
        'total_profit',
        'average_profit',
        'average_pl',
        'total_times',
        'plus_times',
        'minus_times',
        'pl_times',
        'open_days',
        'stop_profit_times',
        'stop_loss_times',
        'max_day_over_times',
        'DATE_3M_ago',
        'CLOSE_A_3M_ago',
        'CLOSE_B_3M_ago',
        'DATE_6M_ago',
        'CLOSE_A_6M_ago',
        'CLOSE_B_6M_ago',
        'DATE_1y_ago',
        'CLOSE_A_1y_ago',
        'CLOSE_B_1y_ago',
        'DATE_2y_ago',
        'CLOSE_A_2y_ago',
        'CLOSE_B_2y_ago',
    ]]

    file_util.write_csv(corr_df_new,
                        os.path.join(resultDir, corr_result_file_name))
    # In normal mode also keep a copy under the master directory.
    if (isFastCaculateMode == False):
        file_util.write_csv(
            corr_df_new,
            os.path.join(setting.get_master_dir(), corr_result_file_name))

    # with pd.ExcelWriter(report_file) as writer:
    #corr_df_new.to_excel(writer, sheet_name='CORR')

    #writer.save()
    #writer.close()

    print('Output Report Process end!')
def main(args):
    """Entry point: compute pair correlations/cointegrations and report.

    args[1] == 'fast'/'FAST' enables fast mode, which only re-checks the
    pairs listed in the current report instead of scanning every symbol
    combination. Either way each usable pair's spread data is written to
    the result dir and a summary report is produced via output_report.
    """
    start_time = datetime.now()
    print('maint start ' + strftime("%Y-%m-%d %H:%M:%S"))

    isFastCaculateMode = False
    if (len(args) >= 2 and (args[1] == 'fast' or args[1] == 'FAST')):
        print('FAST CACULATE MODE')
        isFastCaculateMode = True

    file_util.clean_target_dir(setting.get_result_dir())

    # get all target stock symbols
    symbols = file_util.getAllTargetSymbols(setting.get_input_data_dir())

    print('Total symbols size:' + str(len(symbols)))
    symbols_corr_list = []

    def _process_pair(symb1, symb2):
        """Check one pair; record its stats and write its CSV if usable.

        Extracted helper: the fast and full branches below previously
        duplicated this body line-for-line.
        """
        _pairs = create_pairs_dataframe(setting.get_input_data_dir(),
                                        symb1, symb2)
        corr_3m, corr_1y = trade_util.check_corr(_pairs, symb1, symb2)

        coint_3m, coint_1y = trade_util.check_cointegration(
            _pairs, symb1, symb2)

        if not is_available_pari_data(_pairs, symb1, symb2, corr_3m,
                                      corr_1y, coint_3m, coint_1y):
            return

        symbols_corr_list.append(
            [symb1, symb2, corr_3m, corr_1y, coint_3m, coint_1y])

        _pairs = _pairs.sort_values('DATE', ascending=True)
        _pairs = calculate_spread_zscore(_pairs, symb1, symb2)
        _pairs = _pairs.sort_values('DATE', ascending=False)

        file_util.write_csv(
            _pairs,
            os.path.join(setting.get_result_dir(),
                         symb1 + '_' + symb2 + '.csv'))

    index1 = 0
    if isFastCaculateMode:
        # Fast mode: only revisit the pairs already in the current report.
        _pais = file_util.read_csv(setting.get_currenty_report_file())

        for index, row in _pais.iterrows():
            index1 = index1 + 1
            symb1 = str(int(row.SYM_A))
            symb2 = str(int(row.SYM_B))
            print('Processing {0}/{1} {2} - {3}...'.format(
                index1, len(_pais), symb1, symb2))
            _process_pair(symb1, symb2)
    else:
        # Full mode: scan every unordered pair of symbols exactly once.
        symbol_check_dict = {}
        for symb1 in symbols:
            index1 = index1 + 1
            print('Processing {0}/{1} {2}...'.format(index1, len(symbols),
                                                     symb1))
            for symb2 in symbols:
                if (symb1 == symb2 or (symb1 + symb2) in symbol_check_dict
                        or (symb2 + symb1) in symbol_check_dict):
                    continue
                symbol_check_dict[symb1 + symb2] = ''
                _process_pair(symb1, symb2)

    # NOTE(review): itemgetter(3) sorts by CORR_1Y (list index 3), not the
    # 3-month correlation the original comment claimed -- confirm which
    # ordering is intended. Behavior kept as-is.
    corr_data = sorted(symbols_corr_list, key=itemgetter(3), reverse=True)
    corr_data = pd.DataFrame(columns=[
        'SYM_A', 'SYM_B', 'CORR_3M', 'CORR_1Y', 'COINT_3M', 'COINT_1Y'
    ],
                             data=corr_data)
    # file_util.write_csv(corr_data, os.path.join(setting.get_result_dir(), corr_result_file_name))

    output_report(corr_data, isFastCaculateMode, setting.get_result_dir(),
                  setting.corr_result_file_name)

    process_time = datetime.now() - start_time
    print('main end!' + strftime("%Y-%m-%d %H:%M:%S"))
    print('Time cost:{0}'.format(process_time))