def create_pairs_dataframe(data_dir, symbol1, symbol2):
    # print("Importing CSV data...")
    # print("Constructing dual matrix for %s and %s" % (symbol1, symbol2))
    sym1 = file_util.read_csv(os.path.join(data_dir, symbol1 + '.csv'))
    sym2 = file_util.read_csv(os.path.join(data_dir, symbol2 + '.csv'))
    # Suffix OPEN/CLOSE with the symbol code so the merged frame keeps both legs.
    sym1.rename(columns={
        'OPEN': 'OPEN_' + symbol1,
        'CLOSE': 'CLOSE_' + symbol1
    }, inplace=True)
    sym2.rename(columns={
        'OPEN': 'OPEN_' + symbol2,
        'CLOSE': 'CLOSE_' + symbol2
    }, inplace=True)
    pairs = pd.merge(sym1, sym2, on='DATE', how='left')
    pairs['DATE'] = pd.to_datetime(pairs['DATE'])
    pairs.index = pairs['DATE']
    # HIGH/LOW/Volume were not renamed, so the merge suffixed them _x/_y;
    # they are not needed downstream and are dropped here.
    pairs = pairs.drop(
        ['DATE', 'HIGH_x', 'HIGH_y', 'LOW_x', 'LOW_y', 'Volume_x', 'Volume_y'],
        axis=1)
    pairs = pairs.dropna()
    return pairs
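
# A minimal usage sketch (assumptions: per-symbol CSVs named <code>.csv live
# under the input data dir and carry DATE/OPEN/HIGH/LOW/CLOSE/Volume columns;
# the codes below are placeholders, not real targets):
def _example_create_pairs():
    pairs = create_pairs_dataframe(setting.get_input_data_dir(), '1234', '5678')
    # One row per shared trading day, indexed by DATE, with the columns
    # OPEN_1234, CLOSE_1234, OPEN_5678, CLOSE_5678.
    print(pairs.head())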
def get_daily_stock_data(code, target_ymd):
    """ Get the target daily stock data by code and date (YYYYMMDD) """
    target_download_csv_file_path = setting.get_target_download_csv_file_path(
        target_ymd)
    if not file_util.is_file_exists(target_download_csv_file_path):
        data_download.main(target_ymd)
    if not file_util.is_file_exists(target_download_csv_file_path):
        print("No target data")
        return None
    download_daily_stock_data = file_util.read_csv(
        target_download_csv_file_path)
    download_daily_stock_data.columns = [
        'DATE', 'CODE', 'KBN', 'OPEN', 'HIGH', 'LOW', 'CLOSE', 'Volume'
    ]
    # Copy the slice so the dtype conversion below does not raise a
    # SettingWithCopyWarning against the original frame.
    searched_data = download_daily_stock_data[
        download_daily_stock_data['CODE'] == code].copy()
    searched_data['Volume'] = searched_data['Volume'].astype(float)
    searched_data = searched_data.drop(columns='KBN')
    return searched_data
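
# Minimal usage sketch (the code and date are placeholders; whether CODE
# compares as int or str depends on how file_util.read_csv parses the column):
def _example_daily_data():
    daily = get_daily_stock_data(1234, '20240105')
    if daily is not None:
        print(daily[['DATE', 'OPEN', 'CLOSE', 'Volume']])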
def cacluate_needed_data(symb1, symb2, csv_file_full_path):
    _pairs = pair_trade.create_pairs_dataframe(setting.get_input_data_dir(),
                                               symb1, symb2)
    _pairs = _pairs.sort_values('DATE', ascending=True)
    _pairs = pair_trade.calculate_spread_zscore(_pairs, symb1, symb2)
    if ft.is_file_exists(csv_file_full_path):
        # Incremental update: overlay the freshly calculated rows on the rows
        # already persisted in the CSV, then recalculate the correlation and
        # cointegration columns only from the last persisted date onward.
        csv_pairs = ft.read_csv(csv_file_full_path)
        csv_pairs['DATE'] = pd.to_datetime(csv_pairs['DATE'])
        csv_pairs = csv_pairs.sort_values('DATE', ascending=True)
        csv_pairs.index = csv_pairs['DATE']
        last_row_date = csv_pairs.tail(1).index
        # print('last_row_date {0}'.format(last_row_date))
        _pairs = _pairs.combine_first(csv_pairs)
        _pairs = _pairs.loc[:, [
            'OPEN_' + symb1, 'CLOSE_' + symb1, 'OPEN_' + symb2,
            'CLOSE_' + symb2, 'saya_divide', 'saya_divide_mean',
            'saya_divide_std', 'saya_divide_sigma', 'deviation_rate(%)',
            'CORR_3M', 'COINT_3M', 'CORR_1Y', 'COINT_1Y'
        ]]
        _pairs = _pairs.sort_values('DATE', ascending=False)
        pair_back_test.set_corr_and_coint(_pairs, symb1, symb2, last_row_date)
        ft.write_csv(_pairs, csv_file_full_path)
    else:
        _pairs = _pairs.sort_values('DATE', ascending=False)
        pair_back_test.set_corr_and_coint(_pairs, symb1, symb2)
        ft.write_csv(_pairs, csv_file_full_path)
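
# Why combine_first: values from the freshly calculated frame win where both
# frames have a row, while rows that exist only in the persisted CSV survive.
# A self-contained sketch with toy data:
def _example_combine_first():
    old = pd.DataFrame({'CLOSE': [100, 101]},
                       index=pd.to_datetime(['2024-01-04', '2024-01-05']))
    new = pd.DataFrame({'CLOSE': [999, 102]},
                       index=pd.to_datetime(['2024-01-05', '2024-01-09']))
    merged = new.combine_first(old)
    # 2024-01-04 -> 100 (old only), 2024-01-05 -> 999 (new wins),
    # 2024-01-09 -> 102 (new only)
    print(merged)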
def generate_day_report(target_date, file_name_list):
    day_report_file_name = os.path.join(
        backtest_report_each_day_dir,
        target_date.strftime("%Y-%m-%d") + '.csv')
    day_report_data = pd.DataFrame()
    for file_name in file_name_list:
        _temp = file_name.split('_')
        symb1 = _temp[0]
        symb2 = _temp[1]
        pairs_data = ft.read_csv(
            os.path.join(caculated_csv_dir, file_name + '.csv'))
        pairs_data['DATE'] = pd.to_datetime(pairs_data['DATE'])
        pairs_data.index = pairs_data['DATE']
        search_data = pairs_data[target_date:target_date]
        if search_data.empty:
            # No row for this date (e.g. a non-trading day), so no pair file
            # will have one either; emit nothing for this day.
            return
        # Normalize the per-symbol column names to the generic A/B layout
        # used by the day report.
        search_data1 = search_data.copy(deep=True)
        search_data1.rename(columns={
            'OPEN_' + symb1: 'OPEN_A',
            'CLOSE_' + symb1: 'CLOSE_A',
            'OPEN_' + symb2: 'OPEN_B',
            'CLOSE_' + symb2: 'CLOSE_B'
        }, inplace=True)
        search_data1['SYM_A'] = symb1
        search_data1['SYM_B'] = symb2
        search_data1 = search_data1.loc[:, [
            'SYM_A', 'OPEN_A', 'CLOSE_A', 'SYM_B', 'OPEN_B', 'CLOSE_B',
            'saya_divide', 'saya_divide_mean', 'saya_divide_std',
            'saya_divide_sigma', 'deviation_rate(%)', 'CORR_3M', 'COINT_3M',
            'CORR_1Y', 'COINT_1Y'
        ]]
        # Skip pairs whose correlation is below the threshold or whose
        # cointegration value exceeds the allowed maximum, in either window.
        if search_data1.at[target_date, 'CORR_3M'] < CORR_THRE_SHOLD_THREE_MONTH \
                or search_data1.at[target_date, 'CORR_1Y'] < CORR_THRE_SHOLD_ONE_YEAR:
            continue
        if search_data1.at[target_date, 'COINT_3M'] > COINT_MAX_VAL \
                or search_data1.at[target_date, 'COINT_1Y'] > COINT_MAX_VAL:
            continue
        # DataFrame.append is gone in pandas 2.x; concat is the replacement.
        day_report_data = pd.concat([day_report_data, search_data1])
    if not day_report_data.empty:
        ft.write_csv(day_report_data, day_report_file_name)
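
# The correlation/cointegration gate above, factored out as a pure-function
# sketch for clarity (the function name is hypothetical; the constants are
# the module-level thresholds used above):
def _passes_pair_filter(corr_3m, corr_1y, coint_3m, coint_1y):
    if corr_3m < CORR_THRE_SHOLD_THREE_MONTH or corr_1y < CORR_THRE_SHOLD_ONE_YEAR:
        return False
    if coint_3m > COINT_MAX_VAL or coint_1y > COINT_MAX_VAL:
        return False
    return True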
def get_all_codes():
    """ Get all target stock codes """
    # return DB_CONN.basic.distinct('code')
    target_stock_data_list = file_util.read_csv(
        setting.get_target_stock_data_list_file_path())
    # print(target_stock_data_list)
    return target_stock_data_list['CODE'].tolist()
def generate_portfolio_csv_file(caculated_csv_path=caculated_csv_dir,
                                portfolio_csv_path=portfolio_csv_dir):
    print('generate_portfolio_csv_file start ' + strftime("%Y-%m-%d %H:%M:%S"))
    ft.clean_target_dir(portfolio_csv_path)
    file_name_list = ft.getAllTargetSymbols(caculated_csv_path)
    index1 = 0
    for file_name in file_name_list:
        index1 = index1 + 1
        print('Processing {0}/{1}...'.format(index1, len(file_name_list)))
        # File names follow the <symbolA>_<symbolB> convention.
        _temp = file_name.split('_')
        pairs_data = ft.read_csv(
            os.path.join(caculated_csv_path, file_name + '.csv'))
        pairs_util.signal_generate(pairs_data, _temp[0], _temp[1],
                                   portfolio_csv_path)
    print('generate_portfolio_csv_file end ' + strftime("%Y-%m-%d %H:%M:%S"))
def getTargetTradeData(year, month, sellCode, buyCode):
    # Pair CSVs are stored with the lexicographically smaller code first.
    csvFile = sellCode + "_" + buyCode + '.csv'
    if sellCode > buyCode:
        csvFile = buyCode + "_" + sellCode + '.csv'
    targetFilePath = os.path.join(setting.get_trade_dir(), year, month,
                                  csvFile)
    if not os.path.exists(targetFilePath):
        _logger.error(
            "No target csv file exists. {0} - {1}...{2}/{3}...{4}".format(
                sellCode, buyCode, year, month, targetFilePath))
        return
    trade_data = None
    try:
        trade_data = ft.read_csv(targetFilePath)
        trade_data['DATE'] = pd.to_datetime(trade_data['DATE'])
        trade_data.index = trade_data['DATE']
        trade_data = trade_data.sort_index(ascending=True)
    except Exception:
        # A bare except would also swallow KeyboardInterrupt; catch
        # Exception instead and log which file failed to parse.
        _logger.error("Error. {0} - {1}...{2}/{3}...{4}".format(
            sellCode, buyCode, year, month, targetFilePath))
    return trade_data
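
# Minimal usage sketch (the codes and period are placeholders; year and month
# are strings because they become path components, and month is unpadded to
# match how generate_trade_report builds it):
def _example_trade_data():
    data = getTargetTradeData('2024', '1', '1234', '5678')
    if data is not None:
        print(data.head())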
def generate_trade_report(history_data_file_full_path):
    trade_history_data = ft.read_csv(history_data_file_full_path)
    trade_history_data['open_date'] = pd.to_datetime(
        trade_history_data['open_date'])
    # Restrict the history to the configured backtest window, if any.
    if config.backtest_start_ymd is not None:
        trade_history_data = trade_history_data[
            trade_history_data.close > config.backtest_start_ymd]
    if config.backtest_end_ymd is not None:
        trade_history_data = trade_history_data[
            trade_history_data.close < config.backtest_end_ymd]
    outputList = pd.DataFrame(columns=record_column())
    for index, row in trade_history_data.iterrows():
        year = str(int(row.YEAR))
        month = str(int(row.MONTH))
        sellCode = str(int(row.sellCode))
        buyCode = str(int(row.buyCode))
        _logger.debug("Processing {0}/{1} {2} - {3}...{4}/{5}".format(
            index + 1, len(trade_history_data), sellCode, buyCode, year,
            month))
        trade_data = getTargetTradeData(year, month, sellCode, buyCode)
        if trade_data is None:
            _logger.error("No target data found. {0} - {1}...{2}".format(
                sellCode, buyCode, year + month))
            continue
        search_data = trade_data.loc[trade_data['DATE'] >= row.open_date]
        result_row = generate_result(search_data, sellCode, buyCode, row)
        print(result_row)
        # DataFrame.append is gone in pandas 2.x; concat with a one-row frame
        # is the replacement (assuming generate_result returns a dict-like row).
        outputList = pd.concat([outputList, pd.DataFrame([result_row])],
                               ignore_index=True)
    outputAnalysisCsvFile(outputList)
def generate_caculated_data_csv(symbols,
                                caculated_csv_path=caculated_csv_dir,
                                startdate=None,
                                enddate=None,
                                mode='insert'):
    symbol_check_dict = {}
    if mode == 'create':
        ft.clean_target_dir(caculated_csv_path)
    else:
        ft.create_target_dir(caculated_csv_path)
    index1 = 0
    for symb1 in symbols:
        index1 = index1 + 1
        print('Processing {0}/{1} {2}...'.format(index1, len(symbols), symb1))
        for symb2 in symbols:
            # Each unordered pair is processed once; skip self-pairs and
            # pairs already seen in either order.
            if (symb1 == symb2 or (symb1 + symb2) in symbol_check_dict
                    or (symb2 + symb1) in symbol_check_dict):
                continue
            symbol_check_dict[symb1 + symb2] = ''
            _pairs = pairs_util.create_pairs_dataframe(
                setting.get_input_data_dir(), symb1, symb2)
            if startdate is not None:
                start_date = datetime.datetime.strftime(startdate, '%Y-%m-%d')
                _pairs = _pairs[_pairs.index >= start_date]
            if enddate is not None:
                end_date = datetime.datetime.strftime(enddate, '%Y-%m-%d')
                _pairs = _pairs[_pairs.index <= end_date]
            # _pairs = _pairs[(_pairs.index >= startdate) & (_pairs.index <= enddate)]
            result_write_csv = os.path.join(caculated_csv_path,
                                            symb1 + '_' + symb2 + '.csv')
            _pairs = _pairs.sort_values('DATE', ascending=True)
            _pairs = pairs_util.calculate_spread_zscore(_pairs, symb1, symb2)
            if ft.is_file_exists(result_write_csv):
                # Incremental update: overlay the new rows on the persisted
                # CSV and recalculate from the last persisted date onward.
                csv_pairs = ft.read_csv(result_write_csv)
                csv_pairs['DATE'] = pd.to_datetime(csv_pairs['DATE'])
                csv_pairs = csv_pairs.sort_values('DATE', ascending=True)
                csv_pairs.index = csv_pairs['DATE']
                last_row_date = csv_pairs.tail(1).index
                # print('last_row_date {0}'.format(last_row_date))
                _pairs = _pairs.combine_first(csv_pairs)
                _pairs = _pairs.loc[:, [
                    'OPEN_' + symb1, 'CLOSE_' + symb1, 'OPEN_' + symb2,
                    'CLOSE_' + symb2, 'saya_divide', 'saya_divide_mean',
                    'saya_divide_std', 'saya_divide_sigma',
                    'deviation_rate(%)', 'CORR_3M', 'COINT_3M', 'CORR_1Y',
                    'COINT_1Y'
                ]]
                _pairs = _pairs.sort_values('DATE', ascending=False)
                set_corr_and_coint(_pairs, symb1, symb2, last_row_date)
                ft.write_csv(_pairs, result_write_csv)
            else:
                _pairs = _pairs.sort_values('DATE', ascending=False)
                set_corr_and_coint(_pairs, symb1, symb2)
                ft.write_csv(_pairs, result_write_csv)
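
# Minimal invocation sketch (the dates are placeholders; 'create' wipes the
# output dir first while the default 'insert' updates incrementally; it is
# assumed here that get_all_codes from the data module is importable):
def _example_generate_caculated():
    symbols = get_all_codes()
    generate_caculated_data_csv(symbols,
                                startdate=datetime.datetime(2023, 1, 1),
                                enddate=datetime.datetime(2024, 1, 1),
                                mode='create')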
                                           'CLOSE_' + symblB]
    else:
        CLOSE_A_1Y_ago = 0
        CLOSE_B_1Y_ago = 0
    two_year_ago = datetime.today() - relativedelta(years=2)
    two_year_data = pairs[pairs.index > two_year_ago]
    date_2y_ago = two_year_ago.strftime('%Y/%m/%d')
    if not two_year_data.empty:
        CLOSE_A_2Y_ago = two_year_data.loc[two_year_data.index[-1],
                                           'CLOSE_' + symblA]
        CLOSE_B_2Y_ago = two_year_data.loc[two_year_data.index[-1],
                                           'CLOSE_' + symblB]
    else:
        CLOSE_A_2Y_ago = 0
        CLOSE_B_2Y_ago = 0
    return date_3m_ago, CLOSE_A_3M_ago, CLOSE_B_3M_ago, \
        date_6m_ago, CLOSE_A_6M_ago, CLOSE_B_6M_ago, \
        date_1y_ago, CLOSE_A_1Y_ago, CLOSE_B_1Y_ago, \
        date_2y_ago, CLOSE_A_2Y_ago, CLOSE_B_2Y_ago


if __name__ == '__main__':
    # result = get_lot_size(1401, 2000)
    # print(result)
    symblA = '9513'
    symblB = '9810'
    _file = os.path.join(setting.get_result_dir(),
                         symblA + '_' + symblB + '.csv')
    _df = file_util.read_csv(_file)
    print(get_before_close_price_data(_df, symblA, symblB))
def generate_input_stock_data(target_date):
    target_download_csv_file_path = setting.get_target_download_csv_file_path(
        target_date)
    if not file_util.is_file_exists(target_download_csv_file_path):
        print('No target download CSV file exists. file=' +
              target_download_csv_file_path)
        return
    print('Generating input target stock data start.')
    target_stock_data_list = file_util.read_csv(
        setting.get_target_stock_data_list_file_path())
    download_stock_data = file_util.read_csv(target_download_csv_file_path)
    download_stock_data.columns = [
        'DATE', 'CODE', 'KBN', 'OPEN', 'HIGH', 'LOW', 'CLOSE', 'Volume'
    ]
    file_util.clean_target_dir(
        setting.get_generated_input_target_stock_data_dir())
    for index, data_row in target_stock_data_list.iterrows():
        symb = str(data_row['CODE'])
        print('symb:' + symb)
        data_csv_file = os.path.join(
            setting.get_org_all_stock_data_file_dir(), symb + '.csv')
        symb_df = file_util.read_csv(data_csv_file)
        newest_date = symb_df['DATE'][0]
        # print('newest_date:' + newest_date)
        searched_data = download_stock_data[
            download_stock_data['CODE'] == data_row['CODE']]
        if searched_data.empty:
            continue
        # Prepend the downloaded day's row to the per-symbol history file.
        target_date_time = datetime.strptime(target_date, '%Y%m%d')
        insert_date = target_date_time.strftime("%Y-%m-%d")
        insert_value = [[
            insert_date, searched_data.iloc[0]['OPEN'],
            searched_data.iloc[0]['HIGH'], searched_data.iloc[0]['LOW'],
            searched_data.iloc[0]['CLOSE'], searched_data.iloc[0]['Volume']
        ]]
        _tmp_df = pd.DataFrame(
            data=insert_value,
            columns=['DATE', 'OPEN', 'HIGH', 'LOW', 'CLOSE', 'Volume'])
        symb_df = pd.concat([_tmp_df, symb_df], sort=True)
        symb_df = symb_df.loc[:, ['DATE', 'OPEN', 'HIGH', 'LOW', 'CLOSE',
                                  'Volume']]
        symb_df['DATE'] = pd.to_datetime(symb_df['DATE'])
        symb_df.index = symb_df['DATE']
        symb_df = symb_df.drop_duplicates(keep='first')
        # Store whole-number prices without the float '.0' suffix. Raw
        # strings avoid the invalid-escape warning, and anchoring with '$'
        # only strips a trailing '.0' (so 1234.05 is left intact).
        symb_df['OPEN'] = symb_df['OPEN'].astype(str).replace(
            r'\.0$', '', regex=True)
        symb_df['HIGH'] = symb_df['HIGH'].astype(str).replace(
            r'\.0$', '', regex=True)
        symb_df['LOW'] = symb_df['LOW'].astype(str).replace(
            r'\.0$', '', regex=True)
        symb_df['CLOSE'] = symb_df['CLOSE'].astype(str).replace(
            r'\.0$', '', regex=True)
        symb_df['Volume'] = symb_df['Volume'].astype(str).replace(
            r'\.0$', '', regex=True)
        file_util.write_csv_without_index(symb_df, data_csv_file)
        # Keep only the most recent INPUT_TARGET_DATA_YEAR_SPAN years
        # (plus one day) for the generated input file.
        target_data_span = target_date_time - relativedelta(
            years=INPUT_TARGET_DATA_YEAR_SPAN)
        target_data_span = target_data_span - relativedelta(days=1)
        target_data = symb_df[symb_df.index > target_data_span]
        save_file_path = os.path.join(
            setting.get_generated_input_target_stock_data_dir(),
            symb + '.csv')
        file_util.write_csv_without_index(target_data, save_file_path)
    generate_tracking_file(target_date)
    print('Generating input target stock data end.')
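
# A quick check of the trailing-'.0' normalization (assumption: whole-number
# prices arrive as floats, e.g. 1234.0, and should round-trip to '1234' in
# the CSV while fractional prices stay intact):
def _example_strip_trailing_zero():
    s = pd.Series([1234.0, 1234.5, 1234.05]).astype(str)
    print(s.replace(r'\.0$', '', regex=True).tolist())
    # -> ['1234', '1234.5', '1234.05']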
def output_report(corr_df, isFastCaculateMode, resultDir,
                  corr_result_file_name):
    print('Output Report Processing...')
    timestr = time.strftime("%Y%m%d-%H%M%S")
    report_file = os.path.join(resultDir, 'report_' + timestr + '.xlsx')
    # corr_df = file_util.read_csv(os.path.join(setting.get_result_dir(), corr_result_file_name))
    master_df = file_util.read_csv(setting.get_master_file_dir())
    corr_df = trade_util.addMasterInfo(corr_df, master_df)
    corr_df_new = corr_df.copy(deep=True)
    # One accumulator per report column; every pair appends exactly one value
    # to each list (placeholders when its CSV is missing), so the lists stay
    # row-aligned with corr_df.
    OPEN_A_list, CLOSE_A_list, OPEN_B_list, CLOSE_B_list = [], [], [], []
    SIGMA, ABS_SIGMA, LAST_DAY_SIGMA = [], [], []
    TRADE_A, TRADE_B, DEV_RATE = [], [], []
    AXIS_LOT_SIZE, PAIR_LOT_SIZE, LOT_SIZE_DIFF = [], [], []
    total_profit_list, average_profit_list, average_plt_list = [], [], []
    total_times_list, plus_times_list, minus_times_list = [], [], []
    pl_times_list, open_days_list = [], []
    stop_profit_times_list, stop_loss_times_list = [], []
    max_day_over_times_list = []
    DATE_3M_ago_list, CLOSE_A_3M_ago_list, CLOSE_B_3M_ago_list = [], [], []
    DATE_6M_ago_list, CLOSE_A_6M_ago_list, CLOSE_B_6M_ago_list = [], [], []
    DATE_1y_ago_list, CLOSE_A_1y_ago_list, CLOSE_B_1y_ago_list = [], [], []
    DATE_2y_ago_list, CLOSE_A_2y_ago_list, CLOSE_B_2y_ago_list = [], [], []
    index1 = 0
    for index, row in corr_df.iterrows():
        symblA = str(int(row.SYM_A))
        symblB = str(int(row.SYM_B))
        index1 = index1 + 1
        print('Processing {0}/{1} {2} - {3}...'.format(index1, len(corr_df),
                                                       symblA, symblB))
        try:
            _file = os.path.join(resultDir, symblA + '_' + symblB + '.csv')
            _df = file_util.read_csv(_file)
            OPEN_A_list.append(_df['OPEN_' + symblA][0])
            CLOSE_A_list.append(_df['CLOSE_' + symblA][0])
            OPEN_B_list.append(_df['OPEN_' + symblB][0])
            CLOSE_B_list.append(_df['CLOSE_' + symblB][0])
            SIGMA.append(_df['saya_divide_sigma'][0])
            ABS_SIGMA.append(np.abs(_df['saya_divide_sigma'][0]))
            LAST_DAY_SIGMA.append(np.abs(_df['saya_divide_sigma'][1]))
            # A positive sigma means leg A is rich relative to leg B, so the
            # signal is sell A / buy B; negative sigma is the reverse.
            if _df['saya_divide_sigma'][0] > 0:
                TRADE_A.append("SELL")
                TRADE_B.append("BUY")
            else:
                TRADE_A.append("BUY")
                TRADE_B.append("SELL")
            DEV_RATE.append(_df['deviation_rate(%)'][0])
            axis_lot_size, pair_lot_size, lot_size_diff = \
                trade_util.get_lot_size(_df['CLOSE_' + symblA][0],
                                        _df['CLOSE_' + symblB][0])
            AXIS_LOT_SIZE.append(axis_lot_size)
            PAIR_LOT_SIZE.append(pair_lot_size)
            LOT_SIZE_DIFF.append(lot_size_diff)
            total_profit, average_profit, average_pl, total_times, \
                plus_times, minus_times, open_days, stop_profit_times, \
                stop_loss_times, max_day_over_times = signal_generate(
                    _df, symblA, symblB, resultDir)
            total_profit_list.append(total_profit)
            average_profit_list.append(average_profit)
            average_plt_list.append(average_pl)
            total_times_list.append(total_times)
            plus_times_list.append(plus_times)
            minus_times_list.append(minus_times)
            if plus_times <= 0 or total_times <= 0:
                pl_times_list.append(0)
            else:
                # Win rate in percent, rounded to two decimals.
                pl_times_list.append(round(plus_times / total_times * 100, 2))
            open_days_list.append(open_days)
            stop_profit_times_list.append(stop_profit_times)
            stop_loss_times_list.append(stop_loss_times)
            max_day_over_times_list.append(max_day_over_times)
            date_3m_ago, CLOSE_A_3M_ago, CLOSE_B_3M_ago, date_6m_ago, \
                CLOSE_A_6M_ago, CLOSE_B_6M_ago, date_1y_ago, \
                CLOSE_A_1Y_ago, CLOSE_B_1Y_ago, date_2y_ago, \
                CLOSE_A_2Y_ago, CLOSE_B_2Y_ago = \
                trade_util.get_before_close_price_data(_df, symblA, symblB)
            DATE_3M_ago_list.append(date_3m_ago)
            CLOSE_A_3M_ago_list.append(CLOSE_A_3M_ago)
            CLOSE_B_3M_ago_list.append(CLOSE_B_3M_ago)
            DATE_6M_ago_list.append(date_6m_ago)
            CLOSE_A_6M_ago_list.append(CLOSE_A_6M_ago)
            CLOSE_B_6M_ago_list.append(CLOSE_B_6M_ago)
            DATE_1y_ago_list.append(date_1y_ago)
            CLOSE_A_1y_ago_list.append(CLOSE_A_1Y_ago)
            CLOSE_B_1y_ago_list.append(CLOSE_B_1Y_ago)
            DATE_2y_ago_list.append(date_2y_ago)
            CLOSE_A_2y_ago_list.append(CLOSE_A_2Y_ago)
            CLOSE_B_2y_ago_list.append(CLOSE_B_2Y_ago)
            # path, ext = os.path.splitext(os.path.basename(_file))
            # _df.to_excel(writer, sheet_name=path)
        except FileNotFoundError:
            # No CSV was generated for this pair; append zero/empty
            # placeholders to every accumulator to keep the rows aligned.
            for lst in (OPEN_A_list, CLOSE_A_list, OPEN_B_list, CLOSE_B_list,
                        SIGMA, ABS_SIGMA, LAST_DAY_SIGMA, DEV_RATE,
                        AXIS_LOT_SIZE, PAIR_LOT_SIZE, LOT_SIZE_DIFF,
                        total_profit_list, average_profit_list,
                        average_plt_list, total_times_list, plus_times_list,
                        minus_times_list, pl_times_list, open_days_list,
                        stop_profit_times_list, stop_loss_times_list,
                        max_day_over_times_list, DATE_3M_ago_list,
                        CLOSE_A_3M_ago_list, CLOSE_B_3M_ago_list,
                        DATE_6M_ago_list, CLOSE_A_6M_ago_list,
                        CLOSE_B_6M_ago_list, DATE_1y_ago_list,
                        CLOSE_A_1y_ago_list, CLOSE_B_1y_ago_list,
                        DATE_2y_ago_list, CLOSE_A_2y_ago_list,
                        CLOSE_B_2y_ago_list):
                lst.append(0)
            TRADE_A.append("")
            TRADE_B.append("")
            continue
    corr_df_new = corr_df_new.assign(
        OPEN_A=OPEN_A_list,
        CLOSE_A=CLOSE_A_list,
        OPEN_B=OPEN_B_list,
        CLOSE_B=CLOSE_B_list,
        SIGMA=SIGMA,
        ABS_SIGMA=ABS_SIGMA,
        LAST_DAY_SIGMA=LAST_DAY_SIGMA,
        TRADE_A=TRADE_A,
        TRADE_B=TRADE_B,
        DEV_RATE=DEV_RATE,
        AXIS_LOT_SIZE=AXIS_LOT_SIZE,
        PAIR_LOT_SIZE=PAIR_LOT_SIZE,
        LOT_SIZE_DIFF=LOT_SIZE_DIFF,
        total_profit=total_profit_list,
        average_profit=average_profit_list,
        average_pl=average_plt_list,
        total_times=total_times_list,
        plus_times=plus_times_list,
        minus_times=minus_times_list,
        pl_times=pl_times_list,
        open_days=open_days_list,
        stop_profit_times=stop_profit_times_list,
        stop_loss_times=stop_loss_times_list,
        max_day_over_times=max_day_over_times_list,
        DATE_3M_ago=DATE_3M_ago_list,
        CLOSE_A_3M_ago=CLOSE_A_3M_ago_list,
        CLOSE_B_3M_ago=CLOSE_B_3M_ago_list,
        DATE_6M_ago=DATE_6M_ago_list,
        CLOSE_A_6M_ago=CLOSE_A_6M_ago_list,
        CLOSE_B_6M_ago=CLOSE_B_6M_ago_list,
        DATE_1y_ago=DATE_1y_ago_list,
        CLOSE_A_1y_ago=CLOSE_A_1y_ago_list,
        CLOSE_B_1y_ago=CLOSE_B_1y_ago_list,
        DATE_2y_ago=DATE_2y_ago_list,
        CLOSE_A_2y_ago=CLOSE_A_2y_ago_list,
        CLOSE_B_2y_ago=CLOSE_B_2y_ago_list)
    corr_df_new = corr_df_new.sort_values('total_profit', ascending=False)
    corr_df_new = corr_df_new.loc[:, [
        'SYM_A', 'SYM_A_NAME', 'SYM_A_INDUSTRY', 'OPEN_A', 'CLOSE_A',
        'AXIS_LOT_SIZE', 'TRADE_A', 'SYM_B', 'SYM_B_NAME', 'SYM_B_INDUSTRY',
        'OPEN_B', 'CLOSE_B', 'PAIR_LOT_SIZE', 'TRADE_B', 'CORR_3M', 'CORR_1Y',
        'COINT_3M', 'COINT_1Y', 'SIGMA', 'ABS_SIGMA', 'LAST_DAY_SIGMA',
        'DEV_RATE', 'LOT_SIZE_DIFF', 'total_profit', 'average_profit',
        'average_pl', 'total_times', 'plus_times', 'minus_times', 'pl_times',
        'open_days', 'stop_profit_times', 'stop_loss_times',
        'max_day_over_times', 'DATE_3M_ago', 'CLOSE_A_3M_ago',
        'CLOSE_B_3M_ago', 'DATE_6M_ago', 'CLOSE_A_6M_ago', 'CLOSE_B_6M_ago',
        'DATE_1y_ago', 'CLOSE_A_1y_ago', 'CLOSE_B_1y_ago', 'DATE_2y_ago',
        'CLOSE_A_2y_ago', 'CLOSE_B_2y_ago'
    ]]
    file_util.write_csv(corr_df_new,
                        os.path.join(resultDir, corr_result_file_name))
    if not isFastCaculateMode:
        file_util.write_csv(
            corr_df_new,
            os.path.join(setting.get_master_dir(), corr_result_file_name))
    # with pd.ExcelWriter(report_file) as writer:
    #     corr_df_new.to_excel(writer, sheet_name='CORR')
    print('Output Report Process end!')
def main(args):
    start_time = datetime.now()
    print('main start ' + strftime("%Y-%m-%d %H:%M:%S"))
    isFastCaculateMode = False
    if len(args) >= 2 and args[1] in ('fast', 'FAST'):
        print('FAST CACULATE MODE')
        isFastCaculateMode = True
    file_util.clean_target_dir(setting.get_result_dir())
    # get all target stock symbols
    symbols = file_util.getAllTargetSymbols(setting.get_input_data_dir())
    print('Total symbols size:' + str(len(symbols)))
    index1 = 0
    symbols_corr_list = []
    symbol_check_dict = {}

    def process_pair(symb1, symb2):
        # Shared per-pair pipeline for both modes: build the pair frame, keep
        # it only when the correlation/cointegration checks pass, then persist
        # the z-scored spread to <symb1>_<symb2>.csv.
        _pairs = create_pairs_dataframe(setting.get_input_data_dir(), symb1,
                                        symb2)
        corr_3m, corr_1y = trade_util.check_corr(_pairs, symb1, symb2)
        coint_3m, coint_1y = trade_util.check_cointegration(
            _pairs, symb1, symb2)
        if not is_available_pari_data(_pairs, symb1, symb2, corr_3m, corr_1y,
                                      coint_3m, coint_1y):
            return
        symbols_corr_list.append(
            [symb1, symb2, corr_3m, corr_1y, coint_3m, coint_1y])
        _pairs = _pairs.sort_values('DATE', ascending=True)
        _pairs = calculate_spread_zscore(_pairs, symb1, symb2)
        _pairs = _pairs.sort_values('DATE', ascending=False)
        file_util.write_csv(
            _pairs,
            os.path.join(setting.get_result_dir(),
                         symb1 + '_' + symb2 + '.csv'))

    if isFastCaculateMode:
        # Fast mode: only recalculate the pairs already in the current report.
        report_pairs = file_util.read_csv(setting.get_currenty_report_file())
        for index, row in report_pairs.iterrows():
            index1 = index1 + 1
            symb1 = str(int(row.SYM_A))
            symb2 = str(int(row.SYM_B))
            print('Processing {0}/{1} {2} - {3}...'.format(
                index1, len(report_pairs), symb1, symb2))
            process_pair(symb1, symb2)
    else:
        # Full mode: walk every unordered pair of target symbols once.
        for symb1 in symbols:
            index1 = index1 + 1
            print('Processing {0}/{1} {2}...'.format(index1, len(symbols),
                                                     symb1))
            for symb2 in symbols:
                if (symb1 == symb2 or (symb1 + symb2) in symbol_check_dict
                        or (symb2 + symb1) in symbol_check_dict):
                    continue
                symbol_check_dict[symb1 + symb2] = ''
                process_pair(symb1, symb2)
    # Index 3 is CORR_1Y, so the report is ordered by the one-year
    # correlation, descending.
    corr_data = sorted(symbols_corr_list, key=itemgetter(3), reverse=True)
    corr_data = pd.DataFrame(columns=[
        'SYM_A', 'SYM_B', 'CORR_3M', 'CORR_1Y', 'COINT_3M', 'COINT_1Y'
    ], data=corr_data)
    output_report(corr_data, isFastCaculateMode, setting.get_result_dir(),
                  setting.corr_result_file_name)
    process_time = datetime.now() - start_time
    print('main end! ' + strftime("%Y-%m-%d %H:%M:%S"))
    print('Time cost:{0}'.format(process_time))
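
# A minimal entry-point sketch (an assumption: the repository may already
# wire main() up elsewhere; the module name in the comment is hypothetical):
if __name__ == '__main__':
    import sys
    # e.g. `python pair_trade.py` for a full run over all symbol pairs,
    # or `python pair_trade.py fast` to restrict to current-report pairs.
    main(sys.argv)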