def main(targetYear=None, targetMonth=None):
    """Recalculate spread z-score data for every pair on the watching list.

    Reads pair codes from the watching-list input sheet of the Excel
    workbook, computes correlation/cointegration statistics and spread
    z-scores per pair, writes one CSV per pair into the watching-list
    directory (cleaned first), and finally outputs the correlation report.

    targetYear/targetMonth are currently unused; the parameters are kept
    so existing callers' interface is unchanged.
    """
    print('Watching List data caculate main start!')
    file_name = os.path.join(setting.get_root_dir(), excel_file_name)
    workbook = openpyxl.load_workbook(file_name, data_only=True)
    sheet = workbook[sheet_name_Watching_Input]
    record_list = []
    symbols_corr_list = []
    ft.clean_target_dir(os.path.join(setting.get_watching_list_file_dir()))
    # Data rows start at row 4; pair codes live in columns 3 and 7.
    for i in range(4, sheet.max_row + 1, 1):
        record = WatchingRecord()
        # FIX: test the raw cell values for None BEFORE str() —
        # str(None) yields the string 'None', so the original check after
        # the conversion could never fire and empty rows slipped through.
        value1 = sheet.cell(row=i, column=3).value
        value2 = sheet.cell(row=i, column=7).value
        if value1 is None or value2 is None:
            continue
        record.code1 = str(value1)
        record.code2 = str(value2)
        record_list.append(record)
    for record in record_list:
        symb1 = record.code1
        symb2 = record.code2
        # Defensive re-check ('None' strings / empties) kept for safety.
        if (len(symb1) <= 0 or len(symb2) <= 0 or symb1 == "None"
                or symb2 == "None"):
            continue
        _pairs = pairs_main.create_pairs_dataframe(
            setting.get_input_data_dir(), symb1, symb2)
        corr_3m, corr_1y = trade_util.check_corr(_pairs, symb1, symb2)
        coint_3m, coint_1y = trade_util.check_cointegration(
            _pairs, symb1, symb2)
        symbols_corr_list.append(
            [symb1, symb2, corr_3m, corr_1y, coint_3m, coint_1y])
        _pairs = _pairs.sort_values('DATE', ascending=True)
        _pairs = pairs_main.calculate_spread_zscore(_pairs, symb1, symb2)
        _pairs = _pairs.sort_values('DATE', ascending=False)
        file_util.write_csv(
            _pairs,
            os.path.join(setting.get_watching_list_file_dir(),
                         symb1 + '_' + symb2 + '.csv'))
    # NOTE(review): itemgetter(3) sorts by CORR_1Y (CORR_3M is index 2),
    # although the original comment said "sort by 3 month corr" — confirm
    # the intended sort key before changing it.
    corr_data = sorted(symbols_corr_list, key=itemgetter(3), reverse=True)
    corr_data = pd.DataFrame(columns=[
        'SYM_A', 'SYM_B', 'CORR_3M', 'CORR_1Y', 'COINT_3M', 'COINT_1Y'
    ], data=corr_data)
    pairs_main.output_report(corr_data, False,
                             setting.get_watching_list_file_dir(),
                             setting.watching_corr_result_file_name)
    print('Watching List data caculate main end!')
def outputAnalysisCsvFile(dataList):
    """Write the trade-analysis CSV plus a snapshot of the run config.

    Output goes to <analysis_output_csv_file_dir>/<runDateTime>/; that
    directory is cleaned first so each run's output is self-contained.

    dataList: tabular analysis results accepted by
        ft.write_csv_without_index.
    """
    targetDir = os.path.join(analysis_output_csv_file_dir, runDateTime)
    ft.clean_target_dir(targetDir)
    ft.write_csv_without_index(
        dataList,
        os.path.join(targetDir, 'trade_analysis_' + runDateTime + '.csv'))
    # FIX: use a context manager so the config file handle is closed even
    # if write() raises (the original open()/close() pair could leak).
    # The path is the same targetDir joined above.
    with open(os.path.join(targetDir, 'config_' + runDateTime + '.txt'),
              "w") as configFile:
        configFile.write(str(vars(config)))
def generate_portfolio_csv_file(caculated_csv_path=caculated_csv_dir,
                                portfolio_csv_path=portfolio_csv_dir):
    """Generate trade-signal (portfolio) CSVs from the calculated pair CSVs.

    Every file under caculated_csv_path is named '<symA>_<symB>.csv'; for
    each such pair the signal generator writes its output under
    portfolio_csv_path (which is cleaned first).
    """
    print('generate_portfolio_csv_file start ' + strftime("%Y-%m-%d %H:%M:%S"))
    ft.clean_target_dir(portfolio_csv_path)
    pair_names = ft.getAllTargetSymbols(caculated_csv_path)
    total = len(pair_names)
    for count, pair_name in enumerate(pair_names, start=1):
        print('Processing {0}/{1}...'.format(count, total))
        parts = pair_name.split('_')
        pairs_data = ft.read_csv(
            os.path.join(caculated_csv_path, pair_name + '.csv'))
        pairs_util.signal_generate(pairs_data, parts[0], parts[1],
                                   portfolio_csv_path)
    print('generate_portfolio_csv_file end ' + strftime("%Y-%m-%d %H:%M:%S"))
def generate_backtest_report(caculated_csv_path=caculated_csv_dir,
                             portfolio_csv_path=portfolio_csv_dir,
                             backtest_report_path=backtest_report_dir,
                             startdate=start_date,
                             enddate=end_date):
    """Run the day-by-day backtest over [startdate, enddate], weekdays only.

    startdate/enddate: 'YYYY-MM-DD' strings.
    portfolio_csv_path is unused here but kept for interface compatibility.
    """
    print('generate_backtest_report start ' + strftime("%Y-%m-%d %H:%M:%S"))
    print('start_date: {0} enddate: {1}'.format(startdate, enddate))
    # FIX: honour the backtest_report_path parameter — the original always
    # cleaned the module-level default directory, silently ignoring the
    # caller's argument.
    ft.clean_target_dir(backtest_report_path)
    ft.clean_target_dir(backtest_report_each_day_dir)
    # Renamed locals so the module-level start_date/end_date used as
    # parameter defaults are no longer shadowed.
    span_start = datetime.strptime(startdate, '%Y-%m-%d')
    span_end = datetime.strptime(enddate, '%Y-%m-%d')
    file_name_list = ft.getAllTargetSymbols(caculated_csv_path)
    for target_date in tu.date_span(span_start, span_end):
        print(target_date)
        if target_date.weekday() < 5:  # skip Saturday (5) / Sunday (6)
            generate_day_report(target_date, file_name_list)
    print('generate_backtest_report end ' + strftime("%Y-%m-%d %H:%M:%S"))
def generate_caculated_data_csv(symbols,
                                caculated_csv_path=caculated_csv_dir,
                                startdate=None,
                                enddate=None,
                                mode='insert'):
    """Calculate spread/z-score CSVs for every unordered symbol pair.

    For each pair (symb1, symb2), builds the joined price dataframe,
    optionally restricts it to [startdate, enddate] (datetime objects),
    computes the spread z-score columns and writes
    '<symb1>_<symb2>.csv' under caculated_csv_path.

    mode='create' wipes the output directory first; any other value
    ('insert') keeps it, and newly calculated rows are merged over the
    data already on disk via combine_first.
    """
    symbol_check_dict = {}
    if mode == 'create':
        ft.clean_target_dir(caculated_csv_path)
    else:
        ft.create_target_dir(caculated_csv_path)
    index1 = 0
    for symb1 in symbols:
        index1 = index1 + 1
        print('Processing {0}/{1} {2}...'.format(index1, len(symbols),
                                                 symb1))
        for symb2 in symbols:
            # Each unordered pair is processed once.
            if (symb1 == symb2 or (symb1 + symb2) in symbol_check_dict
                    or (symb2 + symb1) in symbol_check_dict):
                continue
            symbol_check_dict[symb1 + symb2] = ''
            _pairs = pairs_util.create_pairs_dataframe(
                setting.get_input_data_dir(), symb1, symb2)
            # FIX: call strftime on the datetime object itself instead of
            # the unbound datetime.datetime.strftime(obj, fmt) form — the
            # instance call works regardless of how datetime is imported.
            if startdate is not None:
                _pairs = _pairs[
                    _pairs.index >= startdate.strftime('%Y-%m-%d')]
            if enddate is not None:
                _pairs = _pairs[_pairs.index <= enddate.strftime('%Y-%m-%d')]
            result_write_csv = os.path.join(caculated_csv_path,
                                            symb1 + '_' + symb2 + '.csv')
            _pairs = _pairs.sort_values('DATE', ascending=True)
            _pairs = pairs_util.calculate_spread_zscore(_pairs, symb1, symb2)
            if ft.is_file_exists(result_write_csv):
                # Insert mode: merge newly calculated rows over the rows
                # already on disk, then rewrite the whole file.
                csv_pairs = ft.read_csv(result_write_csv)
                csv_pairs['DATE'] = pd.to_datetime(csv_pairs['DATE'])
                csv_pairs = csv_pairs.sort_values('DATE', ascending=True)
                csv_pairs.index = csv_pairs['DATE']
                # Newest date previously stored; passed on so corr/coint
                # is only recalculated for rows after it.
                last_row_date = csv_pairs.tail(1).index
                _pairs = _pairs.combine_first(csv_pairs)
                # (FIX: dropped a duplicate result_write_csv assignment.)
                _pairs = _pairs.loc[:, [
                    'OPEN_' + symb1, 'CLOSE_' + symb1, 'OPEN_' + symb2,
                    'CLOSE_' + symb2, 'saya_divide', 'saya_divide_mean',
                    'saya_divide_std', 'saya_divide_sigma',
                    'deviation_rate(%)', 'CORR_3M', 'COINT_3M', 'CORR_1Y',
                    'COINT_1Y'
                ]]
                _pairs = _pairs.sort_values('DATE', ascending=False)
                set_corr_and_coint(_pairs, symb1, symb2, last_row_date)
                ft.write_csv(_pairs, result_write_csv)
            else:
                _pairs = _pairs.sort_values('DATE', ascending=False)
                set_corr_and_coint(_pairs, symb1, symb2)
                ft.write_csv(_pairs, result_write_csv)
def pre_process():
    """Wipe previous analysis output before a new run, when configured."""
    if not config.clear_all_analysis_data:
        return
    _logger.info("Clear all analysis old data under {}".format(
        analysis_output_csv_file_dir))
    ft.clean_target_dir(analysis_output_csv_file_dir)
def main(targetYear=None, targetMonth=None):
    """Regenerate per-pair CSVs for dry-run open positions and history.

    Reads trade records from the history and open-position Excel sheets
    (each trade occupies two rows: sell on row i, buy on row i+1, hence
    the step of 2) and recalculates the pair CSV for each. History trades
    older than roughly two months are skipped.

    targetYear/targetMonth are currently unused; kept for the caller's
    interface.
    """
    # FIX: corrected typos in the start/end messages
    # ('Positin' -> 'Position', 'Aalysis' -> 'Analysis').
    print('Dry Run Position / History Analysis main start!')
    file_name = os.path.join(setting.get_root_dir(), excel_file_name)
    workbook = openpyxl.load_workbook(file_name, data_only=True)
    sheet = workbook[sheet_name_history]
    record_list = []
    ft.clean_target_dir(setting.get_dryrun_trade_open_dir())
    for i in range(4, sheet.max_row + 1, 2):
        record = TradeRecord()
        year = sheet.cell(row=i, column=27).value
        if year is None:
            continue
        record.year = str(year)
        record.month = str(sheet.cell(row=i, column=28).value)
        record.sellCode = str(sheet.cell(row=i, column=6).value)
        record.buyCode = str(sheet.cell(row=i + 1, column=6).value)
        record_list.append(record)
    open_position_sheet = workbook[sheet_name_open_position]
    for i in range(4, open_position_sheet.max_row + 1, 2):
        if open_position_sheet.cell(row=i, column=6).value is None:
            break
        record = TradeRecord()
        record.sellCode = str(open_position_sheet.cell(row=i, column=6).value)
        record.buyCode = str(
            open_position_sheet.cell(row=i + 1, column=6).value)
        record.kbn = 'open'
        record_list.append(record)
    for record in record_list:
        # Pair CSVs are always named '<smaller code>_<larger code>.csv'.
        symb1 = record.sellCode
        symb2 = record.buyCode
        if record.sellCode > record.buyCode:
            symb1 = record.buyCode
            symb2 = record.sellCode
        file_name = symb1 + '_' + symb2 + '.csv'
        # NOTE(review): history records rely on TradeRecord.kbn having a
        # non-'open' default — confirm against the TradeRecord class.
        if record.kbn != 'open':
            # Skip history trades whose (year, month) is more than two
            # months before now.
            target_ymd = datetime.datetime(year=int(record.year),
                                           month=int(record.month),
                                           day=1)
            ymd_edit = target_ymd + relativedelta(months=2)
            if datetime.datetime.now() > ymd_edit:
                continue
        if record.kbn == 'open':
            ft.create_target_dir(
                os.path.join(setting.get_dryrun_trade_dir(), record.kbn))
            csv_file_full_path = os.path.join(setting.get_dryrun_trade_dir(),
                                              record.kbn, file_name)
        else:
            ft.create_target_dir(
                os.path.join(setting.get_dryrun_trade_history_dir(),
                             record.year))
            ft.create_target_dir(
                os.path.join(setting.get_dryrun_trade_history_dir(),
                             record.year, record.month))
            csv_file_full_path = os.path.join(
                setting.get_dryrun_trade_history_dir(), record.year,
                record.month, file_name)
        cacluate_needed_data(symb1, symb2, csv_file_full_path)
    print('Dry Run Position / History Analysis main end!')
def generate_input_stock_data(target_date):
    """Merge a day's downloaded quotes into per-symbol history CSVs and
    emit the trimmed input files used by the pair analysis.

    target_date: 'YYYYMMDD' string naming the downloaded CSV to merge.
    For each symbol in the target list: prepend the day's OHLCV row to the
    symbol's full-history CSV, de-duplicate, rewrite the history file,
    then write only the most recent INPUT_TARGET_DATA_YEAR_SPAN years to
    the generated-input directory. Finally writes the tracking file.
    """
    target_download_csv_file_path = setting.get_target_download_csv_file_path(
        target_date)
    # Guard clause replaces the original's whole-body if/else nesting.
    if not file_util.is_file_exists(target_download_csv_file_path):
        print('No target download CSV file exists. file=' +
              target_download_csv_file_path)
        return
    print('Generating input target stock data start.')
    target_stock_data_list = file_util.read_csv(
        setting.get_target_stock_data_list_file_path())
    download_stock_data = file_util.read_csv(target_download_csv_file_path)
    download_stock_data.columns = [
        'DATE', 'CODE', 'KBN', 'OPEN', 'HIGH', 'LOW', 'CLOSE', 'Volume'
    ]
    file_util.clean_target_dir(
        setting.get_generated_input_target_stock_data_dir())
    for index, data_row in target_stock_data_list.iterrows():
        symb = str(data_row['CODE'])
        print('symb:' + symb)
        data_csv_file = os.path.join(
            setting.get_org_all_stock_data_file_dir(), symb + '.csv')
        symb_df = file_util.read_csv(data_csv_file)
        searched_data = download_stock_data[download_stock_data['CODE'] ==
                                            data_row['CODE']]
        if searched_data.empty:
            continue
        target_date_time = datetime.strptime(target_date, '%Y%m%d')
        insert_date = target_date_time.strftime("%Y-%m-%d")
        # New row for the day, prepended so the newest date stays on top.
        insert_value = [[
            insert_date, searched_data.iloc[0]['OPEN'],
            searched_data.iloc[0]['HIGH'], searched_data.iloc[0]['LOW'],
            searched_data.iloc[0]['CLOSE'], searched_data.iloc[0]['Volume']
        ]]
        _tmp_df = pd.DataFrame(
            data=insert_value,
            columns=['DATE', 'OPEN', 'HIGH', 'LOW', 'CLOSE', 'Volume'])
        symb_df = pd.concat([_tmp_df, symb_df], sort=True)
        symb_df = symb_df.loc[:, [
            'DATE', 'OPEN', 'HIGH', 'LOW', 'CLOSE', 'Volume'
        ]]
        symb_df['DATE'] = pd.to_datetime(symb_df['DATE'])
        symb_df.index = symb_df['DATE']
        symb_df = symb_df.drop_duplicates(keep='first')
        # FIX: raw strings for the regex — '\.0' is an invalid escape
        # sequence (SyntaxWarning on modern Python). r'\.0' compiles to
        # the same pattern, so behavior is unchanged: any '.0' substring
        # is stripped from the stringified values. The five duplicated
        # lines are folded into one loop.
        for col in ['OPEN', 'HIGH', 'LOW', 'CLOSE', 'Volume']:
            symb_df[col] = symb_df[col].astype(str).replace(r'\.0', '',
                                                            regex=True)
        file_util.write_csv_without_index(symb_df, data_csv_file)
        # Keep only the most recent N years (plus one day of margin).
        target_data_span = (target_date_time -
                            relativedelta(years=INPUT_TARGET_DATA_YEAR_SPAN) -
                            relativedelta(days=1))
        target_data = symb_df[symb_df.index > target_data_span]
        save_file_path = os.path.join(
            setting.get_generated_input_target_stock_data_dir(),
            symb + '.csv')
        file_util.write_csv_without_index(target_data, save_file_path)
    generate_tracking_file(target_date)
    print('Generating input target stock data end.')
def _process_pair(symb1, symb2):
    """Process one symbol pair: build the dataframe, run corr/coint checks,
    and (when the pair qualifies) write its spread z-score CSV.

    Returns [symb1, symb2, corr_3m, corr_1y, coint_3m, coint_1y] for a
    qualifying pair, or None when the pair should be skipped.
    """
    _pairs = create_pairs_dataframe(setting.get_input_data_dir(), symb1,
                                    symb2)
    corr_3m, corr_1y = trade_util.check_corr(_pairs, symb1, symb2)
    coint_3m, coint_1y = trade_util.check_cointegration(_pairs, symb1, symb2)
    if not is_available_pari_data(_pairs, symb1, symb2, corr_3m, corr_1y,
                                  coint_3m, coint_1y):
        return None
    _pairs = _pairs.sort_values('DATE', ascending=True)
    _pairs = calculate_spread_zscore(_pairs, symb1, symb2)
    _pairs = _pairs.sort_values('DATE', ascending=False)
    file_util.write_csv(
        _pairs,
        os.path.join(setting.get_result_dir(), symb1 + '_' + symb2 + '.csv'))
    return [symb1, symb2, corr_3m, corr_1y, coint_3m, coint_1y]


def main(args):
    """Entry point: compute correlation/cointegration and spread z-scores
    for all symbol pairs, write the per-pair CSVs and the summary report.

    args: argv-style list. args[1] == 'fast' / 'FAST' enables fast mode,
    which re-processes only the pairs already listed in the current
    report file instead of every symbol combination.
    """
    start_time = datetime.now()
    # FIX: corrected the 'maint start' typo in the log message.
    print('main start ' + strftime("%Y-%m-%d %H:%M:%S"))
    isFastCaculateMode = False
    if len(args) >= 2 and args[1] in ('fast', 'FAST'):
        print('FAST CACULATE MODE')
        isFastCaculateMode = True
    file_util.clean_target_dir(setting.get_result_dir())
    # Get all target stock symbols.
    symbols = file_util.getAllTargetSymbols(setting.get_input_data_dir())
    print('Total symbols size:' + str(len(symbols)))
    index1 = 0
    symbols_corr_list = []
    symbol_check_dict = {}
    # The duplicated per-pair logic of both modes is factored into
    # _process_pair above.
    if isFastCaculateMode:
        _pais = file_util.read_csv(setting.get_currenty_report_file())
        for index, row in _pais.iterrows():
            index1 = index1 + 1
            symb1 = str(int(row.SYM_A))
            symb2 = str(int(row.SYM_B))
            print('Processing {0}/{1} {2} - {3}...'.format(
                index1, len(_pais), symb1, symb2))
            entry = _process_pair(symb1, symb2)
            if entry is not None:
                symbols_corr_list.append(entry)
    else:
        for symb1 in symbols:
            index1 = index1 + 1
            print('Processing {0}/{1} {2}...'.format(index1, len(symbols),
                                                     symb1))
            for symb2 in symbols:
                # Each unordered pair is processed once.
                if (symb1 == symb2 or (symb1 + symb2) in symbol_check_dict
                        or (symb2 + symb1) in symbol_check_dict):
                    continue
                symbol_check_dict[symb1 + symb2] = ''
                entry = _process_pair(symb1, symb2)
                if entry is not None:
                    symbols_corr_list.append(entry)
    # NOTE(review): itemgetter(3) sorts by CORR_1Y (CORR_3M is index 2),
    # although the original comment said "sort by 3 month corr" — confirm
    # the intended sort key before changing it.
    corr_data = sorted(symbols_corr_list, key=itemgetter(3), reverse=True)
    corr_data = pd.DataFrame(columns=[
        'SYM_A', 'SYM_B', 'CORR_3M', 'CORR_1Y', 'COINT_3M', 'COINT_1Y'
    ], data=corr_data)
    output_report(corr_data, isFastCaculateMode, setting.get_result_dir(),
                  setting.corr_result_file_name)
    process_time = datetime.now() - start_time
    print('main end!' + strftime("%Y-%m-%d %H:%M:%S"))
    print('Time cost:{0}'.format(process_time))