def cacluate_needed_data(symb1, symb2, csv_file_full_path):
    """Recompute the spread data for one symbol pair and write it to a CSV.

    The pair dataframe is built from the input data directory, sorted by
    'DATE' ascending and passed through
    ``pair_trade.calculate_spread_zscore``. When ``csv_file_full_path``
    already exists, the stored rows are merged in with ``combine_first``
    (freshly computed values take precedence, stored values fill the gaps)
    and the result is narrowed to a fixed column set before
    ``pair_back_test.set_corr_and_coint`` is called with the index of the
    last stored row. Otherwise ``set_corr_and_coint`` is called without
    that argument. In both cases the frame is sorted by 'DATE' descending
    and written to ``csv_file_full_path``.

    Args:
        symb1: First symbol of the pair.
        symb2: Second symbol of the pair.
        csv_file_full_path: Path of the CSV file to update or create.
    """
    input_dir = setting.get_input_data_dir()
    fresh = pair_trade.create_pairs_dataframe(input_dir, symb1, symb2)
    fresh = fresh.sort_values('DATE', ascending=True)
    fresh = pair_trade.calculate_spread_zscore(fresh, symb1, symb2)

    # No previous output: just compute the statistics and write the file.
    if not ft.is_file_exists(csv_file_full_path):
        newest_first = fresh.sort_values('DATE', ascending=False)
        pair_back_test.set_corr_and_coint(newest_first, symb1, symb2)
        ft.write_csv(newest_first, csv_file_full_path)
        return

    stored = ft.read_csv(csv_file_full_path)
    stored['DATE'] = pd.to_datetime(stored['DATE'])
    stored = stored.sort_values('DATE', ascending=True)
    stored.index = stored['DATE']
    # Index (length 1) holding the most recent date already in the CSV.
    last_row_date = stored.tail(1).index

    output_columns = [
        'OPEN_' + symb1,
        'CLOSE_' + symb1,
        'OPEN_' + symb2,
        'CLOSE_' + symb2,
        'saya_divide',
        'saya_divide_mean',
        'saya_divide_std',
        'saya_divide_sigma',
        'deviation_rate(%)',
        'CORR_3M',
        'COINT_3M',
        'CORR_1Y',
        'COINT_1Y',
    ]
    merged = fresh.combine_first(stored)
    merged = merged.loc[:, output_columns]
    merged = merged.sort_values('DATE', ascending=False)
    pair_back_test.set_corr_and_coint(merged, symb1, symb2, last_row_date)
    ft.write_csv(merged, csv_file_full_path)
def generate_list():
    """Write the list of target symbols to the target-stock list file.

    Collects the symbols via ``file_util.getAllTargetSymbols`` from the
    input data directory, puts them in a single-column ("CODE") dataframe
    and writes it, without the index, to the path returned by
    ``setting.get_target_stock_data_list_file_path()``.
    """
    target_symbols = file_util.getAllTargetSymbols(
        setting.get_input_data_dir())
    code_frame = pd.DataFrame(data=target_symbols, columns=["CODE"])
    output_path = setting.get_target_stock_data_list_file_path()
    file_util.write_csv_without_index(code_frame, output_path)
def main(targetYear=None, targetMonth=None):
    """Recalculate the data files for every pair on the watching-list sheet.

    Symbol pairs are read from the workbook sheet named by
    ``sheet_name_Watching_Input`` (rows 4 onward, columns 3 and 7). For each
    usable pair the correlation and cointegration values are collected, the
    spread data is written to ``<symb1>_<symb2>.csv`` in the watching-list
    directory, and finally a summary report is produced via
    ``pairs_main.output_report``.

    Args:
        targetYear: Not used by this function; kept so existing callers
            keep working.
        targetMonth: Not used by this function; kept so existing callers
            keep working.
    """
    print('Watching List data caculate main start!')
    file_name = os.path.join(setting.get_root_dir(), excel_file_name)
    workbook = openpyxl.load_workbook(file_name, data_only=True)
    sheet = workbook[sheet_name_Watching_Input]
    record_list = []
    symbols_corr_list = []

    ft.clean_target_dir(os.path.join(setting.get_watching_list_file_dir()))

    for row_index in range(4, sheet.max_row + 1):
        raw_code1 = sheet.cell(row=row_index, column=3).value
        raw_code2 = sheet.cell(row=row_index, column=7).value
        # The None test must run on the raw cell values: str() never
        # returns None (str(None) is the text "None"), so testing after
        # the conversion can never match.
        if raw_code1 is None or raw_code2 is None:
            continue
        record = WatchingRecord()
        record.code1 = str(raw_code1)
        record.code2 = str(raw_code2)
        record_list.append(record)

    for record in record_list:
        symb1 = record.code1
        symb2 = record.code2
        # Skip blank codes and cells that literally contain the text "None".
        if not symb1 or not symb2 or symb1 == "None" or symb2 == "None":
            continue

        _pairs = pairs_main.create_pairs_dataframe(
            setting.get_input_data_dir(), symb1, symb2)
        corr_3m, corr_1y = trade_util.check_corr(_pairs, symb1, symb2)
        coint_3m, coint_1y = trade_util.check_cointegration(
            _pairs, symb1, symb2)
        symbols_corr_list.append(
            [symb1, symb2, corr_3m, corr_1y, coint_3m, coint_1y])

        _pairs = _pairs.sort_values('DATE', ascending=True)
        _pairs = pairs_main.calculate_spread_zscore(_pairs, symb1, symb2)
        _pairs = _pairs.sort_values('DATE', ascending=False)
        file_util.write_csv(
            _pairs,
            os.path.join(setting.get_watching_list_file_dir(),
                         symb1 + '_' + symb2 + '.csv'))

    # Sort descending on element 3 of each row, i.e. CORR_1Y.
    # NOTE(review): the previous comment said "sort by 3 month corr", which
    # would be element 2 (CORR_3M) - confirm which key is intended.
    corr_data = sorted(symbols_corr_list, key=itemgetter(3), reverse=True)
    corr_data = pd.DataFrame(
        columns=[
            'SYM_A', 'SYM_B', 'CORR_3M', 'CORR_1Y', 'COINT_3M', 'COINT_1Y'
        ],
        data=corr_data)
    pairs_main.output_report(corr_data, False,
                             setting.get_watching_list_file_dir(),
                             setting.watching_corr_result_file_name)
    print('Watching List data caculate main end!')
def generate_caculated_data_csv(symbols,
                                caculated_csv_path=caculated_csv_dir,
                                startdate=None,
                                enddate=None,
                                mode='insert'):
    """Create or update the calculated-data CSV of every symbol pair.

    Each unordered pair of distinct symbols is processed once. The pair
    dataframe is optionally restricted to the ``startdate``..``enddate``
    range, sorted by 'DATE' ascending and passed through
    ``pairs_util.calculate_spread_zscore``. If ``<symb1>_<symb2>.csv``
    already exists in ``caculated_csv_path`` its rows are merged in with
    ``combine_first`` and the result is narrowed to a fixed column set;
    ``set_corr_and_coint`` is then called with the index of the last stored
    row. Otherwise ``set_corr_and_coint`` is called without that argument.
    The result is written newest-first.

    Args:
        symbols: Sized collection of symbol strings.
        caculated_csv_path: Directory the per-pair CSV files live in.
        startdate: Optional lower bound (inclusive); formatted with
            ``strftime('%Y-%m-%d')`` before comparison with the index.
        enddate: Optional upper bound (inclusive), handled like
            ``startdate``.
        mode: ``'create'`` cleans the target directory first; any other
            value only makes sure the directory exists.
    """
    if mode == 'create':
        ft.clean_target_dir(caculated_csv_path)
    else:
        ft.create_target_dir(caculated_csv_path)

    # The date bounds do not depend on the pair, so format them once.
    start_date = None
    if startdate is not None:
        start_date = datetime.datetime.strftime(startdate, '%Y-%m-%d')
    end_date = None
    if enddate is not None:
        end_date = datetime.datetime.strftime(enddate, '%Y-%m-%d')

    # Pairs are remembered as tuples. A concatenated key (symb1 + symb2) is
    # ambiguous for symbols of different lengths ('1' + '23' == '12' + '3')
    # and would silently skip valid pairs.
    processed_pairs = set()
    total = len(symbols)
    for position, symb1 in enumerate(symbols, start=1):
        print('Processing {0}/{1} {2}...'.format(position, total, symb1))
        for symb2 in symbols:
            if (symb1 == symb2 or (symb1, symb2) in processed_pairs
                    or (symb2, symb1) in processed_pairs):
                continue
            processed_pairs.add((symb1, symb2))

            _pairs = pairs_util.create_pairs_dataframe(
                setting.get_input_data_dir(), symb1, symb2)
            if start_date is not None:
                _pairs = _pairs[_pairs.index >= start_date]
            if end_date is not None:
                _pairs = _pairs[_pairs.index <= end_date]

            result_write_csv = os.path.join(caculated_csv_path,
                                            symb1 + '_' + symb2 + '.csv')
            _pairs = _pairs.sort_values('DATE', ascending=True)
            _pairs = pairs_util.calculate_spread_zscore(_pairs, symb1, symb2)

            if ft.is_file_exists(result_write_csv):
                csv_pairs = ft.read_csv(result_write_csv)
                csv_pairs['DATE'] = pd.to_datetime(csv_pairs['DATE'])
                csv_pairs = csv_pairs.sort_values('DATE', ascending=True)
                csv_pairs.index = csv_pairs['DATE']
                # Index (length 1) of the most recent row already stored.
                last_row_date = csv_pairs.tail(1).index

                # Fresh values win; stored rows fill whatever is missing.
                _pairs = _pairs.combine_first(csv_pairs)
                _pairs = _pairs.loc[:, [
                    'OPEN_' + symb1, 'CLOSE_' + symb1,
                    'OPEN_' + symb2, 'CLOSE_' + symb2,
                    'saya_divide', 'saya_divide_mean', 'saya_divide_std',
                    'saya_divide_sigma', 'deviation_rate(%)',
                    'CORR_3M', 'COINT_3M', 'CORR_1Y', 'COINT_1Y'
                ]]
                _pairs = _pairs.sort_values('DATE', ascending=False)
                set_corr_and_coint(_pairs, symb1, symb2, last_row_date)
                ft.write_csv(_pairs, result_write_csv)
            else:
                _pairs = _pairs.sort_values('DATE', ascending=False)
                set_corr_and_coint(_pairs, symb1, symb2)
                ft.write_csv(_pairs, result_write_csv)
def _build_pair_report_row(symb1, symb2):
    """Evaluate one pair, write its spread CSV and return its report row.

    Returns:
        ``[symb1, symb2, corr_3m, corr_1y, coint_3m, coint_1y]``, or
        ``None`` when ``is_available_pari_data`` rejects the pair (in that
        case no CSV is written).
    """
    _pairs = create_pairs_dataframe(setting.get_input_data_dir(), symb1,
                                    symb2)
    corr_3m, corr_1y = trade_util.check_corr(_pairs, symb1, symb2)
    coint_3m, coint_1y = trade_util.check_cointegration(_pairs, symb1, symb2)

    if not is_available_pari_data(_pairs, symb1, symb2, corr_3m, corr_1y,
                                  coint_3m, coint_1y):
        return None

    _pairs = _pairs.sort_values('DATE', ascending=True)
    _pairs = calculate_spread_zscore(_pairs, symb1, symb2)
    _pairs = _pairs.sort_values('DATE', ascending=False)
    file_util.write_csv(
        _pairs,
        os.path.join(setting.get_result_dir(), symb1 + '_' + symb2 + '.csv'))
    return [symb1, symb2, corr_3m, corr_1y, coint_3m, coint_1y]


def main(args):
    """Evaluate symbol pairs and write the correlation report.

    In fast mode (``args[1]`` is ``'fast'`` or ``'FAST'``) only the pairs
    listed in the current report file are re-evaluated; otherwise every
    unordered pair of distinct target symbols is evaluated. Pairs accepted
    by ``is_available_pari_data`` get a per-pair CSV in the result
    directory and a row in the report passed to ``output_report``.

    Args:
        args: Command-line style argument list; only ``args[1]`` is read.
    """
    start_time = datetime.now()
    print('maint start ' + strftime("%Y-%m-%d %H:%M:%S"))

    isFastCaculateMode = len(args) >= 2 and args[1] in ('fast', 'FAST')
    if isFastCaculateMode:
        print('FAST CACULATE MODE')

    file_util.clean_target_dir(setting.get_result_dir())

    # get all target stock symbols
    symbols = file_util.getAllTargetSymbols(setting.get_input_data_dir())
    print('Total symbols size:' + str(len(symbols)))

    symbols_corr_list = []

    if isFastCaculateMode:
        _pais = file_util.read_csv(setting.get_currenty_report_file())
        total = len(_pais)
        for position, (_, row) in enumerate(_pais.iterrows(), start=1):
            symb1 = str(int(row.SYM_A))
            symb2 = str(int(row.SYM_B))
            print('Processing {0}/{1} {2} - {3}...'.format(
                position, total, symb1, symb2))
            report_row = _build_pair_report_row(symb1, symb2)
            if report_row is not None:
                symbols_corr_list.append(report_row)
    else:
        # Pairs are remembered as tuples. A concatenated key
        # (symb1 + symb2) is ambiguous for symbols of different lengths
        # ('1' + '23' == '12' + '3') and would silently skip valid pairs.
        processed_pairs = set()
        total = len(symbols)
        for position, symb1 in enumerate(symbols, start=1):
            print('Processing {0}/{1} {2}...'.format(position, total, symb1))
            for symb2 in symbols:
                if (symb1 == symb2 or (symb1, symb2) in processed_pairs
                        or (symb2, symb1) in processed_pairs):
                    continue
                processed_pairs.add((symb1, symb2))
                report_row = _build_pair_report_row(symb1, symb2)
                if report_row is not None:
                    symbols_corr_list.append(report_row)

    # Sort descending on element 3 of each row, i.e. CORR_1Y.
    # NOTE(review): the previous comment said "sort by 3 month corr", which
    # would be element 2 (CORR_3M) - confirm which key is intended.
    corr_data = sorted(symbols_corr_list, key=itemgetter(3), reverse=True)
    corr_data = pd.DataFrame(
        columns=[
            'SYM_A', 'SYM_B', 'CORR_3M', 'CORR_1Y', 'COINT_3M', 'COINT_1Y'
        ],
        data=corr_data)
    output_report(corr_data, isFastCaculateMode, setting.get_result_dir(),
                  setting.corr_result_file_name)

    process_time = datetime.now() - start_time
    print('main end!' + strftime("%Y-%m-%d %H:%M:%S"))
    print('Time cost:{0}'.format(process_time))