def get_net_data(BEG, END):
    """Assemble the per-day arrays needed to run the net over [BEG, END].

    Returns:
        (beg_idx, dataset size, batch count, raw timestamps, ISO weekdays,
         tradeable mask, portfolio mask, adjusted close px, net input,
         forward-return labels over PRED_HORIZON days)
    """
    beg_idx, end_idx = env.get_data_idxs_range(BEG, END)
    raw_dates = env.get_raw_dates(beg_idx, end_idx)
    net_input = env.get_input(beg_idx, end_idx)
    px = env.get_adj_close_px(beg_idx, end_idx)
    horizon = get_config().PRED_HORIZON
    px_pred_hor = env.get_adj_close_px(beg_idx + horizon, end_idx + horizon)
    tradeable_mask = env.get_tradeable_mask(beg_idx, end_idx)
    port_mask = env.get_portfolio_mask(beg_idx, end_idx)

    # The horizon-shifted price series is shorter near the end of history;
    # clip every array to the span where a PRED_HORIZON-ahead price exists.
    ds_sz = px_pred_hor.shape[1]
    raw_dates = raw_dates[:ds_sz]
    raw_week_days = np.array(
        [date_from_timestamp(ts).isoweekday() for ts in raw_dates],
        dtype=np.int32)
    net_input = net_input[:, :ds_sz, :]
    tradeable_mask = tradeable_mask[:, :ds_sz]
    port_mask = port_mask[:, :ds_sz]
    px = px[:, :ds_sz]

    # Label = relative return over the prediction horizon.
    labels = (px_pred_hor - px) / px
    batch_num = get_batches_num(ds_sz, get_config().BPTT_STEPS)
    return (beg_idx, ds_sz, batch_num, raw_dates, raw_week_days,
            tradeable_mask, port_mask, px, net_input, labels)
def get_portfolio_mask(self, BEG_DATA_IDX, END_DATA_IDX):
    """Mask of stocks eligible for the portfolio over the inclusive index range.

    Index-style configurations additionally require S&P membership;
    otherwise the plain tradeable mask is used.
    """
    cfg = get_config()
    if cfg.is_snp_index() or cfg.is_all_stocks_index() or cfg.is_universal_net():
        span = slice(BEG_DATA_IDX, END_DATA_IDX + 1)
        return self.tradable_mask[:, span] & self.snp_mask[:, span]
    return self.get_tradeable_mask(BEG_DATA_IDX, END_DATA_IDX)
def create_folders():
    """Ensure the weights and train/test figure directories exist."""
    # exist_ok avoids the check-then-create race of os.path.exists + makedirs
    # (the original raised if another process created the dir in between).
    os.makedirs(get_config().WEIGHTS_FOLDER_PATH, exist_ok=True)
    os.makedirs(get_config().TRAIN_FIG_PATH, exist_ok=True)
    os.makedirs(get_config().TEST_FIG_PATH, exist_ok=True)
def eval(): nonlocal raw_dates, beg_data_idx, ds_size, batch_num, input, labels, px, mask, port_mask, eq, pred_hist, state curr_progress = 0 cash = 1 pos = np.zeros((total_tickers)) pos_px = np.zeros((total_tickers)) losses = np.zeros((batch_num)) for b in range(batch_num): if state is None: state = net.zero_state(total_tickers) b_b_i, b_e_i = get_batch_range(b) _input = input[:, b_b_i:b_e_i, :] _labels = labels[:, b_b_i:b_e_i] _mask = mask[:, b_b_i:b_e_i] state, loss, predictions = net.eval( state, _input, _labels, _mask.astype(np.float32)) pred_hist[:, b_b_i:b_e_i] = predictions[:, :, 0] for i in range(predictions.shape[1]): data_idx = b * get_config().BPTT_STEPS + i curr_px = px[:, data_idx] global_data_idx = beg_data_idx + data_idx date = datetime.datetime.fromtimestamp( raw_dates[data_idx]).date() open_pos = False close_pos = False if get_config().REBALANCE_FRI: if date.isoweekday() == 5: open_pos = True if date.isoweekday() == 5: close_pos = True else: if data_idx % get_config().REBALANCE_FREQ == 0: close_pos = True open_pos = True if close_pos: rpl = np.sum(pos * (curr_px - pos_px)) cash += rpl pos[:] = 0 if open_pos: pos_px = curr_px pos_mask = port_mask[:, data_idx] num_stks = np.sum(pos_mask) if get_config().CAPM: exp, cov = env.get_exp_and_cov( pos_mask, global_data_idx - get_config().COVARIANCE_LENGTH + 1, global_data_idx) exp = get_config().REBALANCE_FREQ * exp cov = get_config().REBALANCE_FREQ * get_config( ).REBALANCE_FREQ * cov if get_config().CAPM_USE_NET_PREDICTIONS: exp = predictions[:, i, 0][pos_mask] capm = Capm(num_stks) capm.init() best_sharpe = None best_weights = None best_constriant = None while i <= 10000: w, sharpe, constraint = capm.get_params( exp, cov) # print("Iteration: %d Sharpe: %.2f Constraint: %.6f" % (i, sharpe, constraint)) if w is None: break if best_sharpe is None or sharpe >= best_sharpe: best_weights = w best_sharpe = sharpe best_constriant = constraint capm.fit(exp, cov) capm.rescale_weights() i += 1 date = 
datetime.datetime.fromtimestamp( raw_dates[data_idx]).date() print( "Date: %s sharpe: %.2f constraint: %.6f" % (date.strftime('%Y-%m-%d'), best_sharpe, best_constriant)) pos[pos_mask] = best_weights / curr_px[pos_mask] else: pos[pos_mask] = 1 / num_stks / curr_px[ pos_mask] * np.sign(predictions[pos_mask, i, 0]) urpl = np.sum(pos * (curr_px - pos_px)) nlv = cash + urpl eq[data_idx] = nlv losses[b] = loss curr_progress = progress.print_progress( curr_progress, b, tr_batch_num) progress.print_progess_end() avg_loss = np.mean(np.sqrt(losses)) return avg_loss
def get_batch_range(b):
    """Return the [start, end) column bounds of BPTT batch `b`."""
    step = get_config().BPTT_STEPS
    return b * step, (b + 1) * step
import csv
import os.path
import pandas as pd
import datetime
from portfolio.net_shiva import NetShiva
from portfolio.multi_stock_config import get_config, Mode
from portfolio.stat import print_alloc, get_draw_down, get_sharpe_ratio, get_capital, get_avg_yeat_ret
from portfolio.graphs import plot_equity_curve, show_plots, create_time_serie_fig, plot_time_serie
import progress
import matplotlib.pyplot as plt
from portfolio.capm import Capm
from portfolio.multi_stock_env import Env, date_from_timestamp

# Training runs non-interactively: turn off matplotlib interactive mode.
if get_config().MODE == Mode.TRAIN:
    plt.ioff()


def create_folders():
    """Create the weights and figure output directories if missing."""
    for path in (get_config().WEIGHTS_FOLDER_PATH,
                 get_config().TRAIN_FIG_PATH,
                 get_config().TEST_FIG_PATH):
        if not os.path.exists(path):
            os.makedirs(path)


def get_batches_num(ds_sz, bptt_steps):
    """Number of BPTT batches needed to cover `ds_sz` samples (ceiling division)."""
    full_batches, remainder = divmod(ds_sz, bptt_steps)
    return full_batches + (1 if remainder else 0)
def idx_to_date(idx):
    """Map a day offset back to a calendar date relative to TRAIN_BEG."""
    offset = datetime.timedelta(days=idx)
    return get_config().TRAIN_BEG + offset
# stocks = get_snp_tickers()
# 2015-10-30
# stocks = ['HPQ']
# stocks = ['BAC']


def create_folders():
    """Make sure the per-ticker data directory exists."""
    if not os.path.exists(get_config().DATA_FOLDER_PATH):
        os.makedirs(get_config().DATA_FOLDER_PATH)


net = NetShiva()
# Train one model per ticker: point the config at the stock, refresh its
# derived paths, fetch + preprocess that stock's history, then train.
for stock in stocks:
    print("Processing %s stock" % stock)
    get_config().TICKER = stock
    get_config().reload()
    ticker_list = [stock]
    create_folders()
    download_data(ticker_list, get_config().DATA_PATH,
                  get_config().HIST_BEG, get_config().HIST_END)
    preprocess_data(ticker_list, get_config().DATA_PATH,
                    get_config().HIST_BEG, get_config().HIST_END,
                    get_config().DATA_NPZ_PATH, get_config().DATA_FEATURES)
    train(net)
def is_train():
    """True when the configured mode is training."""
    return Mode.TRAIN == get_config().MODE
df['date'] = df['ts'].apply(date_from_timestamp) predications.append(df) except: pass os.chdir(pwd) pred_df = pd.concat(predications, ignore_index=True) pred_df_length = len(pred_df['date']) # add stock idx column pred_df['stk_idx'] = pd.Series(np.zeros((pred_df_length), dtype=np.int32), index=pred_df.index) get_config().TICKER = 'UNIVERSAL_NET' get_config().reload() env = Env() total_stocks = len(env.tickers) # update stk_idx column for all records for stock in stocks: stk_idx = env._ticker_to_idx(stock) pred_df.loc[pred_df['ticker'] == stock, 'stk_idx'] = stk_idx beg_idx, end_idx = env.get_data_idxs_range(get_config().TRAIN_BEG, get_config().TRAIN_END) trading_days = end_idx + 1 - beg_idx adj_px = env.get_adj_close_px(beg_idx, end_idx) tradable_maks = env.get_portfolio_mask(beg_idx, end_idx) # tradable_maks = env.get_tradeable_mask(beg_idx, end_idx)
'XOM', 'XRAY', 'XRX', 'XYL', 'YUM', 'ZBH', 'ZION', 'ZTS', 'Q', 'BHF', 'RMD', 'PKG', 'MGM', ] get_config().PREDICTION_MODE = True def create_folders(): if not os.path.exists(get_config().DATA_FOLDER_PATH): os.makedirs(get_config().DATA_FOLDER_PATH) ticker_exch_map = get_snp_tickers_exch_map() def get_net_data(env, BEG, END): beg_idx, end_idx = env.get_data_idxs_range(BEG, END) raw_dates = env.get_raw_dates(beg_idx, end_idx) input = env.get_input(beg_idx, end_idx)
def get_adj_close_px(self, BEG_DATA_IDX, END_DATA_IDX):
    """Adjusted close prices for the inclusive day-index range, shape (stocks, days)."""
    col = get_config().ADJ_CLOSE_DATA_IDX
    return self.raw_data[:, BEG_DATA_IDX:END_DATA_IDX + 1, col]
def __init__(self):
    """Load the NPZ price history and precompute masks and net inputs.

    Builds, per stock and per kept trading day:
      - self.tradable_mask: day had valid (positive) OHLCV data
      - self.snp_mask: stock was an S&P constituent that day
      - self.input: 6-feature net input (O/C/H/L returns vs previous close,
        volume change, constant bias)
    Days with fewer than MIN_STOCKS_TRADABLE_PER_TRADING_DAY tradable
    stocks are dropped entirely.
    """
    print('loading data...')
    tickers, raw_dt, raw_data = load_npz_data(get_config().DATA_NPZ_PATH)
    # tickers, raw_dt, raw_data = load_npz_data('data/stocks/DIS/DIS.npz')
    print('data load complete')
    self._tickers = tickers
    # NOTE(review): reads self.tickers right after assigning self._tickers —
    # presumably a property exposing _tickers; confirm it exists.
    self.stks = self.tickers.shape[0]
    days = raw_dt.shape[0]

    def _idx_to_date(idx):
        # Timestamp of day `idx` -> calendar date (local timezone).
        return datetime.datetime.fromtimestamp(raw_dt[idx]).date()

    # calc data dates range
    HIST_BEG = _idx_to_date(0)
    HIST_END = _idx_to_date(-1)

    def _date_to_idx(date):
        # Day offset within the loaded history, or None if out of range.
        if HIST_BEG <= date <= HIST_END:
            return (date - HIST_BEG).days
        return None

    # calc snp_mask: True while the membership CSV lists the ticker as an
    # S&P constituent, clamped to the loaded history range.
    snp_mask = np.full((self.stks, days), False)
    snp_mask_df = pd.read_csv('data/snp/snp_mask.csv')
    for idx, row in snp_mask_df.iterrows():
        _from = datetime.datetime.strptime(row['from'], '%Y-%m-%d').date()
        _to = datetime.datetime.strptime(row['to'], '%Y-%m-%d').date()
        _ticker = row['ticker']
        stk_idx = self._ticker_to_idx(_ticker)
        if stk_idx is None:
            continue
        # Clamp the membership interval into [HIST_BEG, HIST_END].
        _from = max(_from, HIST_BEG)
        _from = min(_from, HIST_END)
        _to = min(_to, HIST_END)
        _to = max(_to, HIST_BEG)
        _from_idx = _date_to_idx(_from)
        _to_idx = _date_to_idx(_to)
        snp_mask[stk_idx, _from_idx:_to_idx + 1] = True
    # calc tradable_mask, traded_stocks_per_day, trading_day_mask
    tradable_mask = np.all(raw_data > 0.0, axis=2)
    traded_stocks_per_day = tradable_mask[:, :].sum(0)
    trading_day_mask = traded_stocks_per_day >= get_config(
    ).MIN_STOCKS_TRADABLE_PER_TRADING_DAY
    # trading_day_mask = traded_stocks_per_day > 0
    self.trading_days = np.sum(trading_day_mask)
    # leave tradeable days only
    self.raw_dt = raw_dt[trading_day_mask]
    self.raw_data = raw_data[:, trading_day_mask, :]
    self.traded_stocks_per_day = traded_stocks_per_day[trading_day_mask]
    self.tradable_mask = tradable_mask[:, trading_day_mask]
    self.snp_mask = snp_mask[:, trading_day_mask]
    # prepare input array
    input = np.zeros((self.stks, self.trading_days, 6))
    # fill array for each stock
    for stk_idx in range(self.stks):
        stk_raw_data = self.raw_data[stk_idx, :, :]
        tradable_mask = self.tradable_mask[stk_idx]
        # dirty hack: fill prices with first known px
        tr_days_idxs = np.nonzero(tradable_mask)[0]
        first_adj_volume = None
        # NOTE(review): `first_volume` is never read below; this line likely
        # meant to initialize `first_adj_close_px`, which is otherwise
        # undefined (NameError) for a stock with zero tradable days.
        first_volume = None
        if tr_days_idxs.shape[0] > 0:
            first_adj_close_px = stk_raw_data[
                tr_days_idxs[0], get_config().ADJ_CLOSE_DATA_IDX]
            first_adj_volume = stk_raw_data[
                tr_days_idxs[0], get_config().ADJ_VOLUME_DATA_IDX]
        if first_adj_close_px is None:
            # No known price at all: flat-fill the whole series.
            stk_raw_data[:, :] = 1
        else:
            # Forward-fill non-tradable days with the last seen px/volume.
            last_px = first_adj_close_px
            last_volume = first_adj_volume
            for day_idx in range(self.trading_days):
                if tradable_mask[day_idx] == 0:
                    stk_raw_data[day_idx,
                                 get_config().ADJ_CLOSE_DATA_IDX] = last_px
                    stk_raw_data[day_idx,
                                 get_config().ADJ_OPEN_DATA_IDX] = last_px
                    stk_raw_data[day_idx,
                                 get_config().ADJ_HIGH_DATA_IDX] = last_px
                    stk_raw_data[day_idx,
                                 get_config().ADJ_LOW_DATA_IDX] = last_px
                    stk_raw_data[
                        day_idx,
                        get_config().ADJ_VOLUME_DATA_IDX] = last_volume
                else:
                    last_px = stk_raw_data[day_idx,
                                           get_config().ADJ_CLOSE_DATA_IDX]
                    last_volume = stk_raw_data[
                        day_idx, get_config().ADJ_VOLUME_DATA_IDX]
        self.raw_data[stk_idx, :, :] = stk_raw_data
        stk_data = stk_raw_data[tradable_mask, :]
        a_o = stk_data[:, get_config().ADJ_OPEN_DATA_IDX]
        a_c = stk_data[:, get_config().ADJ_CLOSE_DATA_IDX]
        a_h = stk_data[:, get_config().ADJ_HIGH_DATA_IDX]
        a_l = stk_data[:, get_config().ADJ_LOW_DATA_IDX]
        a_v = stk_data[:, get_config().ADJ_VOLUME_DATA_IDX]
        if a_c.shape[0] == 0:
            continue
        # Previous-day close/volume; day 0 references itself (zero return).
        prev_a_c = np.roll(a_c, 1)
        prev_a_c[0] = a_c[0]
        prev_a_v = np.roll(a_v, 1)
        prev_a_v[0] = a_v[0]
        x_o = (a_o - prev_a_c) / prev_a_c
        x_c = (a_c - prev_a_c) / prev_a_c
        x_h = (a_h - prev_a_c) / prev_a_c
        x_l = (a_l - prev_a_c) / prev_a_c
        x_v = (a_v - prev_a_v) / prev_a_v
        input[stk_idx, tradable_mask, 0] = x_o
        input[stk_idx, tradable_mask, 1] = x_c
        input[stk_idx, tradable_mask, 2] = x_h
        input[stk_idx, tradable_mask, 3] = x_l
        input[stk_idx, tradable_mask, 4] = x_v
        input[stk_idx, tradable_mask, 5] = 1
    self.input = input
def date_to_idx(date):
    """Day offset of `date` within [TRAIN_BEG, TRAIN_END], else None."""
    cfg = get_config()
    if not (cfg.TRAIN_BEG <= date <= cfg.TRAIN_END):
        return None
    return (date - cfg.TRAIN_BEG).days
def train(net):
    """Train/evaluate `net` over the configured train (and optional test) ranges.

    Per epoch: evaluate on the train set (and test set when TEST is on),
    simulating a rebalancing portfolio to build equity curves; in training
    mode log stats/figures and run one fitting pass over the train batches;
    in prediction mode dump predictions and plots once, then stop.
    """
    create_folders()
    env = Env()
    # net = NetShiva()
    if not os.path.exists(get_config().TRAIN_STAT_PATH):
        # First run: write the stats CSV header.
        with open(get_config().TRAIN_STAT_PATH, 'a', newline='') as f:
            writer = csv.writer(f)
            writer.writerow(
                ('epoch', 'tr loss', 'tr dd', 'tr sharpe', 'tr y avg',
                 'tst loss', 'tst dd', 'tst sharpe', 'tst y avg'))
    total_tickers = len(env.tickers)

    def open_train_stat_file():
        # Append-mode handle for the per-epoch stats CSV.
        return open(get_config().TRAIN_STAT_PATH, 'a', newline='')

    def is_train():
        return get_config().MODE == Mode.TRAIN

    # NOTE(review): dummy_context_mgr is presumably defined elsewhere in this
    # module — it supplies a no-op context in prediction mode.
    with open_train_stat_file() if is_train() else dummy_context_mgr() as f:
        if is_train():
            writer = csv.writer(f)
        if get_config().EPOCH_WEIGHTS_TO_LOAD != 0:
            # Resume from previously saved weights.
            net.load_weights(get_config().WEIGHTS_PATH,
                             get_config().EPOCH_WEIGHTS_TO_LOAD)
            epoch = get_config().EPOCH_WEIGHTS_TO_LOAD
            if is_train():
                epoch += 1
        else:
            net.init()
            epoch = 0

        def get_net_data(BEG, END):
            # Gather dates, inputs, prices, masks and forward-return labels
            # for [BEG, END], clipped so a PRED_HORIZON-ahead price exists.
            beg_idx, end_idx = env.get_data_idxs_range(BEG, END)
            raw_dates = env.get_raw_dates(beg_idx, end_idx)
            input = env.get_input(beg_idx, end_idx)
            px = env.get_adj_close_px(beg_idx, end_idx)
            px_pred_hor = env.get_adj_close_px(
                beg_idx + get_config().PRED_HORIZON,
                end_idx + get_config().PRED_HORIZON)
            tradeable_mask = env.get_tradeable_mask(beg_idx, end_idx)
            port_mask = env.get_portfolio_mask(beg_idx, end_idx)
            ds_sz = px_pred_hor.shape[1]
            raw_dates = raw_dates[:ds_sz]
            raw_week_days = np.full(raw_dates.shape, 0, dtype=np.int32)
            for i in range(raw_dates.shape[0]):
                date = date_from_timestamp(raw_dates[i])
                raw_week_days[i] = date.isoweekday()
            input = input[:, :ds_sz, :]
            tradeable_mask = tradeable_mask[:, :ds_sz]
            port_mask = port_mask[:, :ds_sz]
            px = px[:, :ds_sz]
            # Label = relative return over the prediction horizon.
            labels = (px_pred_hor - px) / px
            batch_num = get_batches_num(ds_sz, get_config().BPTT_STEPS)
            return beg_idx, ds_sz, batch_num, raw_dates, raw_week_days, tradeable_mask, port_mask, px, input, labels

        tr_beg_data_idx, tr_ds_sz, tr_batch_num, tr_raw_dates, tr_week_days, tr_tradeable_mask, tr_port_mask, tr_px, tr_input, tr_labels = get_net_data(
            get_config().TRAIN_BEG, get_config().TRAIN_END)
        tr_eq = np.zeros((tr_ds_sz))
        tr_pred = np.zeros((total_tickers, tr_ds_sz))
        if get_config().TEST:
            tst_beg_data_idx, tst_ds_sz, tst_batch_num, tst_raw_dates, tst_week_days, tst_tradeable_mask, tst_port_mask, tst_px, tst_input, tst_labels = get_net_data(
                get_config().TEST_BEG, get_config().TEST_END)
            tst_eq = np.zeros((tst_ds_sz))
            tst_pred = np.zeros((total_tickers, tst_ds_sz))

        def get_batch_range(b):
            # [start, end) column bounds of BPTT batch b.
            return b * get_config().BPTT_STEPS, (b + 1) * get_config().BPTT_STEPS

        while epoch <= get_config().MAX_EPOCH:
            print("Eval %d epoch on train set..." % epoch)
            # Point the shared eval variables at the TRAIN dataset.
            batch_num = tr_batch_num
            ds_size = tr_ds_sz
            input = tr_input
            labels = tr_labels
            px = tr_px
            mask = tr_tradeable_mask
            port_mask = tr_port_mask
            eq = tr_eq
            beg_data_idx = tr_beg_data_idx
            raw_dates = tr_raw_dates
            pred_hist = tr_pred
            state = None

            def eval_with_state_reset():
                # Evaluation variant: each day, reset the hidden state and
                # re-run the net over the trailing RESET_HIDDEN_STATE_FREQ
                # window, using only the final-step prediction.
                nonlocal raw_dates, beg_data_idx, ds_size, batch_num, input, labels, px, mask, port_mask, eq, pred_hist, state
                curr_progress = 0
                cash = 1
                pos = np.zeros((total_tickers))
                pos_px = np.zeros((total_tickers))
                # NOTE(review): `losses` is never written in this variant, so
                # the returned average loss is always 0 — a `losses[...] =
                # loss` assignment appears to be missing.
                losses = np.zeros((batch_num))
                for i in range(ds_size):
                    state = net.zero_state(total_tickers)
                    b_b_i = max(0, i + 1 - get_config().RESET_HIDDEN_STATE_FREQ)
                    b_e_i = i + 1
                    _input = input[:, b_b_i:b_e_i, :]
                    _labels = labels[:, b_b_i:b_e_i]
                    _mask = mask[:, b_b_i:b_e_i]
                    state, loss, predictions = net.eval(
                        state, _input, _labels, _mask.astype(np.float32))
                    pred_hist[:, i] = predictions[:, -1, 0]
                    data_idx = i
                    curr_px = px[:, data_idx]
                    global_data_idx = beg_data_idx + data_idx
                    date = datetime.datetime.fromtimestamp(
                        raw_dates[data_idx]).date()
                    open_pos = False
                    close_pos = False
                    if get_config().REBALANCE_FRI:
                        # Rebalance on Fridays only.
                        if date.isoweekday() == 5:
                            open_pos = True
                        if date.isoweekday() == 5:
                            close_pos = True
                    else:
                        if data_idx % get_config().REBALANCE_FREQ == 0:
                            close_pos = True
                            open_pos = True
                    if close_pos:
                        # Realize P&L and go flat.
                        rpl = np.sum(pos * (curr_px - pos_px))
                        cash += rpl
                        pos[:] = 0
                    if open_pos:
                        pos_px = curr_px
                        pos_mask = port_mask[:, data_idx]
                        num_stks = np.sum(pos_mask)
                        if get_config().CAPM:
                            exp, cov = env.get_exp_and_cov(
                                pos_mask,
                                global_data_idx - get_config().COVARIANCE_LENGTH + 1,
                                global_data_idx)
                            # Scale daily moments to the rebalance horizon.
                            exp = get_config().REBALANCE_FREQ * exp
                            cov = get_config().REBALANCE_FREQ * get_config(
                            ).REBALANCE_FREQ * cov
                            if get_config().CAPM_USE_NET_PREDICTIONS:
                                # NOTE(review): indexes the window axis with
                                # the day index `i`; -1 (last step) looks
                                # intended — confirm.
                                exp = predictions[:, i, 0][pos_mask]
                            capm = Capm(num_stks)
                            capm.init()
                            best_sharpe = None
                            best_weights = None
                            best_constriant = None
                            # NOTE(review): reuses the day-loop variable `i`
                            # as the optimizer counter, so the iteration
                            # budget shrinks as `i` grows.
                            while i <= 10000:
                                w, sharpe, constraint = capm.get_params(
                                    exp, cov)
                                # print("Iteration: %d Sharpe: %.2f Constraint: %.6f" % (i, sharpe, constraint))
                                if w is None:
                                    break
                                if best_sharpe is None or sharpe >= best_sharpe:
                                    best_weights = w
                                    best_sharpe = sharpe
                                    best_constriant = constraint
                                capm.fit(exp, cov)
                                capm.rescale_weights()
                                i += 1
                            date = datetime.datetime.fromtimestamp(
                                raw_dates[data_idx]).date()
                            print("Date: %s sharpe: %.2f constraint: %.6f" %
                                  (date.strftime('%Y-%m-%d'), best_sharpe,
                                   best_constriant))
                            pos[pos_mask] = best_weights / curr_px[pos_mask]
                        else:
                            # Equal weight, signed by the latest prediction.
                            pos[pos_mask] = 1 / num_stks / curr_px[
                                pos_mask] * np.sign(predictions[pos_mask, -1, 0])
                    urpl = np.sum(pos * (curr_px - pos_px))
                    nlv = cash + urpl
                    eq[data_idx] = nlv
                    curr_progress = progress.print_progress(
                        curr_progress, i, ds_size)
                progress.print_progess_end()
                avg_loss = np.mean(np.sqrt(losses))
                return avg_loss

            def eval():
                # Standard evaluation: batched BPTT pass with carried state,
                # portfolio simulation and equity-curve bookkeeping.
                nonlocal raw_dates, beg_data_idx, ds_size, batch_num, input, labels, px, mask, port_mask, eq, pred_hist, state
                curr_progress = 0
                cash = 1
                pos = np.zeros((total_tickers))
                pos_px = np.zeros((total_tickers))
                losses = np.zeros((batch_num))
                for b in range(batch_num):
                    if state is None:
                        state = net.zero_state(total_tickers)
                    b_b_i, b_e_i = get_batch_range(b)
                    _input = input[:, b_b_i:b_e_i, :]
                    _labels = labels[:, b_b_i:b_e_i]
                    _mask = mask[:, b_b_i:b_e_i]
                    state, loss, predictions = net.eval(
                        state, _input, _labels, _mask.astype(np.float32))
                    pred_hist[:, b_b_i:b_e_i] = predictions[:, :, 0]
                    for i in range(predictions.shape[1]):
                        data_idx = b * get_config().BPTT_STEPS + i
                        curr_px = px[:, data_idx]
                        global_data_idx = beg_data_idx + data_idx
                        date = datetime.datetime.fromtimestamp(
                            raw_dates[data_idx]).date()
                        open_pos = False
                        close_pos = False
                        if get_config().REBALANCE_FRI:
                            if date.isoweekday() == 5:
                                open_pos = True
                            if date.isoweekday() == 5:
                                close_pos = True
                        else:
                            if data_idx % get_config().REBALANCE_FREQ == 0:
                                close_pos = True
                                open_pos = True
                        if close_pos:
                            rpl = np.sum(pos * (curr_px - pos_px))
                            cash += rpl
                            pos[:] = 0
                        if open_pos:
                            pos_px = curr_px
                            pos_mask = port_mask[:, data_idx]
                            num_stks = np.sum(pos_mask)
                            if get_config().CAPM:
                                exp, cov = env.get_exp_and_cov(
                                    pos_mask,
                                    global_data_idx - get_config().COVARIANCE_LENGTH + 1,
                                    global_data_idx)
                                exp = get_config().REBALANCE_FREQ * exp
                                cov = get_config().REBALANCE_FREQ * get_config(
                                ).REBALANCE_FREQ * cov
                                if get_config().CAPM_USE_NET_PREDICTIONS:
                                    exp = predictions[:, i, 0][pos_mask]
                                capm = Capm(num_stks)
                                capm.init()
                                best_sharpe = None
                                best_weights = None
                                best_constriant = None
                                # NOTE(review): reuses the in-batch index `i`
                                # as the optimizer counter, shortening the
                                # optimization for later timesteps.
                                while i <= 10000:
                                    w, sharpe, constraint = capm.get_params(
                                        exp, cov)
                                    # print("Iteration: %d Sharpe: %.2f Constraint: %.6f" % (i, sharpe, constraint))
                                    if w is None:
                                        break
                                    if best_sharpe is None or sharpe >= best_sharpe:
                                        best_weights = w
                                        best_sharpe = sharpe
                                        best_constriant = constraint
                                    capm.fit(exp, cov)
                                    capm.rescale_weights()
                                    i += 1
                                date = datetime.datetime.fromtimestamp(
                                    raw_dates[data_idx]).date()
                                print(
                                    "Date: %s sharpe: %.2f constraint: %.6f" %
                                    (date.strftime('%Y-%m-%d'), best_sharpe,
                                     best_constriant))
                                pos[pos_mask] = best_weights / curr_px[pos_mask]
                            else:
                                pos[pos_mask] = 1 / num_stks / curr_px[
                                    pos_mask] * np.sign(
                                        predictions[pos_mask, i, 0])
                        urpl = np.sum(pos * (curr_px - pos_px))
                        nlv = cash + urpl
                        eq[data_idx] = nlv
                    losses[b] = loss
                    # NOTE(review): total is tr_batch_num even when this runs
                    # on the test set; should be the active batch_num.
                    curr_progress = progress.print_progress(
                        curr_progress, b, tr_batch_num)
                progress.print_progess_end()
                avg_loss = np.mean(np.sqrt(losses))
                return avg_loss

            if get_config().RESET_HIDDEN_STATE_FREQ == 0:
                tr_avg_loss = eval()
            else:
                tr_avg_loss = eval_with_state_reset()
            print("Train loss: %.4f%%" % (tr_avg_loss * 100))
            if get_config().TEST:
                print("Eval %d epoch on test set..." % epoch)
                # Repoint the shared eval variables at the TEST dataset.
                batch_num = tst_batch_num
                # NOTE(review): tr_ds_sz here looks like a copy-paste slip —
                # tst_ds_sz is presumably intended; with RESET_HIDDEN_STATE_FREQ
                # != 0 this walks the wrong number of days.
                ds_size = tr_ds_sz
                input = tst_input
                labels = tst_labels
                px = tst_px
                mask = tst_tradeable_mask
                port_mask = tst_port_mask
                eq = tst_eq
                beg_data_idx = tst_beg_data_idx
                raw_dates = tst_raw_dates
                pred_hist = tst_pred
                state = None
                if get_config().RESET_HIDDEN_STATE_FREQ == 0:
                    tst_avg_loss = eval()
                else:
                    tst_avg_loss = eval_with_state_reset()
                print("Test loss: %.4f%%" % (tst_avg_loss * 100))
            if not is_train():
                # Prediction mode: dump per-ticker predictions and plots,
                # then leave the epoch loop.
                dt = build_time_axis(tr_raw_dates)
                if not get_config().HIDE_PLOTS:
                    plot_eq('Train', get_config().TRAIN_BEG,
                            get_config().TRAIN_END, dt, tr_eq)
                result = pd.DataFrame(columns=('date', 'ticker', 'prediction'))
                for i in range(tr_raw_dates.shape[0]):
                    date = dt[i]
                    for j in range(total_tickers):
                        ticker = env._idx_to_ticker(j)
                        prediction = tr_pred[j, i]
                        row = [date, ticker, prediction]
                        result.loc[i * total_tickers + j] = row
                result.to_csv(get_config().TRAIN_PRED_PATH, index=False)
                if get_config().TEST:
                    dt = build_time_axis(tst_raw_dates)
                    if not get_config().HIDE_PLOTS:
                        plot_eq('Test', get_config().TEST_BEG,
                                get_config().TEST_END, dt, tst_eq)
                if not get_config().HIDE_PLOTS:
                    show_plots()
                break
            if is_train() and epoch <= get_config().MAX_EPOCH:
                # plot and save graphs
                dt = build_time_axis(tr_raw_dates)
                fig, tr_dd, tr_sharpe, tr_y_avg = plot_eq(
                    'Train', get_config().TRAIN_BEG, get_config().TRAIN_END,
                    dt, tr_eq)
                fig.savefig('%s/%04d.png' % (get_config().TRAIN_FIG_PATH, epoch))
                plt.close(fig)
                if get_config().TEST:
                    dt = build_time_axis(tst_raw_dates)
                    fig, tst_dd, tst_sharpe, tst_y_avg = plot_eq(
                        'Test', get_config().TEST_BEG, get_config().TEST_END,
                        dt, tst_eq)
                    fig.savefig('%s/%04d.png' % (get_config().TEST_FIG_PATH, epoch))
                    plt.close(fig)
                else:
                    # No test set: zero-fill the test columns of the CSV row.
                    tst_avg_loss = 0
                    tst_dd = 0
                    tst_sharpe = 0
                    tst_y_avg = 0
                writer.writerow((
                    epoch,
                    tr_avg_loss,
                    tr_dd,
                    tr_sharpe,
                    tr_y_avg,
                    tst_avg_loss,
                    tst_dd,
                    tst_sharpe,
                    tst_y_avg,
                ))
                f.flush()
                if epoch == get_config().MAX_EPOCH:
                    # Final epoch: persist the equity curves.
                    tr_df = pd.DataFrame({'date': dt, 'capital': tr_eq[:]})
                    tr_df.to_csv(get_config().TRAIN_EQ_PATH, index=False)
                    if get_config().TEST:
                        tst_df = pd.DataFrame({
                            'date': dt,
                            'capital': tst_eq[:]
                        })
                        tst_df.to_csv(get_config().TEST_EQ_PATH, index=False)
            epoch += 1
            if epoch > get_config().MAX_EPOCH:
                break
            print("Training %d epoch..." % epoch)
            curr_progress = 0
            state = None
            for b in range(tr_batch_num):
                if state is None:
                    state = net.zero_state(total_tickers)
                b_b_i, b_e_i = get_batch_range(b)
                _input = tr_input[:, b_b_i:b_e_i, :]
                _labels = tr_labels[:, b_b_i:b_e_i]
                _mask = tr_tradeable_mask[:, b_b_i:b_e_i]
                if get_config().FIT_FRI_PREDICTION_ONLY:
                    # Restrict the fit mask to rebalance days.
                    batch_week_days = tr_week_days[b_b_i:b_e_i]
                    # NOTE(review): named "mon" but isoweekday 5 is Friday.
                    batch_mon_mask = batch_week_days == 5
                    for i in range(_mask.shape[0]):
                        _mask[i, :] = _mask[i, :] & batch_mon_mask
                state, loss, predictions = net.fit(
                    state, _input, _labels, _mask.astype(np.float32))
                curr_progress = progress.print_progress(
                    curr_progress, b, tr_batch_num)
            progress.print_progess_end()
            net.save_weights(get_config().WEIGHTS_PATH, epoch)
def open_train_stat_file():
    """Open the training-statistics CSV for appending."""
    path = get_config().TRAIN_STAT_PATH
    return open(path, 'a', newline='')
get_config().HIST_END) preprocess_data(stocks, get_config().DATA_PATH, get_config().HIST_BEG, get_config().HIST_END, get_config().DATA_NPZ_PATH, get_config().DATA_FEATURES) predications = [] # download_data_for_all_stocks() for stock in stocks: print("Parsing %s predictions" % stock) get_config().TICKER = stock get_config().reload() try: predications.append(pd.read_csv(get_config().TRAIN_PRED_PATH)) except: pass pred_df = pd.concat(predications, ignore_index=True) pred_df_length = len(pred_df['date']) pred_df['date'] = pd.to_datetime(pred_df['date'], format='%Y-%m-%d').dt.date # add stock idx column pred_df['stk_idx'] = pd.Series(np.zeros((pred_df_length), dtype=np.int32), index=pred_df.index)
def download_data_for_all_stocks():
    """Fetch and preprocess history for every ticker under the ALL_STOCKS config."""
    get_config().TICKER = 'ALL_STOCKS'
    get_config().reload()
    # Ensure the shared data directory exists (helper inlined).
    if not os.path.exists(get_config().DATA_FOLDER_PATH):
        os.makedirs(get_config().DATA_FOLDER_PATH)
    download_data(stocks, get_config().DATA_PATH, get_config().HIST_BEG,
                  get_config().HIST_END)
    preprocess_data(stocks, get_config().DATA_PATH, get_config().HIST_BEG,
                    get_config().HIST_END, get_config().DATA_NPZ_PATH,
                    get_config().DATA_FEATURES)
def calc_pl(pos, curr_px, pos_px):
    """Realized P&L of closing `pos` at `curr_px`, charging SLIPPAGE on both legs."""
    slip = get_config().SLIPPAGE
    side = np.sign(pos)
    # Exit fills worse by `slip` in the direction of the trade; the entry
    # is penalized symmetrically.
    exit_px = curr_px * (1 - side * slip)
    entry_px = pos_px * (1 + side * slip)
    return np.sum(pos * (exit_px - entry_px))
def create_folders():
    """Ensure the data directory exists."""
    # exist_ok avoids the check-then-create race of os.path.exists + makedirs.
    os.makedirs(get_config().DATA_FOLDER_PATH, exist_ok=True)
import pandas as pd from portfolio.multi_stock_config import get_config def date_to_idx(date): if get_config().TRAIN_BEG <= date <= get_config().TRAIN_END: return (date - get_config().TRAIN_BEG).days return None def idx_to_date(idx): return get_config().TRAIN_BEG + datetime.timedelta(days=idx) days = (get_config().TRAIN_END - get_config().TRAIN_BEG).days data = np.ones((len(stocks), days)) stk_idx = 0 for stock in stocks: get_config().TICKER = stock get_config().reload() df = pd.read_csv(get_config().TRAIN_EQ_PATH) df['date'] = pd.to_datetime(df['date'], format='%Y-%m-%d').dt.date idx_from = date_to_idx(get_config().TRAIN_BEG) capital = 1.0 for index, row in df.iterrows(): date = row['date'] idx_to = date_to_idx(date)