def build_model(config_filename, verbose=False):
    """Rebuild a saved model from its config file and compile it.

    The config is read with ``get_params``. The architecture is deserialized
    from the JSON file named by ``params['model_json_filename']``, weights are
    loaded from ``params['weights_filename']``, and the model is compiled with
    ``params['loss_function']`` and the optimizer selected by
    ``params['optimizer_choice']`` (hyper-parameters are fixed in this
    function).

    Args:
        config_filename: path handed to ``get_params``.
        verbose: when true, print a load confirmation and the model summary.

    Returns:
        The compiled model.

    Raises:
        ValueError: if ``optimizer_choice`` is not one of 'RMSprop', 'Adam',
            'Adagrad' or 'Nadam'.
    """
    params = get_params(config_filename)
    optimizer_choice = params['optimizer_choice']
    loss_function = params['loss_function']
    weights_filename = params['weights_filename']
    model_json_filename = params['model_json_filename']

    # Load the architecture from JSON; the context manager closes the file
    # even if reading fails.
    with open(model_json_filename, 'r') as json_file:
        loaded_model_json = json_file.read()
    model = model_from_json(loaded_model_json)

    # Load the trained weights into the freshly built model.
    model.load_weights(weights_filename)
    if verbose:
        print(" ... model successfully loaded from disk")

    if optimizer_choice == 'RMSprop':
        optimizer = RMSprop(lr=0.001, rho=0.9, epsilon=1e-08, decay=0.0)
    elif optimizer_choice == 'Adam':
        optimizer = Adam(lr=0.0005, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0)
    elif optimizer_choice == 'Adagrad':
        optimizer = Adagrad(lr=0.005, epsilon=1e-08, decay=0.0)
    elif optimizer_choice == 'Nadam':
        optimizer = Nadam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-08, schedule_decay=0.004)
    else:
        # Without this branch `optimizer` would be unbound and compile()
        # would fail with an unrelated-looking UnboundLocalError.
        raise ValueError(
            "unsupported optimizer_choice {!r} in {}".format(optimizer_choice, config_filename)
        )

    if verbose:
        model.summary()

    model.compile(optimizer=optimizer, loss=loss_function)
    return model
def get_predictions_input(config_filename, adjClose, datearray):
    """Generate prediction examples using the settings in a model config file.

    Reads ``first_history_index``, ``num_periods_history`` and ``increments``
    from the config (via ``get_params``) and forwards them, together with the
    price and date arrays, to ``generateExamples3layerGen`` with
    ``output_incr='monthly'``.

    Args:
        config_filename: path handed to ``get_params``.
        adjClose: price data forwarded unchanged to the example generator.
        datearray: dates forwarded unchanged to the example generator.

    Returns:
        The four values produced by ``generateExamples3layerGen``, as a tuple
        ``(Xpredict, Ypredict, dates_predict, companies_predict)``.
    """
    cfg = get_params(config_filename)
    start_index, history_len, incr = (
        cfg[key]
        for key in ('first_history_index', 'num_periods_history', 'increments')
    )

    print(" ... generating examples ...")
    generated = generateExamples3layerGen(
        datearray,
        adjClose,
        start_index,
        history_len,
        incr,
        output_incr='monthly',
    )
    # Unpack explicitly so a generator returning anything other than four
    # items still fails here, before the completion message.
    features, targets, example_dates, example_companies = generated
    print(" ... examples generated ...")

    return features, targets, example_dates, example_companies
def one_model_prediction(imodel, first_history_index, datearray, adjClose, symbols, num_stocks, verbose=False):
    """Backtest one saved model period by period.

    The model is rebuilt from the config file that sits next to the weights
    file (``.hdf`` replaced by ``.txt``). Examples are generated with
    ``generateExamples3layerGen(..., output_incr='monthly')``. For every
    qualifying date the model's forecasts are ranked, and the mean realized
    gain of the ``num_stocks`` highest-ranked examples is compounded into the
    system equity curve. The mean gain of all examples on that date is
    compounded into the buy-and-hold curve. Both curves start at 10000.0.

    Args:
        imodel: path to the model weights file (``.hdf``).
        first_history_index: forwarded to ``generateExamples3layerGen``.
        datearray: dates forwarded to ``generateExamples3layerGen``.
        adjClose: price data forwarded to ``generateExamples3layerGen``.
        symbols: ticker symbols; indexed with the forecast ranking.
        num_stocks: number of top-ranked examples held each period.
        verbose: when true, print per-period gains and selected symbols.

    Returns:
        Tuple ``(cumu_system, cumu_BH, sorted_symbols, plotdates)``.
        ``sorted_symbols`` is the forecast-ordered symbol array for the last
        processed period, or an empty array when no period qualified.
    """
    config_filename = imodel.replace('.hdf', '.txt')
    print("\n ... config_filename = ", config_filename)
    model = build_model(config_filename)

    params = get_params(config_filename)
    num_periods_history = params['num_periods_history']
    increments = params['increments']

    Xpredict, Ypredict, dates_predict, companies_predict = generateExamples3layerGen(
        datearray,
        adjClose,
        first_history_index,
        num_periods_history,
        increments,
        output_incr='monthly',
    )
    dates_predict = np.array(dates_predict)
    companies_predict = np.array(companies_predict)

    # build_model() has already loaded the weights named in the config, so
    # this reload is best-effort: report a failure instead of hiding it.
    try:
        model.load_weights(imodel)
    except Exception as exc:
        print(" ... warning: could not load weights from", imodel, "-", exc)

    # NOTE(review): the ranking indexes the full symbol list directly, which
    # assumes each date's examples are ordered like `symbols` - TODO confirm.
    all_symbols = np.array(symbols)

    cumu_system = [10000.0]
    cumu_BH = [10000.0]
    plotdates = [dates_predict[0]]
    # Defined up front so the return value exists even if no period qualifies.
    sorted_symbols = all_symbols[:0]

    for i, idate in enumerate(dates_predict[1:]):
        # NOTE(review): `dates` is not defined in this function and must come
        # from module scope - confirm it is not meant to be `dates_predict`.
        # NOTE(review): `i` enumerates `dates_predict[1:]`, so
        # `companies_predict[i]` is the example just before `idate` - confirm
        # the offset is intended.
        if idate != dates[-1] and companies_predict[i] < companies_predict[i-1]:
            on_date = dates_predict == idate
            _forecast = model.predict(Xpredict[on_date])[:, 0]
            indices = _forecast.argsort()
            sorted_symbols = all_symbols[indices]

            try:
                sorted_Ytrain = Ypredict[on_date][indices]
                BH_gain = sorted_Ytrain.mean()
            except Exception as exc:
                # Treat the period as flat for both curves rather than
                # reusing the previous period's gains.
                print(" ... warning: no realized gains for", idate, "-", exc)
                BH_gain = 0.0
                avg_gain = 0.0
            else:
                avg_gain = sorted_Ytrain[-num_stocks:].mean()

            if verbose:
                print(" ... date, system_gain, B&H_gain = ",
                      idate,
                      format(avg_gain, '3.1%'),
                      format(BH_gain, '3.1%'),
                      sorted_symbols[-num_stocks:])

            cumu_system.append(cumu_system[-1] * (1.+avg_gain))
            cumu_BH.append(cumu_BH[-1] * (1.+BH_gain))
            plotdates.append(idate)

    print(" ... system, B&H = ",
          format(cumu_system[-1], '10,.0f'),
          format(cumu_BH[-1], '10,.0f'))

    return cumu_system, cumu_BH, sorted_symbols, plotdates
def _ensemble_sort_weight(gains, sort_mode, increment):
    """Score one series of periodic gains for ensemble ranking.

    The gains are compounded with ``(gains + 1.).cumprod()`` and handed to
    ``allstats``. ``increment`` sets ``periods_per_year = 252. / increment``
    for the Sharpe call and is not used in 'sortino' mode.

    Raises:
        ValueError: if ``sort_mode`` is not 'sharpe', 'sharpe_plus_sortino'
            or 'sortino'.
    """
    def stats():
        # A fresh value series and stats object for every metric.
        return allstats((gains + 1.).cumprod())

    if sort_mode == 'sharpe':
        return stats().sharpe(periods_per_year=252. / increment)
    if sort_mode == 'sharpe_plus_sortino':
        return stats().sharpe(periods_per_year=252. / increment) + stats().sortino()
    if sort_mode == 'sortino':
        return stats().sortino()
    raise ValueError("unsupported sort_mode {!r}".format(sort_mode))


def ensemble_prediction(models_list, idate, datearray, adjClose, num_stocks, sort_mode='sharpe'):
    """Pick stocks for one date by pooling the top forecasts of several models.

    Each model in ``models_list`` is rebuilt from its config file
    (``models_folder/<imodel>`` with ``.hdf`` replaced by ``.txt``). The
    ``num_stocks`` examples with the highest forecast are taken, and each one
    is scored twice from its input history according to ``sort_mode``: once
    from the last index of axis 2 and once from the middle index. Scores are
    summed per symbol across models, and the ``num_stocks`` symbols with the
    largest combined score are returned.

    Relies on the module-level names ``models_folder`` and ``symbols``.

    Args:
        models_list: weight-file names (``.hdf``) relative to ``models_folder``.
        idate: the date to predict for.
        datearray: dates forwarded to ``generateExamples3layerForDate``.
        adjClose: price data forwarded to ``generateExamples3layerForDate``.
        num_stocks: number of picks per model and in the final selection.
        sort_mode: 'sharpe', 'sharpe_plus_sortino' or 'sortino'. Any other
            value leaves every score at 1.0.

    Returns:
        Tuple ``(avg_gain, BH_gain, sorted_symbols)``. ``avg_gain`` is the mean
        over the selected symbols of their average realized gain. ``BH_gain``
        is the mean realized gain of all ``idate`` examples of the last model,
        or 0.0 when it cannot be computed. ``sorted_symbols`` holds the
        selected symbols in ascending order of combined score.
    """
    ensemble_symbols = []
    ensemble_Ytrain = []
    ensemble_sharpe = []
    ensemble_recent_sharpe = []
    ensemble_equal = []
    ensemble_rank = []

    # Kept from the last model for the buy-and-hold figure below.
    _Ytrain = None
    on_date = None

    scored_modes = ('sharpe', 'sharpe_plus_sortino', 'sortino')

    for imodel in models_list:
        weights_filename = os.path.join(models_folder, imodel)
        config_filename = weights_filename.replace('.hdf', '.txt')
        print(".", end='')
        model = build_model(config_filename, verbose=False)

        params = get_params(config_filename)
        num_periods_history = params['num_periods_history']
        increments = params['increments']

        Xpredict, Ypredict, dates_predict, companies_predict = generateExamples3layerForDate(
            idate,
            datearray,
            adjClose,
            num_periods_history,
            increments,
            output_incr='monthly',
            verbose=False,
        )
        dates_predict = np.array(dates_predict)
        companies_predict = np.array(companies_predict)

        # build_model() has already loaded the weights named in the config, so
        # this reload is best-effort: report a failure instead of hiding it.
        try:
            model.load_weights(weights_filename)
        except Exception as exc:
            print("\n ... warning: could not load weights from", weights_filename, "-", exc)

        # Restrict everything to the examples generated for `idate`.
        on_date = dates_predict == idate
        _Xtrain = Xpredict[on_date]
        _Ytrain = Ypredict[on_date][:, 0]
        _companies = companies_predict[on_date]
        _forecast = model.predict(_Xtrain)[:, 0]
        _symbols = np.array(symbols)[_companies]

        # Release the model before the next one is built.
        del model
        K.clear_session()

        # Keep the num_stocks highest forecasts (ascending order).
        forecast_indices = _forecast.argsort()[-num_stocks:]
        sorted_Xtrain = _Xtrain[forecast_indices, :, :, :]
        sorted_Ytrain = _Ytrain[forecast_indices]
        sorted_companies = _companies[forecast_indices]
        sorted_symbols = _symbols[forecast_indices]

        ensemble_sharpe_weights = np.ones(sorted_companies.shape, 'float')
        ensemble_recent_sharpe_weights = np.ones_like(ensemble_sharpe_weights)
        if sort_mode in scored_modes:
            mid_index = int(sorted_Xtrain.shape[2] / 2)
            for icompany in range(sorted_companies.shape[0]):
                ensemble_sharpe_weights[icompany] = _ensemble_sort_weight(
                    sorted_Xtrain[icompany, :, -1, 0], sort_mode, increments[-1])
                ensemble_recent_sharpe_weights[icompany] = _ensemble_sort_weight(
                    sorted_Xtrain[icompany, :, mid_index, 0], sort_mode, increments[0])

        ensemble_equal_weights = np.ones_like(ensemble_sharpe_weights)
        ensemble_rank_weights = np.arange(sorted_symbols.shape[0])[::-1]

        ensemble_symbols.append(sorted_symbols)
        ensemble_Ytrain.append(sorted_Ytrain)
        ensemble_sharpe.append(ensemble_sharpe_weights)
        ensemble_recent_sharpe.append(ensemble_recent_sharpe_weights)
        # Equal weighting gets its own array (previously the recent-sharpe
        # weights were appended here by mistake).
        ensemble_equal.append(ensemble_equal_weights)
        ensemble_rank.append(ensemble_rank_weights)

    # Pool the picks of every model into flat, parallel arrays.
    ensemble_symbols = np.array(ensemble_symbols).flatten()
    ensemble_Ytrain = np.array(ensemble_Ytrain).flatten()
    ensemble_sharpe = np.array(ensemble_sharpe).flatten()
    ensemble_recent_sharpe = np.array(ensemble_recent_sharpe).flatten()
    ensemble_equal = np.array(ensemble_equal).flatten()
    ensemble_rank = np.array(ensemble_rank).flatten()

    # np.unique returns the symbols sorted, so ties in the ranking below are
    # broken the same way on every run (set() order depends on string hashing).
    unique_symbols = np.unique(ensemble_symbols)

    unique_ensemble_Ytrain = []
    unique_ensemble_sharpe = []
    unique_ensemble_recent_sharpe = []
    # Equal and rank totals are aggregated alongside the others but do not
    # take part in the selection below.
    unique_ensemble_equal = []
    unique_ensemble_rank = []
    for ksymbol in unique_symbols:
        picked = ensemble_symbols == ksymbol
        unique_ensemble_Ytrain.append(ensemble_Ytrain[picked].mean())
        unique_ensemble_sharpe.append(ensemble_sharpe[picked].sum())
        unique_ensemble_recent_sharpe.append(ensemble_recent_sharpe[picked].sum())
        unique_ensemble_equal.append(ensemble_equal[picked].sum())
        unique_ensemble_rank.append(ensemble_rank[picked].sum())

    # Rank symbols by the sum of both scores and keep the best num_stocks.
    combined_sharpe = np.array(unique_ensemble_sharpe) + np.array(unique_ensemble_recent_sharpe)
    indices = np.argsort(combined_sharpe)[-num_stocks:]
    sorted_symbols = unique_symbols[indices]
    sorted_Ytrain = np.array(unique_ensemble_Ytrain)[indices]

    # NOTE(review): `_Ytrain` was already filtered with this same mask, so
    # re-applying it only succeeds when every generated example belongs to
    # `idate`; otherwise (or with an empty models_list) B&H falls back to 0.0.
    # Confirm whether `_Ytrain.mean()` alone is what is intended.
    try:
        BH_gain = _Ytrain[on_date].mean()
    except (IndexError, TypeError):
        BH_gain = 0.0

    avg_gain = sorted_Ytrain.mean()
    return avg_gain, BH_gain, sorted_symbols