import numpy as np

# Assumes the project-local helpers data_loader, synthetic_data_generator,
# predictive_model, performance, output_visualization and
# synthetic_ranking_agreement are in scope.


def main(args):
    """SRA-TSTR main function.

    Args:
      - data_name: data to be used in this experiment
      - num_fold: the number of folds (iterations)
      - test_fraction: the fraction of testing data
      - prob_flip: the fraction of testing labels to be flipped
      - metric_name: metric to evaluate the predictive models
    """
    # Diverse predictive models
    models = ['logisticregression', 'randomforest', 'gaussiannb',
              'bernoullinb', 'svmlin', 'extra trees', 'lda',
              'adaboost', 'bagging', 'gbm', 'nn', 'xgb']

    performance_trtr = np.zeros([len(models), args.num_fold])
    performance_tstr = np.zeros([len(models), args.num_fold])
    performance_tsts = np.zeros([len(models), args.num_fold])

    for i in range(args.num_fold):
        # Load original data
        train_x_real, train_y_real, test_x_real, test_y_real = \
            data_loader(args.data_name, args.test_fraction)

        # Generate synthetic data
        train_x_synth, train_y_synth, test_x_synth, test_y_synth = \
            synthetic_data_generator(train_x_real, train_y_real,
                                     test_x_real, test_y_real,
                                     args.prob_flip)

        for j in range(len(models)):
            model_name = models[j]
            print('num fold: ' + str(i + 1) + ', predictive model: ' + model_name)

            # Train on real / synthetic data; TRTR, TSTR and TSTS predictions
            _, test_y_hat_trtr = predictive_model(train_x_real, train_y_real,
                                                  test_x_real, model_name)
            _, test_y_hat_tstr = predictive_model(train_x_synth, train_y_synth,
                                                  test_x_real, model_name)
            _, test_y_hat_tsts = predictive_model(train_x_synth, train_y_synth,
                                                  test_x_synth, model_name)

            performance_trtr[j, i] = performance(test_y_real, test_y_hat_trtr,
                                                 args.metric_name)
            performance_tstr[j, i] = performance(test_y_real, test_y_hat_tstr,
                                                 args.metric_name)
            performance_tsts[j, i] = performance(test_y_synth, test_y_hat_tsts,
                                                 args.metric_name)

    print('TSTR Performance (' + args.metric_name + ')')
    output_visualization(performance_trtr, performance_tstr, models)

    # SRA compares model rankings on real (TRTR) vs. synthetic (TSTS) test data
    result = synthetic_ranking_agreement(np.mean(performance_trtr, 1),
                                         np.mean(performance_tsts, 1))
    print('SRA Performance: ' + str(result))
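# A minimal sketch of the synthetic_ranking_agreement helper assumed above.
# Assumption (not the project's confirmed implementation): SRA is the fraction
# of model pairs (i, j) whose relative ordering agrees between the TRTR and
# TSTS score vectors, so identical rankings give 1.0 and reversed rankings 0.0.
def synthetic_ranking_agreement_sketch(real_scores, synth_scores):
    real_scores = np.asarray(real_scores)
    synth_scores = np.asarray(synth_scores)
    k = len(real_scores)
    agree, total = 0, 0
    for i in range(k):
        for j in range(i + 1, k):
            total += 1
            # The pair agrees when both score vectors order models i and j
            # the same way; ties count as disagreement in this sketch.
            if (real_scores[i] - real_scores[j]) * (synth_scores[i] - synth_scores[j]) > 0:
                agree += 1
    return agree / total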
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from tqdm import tqdm

# Assumes the project-local performance helper and the global `para` config
# (with a `factor` attribute) are in scope.


def portfolio_test(meanDf):
    """Compute performance statistics and cumulative NAV for each portfolio leg."""
    sharp_list = []
    ret_list = []
    std_list = []
    mdd_list = []
    compare = pd.DataFrame()

    for oneleg in tqdm(range(len(meanDf.columns))):
        portfolioDF = pd.DataFrame()
        portfolioDF['ret'] = meanDf.iloc[:, oneleg]
        portfolioDF['nav'] = (portfolioDF['ret'] + 1).cumprod()

        performance_df = performance(portfolioDF, para)
        sharp_list.append(np.array(performance_df.iloc[:, 0].T)[0])
        ret_list.append(np.array(performance_df.iloc[:, 1].T)[0])
        std_list.append(np.array(performance_df.iloc[:, 2].T)[0])
        mdd_list.append(np.array(performance_df.iloc[:, 3].T)[0])
        compare[str(oneleg)] = portfolioDF['nav']

    performanceDf = pd.concat([pd.Series(sharp_list),
                               pd.Series(ret_list),
                               pd.Series(std_list),
                               pd.Series(mdd_list)], axis=1, sort=True)
    performanceDf.columns = ['Sharp', 'RetYearly', 'STD', 'MDD']

    compare.index = meanDf.index
    plt.plot(range(len(compare.iloc[1:, 1])), compare.iloc[1:, :])
    plt.title(para.factor)
    plt.grid(True)
    plt.legend(compare.columns)  # label each leg; a bare legend() has no labels to show
    plt.savefig(para.factor + '_performance_nav.png')
    plt.show()
    return performanceDf, compare
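# A minimal sketch of the performance helper used above (hypothetical; the
# real implementation takes a `para` config whose fields are unknown here).
# It returns a one-row DataFrame with columns in the order the caller indexes
# them: Sharpe ratio, annualized return, annualized std, maximum drawdown.
def portfolio_performance_sketch(portfolioDF, periods_per_year=12):
    ret = portfolioDF['ret']
    nav = portfolioDF['nav']
    # Simple annualization assumptions; the real helper may differ
    ret_yearly = (1 + ret.mean()) ** periods_per_year - 1
    std_yearly = ret.std() * np.sqrt(periods_per_year)
    sharpe = ret_yearly / std_yearly if std_yearly > 0 else np.nan
    # Maximum drawdown: largest peak-to-trough decline of the NAV (negative)
    mdd = (nav / nav.cummax() - 1).min()
    return pd.DataFrame([[sharpe, ret_yearly, std_yearly, mdd]],
                        columns=['Sharp', 'RetYearly', 'STD', 'MDD'])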
def send_feedback(self, X, feature_names, reward, truth):
    """Return outlier labels as part of the feedback loop.

    Arguments:
    - X: input data
    - feature_names: names of the input features
    - reward: the reward signal
    - truth: outlier labels
    """
    # Accumulate ground-truth labels
    self.label = truth
    self._labels.append(self.label)
    self._labels = flatten(self._labels)

    # Rolling performance metrics over the label/prediction history
    scores = performance(self._labels, self._predictions,
                         roll_window=self.roll_window)
    stats = outlier_stats(self._labels, self._predictions,
                          roll_window=self.roll_window)

    convert = flatten([scores, stats])
    metric = []
    for c in convert:
        # np.asscalar was removed in NumPy 1.23; .item() converts to a
        # native Python type so the metrics can be jsonified
        metric.append(np.asarray(c).item())
    self.metric = metric
    return
def send_feedback(self, X, feature_names, reward, truth):
    """Return outlier labels as part of the feedback loop.

    Parameters
    ----------
    X : array of the features sent in the original predict request
    feature_names : array of feature names. May be None if not available.
    reward : float, the reward
    truth : array with correct values (optional)
    """
    _ = super().send_feedback(X, feature_names, reward, truth)

    # Historical reconstruction errors and predictions
    self._mse.append(self.mse)
    self._mse = flatten(self._mse)
    self._predictions.append(self.prediction)
    self._predictions = flatten(self._predictions)

    # Target labels
    self.label = truth
    self._labels.append(self.label)
    self._labels = flatten(self._labels)

    # Performance metrics
    scores = performance(self._labels, self._predictions,
                         roll_window=self.roll_window)
    stats = outlier_stats(self._labels, self._predictions,
                          roll_window=self.roll_window)

    convert = flatten([scores, stats])
    metric = []
    for c in convert:
        # np.asscalar was removed in NumPy 1.23; .item() converts to a
        # native Python type so the metrics can be jsonified
        metric.append(np.asarray(c).item())
    self.metric = metric
    return []
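# A minimal sketch of the flatten helper both send_feedback variants rely on.
# Assumption: it collapses one level of nesting (lists, tuples or numpy
# arrays) into a flat Python list; the real helper may behave differently.
import numpy as np

def flatten_sketch(nested):
    flat = []
    for item in nested:
        if isinstance(item, (list, tuple, np.ndarray)):
            flat.extend(item)
        else:
            flat.append(item)
    return flat

# e.g. flatten_sketch([[0, 1], np.array([1, 0]), 1]) -> [0, 1, 1, 0, 1]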
def portfolio_test(self):
    # Compute performance statistics (Sharpe, annualized return, std, MDD,
    # return-to-VaR, return-to-CVaR) and cumulative NAV for each portfolio leg.
    # Reference: https://blog.csdn.net/weixin_42294255/article/details/103836548
    sharp_list = []
    ret_list = []
    std_list = []
    mdd_list = []
    r2var_list = []
    cr2var_list = []
    compare = pd.DataFrame()

    for oneleg in tqdm(range(len(self.df.columns))):
        portfolioDF = pd.DataFrame()
        portfolioDF['ret'] = self.df.iloc[:, oneleg]
        portfolioDF['nav'] = (portfolioDF['ret'] + 1).cumprod()

        performance_df = performance(portfolioDF, para)
        sharp_list.append(np.array(performance_df.iloc[:, 0].T)[0])
        ret_list.append(np.array(performance_df.iloc[:, 1].T)[0])
        std_list.append(np.array(performance_df.iloc[:, 2].T)[0])
        mdd_list.append(np.array(performance_df.iloc[:, 3].T)[0])
        r2var_list.append(np.array(performance_df.iloc[:, 4].T)[0])
        cr2var_list.append(np.array(performance_df.iloc[:, 5].T)[0])
        compare[str(oneleg)] = portfolioDF['nav']

    performanceDf = pd.concat([pd.Series(sharp_list),
                               pd.Series(ret_list),
                               pd.Series(std_list),
                               pd.Series(mdd_list),
                               pd.Series(r2var_list),
                               pd.Series(cr2var_list)], axis=1, sort=True)
    performanceDf.columns = ['Sharp', 'RetYearly', 'STD', 'MDD', 'R2VaR', 'R2CVaR']

    compare.index = self.df.index
    plt.plot(range(len(compare.iloc[1:, 1])), compare.iloc[1:, :])
    plt.title(para.factor + '_' + para.factor2)
    plt.xticks([0, 25, 50, 65],
               ['2014/12/31', '2016/12/30', '2018/12/31', '2020/04/30'])
    plt.grid(True)
    plt.xlim((0, 65))
    plt.legend(compare.columns)  # label each leg; a bare legend() has no labels to show
    plt.savefig(para.result_path + para.factor + '_' + para.factor2 + '_'
                + para.weightMethod + '_performance_nav.png')
    plt.show()
    return performanceDf, compare
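# Hypothetical sketch of how the R2VaR / R2CVaR columns above could be
# computed: return-to-VaR and return-to-CVaR ratios from historical returns.
# The actual performance helper may use a different estimator or confidence
# level; this only illustrates the idea.
def risk_ratios_sketch(ret, ret_yearly, alpha=0.05):
    q = np.quantile(ret, alpha)
    var = -q                            # historical Value-at-Risk at level alpha
    cvar = -ret[ret <= q].mean()        # expected shortfall beyond the VaR quantile
    return ret_yearly / var, ret_yearly / cvar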
def portfolio_test(self):
    # Compute performance statistics and cumulative NAV for each portfolio leg,
    # plus an annual performance breakdown via performance_anl.
    sharp_list = []
    ret_list = []
    std_list = []
    mdd_list = []
    r2var_list = []
    cr2var_list = []
    anl = []
    compare = pd.DataFrame()

    for oneleg in tqdm(range(len(self.meanDf.columns))):
        portfolioDF = pd.DataFrame()
        portfolioDF['ret'] = self.meanDf.iloc[:, oneleg]
        portfolioDF['nav'] = (portfolioDF['ret'] + 1).cumprod()

        performance_df = performance(portfolioDF, para)
        performance_df_anl = performance_anl(portfolioDF, para)
        sharp_list.append(np.array(performance_df.iloc[:, 0].T)[0])
        ret_list.append(np.array(performance_df.iloc[:, 1].T)[0])
        std_list.append(np.array(performance_df.iloc[:, 2].T)[0])
        mdd_list.append(np.array(performance_df.iloc[:, 3].T)[0])
        r2var_list.append(np.array(performance_df.iloc[:, 4].T)[0])
        cr2var_list.append(np.array(performance_df.iloc[:, 5].T)[0])
        anl.append(np.array(performance_df_anl.iloc[:, 0].T))
        compare[str(oneleg)] = portfolioDF['nav']

    performanceDf = pd.concat([pd.Series(sharp_list),
                               pd.Series(ret_list),
                               pd.Series(std_list),
                               pd.Series(mdd_list),
                               pd.Series(r2var_list),
                               pd.Series(cr2var_list)], axis=1, sort=True)
    performanceDf.columns = ['Sharp', 'RetYearly', 'STD', 'MDD', 'R2VaR', 'R2CVaR']

    anlDf = pd.DataFrame(anl)
    print(anlDf)

    compare.index = self.meanDf.index
    plt.plot(range(len(compare.iloc[1:, 1])), compare.iloc[1:, :])
    plt.title(para.factor)
    plt.xticks([0, 25, 50, 75, 100, 125],
               ['2009/12/31', '2011/01/31', '2013/02/28',
                '2015/03/31', '2017/04/30', '2020/04/30'])
    plt.grid(True)
    plt.xlim((0, 125))
    plt.legend(compare.columns)  # label each leg; a bare legend() has no labels to show
    plt.savefig(para.result_path + para.factor + '_' + para.weightMethod
                + '_' + para.normalize + '_performance_nav.png')
    plt.show()
    return performanceDf, compare
# Assumes the project-local data_loader, GeneralRNN, Attention and performance
# helpers are in scope.


def main(args):
    """Time-series prediction main function.

    Args:
      - train_rate: training data ratio
      - seq_len: sequence length
      - task: classification or regression
      - model_type: rnn, lstm, gru, or attention
      - h_dim: hidden state dimensions
      - n_layer: number of layers
      - batch_size: the number of samples in each mini-batch
      - epoch: the number of iterations
      - learning_rate: learning rate
      - metric_name: mse or mae
    """
    # Load data
    train_x, train_y, test_x, test_y = data_loader(args.train_rate, args.seq_len)

    # Model training / testing
    model_parameters = {'task': args.task,
                        'model_type': args.model_type,
                        'h_dim': args.h_dim,
                        'n_layer': args.n_layer,
                        'batch_size': args.batch_size,
                        'epoch': args.epoch,
                        'learning_rate': args.learning_rate}

    if args.model_type in ['rnn', 'lstm', 'gru']:
        general_rnn = GeneralRNN(model_parameters)
        general_rnn.fit(train_x, train_y)
        test_y_hat = general_rnn.predict(test_x)
    elif args.model_type == 'attention':
        basic_attention = Attention(model_parameters)
        basic_attention.fit(train_x, train_y)
        test_y_hat = basic_attention.predict(test_x)
    else:
        # Guard against unsupported model types so test_y_hat is always defined
        raise ValueError('model_type must be rnn, lstm, gru, or attention')

    # Evaluation
    result = performance(test_y, test_y_hat, args.metric_name)
    print('Performance (' + args.metric_name + '): ' + str(result))
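# A minimal sketch of the performance helper assumed above, covering the two
# metric names the docstring mentions (mse or mae); the project's actual
# implementation may differ.
import numpy as np

def metric_performance_sketch(y_true, y_pred, metric_name):
    y_true, y_pred = np.asarray(y_true), np.asarray(y_pred)
    if metric_name == 'mse':
        return float(np.mean((y_true - y_pred) ** 2))
    elif metric_name == 'mae':
        return float(np.mean(np.abs(y_true - y_pred)))
    raise ValueError('metric_name must be "mse" or "mae"')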