예제 #1
0
class TraderTrainer:
    def __init__(self,
                 config,
                 stockList,
                 featureList,
                 start_date,
                 end_date,
                 fake_data=False,
                 restore_dir=None,
                 save_path=None,
                 device="cpu",
                 agent=None):
        """
        :param config: config dictionary
        :param fake_data: if True will use data generated randomly
        :param restore_dir: path to the model trained before
        :param save_path: path to save the model
        :param device: the device used to train the network
        :param agent: the nnagent object. If this is provides, the trainer will not
        create a new agent by itself. Therefore the restore_dir will not affect anything.
        """
        self.config = config
        self.train_config = config["training"]
        self.input_config = config["input"]
        self.save_path = save_path
        self.best_metric = 0
        np.random.seed(config["random_seed"])

        self.__window_size = self.input_config["window_size"]
        self.__batch_size = self.train_config["batch_size"]
        self.__snap_shot = self.train_config["snap_shot"]
        config["input"]["fake_data"] = fake_data

        self.stockList = stockList
        self.featureList = featureList
        self.start_date = start_date
        self.end_date = end_date
        self.fake_data = fake_data

        self._matrix = DataMatrices.create_from_config(config, stockList,
                                                       featureList, start_date,
                                                       end_date)  #数据
        self.test_set = self._matrix.get_test_set(
        )  #测试集 dict:{'X', 'y', 'last_w', 'setw'}
        # X: (260, 4, 3, 31), y: (260, 4, 3), last_w: (260, 3)
        # X: (test_length, feature_num, stock_num, time_windows)
        if not config["training"]["fast_train"]:
            self.training_set = self._matrix.get_training_set()  #训练集
        self.upperbound_validation = 1
        self.upperbound_test = 1
        tf.set_random_seed(self.config["random_seed"])
        self.device = device
        if agent:
            self._agent = agent
        else:
            if device == "cpu":
                os.environ["CUDA_VISIBLE_DEVICES"] = ""
                with tf.device("/cpu:0"):
                    self._agent = NNAgent(config, stockList, featureList,
                                          restore_dir, device)
            else:
                self._agent = NNAgent(config, stockList, featureList,
                                      restore_dir, device)

    def _evaluate(self, set_name, *tensors):
        if set_name == "test":
            feed = self.test_set
        elif set_name == "training":
            feed = self.training_set
        else:
            raise ValueError()
        result = self._agent.evaluate_tensors(feed["X"],
                                              feed["y"],
                                              last_w=feed["last_w"],
                                              setw=feed["setw"],
                                              tensors=tensors)
        # len: 6, result[5]: (260,4)
        # 计算的是tensors的值,有几个tensor就相应的有几个输出
        return result

    @staticmethod
    def calculate_upperbound(y):
        array = np.maximum.reduce(y[:, 0, :], 1)  #第1维的最大值
        total = 1.0
        for i in array:
            total = total * i
        return total

    def log_between_steps(self, step):
        fast_train = self.train_config["fast_train"]
        tflearn.is_training(False, self._agent.session)

        summary, v_pv, v_log_mean, v_loss, log_mean_free, weights= \
            self._evaluate("test", self.summary,
                           self._agent.portfolio_value,
                           self._agent.log_mean,
                           self._agent.loss,
                           self._agent.log_mean_free,
                           self._agent.portfolio_weights)
        self.test_writer.add_summary(summary, step)
        if not fast_train:
            summary, loss_value = self._evaluate("training", self.summary,
                                                 self._agent.loss)
            self.train_writer.add_summary(summary, step)

        # print 'ouput is %s' % out
        logging.info('=' * 30)
        logging.info('step %d' % step)
        logging.info('-' * 30)
        if not fast_train:
            logging.info('training loss is %s\n' % loss_value)
        logging.info('the portfolio value on test set is %s\nlog_mean is %s\n'
                     'loss_value is %3f\nlog mean without commission fee is %3f\n' % \
                     (v_pv, v_log_mean, v_loss, log_mean_free))
        logging.info('=' * 30 + "\n")

        if not self.__snap_shot:
            self._agent.save_model(self.save_path)
        elif v_pv > self.best_metric:
            self.best_metric = v_pv
            logging.info("get better model at %s steps,"
                         " whose test portfolio value is %s" % (step, v_pv))
            if self.save_path:
                self._agent.save_model(self.save_path)
        self.check_abnormal(v_pv, weights)

    def check_abnormal(self, portfolio_value, weigths):
        if portfolio_value == 1.0:
            logging.info("average portfolio weights {}".format(
                weigths.mean(axis=0)))

    def next_batch(self):
        batch = self._matrix.next_batch()
        batch_input = batch["X"]
        batch_y = batch["y"]
        batch_last_w = batch["last_w"]
        batch_w = batch["setw"]
        return batch_input, batch_y, batch_last_w, batch_w

    def __init_tensor_board(self, log_file_dir):
        tf.summary.scalar('benefit', self._agent.portfolio_value)
        tf.summary.scalar('log_mean', self._agent.log_mean)
        tf.summary.scalar('loss', self._agent.loss)
        tf.summary.scalar("log_mean_free", self._agent.log_mean_free)
        for layer_key in self._agent.layers_dict:
            tf.summary.histogram(layer_key, self._agent.layers_dict[layer_key])
        for var in tf.trainable_variables():
            tf.summary.histogram(var.name, var)
        grads = tf.gradients(self._agent.loss, tf.trainable_variables())
        for grad in grads:
            tf.summary.histogram(grad.name + '/gradient', grad)
        self.summary = tf.summary.merge_all()
        location = log_file_dir
        self.network_writer = tf.summary.FileWriter(location + '/network',
                                                    self._agent.session.graph)
        self.test_writer = tf.summary.FileWriter(location + '/test')  #保存的是训练结果
        self.train_writer = tf.summary.FileWriter(location + '/train')

    def __print_upperbound(self):
        upperbound_test = self.calculate_upperbound(self.test_set["y"])  #都乘起来
        logging.info("upper bound in test is %s" % upperbound_test)

    def train_net(self, log_file_dir="./tensorboard", index="0"):
        """
        :param log_file_dir: logging of the training process
        :param index: sub-folder name under train_package
        :return: the result named tuple
        """
        self.__print_upperbound()
        if log_file_dir:
            if self.device == "cpu":
                with tf.device("/cpu:0"):
                    self.__init_tensor_board(log_file_dir)  #初始化summary
            else:
                self.__init_tensor_board(log_file_dir)
        starttime = time.time()

        total_data_time = 0
        total_training_time = 0
        for i in range(self.train_config["steps"]):  #训练步数
            step_start = time.time()
            x, y, last_w, setw = self.next_batch(
            )  #获取batch x:(109, 3, 11, 31) y:(109, 3, 11) last_w: (109,11) setw:function
            if 'noise' in self.train_config:
                if self.train_config['noise']:
                    noise = np.random.normal(0, 0.002, size=y.shape)
                    y = y + noise
            finish_data = time.time()
            total_data_time += (finish_data - step_start)
            #训练智能体
            self._agent.train(x, y, last_w=last_w, setw=setw)
            total_training_time += time.time() - finish_data
            if i % 50 == 0 and log_file_dir:
                logging.info("average time for data accessing is %s" %
                             (total_data_time / 50))
                logging.info("average time for training is %s" %
                             (total_training_time / 50))
                total_training_time = 0
                total_data_time = 0
                #每隔固定的步数对测试集进行评估
                self.log_between_steps(i)

        if self.save_path:
            self._agent.recycle()  #
            best_agent = NNAgent(self.config,
                                 self.stockList,
                                 self.featureList,
                                 restore_dir=self.save_path)
            self._agent = best_agent

        pv, log_mean = self._evaluate("test", self._agent.portfolio_value,
                                      self._agent.log_mean)  #最后在评估一下
        #logging.warning('the portfolio value train No.%s is %s log_mean is %s,'
        # ' the training time is %d seconds' % (index, pv, log_mean, time.time() - starttime))

        return self.__log_result_csv(index,
                                     time.time() - starttime, self.save_path)

    def __log_result_csv(self, index, time, path):
        from pgportfolio.trade import backtest
        dataframe = None
        csv_dir = './train_package/train_summary.csv'
        tflearn.is_training(False, self._agent.session)
        v_pv, v_log_mean, benefit_array, v_log_mean_free = self._evaluate("test", self._agent.portfolio_value,\
                                                           self._agent.log_mean, self._agent.pv_vector,\
                                                           self._agent.log_mean_free)

        backtest = backtest.BackTest(self.config.copy(),
                                     self.stockList,
                                     self.featureList,
                                     self.start_date,
                                     self.end_date,
                                     self.fake_data,
                                     net_dir=None,
                                     result_path=path,
                                     agent=self._agent)  # 回测

        backtest.start_trading()
        result = Result(
            test_pv=[v_pv],
            test_log_mean=[v_log_mean],
            test_log_mean_free=[v_log_mean_free],
            test_history=[''.join(str(e) + ', ' for e in benefit_array)],
            config=[json.dumps(self.config)],
            net_dir=[index],
            backtest_test_pv=[backtest.test_pv],
            backtest_test_history=[
                ''.join(str(e) + ', ' for e in backtest.test_pc_vector)
            ],
            backtest_test_log_mean=[np.mean(np.log(backtest.test_pc_vector))],
            training_time=int(time))
        new_data_frame = pd.DataFrame(result._asdict()).set_index("net_dir")
        if os.path.isfile(csv_dir):
            dataframe = pd.read_csv(csv_dir).set_index("net_dir")
            dataframe = dataframe.append(new_data_frame)
        else:
            dataframe = new_data_frame
        if int(index) > 0:
            dataframe.to_csv(csv_dir)
        return result
예제 #2
0
파일: tradertrainer.py 프로젝트: kshre/PPN
class TraderTrainer:
    def __init__(self, config, fake_data=False, restore_dir=None, save_path=None, device="cpu",
                 agent=None):
        """
        :param config: config dictionary
        :param fake_data: if True will use data generated randomly 
        :param restore_dir: path to the model trained before
        :param save_path: path to save the model
        :param device: the device used to train the network
        :param agent: the nnagent object. If this is provides, the trainer will not
        create a new agent by itself. Therefore the restore_dir will not affect anything.
        """
        self.config = config
        self.train_config = config["training"]
        self.input_config = config["input"]
        self.save_path = save_path
        self.best_metric = 0
        np.random.seed(config["random_seed"])

        self.__window_size = self.input_config["window_size"]
        self.__coin_number = self.input_config["coin_number"]
        self.__batch_size = self.train_config["batch_size"]
        self.__snap_shot = self.train_config["snap_shot"]
        config["input"]["fake_data"] = fake_data

        self._matrix = DataMatrices.create_from_config(config)   # obtain global_data
        self.test_set = self._matrix.get_test_set()    # obtain test_data 
        if not config["training"]["fast_train"]:
            self.training_set = self._matrix.get_training_set()     # obtain training_data 
        self.upperbound_validation = 1
        self.upperbound_test = 1
        tf.set_random_seed(self.config["random_seed"])
        self.device = device
        if agent:
            self._agent = agent
        else:
            if device == "cpu":
                os.environ["CUDA_VISIBLE_DEVICES"] = ""
                with tf.device("/cpu:0"):
                    self._agent = NNAgent(config, restore_dir, device)
            else:
                self._agent = NNAgent(config, restore_dir, device)

    def _evaluate(self, set_name, *tensors): 
        if set_name == "test":
            feed = self.test_set
        elif set_name == "training":
            feed = self.training_set
        else:
            raise ValueError()       
        result = self._agent.evaluate_tensors(feed["X"],feed["y"],last_w=feed["last_w"],
                                              setw=feed["setw"], tensors=tensors)
        return result

    @staticmethod
    def calculate_upperbound(y): 
        array = np.maximum.reduce(y[:, 0, :], 1)   
        #print("array",array) 
        total = 1.0
        for i in array:
            total = total * i
        return total

    def log_between_steps(self, step):
        # log the information per step
        fast_train = self.train_config["fast_train"]
        tflearn.is_training(False, self._agent.session)

        summary, v_pv, v_log_mean, v_loss, log_mean_free, weights= \
            self._evaluate("test", self.summary,
                           self._agent.portfolio_value,
                           self._agent.log_mean,
                           self._agent.loss,
                           self._agent.log_mean_free,
                           self._agent.portfolio_weights)  
        self.test_writer.add_summary(summary, step)

        if not fast_train:
            summary, loss_value = self._evaluate("training", self.summary, self._agent.loss)
            self.train_writer.add_summary(summary, step)
            # fast_train is ued to control the output of tensorboard. 

        # print 'ouput is %s' % out
        logging.info('='*30)
        logging.info('step %d' % step)
        logging.info('-'*30)
        if not fast_train:
            logging.info('training loss is %s\n' % loss_value)
        logging.info('the portfolio value on test set is %s\nlog_mean is %s\n'
                     'loss_value is %3f\nlog mean without commission fee is %3f\n' % \
                     (v_pv, v_log_mean, v_loss, log_mean_free))
        logging.info('='*30+"\n")

        if not self.__snap_shot:
            self._agent.save_model(self.save_path)
        elif v_pv > self.best_metric:
            self.best_metric = v_pv
            logging.info("get better model at %s steps,"
                         " whose test portfolio value is %s" % (step, v_pv))
            if self.save_path:
                self._agent.save_model(self.save_path)
        self.check_abnormal(v_pv, weights)

    def check_abnormal(self, portfolio_value, weigths): 
        if portfolio_value == 1.0:
            logging.info("average portfolio weights {}".format(weigths.mean(axis=0)))


    def next_batch(self): 
        batch = self._matrix.next_batch()   # sampling batch based on experience
        batch_input = batch["X"]             
        batch_y = batch["y"]                
        batch_last_w = batch["last_w"]      
        batch_w = batch["setw"]
        return batch_input, batch_y, batch_last_w, batch_w

    def __init_tensor_board(self, log_file_dir):       
        tf.summary.scalar('benefit', self._agent.portfolio_value)
        tf.summary.scalar('log_mean', self._agent.log_mean)
        tf.summary.scalar('loss', self._agent.loss)
        tf.summary.scalar("log_mean_free", self._agent.log_mean_free)
        for layer_key in self._agent.layers_dict:
            tf.summary.histogram(layer_key, self._agent.layers_dict[layer_key])
        for var in tf.trainable_variables():
            tf.summary.histogram(var.name, var)
        #grads = tf.gradients(self._agent.loss, tf.trainable_variables())
        #for grad in grads:
        #    tf.summary.histogram(grad.name + '/gradient', grad)
        self.summary = tf.summary.merge_all()
        location = log_file_dir
        self.network_writer = tf.summary.FileWriter(location + '/network',
                                                    self._agent.session.graph)
        self.test_writer = tf.summary.FileWriter(location + '/test')
        self.train_writer = tf.summary.FileWriter(location + '/train')

    def __print_upperbound(self): 
        upperbound_test = self.calculate_upperbound(self.test_set["y"])
        logging.info("upper bound in test is %s" % upperbound_test)

    def train_net(self, log_file_dir="./tensorboard", index="0"):
        """ 
        :param log_file_dir: logging of the training process
        :param index: sub-folder name under train_package
        :return: the result named tuple
        """
        self.__print_upperbound()
        if log_file_dir:
            if self.device == "cpu":
                with tf.device("/cpu:0"):
                    self.__init_tensor_board(log_file_dir)
            else:
                self.__init_tensor_board(log_file_dir)
        starttime = time.time()

        total_data_time = 0
        total_training_time = 0
        for i in range(self.train_config["steps"]):
            step_start = time.time()
            x, y, last_w, setw = self.next_batch()
            finish_data = time.time()
            total_data_time += (finish_data - step_start)
            self._agent.train(x, y, last_w=last_w, setw=setw)
            total_training_time += time.time() - finish_data
            if i % 1000 == 0 and log_file_dir:
                logging.info("average time for data accessing is %s"%(total_data_time/1000))
                logging.info("average time for training is %s"%(total_training_time/1000))
                total_training_time = 0
                total_data_time = 0
                self.log_between_steps(i)   # log the information every 1000 rounds

        if self.save_path:
            self._agent.recycle()
            best_agent = NNAgent(self.config, restore_dir=self.save_path)
            self._agent = best_agent

        pv, log_mean = self._evaluate("test", self._agent.portfolio_value, self._agent.log_mean) # execute test once
        logging.warning('the portfolio value train No.%s is %s log_mean is %s,'
                        ' the training time is %d seconds' % (index, pv, log_mean, time.time() - starttime))

        return self.__log_result_csv(index, time.time() - starttime)   

    def __log_result_csv(self, index, time): 
        #  execute backtest, and save the result in train_summary.csv 
        from pgportfolio.trade import backtest
        dataframe = None
        csv_dir = './train_package/train_summary.csv'    
        tflearn.is_training(False, self._agent.session)
        v_pv, v_log_mean, benefit_array, v_log_mean_free, v_turn_over =\
            self._evaluate("test",
                           self._agent.portfolio_value,
                           self._agent.log_mean,
                           self._agent.pv_vector,
                           self._agent.log_mean_free,
                           self._agent.turn_over)  

        backtest = backtest.BackTest(self.config.copy(),
                                     net_dir=None,
                                     agent=self._agent)

        backtest.start_trading()
        result = Result(test_pv=[v_pv],
                        test_log_mean=[v_log_mean],
                        test_log_mean_free=[v_log_mean_free],
                        test_history=[''.join(str(e)+', ' for e in benefit_array)],
                        test_turn_over = [v_turn_over],
                        config=[json.dumps(self.config)],
                        net_dir=[index],
                        backtest_test_pv=[backtest.test_pv],
                        backtest_test_history=[''.join(str(e)+', ' for e in backtest.test_pc_vector)],
                        backtest_test_log_mean=[np.mean(np.log(backtest.test_pc_vector))],
                        backtest_turn_over = [backtest.turn_over],
                        training_time=int(time))
        new_data_frame = pd.DataFrame(result._asdict()).set_index("net_dir")
        if os.path.isfile(csv_dir):
            dataframe = pd.read_csv(csv_dir).set_index("net_dir")
            dataframe = dataframe.append(new_data_frame)
        else:
            dataframe = new_data_frame
        if int(index) > 0:
            dataframe.to_csv(csv_dir)
        return result
예제 #3
0
class TraderTrainer:
    def __init__(self, config, fake_data=False, restore_dir=None, save_path=None, device="cpu",
                 agent=None):
        """
        :param config: config dictionary
        :param fake_data: if True will use data generated randomly
        :param restore_dir: path to the model trained before
        :param save_path: path to save the model
        :param device: the device used to train the network
        :param agent: the nnagent object. If this is provides, the trainer will not
        create a new agent by itself. Therefore the restore_dir will not affect anything.
        """
        self.config = config
        self.train_config = config["training"]
        self.input_config = config["input"]
        self.save_path = save_path
        self.best_metric = 0
        np.random.seed(config["random_seed"])

        self.__window_size = self.input_config["window_size"]
        self.__coin_number = self.input_config["coin_number"]
        self.__batch_size = self.train_config["batch_size"]
        self.__snap_shot = self.train_config["snap_shot"]
        config["input"]["fake_data"] = fake_data

        self._matrix = DataMatrices.create_from_config(config)

        self.test_set = self._matrix.get_test_set()
        if not config["training"]["fast_train"]:
            self.training_set = self._matrix.get_training_set()
        self.upperbound_validation = 1
        self.upperbound_test = 1
        tf.set_random_seed(self.config["random_seed"])
        self.device = device
        if agent:
            self._agent = agent
        else:
            if device == "cpu":
                os.environ["CUDA_VISIBLE_DEVICES"] = ""
                with tf.device("/cpu:0"):
                    self._agent = NNAgent(config, restore_dir, device)
            else:
                self._agent = NNAgent(config, restore_dir, device)

    def _evaluate(self, set_name, *tensors):
        if set_name == "test":
            feed = self.test_set
        elif set_name == "training":
            feed = self.training_set
        else:
            raise ValueError()
        result = self._agent.evaluate_tensors(feed["X"],feed["y"],last_w=feed["last_w"],
                                              setw=feed["setw"], tensors=tensors)
        return result

    @staticmethod
    def calculate_upperbound(y):
        array = np.maximum.reduce(y[:, 0, :], 1)
        total = 1.0
        for i in array:
            total = total * i
        return total

    def log_between_steps(self, step):
        fast_train = self.train_config["fast_train"]
        tflearn.is_training(False, self._agent.session)

        summary, v_pv, v_log_mean, v_loss, log_mean_free, weights= \
            self._evaluate("test", self.summary,
                           self._agent.portfolio_value,
                           self._agent.log_mean,
                           self._agent.loss,
                           self._agent.log_mean_free,
                           self._agent.portfolio_weights)
        self.test_writer.add_summary(summary, step)

        if not fast_train:
            summary, loss_value = self._evaluate("training", self.summary, self._agent.loss)
            self.train_writer.add_summary(summary, step)

        # print 'ouput is %s' % out
        logging.info('='*30)
        logging.info('step %d' % step)
        logging.info('-'*30)
        if not fast_train:
            logging.info('training loss is %s\n' % loss_value)
        logging.info('the portfolio value on test set is %s\nlog_mean is %s\n'
                     'loss_value is %3f\nlog mean without commission fee is %3f\n' % \
                     (v_pv, v_log_mean, v_loss, log_mean_free))
        logging.info('='*30+"\n")

        if not self.__snap_shot:
            self._agent.save_model(self.save_path)
        elif v_pv > self.best_metric:
            self.best_metric = v_pv
            logging.info("get better model at %s steps,"
                         " whose test portfolio value is %s" % (step, v_pv))
            if self.save_path:
                self._agent.save_model(self.save_path)
        self.check_abnormal(v_pv, weights)

    def check_abnormal(self, portfolio_value, weigths):
        if portfolio_value == 1.0:
            logging.info("average portfolio weights {}".format(weigths.mean(axis=0)))


    def next_batch(self):
        batch = self._matrix.next_batch()
        batch_input = batch["X"]
        batch_y = batch["y"]
        batch_last_w = batch["last_w"]
        batch_w = batch["setw"]
        return batch_input, batch_y, batch_last_w, batch_w

    def __init_tensor_board(self, log_file_dir):
        tf.summary.scalar('benefit', self._agent.portfolio_value)
        tf.summary.scalar('log_mean', self._agent.log_mean)
        tf.summary.scalar('loss', self._agent.loss)
        tf.summary.scalar("log_mean_free", self._agent.log_mean_free)
        self.summary = tf.summary.merge_all()
        location = log_file_dir
        self.network_writer = tf.summary.FileWriter(location + '/network',
                                                    self._agent.session.graph)
        self.test_writer = tf.summary.FileWriter(location + '/test')
        self.train_writer = tf.summary.FileWriter(location + '/train')

    def __print_upperbound(self):
        upperbound_test = self.calculate_upperbound(self.test_set["y"])
        logging.info("upper bound in test is %s" % upperbound_test)

    def train_net(self, log_file_dir="./tensorboard", index="0"):
        """
        :param log_file_dir: logging of the training process
        :param index: sub-folder name under train_package
        :return: the result named tuple
        """
        self.__print_upperbound()
        if log_file_dir:
            if self.device == "cpu":
                with tf.device("/cpu:0"):
                    self.__init_tensor_board(log_file_dir)
            else:
                self.__init_tensor_board(log_file_dir)
        starttime = time.time()

        total_data_time = 0
        total_training_time = 0
        for i in range(self.train_config["steps"]):
            step_start = time.time()
            x, y, last_w, setw = self.next_batch()
            finish_data = time.time()
            total_data_time += (finish_data - step_start)
            self._agent.train(x, y, last_w=last_w, setw=setw)
            total_training_time += time.time() - finish_data
            if i % 1000 == 0 and log_file_dir:
                logging.info("average time for data accessing is %s"%(total_data_time/1000))
                logging.info("average time for training is %s"%(total_training_time/1000))
                total_training_time = 0
                total_data_time = 0
                self.log_between_steps(i)

        if self.save_path:
            self._agent.recycle()
            best_agent = NNAgent(self.config, restore_dir=self.save_path)
            self._agent = best_agent

        pv, log_mean = self._evaluate("test", self._agent.portfolio_value, self._agent.log_mean)
        logging.warning('the portfolio value train No.%s is %s log_mean is %s,'
                        ' the training time is %d seconds' % (index, pv, log_mean, time.time() - starttime))

        return self.__log_result_csv(index, time.time() - starttime)

    def __log_result_csv(self, index, time):
        from pgportfolio.trade import backtest
        dataframe = None
        csv_dir = './train_package/train_summary.csv'
        tflearn.is_training(False, self._agent.session)
        v_pv, v_log_mean, benefit_array, v_log_mean_free =\
            self._evaluate("test",
                           self._agent.portfolio_value,
                           self._agent.log_mean,
                           self._agent.pv_vector,
                           self._agent.log_mean_free)

        backtest = backtest.BackTest(self.config.copy(),
                                     net_dir=None,
                                     agent=self._agent)

        backtest.start_trading()
        result = Result(test_pv=[v_pv],
                        test_log_mean=[v_log_mean],
                        test_log_mean_free=[v_log_mean_free],
                        test_history=[''.join(str(e)+', ' for e in benefit_array)],
                        config=[json.dumps(self.config)],
                        net_dir=[index],
                        backtest_test_pv=[backtest.test_pv],
                        backtest_test_history=[''.join(str(e)+', ' for e in backtest.test_pc_vector)],
                        backtest_test_log_mean=[np.mean(np.log(backtest.test_pc_vector))],
                        training_time=int(time))
        new_data_frame = pd.DataFrame(result._asdict()).set_index("net_dir")
        if os.path.isfile(csv_dir):
            dataframe = pd.read_csv(csv_dir).set_index("net_dir")
            dataframe = dataframe.append(new_data_frame)
        else:
            dataframe = new_data_frame
        if int(index) > 0:
            dataframe.to_csv(csv_dir)
        return result