def eval_performance(self, model):
        """Evaluate ``model`` on the train term and the test term of every file.

        A fresh ``dqn_agent_nature.dqn_agent`` is created, ``model`` is
        installed as ``Agent.DQN.model`` and the agent is run with
        ``policyFrozen = True``. Every entry of ``self.target_folder`` is
        traded once with the data from ``self.market.get_trainData`` and once
        with the data from ``self.market.get_testData``; entries whose data
        cannot be loaded are skipped.

        The averages are also appended to ``self.train_ave_profit_list``,
        ``self.test_ave_profit_list``, ``self.ave_Q_list`` and
        ``self.ave_reward_list``.

        Args:
            model: object assigned to ``Agent.DQN.model`` for this evaluation.

        Returns:
            Tuple ``(train_ave, test_ave, train_ave_Q, train_ave_reward)``.
            The two profit averages come from ``np.mean``; if no file could be
            loaded for a term the list is empty and NumPy yields ``nan``.
        """
        print('start evaluating...')
        Agent = dqn_agent_nature.dqn_agent(
            gpu_id=self.gpu_id,
            state_dimention=1,
            enable_controller=self.enable_controller)
        Agent.agent_init()
        Agent.DQN.model = model
        Agent.DQN.model_to_gpu()
        Agent.policyFrozen = True

        files = os.listdir(self.target_folder)

        def run_term(load_data):
            """Trade every file with the data from ``load_data(name)``.

            Returns the list of profit ratios of the files that loaded.
            """
            profits = []
            for name in files:
                # Built before loading, exactly as the original loops did.
                stock_agent = env_stockmarket.Stock_agent(
                    Agent, self.action_split_number)
                try:
                    data, price = load_data(name)
                except Exception:
                    # Best-effort: an entry that cannot be loaded is skipped.
                    # Unlike a bare ``except:``, KeyboardInterrupt and
                    # SystemExit still propagate.
                    continue
                profits.append(
                    stock_agent.trading(self.input_num, price, data))
            return profits

        # train term evaluation
        print('start evaluation train term...')
        profit_list = run_term(
            lambda name: self.market.get_trainData(name, self.input_num))
        train_ave = np.mean(np.array(profit_list))
        # Read before the test term runs, so that these two values are taken
        # after the train-term trades only.
        train_ave_Q = Agent.get_average_Q()
        train_ave_reward = Agent.get_average_reward()

        # test term evaluation
        print('start evaluation test term...')
        test_profit_list = run_term(
            lambda name: self.market.get_testData(name, self.input_num))
        test_ave = np.mean(np.array(test_profit_list))

        self.train_ave_profit_list.append(train_ave)
        self.test_ave_profit_list.append(test_ave)
        self.ave_Q_list.append(train_ave_Q)
        self.ave_reward_list.append(train_ave_reward)
        print('finish evaluation')
        return train_ave, test_ave, train_ave_Q, train_ave_reward
Ejemplo n.º 2
0
# Replay the test term of every file in ./nikkei100 with the current Agent and
# write one CSV of the trading trace per file.
files = os.listdir("./nikkei100")

Agent.init_max_Q_list()
Agent.init_reward_list()
profit_list = []

for f in files:
    print(f)
    if args.online_update == 1:
        # Re-initialise per stock: restore both networks from the original
        # model and reset the optimizer before trading this file.
        Agent.DQN.model = copy.deepcopy(org_model)
        Agent.DQN.model_target = copy.deepcopy(org_model)
        Agent.DQN.reset_optimizer()

    stock_agent = env_stockmarket.Stock_agent(Agent, args.action_split_number)

    try:
        testdata, testprice = market.get_testData(f, args.input_num)
        # Alternative kept from the original (train term instead of test term):
        # testdata, testprice = market.get_trainData(f,END_TRAIN_DAY,args.input_num)
    except Exception:
        # Best-effort: report and skip entries whose data cannot be loaded.
        # Unlike a bare ``except:``, KeyboardInterrupt and SystemExit still
        # propagate, so the run can be interrupted.
        print('skip %s' % f)
        continue

    # _q_list is returned by trading_test but not used below.
    (profit_ratio, proper, order, stocks, price, _q_list, ave_buyprice_list,
     reward_list) = stock_agent.trading_test(args.input_num, testprice,
                                             testdata)
    profit_list.append(profit_ratio)

    # Output name: the entry name with ".CSV" removed, plus ".csv".
    tools.listToCsv(folder + str(f).replace(".CSV", "") + '.csv', price,
                    proper, order, stocks, ave_buyprice_list, reward_list)
Ejemplo n.º 3
0
    o.write('targetFlag:' + str(targetFlag) + '\n')
    
    
# Entries of the data folder; each one is handed to market.get_trainData below.
files = os.listdir(args.data_folder)
# One pass per epoch, numbered 1..n_epoch, wrapped in tqdm for progress output.
for epoch in tqdm(range(1,n_epoch + 1)):
    # Calls the agent's init_max_Q_list / init_reward_list at the start of
    # every epoch. NOTE(review): presumably resets the per-epoch Q and reward
    # statistics - confirm in the agent class.
    Agent.init_max_Q_list()
    Agent.init_reward_list()


    # Shuffle the order of the files (in place) for this epoch.
    random.shuffle(files)
    # train_loop
    # NOTE(review): presumably un-freezes the policy so the agent learns while
    # trading - confirm in the agent class.
    Agent.policyFrozen = False
    for f in tqdm(files):

        stock_agent = env_stockmarket.Stock_agent(Agent)

        try:
            traindata,trainprice = market.get_trainData(f,args.input_num)
        # Entries whose data cannot be loaded are skipped silently.
        # NOTE(review): the bare except also traps KeyboardInterrupt/SystemExit.
        except:
            continue

        # The returned profit ratio is not used in the lines visible here.
        profit_ratio = stock_agent.trading(args.input_num,trainprice,traindata)

    #model evaluation
    # Evaluate a copy of the current model, hand the agent's current epsilon
    # to the evaluater, then have the evaluater save its results.
    eval_model = Agent.DQN.get_model_copy()
    evaluater.eval_performance(eval_model)
    evaluater.get_epsilon(Agent.epsilon)
    evaluater.save_eval_result()
    if epoch % 1 == 0: