コード例 #1
0
    def executeEpisodes(self, game, nnet, args, iteration):
        """Run ``args.numEps`` self-play episodes and gather their examples.

        The game, network and args are stored on the instance. A fresh MCTS
        is created for every episode and its debug folder is pointed at
        ``Debug/<iteration>-<episode>``; the MCTS stats are printed once the
        episode is over.

        Returns:
            A deque, bounded by ``args.maxlenOfQueue``, containing everything
            ``executeEpisode`` returned across all episodes.
        """
        self.game, self.nnet, self.args = game, nnet, args
        self.folder = self.args.folder

        total_episodes = self.args.numEps
        collected = deque([], maxlen=self.args.maxlenOfQueue)

        episode_timer = AverageMeter()
        bar = Bar('Self Play', max=total_episodes)
        last_tick = time.time()

        for episode in range(total_episodes):
            # A new search tree per episode: nothing is carried over.
            self.mcts = MCTS(self.game, self.nnet, self.args)
            debug_name = "-".join((str(iteration), str(episode)))
            self.mcts.debug.folder = os.path.join("Debug", debug_name)

            collected.extend(self.executeEpisode(episode))
            # Stats are only meaningful after the episode used the tree.
            self.mcts.print_stats()

            # Progress bookkeeping.
            episode_timer.update(time.time() - last_tick)
            last_tick = time.time()
            bar.suffix = '({eps}/{maxeps}) Eps Time: {et:.3f}s | Total: {total:} | ETA: {eta:}'.format(
                eps=episode + 1,
                maxeps=total_episodes,
                et=episode_timer.avg,
                total=bar.elapsed_td,
                eta=bar.eta_td,
            )
            bar.next()
        bar.finish()

        return collected
コード例 #2
0
ファイル: Arena.py プロジェクト: wh1992v/R2RRMopionSolitaire
    def playGames(self, num, verbose=False):
        """
        Plays num games one after another and records every result.

        Args:
            num: number of games to play
            verbose: forwarded unchanged to playGame

        Returns:
            scores: list holding the value returned by playGame for each
                    game, in the order the games were played
        """
        game_timer = AverageMeter()
        bar = Bar('Arena.playGames', max=num)
        last_tick = time.time()
        total_games = int(num)

        scores = []
        for played, _ in enumerate(range(num), start=1):
            scores.append(self.playGame(verbose=verbose))

            # Progress bookkeeping; `played` counts the finished games.
            game_timer.update(time.time() - last_tick)
            last_tick = time.time()
            bar.suffix = '({eps}/{maxeps}) Eps Time: {et:.3f}s | Total: {total:} | ETA: {eta:}'.format(
                eps=played,
                maxeps=total_games,
                et=game_timer.avg,
                total=bar.elapsed_td,
                eta=bar.eta_td,
            )
            bar.next()
        bar.finish()

        return scores
コード例 #3
0
ファイル: Coach.py プロジェクト: alexbers/alpha-zero-general
    def gen_samples(self, iteration, proc_num):
        """Generate one file's worth of self-play examples and save it.

        Runs ``numEps // genFilesPerIteration`` episodes, each on a freshly
        built MCTS, accumulates their examples in a deque bounded by
        ``maxlenOfQueue`` and hands the result to ``saveTrainExamples``
        under iteration index ``iteration - 1``.

        Args:
            iteration: iteration number, printed in the banner.
            proc_num: forwarded unchanged to ``saveTrainExamples``.
        """
        print('------ITER ' + str(iteration) + '------')

        samples = deque([], maxlen=self.args["maxlenOfQueue"])
        episodes = self.args["numEps"] // self.args["genFilesPerIteration"]

        timer = AverageMeter()
        bar = Bar('Self Play', max=episodes)
        tick = time.time()

        for episode in range(episodes):
            # Reset the search tree for every episode.
            self.mcts = MCTS(self.game, self.nnet, self.args)
            samples.extend(self.executeEpisode())

            # Progress bookkeeping.
            timer.update(time.time() - tick)
            tick = time.time()
            bar.suffix = '({eps}/{maxeps}) Eps Time: {et:.3f}s | Total: {total:} | ETA: {eta:}'.format(
                eps=episode + 1,
                maxeps=episodes,
                et=timer.avg,
                total=bar.elapsed_td,
                eta=bar.eta_td)
            bar.next()
        bar.finish()

        self.saveTrainExamples(iteration - 1, proc_num, samples)
コード例 #4
0
    def playGames(self, num, verbose=False):
        """
        Plays num games in which player1 starts num/2 games and player2 starts
        num/2 games.

        The two players are swapped on the instance before the second half
        and are left swapped when this method returns.

        Args:
            num: total number of games; int(num/2) games are played per half
            verbose: forwarded unchanged to playGame

        Returns:
            oneWon: games won by player1
            twoWon: games won by player2
            draws:  games won by nobody
        """
        eps_time = AverageMeter()
        bar = Bar('Arena.playGames', max=num)
        end = time.time()
        eps = 0
        maxeps = int(num)

        num = int(num/2)
        oneWon = 0
        twoWon = 0
        draws = 0

        # `one_sign` is the playGame result that means "the original player1
        # won": +1 while it moves first, -1 once the seats are swapped.
        for one_sign in (1, -1):
            if one_sign == -1:
                self.player1, self.player2 = self.player2, self.player1

            for _ in range(num):
                gameResult = self.playGame(verbose=verbose)
                if gameResult == one_sign:
                    oneWon += 1
                elif gameResult == -one_sign:
                    twoWon += 1
                else:
                    draws += 1

                # bookkeeping + plot progress
                eps += 1
                eps_time.update(time.time() - end)
                end = time.time()
                if self.displaybar:
                    # `eps` already counts the finished game, and the total is
                    # `maxeps` in both halves (not the halved `num`).
                    bar.suffix = '({eps}/{maxeps}) Eps Time: {et:.3f}s | Total: {total:} | ETA: {eta:}'.format(
                        eps=eps,
                        maxeps=maxeps,
                        et=eps_time.avg,
                        total=bar.elapsed_td,
                        eta=bar.eta_td)
                    bar.next()

        bar.finish()

        return oneWon, twoWon, draws
コード例 #5
0
    def playGames(self, num, verbose=False):
        """
        Plays num games in which player1 starts num/2 games and player2 starts
        num/2 games.

        The two players are swapped on the instance before the second half
        and are left swapped when this method returns.

        Args:
            num: total number of games; int(num/2) games are played per half
            verbose: forwarded unchanged to playGame

        Returns:
            oneWon: games won by player1
            twoWon: games won by player2
            draws:  games won by nobody
        """
        eps_time = AverageMeter()
        bar = Bar('Arena.playGames', max=num)
        end = time.time()
        eps = 0
        maxeps = int(num)

        num = int(num/2)
        oneWon = 0
        twoWon = 0
        draws = 0

        # First half: player1 moves first, so a result of +1 is its win.
        # Second half: seats are swapped, so -1 is the original player1's win.
        for swapped in (False, True):
            if swapped:
                self.player1, self.player2 = self.player2, self.player1
            one_wins_on = -1 if swapped else 1

            for _ in range(num):
                gameResult = self.playGame(verbose=verbose)
                if gameResult == one_wins_on:
                    oneWon += 1
                elif gameResult == -one_wins_on:
                    twoWon += 1
                else:
                    draws += 1

                # bookkeeping + plot progress
                eps += 1
                eps_time.update(time.time() - end)
                end = time.time()
                # `eps` was incremented above, so it is already the number of
                # finished games; the denominator is the overall total.
                bar.suffix = '({eps}/{maxeps}) Eps Time: {et:.3f}s | Total: {total:} | ETA: {eta:}'.format(
                    eps=eps,
                    maxeps=maxeps,
                    et=eps_time.avg,
                    total=bar.elapsed_td,
                    eta=bar.eta_td)
                bar.next()

        bar.finish()

        return oneWon, twoWon, draws
コード例 #6
0
ファイル: Arena.py プロジェクト: davidschulte/alpha-thesis
    def playGames(self, num):
        """
        plays a number of games

        Games are grouped by which agent is the "lonely" player (1 or 2) and
        which of the three turns it occupies; int(num / 6) game slots are run
        per combination. Once either agent's summed score reaches
        ``args.updateThreshold * num * 4`` the remaining slots no longer play
        a game, but the cumulated results are still printed and the progress
        bar still advances for each of them.

        :param num: number of games, has to be divisible by 6 for fair games
        :return: the summed scores of each agent
        """
        eps_time = AverageMeter()
        bar = Bar('Arena.playGames', max=num)
        end = time.time()
        eps = 0
        maxeps = int(num)

        max_scores = num * 4
        # Loop-invariant cut-off: stop playing once one agent reaches it.
        score_cap = self.args.updateThreshold * max_scores

        num = int(num / 6)
        scores = [0, 0]
        for lonely_player in [1, 2]:
            for lonely_turn in range(3):
                for _ in range(num):
                    if scores[0] < score_cap and scores[1] < score_cap:
                        self.game.reset_logic()
                        print("New Game")
                        print("Lonely Player: " + str(lonely_player))
                        print("Lonely Turn: " + str(lonely_turn + 1))
                        gameResult = self.playGame(lonely_player, lonely_turn)
                        print("RESULTS")
                        print(gameResult)
                        for t in range(3):
                            # The lonely turn's result goes to the lonely
                            # agent; the other two turns go to its opponent.
                            if t == lonely_turn:
                                scores[lonely_player - 1] += gameResult[t]
                            else:
                                scores[2 - lonely_player] += gameResult[t]

                    print("CUMMULATED RESULTS:")
                    print(scores)
                    # bookkeeping + plot progress
                    eps += 1
                    eps_time.update(time.time() - end)
                    end = time.time()
                    # `eps` was just incremented, so show it as-is.
                    bar.suffix = '({eps}/{maxeps}) Eps Time: {et:.3f}s | Total: {total:} | ETA: {eta:}'.format(
                        eps=eps,
                        maxeps=maxeps,
                        et=eps_time.avg,
                        total=bar.elapsed_td,
                        eta=bar.eta_td)
                    bar.next()

        bar.finish()

        return scores
コード例 #7
0
def train(trainloader, model, criterion, optimizer, epoch, use_cuda):
    """Train ``model`` for one pass over ``trainloader``.

    Args:
        trainloader: iterable yielding ``(inputs, targets)`` batches; must
            support ``len()`` for the progress bar.
        model: network to train; switched to train mode first.
        criterion: loss callable invoked as ``criterion(outputs, targets)``.
        optimizer: optimizer stepped once per batch.
        epoch: accepted for interface compatibility; not used here.
        use_cuda: when true, each batch is moved to the GPU.

    Returns:
        Tuple ``(average loss, average top-1 precision)`` over the pass, as
        accumulated by the meters (weighted by batch size).
    """
    # switch to train mode
    model.train()

    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()
    end = time.time()

    bar = Bar('Processing', max=len(trainloader))
    for batch_idx, (inputs, targets) in enumerate(trainloader):
        # measure data loading time
        data_time.update(time.time() - end)

        if use_cuda:
            # `async` is a reserved word since Python 3.7; PyTorch renamed the
            # keyword argument to `non_blocking`.
            inputs, targets = inputs.cuda(), targets.cuda(non_blocking=True)
        inputs, targets = torch.autograd.Variable(
            inputs), torch.autograd.Variable(targets)

        # compute output
        outputs = model(inputs)
        loss = criterion(outputs, targets)

        # measure accuracy and record loss
        prec1, prec5 = accuracy(outputs.data, targets.data, topk=(1, 5))
        # .item() extracts the Python scalar from a one-element tensor and
        # works for 0-dim tensors, which cannot be indexed with [0].
        losses.update(loss.item(), inputs.size(0))
        top1.update(prec1.item(), inputs.size(0))
        top5.update(prec5.item(), inputs.size(0))

        # compute gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        # plot progress
        bar.suffix = '({batch}/{size}) Data: {data:.3f}s | Batch: {bt:.3f}s | Total: {total:} | ETA: {eta:} | Loss: {loss:.4f} | top1: {top1: .4f} | top5: {top5: .4f}'.format(
            batch=batch_idx + 1,
            size=len(trainloader),
            data=data_time.avg,
            bt=batch_time.avg,
            total=bar.elapsed_td,
            eta=bar.eta_td,
            loss=losses.avg,
            top1=top1.avg,
            top5=top5.avg,
        )
        bar.next()
    bar.finish()
    return (losses.avg, top1.avg)
コード例 #8
0
    def self_play(self):
        """
        Generates self-play examples for numIters iterations.

        In each iteration (except the first one when skipFirstSelfPlay is
        set) numEps episodes are played, each on a fresh MCTS, and their
        examples are appended to trainExamplesHistory as one deque bounded
        by maxlenOfQueue. The history is trimmed to
        numItersForTrainExamplesHistory entries and saved after every
        iteration; aws_s3_sync is called once all iterations are done.
        """

        # TODO: parallelize this iterations
        for iteration in range(1, self.args.numIters + 1):
            print('------ITER ' + str(iteration) + '------')

            skip_self_play = self.skipFirstSelfPlay and iteration <= 1
            if not skip_self_play:
                examples = deque([], maxlen=self.args.maxlenOfQueue)

                timer = AverageMeter()
                bar = Bar('Self Play', max=self.args.numEps)
                tick = time.time()

                for episode in range(self.args.numEps):
                    # Reset the search tree for every episode.
                    self.mcts = MCTS(self.game, self.nnet, self.args)
                    examples.extend(self.executeEpisode())

                    # Progress bookkeeping.
                    timer.update(time.time() - tick)
                    tick = time.time()
                    bar.suffix = '({eps}/{maxeps}) Eps Time: {et:.3f}s | Total: {total:} | ETA: {eta:}'.format(
                        eps=episode + 1,
                        maxeps=self.args.numEps,
                        et=timer.avg,
                        total=bar.elapsed_td,
                        eta=bar.eta_td)
                    bar.next()
                bar.finish()

                # Keep this iteration's examples in the history.
                self.trainExamplesHistory.append(examples)

            history_size = len(self.trainExamplesHistory)
            if history_size > self.args.numItersForTrainExamplesHistory:
                print("len(trainExamplesHistory) =",
                      history_size,
                      " => remove the oldest trainExamples")
                self.trainExamplesHistory.pop(0)

            # Back up the history. The examples were produced with the model
            # of the previous iteration, hence the index iteration - 1.
            self.saveTrainExamples(iteration - 1)
        self.aws_s3_sync()
コード例 #9
0
    def learn(self):
        """
        Runs numIters rounds of self-play, training and evaluation.

        Each round plays numEps self-play episodes (examples accumulate in a
        single deque bounded by maxlenOfQueue that is shared by all rounds),
        trains the network on them, and pits the trained network against a
        copy of its pre-training state in an Arena. The new network is
        rejected when it wins less than updateThreshold of the decisive
        games; otherwise it is checkpointed as the new best.
        """

        trainExamples = deque([], maxlen=self.args.maxlenOfQueue)
        for i in range(self.args.numIters):
            print('------ITER ' + str(i+1) + '------')
            timer = AverageMeter()
            bar = Bar('Self Play', max=self.args.numEps)
            tick = time.time()

            for episode in range(self.args.numEps):
                # Reset the search tree for every episode.
                self.mcts = MCTS(self.game, self.nnet, self.args)
                trainExamples.extend(self.executeEpisode())

                # Progress bookkeeping.
                timer.update(time.time() - tick)
                tick = time.time()
                bar.suffix = '({eps}/{maxeps}) Eps Time: {et:.3f}s | Total: {total:} | ETA: {eta:}'.format(
                    eps=episode + 1,
                    maxeps=self.args.numEps,
                    et=timer.avg,
                    total=bar.elapsed_td,
                    eta=bar.eta_td)
                bar.next()
            bar.finish()

            # Snapshot the current weights into a second network instance
            # before training, so old and new can be compared.
            checkpoint_dir = self.args.checkpoint
            self.nnet.save_checkpoint(folder=checkpoint_dir, filename='temp.pth.tar')
            pnet = self.nnet.__class__(self.game)
            pnet.load_checkpoint(folder=checkpoint_dir, filename='temp.pth.tar')
            pmcts = MCTS(self.game, pnet, self.args)
            self.nnet.train(trainExamples)
            nmcts = MCTS(self.game, self.nnet, self.args)

            print('PITTING AGAINST PREVIOUS VERSION')
            arena = Arena(lambda x: np.argmax(pmcts.getActionProb(x, temp=0)),
                          lambda x: np.argmax(nmcts.getActionProb(x, temp=0)), self.game)
            pwins, nwins, draws = arena.playGames(self.args.arenaCompare)

            print('NEW/PREV WINS : ' + str(nwins) + '/' + str(pwins) + ' ; DRAWS : ' + str(draws))

            # NOTE: with no decisive games (pwins + nwins == 0) the condition
            # below is false, so the new model is accepted.
            decisive = pwins + nwins
            rejected = decisive > 0 and float(nwins) / decisive < self.args.updateThreshold
            if rejected:
                print('REJECTING NEW MODEL')
                self.nnet = pnet
            else:
                print('ACCEPTING NEW MODEL')
                self.nnet.save_checkpoint(folder=checkpoint_dir, filename='checkpoint_' + str(i) + '.pth.tar')
                self.nnet.save_checkpoint(folder=checkpoint_dir, filename='best.pth.tar')
コード例 #10
0
ファイル: NNet.py プロジェクト: ms8909/alpha-zero-general-1
    def train(self, examples):
        """
        Trains the network for args.epochs epochs.

        Every epoch runs int(len(examples) / args.batch_size) steps; each
        step draws args.batch_size example indices at random (independently,
        so an example may repeat), runs one train step and then evaluates
        both losses on that same batch for the progress display.

        examples: list of examples, each example is of form (board, pi, v)
        """

        for epoch in range(args.epochs):
            print('EPOCH ::: ' + str(epoch + 1))
            data_time = AverageMeter()
            batch_time = AverageMeter()
            pi_losses = AverageMeter()
            v_losses = AverageMeter()
            end = time.time()

            num_batches = int(len(examples) / args.batch_size)
            bar = Bar('Training Net', max=num_batches)

            # self.sess.run(tf.local_variables_initializer())
            for batch_no in range(1, num_batches + 1):
                picks = np.random.randint(len(examples),
                                          size=args.batch_size)
                boards, pis, vs = zip(*(examples[i] for i in picks))

                # Feed for both the train step and the loss evaluation.
                feed = {
                    self.nnet.input_boards: boards,
                    self.nnet.target_pis: pis,
                    self.nnet.target_vs: vs,
                    self.nnet.dropout: args.dropout,
                    self.nnet.isTraining: True
                }

                # Time spent assembling the batch.
                data_time.update(time.time() - end)

                # One optimisation step, then read back the two losses.
                self.sess.run(self.nnet.train_step, feed_dict=feed)
                pi_loss, v_loss = self.sess.run(
                    [self.nnet.loss_pi, self.nnet.loss_v],
                    feed_dict=feed)
                batch_len = len(boards)
                pi_losses.update(pi_loss, batch_len)
                v_losses.update(v_loss, batch_len)

                # Time for the whole step.
                batch_time.update(time.time() - end)
                end = time.time()

                # Progress display.
                bar.suffix = '({batch}/{size}) Data: {data:.3f}s | Batch: {bt:.3f}s | Total: {total:} | ETA: {eta:} | Loss_pi: {lpi:.4f} | Loss_v: {lv:.3f}'.format(
                    batch=batch_no,
                    size=num_batches,
                    data=data_time.avg,
                    bt=batch_time.avg,
                    total=bar.elapsed_td,
                    eta=bar.eta_td,
                    lpi=pi_losses.avg,
                    lv=v_losses.avg,
                )
                bar.next()
            bar.finish()
コード例 #11
0
ファイル: Arena.py プロジェクト: blekinge/alpha-zero-general
    def _play_games(self, bar: Bar, end: float, eps: int,
                    eps_time: AverageMeter, maxeps: int, number_games: int,
                    verbose: bool) -> Tuple[int, float, int, int, int]:
        '''
        Play a set of games

        :param bar: the progress bar whose suffix is refreshed after each game
        :param end: timestamp of the end of the previous game (or of the
                    start); used to measure the duration of the next game
        :param eps: number of games already played before this call
        :param eps_time: meter that is updated with every game's duration
        :param maxeps: total number of games, shown as the denominator in
                       the progress suffix
        :param number_games: number of games to play
        :param verbose: verbose mode
        :returns draws: number of draws
        :returns end: timestamp taken after the last game played
        :returns eps: number of games played so far, including this call
        :returns oneWon: number of games player 1 won
        :returns twoWon: number of games player 2 won
        '''

        oneWon = 0
        twoWon = 0
        draws = 0

        for _ in range(number_games):
            gameResult = self.play_single_game(verbose=verbose)
            if gameResult == 1:
                oneWon += 1
            elif gameResult == -1:
                twoWon += 1
            else:
                draws += 1
            # bookkeeping + plot progress
            eps += 1
            eps_time.update(time.time() - end)
            end = time.time()

            # `eps` already includes the game that just finished, so it is
            # displayed as-is (adding 1 again would overshoot maxeps).
            bar.suffix = '({eps}/{maxeps}) ({won}/{loss}/{draw}) Eps Time: {et:.3f}s | Total: {total:} | ETA: {eta:}'.format(
                eps=eps,
                maxeps=maxeps,
                won=oneWon,
                loss=twoWon,
                draw=draws,
                et=eps_time.avg,
                total=bar.elapsed_td,
                eta=bar.eta_td)
            bar.next()
        return draws, end, eps, oneWon, twoWon
コード例 #12
0
    def learn(self):
        """
        Runs numIters iterations of episode-by-episode training.

        In every iteration (except the first one when skipFirstSelfPlay is
        set) numEps episodes are executed; the network is trained on each
        episode's examples right after the episode. Per episode, the step
        count and the third field of the last example are recorded, and two
        scatter plots of them against the episode index are written to
        fig/rewards_<i>.png and fig/steps_<i>.png.
        """

        for i in range(1, self.args.numIters+1):
            print('------ITER ' + str(i) + '------')

            # Nothing else happens in an iteration whose self-play is skipped.
            if self.skipFirstSelfPlay and i <= 1:
                continue

            timer = AverageMeter()
            bar = Bar('Self Play', max=self.args.numEps)
            tick = time.time()

            episode_ids = []
            final_rewards = []
            step_counts = []

            for episode in range(self.args.numEps):
                examples, step_count = self.executeEpisode()
                self.nnet.train(examples)

                episode_ids.append(episode)
                final_rewards.append(examples[-1][2])
                step_counts.append(step_count)

                # Progress bookkeeping.
                timer.update(time.time() - tick)
                tick = time.time()
                bar.suffix = '({eps}/{maxeps}) Eps Time: {et:.3f}s | Total: {total:} | ETA: {eta:}'.format(
                    eps=episode + 1,
                    maxeps=self.args.numEps,
                    et=timer.avg,
                    total=bar.elapsed_td,
                    eta=bar.eta_td)
                bar.next()
            bar.finish()

            # One figure per metric, closed right after saving.
            plt.scatter(episode_ids, final_rewards, label='rewards_training')
            plt.savefig("fig/rewards_"+str(i)+".png")
            plt.close()

            plt.scatter(episode_ids, step_counts, label='steps_training')
            plt.savefig("fig/steps_"+str(i)+".png")
            plt.close()
コード例 #13
0
 def _predict_batch_worker(self) -> None:
     """
     Thread worker which listens on each pipe in self.pipes for an observation, and then outputs
     the predictions for the policy and value networks when the observations come in. Repeats.

     Each message received from a pipe is a ``(flag, obj)`` pair. A flag of 0
     is a stop request: the pipe is closed and removed from ``self.pipes``.
     Any other flag means ``obj`` is queued for prediction. All queued
     objects from one wake-up are predicted as a single batch and each
     ``(policy, value)`` result is sent back on the pipe its object came
     from. The worker returns once ``self.pipes`` is empty, after printing
     the statistics gathered about how many pipes were ready per wake-up.
     """
     # Tracks the number of ready pipes per wake-up.
     # NOTE(review): AverageMeter is defined elsewhere; it is assumed to
     # expose count/min/avg/max as used below - confirm.
     meter = AverageMeter()
     while True:
         # presumably behaves like multiprocessing.connection.wait (returns
         # the pipes that have data, or an empty result on timeout) - confirm
         ready = self.wait(self.pipes, timeout=self.args.pipe_timeout)
         if not ready:
             continue
         
         meter.update(len(ready))
         # Periodic statistics whenever the meter's count is a multiple of 1000.
         if meter.count % 1000 == 0:
             print("Prediction Worker: count=", meter.count, ", min/avg/max = ", meter.min, meter.avg, meter.max)
         
         # data[i] is the object to predict on; result_pipes[i] is where its
         # answer must be sent. The two lists are kept in lockstep.
         data, result_pipes = [], []
         for pipe in ready:
             # Drain everything currently buffered on this pipe.
             while pipe.poll():
                 try:
                     (flag,obj) = pipe.recv()
                     if flag==0:
                         # Stop request: retire this pipe and stop reading it.
                         print("Stop PIPE")
                         pipe.close()
                         self.pipes.remove(pipe)
                         break
                     data.append(obj)
                     result_pipes.append(pipe)
                 except EOFError:
                     # pipe is closed
                     print("closing pipe...")
                     self.pipes.remove(pipe)
                     break
         # No pipes left to serve: leave the loop and print final stats.
         if not self.pipes:
             print("There is no PIPE. Prediction worker exits.")
             break
         # Only stop/EOF messages arrived this round: nothing to predict.
         if not data:
             continue
             #print("There is no DATA. Prediction worker exits.")
             #break
         # Stack the queued objects into one float32 array for a single
         # batched prediction.
         data = np.asarray(data, dtype=np.float32)
         policy_ary, value_ary = self.nnet.model.predict_on_batch(data)
         # Results are zipped back to pipes in the order they were queued.
         for pipe, p, v in zip(result_pipes, policy_ary, value_ary):
             pipe.send((p, float(v)))
     # print stats
     print("Prediction Worker results: count=", meter.count, ", min/avg/max = ", meter.min, meter.avg, meter.max)
コード例 #14
0
ファイル: NNet.py プロジェクト: VVVVVan/mcts_improve
    def train(self, examples):
        """
        Trains the network with a fresh Adam optimizer for args.epochs epochs.

        Every epoch runs int(len(examples) / args.batch_size) steps; each
        step draws args.batch_size example indices at random (independently,
        so an example may repeat) and minimises loss_pi + loss_v on them.

        examples: list of examples, each example is of form (board, pi, v)
        """
        optimizer = optim.Adam(self.nnet.parameters())
        print("Train")
        for epoch in range(args.epochs):
            self.nnet.train()
            data_time = AverageMeter()
            batch_time = AverageMeter()
            pi_losses = AverageMeter()
            v_losses = AverageMeter()
            end = time.time()

            num_batches = int(len(examples) / args.batch_size)
            for _ in range(num_batches):
                picks = np.random.randint(len(examples),
                                          size=args.batch_size)
                raw_boards, raw_pis, raw_vs = zip(
                    *(examples[i] for i in picks))
                boards = torch.FloatTensor(
                    np.array(raw_boards).astype(np.float64))
                target_pis = torch.FloatTensor(np.array(raw_pis))
                target_vs = torch.FloatTensor(
                    np.array(raw_vs).astype(np.float64))

                # Move the batch to the GPU when requested.
                if args.cuda:
                    boards = boards.contiguous().cuda()
                    target_pis = target_pis.contiguous().cuda()
                    target_vs = target_vs.contiguous().cuda()

                # Time spent assembling the batch.
                data_time.update(time.time() - end)

                # Forward pass and the two loss terms.
                out_pi, out_v = self.nnet(boards)
                l_pi = self.loss_pi(target_pis, out_pi)
                l_v = self.loss_v(target_vs, out_v)
                total_loss = l_pi + l_v

                # Record the losses, weighted by batch size.
                batch_len = boards.size(0)
                pi_losses.update(l_pi.item(), batch_len)
                v_losses.update(l_v.item(), batch_len)

                # Backward pass and parameter update.
                optimizer.zero_grad()
                total_loss.backward()
                optimizer.step()

                # Time for the whole step.
                batch_time.update(time.time() - end)
                end = time.time()
コード例 #15
0
ファイル: Coach.py プロジェクト: zhyack/SCC
    def learn_self_play_iter(self):
        """Run one round of self-play and train the network on the result.

        Plays ``self.selfplaynum`` episodes, adds their examples (bounded by
        ``args.maxlenOfQueue`` for this round) to
        ``self.trainExampleSelfPlay``, shuffles that list in place and
        trains the network on it with ``transform=True``.
        """
        fresh_examples = deque([], maxlen=self.args.maxlenOfQueue)
        timer = AverageMeter()
        bar = Bar('Self Play', max=self.selfplaynum)
        tick = time.time()

        for episode in range(self.selfplaynum):
            fresh_examples.extend(self.executeEpisode())

            # Progress bookkeeping.
            timer.update(time.time() - tick)
            tick = time.time()
            bar.suffix = '({eps}/{maxeps}) Eps Time: {et:.3f}s | Total: {total:} | ETA: {eta:}'.format(
                eps=episode + 1,
                maxeps=self.selfplaynum,
                et=timer.avg,
                total=bar.elapsed_td,
                eta=bar.eta_td)
            bar.next()
        bar.finish()

        replay_pool = self.trainExampleSelfPlay
        replay_pool.extend(fresh_examples)
        shuffle(replay_pool)
        print('Got %d replays through self-play.'%(len(self.trainExampleSelfPlay)))
        self.nnet.train(self.trainExampleSelfPlay, transform=True)
コード例 #16
0
def train(trainloader, model, model_index, criterion, optimizer, epoch,
          use_cuda):
    """
    Run one optimisation pass over ``trainloader``.

    Args:
        trainloader: iterable yielding ``(inputs, targets)`` batches.
        model: network to optimise; switched to training mode first.
        model_index: not used by this function.
        criterion: callable ``criterion(outputs, targets)`` returning the loss.
        optimizer: optimiser stepped once per batch.
        epoch: not used by this function.
        use_cuda: when truthy, each batch is moved to the GPU with ``.cuda()``.

    Returns:
        Tuple ``(losses.avg, top1.avg)`` taken from the running meters of the
        loss and of the first value returned by ``accuracy(..., topk=(1, 5))``.
    """
    model.train()

    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()
    end = time.time()

    for inputs, targets in trainloader:
        # Time spent waiting for the loader to produce this batch.
        data_time.update(time.time() - end)

        if use_cuda:
            inputs = inputs.cuda()
            targets = targets.cuda()
        inputs = torch.autograd.Variable(inputs)
        targets = torch.autograd.Variable(targets)

        # Forward pass.
        outputs = model(inputs)
        loss = criterion(outputs, targets)

        # Update running statistics, weighted by the batch size.
        batch_size = inputs.size(0)
        prec1, prec5 = accuracy(outputs.data, targets.data, topk=(1, 5))
        losses.update(loss.data.item(), batch_size)
        top1.update(prec1.item(), batch_size)
        top5.update(prec5.item(), batch_size)

        # Backward pass and parameter update.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Total time for this batch, then restart the clock.
        batch_time.update(time.time() - end)
        end = time.time()

    return (losses.avg, top1.avg)
コード例 #17
0
    def train(self, examples):
        """
        Train the network on mini-batches sampled from ``examples``.

        examples: list of examples, each example is of form (board, pi, v)

        For each of ``args.epochs`` epochs, ``int(len(examples) /
        args.batch_size)`` batches are drawn with ``np.random.randint`` (so
        indices may repeat) and one ``train_step`` is run per batch.

        Returns:
            losses: two-element list; ``losses[0]`` holds the ``loss_pi``
                    value and ``losses[1]`` the ``loss_v`` value of every
                    batch, across all epochs, in execution order.
        """
        losses = [[], []]
        for epoch in range(args.epochs):
            print('EPOCH ::: ' + str(epoch + 1))
            data_time = AverageMeter()
            batch_time = AverageMeter()
            pi_losses = AverageMeter()
            v_losses = AverageMeter()
            end = time.time()

            # The batch count is fixed for the whole epoch; compute it once.
            num_batches = int(len(examples) / args.batch_size)
            bar = Bar('Training Net', max=num_batches)

            # self.sess.run(tf.local_variables_initializer())
            for _ in range(num_batches):
                sample_ids = np.random.randint(len(examples),
                                               size=args.batch_size)
                boards, pis, vs = list(zip(*[examples[i] for i in sample_ids]))

                # predict and compute gradient and do SGD step
                input_dict = {
                    self.nnet.input_boards: boards,
                    self.nnet.target_pis: pis,
                    self.nnet.target_vs: vs,
                    self.nnet.dropout: args.dropout,
                    self.nnet.isTraining: True
                }

                # measure data loading time
                data_time.update(time.time() - end)

                # Apply the update first, then evaluate both losses on the
                # same feed in a second run.
                self.sess.run(self.nnet.train_step, feed_dict=input_dict)
                pi_loss, v_loss = self.sess.run(
                    [self.nnet.loss_pi, self.nnet.loss_v],
                    feed_dict=input_dict)
                pi_losses.update(pi_loss, len(boards))
                v_losses.update(v_loss, len(boards))

                losses[0].append(pi_loss)
                losses[1].append(v_loss)

                # measure elapsed time, then restart the clock so the next
                # batch is timed on its own rather than since epoch start
                batch_time.update(time.time() - end)
                end = time.time()

                bar.next()
            bar.finish()
        return losses
コード例 #18
0
    def train(self, examples):
        """
        Fit the network on randomly sampled mini-batches.

        examples: list of examples, each example is of form (board, pi, v)

        Runs ``args.epochs`` epochs of ``int(len(examples)/args.batch_size)``
        batches each, showing running timing and loss averages on a progress
        bar. Nothing is returned.
        """
        suffix_template = '({batch}/{size}) Data: {data:.3f}s | Batch: {bt:.3f}s | Total: {total:} | ETA: {eta:} | Loss_pi: {lpi:.4f} | Loss_v: {lv:.3f}'

        for epoch in range(args.epochs):
            print('EPOCH ::: ' + str(epoch+1))
            data_time = AverageMeter()
            batch_time = AverageMeter()
            pi_losses = AverageMeter()
            v_losses = AverageMeter()
            end = time.time()

            n_batches = int(len(examples)/args.batch_size)
            bar = Bar('Training Net', max=n_batches)

            # self.sess.run(tf.local_variables_initializer())
            for batch_no in range(1, n_batches + 1):
                # Draw a batch of example indices (repeats are possible).
                picks = np.random.randint(len(examples), size=args.batch_size)
                boards, pis, vs = zip(*(examples[k] for k in picks))

                feed = {
                    self.nnet.input_boards: boards,
                    self.nnet.target_pis: pis,
                    self.nnet.target_vs: vs,
                    self.nnet.dropout: args.dropout,
                    self.nnet.isTraining: True,
                }

                # Time taken to assemble the batch.
                data_time.update(time.time() - end)

                # One optimisation step, then read both losses on the same feed.
                self.sess.run(self.nnet.train_step, feed_dict=feed)
                fetched = self.sess.run([self.nnet.loss_pi, self.nnet.loss_v], feed_dict=feed)
                pi_loss, v_loss = fetched
                pi_losses.update(pi_loss, len(boards))
                v_losses.update(v_loss, len(boards))

                # Time taken by the whole batch; restart the clock.
                batch_time.update(time.time() - end)
                end = time.time()

                # Refresh the progress line.
                bar.suffix = suffix_template.format(
                    batch=batch_no,
                    size=int(len(examples)/args.batch_size),
                    data=data_time.avg,
                    bt=batch_time.avg,
                    total=bar.elapsed_td,
                    eta=bar.eta_td,
                    lpi=pi_losses.avg,
                    lv=v_losses.avg,
                )
                bar.next()
            bar.finish()
コード例 #19
0
ファイル: arena.py プロジェクト: csdankim/CS531_Final_Project
    def playGames(self, num, verbose=False):
        """
        Plays ``int(num / 2)`` games and tallies their outcomes.

        Args:
            num: requested game budget; only half of it is actually played
                 (see the note in the body).
            verbose: forwarded to ``self.playGame``.

        Returns:
            solved: number of games for which ``playGame`` returned 1
            timed_out: number of games with any other result
        """
        # NOTE(review): the halving looks like a leftover from a two-player
        # arena that plays num/2 games per side. It is kept so callers keep
        # receiving the same totals - confirm whether `num` games were meant.
        games = int(num / 2)

        eps_time = AverageMeter()
        # Size the bar by the games really played so it can reach 100%.
        bar = Bar('Arena.playGames', max=games)
        end = time.time()

        solved = 0
        timed_out = 0

        for eps in range(1, games + 1):
            gameResult = self.playGame(verbose=verbose)
            if gameResult == 1:
                solved += 1
            else:
                timed_out += 1
            # bookkeeping + plot progress
            eps_time.update(time.time() - end)
            end = time.time()
            # `eps` is already the 1-based count of finished games.
            bar.suffix = '({eps}/{maxeps}) Eps Time: {et:.3f}s | Total: {total:} | ETA: {eta:}'.format(
                eps=eps,
                maxeps=games,
                et=eps_time.avg,
                total=bar.elapsed_td,
                eta=bar.eta_td)
            bar.next()

        bar.finish()

        return solved, timed_out
コード例 #20
0
def test(testloader, model, model_index, criterion, epoch, use_cuda):
    """
    Evaluate ``model`` on every batch of ``testloader``.

    Args:
        testloader: iterable yielding ``(inputs, targets)`` batches.
        model: network to evaluate; switched to eval mode first.
        model_index: not used by this function.
        criterion: callable ``criterion(outputs, targets)`` returning the loss.
        epoch: not used by this function.
        use_cuda: when truthy, each batch is moved to the GPU with ``.cuda()``.

    Returns:
        Tuple ``(losses.avg, top1.avg)`` taken from the running meters of the
        loss and of the first value returned by ``accuracy(..., topk=(1, 5))``.
    """
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()

    # switch to evaluate mode
    model.eval()

    end = time.time()
    # `Variable(..., volatile=True)` no longer disables autograd on the
    # PyTorch versions that provide `.item()`; `torch.no_grad()` is the
    # supported way to avoid building graphs during evaluation.
    with torch.no_grad():
        for inputs, targets in testloader:
            # measure data loading time
            data_time.update(time.time() - end)

            if use_cuda:
                inputs, targets = inputs.cuda(), targets.cuda()

            # compute output
            outputs = model(inputs)
            loss = criterion(outputs, targets)

            # measure accuracy and record loss
            prec1, prec5 = accuracy(outputs.data, targets.data, topk=(1, 5))
            losses.update(loss.item(), inputs.size(0))
            top1.update(prec1.item(), inputs.size(0))
            top5.update(prec5.item(), inputs.size(0))

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

    return (losses.avg, top1.avg)
コード例 #21
0
    def train(self, examples, transform=False, models=(0,)):
        """
        Train the selected sub-models on mini-batches sampled from ``examples``.

        Args:
            examples: list of examples, each example is of form (board, pi, v)
            transform: forwarded unchanged to every training function.
            models: iterable of keys into ``self.train_functions``. Key 0 is
                    expected to return a pair ``(pi_loss, v_loss)``; any other
                    key a single loss value. Defaults to ``(0,)`` (immutable,
                    so the default cannot be shared and mutated across calls).

        Each of ``args.epochs`` epochs runs ``int(len(examples) /
        args.batch_size)`` batches; every batch is sampled once and fed to
        each selected training function in turn. Nothing is returned.
        """
        for epoch in range(args.epochs):
            print('EPOCH ::: ' + str(epoch + 1))
            batch_time = AverageMeter()
            losses = AverageMeter()
            pi_losses = AverageMeter()
            v_losses = AverageMeter()
            end = time.time()

            # The batch count is fixed for the whole epoch; compute it once.
            num_batches = int(len(examples) / args.batch_size)
            bar = Bar('Training Net', max=num_batches)

            # self.sess.run(tf.local_variables_initializer())
            for batch_idx in range(1, num_batches + 1):
                sample_ids = np.random.randint(len(examples),
                                               size=args.batch_size)
                sample_examples = [examples[i] for i in sample_ids]
                for m in models:
                    loss = self.train_functions[m](sample_examples, transform)
                    if m == 0:
                        # Model 0 reports the two losses separately.
                        pi_losses.update(loss[0], len(sample_examples))
                        v_losses.update(loss[1], len(sample_examples))
                    else:
                        losses.update(loss, len(sample_examples))
                batch_time.update(time.time() - end)
                end = time.time()
                bar.suffix = '({batch}/{size}) Total: {total:} | Loss: {loss:.3f} | Loss_pi: {lpi:.4f} | Loss_v: {lv:.3f}'.format(
                    batch=batch_idx,
                    size=num_batches,
                    total=bar.elapsed_td,
                    loss=losses.avg,
                    lpi=pi_losses.avg,
                    lv=v_losses.avg,
                )
                bar.next()
            bar.finish()
コード例 #22
0
    def train(self, examples):
        """
        Train ``self.nnet`` with Adam on mini-batches sampled from ``examples``.

        examples: list of examples, each example is of form (board, pi, v)

        Runs ``self.args.epochs`` epochs of ``int(len(examples) /
        self.args.batch_size)`` batches each. Batches are drawn with
        ``np.random.randint`` (indices may repeat) and moved to the GPU when
        ``self.args.cuda`` is set. Nothing is returned.
        """
        optimizer = optim.Adam(self.nnet.parameters())

        for epoch in range(self.args.epochs):
            print('EPOCH ::: ' + str(epoch + 1))
            self.nnet.train()
            data_time = AverageMeter()
            batch_time = AverageMeter()
            pi_losses = AverageMeter()
            v_losses = AverageMeter()
            end = time.time()

            # The batch count is fixed for the whole epoch; compute it once.
            num_batches = int(len(examples) / self.args.batch_size)
            bar = Bar('Training Net', max=num_batches)

            for batch_idx in range(1, num_batches + 1):
                sample_ids = np.random.randint(len(examples),
                                               size=self.args.batch_size)
                boards, pis, vs = list(zip(*[examples[i] for i in sample_ids]))
                boards = torch.FloatTensor(np.array(boards).astype(np.float64))
                target_pis = torch.FloatTensor(np.array(pis))
                target_vs = torch.FloatTensor(np.array(vs).astype(np.float64))

                # predict
                if self.args.cuda:
                    boards = boards.contiguous().cuda()
                    target_pis = target_pis.contiguous().cuda()
                    target_vs = target_vs.contiguous().cuda()
                boards, target_pis, target_vs = Variable(boards), Variable(
                    target_pis), Variable(target_vs)

                # measure data loading time
                data_time.update(time.time() - end)

                # compute output
                out_pi, out_v = self.nnet(boards)
                l_pi = self.loss_pi(target_pis, out_pi)
                l_v = self.loss_v(target_vs, out_v)
                total_loss = l_pi + l_v

                # record loss
                # `.item()` extracts the Python scalar; indexing `.data[0]`
                # fails on 0-dim loss tensors in current PyTorch.
                # (assumes loss_pi / loss_v return single-element tensors)
                pi_losses.update(l_pi.item(), boards.size(0))
                v_losses.update(l_v.item(), boards.size(0))

                # compute gradient and do SGD step
                optimizer.zero_grad()
                total_loss.backward()
                optimizer.step()

                # measure elapsed time
                batch_time.update(time.time() - end)
                end = time.time()

                # plot progress
                bar.suffix = '({batch}/{size}) Data: {data:.3f}s | Batch: {bt:.3f}s | Total: {total:} | ETA: {eta:} | Loss_pi: {lpi:.4f} | Loss_v: {lv:.3f}'.format(
                    batch=batch_idx,
                    size=num_batches,
                    data=data_time.avg,
                    bt=batch_time.avg,
                    total=bar.elapsed_td,
                    eta=bar.eta_td,
                    lpi=pi_losses.avg,
                    lv=v_losses.avg,
                )
                bar.next()
            bar.finish()
コード例 #23
0
ファイル: Arena_elo.py プロジェクト: FFranKKK122/ROTG
    def playGames(self, num, verbose=False):
        """
        Play two legs of ``int(num/2)`` games each, swapping
        ``self.player1`` and ``self.player2`` between the legs.

        After every game the Elo ratings are updated through ``self.elo`` and
        the game is logged through ``self.saveToPGN``; in the second leg the
        result is negated first so both calls always see it from the original
        player1's point of view. A script is written with
        ``self.saveToScript`` once all games are done.

        Returns:
            oneWon: games won by player1
            twoWon: games won by player2
            draws:  games won by nobody
        """
        suffix_template = '({eps}/{maxeps}) Eps Time: {et:.3f}s | Total: {total:} | ETA: {eta:}| Win: {one}:{two}'

        eps_time = AverageMeter()
        bar = Bar('Arena.playGames', max=num)
        end = time.time()
        maxeps = int(num)
        games_per_leg = int(num/2)

        eps = 0
        oneWon, twoWon, draws = 0, 0, 0

        # ---- first leg: players in their original seats ----
        for _ in range(games_per_leg):
            gameResult = self.playGame(verbose=verbose)
            if gameResult == -1:
                twoWon += 1
            elif gameResult == 1:
                oneWon += 1
            else:
                draws += 1

            # update Elo ratings
            self.elo(gameResult)
            eps += 1
            # PGN log
            self.saveToPGN(gameResult, eps)

            # bookkeeping + plot progress
            eps_time.update(time.time() - end)
            end = time.time()
            bar.suffix = suffix_template.format(
                eps=eps, maxeps=maxeps, et=eps_time.avg,
                total=bar.elapsed_td, eta=bar.eta_td,
                one=oneWon, two=twoWon)
            bar.next()

        print('half')
        print(oneWon, twoWon, draws)
        print('elo: player1: ',self.R1,'player2',self.R2)

        self.player1, self.player2 = self.player2, self.player1

        # ---- second leg: seats swapped, so results are mirrored ----
        for _ in range(games_per_leg):
            gameResult = self.playGame(verbose=verbose)
            if gameResult == 1:
                twoWon += 1
            elif gameResult == -1:
                oneWon += 1
            else:
                draws += 1

            # update Elo ratings (result flipped back to the original seating)
            self.elo(gameResult * -1)
            eps += 1
            # PGN log
            self.saveToPGN(gameResult * -1, eps)

            # bookkeeping + plot progress
            eps_time.update(time.time() - end)
            end = time.time()
            bar.suffix = suffix_template.format(
                eps=eps, maxeps=maxeps, et=eps_time.avg,
                total=bar.elapsed_td, eta=bar.eta_td,
                one=oneWon, two=twoWon)
            bar.next()

        bar.finish()
        print('elo: player1: ',self.R1,'player2',self.R2)

        file_name = self.player1_name + '_vs_' + self.player2_name + '.pgn'
        self.saveToScript(file_name)

        return oneWon, twoWon, draws
コード例 #24
0
ファイル: Coach.py プロジェクト: xphoniex/alphazero-quoridor
    def learn(self):
        """
        Performs numIters iterations with numEps episodes of self-play in each
        iteration. After every iteration, it retrains neural network with
        examples in trainExamples (which has a maximum length of maxlenofQueue).
        It then pits the new neural network against the old one and accepts it
        only if it wins >= updateThreshold fraction of games.

        Self-play is skipped on the first iteration when
        ``self.skipFirstSelfPlay`` is set. Accepted networks are saved both
        under ``self.getCheckpointFile(i)`` and as 'best.pth.tar'.
        """

        for i in range(1, self.args.numIters + 1):
            # bookkeeping
            print('------ITER ' + str(i) + '------')
            # examples of the iteration
            if not self.skipFirstSelfPlay or i > 1:
                iterationTrainExamples = deque([],
                                               maxlen=self.args.maxlenOfQueue)

                eps_time = AverageMeter()
                bar = Bar('Self Play', max=self.args.numEps)
                end = time.time()

                for eps in range(self.args.numEps):
                    self.mcts = MCTS(self.game, self.nnet,
                                     self.args)  # reset search tree
                    iterationTrainExamples += self.executeEpisode()

                    # bookkeeping + plot progress
                    eps_time.update(time.time() - end)
                    end = time.time()
                    bar.suffix = '({eps}/{maxeps}) Eps Time: {et:.3f}s | Total: {total:} | ETA: {eta:}'.format(
                        eps=eps + 1,
                        maxeps=self.args.numEps,
                        et=eps_time.avg,
                        total=bar.elapsed_td,
                        eta=bar.eta_td)
                    bar.next()
                bar.finish()

                # save the iteration examples to the history
                self.trainExamplesHistory.append(iterationTrainExamples)
                # Histogram of the third field of every example, used directly
                # as a list index.
                # presumably that field is in {-1, 0, 1}, so -1 lands in the
                # last slot - TODO confirm
                trainStats = [0, 0, 0]
                for _, _, res in iterationTrainExamples:
                    trainStats[res] += 1
                # Function-call form: the former print statement was a
                # SyntaxError on Python 3.
                print(trainStats)

            if len(self.trainExamplesHistory
                   ) > self.args.numItersForTrainExamplesHistory:
                print("len(trainExamplesHistory) =",
                      len(self.trainExamplesHistory),
                      " => remove the oldest trainExamples")
                self.trainExamplesHistory.pop(0)
            # backup history to a file
            # NB! the examples were collected using the model from the previous iteration, so (i-1)
            self.saveTrainExamples(i - 1)

            # shuffle examples before training
            trainExamples = []
            for e in self.trainExamplesHistory:
                trainExamples.extend(e)
            shuffle(trainExamples)

            # training new network, keeping a copy of the old one
            self.nnet.save_checkpoint(folder=self.args.checkpoint,
                                      filename='temp.pth.tar')
            self.pnet.load_checkpoint(folder=self.args.checkpoint,
                                      filename='temp.pth.tar')
            pmcts = MCTS(self.game, self.pnet, self.args)

            self.nnet.train(trainExamples)
            nmcts = MCTS(self.game, self.nnet, self.args)

            print('PITTING AGAINST PREVIOUS VERSION')
            arena = Arena(lambda x: np.argmax(pmcts.getActionProb(x, temp=0)),
                          lambda x: np.argmax(nmcts.getActionProb(x, temp=0)),
                          self.game)
            pwins, nwins, draws = arena.playGames(self.args.arenaCompare)

            print('NEW/PREV WINS : %d / %d ; DRAWS : %d' %
                  (nwins, pwins, draws))
            # Reject only when at least one game was decisive and the new
            # network's share of decisive games is below the threshold.
            if pwins + nwins > 0 and float(nwins) / (
                    pwins + nwins) < self.args.updateThreshold:
                print('REJECTING NEW MODEL')
                self.nnet.load_checkpoint(folder=self.args.checkpoint,
                                          filename='temp.pth.tar')
            else:
                print('ACCEPTING NEW MODEL')
                self.nnet.save_checkpoint(folder=self.args.checkpoint,
                                          filename=self.getCheckpointFile(i))
                self.nnet.save_checkpoint(folder=self.args.checkpoint,
                                          filename='best.pth.tar')
コード例 #25
0
    def playGames(self, num, verbose=False):
        """
        Plays num games in which player1 starts num/2 games and player2 starts
        num/2 games.

        ``self.player1`` and ``self.player2`` are swapped between the two
        halves (and left swapped afterwards). The per-half tallies are printed
        before returning.

        Returns:
            oneWon: games won by player1
            twoWon: games won by player2
            draws:  games won by nobody
        """
        eps_time = AverageMeter()
        bar = Bar('Arena.playGames', max=num)
        end = time.time()
        eps = 0
        maxeps = int(num)

        num = int(num / 2)
        oneWon = 0
        twoWon = 0
        draws = 0
        for _ in range(num):
            gameResult = self.playGame(verbose=verbose)
            if gameResult == 1:
                oneWon += 1
            elif gameResult == -1:
                twoWon += 1
            else:
                draws += 1
            # bookkeeping + plot progress
            eps += 1
            eps_time.update(time.time() - end)
            end = time.time()
            # `eps` was just incremented, so it already is the 1-based count.
            bar.suffix = '({eps}/{maxeps}) Eps Time: {et:.3f}s | Total: {total:} | ETA: {eta:}'.format(
                eps=eps,
                maxeps=maxeps,
                et=eps_time.avg,
                total=bar.elapsed_td,
                eta=bar.eta_td)
            bar.next()

        self.player1, self.player2 = self.player2, self.player1
        # Snapshot of the first-half tallies, used for the per-half report.
        black_start = (oneWon, twoWon, draws)
        for _ in range(num):
            gameResult = self.playGame(verbose=verbose)
            # Seats are swapped, so a -1 result is a win for the original player1.
            if gameResult == -1:
                oneWon += 1
            elif gameResult == 1:
                twoWon += 1
            else:
                draws += 1
            # bookkeeping + plot progress
            eps += 1
            eps_time.update(time.time() - end)
            end = time.time()
            # Report against the overall total (`maxeps`), not the per-half
            # count now held in `num`.
            bar.suffix = '({eps}/{maxeps}) Eps Time: {et:.3f}s | Total: {total:} | ETA: {eta:}'.format(
                eps=eps,
                maxeps=maxeps,
                et=eps_time.avg,
                total=bar.elapsed_td,
                eta=bar.eta_td)
            bar.next()

        white_start = (oneWon - black_start[0], twoWon - black_start[1],
                       draws - black_start[2])
        print('')
        print(
            'Neural network as Black - Wins of (NN Won,Opponent Won,Draw) :' +
            str(black_start))
        print(
            'Neural network as White - Wins of (NN Won,Opponent Won,Draw) :' +
            str(white_start))
        bar.finish()

        return oneWon, twoWon, draws
コード例 #26
0
    def learn(self):
        """
        Performs numIters iterations with numEps episodes of self-play in each
        iteration. After every iteration, it retrains the neural network with
        the examples in trainExamplesHistory and saves a checkpoint.

        The current iteration number is written to the file
        ``<trainExampleCheckpoint>loopinformation`` at the start of every
        iteration. When ``self.args.load_model`` is set, the stored examples
        are reloaded and the loop resumes from the iteration recorded there.
        """
        loop_info_path = self.args.trainExampleCheckpoint + "loopinformation"

        begining = 1
        if self.args.load_model == True:

            self.loadTrainExamples()
            # Context manager: the handle is closed even if reading fails.
            with open(loop_info_path, "r+") as file:
                lines = file.readlines()
            begining = lines[0]

        for i in range(int(begining), self.args.numIters + 1):

            # Persist the iteration number so an interrupted run can resume.
            with open(loop_info_path, "w+") as fileLoopInformation:
                fileLoopInformation.write(str(i))

            # bookkeeping
            print('------ITER ' + str(i) + '------')
            # examples of the iteration
            iterationTrainExamples = deque([], maxlen=self.args.maxlenOfQueue)

            eps_time = AverageMeter()
            bar = Bar('Self Play', max=self.args.numEps)
            end = time.time()

            for eps in range(self.args.numEps):
                iterationTrainExamples += self.executeEpisode()

                # bookkeeping + plot progress
                eps_time.update(time.time() - end)
                end = time.time()
                bar.suffix = '({eps}/{maxeps}) Eps Time: {et:.3f}s | Total: {total:} | ETA: {eta:}'.format(
                    eps=eps + 1,
                    maxeps=self.args.numEps,
                    et=eps_time.avg,
                    total=bar.elapsed_td,
                    eta=bar.eta_td)
                bar.next()
            bar.finish()

            # save the iteration examples to the history
            self.trainExamplesHistory.append(iterationTrainExamples)

            if len(self.trainExamplesHistory
                   ) > self.args.numItersForTrainExamplesHistory:
                print("len(trainExamplesHistory) =",
                      len(self.trainExamplesHistory),
                      " => remove the oldest trainExamples")
                self.trainExamplesHistory.pop(0)
            # backup history to a file
            # NB! the examples were collected using the model from the previous iteration, so (i-1)
            self.saveTrainExamples(i - 1)

            # shuffle examples before training
            trainExamples = []
            for e in self.trainExamplesHistory:
                trainExamples.extend(e)
            shuffle(trainExamples)

            filename = "AlphaZerocurent" + str(i) + "temp:iter" + str(self.args.numIters) + ":eps" + str(self.args.numEps) + \
                       ":dim" + str(self.game.n) + ".pth.tar"

            self.nnet.train(trainExamples)

            self.nnet.save_checkpoint(folder=self.args.checkpoint,
                                      filename=filename)

            # Drop the old search tree before building a fresh one on the
            # retrained network.
            self.mcts.clear()
            del self.mcts
            self.mcts = MCTS(self.game, self.nnet, self.args,
                             mcts=True)  # reset search tree
コード例 #27
0
    def train(self, examples):
        """
        Train ``self.nnet`` with Adam on mini-batches sampled from ``examples``.

        examples: list of examples, each example is of form (board, pi, v)

        Runs ``args.epochs`` epochs of ``int(len(examples)/args.batch_size)``
        batches each. Batches are drawn with ``np.random.randint`` (indices
        may repeat) and moved to the GPU when ``args.cuda`` is set. Nothing is
        returned.
        """
        optimizer = optim.Adam(self.nnet.parameters())

        for epoch in range(args.epochs):
            print('EPOCH ::: ' + str(epoch+1))
            self.nnet.train()
            data_time = AverageMeter()
            batch_time = AverageMeter()
            pi_losses = AverageMeter()
            v_losses = AverageMeter()
            end = time.time()

            # The batch count is fixed for the whole epoch; compute it once.
            num_batches = int(len(examples)/args.batch_size)
            bar = Bar('Training Net', max=num_batches)

            for batch_idx in range(1, num_batches + 1):
                sample_ids = np.random.randint(len(examples), size=args.batch_size)
                boards, pis, vs = list(zip(*[examples[i] for i in sample_ids]))
                boards = torch.FloatTensor(np.array(boards).astype(np.float64))
                target_pis = torch.FloatTensor(np.array(pis))
                target_vs = torch.FloatTensor(np.array(vs).astype(np.float64))

                # predict
                if args.cuda:
                    boards, target_pis, target_vs = boards.contiguous().cuda(), target_pis.contiguous().cuda(), target_vs.contiguous().cuda()
                boards, target_pis, target_vs = Variable(boards), Variable(target_pis), Variable(target_vs)

                # measure data loading time
                data_time.update(time.time() - end)

                # compute output
                out_pi, out_v = self.nnet(boards)
                l_pi = self.loss_pi(target_pis, out_pi)
                l_v = self.loss_v(target_vs, out_v)
                total_loss = l_pi + l_v

                # record loss
                # `.item()` extracts the Python scalar; indexing `.data[0]`
                # fails on 0-dim loss tensors in current PyTorch.
                # (assumes loss_pi / loss_v return single-element tensors)
                pi_losses.update(l_pi.item(), boards.size(0))
                v_losses.update(l_v.item(), boards.size(0))

                # compute gradient and do SGD step
                optimizer.zero_grad()
                total_loss.backward()
                optimizer.step()

                # measure elapsed time
                batch_time.update(time.time() - end)
                end = time.time()

                # plot progress
                bar.suffix  = '({batch}/{size}) Data: {data:.3f}s | Batch: {bt:.3f}s | Total: {total:} | ETA: {eta:} | Loss_pi: {lpi:.4f} | Loss_v: {lv:.3f}'.format(
                            batch=batch_idx,
                            size=num_batches,
                            data=data_time.avg,
                            bt=batch_time.avg,
                            total=bar.elapsed_td,
                            eta=bar.eta_td,
                            lpi=pi_losses.avg,
                            lv=v_losses.avg,
                            )
                bar.next()
            bar.finish()
コード例 #28
0
    def learn(self):
        """
        Run ``self.args.numIters`` rounds of self-play followed by training.

        Each round plays ``self.args.numEps`` episodes (skipped in round 1
        when ``self.skipFirstSelfPlay`` is set), appends the resulting
        examples to ``self.trainExamplesHistory``, trims that history to
        ``self.args.numItersForTrainExamplesHistory`` entries, saves it, and
        trains ``self.nnet`` on a shuffled copy of everything in it.

        When ``self.arenaEnabled`` is set, the retrained network is pitted
        against the pre-training snapshot and kept only if its share of
        decisive games reaches ``self.args.updateThreshold`` (or if no game
        was decisive); otherwise the snapshot is restored.
        """
        snapshot_name = 'temp.pth.tar'
        best_name = 'best.pth.tar'
        suffix_template = '({eps}/{maxeps}) Eps Time: {et:.3f}s | Total: {total:} | ETA: {eta:}'

        for i in range(1, self.args.numIters+1):
            # bookkeeping
            print('------ITER ' + str(i) + '------')
            print(str(self.game.innerN) + "x" + str(self.game.innerM))

            # ---- self-play ----
            if i > 1 or not self.skipFirstSelfPlay:
                iterationTrainExamples = deque([], maxlen=self.args.maxlenOfQueue)

                eps_time = AverageMeter()
                bar = Bar('Self Play', max=self.args.numEps)
                end = time.time()

                for eps in range(self.args.numEps):
                    # fresh search tree for every episode
                    self.mcts = MCTS(self.nnet, self.args)
                    iterationTrainExamples.extend(self.executeEpisode())

                    # bookkeeping + plot progress
                    eps_time.update(time.time() - end)
                    end = time.time()
                    bar.suffix = suffix_template.format(
                        eps=eps+1,
                        maxeps=self.args.numEps,
                        et=eps_time.avg,
                        total=bar.elapsed_td,
                        eta=bar.eta_td,
                    )
                    bar.next()
                bar.finish()

                # keep this iteration's examples in the history
                self.trainExamplesHistory.append(iterationTrainExamples)

            # ---- history maintenance ----
            if len(self.trainExamplesHistory) > self.args.numItersForTrainExamplesHistory:
                print("len(trainExamplesHistory) =", len(self.trainExamplesHistory), " => remove the oldest trainExamples")
                self.trainExamplesHistory.pop(0)
            # backup history to a file
            # NB! the examples were collected using the model from the previous iteration, so (i-1)
            self.saveTrainExamples(i-1)

            # flatten the history and shuffle it before training
            trainExamples = []
            for batch in self.trainExamplesHistory:
                trainExamples.extend(batch)
            shuffle(trainExamples)

            # ---- training: snapshot the current weights, then train ----
            self.nnet.save_checkpoint(folder=self.args.checkpoint, filename=snapshot_name)
            self.nnet.train(trainExamples)

            if not self.arenaEnabled:
                continue

            # ---- evaluation against the snapshot ----
            self.pnet.load_checkpoint(folder=self.args.checkpoint, filename=snapshot_name)

            pmcts = MCTS(self.pnet, self.args)
            nmcts = MCTS(self.nnet, self.args)

            print('PITTING AGAINST PREVIOUS VERSION')
            arena = Arena(lambda x, y: pmcts.getActionProb(x, y, temp=0),
                          lambda x, y: nmcts.getActionProb(x, y, temp=0), self.game)
            pwins, nwins, draws = arena.playGames(self.args.arenaCompare)

            print('NEW/PREV WINS : %d / %d ; DRAWS : %d' % (nwins, pwins, draws))
            rejected = pwins+nwins > 0 and float(nwins)/(pwins+nwins) < self.args.updateThreshold
            if rejected:
                print('REJECTING NEW MODEL')
                self.nnet.load_checkpoint(folder=self.args.checkpoint, filename=snapshot_name)
            else:
                print('ACCEPTING NEW MODEL')
                self.nnet.save_checkpoint(folder=self.args.checkpoint, filename=self.getCheckpointFile(i))
                self.nnet.save_checkpoint(folder=self.args.checkpoint, filename=best_name)
コード例 #29
0
ファイル: pretrain_coupled_vae.py プロジェクト: vglsd/thesis
def main():
    """Train the coupled image/text VAE and periodically dump samples.

    Reads every setting from the module-level ``parser``. The function
    prepares the output directories, builds the image transform, the
    vocabulary, the embedding layer, the train/validation loaders, the
    ``CoupledVAE`` model and its Adam optimizer, then iterates over
    ``args.num_epochs`` epochs.

    For each epoch it either reloads an existing checkpoint (while
    ``keep_loading`` is set) or trains on ``data_loader``; when training
    it writes sample reconstructions under ``result_path``, appends the
    average losses to ``losses.csv`` and saves the model state dict under
    ``model_path``.
    """
    # global args
    args = parser.parse_args()

    # <editor-fold desc="Initialization">
    if args.comment == "test":
        print("WARNING: name is test!!!\n\n")

    # now = datetime.datetime.now()
    # current_date = now.strftime("%m-%d-%H-%M")

    assert args.text_criterion in ("MSE", "Cosine", "Hinge",
                                   "NLLLoss"), 'Invalid Loss Function'
    assert args.cm_criterion in ("MSE", "Cosine",
                                 "Hinge"), 'Invalid Loss Function'

    assert 0 <= args.common_emb_ratio <= 1.0

    mask = int(args.common_emb_ratio * args.hidden_size)

    # The flag arrives as a string; only the literal 'true' enables CUDA.
    cuda = args.cuda == 'true'

    if args.load_model == "NONE":
        keep_loading = False
        # model_path = args.model_path + current_date + "/"
        model_path = args.model_path + args.comment + "/"
    else:
        keep_loading = True
        model_path = args.model_path + args.load_model + "/"

    result_path = args.result_path
    if result_path == "NONE":
        result_path = model_path + "results/"

    # exist_ok avoids the check-then-create race of os.path.exists().
    os.makedirs(result_path, exist_ok=True)
    os.makedirs(model_path, exist_ok=True)
    # </editor-fold>

    # <editor-fold desc="Image Preprocessing">

    # Image preprocessing //ATTENTION
    # For normalization, see https://github.com/pytorch/vision#models
    transform = transforms.Compose([
        transforms.RandomCrop(args.crop_size),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
    ])

    # </editor-fold>

    # <editor-fold desc="Creating Embeddings">

    # Load vocabulary wrapper.
    # NOTE: pickle.load can execute arbitrary code; only point vocab_path
    # at files you trust.
    print("Loading Vocabulary...")
    with open(args.vocab_path, 'rb') as f:
        vocab = pickle.load(f)

    # Load Embeddings
    emb_size = args.word_embedding_size
    emb_path = args.embedding_path
    if args.embedding_path[-1] == '/':
        # A directory was given: fall back to the standard GloVe file name.
        emb_path += 'glove.6B.' + str(emb_size) + 'd.txt'

    print("Loading Embeddings...")
    emb = load_glove_embeddings(emb_path, vocab.word2idx, emb_size)

    # NOTE(review): only the shape of ``emb`` is used here; the loaded
    # vectors are never copied into ``glove_emb.weight`` in this function.
    # Confirm whether that is intentional.
    glove_emb = nn.Embedding(emb.size(0), emb.size(1))

    # Freeze weights
    if args.fixed_embeddings == "true":
        glove_emb.weight.requires_grad = False

    # </editor-fold>

    # <editor-fold desc="Data-Loaders">

    # Build data loader
    print("Building Data Loader For Test Set...")
    data_loader = get_loader(args.image_dir,
                             args.caption_path,
                             vocab,
                             transform,
                             args.batch_size,
                             shuffle=True,
                             num_workers=args.num_workers)

    print("Building Data Loader For Validation Set...")
    val_loader = get_loader(args.valid_dir,
                            args.valid_caption_path,
                            vocab,
                            transform,
                            args.batch_size,
                            shuffle=True,
                            num_workers=args.num_workers)

    # </editor-fold>

    # <editor-fold desc="Network Initialization">

    print("Setting up the Networks...")
    coupled_vae = CoupledVAE(glove_emb,
                             len(vocab),
                             hidden_size=args.hidden_size,
                             latent_size=args.latent_size,
                             batch_size=args.batch_size)

    if cuda:
        coupled_vae = coupled_vae.cuda()

    # </editor-fold>

    # <editor-fold desc="Optimizers">
    print("Setting up the Optimizers...")

    vae_optim = optim.Adam(coupled_vae.parameters(),
                           lr=args.learning_rate,
                           betas=(0.5, 0.999),
                           weight_decay=0.00001)

    # </editor-fold desc="Optimizers">

    train_swapped = False  # Reverse 2

    # Global optimisation step counter, passed to seq_vae_loss below.
    step = 0

    # Start a fresh loss log; one row is appended per trained epoch.
    with open(os.path.join(result_path, "losses.csv"), "w") as text_file:
        text_file.write("Epoch, Img, Txt, CM\n")

    for epoch in range(args.num_epochs):

        # <editor-fold desc="Epoch Initialization">

        # TRAINING TIME
        print('EPOCH ::: TRAINING ::: ' + str(epoch + 1))
        batch_time = AverageMeter()
        txt_losses = AverageMeter()
        img_losses = AverageMeter()
        # NOTE(review): cm_losses is never updated in this function, so the
        # CM column only ever reports the meter's initial average.
        cm_losses = AverageMeter()
        end = time.time()

        bar = Bar('Training Net', max=len(data_loader))

        if keep_loading:
            # Try to resume from this epoch's checkpoint; the first missing
            # file switches the remaining epochs over to real training.
            suffix = "-" + str(epoch) + "-" + args.load_model + ".pkl"
            try:
                coupled_vae.load_state_dict(
                    torch.load(
                        os.path.join(args.model_path, 'coupled_vae' + suffix)))
            except FileNotFoundError:
                print("Didn't find any models switching to training")
                keep_loading = False

        if not keep_loading:

            # Set training mode
            coupled_vae.train()

            # </editor-fold desc="Epoch Initialization">

            # Flip the flag every trained epoch.
            train_swapped = not train_swapped
            for i, (images, captions, lengths) in enumerate(data_loader):

                # The final batch of every epoch is deliberately skipped.
                # NOTE(review): presumably because it may be smaller than
                # batch_size -- confirm.
                if i == len(data_loader) - 1:
                    break

                images = to_var(images)
                captions = to_var(captions)
                lengths = to_var(torch.LongTensor(lengths))

                # Forward, Backward and Optimize
                vae_optim.zero_grad()

                img_out, img_mu, img_logv, img_z, txt_out, txt_mu, txt_logv, txt_z = \
                    coupled_vae(images, captions, lengths, train_swapped)

                img_rc_loss = img_vae_loss(
                    img_out, images, img_mu,
                    img_logv) / (args.batch_size * args.crop_size**2)

                NLL_loss, KL_loss, KL_weight = seq_vae_loss(
                    txt_out, captions, lengths, txt_mu, txt_logv, "logistic",
                    step, 0.0025, 2500)
                txt_rc_loss = (NLL_loss + KL_weight *
                               KL_loss) / torch.sum(lengths).float()

                txt_losses.update(txt_rc_loss.data[0], args.batch_size)
                img_losses.update(img_rc_loss.data[0], args.batch_size)

                loss = img_rc_loss + txt_rc_loss

                loss.backward()
                vae_optim.step()
                step += 1

                if i % args.image_save_interval == 0:
                    # NOTE(review): true division yields names such as
                    # "0.0", "1.0" on Python 3; kept as-is so existing
                    # result layouts do not change.
                    subdir_path = os.path.join(
                        result_path, str(i / args.image_save_interval))

                    os.makedirs(subdir_path, exist_ok=True)

                    # Dump the first three samples of the batch.
                    for im_idx in range(3):
                        # Channels-last layout, rescaled by /2 + .5 and
                        # multiplied up to the 0-255 range before saving.
                        im_or = (images[im_idx].cpu().data.numpy().transpose(
                            1, 2, 0) / 2 + .5) * 255
                        im = (img_out[im_idx].cpu().data.numpy().transpose(
                            1, 2, 0) / 2 + .5) * 255

                        filename_prefix = os.path.join(subdir_path,
                                                       str(im_idx))
                        scipy.misc.imsave(filename_prefix + '_original.A.jpg',
                                          im_or)
                        scipy.misc.imsave(filename_prefix + '.A.jpg', im)

                        txt_or = " ".join([
                            vocab.idx2word[c]
                            for c in captions[im_idx].cpu().data.numpy()
                        ])
                        # Greedy decoding: highest-scoring token per position.
                        _, generated = torch.topk(txt_out[im_idx], 1)
                        txt = " ".join([
                            vocab.idx2word[c]
                            for c in generated[:, 0].cpu().data.numpy()
                        ])

                        with open(filename_prefix + "_captions.txt",
                                  "w") as text_file:
                            text_file.write("Epoch %d\n" % epoch)
                            text_file.write("Original: %s\n" % txt_or)
                            text_file.write("Generated: %s" % txt)

                # measure elapsed time
                batch_time.update(time.time() - end)
                end = time.time()

                # plot progress
                bar.suffix = '({batch}/{size}) Batch: {bt:.3f}s | Total: {total:} | ETA: {eta:} | Loss_Img: {img_l:.3f}| Loss_Txt: {txt_l:.3f} | Loss_CM: {cm_l:.4f}'.format(
                    batch=i,
                    size=len(data_loader),
                    bt=batch_time.avg,
                    total=bar.elapsed_td,
                    eta=bar.eta_td,
                    img_l=img_losses.avg,
                    txt_l=txt_losses.avg,
                    cm_l=cm_losses.avg,
                )
                bar.next()

            # </editor-fold desc="Logging">

            bar.finish()

            with open(os.path.join(result_path, "losses.csv"),
                      "a") as text_file:
                text_file.write("{}, {}, {}, {}\n".format(
                    epoch, img_losses.avg, txt_losses.avg, cm_losses.avg))

            # <editor-fold desc="Saving the models">
            # Save the models
            print('\n')
            print('Saving the models in {}...'.format(model_path))
            # The format string previously had no placeholder, so applying
            # "%" raised TypeError; embed the 1-based epoch in the file name.
            torch.save(
                coupled_vae.state_dict(),
                os.path.join(model_path,
                             'coupled_vae-%d' % (epoch + 1)) + ".pkl")
コード例 #30
0
def train(model, optimizer, epoch, di, args, criterion=nn.NLLLoss()):
    """Run one training epoch of ``args.batches_per_epoch`` sampled batches.

    Args:
        model: network called as ``model(seq_batch, gene_batch)``.
        optimizer: optimizer stepped once per batch.
        epoch: current epoch number (not used inside this function).
        di: data interface providing ``sample_train_batch(batch_size)``.
        args: settings; reads ``batches_per_epoch``, ``batch_size`` and
            ``cuda`` here and is forwarded to the metric helper ``eval``.
        criterion: loss applied to ``(outputs, targets)``.

    Returns:
        Tuple ``(average loss, average f1)`` over the epoch.
    """
    # switch to train mode
    model.train()

    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    precis = AverageMeter()
    recall = AverageMeter()
    f1 = AverageMeter()
    end = time.time()

    bar = Bar('Processing', max=args.batches_per_epoch)
    batch_idx = 0

    # Per-batch chunks are joined once after the loop instead of
    # re-concatenating a growing array every iteration.  The empty float64
    # seed keeps the resulting dtype identical to the incremental version.
    pred_chunks = [np.array([])]
    target_chunks = [np.array([])]

    # Selector for column 1 of the model output (loop-invariant).
    # NOTE(review): presumably the positive-class score -- confirm.
    index = torch.LongTensor([1])
    if args.cuda:
        index = index.cuda()

    while batch_idx < args.batches_per_epoch:
        seq_batch, gene_batch, target_batch = di.sample_train_batch(
            args.batch_size)
        seq_batch = torch.from_numpy(seq_batch)
        gene_batch = torch.FloatTensor(gene_batch)
        targets = torch.from_numpy(target_batch)

        # measure data loading time
        data_time.update(time.time() - end)

        # predict
        if args.cuda:
            seq_batch = seq_batch.contiguous().cuda()
            gene_batch = gene_batch.contiguous().cuda()
            # ``async`` is a reserved word since Python 3.7; PyTorch renamed
            # the argument to ``non_blocking``.
            targets = targets.cuda(non_blocking=True)

        # compute output
        outputs = model(seq_batch, gene_batch)
        loss = criterion(outputs, targets)

        # collect predictions and targets for the epoch-level metrics
        pred_chunks.append(
            torch.index_select(outputs, 1,
                               index=index).view(-1).detach().cpu().numpy())
        target_chunks.append(targets.cpu().numpy())

        # measure accuracy and record loss
        # (``eval`` here is the project's metric helper, not the builtin)
        p, r, f = eval(outputs.data, targets.data, args)
        precis.update(p, seq_batch.size(0))
        recall.update(r, seq_batch.size(0))
        f1.update(f, seq_batch.size(0))
        losses.update(loss.item(), seq_batch.size(0))

        # compute gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()
        batch_idx += 1

        # plot progress
        bar.suffix = '({batch}/{size}) Data: {data:.3f}s | Batch: {bt:.3f}s | Total: {total:} | ETA: {eta:} | Loss: {loss:.4f} | prec: {precis:.3f} | rec: {recall:.3f} | f1: {f1:.3f}'.format(
            batch=batch_idx,
            size=args.batches_per_epoch,
            data=data_time.avg,
            bt=batch_time.avg,
            total=bar.elapsed_td,
            eta=bar.eta_td,
            loss=losses.avg,
            precis=precis.avg,
            recall=recall.avg,
            f1=f1.avg,
        )
        bar.next()
    bar.finish()

    all_preds = np.concatenate(pred_chunks)
    all_targets = np.concatenate(target_chunks)

    # compute train auprc/auc for direct comparison to test
    train_auprc = sklearn.metrics.average_precision_score(
        all_targets, all_preds)
    train_auc = sklearn.metrics.roc_auc_score(all_targets, all_preds)
    print('train auprc: {auprc: .3f} | train auc: {auc: .3f}'.format(
        auprc=train_auprc,
        auc=train_auc,
    ))

    return (losses.avg, f1.avg)
コード例 #31
0
            if args.encoder_type == 'transformer':
                sent1 = sent1.cuda()
                sent2 = sent2.cuda()
                sent1_posembinput = sent1_posembinput.cuda()
                sent2_posembinput = sent2_posembinput.cuda()
            elif args.encoder_type == 'decomposable':
                sent1 = sent1.cuda()
                sent2 = sent2.cuda()
            if args.encoder_type == 'rnn':
                if len(encoder_init_hidden):
                    encoder_init_hidden = [x.cuda() for x in encoder_init_hidden]
                else:
                    encoder_init_hidden = encoder_init_hidden.cuda()

        # measure data loading time
        data_time.update(time.time() - end)

        # compute output
        if args.encoder_type == 'decomposable':
            softmax_outputs = model(
                sent1=sent1,
                sent2=sent2,
            )
        else:
            softmax_outputs = model(
                encoder_init_hidden=encoder_init_hidden,
                encoder_input=sent1,
                encoder_pos_emb_input=sent1_posembinput,
                encoder_unsort=unsort1,
                decoder_input=sent2,
                decoder_pos_emb_input=sent2_posembinput,
コード例 #32
0
    def playGames(self, num, profile, verbose=False):
        """
        Plays two legs of int(num/2) games each; self.player1 and
        self.player2 are swapped between the legs (and stay swapped
        afterwards). A game whose result is None is replayed until it
        yields a result.

        In replay mode (self.replay is truthy) a single game is played and
        None is returned.

        Args:
            num:     total number of games requested
            profile: when truthy, the games run under cProfile and the
                     statistics are printed at the end
            verbose: forwarded to playGame

        Returns:
            oneWon:      games won by player1
            twoWon:      games won by player2
            draws:       games won by nobody
            oneWhiteWon: number of times the first player won as white
            oneBlackWon: number of times the first player won as black
            twoWhiteWon: number of times the second player won as white
            twoBlackWon: number of times the second player won as black
        """
        if self.replay:
            self.playGame()
            return None

        eps_time = AverageMeter()
        bar = Bar('Arena.playGames', max=num)
        end = time.time()
        eps = 0
        maxeps = int(num)
        games_per_leg = int(num/2)

        tally = dict.fromkeys(
            ('oneWon', 'twoWon', 'draws',
             'oneWhiteWon', 'oneBlackWon', 'twoWhiteWon', 'twoBlackWon'), 0)

        if profile:
            prof = cProfile.Profile()
            prof.enable()

        # Per leg: (result value credited to the first player,
        #           colour counter for the first player,
        #           colour counter for the second player)
        legs = (
            (1, 'oneBlackWon', 'twoWhiteWon'),
            (-1, 'oneWhiteWon', 'twoBlackWon'),
        )
        for leg_no, (one_wins, one_colour, two_colour) in enumerate(legs):
            if leg_no == 1:
                # second leg: the players trade places
                self.player1, self.player2 = self.player2, self.player1

            for _ in range(games_per_leg):
                outcome = self.playGame(verbose=verbose)
                while outcome is None:
                    outcome = self.playGame(verbose=verbose)

                if outcome == one_wins:
                    tally['oneWon'] += 1
                    tally[one_colour] += 1
                elif outcome == -one_wins:
                    tally['twoWon'] += 1
                    tally[two_colour] += 1
                else:
                    tally['draws'] += 1

                # bookkeeping + plot progress
                eps += 1
                eps_time.update(time.time() - end)
                end = time.time()
                bar.suffix  = '({eps}/{maxeps}) Eps Time: {et:.3f}s | Total: {total:} | ETA: {eta:}'.format(
                    eps=eps, maxeps=maxeps, et=eps_time.avg,
                    total=bar.elapsed_td, eta=bar.eta_td)
                bar.next()

        bar.finish()
        if profile:
            prof.disable()
            prof.print_stats(sort=2)

        return (tally['oneWon'], tally['twoWon'], tally['draws'],
                tally['oneWhiteWon'], tally['oneBlackWon'],
                tally['twoWhiteWon'], tally['twoBlackWon'])
コード例 #33
0
def evaluate_model(model,
                   args,
                   di,
                   labels_avail=True,
                   type='test',
                   mode='report'):
    """Run the model over an evaluation split and report or save predictions.

    Args:
        model: network called as ``model(seq_batch, gene_batch)``.
        args: settings; reads ``batch_size``, ``cuda``, ``checkpoint`` and
            (in report mode) ``report_filename``.
        di: data interface supplying the split metadata, the
            ``eval_generator`` batches and the reporting helpers.
        labels_avail: forwarded as ``ret_labels`` when saving predictions;
            also controls whether a label matrix is dumped.
        type: split to evaluate, ``'test'`` or ``'validation'``.
        mode: ``'report'`` hands the predictions to ``di.evaluate_model``;
            ``'save_preds'`` dumps them with joblib under
            ``args.checkpoint``.

    Raises:
        Exception: if ``type`` or ``mode`` is not one of the values above.
            Both are checked before any inference is run.
    """
    # Validate the arguments up front so a typo does not cost a full
    # inference pass before failing.
    if type == 'test':
        num_examples, chrms, cell_types = di.num_test_examples, di.test_chrms, di.test_cell_types
    elif type == 'validation':
        num_examples, chrms, cell_types = di.num_validation_examples, di.validation_chrms, di.validation_cell_types
    else:
        raise Exception("type is one of [validation, test]")

    if mode not in ('report', 'save_preds'):
        raise Exception("mode is one of [report, save_preds]")

    batch_time = AverageMeter()
    data_time = AverageMeter()

    # switch to evaluate mode
    model.eval()

    end = time.time()
    # + |test_chrms|*|test_cell_types| is to account for subsampling starting
    # from each chromosome in the worst case
    max_batches = int(
        math.ceil(num_examples /
                  (di.eval_subsample * args.batch_size))) + (len(chrms) *
                                                             len(cell_types))
    bar = Bar('Processing', max=max_batches)
    batch_idx = 0
    all_preds = []

    # Selector for column 1 of the model output (loop-invariant).
    index = torch.LongTensor([1])
    if args.cuda:
        index = index.cuda()

    # ``volatile=True`` is ignored by PyTorch >= 0.4; no_grad() is the
    # supported way to skip autograd bookkeeping during inference.
    with torch.no_grad():
        for seq_batch, gene_batch in di.eval_generator(args.batch_size, type):
            data_time.update(time.time() - end)
            seq_batch = torch.from_numpy(seq_batch)
            gene_batch = torch.FloatTensor(gene_batch)
            if args.cuda:
                seq_batch = seq_batch.contiguous().cuda()
                gene_batch = gene_batch.contiguous().cuda()

            # compute output
            outputs = model(seq_batch, gene_batch)
            all_preds.append(
                torch.index_select(outputs, 1,
                                   index=index).view(-1).cpu().numpy())

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()
            batch_idx += 1

            # plot progress
            bar.suffix = '({batch}/{size}) | Data: {data:.3f}s | Batch: {bt:.3f}s | Total: {total:} | ETA: {eta:}'.format(
                batch=batch_idx,
                size=max_batches,
                data=data_time.avg,
                bt=batch_time.avg,
                total=bar.elapsed_td,
                eta=bar.eta_td,
            )
            bar.next()
    bar.finish()

    all_preds = np.concatenate(all_preds)

    if mode == 'save_preds':
        ctype_chr_pred_dict, *_ = di.populate_ctype_chr_pred_dict(
            cell_types, chrms, all_preds, ret_labels=labels_avail)

        # assumes outputs are log-softmaxed, taking exponents
        for ctype in ctype_chr_pred_dict:
            for chrm in ctype_chr_pred_dict[ctype]:
                ctype_chr_pred_dict[ctype][chrm]['preds'] = np.exp(
                    ctype_chr_pred_dict[ctype][chrm]['preds'])

        print('ALL PREDICTIONS READY, SAVING THEM')
        matrix_preds = flatten_dict_of_dicts(ctype_chr_pred_dict)

        joblib.dump(ctype_chr_pred_dict,
                    os.path.join(args.checkpoint, type + '_preds.joblib'))
        joblib.dump(
            matrix_preds,
            os.path.join(args.checkpoint, type + '_matrix_preds.joblib'))

        if labels_avail:
            matrix_labels = flatten_dict_of_dicts(ctype_chr_pred_dict,
                                                  'labels')
            joblib.dump(
                matrix_labels,
                os.path.join(args.checkpoint, type + '_matrix_labels.joblib'))

    else:  # mode == 'report' (validated above)
        print('ALL PREDICTIONS READY, PREPARING PLOTS')
        di.evaluate_model(all_preds, type, args.checkpoint,
                          args.report_filename)
0
def test(model, optimizer, epoch, di, args, criterion=nn.NLLLoss()):
    """Evaluate the model on ``args.batches_per_test_epoch`` validation batches.

    Running precision/recall/f1 averages and cumulative AUPRC/AUC are
    shown on the progress bar; all predictions and labels are dumped to
    ``validation_results.joblib`` under ``args.checkpoint``.

    Args:
        model: network called as ``model(seq_batch, gene_batch)``.
        optimizer: unused; kept for signature parity with ``train``.
        epoch: unused; kept for signature parity with ``train``.
        di: data interface providing ``sample_validation_batch(batch_size)``.
        args: settings; reads ``batches_per_test_epoch``, ``batch_size``,
            ``cuda`` and ``checkpoint`` here and is forwarded to the metric
            helper ``eval``.
        criterion: loss applied to ``(outputs, targets)``.

    Returns:
        Tuple ``(average loss, auprc)`` where ``auprc`` is computed over
        every prediction seen; it is NaN if no batch was evaluated.
    """
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    precis = AverageMeter()
    recall = AverageMeter()
    f1 = AverageMeter()

    # switch to evaluate mode
    model.eval()

    end = time.time()
    bar = Bar('Processing', max=args.batches_per_test_epoch)
    batch_idx = 0
    all_preds = np.array([])
    all_targets = np.array([])

    # Defined up front so the return below cannot hit an unbound name when
    # batches_per_test_epoch is 0.
    auprc = auc = float('nan')

    # Selector for column 1 of the model output (loop-invariant).
    index = torch.LongTensor([1])
    if args.cuda:
        index = index.cuda()

    # ``volatile=True`` is ignored by PyTorch >= 0.4; no_grad() is the
    # supported way to skip autograd bookkeeping during evaluation.
    with torch.no_grad():
        while batch_idx < args.batches_per_test_epoch:
            # measure data loading time
            data_time.update(time.time() - end)
            seq_batch, gene_batch, target_batch = di.sample_validation_batch(
                args.batch_size)
            seq_batch = torch.from_numpy(seq_batch)
            gene_batch = torch.FloatTensor(gene_batch)
            targets = torch.from_numpy(target_batch)
            if args.cuda:
                seq_batch = seq_batch.contiguous().cuda()
                gene_batch = gene_batch.contiguous().cuda()
                targets = targets.cuda()

            # compute output
            outputs = model(seq_batch, gene_batch)
            loss = criterion(outputs, targets)

            # concat to all_preds, all_targets (the cumulative arrays are
            # needed every iteration for the running AUPRC/AUC below)
            all_preds = np.concatenate(
                (all_preds,
                 torch.index_select(outputs, 1,
                                    index=index).view(-1).cpu().numpy()))
            all_targets = np.concatenate((all_targets, targets.cpu().numpy()))

            # measure accuracy and record loss
            # (``eval`` here is the project's metric helper, not the builtin)
            p, r, f = eval(outputs.data, targets.data, args)
            auprc = sklearn.metrics.average_precision_score(
                all_targets, all_preds)
            auc = sklearn.metrics.roc_auc_score(all_targets, all_preds)
            precis.update(p, seq_batch.size(0))
            recall.update(r, seq_batch.size(0))
            f1.update(f, seq_batch.size(0))
            losses.update(loss.item(), seq_batch.size(0))

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()
            batch_idx += 1
            # plot progress
            bar.suffix = '({batch}/{size}) | Loss: {loss:.4f} | precis: {precis:.3f} | recall: {recall:.3f} | f1: {f1:.3f} | auprc: {auprc:.3f} | auc: {auc:.3f}'.format(
                batch=batch_idx,
                size=args.batches_per_test_epoch,
                bt=batch_time.avg,
                total=bar.elapsed_td,
                loss=losses.avg,
                precis=precis.avg,
                recall=recall.avg,
                f1=f1.avg,
                auprc=auprc,
                auc=auc,
            )
            bar.next()
    bar.finish()

    val_results = {'preds': all_preds, 'labels': all_targets}
    joblib.dump(val_results,
                os.path.join(args.checkpoint, 'validation_results.joblib'))

    return (losses.avg, auprc)