Ejemplo n.º 1
0
    def executeEpisodes(self, game, nnet, args, iteration):
        """
        Run args.numEps self-play episodes and gather their examples.

        The given game, nnet and args are stored on the instance, and
        self.folder is set from args.folder. Every episode gets a freshly
        built MCTS whose debug folder is "Debug/<iteration>-<episode>".

        Returns:
            deque bounded by args.maxlenOfQueue that holds everything the
            executeEpisode calls returned.
        """
        self.game, self.nnet, self.args = game, nnet, args
        self.folder = self.args.folder

        suffix_template = '({eps}/{maxeps}) Eps Time: {et:.3f}s | Total: {total:} | ETA: {eta:}'
        collected = deque([], maxlen=self.args.maxlenOfQueue)

        episode_timer = AverageMeter()
        progress = Bar('Self Play', max=self.args.numEps)
        last_tick = time.time()

        for episode in range(self.args.numEps):
            # each episode searches with a brand new tree
            self.mcts = MCTS(self.game, self.nnet, self.args)
            debug_name = "-".join((str(iteration), str(episode)))
            self.mcts.debug.folder = os.path.join("Debug", debug_name)
            collected.extend(self.executeEpisode(episode))
            # report search statistics before this MCTS instance is replaced
            self.mcts.print_stats()

            # timing bookkeeping and progress display
            episode_timer.update(time.time() - last_tick)
            last_tick = time.time()
            progress.suffix = suffix_template.format(
                eps=episode + 1,
                maxeps=self.args.numEps,
                et=episode_timer.avg,
                total=progress.elapsed_td,
                eta=progress.eta_td,
            )
            progress.next()
        progress.finish()

        return collected
Ejemplo n.º 2
0
    def playGames(self, num, verbose=False):
        """
        Play num games and collect the result of each one.

        Args:
            num: number of games to play.
            verbose: forwarded unchanged to playGame.

        Returns:
            scores: list holding the value playGame returned for every game,
                    in playing order.
        """
        suffix_template = '({eps}/{maxeps}) Eps Time: {et:.3f}s | Total: {total:} | ETA: {eta:}'
        game_timer = AverageMeter()
        progress = Bar('Arena.playGames', max=num)
        last_tick = time.time()
        total_games = int(num)

        scores = []
        for played in range(1, num + 1):
            scores.append(self.playGame(verbose=verbose))

            # timing bookkeeping and progress display
            game_timer.update(time.time() - last_tick)
            last_tick = time.time()
            progress.suffix = suffix_template.format(
                eps=played,
                maxeps=total_games,
                et=game_timer.avg,
                total=progress.elapsed_td,
                eta=progress.eta_td,
            )
            progress.next()
        progress.finish()

        return scores
Ejemplo n.º 3
0
    def train(self, examples):
        """
        Fit the network on randomly drawn mini-batches.

        examples: list of examples, each example is of form (board, pi, v)

        Each of the args.epochs epochs runs int(len(examples) / args.batch_size)
        steps. A step draws args.batch_size indices independently (repeats
        are possible), runs the train op and then evaluates both losses on
        the same batch for the progress display.
        """
        suffix_template = '({batch}/{size}) Data: {data:.3f}s | Batch: {bt:.3f}s | Total: {total:} | ETA: {eta:} | Loss_pi: {lpi:.4f} | Loss_v: {lv:.3f}'

        for epoch in range(args.epochs):
            print('EPOCH ::: ' + str(epoch + 1))
            data_time, batch_time = AverageMeter(), AverageMeter()
            pi_losses, v_losses = AverageMeter(), AverageMeter()
            end = time.time()

            steps = int(len(examples) / args.batch_size)
            bar = Bar('Training Net', max=steps)

            # self.sess.run(tf.local_variables_initializer())
            for step in range(1, steps + 1):
                picked = np.random.randint(len(examples),
                                           size=args.batch_size)
                batch = [examples[idx] for idx in picked]
                boards, pis, vs = list(zip(*batch))

                # placeholders fed to both the train op and the loss ops
                feed = {
                    self.nnet.input_boards: boards,
                    self.nnet.target_pis: pis,
                    self.nnet.target_vs: vs,
                    self.nnet.dropout: args.dropout,
                    self.nnet.isTraining: True,
                }

                # time spent assembling the batch
                data_time.update(time.time() - end)

                # one optimisation step, then read the losses back
                self.sess.run(self.nnet.train_step, feed_dict=feed)
                pi_loss, v_loss = self.sess.run(
                    [self.nnet.loss_pi, self.nnet.loss_v], feed_dict=feed)
                batch_len = len(boards)
                pi_losses.update(pi_loss, batch_len)
                v_losses.update(v_loss, batch_len)

                # time for the whole step
                batch_time.update(time.time() - end)
                end = time.time()

                # progress display
                bar.suffix = suffix_template.format(
                    batch=step,
                    size=int(len(examples) / args.batch_size),
                    data=data_time.avg,
                    bt=batch_time.avg,
                    total=bar.elapsed_td,
                    eta=bar.eta_td,
                    lpi=pi_losses.avg,
                    lv=v_losses.avg,
                )
                bar.next()
            bar.finish()
Ejemplo n.º 4
0
    def gen_samples(self, iteration, proc_num):
        """
        Play this process's share of self-play episodes and save the examples.

        The number of episodes is args["numEps"] // args["genFilesPerIteration"].
        Each episode uses a freshly built MCTS. The gathered examples (bounded
        by args["maxlenOfQueue"]) are handed to
        saveTrainExamples(iteration - 1, proc_num, examples).
        """
        print('------ITER ' + str(iteration) + '------')

        suffix_template = '({eps}/{maxeps}) Eps Time: {et:.3f}s | Total: {total:} | ETA: {eta:}'
        collected = deque([], maxlen=self.args["maxlenOfQueue"])
        episode_timer = AverageMeter()
        progress = Bar(
            'Self Play',
            max=self.args["numEps"] // self.args["genFilesPerIteration"])
        last_tick = time.time()

        episode_count = self.args["numEps"] // self.args["genFilesPerIteration"]
        for episode in range(episode_count):
            # each episode searches with a brand new tree
            self.mcts = MCTS(self.game, self.nnet, self.args)

            collected.extend(self.executeEpisode())

            # timing bookkeeping and progress display
            episode_timer.update(time.time() - last_tick)
            last_tick = time.time()
            shown_total = (self.args["numEps"] //
                           self.args["genFilesPerIteration"])
            progress.suffix = suffix_template.format(
                eps=episode + 1,
                maxeps=shown_total,
                et=episode_timer.avg,
                total=progress.elapsed_td,
                eta=progress.eta_td,
            )
            progress.next()
        progress.finish()

        self.saveTrainExamples(iteration - 1, proc_num, collected)
    def playGames(self, num, verbose=False):
        """
        Plays num games in which player1 starts num/2 games and player2 starts
        num/2 games.

        int(num/2) games are played, then self.player1 and self.player2 are
        swapped and another int(num/2) games are played. The players stay
        swapped when the method returns. The progress suffix is only updated
        when self.displaybar is truthy.

        Returns:
            oneWon: games won by player1
            twoWon: games won by player2
            draws:  games won by nobody
        """
        eps_time = AverageMeter()
        bar = Bar('Arena.playGames', max=num)
        end = time.time()
        eps = 0
        maxeps = int(num)

        num = int(num/2)
        oneWon = 0
        twoWon = 0
        draws = 0
        # oneSign is the playGame result that counts as a win for the original
        # player1: 1 before the swap, -1 once the players have been swapped.
        for oneSign in (1, -1):
            if oneSign == -1:
                self.player1, self.player2 = self.player2, self.player1

            for _ in range(num):
                gameResult = self.playGame(verbose=verbose)
                if gameResult == oneSign:
                    oneWon += 1
                elif gameResult == -oneSign:
                    twoWon += 1
                else:
                    draws += 1
                # bookkeeping + plot progress
                eps += 1
                eps_time.update(time.time() - end)
                end = time.time()
                if self.displaybar:
                    # eps already counts the game just finished, and the total
                    # is the full number of games in both halves
                    bar.suffix = '({eps}/{maxeps}) Eps Time: {et:.3f}s | Total: {total:} | ETA: {eta:}'.format(
                        eps=eps,
                        maxeps=maxeps,
                        et=eps_time.avg,
                        total=bar.elapsed_td,
                        eta=bar.eta_td)
                    bar.next()

        bar.finish()

        return oneWon, twoWon, draws
Ejemplo n.º 6
0
    def playGames(self, num, verbose=False):
        """
        Plays num games in which player1 starts num/2 games and player2 starts
        num/2 games.

        int(num/2) games are played, then self.player1 and self.player2 are
        swapped and another int(num/2) games are played. The players stay
        swapped when the method returns.

        Returns:
            oneWon: games won by player1
            twoWon: games won by player2
            draws:  games won by nobody
        """
        eps_time = AverageMeter()
        bar = Bar('Arena.playGames', max=num)
        end = time.time()
        eps = 0
        maxeps = int(num)

        num = int(num/2)
        oneWon = 0
        twoWon = 0
        draws = 0
        # oneSign is the playGame result that counts as a win for the original
        # player1: 1 before the swap, -1 once the players have been swapped.
        for oneSign in (1, -1):
            if oneSign == -1:
                self.player1, self.player2 = self.player2, self.player1

            for _ in range(num):
                gameResult = self.playGame(verbose=verbose)
                if gameResult == oneSign:
                    oneWon += 1
                elif gameResult == -oneSign:
                    twoWon += 1
                else:
                    draws += 1
                # bookkeeping + plot progress
                eps += 1
                eps_time.update(time.time() - end)
                end = time.time()
                # eps already counts the game just finished, and the total is
                # the full number of games in both halves
                bar.suffix = '({eps}/{maxeps}) Eps Time: {et:.3f}s | Total: {total:} | ETA: {eta:}'.format(
                    eps=eps,
                    maxeps=maxeps,
                    et=eps_time.avg,
                    total=bar.elapsed_td,
                    eta=bar.eta_td)
                bar.next()

        bar.finish()

        return oneWon, twoWon, draws
Ejemplo n.º 7
0
    def playGames(self, num):
        """
        plays a number of games and sums the scores of both agents

        Rounds are grouped by lonely player (1 or 2) and lonely turn (0..2);
        each of the six groups gets int(num / 6) rounds. A round is only
        played while both summed scores are below
        args.updateThreshold * num * 4; once that limit is reached the
        remaining rounds are skipped but still advance the progress bar.

        :param num: number of games, has to be divisible by 6 for fair games
        :return: the summed scores of each agent
        """
        eps_time = AverageMeter()
        bar = Bar('Arena.playGames', max=num)
        end = time.time()
        eps = 0
        maxeps = int(num)

        max_scores = num * 4

        num = int(num / 6)
        scores = [0, 0]
        for lonely_player in [1, 2]:
            for lonely_turn in range(3):
                for _ in range(num):
                    below_limit = (
                        scores[0] < self.args.updateThreshold * max_scores
                        and scores[1] < self.args.updateThreshold * max_scores)
                    if below_limit:
                        self.game.reset_logic()
                        print("New Game")
                        print("Lonely Player: " + str(lonely_player))
                        print("Lonely Turn: " + str(lonely_turn + 1))
                        gameResult = self.playGame(lonely_player, lonely_turn)
                        print("RESULTS")
                        print(gameResult)
                        for t in range(3):
                            # the lonely turn's result goes to the lonely
                            # player, the other two turns to the other agent
                            if t == lonely_turn:
                                scores[lonely_player - 1] += gameResult[t]
                            else:
                                scores[2 - lonely_player] += gameResult[t]

                    print("CUMMULATED RESULTS:")
                    print(scores)
                    # bookkeeping + plot progress
                    eps += 1
                    eps_time.update(time.time() - end)
                    end = time.time()
                    # eps already counts the round just finished
                    bar.suffix = '({eps}/{maxeps}) Eps Time: {et:.3f}s | Total: {total:} | ETA: {eta:}'.format(
                        eps=eps,
                        maxeps=maxeps,
                        et=eps_time.avg,
                        total=bar.elapsed_td,
                        eta=bar.eta_td)
                    bar.next()

        bar.finish()

        return scores
def train(trainloader, model, criterion, optimizer, epoch, use_cuda):
    """Run one pass over trainloader, updating model with optimizer.

    Args:
        trainloader: iterable yielding (inputs, targets) batches; its len()
            sizes the progress bar.
        model: module that is switched to train mode and called on inputs.
        criterion: callable computing the loss from (outputs, targets).
        optimizer: optimizer stepped once per batch.
        epoch: not used inside this function.
        use_cuda: when truthy, every batch is moved to the GPU first.

    Returns:
        Tuple (losses.avg, top1.avg) of the running averages over the pass.
    """
    # switch to train mode
    model.train()

    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()
    end = time.time()

    bar = Bar('Processing', max=len(trainloader))
    for batch_idx, (inputs, targets) in enumerate(trainloader):
        # measure data loading time
        data_time.update(time.time() - end)

        if use_cuda:
            # `async` is a reserved word since Python 3.7; PyTorch renamed the
            # keyword to `non_blocking`.
            inputs, targets = inputs.cuda(), targets.cuda(non_blocking=True)

        # compute output
        outputs = model(inputs)
        loss = criterion(outputs, targets)

        # measure accuracy and record loss
        prec1, prec5 = accuracy(outputs.data, targets.data, topk=(1, 5))
        # .item() reads the scalar loss; indexing a 0-dim tensor with [0] fails
        losses.update(loss.item(), inputs.size(0))
        top1.update(prec1[0], inputs.size(0))
        top5.update(prec5[0], inputs.size(0))

        # compute gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        # plot progress
        bar.suffix = '({batch}/{size}) Data: {data:.3f}s | Batch: {bt:.3f}s | Total: {total:} | ETA: {eta:} | Loss: {loss:.4f} | top1: {top1: .4f} | top5: {top5: .4f}'.format(
            batch=batch_idx + 1,
            size=len(trainloader),
            data=data_time.avg,
            bt=batch_time.avg,
            total=bar.elapsed_td,
            eta=bar.eta_td,
            loss=losses.avg,
            top1=top1.avg,
            top5=top5.avg,
        )
        bar.next()
    bar.finish()
    return (losses.avg, top1.avg)
Ejemplo n.º 9
0
    def train(self, examples):
        """
        Fit the network on randomly drawn mini-batches.

        examples: list of examples, each example is of form (board, pi, v)

        Each of the args.epochs epochs runs int(len(examples)/args.batch_size)
        steps. A step draws args.batch_size indices independently (repeats
        are possible), runs the train op and then evaluates both losses on
        the same batch for the progress display.
        """
        suffix_template = '({batch}/{size}) Data: {data:.3f}s | Batch: {bt:.3f}s | Total: {total:} | ETA: {eta:} | Loss_pi: {lpi:.4f} | Loss_v: {lv:.3f}'

        for epoch in range(args.epochs):
            print('EPOCH ::: ' + str(epoch+1))
            data_time, batch_time = AverageMeter(), AverageMeter()
            pi_losses, v_losses = AverageMeter(), AverageMeter()
            end = time.time()

            steps = int(len(examples)/args.batch_size)
            bar = Bar('Training Net', max=steps)

            # self.sess.run(tf.local_variables_initializer())
            for step in range(1, steps + 1):
                picked = np.random.randint(len(examples), size=args.batch_size)
                batch = [examples[idx] for idx in picked]
                boards, pis, vs = list(zip(*batch))

                # placeholders fed to both the train op and the loss ops
                feed = {
                    self.nnet.input_boards: boards,
                    self.nnet.target_pis: pis,
                    self.nnet.target_vs: vs,
                    self.nnet.dropout: args.dropout,
                    self.nnet.isTraining: True,
                }

                # time spent assembling the batch
                data_time.update(time.time() - end)

                # one optimisation step, then read the losses back
                self.sess.run(self.nnet.train_step, feed_dict=feed)
                loss_ops = [self.nnet.loss_pi, self.nnet.loss_v]
                pi_loss, v_loss = self.sess.run(loss_ops, feed_dict=feed)
                batch_len = len(boards)
                pi_losses.update(pi_loss, batch_len)
                v_losses.update(v_loss, batch_len)

                # time for the whole step
                batch_time.update(time.time() - end)
                end = time.time()

                # progress display
                bar.suffix = suffix_template.format(
                    batch=step,
                    size=int(len(examples)/args.batch_size),
                    data=data_time.avg,
                    bt=batch_time.avg,
                    total=bar.elapsed_td,
                    eta=bar.eta_td,
                    lpi=pi_losses.avg,
                    lv=v_losses.avg,
                )
                bar.next()
            bar.finish()
Ejemplo n.º 10
0
    def learn(self):
        """
        Run numIters rounds of self-play, training and evaluation.

        Every round plays numEps self-play episodes (fresh MCTS each) into one
        shared example queue bounded by maxlenOfQueue, snapshots the current
        network to 'temp.pth.tar', trains it on the queue and lets the trained
        network play arenaCompare games against the snapshot. The trained
        network is dropped in favour of the snapshot when at least one game
        was decided and its share of the decided games is below
        updateThreshold; otherwise it is saved as 'checkpoint_<i>.pth.tar'
        and 'best.pth.tar'.
        """
        suffix_template = '({eps}/{maxeps}) Eps Time: {et:.3f}s | Total: {total:} | ETA: {eta:}'
        trainExamples = deque([], maxlen=self.args.maxlenOfQueue)

        for i in range(self.args.numIters):
            print('------ITER ' + str(i+1) + '------')
            episode_timer = AverageMeter()
            progress = Bar('Self Play', max=self.args.numEps)
            last_tick = time.time()

            for episode in range(self.args.numEps):
                # each episode searches with a brand new tree
                self.mcts = MCTS(self.game, self.nnet, self.args)
                trainExamples.extend(self.executeEpisode())

                # timing bookkeeping and progress display
                episode_timer.update(time.time() - last_tick)
                last_tick = time.time()
                progress.suffix = suffix_template.format(
                    eps=episode + 1,
                    maxeps=self.args.numEps,
                    et=episode_timer.avg,
                    total=progress.elapsed_td,
                    eta=progress.eta_td,
                )
                progress.next()
            progress.finish()

            # snapshot the current weights into a second network object so the
            # pre-training version is still available after train()
            self.nnet.save_checkpoint(folder=self.args.checkpoint,
                                      filename='temp.pth.tar')
            pnet = self.nnet.__class__(self.game)
            pnet.load_checkpoint(folder=self.args.checkpoint,
                                 filename='temp.pth.tar')
            pmcts = MCTS(self.game, pnet, self.args)
            self.nnet.train(trainExamples)
            nmcts = MCTS(self.game, self.nnet, self.args)

            print('PITTING AGAINST PREVIOUS VERSION')
            previous_player = lambda x: np.argmax(pmcts.getActionProb(x, temp=0))
            new_player = lambda x: np.argmax(nmcts.getActionProb(x, temp=0))
            arena = Arena(previous_player, new_player, self.game)
            pwins, nwins, draws = arena.playGames(self.args.arenaCompare)

            print('NEW/PREV WINS : ' + str(nwins) + '/' + str(pwins) + ' ; DRAWS : ' + str(draws))
            decided = pwins + nwins
            if decided > 0 and float(nwins) / decided < self.args.updateThreshold:
                print('REJECTING NEW MODEL')
                self.nnet = pnet
            else:
                print('ACCEPTING NEW MODEL')
                self.nnet.save_checkpoint(
                    folder=self.args.checkpoint,
                    filename='checkpoint_' + str(i) + '.pth.tar')
                self.nnet.save_checkpoint(folder=self.args.checkpoint,
                                          filename='best.pth.tar')
Ejemplo n.º 11
0
    def train(self, examples):
        """
        Fit the network on randomly drawn mini-batches and return the losses.

        examples: list of examples, each example is of form (board, pi, v)

        Each of the args.epochs epochs runs int(len(examples) / args.batch_size)
        steps. A step draws args.batch_size indices independently (repeats
        are possible), runs the train op and then evaluates both losses on
        the same batch.

        Returns:
            losses: [pi_loss_per_step, v_loss_per_step], two lists with one
                    entry per training step across all epochs.
        """
        losses = [[], []]
        for epoch in range(args.epochs):
            print('EPOCH ::: ' + str(epoch + 1))
            data_time = AverageMeter()
            batch_time = AverageMeter()
            pi_losses = AverageMeter()
            v_losses = AverageMeter()
            end = time.time()

            bar = Bar('Training Net', max=int(len(examples) / args.batch_size))
            batch_idx = 0

            # self.sess.run(tf.local_variables_initializer())
            while batch_idx < int(len(examples) / args.batch_size):
                sample_ids = np.random.randint(len(examples),
                                               size=args.batch_size)
                boards, pis, vs = list(zip(*[examples[i] for i in sample_ids]))

                # predict and compute gradient and do SGD step
                input_dict = {
                    self.nnet.input_boards: boards,
                    self.nnet.target_pis: pis,
                    self.nnet.target_vs: vs,
                    self.nnet.dropout: args.dropout,
                    self.nnet.isTraining: True
                }

                # measure data loading time
                data_time.update(time.time() - end)

                # record loss
                self.sess.run(self.nnet.train_step, feed_dict=input_dict)
                pi_loss, v_loss = self.sess.run(
                    [self.nnet.loss_pi, self.nnet.loss_v],
                    feed_dict=input_dict)
                pi_losses.update(pi_loss, len(boards))
                v_losses.update(v_loss, len(boards))

                losses[0].append(pi_loss)
                losses[1].append(v_loss)
                batch_idx += 1
                # measure elapsed time
                batch_time.update(time.time() - end)
                # restart the clock so both timers measure a single step
                # rather than the time since the epoch began
                end = time.time()

                bar.next()
            bar.finish()
        return losses
Ejemplo n.º 12
0
    def self_play(self):
        """
        Generate self-play examples for numIters iterations and back them up.

        In every iteration (numbered from 1) numEps episodes are played, each
        with a fresh MCTS, unless skipFirstSelfPlay is set and this is the
        first iteration. The iteration's examples (bounded by maxlenOfQueue)
        are appended to trainExamplesHistory; the oldest entry is removed once
        the history is longer than numItersForTrainExamplesHistory, and
        saveTrainExamples(i - 1) is called. aws_s3_sync() runs once after the
        last iteration.
        """
        suffix_template = '({eps}/{maxeps}) Eps Time: {et:.3f}s | Total: {total:} | ETA: {eta:}'

        # TODO: parallelize this iterations
        for i in range(1, self.args.numIters + 1):
            print('------ITER ' + str(i) + '------')

            skip_self_play = self.skipFirstSelfPlay and i <= 1
            if not skip_self_play:
                collected = deque([], maxlen=self.args.maxlenOfQueue)

                episode_timer = AverageMeter()
                progress = Bar('Self Play', max=self.args.numEps)
                last_tick = time.time()

                for episode in range(self.args.numEps):
                    # each episode searches with a brand new tree
                    self.mcts = MCTS(self.game, self.nnet, self.args)
                    collected.extend(self.executeEpisode())

                    # timing bookkeeping and progress display
                    episode_timer.update(time.time() - last_tick)
                    last_tick = time.time()
                    progress.suffix = suffix_template.format(
                        eps=episode + 1,
                        maxeps=self.args.numEps,
                        et=episode_timer.avg,
                        total=progress.elapsed_td,
                        eta=progress.eta_td,
                    )
                    progress.next()
                progress.finish()

                # keep this iteration's examples in the history
                self.trainExamplesHistory.append(collected)

            history_len = len(self.trainExamplesHistory)
            if history_len > self.args.numItersForTrainExamplesHistory:
                print("len(trainExamplesHistory) =",
                      len(self.trainExamplesHistory),
                      " => remove the oldest trainExamples")
                self.trainExamplesHistory.pop(0)

            # back the history up to a file
            # NB! the examples were collected using the model from the previous iteration, so (i-1)
            self.saveTrainExamples(i - 1)
        self.aws_s3_sync()
Ejemplo n.º 13
0
    def learn(self):
        """
        Run numIters iterations of numEps episodes, training after each one.

        Every episode's examples are passed straight to nnet.train. For each
        iteration two scatter plots over the episode index are written:
        the third field of each episode's last example to
        "fig/rewards_<i>.png" and the episode step count to
        "fig/steps_<i>.png". Nothing happens in the first iteration when
        skipFirstSelfPlay is set.
        """
        suffix_template = '({eps}/{maxeps}) Eps Time: {et:.3f}s | Total: {total:} | ETA: {eta:}'

        for i in range(1, self.args.numIters+1):
            print('------ITER ' + str(i) + '------')

            if self.skipFirstSelfPlay and i <= 1:
                continue

            episode_timer = AverageMeter()
            progress = Bar('Self Play', max=self.args.numEps)
            last_tick = time.time()

            # per-episode series for the plots below
            reward_list, count_list, step_list = [], [], []

            for episode in range(self.args.numEps):
                examples, step_count = self.executeEpisode()

                self.nnet.train(examples)

                step_list.append(step_count)
                last_example = examples[-1]
                reward_list.append(last_example[2])
                count_list.append(episode)

                # timing bookkeeping and progress display
                episode_timer.update(time.time() - last_tick)
                last_tick = time.time()
                progress.suffix = suffix_template.format(
                    eps=episode + 1,
                    maxeps=self.args.numEps,
                    et=episode_timer.avg,
                    total=progress.elapsed_td,
                    eta=progress.eta_td,
                )
                progress.next()
            progress.finish()

            plt.scatter(count_list, reward_list, label = 'rewards_training')
            plt.savefig("fig/rewards_"+str(i)+".png")
            plt.close()

            plt.scatter(count_list, step_list, label = 'steps_training')
            plt.savefig("fig/steps_"+str(i)+".png")
            plt.close()
Ejemplo n.º 14
0
Archivo: Coach.py Proyecto: zhyack/SCC
    def learn_self_play_iter(self):
        """
        Play selfplaynum episodes, then train on the accumulated replays.

        The new examples (bounded by args.maxlenOfQueue) are added to
        trainExampleSelfPlay, which is shuffled in place and passed to
        nnet.train with transform=True.
        """
        suffix_template = '({eps}/{maxeps}) Eps Time: {et:.3f}s | Total: {total:} | ETA: {eta:}'
        collected = deque([], maxlen=self.args.maxlenOfQueue)
        episode_timer = AverageMeter()
        progress = Bar('Self Play', max=self.selfplaynum)
        last_tick = time.time()

        for episode in range(self.selfplaynum):
            collected.extend(self.executeEpisode())

            # timing bookkeeping and progress display
            episode_timer.update(time.time() - last_tick)
            last_tick = time.time()
            progress.suffix = suffix_template.format(
                eps=episode + 1,
                maxeps=self.selfplaynum,
                et=episode_timer.avg,
                total=progress.elapsed_td,
                eta=progress.eta_td,
            )
            progress.next()
        progress.finish()

        self.trainExampleSelfPlay.extend(collected)
        shuffle(self.trainExampleSelfPlay)
        replay_count = len(self.trainExampleSelfPlay)
        print('Got %d replays through self-play.'%(replay_count))
        self.nnet.train(self.trainExampleSelfPlay, transform=True)
Ejemplo n.º 15
0
    def train(self, examples, transform=False, models=(0,)):
        """
        Train the selected models on randomly drawn mini-batches.

        examples: list of examples, each example is of form (board, pi, v)
        transform: passed through to every train function.
        models: indices into self.train_functions to run on each batch.
                For index 0 the returned loss is read as (pi_loss, v_loss);
                for any other index it is recorded as a single loss value.
                The default is an immutable tuple so that it cannot be
                shared and modified between calls.

        Each of the args.epochs epochs runs int(len(examples) / args.batch_size)
        steps; a step draws args.batch_size indices independently (repeats
        are possible).
        """
        for epoch in range(args.epochs):
            print('EPOCH ::: ' + str(epoch + 1))
            batch_time = AverageMeter()
            losses = AverageMeter()
            pi_losses = AverageMeter()
            v_losses = AverageMeter()
            end = time.time()

            bar = Bar('Training Net', max=int(len(examples) / args.batch_size))
            batch_idx = 0

            # self.sess.run(tf.local_variables_initializer())
            while batch_idx < int(len(examples) / args.batch_size):
                sample_ids = np.random.randint(len(examples),
                                               size=args.batch_size)
                sample_examples = [examples[i] for i in sample_ids]
                for m in models:
                    loss = self.train_functions[m](sample_examples, transform)
                    if m == 0:
                        pi_losses.update(loss[0], len(sample_examples))
                        v_losses.update(loss[1], len(sample_examples))
                    else:
                        losses.update(loss, len(sample_examples))
                # measure elapsed time for this step
                batch_time.update(time.time() - end)
                end = time.time()
                batch_idx += 1
                # plot progress
                bar.suffix = '({batch}/{size}) Total: {total:} | Loss: {loss:.3f} | Loss_pi: {lpi:.4f} | Loss_v: {lv:.3f}'.format(
                    batch=batch_idx,
                    size=int(len(examples) / args.batch_size),
                    total=bar.elapsed_td,
                    loss=losses.avg,
                    lpi=pi_losses.avg,
                    lv=v_losses.avg,
                )
                bar.next()
            bar.finish()
Ejemplo n.º 16
0
    def playGames(self, num, verbose=False):
        """
        Plays int(num / 2) games and counts how many were solved.

        NOTE(review): only half of num is played; this looks inherited from
        a two-sided arena and may not be intended here. Confirm with callers
        before changing it. The progress bar is sized to the games actually
        played.

        Returns:
            solved: number of solved puzzles (playGame returned 1)
            timed_out: number of timed_out puzzles (any other result)
        """
        num = int(num / 2)

        eps_time = AverageMeter()
        bar = Bar('Arena.playGames', max=num)
        end = time.time()
        eps = 0
        maxeps = num

        solved = 0
        timed_out = 0

        for _ in range(num):
            gameResult = self.playGame(verbose=verbose)
            if gameResult == 1:
                solved += 1
            else:
                timed_out += 1
            # bookkeeping + plot progress
            eps += 1
            eps_time.update(time.time() - end)
            end = time.time()
            # eps already counts the game just finished
            bar.suffix = '({eps}/{maxeps}) Eps Time: {et:.3f}s | Total: {total:} | ETA: {eta:}'.format(
                eps=eps,
                maxeps=maxeps,
                et=eps_time.avg,
                total=bar.elapsed_td,
                eta=bar.eta_td)
            bar.next()

        bar.finish()

        return solved, timed_out
Ejemplo n.º 17
0
    def learn(self):
        """
        Run the self-play / retrain / evaluate loop for args.numIters iterations.

        In each iteration (self-play is skipped on iteration 1 when
        skipFirstSelfPlay is set) numEps self-play episodes are executed with
        a fresh MCTS per episode, and the collected examples are appended to
        trainExamplesHistory, which is kept to at most
        numItersForTrainExamplesHistory entries. The network is then
        retrained on the shuffled history and pitted against a copy of the
        previous network. The new network is rejected only when at least one
        game was decided and its share of the decided games is below
        updateThreshold; otherwise it is saved as the new best checkpoint.
        """

        for i in range(1, self.args.numIters + 1):
            # bookkeeping
            print('------ITER ' + str(i) + '------')
            # examples of the iteration
            if not self.skipFirstSelfPlay or i > 1:
                iterationTrainExamples = deque([],
                                               maxlen=self.args.maxlenOfQueue)

                eps_time = AverageMeter()
                bar = Bar('Self Play', max=self.args.numEps)
                end = time.time()

                for eps in range(self.args.numEps):
                    # reset the search tree for every episode
                    self.mcts = MCTS(self.game, self.nnet, self.args)
                    iterationTrainExamples += self.executeEpisode()

                    # bookkeeping + plot progress
                    eps_time.update(time.time() - end)
                    end = time.time()
                    bar.suffix = '({eps}/{maxeps}) Eps Time: {et:.3f}s | Total: {total:} | ETA: {eta:}'.format(
                        eps=eps + 1,
                        maxeps=self.args.numEps,
                        et=eps_time.avg,
                        total=bar.elapsed_td,
                        eta=bar.eta_td)
                    bar.next()
                bar.finish()

                # save the iteration examples to the history
                self.trainExamplesHistory.append(iterationTrainExamples)

                # Tally examples per outcome. The third field of each example
                # is used directly as a list index; presumably it is an
                # integer outcome in {-1, 0, 1}, so -1 lands in the last
                # slot -- TODO confirm against executeEpisode.
                trainStats = [0, 0, 0]
                for _, _, res in iterationTrainExamples:
                    trainStats[res] += 1
                print(trainStats)

            if len(self.trainExamplesHistory
                   ) > self.args.numItersForTrainExamplesHistory:
                print("len(trainExamplesHistory) =",
                      len(self.trainExamplesHistory),
                      " => remove the oldest trainExamples")
                self.trainExamplesHistory.pop(0)
            # backup history to a file
            # NB! the examples were collected using the model from the previous iteration, so (i-1)
            self.saveTrainExamples(i - 1)

            # shuffle examples before training
            trainExamples = []
            for e in self.trainExamplesHistory:
                trainExamples.extend(e)
            shuffle(trainExamples)

            # training new network, keeping a copy of the old one:
            # the current weights go to temp.pth.tar and are loaded into pnet
            self.nnet.save_checkpoint(folder=self.args.checkpoint,
                                      filename='temp.pth.tar')
            self.pnet.load_checkpoint(folder=self.args.checkpoint,
                                      filename='temp.pth.tar')
            pmcts = MCTS(self.game, self.pnet, self.args)

            self.nnet.train(trainExamples)
            nmcts = MCTS(self.game, self.nnet, self.args)

            print('PITTING AGAINST PREVIOUS VERSION')
            # player1 = previous network, player2 = retrained network
            arena = Arena(lambda x: np.argmax(pmcts.getActionProb(x, temp=0)),
                          lambda x: np.argmax(nmcts.getActionProb(x, temp=0)),
                          self.game)
            pwins, nwins, draws = arena.playGames(self.args.arenaCompare)

            print('NEW/PREV WINS : %d / %d ; DRAWS : %d' %
                  (nwins, pwins, draws))
            # Draws are ignored in the ratio; when every game was drawn the
            # new model is accepted.
            if pwins + nwins > 0 and float(nwins) / (
                    pwins + nwins) < self.args.updateThreshold:
                print('REJECTING NEW MODEL')
                # roll back to the weights saved before training
                self.nnet.load_checkpoint(folder=self.args.checkpoint,
                                          filename='temp.pth.tar')
            else:
                print('ACCEPTING NEW MODEL')
                self.nnet.save_checkpoint(folder=self.args.checkpoint,
                                          filename=self.getCheckpointFile(i))
                self.nnet.save_checkpoint(folder=self.args.checkpoint,
                                          filename='best.pth.tar')
Ejemplo n.º 18
0
    def learn(self):
        """
        Run the self-play / retrain / report loop for args.numIters iterations.

        In each iteration (self-play is skipped on iteration 1 when
        skipFirstSelfPlay is set) numEps self-play episodes are executed and
        their examples appended to trainExamplesHistory, which is kept to at
        most numItersForTrainExamplesHistory entries. The network is then
        retrained on the shuffled history, its performance is printed for two
        players built on it (PureNNtPlayer and NNtBasedMCTSPlayer, each over
        args.arenaCompare games), and the retrained network is always
        accepted and checkpointed -- there is no comparison against the
        previous network in this variant.
        """
        for i in range(1, self.args.numIters + 1):
            # bookkeeping
            print('------ITER ' + str(i) + '------')
            # examples of the iteration
            if not self.skipFirstSelfPlay or i > 1:
                iterationTrainExamples = deque([],
                                               maxlen=self.args.maxlenOfQueue)

                eps_time = AverageMeter()
                bar = Bar('Self Play', max=self.args.numEps)
                end = time.time()

                for eps in range(self.args.numEps):
                    iterationTrainExamples += self.executeEpisode()

                    # bookkeeping + plot progress
                    eps_time.update(time.time() - end)
                    end = time.time()
                    bar.suffix = '({eps}/{maxeps}) Eps Time: {et:.3f}s | Total: {total:} | ETA: {eta:}'.format(
                        eps=eps + 1,
                        maxeps=self.args.numEps,
                        et=eps_time.avg,
                        total=bar.elapsed_td,
                        eta=bar.eta_td)
                    bar.next()
                bar.finish()

                # save the iteration examples to the history
                self.trainExamplesHistory.append(iterationTrainExamples)

            if len(self.trainExamplesHistory
                   ) > self.args.numItersForTrainExamplesHistory:
                print("len(trainExamplesHistory) =",
                      len(self.trainExamplesHistory),
                      " => remove the oldest trainExamples")
                self.trainExamplesHistory.pop(0)
            # backup history to a file
            # NB! the examples were collected using the model from the previous iteration, so (i-1)
            self.saveTrainExamples(i - 1)

            # shuffle examples before training
            trainExamples = []
            for e in self.trainExamplesHistory:
                trainExamples.extend(e)
            shuffle(trainExamples)

            # keep a copy of the pre-training weights on disk, then train
            self.nnet.save_checkpoint(folder=self.args.checkpoint,
                                      filename='temp.pth.tar')
            self.nnet.train(trainExamples)

            # report how the raw network plays
            print('Following is played by PureNNt:')
            purenntplayer = PureNNtPlayer(self.game,
                                          self.nnet,
                                          self.args,
                                          temp=0).play
            arena = Arena(purenntplayer, self.game)
            print('PureNNt Real Performance:',
                  arena.playGames(self.args.arenaCompare))

            # report how the network plays when driving MCTS
            print('Following is played by NNtBasedMCTS:')
            nntbasedmctsplayer = NNtBasedMCTSPlayer(
                self.game,
                self.nnet,
                self.args,
                temp=0,
                percentile=self.r_percentile).play
            arena = Arena(nntbasedmctsplayer, self.game)
            print('NNtBasedMCTS Real Performance:',
                  arena.playGames(self.args.arenaCompare))

            print('ACCEPTING NEW MODEL DIRECTLY')
            self.nnet.save_checkpoint(folder=self.args.checkpoint,
                                      filename=self.getCheckpointFile(i))
            self.nnet.save_checkpoint(folder=self.args.checkpoint,
                                      filename='best.pth.tar')
Ejemplo n.º 19
0
    def playGames(self, num, verbose=False):
        """
        Plays num games in which player1 starts num/2 games and player2 starts
        num/2 games.

        The first int(num / 2) games use the current seating; player1 and
        player2 are then swapped for the remaining int(num / 2) games, and
        results of the second half are attributed back to the original
        players. A per-seating breakdown is printed at the end.

        NOTE: player1/player2 remain swapped on the instance after this
        method returns, so a second call on the same object starts from the
        swapped seating.

        Returns:
            oneWon: games won by player1
            twoWon: games won by player2
            draws:  games won by nobody
        """
        eps_time = AverageMeter()
        bar = Bar('Arena.playGames', max=num)
        end = time.time()
        eps = 0
        maxeps = int(num)

        num = int(num / 2)
        oneWon = 0
        twoWon = 0
        draws = 0
        black_start = (0, 0, 0)

        for half in range(2):
            if half == 1:
                # Second half: swap seats and remember the first-half totals
                # so the per-seating breakdown can be derived afterwards.
                self.player1, self.player2 = self.player2, self.player1
                black_start = (oneWon, twoWon, draws)

            # playGame reports from the point of view of the current seating;
            # after the swap a result of -1 is a win for the original player1.
            one_wins_on = 1 if half == 0 else -1

            for _ in range(num):
                gameResult = self.playGame(verbose=verbose)
                if gameResult == one_wins_on:
                    oneWon += 1
                elif gameResult == -one_wins_on:
                    twoWon += 1
                else:
                    draws += 1
                # bookkeeping + plot progress
                eps += 1
                eps_time.update(time.time() - end)
                end = time.time()
                # eps already counts the game just finished, and both halves
                # report against the overall total.
                bar.suffix = '({eps}/{maxeps}) Eps Time: {et:.3f}s | Total: {total:} | ETA: {eta:}'.format(
                    eps=eps,
                    maxeps=maxeps,
                    et=eps_time.avg,
                    total=bar.elapsed_td,
                    eta=bar.eta_td)
                bar.next()

        white_start = (oneWon - black_start[0], twoWon - black_start[1],
                       draws - black_start[2])
        print('')
        print(
            'Neural network as Black - Wins of (NN Won,Opponent Won,Draw) :' +
            str(black_start))
        print(
            'Neural network as White - Wins of (NN Won,Opponent Won,Draw) :' +
            str(white_start))
        bar.finish()

        return oneWon, twoWon, draws
Ejemplo n.º 20
0
    def learn(self):
        """
        Run the parallel self-play / retrain / evaluate loop for
        args.numIters iterations.

        In each iteration (self-play is skipped on iteration 1 when
        skipFirstSelfPlay is set) args.mcts_workers worker processes running
        coach_worker are spawned, numEps work items are queued, and one
        (runtime, examples) result is collected per item. Episodes whose
        first example carries an outcome other than +/-1 are kept only with
        probability 1 - args.filter_draw_rate; all others are always kept.
        The workers are terminated once collection ends, including when it
        ends with an exception.

        The network is then retrained on the shuffled example history and
        pitted against a copy of the previous network. The new network is
        rejected only when at least one game was decided and its share of
        the decided games is below updateThreshold.
        """

        for i in range(1, self.args.numIters + 1):
            # bookkeeping
            print('------ITER ' + str(i) + '------')
            # examples of the iteration
            if not self.skipFirstSelfPlay or i > 1:
                iterationTrainExamples = deque([], maxlen=self.args.maxlenOfQueue)

                tracker = ParallelRuntimes(self.args.mcts_workers)
                bar = Bar('Self Play', max=self.args.numEps)

                # Multiprocess self-play
                proccesses = []
                work_queue = mp.Queue()
                done_queue = mp.Queue()

                print("[Master] Spawning Workers...")

                try:
                    # Spawn workers
                    for ep in range(self.args.mcts_workers):
                        tup = (work_queue, done_queue, ep)
                        proc = mp.Process(target=self.coach_worker, args=tup)
                        proc.start()

                        proccesses.append(proc)

                    print("[Master] Adding work...")

                    # Add work to queue; every item gets its own game copy
                    for eps in range(self.args.numEps):
                        data = dict()
                        data["i"] = eps
                        data["game"] = copy.deepcopy(self.game)

                        work_queue.put(data)

                    print("[Master] Waiting for results...")

                    # Wait for results to come in
                    loss_rate = self.args.filter_draw_rate
                    for ep in range(self.args.numEps):
                        runtime, examples = done_queue.get()

                        # Drop a fraction (filter_draw_rate) of the episodes
                        # that did not end decisively. An episode that
                        # produced no examples has nothing to add and is
                        # only counted for progress.
                        if examples:
                            decisive = abs(examples[0][2]) == 1
                            if decisive or random.random() >= loss_rate:
                                iterationTrainExamples += examples

                        tracker.update(runtime)
                        bar.suffix = '({eps}/{maxeps}) Eps Time: {et:.3f}s | Total: {total:} | ETA: {eta:}'.format(
                            eps=ep + 1, maxeps=self.args.numEps, et=tracker.avg(), total=bar.elapsed_td,
                            eta=tracker.eta(ep + 1, self.args.numEps))
                        bar.next()
                finally:
                    # Always stop the workers, even if queuing or collecting
                    # failed, so no child processes are left behind.
                    print("[Master] Killing workers...")

                    for p in proccesses:
                        p.terminate()
                        p.join()

                print("[Master] iter={} adding {} examples".format(i, len(iterationTrainExamples)))
                self.trainExamplesHistory.append(iterationTrainExamples)

                bar.finish()

            if len(self.trainExamplesHistory) > self.args.numItersForTrainExamplesHistory:
                print("len(trainExamplesHistory) =", len(self.trainExamplesHistory), " => remove the oldest trainExamples")
                self.trainExamplesHistory.pop(0)
            # backup history to a file
            # NOTE(review): the original comment here said the examples belong
            # to the previous iteration's model "(i-1)", but the code saves
            # under i -- confirm which index is intended.
            self.saveTrainExamples(i)

            # shuffle examples before training
            trainExamples = []
            for e in self.trainExamplesHistory:
                trainExamples.extend(e)
            shuffle(trainExamples)

            # training new network, keeping a copy of the old one
            self.nnet.save_checkpoint(folder=self.args.checkpoint, filename='temp.pth.tar')

            # normal network, don't use parallel code
            self.pnet.load_checkpoint(folder=self.args.checkpoint, filename='temp.pth.tar')
            pmcts = MCTS(copy.deepcopy(self.game), self.pnet, self.args)

            self.nnet.train(trainExamples)

            nmcts = MCTS(copy.deepcopy(self.game), self.nnet, self.args)

            print('PITTING AGAINST PREVIOUS VERSION (player1 = previous, player2 = new)')
            arena = Arena(lambda x: np.argmax(pmcts.getActionProb(x, temp=0)),
                          lambda x: np.argmax(nmcts.getActionProb(x, temp=0)),
                          self.game, num_workers=self.args.mcts_workers)
            pwins, nwins, draws = arena.playGames(self.args.arenaCompare)

            print('NEW/PREV WINS : %d / %d ; DRAWS : %d' % (nwins, pwins, draws))
            # Draws are ignored in the ratio; when every game was drawn the
            # new model is accepted.
            if pwins+nwins > 0 and float(nwins)/(pwins+nwins) < self.args.updateThreshold:
                print('REJECTING NEW MODEL')
                self.nnet.load_checkpoint(folder=self.args.checkpoint, filename='temp.pth.tar')
            else:
                print('ACCEPTING NEW MODEL')
                self.nnet.save_checkpoint(folder=self.args.checkpoint, filename=self.getCheckpointFile(i))
                self.nnet.save_checkpoint(folder=self.args.checkpoint, filename='best.pth.tar')

                # Load so all nnets are updated accordingly
                self.nnet.load_checkpoint(folder=self.args.checkpoint, filename='best.pth.tar')
Ejemplo n.º 21
0
    def learn(self):
        """
        Run the resumable self-play / retrain loop up to args.numIters.

        When args.load_model is True, the saved training examples are loaded
        and the starting iteration is read from the first line of the
        "loopinformation" file (path prefix args.trainExampleCheckpoint);
        otherwise training starts at iteration 1. At the start of every
        iteration the current iteration number is written back to that file.

        Each iteration executes numEps self-play episodes, appends the
        examples to trainExamplesHistory (kept to at most
        numItersForTrainExamplesHistory entries), retrains the network on the
        shuffled history, saves a per-iteration checkpoint and rebuilds the
        MCTS. This variant does not compare the new network against the
        previous one.
        """

        begining = 1
        # Kept as an explicit comparison so that truthy non-boolean values
        # are not treated as "resume enabled".
        if self.args.load_model == True:  # noqa: E712

            self.loadTrainExamples()
            with open(self.args.trainExampleCheckpoint + "loopinformation",
                      "r") as loop_file:
                lines = loop_file.readlines()
            begining = lines[0]

        for i in range(int(begining), self.args.numIters + 1):

            # record the iteration in progress so a later run can resume here
            with open(self.args.trainExampleCheckpoint + "loopinformation",
                      "w") as fileLoopInformation:
                fileLoopInformation.write(str(i))

            # bookkeeping
            print('------ITER ' + str(i) + '------')
            # examples of the iteration
            iterationTrainExamples = deque([], maxlen=self.args.maxlenOfQueue)

            eps_time = AverageMeter()
            bar = Bar('Self Play', max=self.args.numEps)
            end = time.time()

            for eps in range(self.args.numEps):
                iterationTrainExamples += self.executeEpisode()

                # bookkeeping + plot progress
                eps_time.update(time.time() - end)
                end = time.time()
                bar.suffix = '({eps}/{maxeps}) Eps Time: {et:.3f}s | Total: {total:} | ETA: {eta:}'.format(
                    eps=eps + 1,
                    maxeps=self.args.numEps,
                    et=eps_time.avg,
                    total=bar.elapsed_td,
                    eta=bar.eta_td)
                bar.next()
            bar.finish()

            # save the iteration examples to the history
            self.trainExamplesHistory.append(iterationTrainExamples)

            if len(self.trainExamplesHistory
                   ) > self.args.numItersForTrainExamplesHistory:
                print("len(trainExamplesHistory) =",
                      len(self.trainExamplesHistory),
                      " => remove the oldest trainExamples")
                self.trainExamplesHistory.pop(0)
            # backup history to a file
            # NB! the examples were collected using the model from the previous iteration, so (i-1)
            self.saveTrainExamples(i - 1)

            # shuffle examples before training
            trainExamples = []
            for e in self.trainExamplesHistory:
                trainExamples.extend(e)
            shuffle(trainExamples)

            # checkpoint name encodes iteration, run length, episodes and board size
            filename = "AlphaZerocurent" + str(i) + "temp:iter" + str(self.args.numIters) + ":eps" + str(self.args.numEps) + \
                       ":dim" + str(self.game.n) + ".pth.tar"

            self.nnet.train(trainExamples)

            self.nnet.save_checkpoint(folder=self.args.checkpoint,
                                      filename=filename)

            # discard the old search tree and start a fresh one on the
            # retrained network
            self.mcts.clear()
            del self.mcts
            self.mcts = MCTS(self.game, self.nnet, self.args,
                             mcts=True)  # reset search tree
Ejemplo n.º 22
0
    def learn(self):
        """
        Main training loop: self-play, retraining and optional arena check.

        Runs args.numIters iterations. Unless self-play is skipped for the
        first iteration (skipFirstSelfPlay), each iteration plays numEps
        episodes with a fresh MCTS per episode and stores the examples in
        trainExamplesHistory, which is capped at
        numItersForTrainExamplesHistory entries. The network is retrained on
        the shuffled history. When arenaEnabled is set, the retrained network
        plays the previous one and is kept only if it is not below
        updateThreshold on the decided games.
        """

        for iteration in range(1, self.args.numIters+1):
            # iteration banner and board dimensions
            print('------ITER ' + str(iteration) + '------')
            print(str(self.game.innerN) + "x" + str(self.game.innerM))

            # ---- self-play ----
            if not (self.skipFirstSelfPlay and iteration <= 1):
                fresh_examples = deque([], maxlen=self.args.maxlenOfQueue)

                episode_timer = AverageMeter()
                progress = Bar('Self Play', max=self.args.numEps)
                last_tick = time.time()

                for episode in range(self.args.numEps):
                    # every episode searches from an empty tree
                    self.mcts = MCTS(self.nnet, self.args)
                    fresh_examples += self.executeEpisode()

                    # timing + progress line
                    episode_timer.update(time.time() - last_tick)
                    last_tick = time.time()
                    fields = {
                        'eps': episode+1,
                        'maxeps': self.args.numEps,
                        'et': episode_timer.avg,
                        'total': progress.elapsed_td,
                        'eta': progress.eta_td,
                    }
                    progress.suffix = '({eps}/{maxeps}) Eps Time: {et:.3f}s | Total: {total:} | ETA: {eta:}'.format(**fields)
                    progress.next()
                progress.finish()

                # keep this iteration's examples
                self.trainExamplesHistory.append(fresh_examples)

            # ---- history maintenance ----
            history_size = len(self.trainExamplesHistory)
            if history_size > self.args.numItersForTrainExamplesHistory:
                print("len(trainExamplesHistory) =", history_size, " => remove the oldest trainExamples")
                self.trainExamplesHistory.pop(0)
            # the examples came from the previous iteration's model, hence iteration-1
            self.saveTrainExamples(iteration-1)

            # ---- training ----
            # flatten the history into one list and shuffle it
            trainExamples = [
                sample
                for batch in self.trainExamplesHistory
                for sample in batch
            ]
            shuffle(trainExamples)

            previous_weights = 'temp.pth.tar'
            best_weights = 'best.pth.tar'

            # snapshot the current weights, then train in place
            self.nnet.save_checkpoint(folder=self.args.checkpoint, filename=previous_weights)
            self.nnet.train(trainExamples)

            # ---- optional arena evaluation ----
            if not self.arenaEnabled:
                continue

            self.pnet.load_checkpoint(folder=self.args.checkpoint, filename=previous_weights)

            pmcts = MCTS(self.pnet, self.args)
            nmcts = MCTS(self.nnet, self.args)

            def previous_player(x, y):
                return pmcts.getActionProb(x, y, temp=0)

            def new_player(x, y):
                return nmcts.getActionProb(x, y, temp=0)

            print('PITTING AGAINST PREVIOUS VERSION')
            arena = Arena(previous_player, new_player, self.game)
            pwins, nwins, draws = arena.playGames(self.args.arenaCompare)

            print('NEW/PREV WINS : %d / %d ; DRAWS : %d' % (nwins, pwins, draws))
            decided = pwins+nwins
            if decided > 0 and float(nwins)/decided < self.args.updateThreshold:
                print('REJECTING NEW MODEL')
                self.nnet.load_checkpoint(folder=self.args.checkpoint, filename=previous_weights)
            else:
                print('ACCEPTING NEW MODEL')
                self.nnet.save_checkpoint(folder=self.args.checkpoint, filename=self.getCheckpointFile(iteration))
                self.nnet.save_checkpoint(folder=self.args.checkpoint, filename=best_weights)
Ejemplo n.º 23
0
def main():
    """
    Train the coupled image/text VAE configured by the command-line arguments.

    Steps: parse and validate arguments, create the model/result directories,
    build the image transform, load the vocabulary and GloVe embeddings,
    build the training and validation data loaders, construct the CoupledVAE
    and its Adam optimizer, then loop over epochs. In each epoch the model is
    either restored from an existing checkpoint (while `keep_loading` is set)
    or trained for one pass over the data loader; training periodically dumps
    sample reconstructions, appends the epoch losses to ``losses.csv`` and
    saves the model weights as ``coupled_vae-<epoch>.pkl`` in the model
    directory.
    """
    # global args
    args = parser.parse_args()

    # <editor-fold desc="Initialization">
    if args.comment == "test":
        print("WARNING: name is test!!!\n\n")

    # now = datetime.datetime.now()
    # current_date = now.strftime("%m-%d-%H-%M")

    assert args.text_criterion in ("MSE", "Cosine", "Hinge",
                                   "NLLLoss"), 'Invalid Loss Function'
    assert args.cm_criterion in ("MSE", "Cosine",
                                 "Hinge"), 'Invalid Loss Function'

    assert args.common_emb_ratio <= 1.0 and args.common_emb_ratio >= 0

    # the flag arrives as a string; only the literal 'true' enables CUDA
    cuda = args.cuda == 'true'

    if args.load_model == "NONE":
        keep_loading = False
        # model_path = args.model_path + current_date + "/"
        model_path = args.model_path + args.comment + "/"
    else:
        keep_loading = True
        model_path = args.model_path + args.load_model + "/"

    result_path = args.result_path
    if result_path == "NONE":
        result_path = model_path + "results/"

    if not os.path.exists(result_path):
        os.makedirs(result_path)
    if not os.path.exists(model_path):
        os.makedirs(model_path)
    # </editor-fold>

    # <editor-fold desc="Image Preprocessing">

    # Image preprocessing //ATTENTION
    # For normalization, see https://github.com/pytorch/vision#models
    transform = transforms.Compose([
        transforms.RandomCrop(args.crop_size),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
    ])

    # </editor-fold>

    # <editor-fold desc="Creating Embeddings">

    # Load vocabulary wrapper.
    # NOTE: pickle.load executes arbitrary code from the file; only use
    # vocabulary files from a trusted source.
    print("Loading Vocabulary...")
    with open(args.vocab_path, 'rb') as f:
        vocab = pickle.load(f)

    # Load Embeddings
    emb_size = args.word_embedding_size
    emb_path = args.embedding_path
    # a trailing slash means a directory was given; use the default file name
    if args.embedding_path[-1] == '/':
        emb_path += 'glove.6B.' + str(emb_size) + 'd.txt'

    print("Loading Embeddings...")
    emb = load_glove_embeddings(emb_path, vocab.word2idx, emb_size)

    # NOTE(review): the embedding layer is only sized from `emb`; the loaded
    # vectors are not copied into its weights anywhere in this function --
    # confirm whether that is intended.
    glove_emb = nn.Embedding(emb.size(0), emb.size(1))

    # Freeze weights
    if args.fixed_embeddings == "true":
        glove_emb.weight.requires_grad = False

    # </editor-fold>

    # <editor-fold desc="Data-Loaders">

    # Build data loader
    print("Building Data Loader For Test Set...")
    data_loader = get_loader(args.image_dir,
                             args.caption_path,
                             vocab,
                             transform,
                             args.batch_size,
                             shuffle=True,
                             num_workers=args.num_workers)

    # The validation loader is built but not used further in this function.
    print("Building Data Loader For Validation Set...")
    val_loader = get_loader(args.valid_dir,
                            args.valid_caption_path,
                            vocab,
                            transform,
                            args.batch_size,
                            shuffle=True,
                            num_workers=args.num_workers)

    # </editor-fold>

    # <editor-fold desc="Network Initialization">

    print("Setting up the Networks...")
    coupled_vae = CoupledVAE(glove_emb,
                             len(vocab),
                             hidden_size=args.hidden_size,
                             latent_size=args.latent_size,
                             batch_size=args.batch_size)

    if cuda:
        coupled_vae = coupled_vae.cuda()

    # </editor-fold>

    # <editor-fold desc="Optimizers">
    print("Setting up the Optimizers...")

    vae_optim = optim.Adam(coupled_vae.parameters(),
                           lr=args.learning_rate,
                           betas=(0.5, 0.999),
                           weight_decay=0.00001)

    # </editor-fold>

    train_swapped = False  # Reverse 2

    # global optimisation step counter, passed to seq_vae_loss
    step = 0

    # start a fresh loss log with its header row
    with open(os.path.join(result_path, "losses.csv"), "w") as text_file:
        text_file.write("Epoch, Img, Txt, CM\n")

    for epoch in range(args.num_epochs):

        # <editor-fold desc="Epoch Initialization">

        # TRAINING TIME
        print('EPOCH ::: TRAINING ::: ' + str(epoch + 1))
        batch_time = AverageMeter()
        txt_losses = AverageMeter()
        img_losses = AverageMeter()
        # cm_losses is never updated below, so the CM figures report the
        # meter's initial average.
        cm_losses = AverageMeter()
        end = time.time()

        bar = Bar('Training Net', max=len(data_loader))

        if keep_loading:
            # Try to restore this epoch's weights; the first missing
            # checkpoint switches the run over to training.
            suffix = "-" + str(epoch) + "-" + args.load_model + ".pkl"
            try:
                coupled_vae.load_state_dict(
                    torch.load(
                        os.path.join(args.model_path, 'coupled_vae' + suffix)))
            except FileNotFoundError:
                print("Didn't find any models switching to training")
                keep_loading = False

        if not keep_loading:

            # Set training mode
            coupled_vae.train()

            # </editor-fold>

            # flipped once per trained epoch and passed to the model
            train_swapped = not train_swapped
            for i, (images, captions, lengths) in enumerate(data_loader):

                # The final batch is skipped -- presumably because it can be
                # smaller than batch_size; confirm.
                if i == len(data_loader) - 1:
                    break

                images = to_var(images)
                captions = to_var(captions)
                lengths = to_var(
                    torch.LongTensor(lengths))  # print(captions.size())

                # Forward, Backward and Optimize
                vae_optim.zero_grad()

                img_out, img_mu, img_logv, img_z, txt_out, txt_mu, txt_logv, txt_z = \
                    coupled_vae(images, captions, lengths, train_swapped)

                # image loss normalised by batch size and crop area
                img_rc_loss = img_vae_loss(
                    img_out, images, img_mu,
                    img_logv) / (args.batch_size * args.crop_size**2)

                # text loss: NLL plus weighted KL, normalised by the total
                # caption length of the batch
                NLL_loss, KL_loss, KL_weight = seq_vae_loss(
                    txt_out, captions, lengths, txt_mu, txt_logv, "logistic",
                    step, 0.0025, 2500)
                txt_rc_loss = (NLL_loss + KL_weight *
                               KL_loss) / torch.sum(lengths).float()

                # NOTE(review): `.data[0]` indexes the loss as a one-element
                # tensor; newer PyTorch versions may need `.item()` instead --
                # confirm against the PyTorch version in use.
                txt_losses.update(txt_rc_loss.data[0], args.batch_size)
                img_losses.update(img_rc_loss.data[0], args.batch_size)

                loss = img_rc_loss + txt_rc_loss

                loss.backward()
                vae_optim.step()
                step += 1

                # periodically dump the first three samples of the batch
                if i % args.image_save_interval == 0:
                    # `/` is true division under Python 3, so the directory
                    # name is the string of a float (e.g. "0.0").
                    subdir_path = os.path.join(
                        result_path, str(i / args.image_save_interval))

                    if not os.path.exists(subdir_path):
                        os.makedirs(subdir_path)

                    for im_idx in range(3):
                        # im_or = (images[im_idx].cpu().data.numpy().transpose(1,2,0))*255
                        # im = (img_out[im_idx].cpu().data.numpy().transpose(1,2,0))*255
                        # channels-first -> channels-last, then map the
                        # values via x / 2 + 0.5 and scale by 255
                        im_or = (images[im_idx].cpu().data.numpy().transpose(
                            1, 2, 0) / 2 + .5) * 255
                        im = (img_out[im_idx].cpu().data.numpy().transpose(
                            1, 2, 0) / 2 + .5) * 255
                        # im = img_out[im_idx].cpu().data.numpy().transpose(1,2,0)*255

                        filename_prefix = os.path.join(subdir_path,
                                                       str(im_idx))
                        scipy.misc.imsave(filename_prefix + '_original.A.jpg',
                                          im_or)
                        scipy.misc.imsave(filename_prefix + '.A.jpg', im)

                        txt_or = " ".join([
                            vocab.idx2word[c]
                            for c in captions[im_idx].cpu().data.numpy()
                        ])
                        # take the top-1 token at every position
                        _, generated = torch.topk(txt_out[im_idx], 1)
                        txt = " ".join([
                            vocab.idx2word[c]
                            for c in generated[:, 0].cpu().data.numpy()
                        ])

                        with open(filename_prefix + "_captions.txt",
                                  "w") as text_file:
                            text_file.write("Epoch %d\n" % epoch)
                            text_file.write("Original: %s\n" % txt_or)
                            text_file.write("Generated: %s" % txt)

                # measure elapsed time
                batch_time.update(time.time() - end)
                end = time.time()

                # plot progress
                bar.suffix = '({batch}/{size}) Batch: {bt:.3f}s | Total: {total:} | ETA: {eta:} | Loss_Img: {img_l:.3f}| Loss_Txt: {txt_l:.3f} | Loss_CM: {cm_l:.4f}'.format(
                    batch=i,
                    size=len(data_loader),
                    bt=batch_time.avg,
                    total=bar.elapsed_td,
                    eta=bar.eta_td,
                    img_l=img_losses.avg,
                    txt_l=txt_losses.avg,
                    cm_l=cm_losses.avg,
                )
                bar.next()

            bar.finish()

            # append this epoch's average losses to the log
            with open(os.path.join(result_path, "losses.csv"),
                      "a") as text_file:
                text_file.write("{}, {}, {}, {}\n".format(
                    epoch, img_losses.avg, txt_losses.avg, cm_losses.avg))

            # <editor-fold desc="Saving the models">
            # Save the models
            print('\n')
            print('Saving the models in {}...'.format(model_path))
            # The file name carries the 1-based epoch number.
            # NOTE(review): the loading branch above looks for
            # 'coupled_vae-<epoch>-<load_model>.pkl' under args.model_path,
            # which does not match this name or directory -- confirm the
            # intended naming scheme.
            torch.save(
                coupled_vae.state_dict(),
                os.path.join(model_path,
                             'coupled_vae-%d' % (epoch + 1)) + ".pkl")
            # </editor-fold>
Ejemplo n.º 24
0
def evaluate_model(model,
                   args,
                   di,
                   labels_avail=True,
                   type='test',
                   mode='report'):
    """Run ``model`` over an evaluation split, then report or save predictions.

    Args:
        model: network called as ``model(seq_batch, gene_batch)``; column 1 of
            its output is kept as the per-example prediction.
        args: options object; ``batch_size``, ``cuda``, ``checkpoint`` and
            ``report_filename`` are read here.
        di: data interface providing the split metadata, ``eval_generator``,
            ``populate_ctype_chr_pred_dict`` and ``evaluate_model``.
        labels_avail: forwarded as ``ret_labels``; when true the label matrix
            is dumped too (``save_preds`` mode only).
        type: ``'test'`` or ``'validation'``. The name shadows the builtin but
            is kept because callers may pass it by keyword.
        mode: ``'report'`` (delegate to ``di.evaluate_model``) or
            ``'save_preds'`` (dump predictions with joblib under
            ``args.checkpoint``).

    Raises:
        ValueError: if ``type`` or ``mode`` is not a supported value. Both are
            checked before any batch is evaluated.
    """
    batch_time = AverageMeter()
    data_time = AverageMeter()

    # switch to evaluate mode
    model.eval()

    if type == 'test':
        num_examples, chrms, cell_types = (di.num_test_examples,
                                           di.test_chrms,
                                           di.test_cell_types)
    elif type == 'validation':
        num_examples, chrms, cell_types = (di.num_validation_examples,
                                           di.validation_chrms,
                                           di.validation_cell_types)
    else:
        raise ValueError("type is one of [validation, test]")

    # Fail fast: previously a bad mode was only noticed after the whole
    # evaluation pass had already run.
    if mode not in ('report', 'save_preds'):
        raise ValueError("mode is one of [report, save_preds]")

    end = time.time()
    # + |chrms|*|cell_types| is to account for subsampling starting from each
    # chromosome in the worst case
    max_batches = int(
        math.ceil(num_examples /
                  (di.eval_subsample * args.batch_size))) + (len(chrms) *
                                                             len(cell_types))
    bar = Bar('Processing', max=max_batches)
    batch_idx = 0
    all_preds = []

    # Index selecting output column 1; constant, so build it once.
    index = torch.LongTensor([1])
    if args.cuda:
        index = index.cuda()

    # no_grad replaces the removed ``volatile=True`` flag so that no autograd
    # graph is recorded during evaluation.
    with torch.no_grad():
        for seq_batch, gene_batch in di.eval_generator(args.batch_size, type):
            data_time.update(time.time() - end)
            seq_batch = torch.from_numpy(seq_batch)
            gene_batch = torch.FloatTensor(gene_batch)
            if args.cuda:
                seq_batch = seq_batch.contiguous().cuda()
                gene_batch = gene_batch.contiguous().cuda()

            # compute output
            outputs = model(seq_batch, gene_batch)
            all_preds.append(
                torch.index_select(outputs, 1, index=index).view(
                    -1).detach().cpu().numpy())

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()
            batch_idx += 1

            # plot progress
            bar.suffix = '({batch}/{size}) | Data: {data:.3f}s | Batch: {bt:.3f}s | Total: {total:} | ETA: {eta:}'.format(
                batch=batch_idx,
                size=max_batches,
                data=data_time.avg,
                bt=batch_time.avg,
                total=bar.elapsed_td,
                eta=bar.eta_td,
            )
            bar.next()
    bar.finish()

    all_preds = np.concatenate(all_preds)

    if mode == 'save_preds':
        ctype_chr_pred_dict, *_ = di.populate_ctype_chr_pred_dict(
            cell_types, chrms, all_preds, ret_labels=labels_avail)

        # assumes outputs are log-softmaxed, taking exponents
        for ctype in ctype_chr_pred_dict:
            for chrm in ctype_chr_pred_dict[ctype]:
                ctype_chr_pred_dict[ctype][chrm]['preds'] = np.exp(
                    ctype_chr_pred_dict[ctype][chrm]['preds'])

        print('ALL PREDICTIONS READY, SAVING THEM')
        matrix_preds = flatten_dict_of_dicts(ctype_chr_pred_dict)

        joblib.dump(ctype_chr_pred_dict,
                    os.path.join(args.checkpoint, type + '_preds.joblib'))
        joblib.dump(
            matrix_preds,
            os.path.join(args.checkpoint, type + '_matrix_preds.joblib'))

        if labels_avail:
            matrix_labels = flatten_dict_of_dicts(ctype_chr_pred_dict,
                                                  'labels')
            joblib.dump(
                matrix_labels,
                os.path.join(args.checkpoint, type + '_matrix_labels.joblib'))

    else:  # mode == 'report' (validated above)
        print('ALL PREDICTIONS READY, PREPARING PLOTS')
        di.evaluate_model(all_preds, type, args.checkpoint,
                          args.report_filename)
Ejemplo n.º 25
0
    def playGames(self, num, profile, verbose=False):
        """
        Play int(num/2) games, swap self.player1 and self.player2, then play
        int(num/2) more. A game whose result is None is replayed until it
        yields a result.

        In replay mode (self.replay truthy) a single game is played and None
        is returned. When profile is truthy the games run under cProfile and
        the stats are printed at the end.

        Returns:
            oneWon:      games won by the original player1
            twoWon:      games won by the original player2
            draws:       games with any other result
            oneWhiteWon: player1 wins counted as white
            oneBlackWon: player1 wins counted as black
            twoWhiteWon: player2 wins counted as white
            twoBlackWon: player2 wins counted as black
        """
        if self.replay:
            self.playGame()
            return None

        eps_time = AverageMeter()
        bar = Bar('Arena.playGames', max=num)
        end = time.time()
        eps = 0
        maxeps = int(num)
        games_per_half = int(num/2)

        draws = 0
        oneWhiteWon = oneBlackWon = 0
        twoWhiteWon = twoBlackWon = 0

        profiler = None
        if profile:
            profiler = cProfile.Profile()
            profiler.enable()

        # A result of 1 is tallied as a black win and -1 as a white win; the
        # original player1 is black in the first half, white in the second.
        for one_is_black in (True, False):
            if not one_is_black:
                self.player1, self.player2 = self.player2, self.player1

            for _ in range(games_per_half):
                outcome = self.playGame(verbose=verbose)
                while outcome is None:
                    outcome = self.playGame(verbose=verbose)

                if outcome == 1:
                    if one_is_black:
                        oneBlackWon += 1
                    else:
                        twoBlackWon += 1
                elif outcome == -1:
                    if one_is_black:
                        twoWhiteWon += 1
                    else:
                        oneWhiteWon += 1
                else:
                    draws += 1

                # bookkeeping + plot progress
                eps += 1
                now = time.time()
                eps_time.update(now - end)
                end = time.time()
                bar.suffix  = '({eps}/{maxeps}) Eps Time: {et:.3f}s | Total: {total:} | ETA: {eta:}'.format(
                    eps=eps, maxeps=maxeps, et=eps_time.avg,
                    total=bar.elapsed_td, eta=bar.eta_td)
                bar.next()

        bar.finish()
        if profile:
            profiler.disable()
            profiler.print_stats(sort=2)

        oneWon = oneWhiteWon + oneBlackWon
        twoWon = twoWhiteWon + twoBlackWon
        return oneWon, twoWon, draws, oneWhiteWon, oneBlackWon, twoWhiteWon, twoBlackWon
Ejemplo n.º 26
0
def train(model, optimizer, epoch, di, args, criterion=nn.NLLLoss()):
    """Train ``model`` for ``args.batches_per_epoch`` sampled batches.

    Args:
        model: network called as ``model(seq_batch, gene_batch)``.
        optimizer: optimizer stepped once per batch.
        epoch: unused here; kept for interface compatibility.
        di: data interface providing ``sample_train_batch(batch_size)``.
        args: options object; ``batches_per_epoch``, ``batch_size`` and
            ``cuda`` are read here (it is also passed to ``eval``).
        criterion: loss applied to ``(outputs, targets)``.

    Returns:
        tuple: ``(losses.avg, f1.avg)`` from the loss and F1 meters.
    """
    # switch to train mode
    model.train()

    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    precis = AverageMeter()
    recall = AverageMeter()
    f1 = AverageMeter()
    end = time.time()

    bar = Bar('Processing', max=args.batches_per_epoch)
    batch_idx = 0

    # Per-batch chunks, concatenated once after the loop instead of regrowing
    # an array every batch.
    pred_chunks = []
    target_chunks = []

    # Index selecting output column 1; constant, so build it once.
    index = torch.LongTensor([1])
    if args.cuda:
        index = index.cuda()

    while batch_idx < args.batches_per_epoch:
        seq_batch, gene_batch, target_batch = di.sample_train_batch(
            args.batch_size)
        seq_batch = torch.from_numpy(seq_batch)
        gene_batch = torch.FloatTensor(gene_batch)
        targets = torch.from_numpy(target_batch)

        # measure data loading time
        data_time.update(time.time() - end)

        if args.cuda:
            seq_batch = seq_batch.contiguous().cuda()
            gene_batch = gene_batch.contiguous().cuda()
            # ``async`` is a reserved word since Python 3.7; PyTorch renamed
            # the keyword to ``non_blocking``.
            targets = targets.cuda(non_blocking=True)

        # compute output
        outputs = model(seq_batch, gene_batch)
        loss = criterion(outputs, targets)

        # keep column-1 predictions and targets for the epoch-level metrics
        pred_chunks.append(
            torch.index_select(outputs, 1, index=index).view(
                -1).detach().cpu().numpy())
        target_chunks.append(targets.detach().cpu().numpy())

        # measure accuracy and record loss
        # NOTE(review): ``eval`` here must be a project helper that shadows
        # the builtin and returns (precision, recall, f1) - confirm.
        p, r, f = eval(outputs.data, targets.data, args)
        precis.update(p, seq_batch.size(0))
        recall.update(r, seq_batch.size(0))
        f1.update(f, seq_batch.size(0))
        losses.update(loss.item(), seq_batch.size(0))
        # compute gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()
        batch_idx += 1

        # plot progress
        bar.suffix = '({batch}/{size}) Data: {data:.3f}s | Batch: {bt:.3f}s | Total: {total:} | ETA: {eta:} | Loss: {loss:.4f} | prec: {precis:.3f} | rec: {recall:.3f} | f1: {f1:.3f}'.format(
            batch=batch_idx,
            size=args.batches_per_epoch,
            data=data_time.avg,
            bt=batch_time.avg,
            total=bar.elapsed_td,
            eta=bar.eta_td,
            loss=losses.avg,
            precis=precis.avg,
            recall=recall.avg,
            f1=f1.avg,
        )
        bar.next()
    bar.finish()

    # The leading empty float array keeps the dtype promotion (and the
    # zero-batch result) identical to growing from ``np.array([])``.
    all_preds = np.concatenate([np.array([])] + pred_chunks)
    all_targets = np.concatenate([np.array([])] + target_chunks)

    # compute train auprc/auc for direct comparison to test
    train_auprc = sklearn.metrics.average_precision_score(
        all_targets, all_preds)
    train_auc = sklearn.metrics.roc_auc_score(all_targets, all_preds)
    print('train auprc: {auprc: .3f} | train auc: {auc: .3f}'.format(
        auprc=train_auprc,
        auc=train_auc,
    ))

    return (losses.avg, f1.avg)
Ejemplo n.º 27
0
def test(model, optimizer, epoch, di, args, criterion=nn.NLLLoss()):
    """Evaluate ``model`` on ``args.batches_per_test_epoch`` validation batches.

    Predictions (output column 1) and targets are accumulated across batches;
    AUPRC and AUC are recomputed on everything seen so far after each batch
    for the progress line. The accumulated arrays are dumped to
    ``validation_results.joblib`` under ``args.checkpoint``.

    Args:
        model: network called as ``model(seq_batch, gene_batch)``.
        optimizer: unused here; kept for interface compatibility.
        epoch: unused here; kept for interface compatibility.
        di: data interface providing ``sample_validation_batch(batch_size)``.
        args: options object; ``batches_per_test_epoch``, ``batch_size``,
            ``cuda`` and ``checkpoint`` are read here.
        criterion: loss applied to ``(outputs, targets)``.

    Returns:
        tuple: ``(losses.avg, auprc)`` where ``auprc`` is the value computed
        after the last batch.
    """
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    precis = AverageMeter()
    recall = AverageMeter()
    f1 = AverageMeter()

    # switch to evaluate mode
    model.eval()

    end = time.time()
    bar = Bar('Processing', max=args.batches_per_test_epoch)
    batch_idx = 0
    all_preds = np.array([])
    all_targets = np.array([])

    # Index selecting output column 1; constant, so build it once.
    index = torch.LongTensor([1])
    if args.cuda:
        index = index.cuda()

    # no_grad replaces the removed ``volatile=True`` flag so that no autograd
    # graph is recorded during evaluation.
    with torch.no_grad():
        while batch_idx < args.batches_per_test_epoch:
            seq_batch, gene_batch, target_batch = di.sample_validation_batch(
                args.batch_size)
            seq_batch = torch.from_numpy(seq_batch)
            gene_batch = torch.FloatTensor(gene_batch)
            targets = torch.from_numpy(target_batch)

            # measure data loading time (after the batch has been fetched)
            data_time.update(time.time() - end)

            if args.cuda:
                seq_batch = seq_batch.contiguous().cuda()
                gene_batch = gene_batch.contiguous().cuda()
                targets = targets.cuda()

            # compute output
            outputs = model(seq_batch, gene_batch)
            loss = criterion(outputs, targets)

            # concat to all_preds, all_targets
            all_preds = np.concatenate(
                (all_preds,
                 torch.index_select(outputs, 1, index=index).view(
                     -1).detach().cpu().numpy()))
            all_targets = np.concatenate(
                (all_targets, targets.detach().cpu().numpy()))

            # measure accuracy and record loss
            # NOTE(review): ``eval`` here must be a project helper that
            # shadows the builtin and returns (precision, recall, f1).
            p, r, f = eval(outputs.data, targets.data, args)
            auprc = sklearn.metrics.average_precision_score(
                all_targets, all_preds)
            auc = sklearn.metrics.roc_auc_score(all_targets, all_preds)
            precis.update(p, seq_batch.size(0))
            recall.update(r, seq_batch.size(0))
            f1.update(f, seq_batch.size(0))
            losses.update(loss.item(), seq_batch.size(0))

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()
            batch_idx += 1
            # plot progress
            bar.suffix = '({batch}/{size}) | Loss: {loss:.4f} | precis: {precis:.3f} | recall: {recall:.3f} | f1: {f1:.3f} | auprc: {auprc:.3f} | auc: {auc:.3f}'.format(
                batch=batch_idx,
                size=args.batches_per_test_epoch,
                loss=losses.avg,
                precis=precis.avg,
                recall=recall.avg,
                f1=f1.avg,
                auprc=auprc,
                auc=auc,
            )
            bar.next()
    bar.finish()

    val_results = {'preds': all_preds, 'labels': all_targets}
    joblib.dump(val_results,
                os.path.join(args.checkpoint, 'validation_results.joblib'))

    return (losses.avg, auprc)
    def train(self, examples):
        """
        Train self.nnet on randomly sampled mini-batches of examples.

        examples: list of examples; each is unpacked here as the 5-tuple
                  (agent_num, obs, pi, v, next_obs). Only obs, pi and v are
                  used.

        For each of args.epochs epochs, int(len(examples) / args.batch_size)
        batches are drawn with replacement, and the state dict is saved to
        self.args.env_name + '.pth' at the end of the epoch.
        NOTE(review): epochs/batch_size/cuda come from a module-level `args`
        while the file name comes from `self.args` - confirm this is intended.

        Returns:
            (vloss_hist, ploss_hist): per-batch value and policy losses as
            detached tensors, in training order.
        """
        optimizer = optim.Adam(self.nnet.parameters())
        vloss_hist = []  # set list for easy look at value loss and policy loss
        ploss_hist = []
        num_batches = int(len(examples) / args.batch_size)
        for epoch in range(args.epochs):
            print('EPOCH ::: ' + str(epoch + 1))
            self.nnet.train()
            data_time = AverageMeter()
            batch_time = AverageMeter()
            pi_losses = AverageMeter()
            v_losses = AverageMeter()
            end = time.time()

            bar = Bar('Training Net', max=num_batches)
            batch_idx = 0

            while batch_idx < num_batches:
                sample_ids = np.random.randint(len(examples),
                                               size=args.batch_size)
                _agent_num, obs, pis, vs, _next_obs = list(
                    zip(*[examples[i] for i in sample_ids]))
                obs = torch.FloatTensor(np.array(obs).astype(np.float64))
                target_pis = torch.FloatTensor(np.array(pis))
                target_vs = torch.FloatTensor(np.array(vs).astype(np.float64))

                # predict
                if args.cuda:
                    obs = obs.contiguous().cuda()
                    target_pis = target_pis.contiguous().cuda()
                    target_vs = target_vs.contiguous().cuda()

                # measure data loading time
                data_time.update(time.time() - end)

                # compute output
                out_pi, out_v = self.nnet(obs)
                l_pi = self.loss_pi(target_pis, out_pi)
                l_v = self.loss_v(target_vs, out_v)
                # Store detached copies: keeping the live loss tensors would
                # retain every batch's autograd graph for the whole run.
                vloss_hist.append(l_v.detach())
                ploss_hist.append(l_pi.detach())
                total_loss = l_pi + l_v

                # record loss
                pi_losses.update(l_pi.item(), obs.size(0))
                v_losses.update(l_v.item(), obs.size(0))

                # compute gradient and do SGD step
                optimizer.zero_grad()
                total_loss.backward()
                optimizer.step()

                # measure elapsed time
                batch_time.update(time.time() - end)
                end = time.time()
                batch_idx += 1

                # plot progress
                bar.suffix = '({batch}/{size}) Data: {data:.3f}s | Batch: {bt:.3f}s | Total: {total:} | ETA: {eta:} | Loss_pi: {lpi:.4f} | Loss_v: {lv:.3f}'.format(
                    batch=batch_idx,
                    size=num_batches,
                    data=data_time.avg,
                    bt=batch_time.avg,
                    total=bar.elapsed_td,
                    eta=bar.eta_td,
                    lpi=pi_losses.avg,
                    lv=v_losses.avg,
                )
                bar.next()
            bar.finish()
            torch.save(self.nnet.state_dict(), self.args.env_name + '.pth')
        return vloss_hist, ploss_hist
Ejemplo n.º 29
0
    def learn(self):
        """
        Run self.args['numIters'] iterations of self-play followed by
        retraining.

        Each iteration plays self.args['numEps'] self-play episodes (skipped
        on the first iteration when self.skipFirstSelfPlay is set), appends
        the collected examples to self.trainExamplesHistory, drops the oldest
        entry once the history exceeds 'numItersForTrainExamplesHistory',
        saves the history, and retrains self.nnet on the shuffled examples.

        With self.args['Arena'] == True the retrained network plays against a
        copy of the previous one (self.pnet) and is rolled back when its share
        of the decisive games is below self.args['updateThreshold']; with no
        decisive games it is accepted. Otherwise the retrained network is
        always checkpointed.
        """
        cfg = self.args

        # Optional fixed sensing matrix, shared by self-play and the arena:
        # either loaded from disk or generated once and saved.
        if cfg['fixed_matrix'] == True:
            if cfg['load_existing_matrix'] == True:
                matrix_file = cfg['fixed_matrix_filepath'] + '/sensing_matrix.npy'
                # Loaded twice on purpose: each args object gets its own array.
                self.game_args.sensing_matrix = np.load(matrix_file)
                self.arena_game_args.sensing_matrix = np.load(matrix_file)
            else:
                # Generate a matrix of type cfg['matrix_type'], hand the same
                # object to the arena args, then save it.
                self.game_args.generateSensingMatrix(cfg['m'], cfg['n'], cfg['matrix_type'])
                self.arena_game_args.sensing_matrix = self.game_args.sensing_matrix
                self.game_args.save_Matrix(cfg['fixed_matrix_filepath'])

        for iteration in range(1, cfg['numIters'] + 1):
            print('------ITER ' + str(iteration) + '------')
            # Examples are gathered with the previous iteration's model, so
            # files written below are tagged with iteration - 1.
            tag = str(iteration - 1)

            # Self-play is skipped only on iteration 1 when
            # self.skipFirstSelfPlay is set.
            if iteration > 1 or not self.skipFirstSelfPlay:
                episode_examples = deque([], maxlen=cfg['maxlenOfQueue'])
                eps_time = AverageMeter()
                bar = Bar('Self Play', max=cfg['numEps'])
                end = time.time()

                for eps in range(cfg['numEps']):
                    # A new sensing matrix per game unless it is fixed.
                    if cfg['fixed_matrix'] == False:
                        self.game_args.generateSensingMatrix(cfg['m'], cfg['n'], cfg['matrix_type'])
                    # New observed vector y; needs a matrix already present
                    # in self.game_args.
                    self.game_args.generateNewObsVec(cfg['x_type'], cfg['sparsity'])
                    # Fresh search tree for every game.
                    self.mcts = MCTS(self.game, self.nnet, cfg, self.game_args, self.skip_nnet)

                    episode_examples.extend(self.executeEpisode())

                    # bookkeeping + plot progress
                    eps_time.update(time.time() - end)
                    end = time.time()
                    bar.suffix  = '({eps}/{maxeps}) Eps Time: {et:.3f}s | Total: {total:} | ETA: {eta:}'.format(
                        eps=eps+1,
                        maxeps=cfg['numEps'],
                        et=eps_time.avg,
                        total=bar.elapsed_td,
                        eta=bar.eta_td)
                    bar.next()
                bar.finish()

                # One deque of examples per iteration goes into the history.
                self.trainExamplesHistory.append(episode_examples)

            # Also reached directly on iteration 1 when self-play was skipped.
            if len(self.trainExamplesHistory) > cfg['numItersForTrainExamplesHistory']:
                print("len(trainExamplesHistory) =", len(self.trainExamplesHistory), " => remove the oldest trainExamples")
                self.trainExamplesHistory.pop(0)

            # Back up the history to a file.
            self.saveTrainExamples(iteration - 1)

            # Flatten the list of deques into one list and shuffle it.
            trainExamples = [sample
                             for batch in self.trainExamplesHistory
                             for sample in batch]
            shuffle(trainExamples)

            checkpoint_dir = cfg['network_checkpoint']

            if cfg['Arena'] == True:
                # Snapshot the current network into self.pnet via a temp
                # checkpoint before retraining.
                self.nnet.save_checkpoint(folder=checkpoint_dir, filename='temp')
                self.pnet.load_checkpoint(folder=checkpoint_dir, filename='temp')

                # Convert the examples to the network's training format and
                # train; self.nnet's weights are updated here.
                trainExamples = self.nnet.constructTraining(trainExamples)
                self.nnet.train(trainExamples[0], trainExamples[1])

                print('PITTING AGAINST PREVIOUS VERSION')
                arena = Arena(self.pnet, self.nnet, self.game, cfg, self.arena_game_args)
                pwins, nwins, draws = arena.playGames()

                print('NEW/PREV WINS : %d / %d ; DRAWS : %d' % (nwins, pwins, draws))
                decisive = pwins + nwins
                if decisive > 0 and float(nwins)/decisive < cfg['updateThreshold']:
                    print('REJECTING NEW MODEL')
                    self.nnet.load_checkpoint(folder=checkpoint_dir, filename='temp')
                else:
                    # Save twice: once under this iteration's tag and once
                    # as 'best'.
                    print('ACCEPTING NEW MODEL')
                    self.nnet.save_checkpoint(folder=checkpoint_dir, filename='nnet_checkpoint' + tag)
                    self.nnet.save_checkpoint(folder=checkpoint_dir, filename='best')
            else:
                # No arena: train, then write the tagged checkpoint and
                # overwrite 'best'.
                print('TRAINING NEW NEURAL NETWORK...')
                trainExamples = self.nnet.constructTraining(trainExamples)
                self.nnet.train(trainExamples[0], trainExamples[1], folder = checkpoint_dir, filename = 'trainHistDict' + tag)

                self.nnet.save_checkpoint(folder = checkpoint_dir, filename='nnet_checkpoint' + tag)
                self.nnet.save_checkpoint(folder = checkpoint_dir, filename = 'best')
Ejemplo n.º 30
0
    def train(self, examples):
        """
        Train self.nnet on randomly sampled mini-batches of examples.

        examples: list of examples, each example is of form (board, pi, v)

        For each of args.epochs epochs, int(len(examples)/args.batch_size)
        batches of args.batch_size examples are drawn with replacement and one
        Adam step is taken on loss_pi + loss_v per batch.
        NOTE(review): `args` is a module-level name not visible here.
        """
        optimizer = optim.Adam(self.nnet.parameters())
        num_batches = int(len(examples)/args.batch_size)

        for epoch in range(args.epochs):
            print('EPOCH ::: ' + str(epoch+1))
            self.nnet.train()
            data_time = AverageMeter()
            batch_time = AverageMeter()
            pi_losses = AverageMeter()
            v_losses = AverageMeter()
            end = time.time()

            bar = Bar('Training Net', max=num_batches)
            batch_idx = 0

            while batch_idx < num_batches:
                sample_ids = np.random.randint(len(examples), size=args.batch_size)
                boards, pis, vs = list(zip(*[examples[i] for i in sample_ids]))
                boards = torch.FloatTensor(np.array(boards).astype(np.float64))
                target_pis = torch.FloatTensor(np.array(pis))
                target_vs = torch.FloatTensor(np.array(vs).astype(np.float64))

                # predict
                if args.cuda:
                    boards, target_pis, target_vs = boards.contiguous().cuda(), target_pis.contiguous().cuda(), target_vs.contiguous().cuda()
                boards, target_pis, target_vs = Variable(boards), Variable(target_pis), Variable(target_vs)

                # measure data loading time
                data_time.update(time.time() - end)

                # compute output
                out_pi, out_v = self.nnet(boards)
                l_pi = self.loss_pi(target_pis, out_pi)

                l_v = self.loss_v(target_vs, out_v)
                total_loss = l_pi + l_v

                # record loss
                # .item() instead of .data[0]: indexing a 0-dim loss tensor
                # raises on current PyTorch releases.
                pi_losses.update(l_pi.item(), boards.size(0))
                v_losses.update(l_v.item(), boards.size(0))

                # compute gradient and do SGD step
                optimizer.zero_grad()
                total_loss.backward()
                optimizer.step()

                # measure elapsed time
                batch_time.update(time.time() - end)
                end = time.time()
                batch_idx += 1

                # plot progress
                bar.suffix  = '({batch}/{size}) Data: {data:.3f}s | Batch: {bt:.3f}s | Total: {total:} | ETA: {eta:} | Loss_pi: {lpi:.4f} | Loss_v: {lv:.3f}'.format(
                            batch=batch_idx,
                            size=num_batches,
                            data=data_time.avg,
                            bt=batch_time.avg,
                            total=bar.elapsed_td,
                            eta=bar.eta_td,
                            lpi=pi_losses.avg,
                            lv=v_losses.avg,
                            )
                bar.next()
            bar.finish()
Ejemplo n.º 31
0
    def playGames(self, num, verbose=False):
        """
        Play int(num/2) games, swap self.player1 and self.player2, then play
        int(num/2) more. After every game the result (seen from the original
        player1's side) is passed to self.elo and self.saveToPGN. At the end
        self.saveToScript is called with '<player1_name>_vs_<player2_name>.pgn'.

        Returns:
            oneWon: games won by player1
            twoWon: games won by player2
            draws:  games won by nobody
        """
        eps_time = AverageMeter()
        bar = Bar('Arena.playGames', max=num)
        end = time.time()
        eps = 0
        maxeps = int(num)

        num = int(num/2)
        oneWon = twoWon = draws = 0

        for swapped in (False, True):
            if swapped:
                # Half-time summary, then switch sides.
                print('half')
                print(oneWon, twoWon, draws)
                print('elo: player1: ',self.R1,'player2',self.R2)
                self.player1, self.player2 = self.player2, self.player1

            for _ in range(num):
                raw = self.playGame(verbose=verbose)
                # After the swap the raw result is negated so that 1 still
                # means "original player1 won".
                outcome = raw * -1 if swapped else raw

                if outcome == 1:
                    oneWon += 1
                elif outcome == -1:
                    twoWon += 1
                else:
                    draws += 1

                # compute Elo
                self.elo(outcome)
                # bookkeeping + plot progress
                eps += 1
                # PGN log
                self.saveToPGN(outcome, eps)

                eps_time.update(time.time() - end)
                end = time.time()
                bar.suffix  = '({eps}/{maxeps}) Eps Time: {et:.3f}s | Total: {total:} | ETA: {eta:}| Win: {one}:{two}'.format(
                    eps=eps, maxeps=maxeps, et=eps_time.avg,
                    total=bar.elapsed_td, eta=bar.eta_td,
                    one=oneWon, two=twoWon)
                bar.next()

        bar.finish()
        print('elo: player1: ',self.R1,'player2',self.R2)

        file_name = self.player1_name + '_vs_' + self.player2_name + '.pgn'
        self.saveToScript(file_name)

        return oneWon, twoWon, draws
Ejemplo n.º 32
0
                batch_size=args.batch_size,
            )

        # measure accuracy and record loss
        acc_batch = model_pipeline_pytorch.compute_accuracy(
            outputs=softmax_outputs.data,
            targets=targets.data,
        )
        acc.update(acc_batch, args.batch_size)

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()
        batch_idx += 1

        # plot progress
        bar.suffix = '({batch}/{size}) Data: {data:.3f}s | Batch: {bt:.3f}s'\
        '| Total: {total:} | ETA: {eta:} | Loss: {loss:.4f} | Acc: {acc:.3f}'\
            .format(
                batch=batch_idx,
                size=args.test_batches_per_epoch,
                data=data_time.avg,
                bt=batch_time.avg,
                total=bar.elapsed_td,
                eta=bar.eta_td,
                loss=losses.avg,
                acc=acc.avg,
            )
        bar.next()
    bar.finish()
Ejemplo n.º 33
0
    def train(self, examples):
        """
        Train ``self.nnet`` on a collection of examples.

        Args:
            examples: indexable collection of training examples, each of the
                form (board, pi, v).

        For each of ``self.args.epochs`` epochs, ``len(examples) // batch_size``
        mini-batches are drawn uniformly at random *with replacement* from
        ``examples``; the summed policy and value loss is minimised with a
        freshly created Adam optimizer. Progress is shown on a ``Bar``.
        """
        optimizer = optim.Adam(self.nnet.parameters())

        # Number of mini-batches per epoch; a trailing partial batch is not run.
        num_batches = len(examples) // self.args.batch_size

        def loss_value(loss):
            # 0-dim loss tensors (PyTorch >= 0.4) cannot be indexed with [0];
            # only fall back to legacy one-element indexing when .item() is
            # unavailable.
            return loss.item() if hasattr(loss, 'item') else loss.data[0]

        for epoch in range(self.args.epochs):
            print('EPOCH ::: ' + str(epoch + 1))
            self.nnet.train()
            data_time = AverageMeter()
            batch_time = AverageMeter()
            pi_losses = AverageMeter()
            v_losses = AverageMeter()
            end = time.time()

            bar = Bar('Training Net', max=num_batches)
            batch_idx = 0

            while batch_idx < num_batches:
                # Sample a batch of example indices (with replacement).
                sample_ids = np.random.randint(len(examples),
                                               size=self.args.batch_size)
                boards, pis, vs = list(zip(*[examples[i] for i in sample_ids]))
                boards = torch.FloatTensor(np.array(boards).astype(np.float64))
                target_pis = torch.FloatTensor(np.array(pis))
                target_vs = torch.FloatTensor(np.array(vs).astype(np.float64))

                # Move the batch to the GPU when requested.
                if self.args.cuda:
                    boards = boards.contiguous().cuda()
                    target_pis = target_pis.contiguous().cuda()
                    target_vs = target_vs.contiguous().cuda()
                boards = Variable(boards)
                target_pis = Variable(target_pis)
                target_vs = Variable(target_vs)

                # measure data loading time
                data_time.update(time.time() - end)

                # compute output
                out_pi, out_v = self.nnet(boards)
                l_pi = self.loss_pi(target_pis, out_pi)
                l_v = self.loss_v(target_vs, out_v)
                total_loss = l_pi + l_v

                # record loss, weighted by the batch size
                pi_losses.update(loss_value(l_pi), boards.size(0))
                v_losses.update(loss_value(l_v), boards.size(0))

                # compute gradient and do one optimizer step
                optimizer.zero_grad()
                total_loss.backward()
                optimizer.step()

                # measure elapsed time
                batch_time.update(time.time() - end)
                end = time.time()
                batch_idx += 1

                # plot progress
                bar.suffix = '({batch}/{size}) Data: {data:.3f}s | Batch: {bt:.3f}s | Total: {total:} | ETA: {eta:} | Loss_pi: {lpi:.4f} | Loss_v: {lv:.3f}'.format(
                    batch=batch_idx,
                    size=num_batches,
                    data=data_time.avg,
                    bt=batch_time.avg,
                    total=bar.elapsed_td,
                    eta=bar.eta_td,
                    lpi=pi_losses.avg,
                    lv=v_losses.avg,
                )
                bar.next()
            bar.finish()
Ejemplo n.º 34
0
def main():
    """Train (and optionally validate) the model selected by ``args.method``.

    Parses the command-line arguments, builds the image transform, the
    vocabulary, the word embeddings and the train/validation data loaders,
    instantiates the trainer class chosen by ``args.method`` and then runs
    ``args.num_epochs`` epochs. Each epoch is a training pass followed, when
    ``args.validate == "true"``, by a validation pass. Models are saved once
    at the very end via ``model_trainer.save_models(-1)``.
    """
    args = parser.parse_args()

    # ---- Initialization ----
    if args.comment == "NONE":
        args.comment = args.method

    validate = args.validate == "true"

    if args.method == "coupled_vae_gan":
        trainer = coupled_vae_gan_trainer.coupled_vae_gan_trainer
    elif args.method == "coupled_vae":
        trainer = coupled_vae_trainer.coupled_vae_trainer
    elif args.method == "wgan":
        trainer = wgan_trainer.wgan_trainer
    elif args.method == "seq_wgan":
        trainer = seq_wgan_trainer.wgan_trainer
    elif args.method == "skip_thoughts":
        trainer = skipthoughts_vae_gan_trainer.coupled_vae_gan_trainer
    else:
        assert False, "Invalid method"

    assert args.text_criterion in ("MSE", "Cosine", "Hinge",
                                   "NLLLoss"), 'Invalid Loss Function'
    assert args.cm_criterion in ("MSE", "Cosine",
                                 "Hinge"), 'Invalid Loss Function'

    assert args.common_emb_ratio <= 1.0 and args.common_emb_ratio >= 0

    # ---- Image preprocessing ----
    # For normalization, see https://github.com/pytorch/vision#models
    transform = transforms.Compose([
        transforms.RandomCrop(args.crop_size),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((.5, .5, .5), (.5, .5, .5))
    ])

    # ---- Vocabulary and embeddings ----
    # Any dataset other than "coco" uses the fixed CUB vocabulary file.
    if args.dataset != "coco":
        args.vocab_path = "./data/cub_vocab.pkl"

    # Load vocabulary wrapper.
    # NOTE: pickle.load executes arbitrary code from the file; only point
    # vocab_path at trusted files.
    print("Loading Vocabulary...")
    with open(args.vocab_path, 'rb') as f:
        vocab = pickle.load(f)

    # Load embeddings. A path ending in '/' is treated as a directory and the
    # GloVe file name for the requested dimensionality is appended.
    emb_size = args.word_embedding_size
    emb_path = args.embedding_path
    if args.embedding_path[-1] == '/':
        emb_path += 'glove.6B.' + str(emb_size) + 'd.txt'

    print("Loading Embeddings...")

    use_glove = args.use_glove == "true"
    if use_glove:
        emb = load_glove_embeddings(emb_path, vocab.word2idx, emb_size)
        word_emb = nn.Embedding(emb.size(0), emb.size(1))
        word_emb.weight = nn.Parameter(emb)
    else:
        word_emb = nn.Embedding(len(vocab), emb_size)

    # Freeze weights: a frozen parameter must NOT require gradients.
    # (Setting requires_grad to True, as before, is the default and froze
    # nothing.)
    # NOTE(review): confirm the trainer's optimizer tolerates/filters
    # parameters with requires_grad=False.
    if args.fixed_embeddings == "true":
        word_emb.weight.requires_grad = False

    # ---- Data loaders ----
    print("Building Data Loader For Test Set...")
    if args.dataset == 'coco':
        data_loader = get_loader(args.image_dir,
                                 args.caption_path,
                                 vocab,
                                 transform,
                                 args.batch_size,
                                 shuffle=True,
                                 num_workers=args.num_workers)

        print("Building Data Loader For Validation Set...")
        val_loader = get_loader(args.valid_dir,
                                args.valid_caption_path,
                                vocab,
                                transform,
                                args.batch_size,
                                shuffle=True,
                                num_workers=args.num_workers)

    else:
        data_path = "data/cub.h5"
        dataset = Text2ImageDataset(data_path,
                                    split=0,
                                    vocab=vocab,
                                    transform=transform)
        data_loader = DataLoader(dataset,
                                 batch_size=args.batch_size,
                                 shuffle=True,
                                 num_workers=args.num_workers,
                                 collate_fn=collate_fn)

        dataset_val = Text2ImageDataset(data_path,
                                        split=1,
                                        vocab=vocab,
                                        transform=transform)
        val_loader = DataLoader(dataset_val,
                                batch_size=args.batch_size,
                                shuffle=True,
                                num_workers=args.num_workers,
                                collate_fn=collate_fn)

    # ---- Network initialization ----
    print("Setting up the trainer...")
    model_trainer = trainer(args, word_emb, vocab)

    for epoch in range(args.num_epochs):
        # Label shared by the training and validation progress bars.
        bar_label = (args.method if args.comment == "NONE"
                     else args.method + "/" + args.comment)

        # ---- Training pass ----
        print('EPOCH ::: TRAINING ::: ' + str(epoch + 1))
        bar = Bar(bar_label, max=len(data_loader))

        model_trainer.set_train_models()
        model_trainer.create_losses_meter(model_trainer.losses)

        for i, (images, captions, lengths) in enumerate(data_loader):
            # A truthy return from load_models ends this epoch's batch loop.
            if model_trainer.load_models(epoch):
                break

            # The final batch of the loader is never processed.
            # NOTE(review): presumably because it may be smaller than
            # batch_size -- confirm.
            if i == len(data_loader) - 1:
                break

            images = to_var(images)
            captions = to_var(captions)
            lengths = to_var(torch.LongTensor(lengths))

            # The last argument is true every image_save_interval batches.
            model_trainer.forward(epoch, images, captions, lengths,
                                  not i % args.image_save_interval)

            # The bar is refreshed (and advanced) only every log_step
            # trainer iterations.
            if not model_trainer.iteration % args.log_step:
                bar.suffix = _progress_suffix(bar, i, len(data_loader),
                                              model_trainer.iteration,
                                              model_trainer.losses)
                bar.next()

        bar.finish()

        # ---- Validation pass ----
        if validate:
            print('EPOCH ::: VALIDATION ::: ' + str(epoch + 1))
            bar = Bar("VAL:" + bar_label, max=len(val_loader))

            model_trainer.set_eval_models()
            model_trainer.create_metrics_meter(model_trainer.metrics)

            for i, (images, captions, lengths) in enumerate(val_loader):
                # As in training, the final batch is skipped.
                if i == len(val_loader) - 1:
                    break

                images = to_var(images)
                # NOTE(review): unlike the training loop, the first caption
                # column is dropped here and `lengths` is passed through
                # unconverted -- confirm this is what evaluate() expects.
                captions = to_var(captions[:, 1:])

                # The last argument is true only for the first batch.
                model_trainer.evaluate(epoch, images, captions, lengths,
                                       i == 0)

                bar.suffix = _progress_suffix(bar, i, len(val_loader),
                                              model_trainer.iteration,
                                              model_trainer.metrics)
                bar.next()

            bar.finish()

    model_trainer.save_models(-1)


def _progress_suffix(bar, batch, size, iteration, meters):
    """Build the progress-bar suffix: a coloured header line followed by one
    ``name: avg`` entry per meter (sorted by name, line break every 5)."""
    parts = [
        bcolors.HEADER,
        '({batch}/{size}) Iter: {bt:} | Time: {total:}-{eta:}\n'.format(
            batch=batch,
            size=size,
            bt=iteration,
            total=bar.elapsed_td,
            eta=bar.eta_td,
        ),
        bcolors.ENDC,
    ]
    ordered = sorted(meters.items(), key=lambda x: x[0])
    for cnt, (l_name, l_value) in enumerate(ordered, 1):
        parts.append(' | {name}: {val:.3f}'.format(
            name=l_name,
            val=l_value.avg,
        ))
        if not cnt % 5:
            parts.append("\n")
    return "".join(parts)