Example #1
    def playGames(self, num, verbose=False):
        """
        Plays num games in which player1 starts num/2 games and player2 starts
        num/2 games.

        Returns:
            oneWon: games won by player1
            twoWon: games won by player2
            draws:  games won by nobody
        """
        eps_time = AverageMeter()
        bar = Bar('Arena.playGames', max=num)
        end = time.time()
        eps = 0
        maxeps = int(num)

        num = int(num/2)
        oneWon = 0
        twoWon = 0
        draws = 0
        for _ in range(num):
            gameResult = self.playGame(verbose=verbose)
            if gameResult==1:
                oneWon+=1
            elif gameResult==-1:
                twoWon+=1
            else:
                draws+=1
            # bookkeeping + plot progress
            eps += 1
            eps_time.update(time.time() - end)
            end = time.time()
            bar.suffix = '({eps}/{maxeps}) Eps Time: {et:.3f}s | Total: {total:} | ETA: {eta:}'.format(eps=eps, maxeps=maxeps, et=eps_time.avg,
                                                                                                       total=bar.elapsed_td, eta=bar.eta_td)
            bar.next()

        self.player1, self.player2 = self.player2, self.player1
        
        for _ in range(num):
            gameResult = self.playGame(verbose=verbose)
            if gameResult==-1:
                oneWon+=1                
            elif gameResult==1:
                twoWon+=1
            else:
                draws+=1
            # bookkeeping + plot progress
            eps += 1
            eps_time.update(time.time() - end)
            end = time.time()
            bar.suffix = '({eps}/{maxeps}) Eps Time: {et:.3f}s | Total: {total:} | ETA: {eta:}'.format(eps=eps, maxeps=maxeps, et=eps_time.avg,
                                                                                                       total=bar.elapsed_td, eta=bar.eta_td)
            bar.next()
            
        bar.finish()

        return oneWon, twoWon, draws
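
All of these loops lean on two helpers that the snippets never define: AverageMeter (the running-average tracker from the classic PyTorch ImageNet example) and Bar (from the third-party progress package, whose elapsed_td and eta_td attributes feed the suffix strings). A minimal AverageMeter sketch consistent with how it is called above:

class AverageMeter(object):
    """Tracks the most recent value and a running average."""

    def __init__(self):
        self.reset()

    def reset(self):
        self.val = 0
        self.avg = 0
        self.sum = 0
        self.count = 0

    def update(self, val, n=1):
        # n is the number of samples that val was averaged over
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count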
Example #2
    def learn(self):
        """
        Performs numIters iterations with numEps episodes of self-play in each
        iteration. After every iteration, it retrains neural network with
        examples in trainExamples (which has a maximum length of maxlenOfQueue).
        It then pits the new neural network against the old one and accepts it
        only if it wins >= updateThreshold fraction of games.
        """

        for i in range(1, self.args.numIters + 1):  #numIters = 1
            # bookkeeping
            # print('------ITER ' + str(i) + '------')
            # examples of the iteration
            if not self.skipFirstSelfPlay or i > 1:
                iterationTrainExamples = deque([],
                                               maxlen=self.args.maxlenOfQueue)

                eps_time = AverageMeter()
                bar = Bar('Self Play', max=self.args.numEps)
                end = time.time()

                for eps in range(self.args.numEps):  # number of episodes
                    self.mcts = MCTS(self.game, self.nnet,
                                     self.args)  # reset search tree
                    self.gamecount += 1
                    iterationTrainExamples += self.executeEpisode()

                    # bookkeeping + plot progress
                    eps_time.update(time.time() - end)
                    end = time.time()
                    bar.suffix = '({eps}/{maxeps}) Eps Time: {et:.3f}s | Total: {total:} | ETA: {eta:}'.format(
                        eps=eps + 1,
                        maxeps=self.args.numEps,
                        et=eps_time.avg,
                        total=bar.elapsed_td,
                        eta=bar.eta_td)
                    bar.next()
                bar.finish()

                # save the iteration examples to the history
                self.trainExamplesHistory.append(iterationTrainExamples)

            if len(self.trainExamplesHistory) > self.args.numItersForTrainExamplesHistory:
                # keep only the most recent iterations of examples
                self.trainExamplesHistory.pop(0)

            # backup history to a file
            # NB! the examples were collected using the model from the previous iteration, so (i-1)
            self.saveTrainExamples(i - 1)

            # shuffle examples before training
            trainExamples = []
            for e in self.trainExamplesHistory:
                trainExamples.extend(e)
            shuffle(trainExamples)

            # training new network, keeping a copy of the old one
            self.nnet.save_checkpoint(folder=self.args.checkpoint,
                                      filename='temp.pth.tar')
            self.pnet.load_checkpoint(folder=self.args.checkpoint,
                                      filename='temp.pth.tar')
            pmcts = MCTS(self.game, self.pnet, self.args)

            print("TOTAL GAMES PLAYED: ", self.gamecount)
            self.nnet.train(trainExamples)
            nmcts = MCTS(self.game, self.nnet, self.args)

            print('PITTING AGAINST PREVIOUS VERSION')
            arena = Arena(lambda x: np.argmax(pmcts.getActionProb(x, temp=0)),
                          lambda x: np.argmax(nmcts.getActionProb(x, temp=0)),
                          self.game)
            pwins, nwins, draws = arena.playGames(self.args.arenaCompare)

            print('NEW/PREV WINS : %d / %d ; DRAWS : %d' %
                  (nwins, pwins, draws))
            if pwins + nwins > 0 and float(nwins) / (
                    pwins + nwins) < self.args.updateThreshold:
                print('REJECTING NEW MODEL')
                self.nnet.load_checkpoint(folder=self.args.checkpoint,
                                          filename='temp.pth.tar')
            else:
                print('ACCEPTING NEW MODEL')
                self.nnet.save_checkpoint(folder=self.args.checkpoint,
                                          filename=self.getCheckpointFile(i))
                self.nnet.save_checkpoint(folder=self.args.checkpoint,
                                          filename='best.pth.tar')
Example #3
    def learn(self):
        """
        Performs numIters iterations with numEps episodes of self-play in each
        iteration. After every iteration, it retrains neural network with
        examples in trainExamples (which has a maximum length of maxlenOfQueue).
        It then pits the new neural network against the old one and accepts it
        only if it wins >= updateThreshold fraction of games.
        """
        vloss_hist = []
        ploss_hist = []
        for i in range(1, self.args.numIters + 1):
            # bookkeeping
            print('------ITER ' + str(i) + '------')
            # examples of the iteration
            if not self.skipFirstSelfPlay or i > 1:
                iterationTrainExamples = deque([], maxlen=self.args.maxlenOfQueue)

                eps_time = AverageMeter()
                bar = Bar('Self Play', max=self.args.numEps)
                end = time.time()

                for eps in range(self.args.numEps):
                    self.mcts = MCTS(self.env, self.nnet, self.args)  # reset search tree
                    iterationTrainExamples += self.executeEpisode()

                    # bookkeeping + plot progress
                    eps_time.update(time.time() - end)
                    end = time.time()
                    bar.suffix = '({eps}/{maxeps}) Eps Time: {et:.3f}s | Total: {total:} | ETA: {eta:}'.format(
                        eps=eps + 1, maxeps=self.args.numEps, et=eps_time.avg,
                        total=bar.elapsed_td, eta=bar.eta_td)
                    bar.next()
                bar.finish()

                # save the iteration examples to the history
                self.trainExamplesHistory.append(iterationTrainExamples)

            if len(self.trainExamplesHistory) > self.args.numItersForTrainExamplesHistory:
                print("len(trainExamplesHistory) =", len(self.trainExamplesHistory),
                      " => remove the oldest trainExamples")
                self.trainExamplesHistory.pop(0)
            # backup history to a file
            # NB! the examples were collected using the model from the previous iteration, so (i-1)
            self.saveTrainExamples(i - 1)

            # shuffle examples before training
            trainExamples = []
            for e in self.trainExamplesHistory:
                trainExamples.extend(e)
            shuffle(trainExamples)

            # training new network, keeping a copy of the old one
            self.nnet.save_checkpoint(folder=self.args.checkpoint, filename='temp.pth.tar')
            self.pnet.load_checkpoint(folder=self.args.checkpoint, filename='temp.pth.tar')
            pmcts = MCTS(self.env, self.pnet, self.args)

            ploss, vloss = self.nnet.train(trainExamples)
            ploss_hist += ploss
            vloss_hist += vloss
            nmcts = MCTS(self.env, self.nnet, self.args)

            print('PITTING AGAINST PREVIOUS VERSION')
            example_pmcts = self.executeEpisode(mcts2=pmcts)
            example_nmcts = self.executeEpisode(mcts2=nmcts)
            pwins = 0
            nwins = 0
            for x in example_pmcts:
                if x[0] in range(self.args.left_agent):
                    if x[3] == 1:
                        pwins += 1

            for x in example_nmcts:
                if x[0] in range(self.args.left_agent):
                    if x[3] == 1:
                        nwins += 1

            print('NEW/PREV WINS : %d / %d' % (nwins, pwins))
            if pwins + nwins == 0 or float(nwins) / (pwins + nwins) < self.args.updateThreshold:
                print('REJECTING NEW MODEL')
                self.nnet.load_checkpoint(folder=self.args.checkpoint, filename='temp.pth.tar')
            else:
                print('ACCEPTING NEW MODEL')
                self.nnet.save_checkpoint(folder=self.args.checkpoint, filename=self.getCheckpointFile(i))
                self.nnet.save_checkpoint(folder=self.args.checkpoint, filename='best.pth.tar')
        return vloss_hist, ploss_hist
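
Both versions of learn gate model promotion on the same predicate: the new network is kept only if it wins at least updateThreshold of the decisive arena games. Note the two variants disagree on the zero-decisive-games edge case (the first accepts, the second rejects). A standalone sketch of the stricter gate, with illustrative names:

def accept_new_model(nwins, pwins, update_threshold):
    """Accept the new network only if it won at least update_threshold
    of the decisive (non-draw) games."""
    total = nwins + pwins
    if total == 0:
        return False  # no decisive games: keep the old model
    return nwins / total >= update_threshold

# With updateThreshold = 0.6:
# accept_new_model(25, 15, 0.6) -> True   (0.625 >= 0.6)
# accept_new_model(20, 20, 0.6) -> False  (0.5 < 0.6)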
Example #4
    def train(self, examples):
        """
        examples: list of examples, each example is of form (board, pi, v) || past data
        """

        for epoch in range(args.epochs):
            print('EPOCH ::: ' + str(epoch + 1))
            data_time = AverageMeter()
            batch_time = AverageMeter()
            pi_losses = AverageMeter()
            v_losses = AverageMeter()
            end = time.time()

            bar = Bar('Training Net', max=int(len(examples) / args.batch_size))
            batch_idx = 0

            # self.sess.run(tf.local_variables_initializer())
            while batch_idx < int(len(examples) / args.batch_size):
                sample_ids = np.random.randint(len(examples),
                                               size=args.batch_size)
                boards, pis, vs = list(
                    zip(*[examples[i] for i in sample_ids])
                )  # boards, policy targets (pi), value targets (v)

                # predict and compute gradient and do SGD step
                input_dict = {
                    self.nnet.input_boards: boards,
                    self.nnet.target_pis: pis,
                    self.nnet.target_vs: vs,
                    self.nnet.dropout: args.dropout,
                    self.nnet.isTraining: True
                }

                # measure data loading time
                data_time.update(time.time() - end)

                # record loss
                self.sess.run(self.nnet.train_step, feed_dict=input_dict)
                pi_loss, v_loss = self.sess.run(
                    [self.nnet.loss_pi, self.nnet.loss_v],
                    feed_dict=input_dict)
                pi_losses.update(pi_loss, len(boards))
                v_losses.update(v_loss, len(boards))

                # measure elapsed time
                batch_time.update(time.time() - end)
                end = time.time()
                batch_idx += 1

                # plot progress
                bar.suffix = '({batch}/{size}) Data: {data:.3f}s | Batch: {bt:.3f}s | Total: {total:} | ETA: {eta:} | Loss_pi: {lpi:.4f} | Loss_v: {lv:.3f}'.format(
                    batch=batch_idx,
                    size=int(len(examples) / args.batch_size),
                    data=data_time.avg,
                    bt=batch_time.avg,
                    total=bar.elapsed_td,
                    eta=bar.eta_td,
                    lpi=pi_losses.avg,
                    lv=v_losses.avg,
                )
                bar.next()
            bar.finish()
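
np.random.randint draws indices with replacement, so a batch can contain the same example twice and some examples may never be sampled in a given pass. A quick demonstration of the sampling-and-unpacking idiom used above, on toy data:

import numpy as np

examples = [("board%d" % i, "pi%d" % i, "v%d" % i) for i in range(10)]
batch_size = 4

sample_ids = np.random.randint(len(examples), size=batch_size)
# e.g. array([3, 7, 3, 0]) -- index 3 drawn twice
boards, pis, vs = list(zip(*[examples[i] for i in sample_ids]))
# boards, pis and vs are parallel tuples of length batch_size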
Example #5
def main():
    print("Initializing...")
    # global args
    args = parser.parse_args()

    now = datetime.datetime.now()
    current_date = now.strftime("%m-%d-%H-%M")

    assert args.text_criterion in ("MSE", "Cosine",
                                   "Hinge"), 'Invalid Loss Function'
    assert args.cm_criterion in ("MSE", "Cosine",
                                 "Hinge"), 'Invalid Loss Function'

    mask = args.common_emb_size
    assert mask <= args.hidden_size

    cuda = args.cuda == 'true'

    # Image preprocessing //ATTENTION
    # For normalization, see https://github.com/pytorch/vision#models
    transform = transforms.Compose([
        transforms.RandomCrop(args.crop_size),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
    ])

    result_path = args.result_path
    model_path = args.model_path + current_date + "/"

    if not os.path.exists(result_path):
        os.makedirs(result_path)
    if not os.path.exists(model_path):
        print("Creating model path on", model_path)
        os.makedirs(model_path)

    # Load vocabulary wrapper.
    print("Loading Vocabulary...")
    with open(args.vocab_path, 'rb') as f:
        vocab = pickle.load(f)

    # Load Embeddings
    emb_size = args.embedding_size
    emb_path = args.embedding_path
    if args.embedding_path[-1] == '/':
        emb_path += 'glove.6B.' + str(emb_size) + 'd.txt'

    print("Loading Embeddings...")
    emb = load_glove_embeddings(emb_path, vocab.word2idx, emb_size)

    glove_emb = Embeddings(emb_size, len(vocab.word2idx),
                           vocab.word2idx["<pad>"])
    glove_emb.word_lut.weight.data.copy_(emb)
    glove_emb.word_lut.weight.requires_grad = False

    # glove_emb = nn.Embedding(emb.size(0), emb.size(1))
    # glove_emb = embedding(emb.size(0), emb.size(1))
    # glove_emb.weight = nn.Parameter(emb)

    # Freeze weights
    # if args.fixed_embeddings == "true":
    # glove_emb.weight.requires_grad = False

    # Build data loader
    print("Building Data Loader For Test Set...")
    data_loader = get_loader(args.image_dir,
                             args.caption_path,
                             vocab,
                             transform,
                             args.batch_size,
                             shuffle=True,
                             num_workers=args.num_workers)

    print("Building Data Loader For Validation Set...")
    val_loader = get_loader(args.valid_dir,
                            args.valid_caption_path,
                            vocab,
                            transform,
                            args.batch_size,
                            shuffle=True,
                            num_workers=args.num_workers)

    print("Setting up the Networks...")

    encoder_Img = ImageEncoder(img_dimension=args.crop_size,
                               feature_dimension=args.hidden_size)
    decoder_Img = ImageDecoder(img_dimension=args.crop_size,
                               feature_dimension=args.hidden_size)

    if cuda:
        encoder_Img = encoder_Img.cuda()
        decoder_Img = decoder_Img.cuda()

    # Losses and Optimizers
    print("Setting up the Objective Functions...")
    img_criterion = nn.MSELoss()
    # txt_criterion = nn.MSELoss(size_average=True)

    if cuda:
        img_criterion = img_criterion.cuda()
    # txt_criterion = nn.CrossEntropyLoss()

    #     gen_params = chain(generator_A.parameters(), generator_B.parameters())
    print("Setting up the Optimizers...")
    # img_params = chain(decoder_Img.parameters(), encoder_Img.parameters())
    img_params = list(decoder_Img.parameters()) + list(
        encoder_Img.parameters())

    # ATTENTION: Check betas and weight decay
    # ATTENTION: Check why valid_params fails on image networks with out of memory error

    img_optim = optim.Adam(
        img_params, lr=0.001)  #,betas=(0.5, 0.999), weight_decay=0.00001)
    # img_enc_optim = optim.Adam(encoder_Img.parameters(), lr=args.learning_rate)#betas=(0.5, 0.999), weight_decay=0.00001)
    # img_dec_optim = optim.Adam(decoder_Img.parameters(), lr=args.learning_rate)#betas=(0.5,0.999), weight_decay=0.00001)

    train_images = False  # Reverse 2
    for epoch in range(args.num_epochs):

        # TRAINING TIME
        print('EPOCH ::: TRAINING ::: ' + str(epoch + 1))
        batch_time = AverageMeter()
        img_losses = AverageMeter()
        txt_losses = AverageMeter()
        cm_losses = AverageMeter()
        end = time.time()

        bar = Bar('Training Net', max=len(data_loader))

        # Set training mode
        encoder_Img.train()
        decoder_Img.train()

        train_images = True
        for i, (images, captions, lengths) in enumerate(data_loader):
            # ATTENTION REMOVE
            if i == 6450:
                break

            # Set mini-batch dataset
            images = to_var(images)
            captions = to_var(captions)

            # target = pack_padded_sequence(captions, lengths, batch_first=True)[0]
            # captions, lengths = pad_sequences(captions, lengths)
            # images = torch.FloatTensor(images)

            captions = captions.transpose(0, 1).unsqueeze(2)
            lengths = torch.LongTensor(lengths)  # print(captions.size())

            # Forward, Backward and Optimize
            # img_optim.zero_grad()
            # img_dec_optim.zero_grad()
            # img_enc_optim.zero_grad()
            encoder_Img.zero_grad()
            decoder_Img.zero_grad()

            # txt_params.zero_grad()
            # txt_dec_optim.zero_grad()
            # txt_enc_optim.zero_grad()

            # Image Auto_Encoder Forward

            img_encoder_outputs, Iz = encoder_Img(images)

            IzI = decoder_Img(img_encoder_outputs)

            img_rc_loss = img_criterion(IzI, images)

            # Text Auto Encoder Forward

            # target = target[:-1] # exclude last target from inputs

            img_loss = img_rc_loss

            img_losses.update(img_rc_loss.data[0], args.batch_size)
            txt_losses.update(0, args.batch_size)
            cm_losses.update(0, args.batch_size)

            # Image Network Training and Backpropagation

            img_loss.backward()
            img_optim.step()

            if i % args.image_save_interval == 0:
                subdir_path = os.path.join(
                    result_path, str(i // args.image_save_interval))

                if not os.path.exists(subdir_path):
                    os.makedirs(subdir_path)

                for im_idx in range(3):
                    im_or = (images[im_idx].cpu().data.numpy().transpose(
                        1, 2, 0) / 2 + .5) * 255
                    im = (IzI[im_idx].cpu().data.numpy().transpose(1, 2, 0) / 2
                          + .5) * 255

                    filename_prefix = os.path.join(subdir_path, str(im_idx))
                    scipy.misc.imsave(filename_prefix + '_original.A.jpg',
                                      im_or)
                    scipy.misc.imsave(filename_prefix + '.A.jpg', im)

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            # plot progress
            bar.suffix = '({batch}/{size}) Batch: {bt:.3f}s | Total: {total:} | ETA: {eta:} | Loss_Img: {img_l:.3f}| Loss_Txt: {txt_l:.3f} | Loss_CM: {cm_l:.4f}'.format(
                batch=i,
                size=len(data_loader),
                bt=batch_time.avg,
                total=bar.elapsed_td,
                eta=bar.eta_td,
                img_l=img_losses.avg,
                txt_l=txt_losses.avg,
                cm_l=cm_losses.avg,
            )
            bar.next()
        bar.finish()

        # Save the models
        print('\n')
        print('Saving the models in {}...'.format(model_path))
        torch.save(
            decoder_Img.state_dict(),
            os.path.join(model_path, 'decoder-img-%d-' % (epoch + 1)) +
            current_date + ".pkl")
        torch.save(
            encoder_Img.state_dict(),
            os.path.join(model_path, 'encoder-img-%d-' % (epoch + 1)) +
            current_date + ".pkl")
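
One detail worth flagging: the image-saving branch un-normalizes with (x / 2 + .5) * 255, which inverts Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)), yet the transform above normalizes with ImageNet statistics, so the saved images will have shifted colors. A hedged helper that inverts the exact Normalize used in this pipeline:

import numpy as np

IMAGENET_MEAN = np.array([0.485, 0.456, 0.406])
IMAGENET_STD = np.array([0.229, 0.224, 0.225])

def denormalize(chw_array):
    """Invert transforms.Normalize(IMAGENET_MEAN, IMAGENET_STD) and
    return an HWC uint8 image ready for saving."""
    hwc = chw_array.transpose(1, 2, 0)
    hwc = hwc * IMAGENET_STD + IMAGENET_MEAN  # undo (x - mean) / std
    return (np.clip(hwc, 0.0, 1.0) * 255).astype(np.uint8)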
Example #6
    def learn(self):
        """
        Performs numIters iterations with numEps episodes of self-play in each
        iteration. After every iteration, it retrains neural network with
        examples in trainExamples (which has a maximum length of maxlenOfQueue).
        It then pits the new neural network against the old one and accepts it
        only if it wins >= updateThreshold fraction of games.
        """

        self.game.prune_prob = self.args.prune_starting_prob
        train_black = self.args.train_black_first

        for i in range(1, self.args.numIters+1):
            # bookkeeping
            print('------ITER ' + str(i) + '------')
            # examples of the iteration
            if not self.args.skip_first_self_play or i>1:
                iterationTrainExamples_white = deque([], maxlen=self.args.maxlenOfQueue)
                iterationTrainExamples_black = deque([], maxlen=self.args.maxlenOfQueue)
    
                eps_time = AverageMeter()
                bar = Bar('Self Play', max=self.args.numEps)
                end = time.time()

                if self.args.profile_coach:
                    prof = cProfile.Profile()
                    prof.enable()

                for eps in range(self.args.numEps):
                    self.mcts = MCTS(self.game, self.white_nnet, self.black_nnet, self.args)   # reset search tree

                    white_examples, black_examples = self.executeEpisode()

                    iterationTrainExamples_white += white_examples
                    iterationTrainExamples_black += black_examples

                    # bookkeeping + plot progress
                    eps_time.update(time.time() - end)
                    end = time.time()
                    bar.suffix = '({eps}/{maxeps}) Eps Time: {et:.3f}s | Total: {total:} | ETA: {eta:}'.format(eps=eps + 1, maxeps=self.args.numEps, et=eps_time.avg,
                                                                                                               total=bar.elapsed_td, eta=bar.eta_td)
                    bar.next()
                bar.finish()
                if self.args.profile_coach:
                    prof.disable()
                    prof.print_stats(sort=2)

                # save the iteration examples to the history 
                self.trainExamplesHistory_white.append(iterationTrainExamples_white)
                self.trainExamplesHistory_black.append(iterationTrainExamples_black)
                
            while len(self.trainExamplesHistory_white) > self.args.numItersForTrainExamplesHistory:
                print("len(trainExamplesHistory) =", len(self.trainExamplesHistory_white), " => remove the oldest trainExamples")
                self.trainExamplesHistory_white.pop(0)
                self.trainExamplesHistory_black.pop(0)
            # backup history to a file
            # NB! the examples were collected using the model from the previous iteration, so (i-1)  
            self.saveTrainExamples(i-1)

            # training new network, keeping a copy of the old one
            self.white_nnet.save_checkpoint(folder=self.args.checkpoint, filename='temp_white.pth.tar')
            self.black_nnet.save_checkpoint(folder=self.args.checkpoint, filename='temp_black.pth.tar')
            self.white_pnet.load_checkpoint(folder=self.args.checkpoint, filename='temp_white.pth.tar')
            self.black_pnet.load_checkpoint(folder=self.args.checkpoint, filename='temp_black.pth.tar')

            pmcts = MCTS(self.game, self.white_pnet, self.black_pnet, self.args)

            if not self.args.train_both:
                if train_black:
                    # shuffle examples before training
                    trainExamples = []
                    for e in self.trainExamplesHistory_black:
                        trainExamples.extend(e)
                    shuffle(trainExamples)
                    self.black_nnet.train(trainExamples)
                else:
                    # shuffle examples before training
                    trainExamples = []
                    for e in self.trainExamplesHistory_white:
                        trainExamples.extend(e)
                    shuffle(trainExamples)
                    self.white_nnet.train(trainExamples)
            else:
                # shuffle examples before training
                trainExamples = []
                for e in self.trainExamplesHistory_black:
                    trainExamples.extend(e)
                shuffle(trainExamples)
                self.black_nnet.train(trainExamples)

                # shuffle examples before training
                trainExamples = []
                for e in self.trainExamplesHistory_white:
                    trainExamples.extend(e)
                shuffle(trainExamples)
                self.white_nnet.train(trainExamples)

            nmcts = MCTS(self.game, self.white_nnet, self.black_nnet, self.args)

            print('PITTING AGAINST PREVIOUS VERSION')
            arena = Arena(lambda board, turn_player: np.argmax(pmcts.getActionProb(board, turn_player, temp=0)),
                          lambda board, turn_player: np.argmax(nmcts.getActionProb(board, turn_player, temp=0)),
                          self.game)
            pwins, nwins, draws, pwins_white, pwins_black, nwins_white, nwins_black \
                = arena.playGames(self.args.arenaCompare, self.args.profile_arena)

            print('NEW/PREV WINS (white, black) : (%d,%d) / (%d,%d) ; DRAWS : %d' % (nwins_white, nwins_black, pwins_white, pwins_black, draws))

            if pwins+nwins == 0 or float(nwins)/(pwins+nwins) < self.args.updateThreshold \
                    or nwins_black < pwins_black or nwins_white < pwins_white:
                print('REJECTING NEW MODEL')
                if not self.args.train_both:
                    if train_black:
                        self.black_nnet.load_checkpoint(folder=self.args.checkpoint, filename='temp_black.pth.tar')
                    else:
                        self.white_nnet.load_checkpoint(folder=self.args.checkpoint, filename='temp_white.pth.tar')
                else:
                    self.black_nnet.load_checkpoint(folder=self.args.checkpoint, filename='temp_black.pth.tar')
                    self.white_nnet.load_checkpoint(folder=self.args.checkpoint, filename='temp_white.pth.tar')
            else:
                print('ACCEPTING NEW MODEL')
                if not self.args.train_both:
                    if train_black:
                        # self.black_nnet.save_checkpoint(folder=self.args.checkpoint, filename=self.getCheckpointFile(i, Player.black))
                        self.black_nnet.save_checkpoint(folder=self.args.checkpoint, filename='best_black.pth.tar')
                        # if nwins_white == 0 or nwins_black / nwins_white >= self.args.train_other_network_threshold:
                        #     train_black = False
                        print("training white neural net next")
                        train_black = False
                    else:
                        # self.white_nnet.save_checkpoint(folder=self.args.checkpoint, filename=self.getCheckpointFile(i, Player.white))
                        self.white_nnet.save_checkpoint(folder=self.args.checkpoint, filename='best_white.pth.tar')
                        # if nwins_black == 0 or nwins_white / nwins_black > self.args.train_other_network_threshold:
                        #     train_black = True
                        print("training black neural net next")
                        train_black = True
                else:
                    self.black_nnet.save_checkpoint(folder=self.args.checkpoint, filename='best_black.pth.tar')
                    self.white_nnet.save_checkpoint(folder=self.args.checkpoint, filename='best_white.pth.tar')
                self.game.prune_prob += self.args.prune_prob_gain_per_iteration
                self.args.arenaCompare = math.floor(self.args.arenaCompare * 1.05)
            # self.args.numEps = math.floor(self.args.numEps * 1.1)
            self.args.numMCTSSims = math.floor(self.args.numMCTSSims * 1.1)
            print("prune probability: " + str(self.game.prune_prob) + ", episodes: " + str(self.args.numEps) +
                  ", sims: " + str(self.args.numMCTSSims) + ", arena compare: " + str(self.args.arenaCompare))
Example #7
    def learn(self):
        """
        Performs numIters iterations with numEps episodes of self-play in each
        iteration. After every iteration, it retrains neural network with
        examples in trainExamples (which has a maximum length of maxlenOfQueue).
        It then pits the new neural network against the old one and accepts it
        only if it wins >= updateThreshold fraction of games.
        """

        for i in range(1, self.args.numIters + 1):  #for number of rounds
            # bookkeeping
            print('------ITER ' + str(i) + '------')
            # examples of the iteration
            if not self.skipFirstSelfPlay or i > 1:
                iterationTrainExamples = deque(
                    [], maxlen=self.args.maxlenOfQueue
                )  #remove the previous training example

                eps_time = AverageMeter()
                bar = Bar('Self Play', max=self.args.numEps)
                end = time.time()

                for eps in range(
                        self.args.numEps):  # for each self-play episode this round
                    self.mcts = MCTS(self.game, self.nnet,
                                     self.args)  # reset search tree

                    # returns [(canonicalBoard, pi, v), (canonicalBoard, pi, v), ...]
                    # where v is the game result
                    selfPlayResult = self.executeEpisode()
                    # play one game, adding its history to the training examples
                    iterationTrainExamples += selfPlayResult

                    # bookkeeping + plot progress
                    eps_time.update(time.time() - end)
                    end = time.time()
                    bar.suffix = '({eps}/{maxeps}) Eps Time: {et:.3f}s | Total: {total:} | ETA: {eta:}'.format(
                        eps=eps + 1,
                        maxeps=self.args.numEps,
                        et=eps_time.avg,
                        total=bar.elapsed_td,
                        eta=bar.eta_td)
                    bar.next()
                bar.finish()

                # save the iteration examples to the history
                self.trainExamplesHistory.append(iterationTrainExamples)

            #self-play finished, updating the move history
            if len(self.trainExamplesHistory
                   ) > self.args.numItersForTrainExamplesHistory:
                print("len(trainExamplesHistory) =",
                      len(self.trainExamplesHistory),
                      " => remove the oldest trainExamples")
                self.trainExamplesHistory.pop(
                    0)  #remove the oldest gaming history
            # backup history to a file
            # NB! the examples were collected using the model from the previous iteration, so (i-1)
            self.saveTrainExamples(i - 1)

            # shuffle examples before training
            trainExamples = []
            for e in self.trainExamplesHistory:
                trainExamples.extend(e)  #adding new move record
            shuffle(trainExamples)

            # training new network, keeping a copy of the old one
            self.nnet.save_checkpoint(
                folder=self.args.checkpoint,
                filename='temp.pth.tar')  #save the previous net
            self.pnet.load_checkpoint(
                folder=self.args.checkpoint,
                filename='temp.pth.tar')  #read the previous net
            pmcts = MCTS(self.game, self.pnet,
                         self.args)  #reset the previous model's mcts

            #using new data to train the new model
            self.nnet.train(
                trainExamples)  #train the network with the new move records
            nmcts = MCTS(self.game, self.nnet,
                         self.args)  #reset the new model's mcts

            #OLD VS NEW
            print('PITTING AGAINST PREVIOUS VERSION')
            arena = Arena(
                lambda board, turn: np.argmax(
                    pmcts.getActionProb(board, turn, temp=0)),
                lambda board, turn: np.argmax(
                    nmcts.getActionProb(board, turn, temp=0)), self.game)
            pwins, nwins, draws = arena.playGames(
                self.args.arenaCompare)  #play the new model against the old one

            print('NEW/PREV WINS : %d / %d ; DRAWS : %d' %
                  (nwins, pwins, draws))
            if pwins + nwins > 0 and float(nwins) / (
                    pwins + nwins) < self.args.updateThreshold:
                #OLD WIN!
                print('REJECTING NEW MODEL')
                self.nnet.load_checkpoint(
                    folder=self.args.checkpoint, filename='temp.pth.tar'
                )  #restore the previous model, as it beat the new one
            else:
                #NEW WIN!
                print('ACCEPTING NEW MODEL')
                self.nnet.save_checkpoint(folder=self.args.checkpoint,
                                          filename=self.getCheckpointFile(i))
                self.nnet.save_checkpoint(
                    folder=self.args.checkpoint, filename='best.pth.tar'
                )  #save the new model, as this is the best
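
The Arena players built here wrap MCTS in a deterministic policy: at temp=0, getActionProb returns a (near) one-hot distribution over actions, and np.argmax simply picks its mode. A tiny illustration with a made-up probability vector:

import numpy as np

action_probs = [0.0, 0.0, 1.0, 0.0]   # shape of a temp=0 distribution
best_action = np.argmax(action_probs)  # -> 2, played greedily in the arena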
Example #8
    def train(self, examples):
        """
        examples: list of examples, each example is of form (board, pi, v)
        """

        for epoch in range(args.epochs):
            # print('EPOCH ::: ' + str(epoch+1))
            data_time = AverageMeter()
            batch_time = AverageMeter()
            pi_losses = AverageMeter()
            v_losses = AverageMeter()
            end = time.time()

            # bar = Bar('Training Net', max=int(len(examples)/args.batch_size))
            batch_idx = 0

            # self.sess.run(tf.local_variables_initializer())
            while batch_idx < int(len(examples) / args.batch_size):
                sample_ids = np.random.randint(len(examples),
                                               size=args.batch_size)
                boards, pis, vs = list(zip(*[examples[i] for i in sample_ids]))

                # predict and compute gradient and do SGD step
                input_dict = {
                    self.nnet.input_boards: boards,
                    self.nnet.target_pis: pis,
                    self.nnet.target_vs: vs,
                    self.nnet.dropout: args.dropout,
                    self.nnet.isTraining: True
                }

                # measure data loading time
                data_time.update(time.time() - end)

                # record loss
                self.sess.run(self.nnet.train_step, feed_dict=input_dict)
                pi_loss, v_loss = self.sess.run(
                    [self.nnet.loss_pi, self.nnet.loss_v],
                    feed_dict=input_dict)
                pi_losses.update(pi_loss, len(boards))
                v_losses.update(v_loss, len(boards))

                # measure elapsed time
                batch_time.update(time.time() - end)
                end = time.time()
                batch_idx += 1
Example #9
    def train(self, batches, train_steps):
        self.nnet.train()

        data_time = AverageMeter()
        batch_time = AverageMeter()
        pi_losses = AverageMeter()
        v_losses = AverageMeter()
        end = time()

        #print(f'Current LR: {self.scheduler.get_lr()[0]}')
        bar = Bar('Training Net', max=train_steps)
        current_step = 0
        while current_step < train_steps:
            for batch_idx, batch in enumerate(batches):
                if current_step == train_steps:
                    break
                current_step += 1
                boards, target_pis, target_vs = batch

                # predict
                if args.cuda:
                    boards, target_pis, target_vs = boards.contiguous().cuda(
                    ), target_pis.contiguous().cuda(), target_vs.contiguous().cuda()

                # measure data loading time
                data_time.update(time() - end)

                # compute output
                out_pi, out_v = self.nnet(boards)
                l_pi = self.loss_pi(target_pis, out_pi)
                l_v = self.loss_v(target_vs, out_v)
                total_loss = l_pi + l_v
                # record loss
                pi_losses.update(l_pi.item(), boards.size(0))
                v_losses.update(l_v.item(), boards.size(0))

                # compute gradient and do SGD step
                self.optimizer.zero_grad()
                total_loss.backward()
                self.optimizer.step()

                # measure elapsed time
                batch_time.update(time() - end)
                end = time()

                # plot progress
                bar.suffix = '({step}/{size}) Data: {data:.3f}s | Batch: {bt:.3f}s | Total: {total:} | ETA: {eta:} | Loss_pi: {lpi:.4f} | Loss_v: {lv:.3f}'.format(
                    step=current_step,
                    size=train_steps,
                    data=data_time.avg,
                    bt=batch_time.avg,
                    total=bar.elapsed_td,
                    eta=bar.eta_td,
                    lpi=pi_losses.avg,
                    lv=v_losses.avg,
                )
                bar.next()
        self.scheduler.step(pi_losses.avg+v_losses.avg)
        bar.finish()
        print()

        return pi_losses.avg, v_losses.avg
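
The call scheduler.step(pi_losses.avg + v_losses.avg) passes a metric, which matches torch.optim.lr_scheduler.ReduceLROnPlateau; the scheduler's construction is not shown, so the wiring below is an assumption, with a stand-in model for illustration:

import torch.nn as nn
import torch.optim as optim

nnet = nn.Linear(4, 2)  # stand-in model, not the real network
optimizer = optim.Adam(nnet.parameters(), lr=1e-3)
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min',
                                                 factor=0.5, patience=3)

# after each train(...) pass, feed it the combined loss:
scheduler.step(0.9)  # lowers the LR once the metric stops improving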
Example #10
    def train(self, examples):
        """
        examples: list of examples, each example is of form (board, pi, v, turn) || past data
        """

        for epoch in range(args.epochs):
            print('EPOCH ::: ' + str(epoch + 1))
            data_time = AverageMeter()
            batch_time = AverageMeter()
            pi_losses = AverageMeter()
            v_losses = AverageMeter()
            end = time.time()

            bar = Bar('Training Net', max=int(len(examples) / args.batch_size))
            batch_idx = 0

            # self.sess.run(tf.local_variables_initializer())
            while batch_idx < int(len(examples) / args.batch_size):
                sample_ids = np.random.randint(len(examples),
                                               size=args.batch_size)
                boards, pis, vs, turns = list(
                    zip(*[examples[i] for i in sample_ids])
                )  # boards, policy targets (pi), value targets (v), turn counters
                turns = [[turn] for turn in turns]

                # for i in range(len(boards)):
                #     actual_turn = turns[i][0]
                #     player = 1 if actual_turn %2 ==0 else -1
                #     board = boards[i]
                #     if player == 1:
                #         board[0:2, :][board[0:2, :]==0] = 3
                #     else:
                #         board[6:8, :][board[0:2, :]==0] = 3
                # #     # print(actual_turn)
                # #     # print(np.array(boards[i]).reshape(8,8))
                # #     # a = input()

                # predict and compute gradient and do SGD step
                train_input_dict = {
                    self.nnet.input_boards: boards,  #input X
                    self.nnet.turn: turns,
                    self.nnet.target_pis: pis,  #for calculating loss
                    self.nnet.target_vs: vs,  #for calculating loss
                    self.nnet.dropout: args.dropout,
                    self.nnet.isTraining: True
                }

                # measure data loading time
                data_time.update(time.time() - end)

                # run a training step
                self.sess.run(self.nnet.train_step, feed_dict=train_input_dict)
                # record the loss values
                pi_loss, v_loss = self.sess.run(
                    [self.nnet.loss_pi, self.nnet.loss_v],
                    feed_dict=train_input_dict)
                pi_losses.update(pi_loss, len(boards))
                v_losses.update(v_loss, len(boards))

                # measure elapsed time
                batch_time.update(time.time() - end)
                end = time.time()
                batch_idx += 1

                # plot progress
                bar.suffix = '({batch}/{size}) Data: {data:.3f}s | Batch: {bt:.3f}s | Total: {total:} | ETA: {eta:} | Loss_pi: {lpi:.4f} | Loss_v: {lv:.3f}'.format(
                    batch=batch_idx,
                    size=int(len(examples) / args.batch_size),
                    data=data_time.avg,
                    bt=batch_time.avg,
                    total=bar.elapsed_td,
                    eta=bar.eta_td,
                    lpi=pi_losses.avg,
                    lv=v_losses.avg,
                )
                bar.next()
            bar.finish()
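
This variant extends each training example with a turn counter, so the zip(*...) unpack yields a fourth parallel tuple, and each scalar turn is wrapped in a singleton list to match a placeholder of shape (batch, 1). On toy data:

examples = [("b0", "pi0", 0.5, 12), ("b1", "pi1", -1.0, 13)]
boards, pis, vs, turns = list(zip(*examples))
turns = [[turn] for turn in turns]  # [[12], [13]], i.e. shape (batch, 1)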
Example #11
    def playGames(self, num, verbose=False):
        """
        Plays num games in which player1 starts num/2 games and player2 starts
        num/2 games.

        Returns:
            oneWon: games won by player1
            twoWon: games won by player2
            draws:  games won by nobody
        """
        eps_time = AverageMeter()
        bar = Bar('Arena.playGames', max=num)
        end = time.time()
        eps = 0
        maxeps = int(num)

        num = int(num / 2)
        oneWon = 0
        twoWon = 0
        draws = 0
        gameResults = []
        for _ in range(num):
            gameResult = self.playGame(verbose=verbose)
            #if gameResult==1:
            #    oneWon+=1
            #elif gameResult==-1:
            #    twoWon+=1
            #else:
            #    draws+=1
            # bookkeeping + plot progress
            gameResults.append(gameResult)
            eps += 1
            eps_time.update(time.time() - end)
            end = time.time()
            bar.suffix = '({eps}/{maxeps}) Eps Time: {et:.3f}s | Total: {total:} | ETA: {eta:}'.format(
                eps=eps,
                maxeps=maxeps,
                et=eps_time.avg,
                total=bar.elapsed_td,
                eta=bar.eta_td)
            bar.next()

        self.player1, self.player2 = self.player2, self.player1  # swap sides
        gameResults2 = []
        for _ in range(num):
            gameResult2 = self.playGame(verbose=verbose)
            #if gameResult==-1:
            #    oneWon+=1
            #elif gameResult==1:
            #    twoWon+=1
            #else:
            #    draws+=1
            # bookkeeping + plot progress

            gameResults2.append(gameResult2)
            eps += 1
            eps_time.update(time.time() - end)
            end = time.time()
            bar.suffix = '({eps}/{maxeps}) Eps Time: {et:.3f}s | Total: {total:} | ETA: {eta:}'.format(
                eps=eps,
                maxeps=maxeps,
                et=eps_time.avg,
                total=bar.elapsed_td,
                eta=bar.eta_td)
            bar.next()

        bar.finish()
        finalScore1 = np.sum(gameResults) / float(len(gameResults))
        finalScore2 = np.sum(gameResults2) / float(len(gameResults2))
        return finalScore1, finalScore2  #oneWon, twoWon, draws
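
Instead of separate win counts, this variant returns each side's mean game result; with outcomes of +1, -1 and small draw values, the score lands in [-1, 1]. For example:

import numpy as np

game_results = [1, 1, -1, 0, 1]  # 3 wins, 1 loss, 1 draw
final_score = np.sum(game_results) / float(len(game_results))  # -> 0.4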
Example #12
    def train(self, examples):
        """
        examples: list of examples, each example is of form (board, pi, v)
        """

        for epoch in range(args.epochs):
            print('EPOCH ::: ' + str(epoch+1))
            data_time = AverageMeter()
            batch_time = AverageMeter()
            pi_losses = AverageMeter()
            v_losses = AverageMeter()
            end = time.time()

            bar = Bar('Training Net', max=int(len(examples)/args.batch_size))
            batch_idx = 0

            # self.sess.run(tf.local_variables_initializer())
            while batch_idx < int(len(examples)/args.batch_size):
                sample_ids = np.random.randint(len(examples), size=args.batch_size)
                boards, pis, vs = list(zip(*[examples[i] for i in sample_ids]))

                # predict and compute gradient and do SGD step
                input_dict = {self.nnet.input_boards: boards, self.nnet.target_pis: pis, self.nnet.target_vs: vs, self.nnet.dropout: args.dropout, self.nnet.isTraining: True}

                # measure data loading time
                data_time.update(time.time() - end)

                # record loss
                self.sess.run(self.nnet.train_step, feed_dict=input_dict)
                pi_loss, v_loss = self.sess.run([self.nnet.loss_pi, self.nnet.loss_v], feed_dict=input_dict)
                pi_losses.update(pi_loss, len(boards))
                v_losses.update(v_loss, len(boards))

                # measure elapsed time
                batch_time.update(time.time() - end)
                end = time.time()
                batch_idx += 1

                # plot progress
                bar.suffix  = '({batch}/{size}) Data: {data:.3f}s | Batch: {bt:.3f}s | Total: {total:} | ETA: {eta:} | Loss_pi: {lpi:.4f} | Loss_v: {lv:.3f}'.format(
                            batch=batch_idx,
                            size=int(len(examples)/args.batch_size),
                            data=data_time.avg,
                            bt=batch_time.avg,
                            total=bar.elapsed_td,
                            eta=bar.eta_td,
                            lpi=pi_losses.avg,
                            lv=v_losses.avg,
                            )
                bar.next()
            bar.finish()
Example #13
    def train(self, examples):
        """
        This function trains the neural network with examples obtained from
        self-play.

        Input:
            examples: a list of training examples, where each example is of form
                      (board, pi, v). pi is the MCTS informed policy vector for
                      the given board, and v is its value. The examples have
                      the board in its canonical form.
        """
        optimizer = optim.Adam(self.nnet.parameters())
        for epoch in range(args.epochs):
            self.nnet.train()
            data_time = AverageMeter()
            batch_time = AverageMeter()
            pi_losses = AverageMeter()
            v_losses = AverageMeter()
            end = time.time()

            bar = Bar('Training Net', max=int(len(examples)/args.batch_size))
            batch_idx = 0

            while batch_idx < int(len(examples)/args.batch_size):
                sample_ids = np.random.randint(len(examples), size=args.batch_size)
                boards, pis, vs = list(zip(*[examples[i] for i in sample_ids]))
                boards = torch.FloatTensor(np.array(boards).astype(np.float64))
                target_pis = torch.FloatTensor(np.array(pis))
                target_vs = torch.FloatTensor(np.array(vs).astype(np.float64))

                boards, target_pis, target_vs = Variable(boards), Variable(target_pis), Variable(target_vs)
                data_time.update(time.time() - end)

                out_pi, out_v = self.nnet(boards)
                l_pi = self.loss_pi(target_pis, out_pi) # write the loss function
                l_v = self.loss_v(target_vs, out_v)     # write the loss function
                total_loss = l_pi + l_v

                pi_losses.update(l_pi.data[0], boards.size(0))
                v_losses.update(l_v.data[0], boards.size(0))

                optimizer.zero_grad()
                total_loss.backward()
                optimizer.step()

                batch_time.update(time.time() - end)
                end = time.time()
                batch_idx += 1

                bar.suffix  = '({batch}/{size}) Data: {data:.3f}s | Batch: {bt:.3f}s | Total: {total:} | ETA: {eta:} | Loss_pi: {lpi:.4f} | Loss_v: {lv:.3f}'.format(
                            batch=batch_idx,
                            size=int(len(examples)/args.batch_size),
                            data=data_time.avg,
                            bt=batch_time.avg,
                            total=bar.elapsed_td,
                            eta=bar.eta_td,
                            lpi=pi_losses.avg,
                            lv=v_losses.avg,
                            )
                bar.next()
            bar.finish()
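
The loss_pi and loss_v helpers are called with targets first and outputs second but are never shown. A sketch consistent with the usual AlphaZero-style setup, assuming the policy head outputs log-probabilities:

import torch

def loss_pi(targets, outputs):
    # cross-entropy against the MCTS policy (outputs are log-probabilities)
    return -torch.sum(targets * outputs) / targets.size()[0]

def loss_v(targets, outputs):
    # mean squared error on the game outcome
    return torch.sum((targets - outputs.view(-1)) ** 2) / targets.size()[0]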
Example #14
def train(model, dm, loss_criterion, optimizer, args):
    model.train()  # switch to train mode

    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    p_micro = AverageMeter()
    r_micro = AverageMeter()
    f_micro = AverageMeter()
    p_macro = AverageMeter()
    r_macro = AverageMeter()
    f_macro = AverageMeter()
    s_macro = AverageMeter()
    mAP_micro = AverageMeter()
    mAP_macro = AverageMeter()
    acc = AverageMeter()

    end = time.time()

    bar = Bar('Processing', max=args.batches_per_epoch)
    batch_idx = 0

    while batch_idx < args.batches_per_epoch:
        # sample batch
        (des, des_unsort, ind, ind_unsort, act, act_unsort,
         targets) = dm.sample_train_batch(batch_size=args.batch_size,
                                          embed1=model.glove_embed,
                                          embed2=model.other_embed,
                                          use_cuda=args.cuda)
        encoder_init_hidden = model.encoder.initHidden(
            batch_size=args.batch_size)

        if args.cuda:
            model = model.cuda()
            targets = targets.cuda()
            if len(encoder_init_hidden):
                encoder_init_hidden = [x.cuda() for x in encoder_init_hidden]
            else:
                encoder_init_hidden = encoder_init_hidden.cuda()
            loss_criterion = loss_criterion.cuda()

        # measure data loading time
        data_time.update(time.time() - end)

        # compute output
        logit_output = model(des_embed=des,
                             des_unsort=des_unsort,
                             ind_embed=ind,
                             ind_unsort=ind_unsort,
                             act_embed=act,
                             act_unsort=act_unsort,
                             encoder_init_hidden=encoder_init_hidden,
                             batch_size=args.batch_size)
        loss = loss_criterion(logit_output, targets)

        # measure precision, recall, fscore, support and record loss
        batch_p_micro, batch_r_micro, batch_f_micro, batch_s_micro, batch_p_macro, batch_r_macro, batch_f_macro\
            , batch_s_macro, batch_mAP_micro, batch_mAP_macro, batch_acc = compute_metrics(logit=logit_output, target=targets)

        p_macro.update(batch_p_macro, args.batch_size)
        p_micro.update(batch_p_micro, args.batch_size)
        r_macro.update(batch_r_macro, args.batch_size)
        r_micro.update(batch_r_micro, args.batch_size)
        f_macro.update(batch_f_macro, args.batch_size)
        f_micro.update(batch_f_micro, args.batch_size)
        s_macro.update(batch_s_macro, args.batch_size)
        mAP_micro.update(batch_mAP_micro, args.batch_size)
        mAP_macro.update(batch_mAP_macro, args.batch_size)
        acc.update(batch_acc, args.batch_size)

        losses.update(loss.item(), args.batch_size)

        # compute gradient
        optimizer.zero_grad()
        loss.backward()

        # optimizer step
        optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()
        batch_idx += 1

        # plot progress
        bar.suffix = '({batch}/{size}) Data: {data:.3f}s | Batch: {bt:.3f}s' \
                     '| Total: {total:} | ETA: {eta:} | Loss: {loss:.4f} | Acc: {acc:.3f} ' \
                     '| P: {p:.3f}| R: {r:.3f}| F: {f:.3f}| mAP mic: {mAP:.3f}|' \
            .format(
            batch=batch_idx,
            size=args.batches_per_epoch,
            data=data_time.avg,
            bt=batch_time.avg,
            total=bar.elapsed_td,
            eta=bar.eta_td,
            loss=losses.avg,
            acc=acc.avg,
            p=p_micro.avg,
            r=r_micro.avg,
            f=f_micro.avg,
            mAP=mAP_micro.avg,
        )
        bar.next()
    bar.finish()

    return losses.avg, acc.avg
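
compute_metrics is not shown; below is a reduced, hedged sketch of a multi-label version built on scikit-learn that covers the precision/recall/F-score part of what the loop consumes (the 0.5 threshold and the function shape are assumptions; the original also returns support, mAP and accuracy):

import torch
from sklearn.metrics import precision_recall_fscore_support

def compute_prf(logit, target, threshold=0.5):
    """Micro- and macro-averaged precision, recall and F1 for multi-label logits."""
    pred = (torch.sigmoid(logit) > threshold).cpu().numpy()
    true = target.cpu().numpy()
    p_mic, r_mic, f_mic, _ = precision_recall_fscore_support(
        true, pred, average='micro', zero_division=0)
    p_mac, r_mac, f_mac, _ = precision_recall_fscore_support(
        true, pred, average='macro', zero_division=0)
    return p_mic, r_mic, f_mic, p_mac, r_mac, f_mac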
Example #15
def main():
    # global args
    args = parser.parse_args()

    # <editor-fold desc="Initialization">
    if args.comment == "test":
        print("WARNING: name is test!!!\n\n")

    # now = datetime.datetime.now()
    # current_date = now.strftime("%m-%d-%H-%M")

    assert args.text_criterion in ("MSE", "Cosine", "Hinge",
                                   "NLLLoss"), 'Invalid Loss Function'
    assert args.cm_criterion in ("MSE", "Cosine",
                                 "Hinge"), 'Invalid Loss Function'

    assert args.common_emb_ratio <= 1.0 and args.common_emb_ratio >= 0

    #</editor-fold>

    # <editor-fold desc="Image Preprocessing">

    # Image preprocessing //ATTENTION
    # For normalization, see https://github.com/pytorch/vision#models
    transform = transforms.Compose([
        transforms.RandomCrop(args.crop_size),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        # transforms.Normalize((.5,.5,.5),
        #                      (.5, .5, .5))
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
    ])

    #</editor-fold>

    # <editor-fold desc="Creating Embeddings">

    # Load vocabulary wrapper.
    print("Loading Vocabulary...")
    with open(args.vocab_path, 'rb') as f:
        vocab = pickle.load(f)

    # Load Embeddings
    emb_size = args.word_embedding_size
    emb_path = args.embedding_path
    if args.embedding_path[-1] == '/':
        emb_path += 'glove.6B.' + str(emb_size) + 'd.txt'

    print("Loading Embeddings...")
    emb = load_glove_embeddings(emb_path, vocab.word2idx, emb_size)

    glove_emb = nn.Embedding(emb.size(0), emb.size(1))

    # Freeze weighs
    if args.fixed_embeddings == "true":
        glove_emb.weight.requires_grad = False

    # </editor-fold>

    # <editor-fold desc="Data-Loaders">

    # Build data loader
    print("Building Data Loader For Test Set...")
    data_loader = get_loader(args.image_dir,
                             args.caption_path,
                             vocab,
                             transform,
                             args.batch_size,
                             shuffle=True,
                             num_workers=args.num_workers)

    print("Building Data Loader For Validation Set...")
    val_loader = get_loader(args.valid_dir,
                            args.valid_caption_path,
                            vocab,
                            transform,
                            args.batch_size,
                            shuffle=True,
                            num_workers=args.num_workers)

    # </editor-fold>

    # <editor-fold desc="Network Initialization">

    print("Setting up the trainer...")
    model_trainer = trainer(args, glove_emb, vocab)

    # </editor-fold>

    for epoch in range(args.num_epochs):

        # <editor-fold desc = "Epoch Initialization"?

        # TRAINING TIME
        print('EPOCH ::: TRAINING ::: ' + str(epoch + 1))
        batch_time = AverageMeter()
        img_losses = AverageMeter()
        txt_losses = AverageMeter()
        cm_losses = AverageMeter()
        end = time.time()

        bar = Bar('Training Net', max=len(data_loader))

        for i, (images, captions, lengths) in enumerate(data_loader):

            if i == len(data_loader) - 1:
                break

            images = to_var(images)
            captions = to_var(captions)
            lengths = to_var(
                torch.LongTensor(lengths))  # print(captions.size())

            img_rc_loss, txt_rc_loss = model_trainer.train(
                images, captions, lengths, not i % args.image_save_interval)

            txt_losses.update(txt_rc_loss.data[0], args.batch_size)
            img_losses.update(img_rc_loss.data[0], args.batch_size)
            # cm_losses.update(cm_loss.data[0], args.batch_size)

            batch_time.update(time.time() - end)
            end = time.time()

            # plot progress
            bar.suffix = '({batch}/{size}) Batch: {bt:.3f}s | Total: {total:} | ETA: {eta:}'.format(
                batch=i + 1,
                size=len(data_loader),
                bt=batch_time.avg,
                total=bar.elapsed_td,
                eta=bar.eta_td,
            )

            bar.next()

        # </editor-fold>

        bar.finish()
        model_trainer.save_losses(epoch, img_losses.avg, txt_losses.avg)
        model_trainer.save_models(epoch)
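
The main() above converts batches with a to_var helper that the snippet never defines. A plausible implementation for the pre-0.4 PyTorch Variable API this code targets is sketched below; the name and call sites come from the snippet, everything else is an assumption.

# Hypothetical to_var helper, assuming the old torch.autograd.Variable API
# that the surrounding snippet (e.g. loss.data[0]) clearly targets.
import torch
from torch.autograd import Variable

def to_var(x, volatile=False):
    if torch.cuda.is_available():
        x = x.cuda()
    return Variable(x, volatile=volatile)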
Example #16
    def train(self, examples):
        """
        examples: list of examples, each example is of form (board, pi, v)
        """
        optimizer = optim.Adam(self.nnet.parameters())

        for epoch in range(args.epochs):
            print('EPOCH ::: ' + str(epoch+1))
            self.nnet.train()
            data_time = AverageMeter()
            batch_time = AverageMeter()
            pi_losses = AverageMeter()
            v_losses = AverageMeter()
            end = time.time()

            bar = Bar('Training Net', max=int(len(examples)/args.batch_size))
            batch_idx = 0

            while batch_idx < int(len(examples)/args.batch_size):
                sample_ids = np.random.randint(len(examples), size=args.batch_size)
                boards, pis, vs = list(zip(*[examples[i] for i in sample_ids]))
                boards = torch.FloatTensor(np.array(boards).astype(np.float64))
                target_pis = torch.FloatTensor(np.array(pis))
                target_vs = torch.FloatTensor(np.array(vs).astype(np.float64))

                # predict
                if args.cuda:
                    boards, target_pis, target_vs = boards.contiguous().cuda(), target_pis.contiguous().cuda(), target_vs.contiguous().cuda()
                boards, target_pis, target_vs = Variable(boards), Variable(target_pis), Variable(target_vs)

                # measure data loading time
                data_time.update(time.time() - end)

                # compute output
                out_pi, out_v = self.nnet(boards)
                l_pi = self.loss_pi(target_pis, out_pi)

                l_v = self.loss_v(target_vs, out_v)
                total_loss = l_pi + l_v

                # record loss
                pi_losses.update(l_pi.data[0], boards.size(0))
                v_losses.update(l_v.data[0], boards.size(0))

                # compute gradient and do SGD step
                optimizer.zero_grad()
                total_loss.backward()
                optimizer.step()

                # measure elapsed time
                batch_time.update(time.time() - end)
                end = time.time()
                batch_idx += 1

                # plot progress
                bar.suffix  = '({batch}/{size}) Data: {data:.3f}s | Batch: {bt:.3f}s | Total: {total:} | ETA: {eta:} | Loss_pi: {lpi:.4f} | Loss_v: {lv:.3f}'.format(
                            batch=batch_idx,
                            size=int(len(examples)/args.batch_size),
                            data=data_time.avg,
                            bt=batch_time.avg,
                            total=bar.elapsed_td,
                            eta=bar.eta_td,
                            lpi=pi_losses.avg,
                            lv=v_losses.avg,
                            )
                bar.next()
            bar.finish()
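
The trainer above calls self.loss_pi and self.loss_v without showing them. In AlphaZero-style trainers of this family, the policy loss is typically a cross-entropy between the MCTS visit distribution and the network's log-probabilities, and the value loss is a mean squared error against the game outcome. A sketch under those assumptions (out_pi is assumed to be log-softmax output):

import torch

def loss_pi(targets, outputs):
    # cross-entropy between MCTS policy targets and predicted log-probs
    return -torch.sum(targets * outputs) / targets.size()[0]

def loss_v(targets, outputs):
    # mean squared error between game outcomes and predicted values
    return torch.sum((targets - outputs.view(-1)) ** 2) / targets.size()[0]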
Example #17
    def learn(self):
        """
        Performs numIters iterations with numEps episodes of self-play in each
        iteration. After every iteration, it retrains neural network with
        examples in trainExamples (which has a maximum length of maxlenOfQueue).
        It then pits the new neural network against the old one and accepts it
        only if it wins >= updateThreshold fraction of games.
        """

        for i in range(1, self.args.numIters + 1):
            # bookkeeping
            print('------ITER ' + str(i) + '------')
            # examples of the iteration
            if not self.skipFirstSelfPlay or i > 1:
                iterationTrainExamples = deque([],
                                               maxlen=self.args.maxlenOfQueue)

                eps_time = AverageMeter()
                bar = Bar('Self Play', max=self.args.numEps)
                end = time.time()

                for eps in range(self.args.numEps):
                    # sample one sentence from the database (Yelp)
                    self.mcts = MCTS(self.game, self.nnet,
                                     self.args)  # reset search tree
                    iterationTrainExamples += self.executeEpisode()

                    # bookkeeping + plot progress
                    eps_time.update(time.time() - end)
                    end = time.time()
                    bar.suffix = '({eps}/{maxeps}) Eps Time: {et:.3f}s | Total: {total:} | ETA: {eta:}'.format(
                        eps=eps + 1,
                        maxeps=self.args.numEps,
                        et=eps_time.avg,
                        total=bar.elapsed_td,
                        eta=bar.eta_td)
                    bar.next()
                bar.finish()

                # save the iteration examples to the history
                self.trainExamplesHistory.append(iterationTrainExamples)

            if len(self.trainExamplesHistory
                   ) > self.args.numItersForTrainExamplesHistory:
                print("len(trainExamplesHistory) =",
                      len(self.trainExamplesHistory),
                      " => remove the oldest trainExamples")
                self.trainExamplesHistory.pop(0)
            # backup history to a file
            # NB! the examples were collected using the model from the previous iteration, so (i-1)
            self.saveTrainExamples(i - 1)

            # shuffle examples before training
            trainExamples = []
            for e in self.trainExamplesHistory:
                trainExamples.extend(e)
            shuffle(trainExamples)

            # training new network, keeping a copy of the old one
            self.nnet.save_checkpoint(folder=self.args.checkpoint,
                                      filename='temp.pth.tar')
            self.pnet.load_checkpoint(folder=self.args.checkpoint,
                                      filename='temp.pth.tar')
            pmcts = MCTS(self.game, self.pnet, self.args)

            print(np.shape(trainExamples))

            self.nnet.train(trainExamples)
            nmcts = MCTS(self.game, self.nnet, self.args)

            print('PITTING AGAINST PREVIOUS VERSION')
            arena = Arena(lambda x: np.argmax(pmcts.getActionProb(x, temp=0)),
                          lambda x: np.argmax(nmcts.getActionProb(x, temp=0)),
                          self.game)
            #pwins, nwins, draws = arena.playGames(self.args.arenaCompare)
            finalScore1, finalScore2 = arena.playGames(self.args.arenaCompare)
            with open("output.txt", "a") as text_file:
                text_file.write('Score NN1 : %.2f ; Score NN2 : %.2f\n' %
                                (finalScore1, finalScore1))

            print('Score NN1 : %.2f ; Score NN2 : %.2f' %
                  (finalScore1, finalScore1))
            if finalScore1 > finalScore2:  #and float(nwins)/(pwins+nwins) < self.args.updateThreshold:
                print('REJECTING NEW MODEL')
                self.nnet.load_checkpoint(folder=self.args.checkpoint,
                                          filename='temp.pth.tar')
            else:
                print('ACCEPTING NEW MODEL')
                self.nnet.save_checkpoint(folder=self.args.checkpoint,
                                          filename=self.getCheckpointFile(i))
                self.nnet.save_checkpoint(folder=self.args.checkpoint,
                                          filename='best.pth.tar')
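
learn() above names its checkpoints via self.getCheckpointFile(i). Judging by the literal 'checkpoint_' + str(i) + '.pth.tar' names used in a later example on this page, the helper is probably just a format wrapper; the sketch below is an assumption to that effect.

def get_checkpoint_file(iteration):
    # mirrors the 'checkpoint_<i>.pth.tar' naming seen elsewhere on this page
    return 'checkpoint_' + str(iteration) + '.pth.tar'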
Example #18
    def train(self, iteration=None, board=None, numeps=None):

        # bookkeeping

        # examples of the iteration
        numeps = self.args.numEps

        if not self.skipFirstSelfPlay or iteration > 1:
            iterationTrainExamples = deque([], maxlen=self.args.maxlenOfQueue)

            eps_time = AverageMeter()
            bar = Bar('Self Play', max=numeps)
            end = time.time()
            #for clif_state in self.board
            for eps in range(numeps):
                self.mcts = MCTS(self.game, self.nnet,
                                 self.args)  # reset search tree
                if board is None:
                    iterationTrainExamples += self.executeEpisode()
                else:
                    iterationTrainExamples += self.executeEpisode(board)
                #print iterationTrainExamples[0]

                # bookkeeping + plot progress
                eps_time.update(time.time() - end)
                end = time.time()
                bar.suffix = '({eps}/{maxeps}) Eps Time: {et:.3f}s | Total: {total:} | ETA: {eta:}'.format(
                    eps=eps + 1,
                    maxeps=numeps,
                    et=eps_time.avg,
                    total=bar.elapsed_td,
                    eta=bar.eta_td)
                bar.next()
            bar.finish()

            # save the iteration examples to the history
            self.trainExamplesHistory.append(iterationTrainExamples)

        if len(self.trainExamplesHistory
               ) > self.args.numItersForTrainExamplesHistory:
            print("len(trainExamplesHistory) =",
                  len(self.trainExamplesHistory),
                  " => remove the oldest trainExamples")
            self.trainExamplesHistory.pop(0)
        # backup history to a file
        # NB! the examples were collected using the model from the previous iteration, so (i-1)
        self.saveTrainExamples(iteration - 1)

        # shuffle examples before training
        trainExamples = []
        for e in self.trainExamplesHistory:
            trainExamples.extend(e)
        shuffle(trainExamples)

        # training new network, keeping a copy of the old one
        self.nnet.save_checkpoint(folder=self.args.checkpoint,
                                  filename='temp.pth.tar')
        self.pnet.load_checkpoint(folder=self.args.checkpoint,
                                  filename='temp.pth.tar')
        pmcts = MCTS(self.game, self.pnet, self.args)

        self.nnet.train(trainExamples)
        nmcts = MCTS(self.game, self.nnet, self.args)

        print('PITTING AGAINST PREVIOUS VERSION')
        arena = Arena(lambda x: np.argmax(pmcts.getActionProb(x, temp=0)),
                      lambda x: np.argmax(nmcts.getActionProb(x, temp=0)),
                      self.game)
        pwins, nwins, draws = arena.playGames(self.args.arenaCompare)

        print('NEW/PREV WINS : %d / %d ; DRAWS : %d' % (nwins, pwins, draws))
        if pwins + nwins > 0 and float(nwins) / (
                pwins + nwins) < self.args.updateThreshold:
            print('REJECTING NEW MODEL')
            self.nnet.load_checkpoint(folder=self.args.checkpoint,
                                      filename='temp.pth.tar')
        else:
            print('ACCEPTING NEW MODEL')
            self.nnet.save_checkpoint(
                folder=self.args.checkpoint,
                filename=self.getCheckpointFile(iteration))
            self.nnet.save_checkpoint(folder=self.args.checkpoint,
                                      filename='best.pth.tar')
    def learn(self):
        """
        Performs numIters iterations with numEps episodes of self-play in each
        iteration. After every iteration, it retrains neural network with
        examples in trainExamples (which has a maximum length of maxlenOfQueue).
        It then pits the new neural network against the old one and accepts it
        only if it wins >= updateThreshold fraction of games.
        """
        iterHistory = {'ITER': [], 'ITER_DETAIL': [], 'PITT_RESULT': []}

        for i in range(1, self.args.numIters + 1):
            iterHistory['ITER'].append(i)
            # bookkeeping
            print(
                '###########################ITER:{}###########################'
                .format(str(i)))
            # examples of the iteration
            if not self.skipFirstSelfPlay or i > 1:
                iterationTrainExamples = deque([],
                                               maxlen=self.args.maxlenOfQueue)
                eps_time = AverageMeter()
                if self.display == 1:
                    bar = Bar('Self Play', max=self.args.numEps)
                end = time.time()

                for eps in range(self.args.numEps):
                    # print("{}th Episode:".format(eps+1))
                    self.mcts = MCTS(self.game, self.nnet,
                                     self.args)  # reset search tree
                    iterationTrainExamples += self.executeEpisode()

                    # bookkeeping + plot progress
                    eps_time.update(time.time() - end)
                    end = time.time()

                    if self.display == 1:
                        bar.suffix = '({eps}/{maxeps}) Eps Time: {et:.3f}s | Total: {total:} | ETA: {eta:}'.format(
                            eps=eps + 1,
                            maxeps=self.args.numEps,
                            et=eps_time.avg,
                            total=bar.elapsed_td,
                            eta=bar.eta_td)
                        bar.next()
                if self.display == 1:

                    bar.finish()

                # save the iteration examples to the history
                self.trainExamplesHistory.append(iterationTrainExamples)

            if len(self.trainExamplesHistory
                   ) > self.args.numItersForTrainExamplesHistory:
                # print("len(trainExamplesHistory) =", len(self.trainExamplesHistory), " => remove the oldest trainExamples")
                self.trainExamplesHistory.pop(0)
            # backup history to a file
            # NB! the examples were collected using the model from the previous iteration, so (i-1)
            self.saveTrainExamples(i - 1)

            # shuffle examples before training
            trainExamples = []
            for e in self.trainExamplesHistory:
                trainExamples.extend(e)
            shuffle(trainExamples)

            # training new network, keeping a copy of the old one
            self.nnet.save_checkpoint(folder=self.args.checkpoint,
                                      filename='temp.pth.tar')
            self.pnet.load_checkpoint(folder=self.args.checkpoint,
                                      filename='temp.pth.tar')
            pmcts = MCTS(self.game, self.pnet, self.args)

            trainLog = self.nnet.train(trainExamples)
            if self.keepLog:
                trainLog.to_csv(self.logPath +
                                'ITER_{}_TRAIN_LOG.csv'.format(i))
            iterHistory['ITER_DETAIL'].append(
                self.logPath + 'ITER_{}_TRAIN_LOG.csv'.format(i))
            nmcts = MCTS(self.game, self.nnet, self.args)

            print('PITTING AGAINST PREVIOUS VERSION')
            arena = Arena(lambda x: np.argmax(pmcts.getActionProb(x, temp=0)),
                          lambda x: np.argmax(nmcts.getActionProb(x, temp=0)),
                          self.game)
            pwins, nwins, draws = arena.playGames(self.args.arenaCompare)

            print('NEW/PREV WINS : %d / %d ; DRAWS : %d' %
                  (nwins, pwins, draws))
            if pwins + nwins > 0 and float(nwins) / (
                    pwins + nwins) < self.args.updateThreshold:
                print('REJECTING NEW MODEL')
                iterHistory['PITT_RESULT'].append('R')
                self.nnet.load_checkpoint(folder=self.args.checkpoint,
                                          filename='temp.pth.tar')
            else:
                print('ACCEPTING NEW MODEL')
                iterHistory['PITT_RESULT'].append('A')
                self.nnet.save_checkpoint(folder=self.args.checkpoint,
                                          filename=self.getCheckpointFile(i))
                self.nnet.save_checkpoint(folder=self.args.checkpoint,
                                          filename='best.pth.tar')

            pd.DataFrame(data=iterHistory).to_csv(self.logPath +
                                                  'ITER_LOG.csv')
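
Several variants call self.saveTrainExamples(i - 1), and a comment in a later example mentions a loadTrainExamples counterpart. A minimal pickle-based pair, written as free functions for illustration (the file layout and the use of pickle are assumptions):

import os
from pickle import Pickler, Unpickler

def save_train_examples(history, folder, filename):
    # serialize the whole trainExamplesHistory (a list of deques) to disk
    if not os.path.exists(folder):
        os.makedirs(folder)
    with open(os.path.join(folder, filename), 'wb+') as f:
        Pickler(f).dump(history)

def load_train_examples(path):
    # counterpart loader: returns the deserialized history list
    with open(path, 'rb') as f:
        return Unpickler(f).load()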
Example #20
File: run.py Project: tund/HydraNet
def train(trainloader, model, criterion, optimizer, epoch, sample_wts,
          use_cuda):
    # switch to train mode
    model.train()

    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = [AverageMeter() for i in range(args.n_heads)]
    losses_avg = AverageMeter()
    top1 = [AverageMeter() for i in range(args.n_heads)]
    top5 = [AverageMeter() for i in range(args.n_heads)]
    top1_avg = AverageMeter()
    top5_avg = AverageMeter()
    end = time.time()

    bar = Bar('Processing', max=len(trainloader))
    for batch_idx, (inputs, targets) in enumerate(trainloader):
        # measure data loading time
        #         print('.', end='')
        data_time.update(time.time() - end)

        if use_cuda:
            # async= became invalid syntax in Python 3.7+; non_blocking is the equivalent
            inputs, targets = inputs.cuda(), targets.cuda(non_blocking=True)
        inputs, targets = torch.autograd.Variable(
            inputs), torch.autograd.Variable(targets)

        # compute output
        outputs = model(inputs)
        optimizer.zero_grad()
        for head_idx in range(args.n_heads):
            loss = criterion(outputs[head_idx], targets)
            loss = (loss * sample_wts[head_idx][:loss.shape[0]] /
                    sample_wts[head_idx][:loss.shape[0]].sum()).sum()

            # measure accuracy and record loss
            prec1, prec5 = accuracy(outputs[head_idx].data,
                                    targets.data,
                                    topk=(1, 5))
            if float(torch.__version__[:3]) < 0.5:
                losses[head_idx].update(loss.data[0], inputs.size(0))
                top1[head_idx].update(prec1[0], inputs.size(0))
                top5[head_idx].update(prec5[0], inputs.size(0))
            else:
                losses[head_idx].update(loss.data, inputs.size(0))
                top1[head_idx].update(prec1, inputs.size(0))
                top5[head_idx].update(prec5, inputs.size(0))

            # compute gradient and do SGD step
            loss.backward(retain_graph=True)
        losses_avg.update(
            sum([h.avg for h in losses]) / len(losses), inputs.size(0))
        top1_avg.update(sum([h.avg for h in top1]) / len(top1), inputs.size(0))
        top5_avg.update(sum([h.avg for h in top5]) / len(top5), inputs.size(0))
        optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        # plot progress
        bar.suffix = '({batch}/{size}) Data: {data:.3f}s | Batch: {bt:.3f}s | Total: {total:} | ETA: {eta:} | Loss_avg: {loss:.4f} | top1_avg: {top1: .4f} | top5_avg: {top5: .4f}'.format(
            batch=batch_idx + 1,
            size=len(trainloader),
            data=data_time.avg,
            bt=batch_time.avg,
            total=bar.elapsed_td,
            eta=bar.eta_td,
            loss=losses_avg.avg,
            top1=top1_avg.avg,
            top5=top5_avg.avg,
        )
        bar.next()
    bar.finish()
    wandb.log(
        {
            "top1": [h.avg for h in top1],
            "top1_avg": top1_avg.avg,
            "top5": [h.avg for h in top5],
            "top5_avg": top5_avg.avg,
            "losses": [h.avg for h in losses],
            "losses_avg": losses_avg.avg
        },
        step=epoch)
    return (losses_avg.avg, top1_avg.avg)
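
The HydraNet training loop above measures prec1/prec5 with an accuracy helper whose body is not shown; only its call signature accuracy(output, target, topk=(1, 5)) is visible. The conventional top-k implementation from the pytorch-classification utilities looks like the following sketch (an assumption here):

def accuracy(output, target, topk=(1,)):
    # top-k precision: fraction of samples whose true class appears among
    # the k highest-scoring predictions, reported in percent
    maxk = max(topk)
    batch_size = target.size(0)

    _, pred = output.topk(maxk, 1, True, True)
    pred = pred.t()
    correct = pred.eq(target.view(1, -1).expand_as(pred))

    res = []
    for k in topk:
        correct_k = correct[:k].view(-1).float().sum(0)
        res.append(correct_k.mul_(100.0 / batch_size))
    return res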
Example #21
    def learn(self):
        """
        Performs numIters iterations with numEps episodes of self-play in each
        iteration. After every iteration, it retrains neural network with
        examples in trainExamples (which has a maximum length of maxlenOfQueue).
        It then pits the new neural network against the old one and accepts it
        only if it wins >= updateThreshold fraction of games.
        """

        trainExamples = deque([], maxlen=self.args.maxlenOfQueue)
        for i in range(self.args.numIters):
            # bookkeeping
            print('------ITER ' + str(i + 1) + '------')
            eps_time = AverageMeter()
            bar = Bar('Self Play', max=self.args.numEps)
            end = time.time()

            for eps in range(self.args.numEps):
                trainExamples += self.executeEpisode()

                # bookkeeping + plot progress
                eps_time.update(time.time() - end)
                end = time.time()
                bar.suffix = '({eps}/{maxeps}) Eps Time: {et:.3f}s | Total: {total:} | ETA: {eta:}'.format(
                    eps=eps + 1,
                    maxeps=self.args.numEps,
                    et=eps_time.avg,
                    total=bar.elapsed_td,
                    eta=bar.eta_td)
                bar.next()
            bar.finish()

            # training new network, keeping a copy of the old one
            self.nnet.save_checkpoint(folder=self.args.checkpoint,
                                      filename='temp.pth.tar')
            pnet = self.nnet.__class__(self.game)
            pnet.load_checkpoint(folder=self.args.checkpoint,
                                 filename='temp.pth.tar')
            pmcts = MCTS(self.game, pnet, self.args)
            self.nnet.train(trainExamples)
            nmcts = MCTS(self.game, self.nnet, self.args)

            print('PITTING AGAINST PREVIOUS VERSION')
            arena = Arena(lambda x: np.argmax(pmcts.getActionProb(x, temp=0)),
                          lambda x: np.argmax(nmcts.getActionProb(x, temp=0)),
                          self.game)
            pwins, nwins = arena.playGames(self.args.arenaCompare)

            print('NEW/PREV WINS : ' + str(nwins) + '/' + str(pwins))
            if pwins + nwins == 0 or float(nwins) / (
                    pwins + nwins) < self.args.updateThreshold:
                print('REJECTING NEW MODEL')
                self.nnet = pnet

            else:
                print('ACCEPTING NEW MODEL')
                self.nnet.save_checkpoint(folder=self.args.checkpoint,
                                          filename='checkpoint_' + str(i) +
                                          '.pth.tar')
                self.nnet.save_checkpoint(folder=self.args.checkpoint,
                                          filename='best.pth.tar')
                self.mcts = MCTS(self.game, self.nnet,
                                 self.args)  # reset search tree
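
The Arena variants on this page are constructed as Arena(player1, player2, game), where each player is a callable mapping a board to an action, and playGames returns win/draw counts as described in the playGames docstrings. A hypothetical standalone usage; OthelloGame and RandomPlayer are stand-in names, not part of the snippets above:

game = OthelloGame(6)          # assumed game implementation
rp1 = RandomPlayer(game).play  # a player is a callable: board -> action
rp2 = RandomPlayer(game).play

arena = Arena(rp1, rp2, game)
# an even game count lets each side start half the games
oneWon, twoWon, draws = arena.playGames(40, verbose=False)
print('P1/P2 wins: %d / %d ; draws: %d' % (oneWon, twoWon, draws))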
Example #22
File: run.py Project: tund/HydraNet
def test(testloader, model, criterion, epoch, use_cuda):
    global best_acc

    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()

    # switch to evaluate mode
    model.eval()

    end = time.time()
    bar = Bar('Processing', max=len(testloader))
    with torch.no_grad():
        for batch_idx, (inputs, targets) in enumerate(testloader):
            # measure data loading time
            data_time.update(time.time() - end)

            if use_cuda:
                inputs, targets = inputs.cuda(), targets.cuda()

            # compute output
            outputs = model(inputs)
            outputs_sum = outputs[0].cuda()
            for i in range(1, args.n_heads):
                outputs_sum = torch.add(outputs_sum, outputs[i].cuda())
            outputs = outputs_sum / args.n_heads
            loss = criterion(outputs, targets)
            loss = torch.mean(loss)

            # measure accuracy and record loss
            prec1, prec5 = accuracy(outputs.data, targets.data, topk=(1, 5))
            if float(torch.__version__[:3]) < 0.5:
                losses.update(loss.data[0], inputs.size(0))
                top1.update(prec1[0], inputs.size(0))
                top5.update(prec5[0], inputs.size(0))
            else:
                losses.update(loss.data, inputs.size(0))
                top1.update(prec1, inputs.size(0))
                top5.update(prec5, inputs.size(0))

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            # plot progress
            bar.suffix = '({batch}/{size}) Data: {data:.3f}s | Batch: {bt:.3f}s | Total: {total:} | ETA: {eta:} | Loss: {loss:.4f} | top1: {top1: .4f} | top5: {top5: .4f}'.format(
                batch=batch_idx + 1,
                size=len(testloader),
                data=data_time.avg,
                bt=batch_time.avg,
                total=bar.elapsed_td,
                eta=bar.eta_td,
                loss=losses.avg,
                top1=top1.avg,
                top5=top5.avg,
            )
            bar.next()
    bar.finish()
    wandb.log(
        {
            "top1 test": top1.avg,
            "top5 test": top5.avg,
            "losses test": losses.avg
        },
        step=epoch)
    return (losses.avg, top1.avg)
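
test() above declares global best_acc, which implies a driver loop that compares epochs and keeps the best weights. A hypothetical wiring of the HydraNet train() and test() functions from this page (the checkpoint path and args.epochs are assumptions):

import torch

best_acc = 0.0
for epoch in range(args.epochs):
    train_loss, train_acc = train(trainloader, model, criterion, optimizer,
                                  epoch, sample_wts, use_cuda)
    test_loss, test_acc = test(testloader, model, criterion, epoch, use_cuda)

    if test_acc > best_acc:  # keep the best-performing weights so far
        best_acc = test_acc
        torch.save(model.state_dict(), 'model_best.pth.tar')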
Example #23
    def learn(self):
        """
        Performs numIters iterations with numEps episodes of self-play in each
        iteration. After every iteration, it retrains neural network with
        examples in trainExamples (which has a maximum length of maxlenOfQueue).
        It then pits the new neural network against the old one and accepts it
        only if it wins >= updateThreshold fraction of games.
        """
        #Generate a fixed sensing matrix if the option is toggled to True. Two cases:
        #1) A is fixed, and arena_game_args.sensing_matrix is set equal to that of coach.game_args so the arena uses the same sensing matrix.
        #2) the folder which saves the fixed sensing matrix is empty, so a new matrix is generated and saved there.
        if self.args['fixed_matrix'] == True:
            if self.args['load_existing_matrix'] == True:
                self.game_args.sensing_matrix = np.load(
                    self.args['fixed_matrix_filepath'] + '/sensing_matrix.npy')
                self.arena_game_args.sensing_matrix = np.load(
                    self.args['fixed_matrix_filepath'] + '/sensing_matrix.npy')

                #FOR TESTING-------------------------------------------------------
                #print(self.game_args.sensing_matrix)
                #END TESTING-------------------------------------------------------

            else:  #if not loading an existing matrix in self.args['fixed_matrix_filepath'], then generate a new sensing matrix of given type self.args['matrix_type']
                self.game_args.generateSensingMatrix(self.args['m'],
                                                     self.args['n'],
                                                     self.args['matrix_type'])
                self.arena_game_args.sensing_matrix = self.game_args.sensing_matrix
                #Save the fixed matrix
                self.game_args.save_Matrix(self.args['fixed_matrix_filepath'])

                #FOR TESTING-------------------------------------------------------
                #print(self.game_args.sensing_matrix)
                #END TESTING-------------------------------------------------------

        for i in range(1, self.args['numIters'] + 1):
            print('------ITER ' + str(i) + '------')
            if not self.skipFirstSelfPlay or i > 1:  #default of self.skipFirstSelfPlay is False. If loading training from file then skipFirstSelfPlay is set to True. skipFirstSelfPlay allows us to load the latest nn_model with latest set of TrainingExamples
                iterationTrainExamples = deque(
                    [], maxlen=self.args['maxlenOfQueue'])
                #bookkeeping objects contained in pytorch_classification.utils
                eps_time = AverageMeter()
                bar = Bar('Self Play', max=self.args['numEps'])
                end = time.time()
                #IMPORTANT PART OF THE CODE. GENERATE NEW A AND NEW y HERE. EACH SELF-PLAY GAME HAS DIFFERENT A AND y.
                #-----------------------------------------------------
                for eps in range(self.args['numEps']):
                    #Initialize a new game by setting A, x, y, and then execute a single game of self play with self.executeEpisode()
                    if self.args[
                            'fixed_matrix'] == False:  #repeatedly generate sensing matrices if we are not fixing the sensing matrix.
                        self.game_args.generateSensingMatrix(
                            self.args['m'], self.args['n'],
                            self.args['matrix_type']
                        )  #generate a new sensing matrix
                    self.game_args.generateNewObsVec(
                        self.args['x_type'], self.args['sparsity']
                    )  #generate a new observed vector y. This assumes a matrix has been loaded in self.game_args!!!
                    self.mcts = MCTS(
                        self.game, self.nnet, self.args, self.game_args,
                        self.skip_nnet
                    )  #create new search tree for each game we play

                    #TESTING-------------------------
                    #print('The generated sparse vector x has sparsity: ' + str(self.game_args.game_iter))
                    #--------------------------------

                    #TESTING--------------------------
                    #print('Starting self-play game iteration: ' + str(eps))
                    #start_game = time.time()
                    #--------------------------------

                    iterationTrainExamples += self.executeEpisode(
                    )  #Play a new game with newly generated y. iterationTrainExamples is a deque containing states each generated self play game

                    #TESTING--------------------------
                    #end_game = time.time()
                    #print('Total time to play game ' + str(eps) + ' is: ' + str(end_game-start_game))
                    #-----------------------------------------------------
                    # bookkeeping + plot progress
                    eps_time.update(time.time() - end)
                    end = time.time()
                    bar.suffix = '({eps}/{maxeps}) Eps Time: {et:.3f}s | Total: {total:} | ETA: {eta:}'.format(
                        eps=eps + 1,
                        maxeps=self.args['numEps'],
                        et=eps_time.avg,
                        total=bar.elapsed_td,
                        eta=bar.eta_td)
                    bar.next()
                bar.finish()

                # save the iteration examples to the history
                #self.trainExamplesHistory is a list of deques, where each deque contains all the states from numEps number of self-play games
                self.trainExamplesHistory.append(iterationTrainExamples)

            #Jump to here on the first iteration if we loaded an existing file into self.trainExamplesHistory from method loadTrainExamples below.
            if len(self.trainExamplesHistory
                   ) > self.args['numItersForTrainExamplesHistory']:
                print("len(trainExamplesHistory) =",
                      len(self.trainExamplesHistory),
                      " => remove the oldest trainExamples")
                self.trainExamplesHistory.pop(0)
            # backup history to a file by calling saveTrainExamples method
            # The examples were collected using the model from the previous iteration, so (i-1)
            self.saveTrainExamples(
                i - 1
            )  #save examples to self.args['checkpoint'] folder with given iteration name of i-1

            # shuffle examples before training
            #trainExamples is the list form of trainExamplesHistory. Note that trainExamplesHistory is a list of deques,
            #where each deque contains training examples. trainExamples gets rid of the deque, and instead puts all training
            #samples in a single list, shuffled
            trainExamples = []
            for e in self.trainExamplesHistory:  #Each e is a deque
                trainExamples.extend(e)
            shuffle(trainExamples)

            #The Arena--------------------------------------------------------
            if self.args['Arena'] == True:
                self.nnet.save_checkpoint(
                    folder=self.args['network_checkpoint'],
                    filename='temp')  #copy old neural network into new one
                self.pnet.load_checkpoint(
                    folder=self.args['network_checkpoint'], filename='temp')

                #convert trainExamples into a format recognizable by Neural Network and train
                trainExamples = self.nnet.constructTraining(trainExamples)
                self.nnet.train(
                    trainExamples[0], trainExamples[1]
                )  #Train the new neural network self.nnet. The weights are now updated

                #Pit the two neural networks self.pnet and self.nnet in the arena
                print('PITTING AGAINST PREVIOUS VERSION')

                arena = Arena(
                    self.pnet, self.nnet, self.game, self.args,
                    self.arena_game_args
                )  #note that Arena will pit pnet with nnet, and Game_args A and y will change constantly. Note that next iteration, arena is a reference to a different object, so old object is deleted when there are no other references to it.
                pwins, nwins, draws = arena.playGames()

                print('NEW/PREV WINS : %d / %d ; DRAWS : %d' %
                      (nwins, pwins, draws))
                if pwins + nwins > 0 and float(nwins) / (
                        pwins + nwins) < self.args['updateThreshold']:
                    print('REJECTING NEW MODEL')
                    self.nnet.load_checkpoint(
                        folder=self.args['network_checkpoint'],
                        filename='temp')
                else:  #saves the weights(.h5) and model(.json) twice. Creates nnet_checkpoint(i-1)_model.json and nnet_checkpoint(i-1)_weights.h5, and rewrites best_model.json and best_weights.h5
                    print('ACCEPTING NEW MODEL')
                    self.nnet.save_checkpoint(
                        folder=self.args['network_checkpoint'],
                        filename='nnet_checkpoint' + str(i - 1))
                    self.nnet.save_checkpoint(
                        folder=self.args['network_checkpoint'],
                        filename='best')
            #-----------------------------------------------------------------

            else:  #If we do not activate Arena, then all we do is just train the network, rewrite best, and write a new file 'nnet_checkpoint' + str(i-1).
                print('TRAINING NEW NEURAL NETWORK...')
                trainExamples = self.nnet.constructTraining(trainExamples)

                #FOR TESTING-----------------------------------------------------
                #print('trainExamples feature arrays: ' + str(trainExamples[0]))
                #print('trainExamples label arrays: ' + str(trainExamples[1]))
                #END TESTING-----------------------------------------------------

                self.nnet.train(trainExamples[0],
                                trainExamples[1],
                                folder=self.args['network_checkpoint'],
                                filename='trainHistDict' + str(i - 1))

                #FOR TESTING-----------------------------------------------------
                #weights = self.nnet.nnet.model.get_weights()
                #min_max = []
                #for layer_weights in weights:
                #print('number of weights in current array in list (output as matrix size): ', layer_weights.shape)
                #layer_weights_min = np.amin(layer_weights)
                #layer_weights_max = np.amax(layer_weights)
                #min_max.append([layer_weights_min, layer_weights_max])
                #print('')
                #print('The smallest and largest weights of each layer are: ')
                #for pair in min_max:
                #print(pair)
                #print('')
                #END TESTING-----------------------------------------------------

                self.nnet.save_checkpoint(
                    folder=self.args['network_checkpoint'],
                    filename='nnet_checkpoint' + str(i - 1))
                self.nnet.save_checkpoint(
                    folder=self.args['network_checkpoint'], filename='best')
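
The compressed-sensing variant above regenerates a sensing matrix per self-play game via generateSensingMatrix(m, n, matrix_type). The supported matrix_type values are not shown; for a Gaussian type, the textbook construction is i.i.d. N(0, 1/m) entries so the columns have roughly unit norm. A sketch under that assumption:

import numpy as np

def generate_sensing_matrix(m, n, matrix_type='gaussian'):
    # 'gaussian' is an assumed type name; the snippet does not list the
    # real options. Entries ~ N(0, 1/m) give near-unit-norm columns.
    if matrix_type == 'gaussian':
        return np.random.randn(m, n) / np.sqrt(m)
    raise ValueError('unsupported matrix_type: %s' % matrix_type)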
Example #24
    def learn(self):
        """
        Performs numIters iterations with numEps episodes of self-play in each
        iteration. After every iteration, it retrains neural network with
        examples in trainExamples (which has a maximum length of maxlenOfQueue).
        It then pits the new neural network against the old one and accepts it
        only if it wins >= updateThreshold fraction of games.
        """

        for i in range(1, self.args.numIters + 1):
            # bookkeeping
            print('------ITER ' + str(i) + '------')
            # examples of the iteration

            if not self.skipFirstSelfPlay or i > 1:
                iterationTrainExamples = deque([],
                                               maxlen=self.args.maxlenOfQueue)

                eps_time = AverageMeter()
                bar = Bar('Self Play', max=self.args.numEps)
                end = time.time()

                for eps in range(self.args.numEps):
                    self.mcts = MCTS(self.game, self.nnet,
                                     self.args)  # reset search tree
                    iterationTrainExamples += self.executeEpisode()

                    # bookkeeping + plot progress
                    eps_time.update(time.time() - end)
                    end = time.time()
                    bar.suffix = '({eps}/{maxeps}) Eps Time: {et:.3f}s | Total: {total:} | ETA: {eta:}'.format(
                        eps=eps + 1,
                        maxeps=self.args.numEps,
                        et=eps_time.avg,
                        total=bar.elapsed_td,
                        eta=bar.eta_td)
                    bar.next()
                bar.finish()

                # save the iteration examples to the history
                self.trainExamplesHistory.append(iterationTrainExamples)

            if len(self.trainExamplesHistory
                   ) > self.args.numItersForTrainExamplesHistory:
                print("len(trainExamplesHistory) =",
                      len(self.trainExamplesHistory),
                      " => remove the oldest trainExamples")
                self.trainExamplesHistory.pop(0)
            # backup history to a file
            # NB! the examples were collected using the model from the previous iteration, so (i-1)
            self.saveTrainExamples(i - 1)

            # shuffle examples before training
            trainExamples = []
            for e in self.trainExamplesHistory:
                trainExamples.extend(e)

            shuffle(trainExamples)
            #             shuffle(np.transpose(trainExamples ,(0,2,3,1)))

            # training new network, keeping a copy of the old one
            self.nnet.save_checkpoint(folder=self.args.checkpoint,
                                      filename='temp.pth.tar')
            self.pnet.load_checkpoint(folder=self.args.checkpoint,
                                      filename='temp.pth.tar')
            pmcts = MCTS(self.game, self.pnet, self.args)

            #             print(trainExamples ,np.shape(trainExamples))

            loss = self.nnet.train(trainExamples)
            print(loss, "loosss")
            losses = np.load("losses_array.npy")
            self.losses = np.hstack(
                (losses, [[sum(loss[0]) / len(loss[0])],
                          [sum(loss[1]) / len(loss[1])],
                          [(sum(loss[0]) + sum(loss[1])) / len(loss[0])]]))

            #             clear_output(wait=True)

            print("================================================")
            print(self.losses)

            plt.plot(self.losses[2], 'k')
            plt.plot(self.losses[1], 'k:')
            plt.plot(self.losses[0], 'k--')
            plt.legend([
                'train_overall_loss', 'train_value_loss', 'train_policy_loss'
            ],
                       loc='lower left')

            display.clear_output(wait=True)
            display.display(pl.gcf())
            pl.gcf().clear()
            time.sleep(1.0)
            print('\n')
            np.save("losses_array.npy", self.losses)

            nmcts = MCTS(self.game, self.nnet, self.args)

            print('PITTING AGAINST PREVIOUS VERSION')
            #             arena = Arena(lambda x: np.argmax(pmcts.getActionProb(x, temp=0)),
            #                           lambda x: np.argmax(nmcts.getActionProb(x, temp=0)), self.game)

            arena = Arena(lambda x: np.argmax(pmcts.getActionProb(x, temp=0)),
                          lambda x: np.argmax(nmcts.getActionProb(x, temp=0)),
                          self.game)
            pwins, nwins, draws = arena.playGames(self.args.arenaCompare)

            print('NEW/PREV WINS : %d / %d ; DRAWS : %d' %
                  (nwins, pwins, draws))
            if pwins + nwins == 0 or float(nwins) / (
                    pwins + nwins) < self.args.updateThreshold:
                print('REJECTING NEW MODEL')
                self.nnet.load_checkpoint(folder=self.args.checkpoint,
                                          filename='temp.pth.tar')
            else:
                print('ACCEPTING NEW MODEL')
                # self.nnet.save_checkpoint(folder=self.args.checkpoint, filename=self.getCheckpointFile(i))
                self.nnet.save_checkpoint(folder=self.args.checkpoint,
                                          filename='best.pth.tar')
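
Note that the learn() above calls np.load("losses_array.npy") before it ever saves that file, so the file must already exist with shape (3, k) before the first iteration (rows: policy, value, overall loss). A one-time bootstrap consistent with the (3, 1) columns being hstack-ed is sketched below; creating the file up front is an assumption about the intended setup.

import numpy as np

# empty (3, 0) array: later iterations hstack one (3, 1) column per ITER
np.save("losses_array.npy", np.empty((3, 0)))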
Example #25
    def learn(self):
        """
        Performs numIters iterations with numEps episodes of self-play in each
        iteration. After every iteration, it retrains neural network with
        examples in trainExamples (which has a maximum length of maxlenOfQueue).
        It then pits the new neural network against the old one and accepts it
        only if it wins >= updateThreshold fraction of games.
        """
        learn_time = time.time()
        for i in range(1, self.args.numIters + 1):
            # bookkeeping
            learn_iter_time = time.time()
            print('------ITER ' + str(i) + '------')
            # examples of the iteration
            if not self.skipFirstSelfPlay or i > 1:
                iterationTrainExamples = deque([],
                                               maxlen=self.args.maxlenOfQueue)

                eps_time = AverageMeter()
                bar = Bar('Self Play', max=self.args.numEps)
                end = time.time()

                for eps in range(self.args.numEps):
                    iterationTrainExamples += self.executeEpisode()

                    # bookkeeping + plot progress
                    eps_time.update(time.time() - end)
                    end = time.time()
                    bar.suffix = '({eps}/{maxeps}) Eps Time: {et:.3f}s | Total: {total:} | ETA: {eta:}'.format(
                        eps=eps + 1,
                        maxeps=self.args.numEps,
                        et=eps_time.avg,
                        total=bar.elapsed_td,
                        eta=bar.eta_td)
                    bar.next()
                bar.finish()

                # save the iteration examples to the history
                self.trainExamplesHistory.append(iterationTrainExamples)

            if len(self.trainExamplesHistory
                   ) > self.args.numItersForTrainExamplesHistory:
                print("len(trainExamplesHistory) =",
                      len(self.trainExamplesHistory),
                      " => remove the oldest trainExamples")
                self.trainExamplesHistory.pop(0)
            # backup history to a file
            # NB! the examples were collected using the model from the previous iteration, so (i-1)
            self.saveTrainExamples(i - 1)

            # shuffle examples before training
            trainExamples = []
            for e in self.trainExamplesHistory:
                trainExamples.extend(e)
            shuffle(trainExamples)

            # training new network, keeping a copy of the old one
            self.nnet.save_checkpoint(folder=self.args.checkpoint,
                                      filename='temp.pth.tar')
            #self.pnet.load_checkpoint(folder=self.args.checkpoint, filename='temp.pth.tar')
            self.nnet.train(trainExamples)

            print('Following is played by PureNNt:')
            purenntplayer = PureNNtPlayer(self.game,
                                          self.nnet,
                                          self.args,
                                          temp=0).play
            arena = Arena(purenntplayer, self.game)
            print('PureNNt Real Performance:',
                  arena.playGames(self.args.arenaCompare))

            print('Following is played by NNtBasedMCTS:')
            nntbasedmctsplayer = NNtBasedMCTSPlayer(
                self.game,
                self.nnet,
                self.args,
                temp=0,
                percentile=self.r_percentile).play
            arena = Arena(nntbasedmctsplayer, self.game)
            print('NNtBasedMCTS Real Performance:',
                  arena.playGames(self.args.arenaCompare))

            print('ACCEPTING NEW MODEL DIRECTLY')
            self.nnet.save_checkpoint(folder=self.args.checkpoint,
                                      filename=self.getCheckpointFile(i))
            self.nnet.save_checkpoint(folder=self.args.checkpoint,
                                      filename='best.pth.tar')
Example #26
    def train(self, examples):
        """
        examples: list of examples, each example is of form (board, pi, v)
        """
        if self.log_summary:
            tf.gfile.MakeDirs(FLAGS_log_dir)
            summary_writer = tf.summary.FileWriter(
                FLAGS_log_dir + ('/train/iter-%02d' % self.global_iter),
                self.nnet.graph)
        self.global_iter += 1
        for epoch in range(args.epochs):
            print('EPOCH ::: ' + str(epoch + 1))
            data_time = AverageMeter()
            batch_time = AverageMeter()
            pi_losses = AverageMeter()
            v_losses = AverageMeter()
            end = time.time()

            bar = Bar('Training Net', max=int(len(examples) / args.batch_size))
            batch_idx = 0

            # self.sess.run(tf.local_variables_initializer())
            while batch_idx < int(len(examples) / args.batch_size):
                sample_ids = np.random.randint(len(examples),
                                               size=args.batch_size)
                boards, pis, vs = list(zip(*[examples[i] for i in sample_ids]))

                # predict and compute gradient and do SGD step
                input_dict = {
                    self.nnet.input_boards: boards,
                    self.nnet.target_pis: pis,
                    self.nnet.target_vs: vs,
                    self.nnet.dropout: args.dropout,
                    self.nnet.isTraining: True
                }

                # measure data loading time
                data_time.update(time.time() - end)

                if self.log_summary:
                    run_options = tf.RunOptions(
                        trace_level=tf.RunOptions.FULL_TRACE)
                    run_metadata = tf.RunMetadata()
                    summary_str, _ = self.sess.run(
                        [self.nnet.summary_merged, self.nnet.train_step],
                        feed_dict=input_dict,
                        options=run_options,
                        run_metadata=run_metadata)
                    summary_writer.add_run_metadata(
                        run_metadata, 'step%05d' % self.global_batch_idx)
                    summary_writer.add_summary(summary_str,
                                               self.global_batch_idx)
                else:
                    self.sess.run(self.nnet.train_step, feed_dict=input_dict)

                # record loss
                pi_loss, v_loss = self.sess.run(
                    [self.nnet.loss_pi, self.nnet.loss_v],
                    feed_dict=input_dict)
                pi_losses.update(pi_loss, len(boards))
                v_losses.update(v_loss, len(boards))

                # measure elapsed time
                batch_time.update(time.time() - end)
                end = time.time()
                batch_idx += 1
                self.global_batch_idx += 1

                # plot progress
                bar.suffix = '({batch}/{size}) Data: {data:.3f}s | Batch: {bt:.3f}s | Total: {total:} | ETA: {eta:} | Loss_pi: {lpi:.4f} | Loss_v: {lv:.3f}'.format(
                    batch=batch_idx,
                    size=int(len(examples) / args.batch_size),
                    data=data_time.avg,
                    bt=batch_time.avg,
                    total=bar.elapsed_td,
                    eta=bar.eta_td,
                    lpi=pi_losses.avg,
                    lv=v_losses.avg,
                )
                bar.next()
            bar.finish()
        if self.log_summary:
            summary_writer.close()
Example #28
def train(model, optimizer, epoch, di, args, loss_criterion):
    model.train()  # switch to train mode

    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    acc = AverageMeter()
    end = time.time()

    bar = Bar('Processing', max=args.batches_per_epoch)
    batch_idx = 0

    while batch_idx < args.batches_per_epoch:
        # sample batch
        if args.encoder_type == 'transformer':
            sent1, sent1_posembinput, sent2, sent2_posembinput, targets = \
                di.sample_train_batch(use_cuda=args.cuda)
            unsort1, unsort2 = None, None
            encoder_init_hidden = None
        elif args.encoder_type == 'rnn':
            sent1, sent2, unsort1, unsort2, targets = di.sample_train_batch(
                encoder_embed=model.embed,
                decoder_embed=model.embed,
                use_cuda=args.cuda,
            )
            sent1_posembinput, sent2_posembinput = None, None
            encoder_init_hidden = model.encoder.initHidden(
                batch_size=args.batch_size)
        elif args.encoder_type == 'decomposable':
            sent1, sent2, targets = \
                di.sample_train_batch(use_cuda=args.cuda)
            unsort1, unsort2 = None, None
            encoder_init_hidden = None

        if args.cuda:
            model = model.cuda()
            targets = targets.cuda(non_blocking=True)  # async= is a syntax error in Python 3.7+
            if args.encoder_type == 'transformer':
                sent1 = sent1.cuda()
                sent2 = sent2.cuda()
                sent1_posembinput = sent1_posembinput.cuda()
                sent2_posembinput = sent2_posembinput.cuda()
            elif args.encoder_type == 'decomposable':
                sent1 = sent1.cuda()
                sent2 = sent2.cuda()
            if args.encoder_type == 'rnn':
                if len(encoder_init_hidden):
                    encoder_init_hidden = [
                        x.cuda() for x in encoder_init_hidden
                    ]
                else:
                    encoder_init_hidden = encoder_init_hidden.cuda()
            loss_criterion = loss_criterion.cuda()

        # measure data loading time
        data_time.update(time.time() - end)

        # compute output
        if args.encoder_type == 'decomposable':
            softmax_outputs = model(
                sent1=sent1,
                sent2=sent2,
            )
        else:
            softmax_outputs = model(
                encoder_init_hidden=encoder_init_hidden,
                encoder_input=sent1,
                encoder_pos_emb_input=sent1_posembinput,
                encoder_unsort=unsort1,
                decoder_input=sent2,
                decoder_pos_emb_input=sent2_posembinput,
                decoder_unsort=unsort2,
                batch_size=args.batch_size,
            )
        loss = loss_criterion(softmax_outputs, targets)

        # measure accuracy and record loss
        acc_batch = compute_accuracy(
            outputs=softmax_outputs.data,
            targets=targets.data,
        )
        acc.update(acc_batch, args.batch_size)
        losses.update(loss.data[0], len(sent1))

        # compute gradient
        optimizer.zero_grad()
        loss.backward()

        if args.encoder_type == 'decomposable':
            # manual gradient clipping: accumulate the squared L2 norms of all
            # Linear weights and biases, then rescale the weight gradients if
            # the global gradient norm exceeds max_norm
            grad_norm = 0.
            para_norm = 0.
            for m in model.modules():
                if isinstance(m, nn.Linear):
                    grad_norm += m.weight.grad.data.norm()**2
                    para_norm += m.weight.data.norm()**2
                    if m.bias is not None:
                        grad_norm += m.bias.grad.data.norm()**2
                        para_norm += m.bias.data.norm()**2
            grad_norm = grad_norm**0.5
            para_norm = para_norm**0.5
            shrinkage = args.max_norm / grad_norm
            if shrinkage < 1:
                for m in model.modules():
                    if isinstance(m, nn.Linear):
                        m.weight.grad.data = m.weight.grad.data * shrinkage

        # optimizer step
        optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()
        batch_idx += 1

        # plot progress
        bar.suffix = '({batch}/{size}) Data: {data:.3f}s | Batch: {bt:.3f}s '\
            '| Total: {total:} | ETA: {eta:} | Loss: {loss:.4f} | Acc: {acc:.3f}'\
            .format(
                batch=batch_idx,
                size=args.batches_per_epoch,
                data=data_time.avg,
                bt=batch_time.avg,
                total=bar.elapsed_td,
                eta=bar.eta_td,
                loss=losses.avg,
                acc=acc.avg,
            )
        bar.next()
    bar.finish()

    return losses.avg, acc.avg
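
The manual shrinkage block above rescales only the Linear weight gradients. For reference, a minimal sketch of the same idea using PyTorch's built-in utility, assuming a PyTorch version that provides torch.nn.utils.clip_grad_norm_ (this is not part of the original example):

import torch

# hedged sketch: clip the global L2 norm of *all* parameter gradients
# to args.max_norm, then take the optimizer step as usual
torch.nn.utils.clip_grad_norm_(model.parameters(), args.max_norm)
optimizer.step()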
Ejemplo n.º 29
0
    def playGames(self, num, verbose=False):
        """
        Plays num games in which player1 starts num/2 games and player2 starts
        num/2 games.

        Returns:
            oneWon: games won by player1
            twoWon: games won by player2
            draws:  games won by nobody
        """
        eps_time = AverageMeter()
        bar = Bar('Arena.playGames', max=num)
        end = time.time()
        eps = 0
        maxeps = int(num)

        num = int(num / 2)
        oneWon = 0
        twoWon = 0
        draws = 0
        for _ in range(num):
            gameResult = self.playGame(verbose=verbose)
            if gameResult == 1:
                oneWon += 1
            elif gameResult == -1:
                twoWon += 1
            else:
                draws += 1
            # bookkeeping + plot progress
            eps += 1
            eps_time.update(time.time() - end)
            end = time.time()
            bar.suffix = '({eps}/{maxeps}) Eps Time: {et:.3f}s | Total: {total:} | ETA: {eta:}'.format(
                eps=eps,
                maxeps=maxeps,
                et=eps_time.avg,
                total=bar.elapsed_td,
                eta=bar.eta_td)
            bar.next()

        self.player1, self.player2 = self.player2, self.player1

        for _ in range(num):
            gameResult = self.playGame(verbose=verbose)
            if gameResult == -1:
                oneWon += 1
            elif gameResult == 1:
                twoWon += 1
            else:
                draws += 1
            # bookkeeping + plot progress
            eps += 1
            eps_time.update(time.time() - end)
            end = time.time()
            bar.suffix = '({eps}/{maxeps}) Eps Time: {et:.3f}s | Total: {total:} | ETA: {eta:}'.format(
                eps=eps,
                maxeps=maxeps,
                et=eps_time.avg,
                total=bar.elapsed_td,
                eta=bar.eta_td)
            bar.next()

        bar.finish()

        return oneWon, twoWon, draws
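
For context, a minimal sketch of how these three return values typically drive the model-acceptance check in the learn() examples below (arena, args.arenaCompare and args.updateThreshold as in those examples):

pwins, nwins, draws = arena.playGames(args.arenaCompare)
# the new network (player2 here) is accepted only if it wins at least
# updateThreshold of the decided games
if pwins + nwins == 0 or float(nwins) / (pwins + nwins) < args.updateThreshold:
    print('REJECTING NEW MODEL')
else:
    print('ACCEPTING NEW MODEL')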
Ejemplo n.º 30
0
def train_squad(model, optimizer, epoch, di, args, loss_criterion):
    model.train()  # switch to train mode

    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    acc = AverageMeter()
    end = time.time()

    bar = Bar('Processing', max=args.batches_per_epoch)
    batch_idx = 0

    while batch_idx < args.batches_per_epoch:
        # sample batch
        if args.encoder_type == 'rnn':
            (
                a1_packed_tensor,
                a1_idx_unsort,
                a2_packed_tensor,
                a2_idx_unsort,
                q_packed_tensor,
                q_idx_unsort,
                targets,
            ) = di.sample_train_batch(
                encoder_embed=model.embed,
                decoder_embed=model.embed,
                use_cuda=args.cuda,
            )
            encoder_init_hidden = model.encoder.initHidden(
                batch_size=args.batch_size)
        else:
            raise Exception("{} not supported".format(args.encoder_type))

        if args.cuda:
            model = model.cuda()
            targets = targets.cuda(non_blocking=True)
            if len(encoder_init_hidden):
                encoder_init_hidden = [x.cuda() for x in encoder_init_hidden]
            else:
                encoder_init_hidden = encoder_init_hidden.cuda()
            loss_criterion = loss_criterion.cuda()

        # measure data loading time
        data_time.update(time.time() - end)

        # compute output
        softmax_outputs = model(
            a1_packed_tensor=a1_packed_tensor,
            a1_idx_unsort=a1_idx_unsort,
            a2_packed_tensor=a2_packed_tensor,
            a2_idx_unsort=a2_idx_unsort,
            q_packed_tensor=q_packed_tensor,
            q_idx_unsort=q_idx_unsort,
            encoder_init_hidden=encoder_init_hidden,
            batch_size=args.batch_size,
        )
        loss = loss_criterion(softmax_outputs, targets)

        # measure accuracy and record loss
        acc_batch = compute_accuracy(
            outputs=softmax_outputs.data,
            targets=targets.data,
        )
        acc.update(acc_batch, args.batch_size)
        losses.update(loss.item(), args.batch_size)

        # compute gradient
        optimizer.zero_grad()
        loss.backward()

        # optimizer step
        optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()
        batch_idx += 1

        # plot progress
        bar.suffix = \
            '({batch}/{size}) Data: {data:.3f}s | Batch: {bt:.3f}s | '\
            'Total: {total:} | ETA: {eta:} | Loss: {loss:.4f} | '\
            'Acc: {acc:.3f}'\
            .format(
                batch=batch_idx,
                size=args.batches_per_epoch,
                data=data_time.avg,
                bt=batch_time.avg,
                total=bar.elapsed_td,
                eta=bar.eta_td,
                loss=losses.avg,
                acc=acc.avg,
            )
        bar.next()
    bar.finish()

    return losses.avg, acc.avg
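
The *_packed_tensor / *_idx_unsort pairs suggest the data iterator sorts each batch by length for pack_padded_sequence and also returns the permutation needed to restore the original order afterwards. A minimal sketch of that pattern (the helper and its names are illustrative, not the actual di implementation):

import torch
from torch.nn.utils.rnn import pack_padded_sequence

def pack_batch(padded, lengths):
    # hypothetical helper: sort sequences by decreasing length, as
    # pack_padded_sequence requires, and return the permutation that
    # restores the original batch order after the RNN
    lengths = torch.as_tensor(lengths)
    lengths, idx_sort = torch.sort(lengths, descending=True)
    _, idx_unsort = torch.sort(idx_sort)  # inverse permutation
    packed = pack_padded_sequence(padded[idx_sort], lengths.tolist(),
                                  batch_first=True)
    return packed, idx_unsort  # re-order the encoder output with idx_unsort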
Ejemplo n.º 31
0
    def learn(self):
        """
        Performs numIters iterations with numEps episodes of self-play in each
        iteration. After every iteration, it retrains neural network with
        examples in trainExamples (which has a maximum length of maxlenOfQueue).
        It then pits the new neural network against the old one and accepts it
        only if it wins >= updateThreshold fraction of games.
        """

        self.pnet.load_checkpoint(folder=self.args.checkpoint,
                                  filename='best.pth.tar')
        pmcts = MCTS(self.game, self.pnet, self.args)

        print('PITTING AGAINST PREVIOUS VERSION - RANDOM')
        #arena = Arena(lambda xt: np.argmax(pmcts.getActionProb(xt, temp=0)),
        #              lambda xt: np.argmax(pmcts.getActionProb(xt, temp=0)), self.game)
        n_actions = self.game.n * self.game.n + 1

        arena = Arena(
            lambda xt: np.random.choice(
                n_actions, p=pmcts.getActionProb(xt, temp=0.2)),
            lambda xt: np.random.choice(
                n_actions, p=pmcts.getActionProb(xt, temp=-0.2)), self.game)

        arena.display = self.game.display
        for i in range(7):
            arena.playGame(verbose=False, rnd=0)
        print()
        for i in range(7):
            arena.playGame(verbose=False, rnd=0)
        print()
        for i in range(7):
            arena.playGame(verbose=False, rnd=0)
        print()
        for i in range(7):
            arena.playGame(verbose=False, rnd=0)
        exit(0)  # debug early exit: the full training loop below never runs while this is in place

        for i in range(1, self.args.numIters + 1):

            # bookkeeping
            print('------ITER ' + str(i) + '------')
            # examples of the iteration
            if not self.skipFirstSelfPlay or i > 1:
                iterationTrainExamples = deque([],
                                               maxlen=self.args.maxlenOfQueue)

                eps_time = AverageMeter()
                bar = Bar('Self Play', max=self.args.numEps)
                end = time.time()

                for eps in range(self.args.numEps):
                    self.mcts = MCTS(self.game, self.nnet,
                                     self.args)  # reset search tree
                    iterationTrainExamples += self.executeEpisode()

                    # bookkeeping + plot progress
                    eps_time.update(time.time() - end)
                    end = time.time()
                    bar.suffix = '({eps}/{maxeps}) Eps Time: {et:.3f}s | Total: {total:} | ETA: {eta:}'.format(
                        eps=eps + 1,
                        maxeps=self.args.numEps,
                        et=eps_time.avg,
                        total=bar.elapsed_td,
                        eta=bar.eta_td)
                    bar.next()
                bar.finish()

                # save the iteration examples to the history
                self.trainExamplesHistory.append(iterationTrainExamples)

            if len(self.trainExamplesHistory
                   ) > self.args.numItersForTrainExamplesHistory:
                print("len(trainExamplesHistory) =",
                      len(self.trainExamplesHistory),
                      " => remove the oldest trainExamples")
                self.trainExamplesHistory.pop(0)
            # backup history to a file
            # NB! the examples were collected using the model from the previous iteration, so (i-1)
            self.saveTrainExamples(i - 1)

            # shuffle examples before training
            trainExamples = []
            for e in self.trainExamplesHistory:
                trainExamples.extend(e)
            shuffle(trainExamples)

            # training new network, keeping a copy of the old one
            self.nnet.save_checkpoint(folder=self.args.checkpoint,
                                      filename='temp.pth.tar')
            self.pnet.load_checkpoint(folder=self.args.checkpoint,
                                      filename='temp.pth.tar')
            pmcts = MCTS(self.game, self.pnet, self.args)

            self.nnet.train(trainExamples)
            nmcts = MCTS(self.game, self.nnet, self.args)

            print('PITTING AGAINST PREVIOUS VERSION')
            arena = Arena(
                lambda x: np.argmax(pmcts.getActionProb(x, temp=0.3)),
                lambda x: np.argmax(nmcts.getActionProb(x, temp=0.3)),
                self.game)
            pwins, nwins, draws = arena.playGames(self.args.arenaCompare)

            print('NEW/PREV WINS : %d / %d ; DRAWS : %d' %
                  (nwins, pwins, draws))

            print('PITTING AGAINST PREVIOUS VERSION - RANDOM')
            arena = Arena(
                lambda x: np.argmax(pmcts.getActionProb(x, temp=0.3)),
                lambda x: np.argmax(nmcts.getActionProb(x, temp=0.3)),
                self.game)
            # use a separate loop variable so the outer iteration counter i is not clobbered
            for j in range(24):
                arena.playGame(verbose=False, rnd=4 * j)

            if pwins + nwins == 0 or float(nwins) / (
                    pwins + nwins) < self.args.updateThreshold:
                print('REJECTING NEW MODEL')
                self.nnet.load_checkpoint(folder=self.args.checkpoint,
                                          filename='temp.pth.tar')
            else:
                print('ACCEPTING NEW MODEL')
                self.nnet.save_checkpoint(folder=self.args.checkpoint,
                                          filename=self.getCheckpointFile(i))
                self.nnet.save_checkpoint(folder=self.args.checkpoint,
                                          filename='best.pth.tar')
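
The two action-selection styles used above differ in determinism: np.argmax always plays the most probable action, while np.random.choice samples from the visit-count distribution. A minimal illustration, assuming getActionProb returns a probability vector pi over actions for a given board:

import numpy as np

pi = pmcts.getActionProb(board, temp=0.3)               # assumed probability vector
greedy_action = int(np.argmax(pi))                      # deterministic: most-visited action
sampled_action = int(np.random.choice(len(pi), p=pi))   # stochastic: sample from pi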
Ejemplo n.º 32
0
def test(model, epoch, di, args, loss_criterion):
    global best_acc

    # switch to evaluate mode
    model.eval()

    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    acc = AverageMeter()
    end = time.time()

    bar = Bar('Processing', max=args.test_batches_per_epoch)
    batch_idx = 0

    while batch_idx < args.test_batches_per_epoch:
        # sample batch
        if args.encoder_type == 'transformer':
            sent1, sent1_posembinput, sent2, sent2_posembinput, targets = \
                di.sample_dev_batch(use_cuda=args.cuda)
            unsort1, unsort2 = None, None
            encoder_init_hidden = None
        elif args.encoder_type == 'rnn':
            sent1, sent2, unsort1, unsort2, targets = di.sample_dev_batch(
                encoder_embed=model.embed,
                decoder_embed=model.embed,
                use_cuda=args.cuda,
            )
            sent1_posembinput, sent2_posembinput = None, None
            encoder_init_hidden = model.encoder.initHidden(
                batch_size=args.batch_size)
        elif args.encoder_type == 'decomposable':
            sent1, sent2, targets = \
                di.sample_dev_batch(use_cuda=args.cuda)
            unsort1, unsort2 = None, None
            encoder_init_hidden = None
        else:
            raise Exception("{} not supported".format(args.encoder_type))

        if args.cuda:
            model = model.cuda()
            targets = targets.cuda(non_blocking=True)
            if args.encoder_type == 'transformer':
                sent1 = sent1.cuda()
                sent2 = sent2.cuda()
                sent1_posembinput = sent1_posembinput.cuda()
                sent2_posembinput = sent2_posembinput.cuda()
            elif args.encoder_type == 'decomposable':
                sent1 = sent1.cuda()
                sent2 = sent2.cuda()
            if args.encoder_type == 'rnn':
                if len(encoder_init_hidden):
                    encoder_init_hidden = [
                        x.cuda() for x in encoder_init_hidden
                    ]
                else:
                    encoder_init_hidden = encoder_init_hidden.cuda()

        # measure data loading time
        data_time.update(time.time() - end)

        # compute output
        if args.encoder_type == 'decomposable':
            softmax_outputs = model(
                sent1=sent1,
                sent2=sent2,
            )
        else:
            softmax_outputs = model(
                encoder_init_hidden=encoder_init_hidden,
                encoder_input=sent1,
                encoder_pos_emb_input=sent1_posembinput,
                encoder_unsort=unsort1,
                decoder_input=sent2,
                decoder_pos_emb_input=sent2_posembinput,
                decoder_unsort=unsort2,
                batch_size=args.batch_size,
            )
        loss = loss_criterion(softmax_outputs, targets)

        # measure accuracy and record loss
        acc_batch = compute_accuracy(
            outputs=softmax_outputs.data,
            targets=targets.data,
        )
        acc.update(acc_batch, args.batch_size)
        losses.update(loss.item(), len(sent1))

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()
        batch_idx += 1

        # plot progress
        bar.suffix = '({batch}/{size}) Data: {data:.3f}s | Batch: {bt:.3f}s '\
            '| Total: {total:} | ETA: {eta:} | Loss: {loss:.4f} | Acc: {acc:.3f}'\
            .format(
                batch=batch_idx,
                size=args.test_batches_per_epoch,
                data=data_time.avg,
                bt=batch_time.avg,
                total=bar.elapsed_td,
                eta=bar.eta_td,
                loss=losses.avg,
                acc=acc.avg,
            )
        bar.next()
    bar.finish()
    return losses.avg, acc.avg
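
Note that test() above still builds the autograd graph during evaluation. On a modern PyTorch (>= 0.4, an assumption, since these examples predate it), the forward pass would normally be wrapped to skip gradient bookkeeping, e.g.:

import torch

model.eval()
with torch.no_grad():  # no gradients are needed at evaluation time
    softmax_outputs = model(sent1=sent1, sent2=sent2)
    loss = loss_criterion(softmax_outputs, targets)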
Ejemplo n.º 33
0
    def learn(self):
        """
        Performs numIters iterations with numEps episodes of self-play in each
        iteration. After every iteration, it retrains neural network with
        examples in trainExamples (which has a maximum length of maxlenOfQueue).
        It then pits the new neural network against the old one and accepts it
        only if it wins >= updateThreshold fraction of games.
        """

        for i in range(1, self.args.numIters+1):
            # bookkeeping
            print('------ITER ' + str(i) + '------')
            print(str(self.game.innerN) + "x" + str(self.game.innerM))
            # examples of the iteration
            if not self.skipFirstSelfPlay or i > 1:
                iterationTrainExamples = deque([], maxlen=self.args.maxlenOfQueue)

                eps_time = AverageMeter()
                bar = Bar('Self Play', max=self.args.numEps)
                end = time.time()
    
                for eps in range(self.args.numEps):
                    # self.mcts = MCTS(self.game, self.nnet, self.args)   # reset search tree
                    self.mcts = MCTS(self.nnet, self.args)   # reset search tree
                    iterationTrainExamples += self.executeEpisode()

    
                    # bookkeeping + plot progress
                    eps_time.update(time.time() - end)
                    end = time.time()
                    bar.suffix  = '({eps}/{maxeps}) Eps Time: {et:.3f}s | Total: {total:} | ETA: {eta:}'.format(eps=eps+1, maxeps=self.args.numEps, et=eps_time.avg,
                                                                                                               total=bar.elapsed_td, eta=bar.eta_td)
                    bar.next()
                bar.finish()

                # save the iteration examples to the history 
                self.trainExamplesHistory.append(iterationTrainExamples)
                
            if len(self.trainExamplesHistory) > self.args.numItersForTrainExamplesHistory:
                print("len(trainExamplesHistory) =", len(self.trainExamplesHistory), " => remove the oldest trainExamples")
                self.trainExamplesHistory.pop(0)
            # backup history to a file
            # NB! the examples were collected using the model from the previous iteration, so (i-1)  
            self.saveTrainExamples(i-1)
            
            # shuffle examples before training
            trainExamples = []
            for e in self.trainExamplesHistory:
                trainExamples.extend(e)
            shuffle(trainExamples)

            tempfile = 'temp.pth.tar'
            bestfile = 'best.pth.tar'

            # training new network, keeping a copy of the old one
            self.nnet.save_checkpoint(folder=self.args.checkpoint, filename=tempfile)
            self.nnet.train(trainExamples)

            if self.arenaEnabled:
                self.pnet.load_checkpoint(folder=self.args.checkpoint, filename=tempfile)

                pmcts = MCTS(self.pnet, self.args)
                nmcts = MCTS(self.nnet, self.args)

                print('PITTING AGAINST PREVIOUS VERSION')
                # arena = Arena(lambda x: np.argmax(pmcts.getActionProb(x, temp=0)),
                #               lambda x: np.argmax(nmcts.getActionProb(x, temp=0)), self.game)
                arena = Arena(lambda x, y: pmcts.getActionProb(x, y, temp=0),
                              lambda x, y: nmcts.getActionProb(x, y, temp=0), self.game)
                pwins, nwins, draws = arena.playGames(self.args.arenaCompare)

                print('NEW/PREV WINS : %d / %d ; DRAWS : %d' % (nwins, pwins, draws))
                if pwins+nwins > 0 and float(nwins)/(pwins+nwins) < self.args.updateThreshold:
                    print('REJECTING NEW MODEL')
                    self.nnet.load_checkpoint(folder=self.args.checkpoint, filename=tempfile)
                else:
                    print('ACCEPTING NEW MODEL')
                    self.nnet.save_checkpoint(folder=self.args.checkpoint, filename=self.getCheckpointFile(i))
                    self.nnet.save_checkpoint(folder=self.args.checkpoint, filename=bestfile)
Ejemplo n.º 34
0
    def learn(self):
        """
        Performs numIters iterations with numEps episodes of self-play in each
        iteration. After every iteration, it retrains neural network with
        examples in trainExamples (which has a maximum length of maxlenOfQueue).
        It then pits the new neural network against the old one and accepts it
        only if it wins >= updateThreshold fraction of games.
        """
        epochswin = []  # count the number of wins at every epoch of the network against the preceding version
        epochdraw = []  # count the number of draws at every epoch of the network against the preceding version
        epochswingreedy = []  # count the number of wins against greedy at every epoch
        epochswinrandom = []  # count the number of wins against random at every epoch
        epochsdrawgreedy = []  # count the number of draws against greedy at every epoch
        epochsdrawrandom = []  # count the number of draws against random at every epoch
        epochswinminmax = []  # count the number of wins against minmax at every epoch
        epochsdrawminmax = []  # count the number of draws against minmax at every epoch

        beginning = 1
        if self.args.load_model:
            file = open(self.args.trainExampleCheckpoint + "graphwins:iter" + str(self.args.numIters) + ":eps" + str(
                self.args.numEps) + ":dim" + str(self.game.n) + ".txt", "r+")
            lines = file.readlines()
            for index, line in enumerate(lines):
                for word in line.split():
                    if index == 0:
                        epochswin.append(word)
                    elif index == 1:
                        epochdraw.append(word)
            file.close()

            file = open(self.args.trainExampleCheckpoint + "graphwins:iter" + str(self.args.numIters) + ":eps" + str(
                self.args.numEps) + ":dim" + str(self.game.n) + ":greedyrandom.txt", "r+")
            lines = file.readlines()
            for index, line in enumerate(lines):
                for word in line.split():
                    if index == 0:
                        epochswingreedy.append(word)
                    elif index == 1:
                        epochsdrawgreedy.append(word)
                    elif index == 2:
                        epochswinrandom.append(word)
                    elif index == 3:
                        epochsdrawrandom.append(word)
                    elif index == 4:
                        epochswinminmax.append(word)
                    elif index == 5:
                        epochsdrawminmax.append(word)
            file.close()
            self.loadTrainExamples()

            file = open(self.args.trainExampleCheckpoint + "loopinformation", "r")
            lines = file.readlines()
            beginning = lines[0]
            file.close()


        for i in range(int(beginning), self.args.numIters + 1):

            fileLoopInformation = open(self.args.trainExampleCheckpoint + "loopinformation", "w+")
            fileLoopInformation.write(str(i))
            fileLoopInformation.close()

            # bookkeeping
            print('------ITER ' + str(i) + '------')
            # examples of the iteration
            iterationTrainExamples = deque([], maxlen=self.args.maxlenOfQueue)

            eps_time = AverageMeter()
            bar = Bar('Self Play', max=self.args.numEps)
            end = time.time()

            for eps in range(self.args.numEps):
                iterationTrainExamples += self.executeEpisode()

                # bookkeeping + plot progress
                eps_time.update(time.time() - end)
                end = time.time()
                bar.suffix = '({eps}/{maxeps}) Eps Time: {et:.3f}s | Total: {total:} | ETA: {eta:}'.format(eps=eps + 1,
                                                                                                           maxeps=self.args.numEps,
                                                                                                           et=eps_time.avg,
                                                                                                           total=bar.elapsed_td,
                                                                                                           eta=bar.eta_td)
                bar.next()
            bar.finish()

            # save the iteration examples to the history
            self.trainExamplesHistory.append(iterationTrainExamples)

            if len(self.trainExamplesHistory) > self.args.numItersForTrainExamplesHistory:
                print("len(trainExamplesHistory) =", len(self.trainExamplesHistory),
                      " => remove the oldest trainExamples")
                self.trainExamplesHistory.pop(0)
            # backup history to a file
            # NB! the examples were collected using the model from the previous iteration, so (i-1)
            self.saveTrainExamples(i - 1)

            # shuffle examples before training
            trainExamples = []
            for e in self.trainExamplesHistory:
                trainExamples.extend(e)
            shuffle(trainExamples)

            # training new network, keeping a copy of the old one

            filename = "curent"+str(i)+"temp:iter" + str(self.args.numIters) + ":eps" + str(self.args.numEps) + \
                       ":dim" + str(self.game.n) + ".pth.tar"
            filenameBest = "best" + str(self.args.numIters) + ":eps" + str(self.args.numEps) + ":dim" + str(
                self.game.n) + ".pth.tar"
            print("path with filename "+filename)
            self.nnet.save_checkpoint(folder=self.args.checkpoint, filename=filename)
            exists = os.path.isfile(os.path.join(self.args.checkpoint, filenameBest))
            if exists:
                self.pnet.load_checkpoint(folder=self.args.checkpoint, filename=filenameBest)
            else:
                self.pnet.load_checkpoint(folder=self.args.checkpoint, filename=filename)
            pmcts = MCTS(self.game, self.pnet, self.args)

            self.nnet.train(trainExamples)
            filenameCurrent="currentforprocess:temp:iter" + str(self.args.numIters) + \
                            ":eps" + str(self.args.numEps) + ":dim" + str(self.game.n) + ".pth.tar"
            self.nnet.save_checkpoint(folder=self.args.checkpoint,filename=filenameCurrent)
            nmcts = MCTS(self.game, self.nnet, self.args)

            print('PITTING AGAINST PREVIOUS VERSION')
            arena = Arena(lambda x: np.argmax(pmcts.getActionProb(x, temp=0)),
                          lambda x: np.argmax(nmcts.getActionProb(x, temp=0)), self.game,nmcts,pmcts,evaluate=True,
                          name=self.args.name)

            pwins, nwins, draws = arena.playGames(self.args.arenaCompare, False)

            pmcts.clear()
            nmcts.clear()
            del pmcts
            del nmcts

            print(' ')
            print('NEW/PREV WINS : %d / %d ; DRAWS : %d' % (nwins, pwins, draws))
            if i == 1:
                epochswin.append(pwins)
                epochdraw.append(0)

            epochswin.append(nwins)
            epochdraw.append(draws)
            self.writeLogsToFile(epochswin, epochdraw)

            ''' Get all the players and then pit them against the network. Modify this section if you
                implement more players.
            '''
            (gp, rp, mp) = self.decidePlayers()

            if self.args.parallel == 0:
                nmcts1 = MCTS(self.game, self.nnet, self.args)
                nmcts2 = MCTS(self.game, self.nnet, self.args)
                nmcts3 = MCTS(self.game, self.nnet, self.args)

                arenagreedy = Arena(lambda x: np.argmax(nmcts1.getActionProb(x, temp=0)), gp, self.game,nmcts1
                                    ,name=self.args.name)
                arenarandom = Arena(lambda x: np.argmax(nmcts2.getActionProb(x, temp=0)), rp, self.game,nmcts2
                                    ,name=self.args.name)
                arenaminmax = Arena(lambda x: np.argmax(nmcts3.getActionProb(x, temp=0)), mp, self.game,nmcts3,
                                    evaluate=True,name=self.args.name)

                pwinsminmax, nwinsminmax, drawsminmax = arenaminmax.playGames(self.args.arenaCompare)
                print("minmax - "+str(pwinsminmax)+" "+str(nwinsminmax)+" "+str(drawsminmax))
                pwinsgreedy, nwinsgreedy, drawsgreedy = arenagreedy.playGames(self.args.arenaCompare)
                print("greedy - "+str(pwinsgreedy)+" "+str(nwinsgreedy)+" "+str(drawsgreedy))
                pwinsrandom, nwinsrandom, drawsrandom = arenarandom.playGames(self.args.arenaCompare)
                print("random - "+str(pwinsrandom)+" "+str(nwinsrandom)+" "+str(drawsrandom))

                nmcts1.clear()
                nmcts2.clear()
                nmcts3.clear()
                del nmcts1
                del nmcts2
                del nmcts3

            else:
                '''
                This will be used if you want to evaluate the network against the benchmarks in a parallel way
                '''

                self.args.update({'index': str(i)})

                p = self.parallel(self.args.arenaCompare)
                (pwinsminmax, nwinsminmax, drawsminmax) = p[0]  # self.parallel("minmax", self.args.arenaCompare)
                (pwinsgreedy, nwinsgreedy, drawsgreedy) = p[1]  # self.parallel("greedy",self.args.arenaCompare)
                (pwinsrandom, nwinsrandom, drawsrandom) = p[2]  # self.parallel("random",self.args.arenaCompare)

            epochsdrawgreedy.append(drawsgreedy)
            epochsdrawrandom.append(drawsrandom)
            epochswinrandom.append(pwinsrandom)
            epochswingreedy.append(pwinsgreedy)
            epochswinminmax.append(pwinsminmax)
            epochsdrawminmax.append(drawsminmax)

            self.writeLogsToFile(epochswingreedy, epochsdrawgreedy, epochswinrandom, epochsdrawrandom, epochswinminmax,
                                 epochsdrawminmax, training=False)

            if pwins + nwins == 0 or float(nwins) / (pwins + nwins) <= self.args.updateThreshold:
                print('REJECTING NEW MODEL')
                filename = "curent"+str(i)+"temp:iter" + str(self.args.numIters) + ":eps" + str(self.args.numEps) + ":dim" + str(
                    self.game.n) + ".pth.tar"
                filenameBest = "best" + str(self.args.numIters) + ":eps" + str(self.args.numEps) + ":dim" + str(
                    self.game.n) + ".pth.tar"
                exists = os.path.isfile(os.path.join(self.args.checkpoint, filenameBest))
                if exists:
                    self.nnet.load_checkpoint(folder=self.args.checkpoint, filename=filenameBest)
                else:
                    self.nnet.load_checkpoint(folder=self.args.checkpoint, filename=filename)

            else:
                print('ACCEPTING NEW MODEL')
                filename = "best" + str(self.args.numIters) + ":eps" + str(self.args.numEps) + ":dim" + str(
                    self.game.n) + ".pth.tar"
                self.nnet.save_checkpoint(folder=self.args.checkpoint, filename=self.getCheckpointFile(i))
                self.nnet.save_checkpoint(folder=self.args.checkpoint, filename=filename)
            self.mcts.clear()
            del self.mcts
            self.mcts = MCTS(self.game, self.nnet, self.args, mcts=True)  # reset search tree
        self.writeLogsToFile(epochswin, epochdraw, training=True)
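
writeLogsToFile itself is not shown in this example; for the load_model branch above to read the logs back, it has to write one whitespace-separated sequence per line (wins on line 0, draws on line 1, for the training case). A hypothetical minimal sketch consistent with that reader, not the author's actual implementation:

def writeLogsToFile(self, epochswin, epochdraw, training=True):
    # sketch for the training=True case only: mirror the format parsed
    # back in learn() when load_model is set
    path = (self.args.trainExampleCheckpoint + "graphwins:iter" +
            str(self.args.numIters) + ":eps" + str(self.args.numEps) +
            ":dim" + str(self.game.n) + ".txt")
    with open(path, "w") as f:
        f.write(" ".join(str(w) for w in epochswin) + "\n")
        f.write(" ".join(str(d) for d in epochdraw) + "\n")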