def playGames(self, num, verbose=False):
        """
        Plays num games in which player1 starts num/2 games and player2 starts
        num/2 games.

        Returns:
            oneWon: games won by player1
            twoWon: games won by player2
            draws:  games won by nobody
        """
        eps_time = AverageMeter()
        bar = Bar('Arena.playGames', max=num)
        end = time.time()
        eps = 0
        maxeps = int(num)

        num = int(num/2)
        oneWon = 0
        twoWon = 0
        draws = 0
        for _ in range(num):
            gameResult = self.playGame(verbose=verbose)
            if gameResult==1:
                oneWon+=1
            elif gameResult==-1:
                twoWon+=1
            else:
                draws+=1
            # bookkeeping + plot progress
            eps += 1
            eps_time.update(time.time() - end)
            end = time.time()
            bar.suffix  = '({eps}/{maxeps}) Eps Time: {et:.3f}s | Total: {total:} | ETA: {eta:}'.format(eps=eps, maxeps=maxeps, et=eps_time.avg,
                                                                                                       total=bar.elapsed_td, eta=bar.eta_td)
            bar.next()

        self.player1, self.player2 = self.player2, self.player1
        
        for _ in range(num):
            gameResult = self.playGame(verbose=verbose)
            if gameResult==-1:
                oneWon+=1                
            elif gameResult==1:
                twoWon+=1
            else:
                draws+=1
            # bookkeeping + plot progress
            eps += 1
            eps_time.update(time.time() - end)
            end = time.time()
            bar.suffix  = '({eps}/{maxeps}) Eps Time: {et:.3f}s | Total: {total:} | ETA: {eta:}'.format(eps=eps, maxeps=maxeps, et=eps_time.avg,
                                                                                                       total=bar.elapsed_td, eta=bar.eta_td)
            bar.next()
            
        bar.finish()

        return oneWon, twoWon, draws
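The method above plays half the games with the sides swapped so that neither agent benefits from always moving first. Below is a self-contained sketch of the same win-counting pattern, using a stand-in play_game (an assumption, not the Arena.playGame called above):

import random

def play_game():
    # Stand-in for Arena.playGame: 1 means the side that moved first won,
    # -1 the other side, 0 a draw.
    return random.choice([1, -1, 0])

def play_games(num):
    one_won = two_won = draws = 0
    half = num // 2
    for swapped in (False, True):        # second half: the players switch sides
        for _ in range(half):
            result = play_game()
            if result == (-1 if swapped else 1):
                one_won += 1
            elif result == (1 if swapped else -1):
                two_won += 1
            else:
                draws += 1
    return one_won, two_won, draws

print(play_games(10))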
Example #2
    def train(self, examples):
        """
        examples: list of examples, each example is of form (board, pi, v)
        """

        for epoch in range(args.epochs):
            print('EPOCH ::: ' + str(epoch+1))
            data_time = AverageMeter()
            batch_time = AverageMeter()
            pi_losses = AverageMeter()
            v_losses = AverageMeter()
            end = time.time()

            bar = Bar('Training Net', max=int(len(examples)/args.batch_size))
            batch_idx = 0

            # self.sess.run(tf.local_variables_initializer())
            while batch_idx < int(len(examples)/args.batch_size):
                sample_ids = np.random.randint(len(examples), size=args.batch_size)
                boards, pis, vs = list(zip(*[examples[i] for i in sample_ids]))

                # predict and compute gradient and do SGD step
                input_dict = {self.nnet.input_boards: boards, self.nnet.target_pis: pis, self.nnet.target_vs: vs, self.nnet.dropout: args.dropout, self.nnet.isTraining: True}

                # measure data loading time
                data_time.update(time.time() - end)

                # run one training step, then record the losses
                self.sess.run(self.nnet.train_step, feed_dict=input_dict)
                pi_loss, v_loss = self.sess.run([self.nnet.loss_pi, self.nnet.loss_v], feed_dict=input_dict)
                pi_losses.update(pi_loss, len(boards))
                v_losses.update(v_loss, len(boards))

                # measure elapsed time
                batch_time.update(time.time() - end)
                end = time.time()
                batch_idx += 1

                # plot progress
                bar.suffix  = '({batch}/{size}) Data: {data:.3f}s | Batch: {bt:.3f}s | Total: {total:} | ETA: {eta:} | Loss_pi: {lpi:.4f} | Loss_v: {lv:.3f}'.format(
                            batch=batch_idx,
                            size=int(len(examples)/args.batch_size),
                            data=data_time.avg,
                            bt=batch_time.avg,
                            total=bar.elapsed_td,
                            eta=bar.eta_td,
                            lpi=pi_losses.avg,
                            lv=v_losses.avg,
                            )
                bar.next()
            bar.finish()
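AverageMeter is used by every example in this collection but never shown. A minimal sketch consistent with how it is called here (update(val, n) plus a running .avg), in the style of the common PyTorch example utilities:

class AverageMeter(object):
    """Tracks a running (weighted) average; sketch based on how it is used above."""

    def __init__(self):
        self.val = 0.0    # last value recorded
        self.sum = 0.0    # weighted sum of recorded values
        self.count = 0    # total weight
        self.avg = 0.0    # running average exposed to the progress bar

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count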
Example #3
import time

import numpy as np
from progress.bar import Bar

# Demo setup (assumed values; the original snippet did not define them):
test_list = list(range(20))
length_list = len(test_list)
eps_completed = 0
bar = Bar('Processing', max=length_list)


while test_list:
    a = np.random.binomial(1, 0.5)
    b = np.random.binomial(1, 0.5)

    if a == 1 and b == 1 and len(test_list) >= 2:
        test_list.pop()
        test_list.pop()

        eps_completed += 2
        bar.suffix = '({eps}/{maxeps})'.format(eps=eps_completed, maxeps=length_list)
        bar.next()
        time.sleep(3)

    elif a == 0 and b == 0:
        test_list.pop()

        eps_completed += 1
        # bar.suffix only controls the text shown after the bar
        bar.suffix = '({eps}/{maxeps})'.format(eps=eps_completed, maxeps=length_list)
        # bar.next() only controls the drawing of the completion bar
        bar.next()
        time.sleep(3)

bar.finish()
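The other examples in this collection pair Bar with a running average to display per-step time, elapsed time and ETA. A compact, self-contained version of that pattern (assuming only the progress package that provides Bar here):

import time
from progress.bar import Bar

num_steps = 5
step_times = []
bar = Bar('Working', max=num_steps)
end = time.time()
for step in range(1, num_steps + 1):
    time.sleep(0.2)                               # stand-in for real work
    step_times.append(time.time() - end)
    end = time.time()
    avg = sum(step_times) / len(step_times)
    bar.suffix = '({s}/{m}) Step: {st:.3f}s | Total: {total:} | ETA: {eta:}'.format(
        s=step, m=num_steps, st=avg, total=bar.elapsed_td, eta=bar.eta_td)
    bar.next()
bar.finish()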
Example #4
    def train(self, examples):
        """
        examples: list of examples, each example is of form (board, pi, v)
        """
        optimizer = optim.Adam(self.nnet.parameters(), lr=self.lr)

        for epoch in range(self.epochs):
            print('EPOCH ::: ' + str(epoch + 1))
            self.nnet.train()
            data_time = AverageMeter()
            batch_time = AverageMeter()
            pi_losses = AverageMeter()
            v_losses = AverageMeter()
            end = time.time()

            bar = Bar('Training Net', max=int(len(examples) / self.batch_size))
            batch_idx = 0

            while batch_idx < int(len(examples) / self.batch_size):
                sample_ids = np.random.randint(len(examples),
                                               size=self.batch_size)
                boards, pis, vs = list(zip(*[examples[i] for i in sample_ids]))
                fronts = [b[0] for b in boards]
                mids = [b[1] for b in boards]
                backs = [b[2] for b in boards]
                cards = [b[3] for b in boards]

                fronts = torch.FloatTensor(fronts).to(device)
                mids = torch.FloatTensor(mids).to(device)
                backs = torch.FloatTensor(backs).to(device)
                cards = torch.FloatTensor(cards).to(device)
                target_pis = torch.FloatTensor(np.array(pis)).to(device)
                target_vs = torch.FloatTensor(np.array(vs).astype(
                    np.float64)).to(device)

                # measure data loading time
                data_time.update(time.time() - end)

                # compute output
                out_pi, out_v = self.nnet(fronts, mids, backs, cards)
                l_pi = self.loss_pi(target_pis, out_pi)
                l_v = self.loss_v(target_vs, out_v)
                total_loss = l_pi + l_v

                # record loss
                pi_losses.update(l_pi.item(), fronts.size(0))
                v_losses.update(l_v.item(), fronts.size(0))

                # compute gradient and do SGD step
                optimizer.zero_grad()
                total_loss.backward()
                optimizer.step()

                # measure elapsed time
                batch_time.update(time.time() - end)
                end = time.time()
                batch_idx += 1

                # plot progress
                bar.suffix = '({batch}/{size}) Data: {data:.3f}s | Batch: {bt:.3f}s | Total: {total:} | ETA: {eta:} | Loss_pi: {lpi:.4f} | Loss_v: {lv:.3f}'.format(
                    batch=batch_idx,
                    size=int(len(examples) / self.batch_size),
                    data=data_time.avg,
                    bt=batch_time.avg,
                    total=bar.elapsed_td,
                    eta=bar.eta_td,
                    lpi=pi_losses.avg,
                    lv=v_losses.avg,
                )
                bar.next()
            bar.finish()
    def train(self, examples):
        """
        examples: list of examples, each example is of form (board, pi, v)
        """
        optimizer = optim.Adam(self.nnet.parameters())

        for epoch in range(args.epochs):
            print('EPOCH ::: ' + str(epoch + 1))
            self.nnet.train()
            data_time = AverageMeter()
            batch_time = AverageMeter()
            pi_losses = AverageMeter()
            v_losses = AverageMeter()
            end = time.time()

            bar = Bar('Training Net', max=int(len(examples) / args.batch_size))
            batch_idx = 0

            while batch_idx < int(len(examples) / args.batch_size):
                sample_ids = np.random.randint(len(examples),
                                               size=args.batch_size)
                boards, pis, vs = list(zip(*[examples[i] for i in sample_ids]))
                boards = torch.FloatTensor(np.array(boards).astype(np.float64))
                target_pis = torch.FloatTensor(np.array(pis))
                target_vs = torch.FloatTensor(np.array(vs).astype(np.float64))

                # predict
                if args.cuda:
                    boards, target_pis, target_vs = boards.contiguous().cuda(
                    ), target_pis.contiguous().cuda(), target_vs.contiguous(
                    ).cuda()
                boards, target_pis, target_vs = Variable(boards), Variable(
                    target_pis), Variable(target_vs)

                # measure data loading time
                data_time.update(time.time() - end)

                # compute output

                #print(boards.size())

                out_pi = self.nnet(boards)
                out_v = self.vnet(boards)

                l_pi = self.loss_pi(target_pis, out_pi)
                l_v = self.loss_v(target_vs, out_v)

                # record loss
                pi_losses.update(l_pi.data[0], boards.size(0))
                v_losses.update(l_v.data[0], boards.size(0))

                # compute gradient and do SGD step
                optimizer.zero_grad()
                l_pi.backward()
                optimizer.step()

                optimizer.zero_grad()
                l_v.backward()
                optimizer.step()

                # measure elapsed time
                batch_time.update(time.time() - end)
                end = time.time()
                batch_idx += 1

                # plot progress
                bar.suffix = '({batch}/{size}) Data: {data:.3f}s | Batch: {bt:.3f}s | Total: {total:} | ETA: {eta:} | Loss_pi: {lpi:.4f} | Loss_v: {lv:.3f}'.format(
                    batch=batch_idx,
                    size=int(len(examples) / args.batch_size),
                    data=data_time.avg,
                    bt=batch_time.avg,
                    total=bar.elapsed_td,
                    eta=bar.eta_td,
                    lpi=pi_losses.avg,
                    lv=v_losses.avg,
                )
                bar.next()
            bar.finish()
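The loss_pi and loss_v helpers called in these training loops are not part of the snippets. In AlphaZero-style code they are usually a cross-entropy between the MCTS policy and the predicted (log-)policy, and a mean squared error on the game outcome; a sketch under that assumption:

import torch

def loss_pi(targets, outputs):
    # Cross-entropy against the target policy, assuming the network
    # returns log-probabilities for the policy head.
    return -torch.sum(targets * outputs) / targets.size(0)

def loss_v(targets, outputs):
    # Mean squared error between predicted and actual game outcomes.
    return torch.sum((targets - outputs.view(-1)) ** 2) / targets.size(0)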
Example #6
    def train(self, examples):
        """
        examples: list of examples, each example is of form (board, pi, v)
        """

        for epoch in range(args.epochs):
            print('EPOCH ::: ' + str(epoch + 1))
            data_time = AverageMeter()
            batch_time = AverageMeter()
            pi_losses = AverageMeter()
            v_losses = AverageMeter()
            end = time.time()

            bar = Bar('Training Net', max=int(len(examples) / args.batch_size))
            batch_idx = 0

            # self.sess.run(tf.local_variables_initializer())
            while batch_idx < int(len(examples) / args.batch_size):
                sample_ids = np.random.randint(len(examples),
                                               size=args.batch_size)
                boards, pis, vs = list(zip(*[examples[i] for i in sample_ids]))

                # predict and compute gradient and do SGD step
                input_dict = {
                    self.nnet.input_boards: boards,
                    self.nnet.target_pis: pis,
                    self.nnet.target_vs: vs,
                    self.nnet.dropout: args.dropout,
                    self.nnet.isTraining: True
                }

                # measure data loading time
                data_time.update(time.time() - end)

                # print("dimension of boards: ", len(boards[0]))
                # print("pis l: ", len(pis[0]))
                # print("pis t: ", type(pis))

                # run one training step, then record the losses
                self.sess.run(self.nnet.train_step, feed_dict=input_dict)
                # print("n1")

                pi_loss, v_loss = self.sess.run(
                    [self.nnet.loss_pi, self.nnet.loss_v],
                    feed_dict=input_dict)
                pi_losses.update(pi_loss, len(boards))
                v_losses.update(v_loss, len(boards))

                # measure elapsed time
                batch_time.update(time.time() - end)
                end = time.time()
                batch_idx += 1

                # plot progress
                bar.suffix = '({batch}/{size}) Data: {data:.3f}s | Batch: {bt:.3f}s | Total: {total:} | ETA: {eta:} | Loss_pi: {lpi:.4f} | Loss_v: {lv:.3f}'.format(
                    batch=batch_idx,
                    size=int(len(examples) / args.batch_size),
                    data=data_time.avg,
                    bt=batch_time.avg,
                    total=bar.elapsed_td,
                    eta=bar.eta_td,
                    lpi=pi_losses.avg,
                    lv=v_losses.avg,
                )
                bar.next()
            bar.finish()
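The TensorFlow examples feed a graph whose attributes (input_boards, target_pis, target_vs, dropout, isTraining, train_step, loss_pi, loss_v) are defined elsewhere. The following TF1-style sketch is purely illustrative of such a graph, not the actual network used above:

import tensorflow as tf  # TF1-style graph API (tf.compat.v1 on TF2)

class TinyPolicyValueNet:
    """Minimal illustrative graph exposing the attributes the feed_dict above expects."""

    def __init__(self, board_size, action_size, lr=1e-3):
        self.input_boards = tf.placeholder(tf.float32, [None, board_size, board_size])
        self.target_pis = tf.placeholder(tf.float32, [None, action_size])
        self.target_vs = tf.placeholder(tf.float32, [None])
        self.dropout = tf.placeholder(tf.float32)
        self.isTraining = tf.placeholder(tf.bool)

        flat = tf.reshape(self.input_boards, [-1, board_size * board_size])
        hidden = tf.layers.dense(flat, 128, activation=tf.nn.relu)
        hidden = tf.layers.dropout(hidden, rate=self.dropout, training=self.isTraining)
        self.pi = tf.nn.log_softmax(tf.layers.dense(hidden, action_size))   # log-policy
        self.v = tf.tanh(tf.layers.dense(hidden, 1))                        # value in [-1, 1]

        self.loss_pi = -tf.reduce_mean(tf.reduce_sum(self.target_pis * self.pi, axis=1))
        self.loss_v = tf.losses.mean_squared_error(self.target_vs, tf.reshape(self.v, [-1]))
        self.train_step = tf.train.AdamOptimizer(lr).minimize(self.loss_pi + self.loss_v)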
Example #7
def main():
    # global args
    args = parser.parse_args()

    # <editor-fold desc="Initialization">
    if args.comment == "test":
        print("WARNING: name is test!!!\n\n")

    # now = datetime.datetime.now()
    # current_date = now.strftime("%m-%d-%H-%M")

    assert args.text_criterion in ("MSE", "Cosine", "Hinge",
                                   "NLLLoss"), 'Invalid Loss Function'
    assert args.cm_criterion in ("MSE", "Cosine",
                                 "Hinge"), 'Invalid Loss Function'

    assert args.common_emb_ratio <= 1.0 and args.common_emb_ratio >= 0

    mask = int(args.common_emb_ratio * args.hidden_size)

    cuda = args.cuda
    if cuda == 'true':
        cuda = True
    else:
        cuda = False

    if args.load_model == "NONE":
        keep_loading = False
        # model_path = args.model_path + current_date + "/"
        model_path = args.model_path + args.comment + "/"
    else:
        keep_loading = True
        model_path = args.model_path + args.load_model + "/"

    result_path = args.result_path
    if result_path == "NONE":
        result_path = model_path + "results/"

    if not os.path.exists(result_path):
        os.makedirs(result_path)
    if not os.path.exists(model_path):
        os.makedirs(model_path)
    #</editor-fold>

    # <editor-fold desc="Image Preprocessing">

    # Image preprocessing //ATTENTION
    # For normalization, see https://github.com/pytorch/vision#models
    transform = transforms.Compose([
        transforms.RandomCrop(args.crop_size),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
    ])

    inv_normalize = transforms.Normalize(
        mean=[-0.485 / 0.229, -0.456 / 0.224, -0.406 / 0.225],
        std=[1 / 0.229, 1 / 0.224, 1 / 0.225])
    #</editor-fold>

    # <editor-fold desc="Creating Embeddings">

    # Load vocabulary wrapper.
    print("Loading Vocabulary...")
    with open(args.vocab_path, 'rb') as f:
        vocab = pickle.load(f)

    # Load Embeddings
    emb_size = args.word_embedding_size
    emb_path = args.embedding_path
    if args.embedding_path[-1] == '/':
        emb_path += 'glove.6B.' + str(emb_size) + 'd.txt'

    print("Loading Embeddings...")
    emb = load_glove_embeddings(emb_path, vocab.word2idx, emb_size)

    # glove_emb = Embeddings(emb_size,len(vocab.word2idx),vocab.word2idx["<pad>"])
    # glove_emb.word_lut.weight.data.copy_(emb)
    # glove_emb.word_lut.weight.requires_grad = False

    glove_emb = nn.Embedding(emb.size(0), emb.size(1))
    # glove_emb = embedding(emb.size(0), emb.size(1))
    # glove_emb.weight = nn.Parameter(emb)

    # Freeze weights
    # if args.fixed_embeddings == "true":
    # glove_emb.weight.requires_grad = False

    # </editor-fold>

    # <editor-fold desc="Data-Loaders">

    # Build data loader
    print("Building Data Loader For Test Set...")
    data_loader = get_loader(args.image_dir,
                             args.caption_path,
                             vocab,
                             transform,
                             args.batch_size,
                             shuffle=True,
                             num_workers=args.num_workers)

    print("Building Data Loader For Validation Set...")
    val_loader = get_loader(args.valid_dir,
                            args.valid_caption_path,
                            vocab,
                            transform,
                            args.batch_size,
                            shuffle=True,
                            num_workers=args.num_workers)

    # </editor-fold>

    # <editor-fold desc="Network Initialization">

    print("Setting up the Networks...")
    vae_Txt = SentenceVAE(glove_emb,
                          len(vocab),
                          hidden_size=args.hidden_size,
                          latent_size=args.latent_size,
                          batch_size=args.batch_size)
    vae_Img = ImgVAE(img_dimension=args.crop_size,
                     hidden_size=args.hidden_size,
                     latent_size=args.latent_size)

    if cuda:
        vae_Txt = vae_Txt.cuda()
        vae_Img = vae_Img.cuda()

    # </editor-fold>

    # <editor-fold desc="Losses">

    # Losses and Optimizers
    print("Setting up the Objective Functions...")
    img_criterion = nn.MSELoss()
    # txt_criterion = nn.MSELoss(size_average=True)
    if args.text_criterion == 'MSE':
        txt_criterion = nn.MSELoss()
    elif args.text_criterion == "Cosine":
        txt_criterion = nn.CosineEmbeddingLoss(size_average=False)
    elif args.text_criterion == "NLLLoss":
        txt_criterion = nn.NLLLoss()
    else:
        txt_criterion = nn.HingeEmbeddingLoss(size_average=False)

    if args.cm_criterion == 'MSE':
        cm_criterion = nn.MSELoss()
    elif args.cm_criterion == "Cosine":
        cm_criterion = nn.CosineEmbeddingLoss()
    else:
        cm_criterion = nn.HingeEmbeddingLoss()

    if cuda:
        img_criterion = img_criterion.cuda()
        txt_criterion = txt_criterion.cuda()
        cm_criterion = cm_criterion.cuda()
    # txt_criterion = nn.CrossEntropyLoss()

    # </editor-fold>

    # <editor-fold desc="Optimizers">
    print("Setting up the Optimizers...")

    img_optim = optim.Adam(vae_Img.parameters(),
                           lr=args.learning_rate,
                           betas=(0.5, 0.999),
                           weight_decay=0.00001)
    txt_optim = optim.Adam(vae_Txt.parameters(),
                           lr=args.learning_rate,
                           betas=(0.5, 0.999),
                           weight_decay=0.00001)

    # </editor-fold desc="Optimizers">

    train_images = True  # Reverse 2

    step = 0
    for epoch in range(args.num_epochs):

        # <editor-fold desc = "Epoch Initialization"?

        # TRAINING TIME
        print('EPOCH ::: TRAINING ::: ' + str(epoch + 1))
        batch_time = AverageMeter()
        txt_losses = AverageMeter()
        img_losses = AverageMeter()
        cm_losses = AverageMeter()
        end = time.time()

        bar = Bar('Training Net', max=len(data_loader))

        if keep_loading:
            suffix = "-" + str(epoch) + "-" + args.load_model + ".pkl"
            try:
                vae_Img.load_state_dict(
                    torch.load(
                        os.path.join(args.model_path, 'vae-img' + suffix)))
                vae_Txt.load_state_dict(
                    torch.load(
                        os.path.join(args.model_path, 'vae-txt' + suffix)))
            except FileNotFoundError:
                print("Didn't find any models switching to training")
                keep_loading = False

        if not keep_loading:

            # Set training mode
            vae_Txt.train()
            vae_Img.train()

            # </editor-fold desc = "Epoch Initialization"?

            # train_images = not train_images
            for i, (images, captions, lengths) in enumerate(data_loader):

                if i == len(data_loader) - 1:
                    break

                # <editor-fold desc = "Training Parameters Initiliazation"?

                # Set mini-batch dataset
                images = to_var(images)
                captions = to_var(captions)

                # captions = captions.transpose(0,1).unsqueeze(2)
                lengths = to_var(
                    torch.LongTensor(lengths))  # print(captions.size())

                # Forward, Backward and Optimize
                img_optim.zero_grad()
                txt_optim.zero_grad()

                # </editor-fold desc = "Training Parameters Initiliazation"?

                # <editor-fold desc = "Forward passes"?

                img_out, img_mu, img_logv, img_z = vae_Img(images)
                txt_out, txt_mu, txt_logv, txt_z = vae_Txt(captions, lengths)

                img_rc_loss = img_vae_loss(
                    img_out, images, img_mu,
                    img_logv) / (args.batch_size * args.crop_size**2)

                NLL_loss, KL_loss, KL_weight = seq_vae_loss(
                    txt_out, captions, lengths, txt_mu, txt_logv, "logistic",
                    step, 0.0025, 2500)

                txt_rc_loss = (NLL_loss + KL_weight *
                               KL_loss) / torch.sum(lengths).float()

                cm_loss = crossmodal_loss(txt_z, img_z, mask,
                                          args.cm_criterion, cm_criterion,
                                          args.negative_samples, epoch)

                # cm_loss += crossmodal_loss(txt_logv, img_logv, mask,
                #                           args.cm_criterion, cm_criterion,
                #                           args.negative_samples, epoch)

                # Computes the loss to be back-propagated
                img_loss = img_rc_loss * (
                    1 - args.cm_loss_weight) + cm_loss * args.cm_loss_weight
                txt_loss = txt_rc_loss * (
                    1 - args.cm_loss_weight) + cm_loss * args.cm_loss_weight
                # txt_loss = txt_rc_loss +  cm_loss * args.cm_loss_weight
                # img_loss = img_rc_loss + cm_loss * args.cm_loss_weight

                txt_losses.update(txt_rc_loss.data[0], args.batch_size)
                img_losses.update(img_rc_loss.data[0], args.batch_size)
                cm_losses.update(cm_loss.data[0], args.batch_size)
                # </editor-fold desc = "Loss accumulation"?

                # <editor-fold desc = "Back Propagation">
                # Half of the time we update one pipeline, the other half the other one
                if train_images:
                    # Image Network Training and Backpropagation

                    img_loss.backward()
                    img_optim.step()

                else:
                    # Text Network Training & Backpropagation
                    txt_loss.backward()
                    txt_optim.step()

                    step += 1

                # train_images = not train_images
                # </editor-fold desc = "Back Propagation">

                # <editor-fold desc = "Logging">
                if i % args.image_save_interval == 0:
                    subdir_path = os.path.join(
                        result_path, str(i / args.image_save_interval))

                    if os.path.exists(subdir_path):
                        pass
                    else:
                        os.makedirs(subdir_path)

                    for im_idx in range(3):
                        # im_or = (inv_normalize([im_idx]).cpu().data.numpy().transpose(1,2,0))*255
                        # im = (inv_normalize([im_idx]).cpu().data.numpy().transpose(1,2,0))*255
                        im_or = (images[im_idx].cpu().data.numpy().transpose(
                            1, 2, 0) / 2 + .5) * 255
                        im = (img_out[im_idx].cpu().data.numpy().transpose(
                            1, 2, 0) / 2 + .5) * 255
                        # im = img_out[im_idx].cpu().data.numpy().transpose(1,2,0)*255

                        filename_prefix = os.path.join(subdir_path,
                                                       str(im_idx))
                        scipy.misc.imsave(filename_prefix + '_original.A.jpg',
                                          im_or)
                        scipy.misc.imsave(filename_prefix + '.A.jpg', im)

                        txt_or = " ".join([
                            vocab.idx2word[c]
                            for c in captions[im_idx].cpu().data.numpy()
                        ])
                        _, generated = torch.topk(txt_out[im_idx], 1)
                        txt = " ".join([
                            vocab.idx2word[c]
                            for c in generated[:, 0].cpu().data.numpy()
                        ])

                        with open(filename_prefix + "_captions.txt",
                                  "w") as text_file:
                            text_file.write("Original: %s\n" % txt_or)
                            text_file.write("Generated: %s" % txt)

                # measure elapsed time
                batch_time.update(time.time() - end)
                end = time.time()

                # plot progress
                bar.suffix = '({batch}/{size}) Batch: {bt:.3f}s | Total: {total:} | ETA: {eta:} | Loss_Img: {img_l:.3f}| Loss_Txt: {txt_l:.3f} | Loss_CM: {cm_l:.4f}'.format(
                    batch=i,
                    size=len(data_loader),
                    bt=batch_time.avg,
                    total=bar.elapsed_td,
                    eta=bar.eta_td,
                    img_l=img_losses.avg,
                    txt_l=txt_losses.avg,
                    cm_l=cm_losses.avg,
                )
                bar.next()

            # </editor-fold desc = "Logging">

            bar.finish()

            # <editor-fold desc = "Saving the models"?
            # Save the models
            print('\n')
            print('Saving the models in {}...'.format(model_path))
            torch.save(
                vae_Img.state_dict(),
                os.path.join(model_path, 'vae-img-%d-' % (epoch + 1)) + ".pkl")
            torch.save(
                vae_Txt.state_dict(),
                os.path.join(model_path, 'vae-txt-%d-' % (epoch + 1)) + ".pkl")

            # </editor-fold desc = "Saving the models"?

        if args.validate == "true":
            validate(vae_Img, vae_Txt, val_loader, mask, 10)
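Both image-text examples call to_var, which is not included in the snippets. In PyTorch-0.x-era code it is usually a tiny helper that moves a tensor to the GPU when available and wraps it in a Variable; a sketch under that assumption:

import torch
from torch.autograd import Variable

def to_var(x, volatile=False):
    # Assumed helper: move to GPU when available, then wrap for autograd.
    if torch.cuda.is_available():
        x = x.cuda()
    return Variable(x, volatile=volatile)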
Example #8
    def train(self, examples):
        """
        examples: list of examples, each example is of form (board, pi, v)
        """
        optimizer = optim.Adam(self.nnet.parameters())

        for epoch in range(args.epochs):
            print('EPOCH ::: ' + str(epoch+1))
            self.nnet.train()
            data_time = AverageMeter()
            batch_time = AverageMeter()
            pi_losses = AverageMeter()
            v_losses = AverageMeter()
            end = time.time()

            bar = Bar('Training Net', max=int(len(examples)/args.batch_size))
            batch_idx = 0

            while batch_idx < int(len(examples)/args.batch_size):
                sample_ids = np.random.randint(len(examples), size=args.batch_size)
                boards, pis, vs = list(zip(*[examples[i] for i in sample_ids]))
                boards = torch.FloatTensor(np.array(boards).astype(np.float64))
                target_pis = torch.FloatTensor(np.array(pis))
                target_vs = torch.FloatTensor(np.array(vs).astype(np.float64))

                # predict
                if args.cuda:
                    boards, target_pis, target_vs = boards.contiguous().cuda(), target_pis.contiguous().cuda(), target_vs.contiguous().cuda()
                boards, target_pis, target_vs = Variable(boards), Variable(target_pis), Variable(target_vs)

                # measure data loading time
                data_time.update(time.time() - end)

                # compute output
                out_pi, out_v = self.nnet(boards)
                l_pi = self.loss_pi(target_pis, out_pi)

                l_v = self.loss_v(target_vs, out_v)
                total_loss = l_pi + l_v

                # record loss
                pi_losses.update(l_pi.data[0], boards.size(0))
                v_losses.update(l_v.data[0], boards.size(0))

                # compute gradient and do SGD step
                optimizer.zero_grad()
                total_loss.backward()
                optimizer.step()

                # measure elapsed time
                batch_time.update(time.time() - end)
                end = time.time()
                batch_idx += 1

                # plot progress
                bar.suffix  = '({batch}/{size}) Data: {data:.3f}s | Batch: {bt:.3f}s | Total: {total:} | ETA: {eta:} | Loss_pi: {lpi:.4f} | Loss_v: {lv:.3f}'.format(
                            batch=batch_idx,
                            size=int(len(examples)/args.batch_size),
                            data=data_time.avg,
                            bt=batch_time.avg,
                            total=bar.elapsed_td,
                            eta=bar.eta_td,
                            lpi=pi_losses.avg,
                            lv=v_losses.avg,
                            )
                bar.next()
            bar.finish()
Example #9
def main():
    # global args
    args = parser.parse_args()

    # <editor-fold desc="Initialization">

    now = datetime.datetime.now()
    current_date = now.strftime("%m-%d-%H-%M")

    assert args.text_criterion in ("MSE","Cosine","Hinge","NLLLoss"), 'Invalid Loss Function'
    assert args.cm_criterion in ("MSE","Cosine","Hinge"), 'Invalid Loss Function'

    mask = int(args.common_emb_percentage * args.hidden_size)
    assert mask <= args.hidden_size

    cuda = args.cuda
    if cuda == 'true':
        cuda = True
    else:
        cuda = False

    if args.load_model == "NONE":
        keep_loading = True
        model_path = args.model_path + current_date + "/"
    else:
        keep_loading = False
        model_path = args.model_path + args.load_model + "/"

    result_path = args.result_path
    if result_path == "NONE":
        result_path = model_path + "results/"




    if not os.path.exists(result_path):
        os.makedirs(result_path)
    if not os.path.exists(model_path):
        os.makedirs(model_path)
    #</editor-fold>

    # <editor-fold desc="Image Preprocessing">

    # Image preprocessing //ATTENTION
    # For normalization, see https://github.com/pytorch/vision#models
    transform = transforms.Compose([
        transforms.RandomCrop(args.crop_size),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406),
                             (0.229, 0.224, 0.225))])

    #</editor-fold>

    # <editor-fold desc="Creating Embeddings">


    # Load vocabulary wrapper.
    print("Loading Vocabulary...")
    with open(args.vocab_path, 'rb') as f:
        vocab = pickle.load(f)

    # Load Embeddings
    emb_size = args.embedding_size
    emb_path = args.embedding_path
    if args.embedding_path[-1]=='/':
        emb_path += 'glove.6B.' + str(emb_size) + 'd.txt'

    print("Loading Embeddings...")
    emb = load_glove_embeddings(emb_path, vocab.word2idx, emb_size)

    # glove_emb = Embeddings(emb_size,len(vocab.word2idx),vocab.word2idx["<pad>"])
    # glove_emb.word_lut.weight.data.copy_(emb)
    # glove_emb.word_lut.weight.requires_grad = False

    glove_emb = nn.Embedding(emb.size(0), emb.size(1))
    # glove_emb = embedding(emb.size(0), emb.size(1))
    # glove_emb.weight = nn.Parameter(emb)


    # Freeze weights
    # if args.fixed_embeddings == "true":
        # glove_emb.weight.requires_grad = False


    # </editor-fold>

    # <editor-fold desc="Data-Loaders">

    # Build data loader
    print("Building Data Loader For Test Set...")
    data_loader = get_loader(args.image_dir, args.caption_path, vocab,
                             transform, args.batch_size,
                             shuffle=True, num_workers=args.num_workers)

    print("Building Data Loader For Validation Set...")
    val_loader = get_loader(args.valid_dir, args.valid_caption_path, vocab,
                             transform, args.batch_size,
                             shuffle=True, num_workers=args.num_workers)

    # </editor-fold>

    # <editor-fold desc="Network Initialization">

    print("Setting up the Networks...")
    encoder_Txt = TextEncoder(glove_emb, num_layers=1, bidirectional=False, hidden_size=args.hidden_size)
    decoder_Txt = TextDecoder(glove_emb, len(vocab),  num_layers=1, bidirectional=False, hidden_size=args.hidden_size)
    # decoder_Txt = TextDecoder(encoder_Txt, glove_emb)
    # decoder_Txt = DecoderRNN(glove_emb, hidden_size=args.hidden_size)


    encoder_Img = ImageEncoder(img_dimension=args.crop_size,feature_dimension= args.hidden_size)
    decoder_Img = ImageDecoder(img_dimension=args.crop_size, feature_dimension= args.hidden_size)

    if cuda:
        encoder_Txt = encoder_Txt.cuda()
        decoder_Img = decoder_Img.cuda()

        encoder_Img = encoder_Img.cuda()
        decoder_Txt = decoder_Txt.cuda()

    # </editor-fold>

    # <editor-fold desc="Losses">

    # Losses and Optimizers
    print("Setting up the Objective Functions...")
    img_criterion = nn.MSELoss()
    # txt_criterion = nn.MSELoss(size_average=True)
    if args.text_criterion == 'MSE':
        txt_criterion = nn.MSELoss()
    elif args.text_criterion == "Cosine":
        txt_criterion = nn.CosineEmbeddingLoss(size_average=False)
    elif args.text_criterion == "NLLLoss":
        txt_criterion = nn.NLLLoss()
    else:
        txt_criterion = nn.HingeEmbeddingLoss(size_average=False)

    if args.cm_criterion == 'MSE':
        cm_criterion = nn.MSELoss()
    elif args.cm_criterion == "Cosine":
        cm_criterion = nn.CosineEmbeddingLoss()
    else:
        cm_criterion = nn.HingeEmbeddingLoss()


    if cuda:
        img_criterion = img_criterion.cuda()
        txt_criterion = txt_criterion.cuda()
        cm_criterion = cm_criterion.cuda()
    # txt_criterion = nn.CrossEntropyLoss()

    # </editor-fold>

    # <editor-fold desc="Optimizers">
    #     gen_params = chain(generator_A.parameters(), generator_B.parameters())
    print("Setting up the Optimizers...")
    # img_params = chain(decoder_Img.parameters(), encoder_Img.parameters())
    # txt_params = chain(decoder_Txt.decoder.parameters(), encoder_Txt.encoder.parameters())
    # img_params = list(decoder_Img.parameters()) + list(encoder_Img.parameters())
    # txt_params = list(decoder_Txt.decoder.parameters()) + list(encoder_Txt.encoder.parameters())

    # ATTENTION: Check betas and weight decay
    # ATTENTION: Check why valid_params fails on image networks with out of memory error

    # img_optim = optim.Adam(img_params, lr=0.0001, betas=(0.5, 0.999), weight_decay=0.00001)
    # txt_optim = optim.Adam(valid_params(txt_params), lr=0.0001,betas=(0.5, 0.999), weight_decay=0.00001)
    img_enc_optim = optim.Adam(encoder_Img.parameters(), lr=args.learning_rate)#betas=(0.5, 0.999), weight_decay=0.00001)
    img_dec_optim = optim.Adam(decoder_Img.parameters(), lr=args.learning_rate)#betas=(0.5,0.999), weight_decay=0.00001)
    txt_enc_optim = optim.Adam(valid_params(encoder_Txt.parameters()), lr=args.learning_rate)#betas=(0.5,0.999), weight_decay=0.00001)
    txt_dec_optim = optim.Adam(valid_params(decoder_Txt.parameters()), lr=args.learning_rate)#betas=(0.5,0.999), weight_decay=0.00001)

    # </editor-fold desc="Optimizers">

    train_images = False # Reverse 2

    for epoch in range(args.num_epochs):

        # <editor-fold desc = "Epoch Initialization"?

        # TRAINING TIME
        print('EPOCH ::: TRAINING ::: ' + str(epoch + 1))
        batch_time = AverageMeter()
        txt_losses = AverageMeter()
        img_losses = AverageMeter()
        cm_losses = AverageMeter()
        end = time.time()

        bar = Bar('Training Net', max=len(data_loader))

        if keep_loading:
            suffix = "-" + str(epoch) + "-" + args.load_model + ".pkl"
            try:
                encoder_Img.load_state_dict(torch.load(os.path.join(args.model_path,
                                        'encoder-img' + suffix)))
                encoder_Txt.load_state_dict(torch.load(os.path.join(args.model_path,
                                        'encoder-txt' + suffix)))
                decoder_Img.load_state_dict(torch.load(os.path.join(args.model_path,
                                        'decoder-img' + suffix)))
                decoder_Txt.load_state_dict(torch.load(os.path.join(args.model_path,
                                        'decoder-txt' + suffix)))
            except FileNotFoundError:
                print("Didn't find any models switching to training")
                keep_loading = False

        if not keep_loading:

            # Set training mode
            encoder_Img.train()
            decoder_Img.train()

            encoder_Txt.train()
            decoder_Txt.train()

            # </editor-fold desc = "Epoch Initialization"?

            train_images = not train_images
            for i, (images, captions, lengths) in enumerate(data_loader):

                if i == len(data_loader)-1:
                    break


                # <editor-fold desc = "Training Parameters Initiliazation"?

                # Set mini-batch dataset
                images = to_var(images)
                captions = to_var(captions)

                # target = pack_padded_sequence(captions, lengths, batch_first=True)[0]
                # captions, lengths = pad_sequences(captions, lengths)
                # images = torch.FloatTensor(images)

                captions = captions.transpose(0,1).unsqueeze(2)
                lengths = to_var(torch.LongTensor(lengths))            # print(captions.size())


                # Forward, Backward and Optimize
                # img_optim.zero_grad()
                img_dec_optim.zero_grad()
                img_enc_optim.zero_grad()
                # encoder_Img.zero_grad()
                # decoder_Img.zero_grad()

                # txt_params.zero_grad()
                txt_dec_optim.zero_grad()
                txt_enc_optim.zero_grad()
                # encoder_Txt.encoder.zero_grad()
                # decoder_Txt.decoder.zero_grad()

                # </editor-fold desc = "Training Parameters Initiliazation"?

                # <editor-fold desc = "Image AE"?

                # Image Auto_Encoder Forward
                mu, logvar  = encoder_Img(images)

                Iz = logvar
                # Iz = reparametrize(mu, logvar)
                IzI = decoder_Img(mu)

                img_rc_loss = img_criterion(IzI,images)
                # </editor-fold desc = "Image AE"?

                # <editor-fold desc = "Seq2Seq AE"?
                # Text Auto Encoder Forward

                # target = target[:-1] # exclude last target from inputs

                teacher_forcing_ratio = 0.5

                encoder_hidden = encoder_Txt.initHidden(args.batch_size)

                input_length = captions.size(0)
                target_length = captions.size(0)

                if cuda:
                    encoder_outputs = Variable(torch.zeros(input_length, args.batch_size, args.hidden_size).cuda())
                    decoder_outputs = Variable(torch.zeros(input_length, args.batch_size, len(vocab)).cuda())
                else:
                    encoder_outputs = Variable(torch.zeros(input_length, args.batch_size, args.hidden_size))
                    decoder_outputs = Variable(torch.zeros(input_length, args.batch_size, len(vocab)))

                txt_rc_loss = 0

                for ei in range(input_length):
                    encoder_output, encoder_hidden = encoder_Txt(
                        captions[ei, :], encoder_hidden)
                    encoder_outputs[ei] = encoder_output

                decoder_input = Variable(torch.LongTensor([vocab.word2idx['<start>']])).cuda()\
                    .repeat(args.batch_size,1)


                decoder_hidden = encoder_hidden

                use_teacher_forcing = True #if np.random.random() < teacher_forcing_ratio else False

                if use_teacher_forcing:
                    # Teacher forcing: Feed the target as the next input
                    for di in range(target_length-1):
                        decoder_output, decoder_hidden = decoder_Txt(
                            decoder_input, decoder_hidden)  # , encoder_outputs)
                        # txt_rc_loss += txt_criterion(decoder_output, captions[di].unsqueeze(1))

                        decoder_outputs[di] = decoder_output

                        decoder_input = captions[di+1]  # Teacher forcing

                else:
                    # Without teacher forcing: use the model's own predictions as the next input
                    for di in range(target_length-1):
                        decoder_output, decoder_hidden = decoder_Txt(
                            decoder_input, decoder_hidden)
                        decoder_outputs[di] = decoder_output
                        topv, topi = decoder_output.topk(1)
                        decoder_input = topi.squeeze().detach()  # detach from history as input

                        txt_rc_loss += txt_criterion(decoder_output, captions[di])
                # if decoder_input.item() == ("<end>"):
                #     break

                # Check start tokens etc
                txt_rc_loss, _, _, _ = masked_cross_entropy(
                    decoder_outputs[:target_length-1].transpose(0, 1).contiguous(),
                    captions[1:, :, 0].transpose(0, 1).contiguous(),
                    lengths - 1
                )


                # captions = captions[:-1,:,:]
                # lengths = lengths - 1
                # dec_state = None

                # Computes Cross-Modal Loss

                # Tz = encoder_hidden[0]
                Tz = encoder_output[:,0,:]

                txt =  Tz.narrow(1,0,mask)
                im = Iz.narrow(1,0,mask)

                if args.cm_criterion == 'MSE':
                    # cm_loss = cm_criterion(Tz.narrow(1,0,mask), Iz.narrow(1,0,mask))
                    cm_loss = mse_loss(txt, im)
                else:
                    cm_loss = cm_criterion(txt, im, \
                    Variable(torch.ones(im.size(0)).cuda()))

                # K - Negative Samples
                k = args.negative_samples
                neg_rate = (20-epoch)/20
                for _ in range(k):

                    if cuda:
                        perm = torch.randperm(args.batch_size).cuda()
                    else:
                        perm = torch.randperm(args.batch_size)

                    # if args.criterion == 'MSE':
                    #     cm_loss -= mse_loss(txt, im[perm])/k
                    # else:
                    #     cm_loss -= cm_criterion(txt, im[perm], \
                    #                            Variable(torch.ones(Tz.narrow(1,0,mask).size(0)).cuda()))/k

                    # sim  = (F.cosine_similarity(txt,txt[perm]) - 0.5)/2

                    if args.cm_criterion == 'MSE':
                        sim  = (F.cosine_similarity(txt,txt[perm]) - 1)/(2*k)
                        # cm_loss = cm_criterion(Tz.narrow(1,0,mask), Iz.narrow(1,0,mask))
                        cm_loss += mse_loss(txt, im[perm], sim)
                    else:
                        cm_loss += neg_rate * cm_criterion(txt, im[perm], \
                        Variable(-1*torch.ones(txt.size(0)).cuda()))/k


                # cm_loss = Variable(torch.max(torch.FloatTensor([-0.100]).cuda(), cm_loss.data))


                # Computes the loss to be back-propagated
                img_loss = img_rc_loss * (1 - args.cm_loss_weight) + cm_loss * args.cm_loss_weight
                txt_loss = txt_rc_loss * (1 - args.cm_loss_weight) + cm_loss * args.cm_loss_weight
                # txt_loss = txt_rc_loss + 0.1 * cm_loss
                # img_loss = img_rc_loss + cm_loss

                txt_losses.update(txt_rc_loss.data[0],args.batch_size)
                img_losses.update(img_rc_loss.data[0],args.batch_size)
                cm_losses.update(cm_loss.data[0], args.batch_size)
                # </editor-fold desc = "Loss accumulation"?

                # <editor-fold desc = "Back Propagation">
                # Half of the time we update one pipeline, the other half the other one
                if train_images:
                # Image Network Training and Backpropagation

                    img_loss.backward()
                    # img_optim.step()
                    img_enc_optim.step()
                    img_dec_optim.step()

                else:
                    # Text Network Training & Backpropagation

                    txt_loss.backward()
                    # txt_optim.step()
                    txt_enc_optim.step()
                    txt_dec_optim.step()

                train_images = not train_images
                # </editor-fold desc = "Back Propagation">

                # <editor-fold desc = "Logging">
                if i % args.image_save_interval == 0:
                    subdir_path = os.path.join( result_path, str(i / args.image_save_interval) )

                    if os.path.exists( subdir_path ):
                        pass
                    else:
                        os.makedirs( subdir_path )

                    for im_idx in range(3):
                        im_or = (images[im_idx].cpu().data.numpy().transpose(1,2,0)/2+.5)*255
                        im = (IzI[im_idx].cpu().data.numpy().transpose(1,2,0)/2+.5)*255

                        filename_prefix = os.path.join (subdir_path, str(im_idx))
                        scipy.misc.imsave( filename_prefix + '_original.A.jpg', im_or)
                        scipy.misc.imsave( filename_prefix + '.A.jpg', im)


                        txt_or = " ".join([vocab.idx2word[c] for c in list(captions[:,im_idx].view(-1).cpu().data)])
                        txt = " ".join([vocab.idx2word[c] for c in list(decoder_outputs[:,im_idx].view(-1).cpu().data)])
                        print("Original: ", txt_or)
                        print(txt)


                # measure elapsed time
                batch_time.update(time.time() - end)
                end = time.time()

                # plot progress
                bar.suffix = '({batch}/{size}) Batch: {bt:.3f}s | Total: {total:} | ETA: {eta:} | Loss_Img: {img_l:.3f}| Loss_Txt: {txt_l:.3f} | Loss_CM: {cm_l:.4f}'.format(
                    batch=i,
                    size=len(data_loader),
                    bt=batch_time.avg,
                    total=bar.elapsed_td,
                    eta=bar.eta_td,
                    img_l=img_losses.avg,
                    txt_l=txt_losses.avg,
                    cm_l=cm_losses.avg,
                    )
                bar.next()

                                                                         # </editor-fold desc = "Logging">

            bar.finish()

            # <editor-fold desc = "Saving the models"?
            # Save the models
            print('\n')
            print('Saving the models in {}...'.format(model_path))
            torch.save(decoder_Img.state_dict(),
                       os.path.join(model_path,
                                    'decoder-img-%d-' %(epoch+1)) + current_date + ".pkl")
            torch.save(encoder_Img.state_dict(),
                       os.path.join(model_path,
                                    'encoder-img-%d-' %(epoch+1)) + current_date + ".pkl")
            torch.save(decoder_Txt.state_dict(),
                       os.path.join(model_path,
                                    'decoder-txt-%d-' %(epoch+1)) + current_date + ".pkl")
            torch.save(encoder_Txt.state_dict(),
                       os.path.join(model_path,
                                    'encoder-txt-%d-' %(epoch+1)) + current_date + ".pkl")

            # </editor-fold desc = "Saving the models"?

        if args.validate == "true":
            validate(encoder_Img, encoder_Txt, val_loader, mask, 10)
    def learn(self):
        """
        Performs numIters iterations with numEps episodes of self-play in each
        iteration. After every iteration, it retrains neural network with
        examples in trainExamples (which has a maximum length of maxlenOfQueue).
        It then pits the new neural network against the old one and accepts it
        only if it wins >= updateThreshold fraction of games.
        """

        for i in range(1, self.args.numIters + 1):
            # bookkeeping
            print('------ITER ' + str(i) + '------')

            std = 999
            # examples of the iteration
            if not self.skipFirstSelfPlay or i > 1:
                iterationTrainExamples = deque([],
                                               maxlen=self.args.maxlenOfQueue)

                eps_time = AverageMeter()
                bar = Bar('Self Play', max=self.args.numEps)
                end = time.time()

                reward_list = []
                count_list = []
                step_list = []

                for eps in range(self.args.numEps):
                    self.mcts = MCTS(self.game, self.nnet,
                                     self.args)  # reset search tree

                    example, step_count = self.executeEpisode()
                    iterationTrainExamples += example

                    step_list.append(step_count)
                    reward_list.append(iterationTrainExamples[-1][2])
                    count_list.append(eps)

                    # bookkeeping + plot progress
                    eps_time.update(time.time() - end)
                    end = time.time()
                    bar.suffix = '({eps}/{maxeps}) Eps Time: {et:.3f}s | Total: {total:} | ETA: {eta:}'.format(
                        eps=eps + 1,
                        maxeps=self.args.numEps,
                        et=eps_time.avg,
                        total=bar.elapsed_td,
                        eta=bar.eta_td)
                    bar.next()
                bar.finish()

                plt.scatter(count_list, reward_list, label='rewards_training')
                plt.savefig("fig/" + str(self.round) + "_rewards_" + str(i) +
                            ".png")
                plt.close()
                #plt.scatter(count_list, step_list, label = 'steps_training')
                #plt.savefig("fig/"+str(self.round)+"_steps_"+str(i)+".png")
                #plt.close()

                iterationTrainExamples, std, mean = self.normalizeReward(
                    iterationTrainExamples)

                # save the iteration examples to the history
                self.trainExamplesHistory.append(iterationTrainExamples)

            if len(self.trainExamplesHistory
                   ) > self.args.numItersForTrainExamplesHistory:
                print("len(trainExamplesHistory) =",
                      len(self.trainExamplesHistory),
                      " => remove the oldest trainExamples")
                self.trainExamplesHistory.pop(0)
            # backup history to a file
            #self.saveTrainExamples(i-1)

            # shuffle examples before training
            trainExamples = []
            for e in self.trainExamplesHistory:
                trainExamples.extend(e)
            shuffle(trainExamples)

            # training new network, keeping a copy of the old one
            self.nnet.save_checkpoint(folder=self.args.checkpoint,
                                      filename='temp.pth.tar')
            #self.pnet.load_checkpoint(folder=self.args.checkpoint, filename='temp.pth.tar')
            #pmcts = MCTS(self.game, self.pnet, self.args)

            self.nnet.train(trainExamples)
            self.show = True
            #nmcts = MCTS(self.game, self.nnet, self.args)
            """

            print('PITTING AGAINST PREVIOUS VERSION')
            arena = Arena(lambda x: np.argmax(pmcts.getActionProb(x, temp=0)),
                          lambda x: np.argmax(nmcts.getActionProb(x, temp=0)), self.game)
            pwins, nwins, draws = arena.playGames(self.args.arenaCompare)

            print('NEW/PREV WINS : %d / %d ; DRAWS : %d' % (nwins, pwins, draws))
            if pwins+nwins > 0 and float(nwins)/(pwins+nwins) < self.args.updateThreshold:
                print('REJECTING NEW MODEL')
                self.nnet.load_checkpoint(folder=self.args.checkpoint, filename='temp.pth.tar')
            else:
                print('ACCEPTING NEW MODEL')"""
            self.nnet.save_checkpoint(folder=self.args.checkpoint,
                                      filename=self.getCheckpointFile(i))
            self.nnet.save_checkpoint(folder=self.args.checkpoint,
                                      filename='best.pth.tar')

            if std < 100 and mean < self.game.lower / 4:
                print("stop traing because of identical rewards")
                break
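normalizeReward is called above but not shown. Judging from how its returned std and mean feed the stopping condition, it presumably standardises the value targets of the collected examples; a hypothetical sketch, assuming each example is a (board, pi, v) tuple:

import numpy as np

def normalizeReward(examples):
    # Hypothetical sketch: z-score the value targets and report their spread
    # so the caller can decide when rewards have become (nearly) identical.
    values = np.array([v for (_, _, v) in examples], dtype=np.float64)
    mean, std = float(values.mean()), float(values.std())
    if std > 0:
        examples = [(board, pi, (v - mean) / std) for (board, pi, v) in examples]
    return list(examples), std, mean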
Example #11
    def learn(self):
        """
        Performs numIters iterations with numEps episodes of self-play in each
        iteration. After every iteration, it retrains neural network with
        examples in trainExamples (which has a maximum length of maxlenOfQueue).
        It then pits the new neural network against the old one and accepts it
        only if it wins >= updateThreshold fraction of games.
        """

        for i in range(1, self.args.numIters + 1):
            # bookkeeping
            print('------ITER ' + str(i) + '------')
            # examples of the iteration
            if not self.skipFirstSelfPlay or i > 1:
                iterationTrainExamples = deque([],
                                               maxlen=self.args.maxlenOfQueue)

                eps_time = AverageMeter()
                bar = Bar('Self Play', max=self.args.numEps)
                end = time.time()

                for eps in range(self.args.numEps):
                    self.mcts = MCTS(self.game, self.nnet,
                                     self.args)  # reset search tree
                    iterationTrainExamples += self.executeEpisode()

                    # bookkeeping + plot progress
                    eps_time.update(time.time() - end)
                    end = time.time()
                    bar.suffix = '({eps}/{maxeps}) Eps Time: {et:.3f}s | Total: {total:} | ETA: {eta:}'.format(
                        eps=eps + 1,
                        maxeps=self.args.numEps,
                        et=eps_time.avg,
                        total=bar.elapsed_td,
                        eta=bar.eta_td)
                    bar.next()

                bar.finish()

                # save the iteration examples to the history
                self.trainExamplesHistory = []
                self.trainExamplesHistory.append(iterationTrainExamples)

            #if len(self.trainExamplesHistory) > self.args.numItersForTrainExamplesHistory:
            #   print("len(trainExamplesHistory) =", len(self.trainExamplesHistory), " => remove the oldest trainExamples")
            #    self.trainExamplesHistory.pop(0)
            # backup history to a file
            # NB! the examples were collected using the model from the previous iteration, so (i-1)
            # self.saveTrainExamples(i-1)

            # shuffle examples before training
            trainExamples = []
            for e in self.trainExamplesHistory:
                trainExamples.extend(e)
            shuffle(trainExamples)

            # training new network, keeping a copy of the old one
            self.nnet.save_checkpoint(folder=self.args.checkpoint,
                                      filename='temp.pth.tar')

            self.nnet.train(trainExamples)
            self.nnet.save_checkpoint(folder=self.args.checkpoint,
                                      filename=self.getCheckpointFile(i))
            self.mcts = MCTS(self.game, self.nnet, self.args)
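
Example #11 keeps only the latest iteration's examples and leaves the history-trimming commented out; as an alternative, a small hedged sketch of the sliding-window bookkeeping used by the later examples, where a `deque` with `maxlen` replaces the manual `pop(0)` (sizes are illustrative):

from collections import deque

numItersForTrainExamplesHistory = 20          # window size, illustrative
trainExamplesHistory = deque(maxlen=numItersForTrainExamplesHistory)

for iteration in range(25):
    iterationTrainExamples = [(None, None, 0)] * 8   # stand-in for (board, pi, v) tuples
    trainExamplesHistory.append(iterationTrainExamples)  # oldest iteration drops out automatically

assert len(trainExamplesHistory) == numItersForTrainExamplesHistory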
Example #12
0
def validate(encoder_Img,
             encoder_Txt,
             loader,
             mask,
             limit=1000,
             metric="cosine"):

    cm_criterion = nn.CosineEmbeddingLoss()
    # VALIDATION TIME
    print('\033[92mEPOCH ::: VALIDATION ::: ')

    # Set Evaluation Mode
    encoder_Img.eval()

    try:
        encoder_Txt.encoder.eval()
    except AttributeError:
        encoder_Txt.eval()

    batch_time = AverageMeter()
    end = time.time()

    bar = Bar('Computing Validation Set Embeddings', max=len(loader))

    cm_losses = AverageMeter()

    for i, (images, captions, lengths) in enumerate(loader):
        if i == limit:
            break

        # Set mini-batch dataset
        images = to_var(images)
        captions = to_var(captions)

        captions = captions.transpose(0, 1).unsqueeze(2)
        lengths = torch.LongTensor(lengths)

        _, img_emb = encoder_Img(images)

        try:
            txt_emb, _ = encoder_Txt(captions, lengths)
            txt_emb = txt_emb[0, :, :mask]
        except:
            encoder_hidden = encoder_Txt.initHidden(len(lengths))
            for ei in range(lengths[0] - 1):
                encoder_output, encoder_hidden = encoder_Txt(
                    captions[ei, :], encoder_hidden)

            # fall back to the final hidden state as the sentence embedding
            txt_emb = encoder_hidden[0, :, :mask]

        img_emb = img_emb[:, :mask]

        # current_embeddings = torch.cat( \
        #         (txt_emb.transpose(0,1).data,img_emb.unsqueeze(1).data)
        #         , 1)

        current_embeddings = np.concatenate( \
            (txt_emb.unsqueeze(0).cpu().data.numpy(),\
             img_emb.unsqueeze(0).cpu().data.numpy())\
            ,0)

        # current_embeddings = img_emb.data
        if i:
            # result_embeddings = torch.cat( \
            result_embeddings = np.concatenate( \
                (result_embeddings, current_embeddings) \
                ,1)
        else:
            result_embeddings = current_embeddings

        cm_loss = cm_criterion(txt_emb, img_emb, \
                             Variable(torch.ones(img_emb.size(0)).cuda()))

        cm_losses.update(cm_loss.data[0], img_emb.size(0))

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        # plot progress
        bar.suffix = '({batch}/{size}) Batch: {bt:.3f}s | Total: {total:} | ETA: {eta:} | CM_LOSS: {cm_l:.4f}'.format(
            batch=i,
            size=len(loader),
            bt=batch_time.avg,
            total=bar.elapsed_td,
            eta=bar.eta_td,
            cm_l=cm_losses.avg,
        )
        bar.next()
    bar.finish()

    a = [((result_embeddings[0][i] - result_embeddings[1][i])**2).mean()
         for i in range(result_embeddings.shape[1])]
    print("Validation MSE: ", np.mean(a))

    print("Computing Nearest Neighbors...")
    i = 0
    topk = []
    kss = [1, 5, 10]
    for k in kss:

        if i:
            print("Normalized ")
            result_embeddings[
                0] = result_embeddings[0] / result_embeddings[0].sum()
            result_embeddings[
                1] = result_embeddings[1] / result_embeddings[1].sum()

        # k = 5
        neigh = NearestNeighbors(n_neighbors=k, metric='cosine')
        neigh.fit(result_embeddings[1])
        kneigh = neigh.kneighbors(result_embeddings[0], return_distance=False)

        ks = set()
        for n in kneigh:
            ks.update(set(n))

        print(len(ks) / result_embeddings.shape[1])

        # a = [((result_embeddings[0][i] - result_embeddings[1][i]) ** 2).mean() for i in range(128)]
        # rs = result_embeddings.sum(2)
        # a = (((result_embeddings[0][0]- result_embeddings[1][0])**2).mean())
        # b = (((result_embeddings[0][0]- result_embeddings[0][34])**2).mean())
        topk.append(np.mean([int(i in nn) for i, nn in enumerate(kneigh)]))

    print(
        "Top-{k:},{k2:},{k3:} accuracy for Image Retrieval:\n\n\t\033[95m {tpk: .3f}% \t {tpk2: .3f}% \t {tpk3: .3f}% \n"
        .format(k=kss[0],
                k2=kss[1],
                k3=kss[2],
                tpk=100 * topk[0],
                tpk2=100 * topk[1],
                tpk3=100 * topk[2]))
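
The nearest-neighbour block above measures how often a caption retrieves its own image; a self-contained sketch of the same top-k computation on random embeddings, assuming cosine distance as in the validation loop (shapes and data are illustrative):

import numpy as np
from sklearn.neighbors import NearestNeighbors

rng = np.random.RandomState(0)
txt_emb = rng.randn(200, 64)   # one row per caption
img_emb = rng.randn(200, 64)   # matching image embeddings, same order

k = 5
neigh = NearestNeighbors(n_neighbors=k, metric='cosine').fit(img_emb)
kneigh = neigh.kneighbors(txt_emb, return_distance=False)

# top-k accuracy: caption i should retrieve image i among its k neighbours
top_k = np.mean([i in nn for i, nn in enumerate(kneigh)])
print('Top-%d accuracy: %.1f%%' % (k, 100 * top_k))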
Example #13
0
    def learn(self):
        """
        Performs numIters iterations with numEps episodes of self-play in each
        iteration. After every iteration, it retrains the neural network with
        examples in trainExamples (which has a maximum length of maxlenOfQueue).
        It then pits the new neural network against the old one and accepts it
        only if it wins >= updateThreshold fraction of games.
        """

        trainExamples = deque([], maxlen=self.args.maxlenOfQueue)
        for i in range(self.args.numIters):
            # bookkeeping
            print('------ITER ' + str(i + 1) + '------')
            eps_time = AverageMeter()
            bar = Bar('Self Play', max=self.args.numEps)
            end = time.time()

            for eps in range(self.args.numEps):
                trainExamples += self.executeEpisode()

                # bookkeeping + plot progress
                eps_time.update(time.time() - end)
                end = time.time()
                bar.suffix = '({eps}/{maxeps}) Eps Time: {et:.3f}s | Total: {total:} | ETA: {eta:}'.format(
                    eps=eps + 1,
                    maxeps=self.args.numEps,
                    et=eps_time.avg,
                    total=bar.elapsed_td,
                    eta=bar.eta_td)
                bar.next()
            bar.finish()

            # training new network, keeping a copy of the old one
            self.nnet.save_checkpoint(folder=self.args.checkpoint,
                                      filename='temp.pth.tar')
            pnet = self.nnet.__class__(self.game)
            pnet.load_checkpoint(folder=self.args.checkpoint,
                                 filename='temp.pth.tar')
            pmcts = MCTS(self.game, pnet, self.args)
            self.nnet.train(trainExamples)
            nmcts = MCTS(self.game, self.nnet, self.args)

            print('PITTING AGAINST PREVIOUS VERSION')
            arena = Arena(lambda x: np.argmax(pmcts.getActionProb(x, temp=0)),
                          lambda x: np.argmax(nmcts.getActionProb(x, temp=0)),
                          self.game)
            pwins, nwins = arena.playGames(self.args.arenaCompare)

            print('NEW/PREV WINS : ' + str(nwins) + '/' + str(pwins))
            if float(nwins) / (pwins + nwins) < self.args.updateThreshold:
                print('REJECTING NEW MODEL')
                self.nnet = pnet

            else:
                print('ACCEPTING NEW MODEL')
                self.nnet.save_checkpoint(folder=self.args.checkpoint,
                                          filename='checkpoint_' + str(i) +
                                          '.pth.tar')
                self.nnet.save_checkpoint(folder=self.args.checkpoint,
                                          filename='best.pth.tar')
                self.mcts = MCTS(self.game, self.nnet,
                                 self.args)  # reset search tree
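
Unlike the later examples, this version divides by `pwins + nwins` without guarding against an all-draw arena; a small hedged sketch of the acceptance rule with that case handled explicitly (the threshold value is illustrative):

def accept_new_model(nwins, pwins, updateThreshold=0.6):
    """Return True if the new network should replace the old one."""
    decided = pwins + nwins
    if decided == 0:          # every arena game was a draw: keep the old model
        return False
    return nwins / decided >= updateThreshold

# accept_new_model(nwins=25, pwins=15) -> True   (win rate 0.625 >= 0.6)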
Example #14
0
    def learn(self):
        """
        Performs numIters iterations with numEps episodes of self-play in each
        iteration. After every iteration, it retrains the neural network with
        examples in trainExamples (which has a maximum length of maxlenOfQueue).
        It then pits the new neural network against the old one and accepts it
        only if it wins >= updateThreshold fraction of games.
        """

        for i in range(1, self.args.numIters + 1):  #for number of rounds
            # bookkeeping
            print('------ITER ' + str(i) + '------')
            # examples of the iteration
            if not self.skipFirstSelfPlay or i > 1:
                iterationTrainExamples = deque(
                    [], maxlen=self.args.maxlenOfQueue
                )  # discard the previous iteration's training examples

                eps_time = AverageMeter()
                bar = Bar('Self Play', max=self.args.numEps)
                end = time.time()

                for eps in range(
                        self.args.numEps):  # for each self-play game of this round
                    self.mcts = MCTS(self.game, self.nnet,
                                     self.args)  # reset search tree

                    # returns [(canonicalBoard, pi, v), (canonicalBoard, pi, v), ...]
                    # where v is the game result
                    selfPlayResult = self.executeEpisode()
                    # play one game and append its history
                    iterationTrainExamples += selfPlayResult

                    # bookkeeping + plot progress
                    eps_time.update(time.time() - end)
                    end = time.time()
                    bar.suffix = '({eps}/{maxeps}) Eps Time: {et:.3f}s | Total: {total:} | ETA: {eta:}'.format(
                        eps=eps + 1,
                        maxeps=self.args.numEps,
                        et=eps_time.avg,
                        total=bar.elapsed_td,
                        eta=bar.eta_td)
                    bar.next()
                bar.finish()

                # save the iteration examples to the history
                self.trainExamplesHistory.append(iterationTrainExamples)

            # self-play finished; update the move history
            if len(self.trainExamplesHistory
                   ) > self.args.numItersForTrainExamplesHistory:
                print("len(trainExamplesHistory) =",
                      len(self.trainExamplesHistory),
                      " => remove the oldest trainExamples")
                self.trainExamplesHistory.pop(
                    0)  #remove the oldest gaming history
            # backup history to a file
            # NB! the examples were collected using the model from the previous iteration, so (i-1)
            self.saveTrainExamples(i - 1)

            # shuffle examples before training
            trainExamples = []
            for e in self.trainExamplesHistory:
                trainExamples.extend(e)  # add the accumulated move records
            shuffle(trainExamples)

            # training new network, keeping a copy of the old one
            self.nnet.save_checkpoint(
                folder=self.args.checkpoint,
                filename='temp.pth.tar')  # save the current net as the previous version
            self.pnet.load_checkpoint(
                folder=self.args.checkpoint,
                filename='temp.pth.tar')  # load the previous net
            pmcts = MCTS(self.game, self.pnet,
                         self.args)  # reset the previous model's MCTS

            # use the new data to train the new model
            self.nnet.train(
                trainExamples)  # train the network on the new move records
            nmcts = MCTS(self.game, self.nnet,
                         self.args)  # reset the new model's MCTS

            #OLD VS NEW
            print('PITTING AGAINST PREVIOUS VERSION')
            # rp = RandomPlayer(self.game).play
            # abp2 = AbpPlayer(self.game, 1, abpDepth=2).play
            arena = Arena(
                lambda board, turn: np.argmax(
                    pmcts.getActionProb(board, turn, temp=0)),
                lambda board, turn: np.argmax(
                    nmcts.getActionProb(board, turn, temp=0)), self.game)
            # arena = Arena(abp2,
            #               lambda board, turn: np.argmax(nmcts.getActionProb(board, turn, temp=0)), self.game)
            pwins, nwins, draws = arena.playGames(
                self.args.arenaCompare)  # play the new model against the old model

            print('NEW/PREV WINS : %d / %d ; DRAWS : %d' %
                  (nwins, pwins, draws))
            if pwins + nwins > 0 and float(nwins) / (
                    pwins + nwins) < self.args.updateThreshold:
                #OLD WIN!
                print('REJECTING NEW MODEL')
                self.nnet.load_checkpoint(
                    folder=self.args.checkpoint, filename='temp.pth.tar'
                )  # restore the previous model, since it beat the new one
            else:
                #NEW WIN!
                print('ACCEPTING NEW MODEL')
                self.nnet.save_checkpoint(folder=self.args.checkpoint,
                                          filename=self.getCheckpointFile(i))
                self.nnet.save_checkpoint(
                    folder=self.args.checkpoint, filename='best.pth.tar'
                )  #save the new model, as this is the best
    def playGames(self, num, verbose=False):
        """
        Plays num games in which player1 starts num/2 games and player2 starts
        num/2 games.

        Returns:
            oneWon: games won by player1
            twoWon: games won by player2
            draws:  games won by nobody
        """
        eps_time = AverageMeter()
        bar = Bar('Arena.playGames', max=num)
        end = time.time()
        eps = 0
        maxeps = int(num)

        num = int(num / 2)
        oneWon = 0
        twoWon = 0
        draws = 0
        for _ in range(num):
            gameResult = self.playGame(verbose=verbose)
            if gameResult == 1:
                oneWon += 1
            elif gameResult == -1:
                twoWon += 1
            else:
                draws += 1
            # bookkeeping + plot progress
            eps += 1
            eps_time.update(time.time() - end)
            end = time.time()
            bar.suffix = '({eps}/{maxeps}) Eps Time: {et:.3f}s | Total: {total:} | ETA: {eta:}'.format(
                eps=eps,
                maxeps=maxeps,
                et=eps_time.avg,
                total=bar.elapsed_td,
                eta=bar.eta_td)
            bar.next()

        self.player1, self.player2 = self.player2, self.player1

        for _ in range(num):
            gameResult = self.playGame(verbose=verbose)
            if gameResult == -1:
                oneWon += 1
            elif gameResult == 1:
                twoWon += 1
            else:
                draws += 1
            # bookkeeping + plot progress
            eps += 1
            eps_time.update(time.time() - end)
            end = time.time()
            bar.suffix = '({eps}/{maxeps}) Eps Time: {et:.3f}s | Total: {total:} | ETA: {eta:}'.format(
                eps=eps,
                maxeps=maxeps,
                et=eps_time.avg,
                total=bar.elapsed_td,
                eta=bar.eta_td)
            bar.next()

        bar.finish()

        return oneWon, twoWon, draws
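
A brief usage sketch of the arena above, assuming `Arena`, `pmcts`, `nmcts`, and `game` are constructed as in the surrounding examples; with `num=40`, player1 opens the first 20 games and the roles are swapped for the remaining 20, so first-move advantage cancels out:

# the two players are plain callables mapping a board to an action
arena = Arena(lambda board: np.argmax(pmcts.getActionProb(board, temp=0)),
              lambda board: np.argmax(nmcts.getActionProb(board, temp=0)),
              game)
oneWon, twoWon, draws = arena.playGames(40, verbose=False)
print('player1 %d, player2 %d, draws %d' % (oneWon, twoWon, draws))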
Example #16
0
    def learn(self):
        """
        Performs numIters iterations with numEps episodes of self-play in each
        iteration. After every iteration, it retrains the neural network with
        examples in trainExamples (which has a maximum length of maxlenOfQueue).
        It then pits the new neural network against the old one and accepts it
        only if it wins >= updateThreshold fraction of games.
        """

        for i in range(1, self.args.numIters + 1):
            # bookkeeping
            print('------ITER ' + str(i) + '------')

            # examples of the iteration
            if not self.skipFirstSelfPlay or i > 1:
                iterationTrainExamples = deque([],
                                               maxlen=self.args.maxlenOfQueue)

                eps_time = AverageMeter()
                bar = Bar('Self Play', max=self.args.numEps)
                end = time.time()

                for eps in range(self.args.numEps):
                    self.mcts = MCTS(self.game, self.nnet,
                                     self.args)  # reset search tree
                    iterationTrainExamples += self.executeEpisode()

                    # bookkeeping + plot progress
                    eps_time.update(time.time() - end)
                    end = time.time()
                    bar.suffix = '({eps}/{maxeps}) Eps Time: {et:.3f}s | Total: {total:} | ETA: {eta:}'.format(
                        eps=eps + 1,
                        maxeps=self.args.numEps,
                        et=eps_time.avg,
                        total=bar.elapsed_td,
                        eta=bar.eta_td)
                    bar.next()
                bar.finish()

                # save the iteration examples to the history
                self.trainExamplesHistory.append(iterationTrainExamples)

            if len(self.trainExamplesHistory
                   ) > self.args.numItersForTrainExamplesHistory:
                print("len(trainExamplesHistory) =",
                      len(self.trainExamplesHistory),
                      " => remove the oldest trainExamples")
                self.trainExamplesHistory.pop(0)
            # backup history to a file
            # NB! the examples were collected using the model from the previous iteration, so (i-1)
            self.saveTrainExamples(i - 1)

            # shuffle examples before training
            trainExamples = []
            for e in self.trainExamplesHistory:
                trainExamples.extend(e)
            shuffle(trainExamples)

            # training new network, keeping a copy of the old one
            self.nnet.save_checkpoint(folder=self.args.checkpoint,
                                      filename='temp.pth.tar')
            self.pnet.load_checkpoint(folder=self.args.checkpoint,
                                      filename='temp.pth.tar')
            pmcts = MCTS(self.game, self.pnet, self.args)

            self.nnet.train(trainExamples)
            nmcts = MCTS(self.game, self.nnet, self.args)

            print('PITTING AGAINST PREVIOUS VERSION')
            arena = Arena(lambda x: np.argmax(pmcts.getActionProb(x, temp=0)),
                          lambda x: np.argmax(nmcts.getActionProb(x, temp=0)),
                          self.game)
            pwins, nwins, draws = arena.playGames(self.args.arenaCompare)

            print('NEW/PREV WINS : %d / %d ; DRAWS : %d' %
                  (nwins, pwins, draws))
            if pwins + nwins == 0 or float(nwins) / (
                    pwins + nwins) < self.args.updateThreshold:
                print('REJECTING NEW MODEL')
                self.nnet.load_checkpoint(folder=self.args.checkpoint,
                                          filename='temp.pth.tar')
            else:
                print('ACCEPTING NEW MODEL')
                self.nnet.save_checkpoint(folder=self.args.checkpoint,
                                          filename=self.getCheckpointFile(i))
                self.nnet.save_checkpoint(folder=self.args.checkpoint,
                                          filename='best.pth.tar')
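
All of these `learn()` loops read their hyper-parameters from `self.args`; a hedged sketch of the kind of configuration object they expect, using an attribute-access dict with illustrative values (only the field names are taken from the calls in the examples):

class dotdict(dict):
    """dict with attribute access, in the style these training loops assume."""
    def __getattr__(self, name):
        return self[name]

args = dotdict({
    'numIters': 1000,            # outer training iterations
    'numEps': 100,               # self-play games per iteration
    'maxlenOfQueue': 200000,     # cap on stored (board, pi, v) examples
    'numItersForTrainExamplesHistory': 20,
    'arenaCompare': 40,          # games played when pitting new vs old net
    'updateThreshold': 0.6,      # win fraction needed to accept the new net
    'checkpoint': './temp/',
})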
Example #17
0
    def learn(self):
        """
        Performs numIters iterations with numEps episodes of self-play in each
        iteration. After every iteration, it retrains the neural network with
        examples in trainExamples (which has a maximum length of maxlenOfQueue).
        It then pits the new neural network against the old one and accepts it
        only if it wins >= updateThreshold fraction of games.
        """
        if self.args.load_model:
            start = self.args.load_folder_file[1] + 1
        else:
            start = 1
        for i in range(start, self.args.numIters + 1):
            # bookkeeping
            print('------ITER ' + str(i) + '------')
            # examples of the iteration
            greedy = i == 1 and not self.args.load_model

            if not self.skipFirstSelfPlay or i > 1:
                iterationTrainExamples = deque([],
                                               maxlen=self.args.maxlenOfQueue)

                num_eps = self.args.numEps
                if greedy:
                    num_eps = self.args.greedy_eps
                eps_time = AverageMeter()
                bar = Bar('Self Play', max=num_eps)
                end = time.time()

                for eps in range(num_eps):
                    if greedy:
                        iterationTrainExamples += self.execute_greedy_episode()
                    else:
                        iterationTrainExamples += self.execute_episodes()

                    # bookkeeping + plot progress
                    eps_time.update(time.time() - end)
                    end = time.time()
                    bar.suffix = '({eps}/{maxeps}) Eps Time: {et:.3f}s | Total: {total:} | ETA: {eta:}'.format(
                        eps=eps + 1,
                        maxeps=num_eps,
                        et=eps_time.avg,
                        total=bar.elapsed_td,
                        eta=bar.eta_td)
                    bar.next()
                bar.finish()

                # save the iteration examples to the history
                if not greedy:
                    self.trainExamplesHistory.append(iterationTrainExamples)

                    if len(self.trainExamplesHistory
                           ) > self.args.numItersForTrainExamplesHistory:
                        print("len(trainExamplesHistory) =",
                              len(self.trainExamplesHistory),
                              " => remove the oldest trainExamples")
                        self.trainExamplesHistory.pop(0)
                    # backup history to a file
                    # NB! the examples were collected using the model from the previous iteration, so (i-1)
                    self.saveTrainExamples(i)

                    # shuffle examples before training
                    trainExamples = []
                    for e in self.trainExamplesHistory:
                        trainExamples.extend(e)
                    shuffle(trainExamples)

                else:
                    trainExamples = iterationTrainExamples

            # training new network, keeping a copy of the old one
            self.nnet.save_checkpoint(folder=self.args.checkpoint,
                                      filename='temp.h5')
            self.pnet.load_checkpoint(folder=self.args.checkpoint,
                                      filename='temp.h5')

            self.nnet.train(trainExamples)

            if not greedy:
                pmcts = MCTSSingle(self.game, self.pnet, self.args)
                nmcts = MCTSSingle(self.game, self.nnet, self.args)
                print('PITTING AGAINST PREVIOUS VERSION')
                arena = Arena(pmcts, nmcts, self.game, self.args)
                scores = arena.playGames(self.args.arenaCompare)

                if scores[1] == 0 or float(
                        scores[1]) / sum(scores) < self.args.updateThreshold:
                    print('REJECTING NEW MODEL')
                    self.nnet.load_checkpoint(folder=self.args.checkpoint,
                                              filename='temp.h5')
                else:
                    print('ACCEPTING NEW MODEL')
                    self.nnet.save_checkpoint(
                        folder=self.args.checkpoint,
                        filename=self.getCheckpointFile(i))
                    self.nnet.save_checkpoint(folder=self.args.checkpoint,
                                              filename='temp.h5')
            else:
                self.nnet.save_checkpoint(folder=self.args.checkpoint,
                                          filename='checkpoint_1.h5')
Example #18
0
    def _train_custom_loop(self, examples):
        """
        examples: list of examples, each example is of form (board, pi, v)
        """
        from pytorch_classification.utils import Bar, AverageMeter
        optimizer = optimizers.Adam(alpha=args.lr)
        optimizer.setup(self.nnet)

        for epoch in range(args.epochs):
            print('EPOCH ::: ' + str(epoch + 1))
            # self.nnet.train()
            data_time = AverageMeter()
            batch_time = AverageMeter()
            pi_losses = AverageMeter()
            v_losses = AverageMeter()
            end = time.time()

            bar = Bar('Training Net', max=int(len(examples) / args.batch_size))
            batch_idx = 0

            while batch_idx < int(len(examples) / args.batch_size):
                sample_ids = np.random.randint(len(examples),
                                               size=args.batch_size)
                boards, pis, vs = list(zip(*[examples[i] for i in sample_ids]))
                xp = self.nnet.xp
                boards = xp.array(boards, dtype=xp.float32)
                target_pis = xp.array(pis, dtype=xp.float32)
                target_vs = xp.array(vs, dtype=xp.float32)

                # measure data loading time
                data_time.update(time.time() - end)

                # compute output
                out_pi, out_v = self.nnet(boards)
                l_pi = self.loss_pi(target_pis, out_pi)
                l_v = self.loss_v(target_vs, out_v)
                total_loss = l_pi + l_v

                # record loss
                pi_loss = l_pi.data
                v_loss = l_v.data
                pi_losses.update(cuda.to_cpu(pi_loss), boards.shape[0])
                v_losses.update(cuda.to_cpu(v_loss), boards.shape[0])

                # compute gradient and do SGD step
                self.nnet.cleargrads()
                total_loss.backward()
                optimizer.update()

                # measure elapsed time
                batch_time.update(time.time() - end)
                end = time.time()
                batch_idx += 1

                # plot progress
                bar.suffix = '({batch}/{size}) Data: {data:.3f}s | Batch: {bt:.3f}s | Total: {total:} | ETA: {eta:} ' \
                             '| Loss_pi: {lpi:.4f} | Loss_v: {lv:.3f}'.format(
                              batch=batch_idx,
                              size=int(len(examples)/args.batch_size),
                              data=data_time.avg,
                              bt=batch_time.avg,
                              total=bar.elapsed_td,
                              eta=bar.eta_td,
                              lpi=pi_losses.avg,
                              lv=v_losses.avg,
                              )
                bar.next()
            bar.finish()
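
The Chainer loop above calls `self.loss_pi` and `self.loss_v` without showing them; a hedged sketch of loss functions compatible with that loop, cross-entropy against the MCTS policy and mean-squared error for the value, assuming the policy head already outputs probabilities:

import chainer.functions as F

def loss_pi(target_pis, out_pi, eps=1e-8):
    # negative log-likelihood of the MCTS policy under the predicted policy
    return -F.average(F.sum(target_pis * F.log(out_pi + eps), axis=1))

def loss_v(target_vs, out_v):
    # squared error between predicted value and observed game outcome
    return F.mean_squared_error(F.reshape(target_vs, (-1, 1)),
                                F.reshape(out_v, (-1, 1)))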
Example #19
0
    def learn(self):
        """
        Performs numIters iterations with numEps episodes of self-play in each
        iteration. After every iteration, it retrains the neural network with
        examples in trainExamples (which has a maximum length of maxlenOfQueue).
        It then pits the new neural network against the old one and accepts it
        only if it wins >= updateThreshold fraction of games.
        """
        #Generate a fixed sensing matrix if option is toggled to True.
        #1)A is fixed. Also set arena_game_args.sensing_matrix to be equal to that of coach.game_args so the arena uses the same sensing matrix.
        #2)the folder which saves the fixed sensing matrix is empty
        if self.args['fixed_matrix'] == True:
            if self.args['load_existing_matrix'] == True:
                self.game_args.sensing_matrix = np.load(
                    self.args['fixed_matrix_filepath'] + '/sensing_matrix.npy')
                self.arena_game_args.sensing_matrix = np.load(
                    self.args['fixed_matrix_filepath'] + '/sensing_matrix.npy')

                #FOR TESTING-------------------------------------------------------
                #print(self.game_args.sensing_matrix)
                #END TESTING-------------------------------------------------------

            else:  #if not loading an existing matrix in self.args['fixed_matrix_filepath'], then generate a new sensing matrix of given type self.args['matrix_type']
                self.game_args.generateSensingMatrix(self.args['m'],
                                                     self.args['n'],
                                                     self.args['matrix_type'])
                self.arena_game_args.sensing_matrix = self.game_args.sensing_matrix
                #Save the fixed matrix
                self.game_args.save_Matrix(self.args['fixed_matrix_filepath'])

                #FOR TESTING-------------------------------------------------------
                #print(self.game_args.sensing_matrix)
                #END TESTING-------------------------------------------------------

        for i in range(1, self.args['numIters'] + 1):
            print('------ITER ' + str(i) + '------')
            if not self.skipFirstSelfPlay or i > 1:  #default of self.skipFirstSelfPlay is False. If loading training from file then skipFirstSelfPlay is set to True. skipFirstSelfPlay allows us to load the latest nn_model with latest set of TrainingExamples
                iterationTrainExamples = deque(
                    [], maxlen=self.args['maxlenOfQueue'])
                #bookkeeping objects contained in pytorch_classification.utils
                eps_time = AverageMeter()
                bar = Bar('Self Play', max=self.args['numEps'])
                end = time.time()
                #IMPORTANT PART OF THE CODE. GENERATE NEW A AND NEW y HERE. EACH SELF-PLAY GAME HAS DIFFERENT A AND y.
                #-----------------------------------------------------
                for eps in range(self.args['numEps']):
                    #Initialize a new game by setting A, x, y, and then execute a single game of self play with self.executeEpisode()
                    if self.args[
                            'fixed_matrix'] == False:  #repeatedly generate sensing matrices if we are not fixing the sensing matrix.
                        self.game_args.generateSensingMatrix(
                            self.args['m'], self.args['n'],
                            self.args['matrix_type']
                        )  #generate a new sensing matrix
                    self.game_args.generateNewObsVec(
                        self.args['x_type'], self.args['sparsity']
                    )  #generate a new observed vector y. This assumes a matrix has been loaded in self.game_args!!!
                    self.mcts = MCTS(
                        self.game, self.nnet, self.args, self.game_args
                    )  #create new search tree for each game we play

                    #TESTING-------------------------
                    #print('The generated sparse vector x has sparsity: ' + str(self.game_args.game_iter))
                    #--------------------------------

                    #TESTING--------------------------
                    #print('Starting self-play game iteration: ' + str(eps))
                    #start_game = time.time()
                    #--------------------------------

                    iterationTrainExamples += self.executeEpisode(
                    )  #Play a new game with newly generated y. iterationTrainExamples is a deque containing states each generated self play game

                    #TESTING--------------------------
                    #end_game = time.time()
                    #print('Total time to play game ' + str(eps) + ' is: ' + str(end_game-start_game))
                    #-----------------------------------------------------
                    # bookkeeping + plot progress
                    eps_time.update(time.time() - end)
                    end = time.time()
                    bar.suffix = '({eps}/{maxeps}) Eps Time: {et:.3f}s | Total: {total:} | ETA: {eta:}'.format(
                        eps=eps + 1,
                        maxeps=self.args['numEps'],
                        et=eps_time.avg,
                        total=bar.elapsed_td,
                        eta=bar.eta_td)
                    bar.next()
                bar.finish()

                # save the iteration examples to the history
                #self.trainExamplesHistory is a list of deques, where each deque contains all the states from numEps number of self-play games
                self.trainExamplesHistory.append(iterationTrainExamples)

            #Jump to here on the first iteration if we loaded an existing file into self.trainExamplesHistory from method loadTrainExamples below.
            if len(self.trainExamplesHistory
                   ) > self.args['numItersForTrainExamplesHistory']:
                print("len(trainExamplesHistory) =",
                      len(self.trainExamplesHistory),
                      " => remove the oldest trainExamples")
                self.trainExamplesHistory.pop(0)
            # backup history to a file by calling saveTrainExamples method
            # The examples were collected using the model from the previous iteration, so (i-1)
            self.saveTrainExamples(
                i - 1
            )  #save examples to self.args['checkpoint'] folder with given iteration name of i-1

            # shuffle examples before training
            #trainExamples is the list form of trainExamplesHistory. Note that trainExamplesHistory is a list of deques,
            #where each deque contains training examples. trainExamples gets rid of the deque, and instead puts all training
            #samples in a single list, shuffled
            trainExamples = []
            for e in self.trainExamplesHistory:  #Each e is a deque
                trainExamples.extend(e)
            shuffle(trainExamples)

            #The Arena--------------------------------------------------------
            if self.args['Arena'] == True:
                self.nnet.save_checkpoint(
                    folder=self.args['network_checkpoint'],
                    filename='temp')  #copy old neural network into new one
                self.pnet.load_checkpoint(
                    folder=self.args['network_checkpoint'], filename='temp')

                #convert trainExamples into a format recognizable by Neural Network and train
                trainExamples = self.nnet.constructTraining(trainExamples)
                self.nnet.train(
                    trainExamples[0], trainExamples[1]
                )  #Train the new neural network self.nnet. The weights are now updated

                #Pit the two neural networks self.pnet and self.nnet in the arena
                print('PITTING AGAINST PREVIOUS VERSION')

                arena = Arena(
                    self.pnet, self.nnet, self.game, self.args,
                    self.arena_game_args
                )  #note that Arena will pit pnet with nnet, and Game_args A and y will change constantly. Note that next iteration, arena is a reference to a different object, so old object is deleted when there are no other references to it.
                pwins, nwins, draws = arena.playGames()

                print('NEW/PREV WINS : %d / %d ; DRAWS : %d' %
                      (nwins, pwins, draws))
                if pwins + nwins > 0 and float(nwins) / (
                        pwins + nwins) < self.args['updateThreshold']:
                    print('REJECTING NEW MODEL')
                    self.nnet.load_checkpoint(
                        folder=self.args['network_checkpoint'],
                        filename='temp')
                else:  #saves the weights(.h5) and model(.json) twice. Creates nnet_checkpoint(i-1)_model.json and nnet_checkpoint(i-1)_weights.h5, and rewrites best_model.json and best_weights.h5
                    print('ACCEPTING NEW MODEL')
                    self.nnet.save_checkpoint(
                        folder=self.args['network_checkpoint'],
                        filename='nnet_checkpoint' + str(i - 1))
                    self.nnet.save_checkpoint(
                        folder=self.args['network_checkpoint'],
                        filename='best')
            #-----------------------------------------------------------------

            else:  #If we do not activate Arena, then all we do is just train the network, rewrite best, and write a new file 'nnet_checkpoint' + str(i-1).
                print('TRAINING NEW NEURAL NETWORK...')
                trainExamples = self.nnet.constructTraining(trainExamples)

                #FOR TESTING-----------------------------------------------------
                #print('trainExamples feature arrays: ' + str(trainExamples[0]))
                #print('trainExamples label arrays: ' + str(trainExamples[1]))
                #END TESTING-----------------------------------------------------

                self.nnet.train(trainExamples[0],
                                trainExamples[1],
                                folder=self.args['network_checkpoint'],
                                filename='trainHistDict' + str(i - 1))

                #FOR TESTING-----------------------------------------------------
                #weights = self.nnet.nnet.model.get_weights()
                #min_max = []
                #for layer_weights in weights:
                #print('number of weights in current array in list (output as matrix size): ', layer_weights.shape)
                #layer_weights_min = np.amin(layer_weights)
                #layer_weights_max = np.amax(layer_weights)
                #min_max.append([layer_weights_min, layer_weights_max])
                #print('')
                #print('The smallest and largest weights of each layer are: ')
                #for pair in min_max:
                #print(pair)
                #print('')
                #END TESTING-----------------------------------------------------

                self.nnet.save_checkpoint(
                    folder=self.args['network_checkpoint'],
                    filename='nnet_checkpoint' + str(i - 1))
                self.nnet.save_checkpoint(
                    folder=self.args['network_checkpoint'], filename='best')
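
Example #19 regenerates the sensing matrix `A` and the observation `y` for every self-play game; a minimal standalone sketch of what such generators might produce under a Gaussian-matrix, k-sparse-signal assumption (function names and distributions are illustrative, not taken from `Game_args`):

import numpy as np

def generate_sensing_matrix(m, n, rng=np.random):
    # i.i.d. Gaussian sensing matrix with unit-norm columns
    A = rng.randn(m, n)
    return A / np.linalg.norm(A, axis=0)

def generate_observation(A, sparsity, rng=np.random):
    # k-sparse signal x and its (noiseless) measurement y = A x
    m, n = A.shape
    x = np.zeros(n)
    support = rng.choice(n, size=sparsity, replace=False)
    x[support] = rng.randn(sparsity)
    return x, A @ x

A = generate_sensing_matrix(m=25, n=100)
x, y = generate_observation(A, sparsity=5)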
Example #20
0
    def learn(self):
        """
        Performs numIters iterations with numEps episodes of self-play in each
        iteration. After every iteration, it retrains the neural network with
        examples in trainExamples (which has a maximum length of maxlenOfQueue).
        It then pits the new neural network against the old one and accepts it
        only if it wins >= updateThreshold fraction of games.
        """

        for i in range(1, self.args.numIters+1):
            # bookkeeping
            print('------ITER ' + str(i) + '------')
            print(str(self.game.innerN) + "x" + str(self.game.innerM))
            # examples of the iteration
            if not self.skipFirstSelfPlay or i > 1:
                iterationTrainExamples = deque([], maxlen=self.args.maxlenOfQueue)

                eps_time = AverageMeter()
                bar = Bar('Self Play', max=self.args.numEps)
                end = time.time()
    
                for eps in range(self.args.numEps):
                    # self.mcts = MCTS(self.game, self.nnet, self.args)   # reset search tree
                    self.mcts = MCTS(self.nnet, self.args)   # reset search tree
                    iterationTrainExamples += self.executeEpisode()

    
                    # bookkeeping + plot progress
                    eps_time.update(time.time() - end)
                    end = time.time()
                    bar.suffix  = '({eps}/{maxeps}) Eps Time: {et:.3f}s | Total: {total:} | ETA: {eta:}'.format(eps=eps+1, maxeps=self.args.numEps, et=eps_time.avg,
                                                                                                               total=bar.elapsed_td, eta=bar.eta_td)
                    bar.next()
                bar.finish()

                # save the iteration examples to the history 
                self.trainExamplesHistory.append(iterationTrainExamples)
                
            if len(self.trainExamplesHistory) > self.args.numItersForTrainExamplesHistory:
                print("len(trainExamplesHistory) =", len(self.trainExamplesHistory), " => remove the oldest trainExamples")
                self.trainExamplesHistory.pop(0)
            # backup history to a file
            # NB! the examples were collected using the model from the previous iteration, so (i-1)  
            self.saveTrainExamples(i-1)
            
            # shuffle examples before training
            trainExamples = []
            for e in self.trainExamplesHistory:
                trainExamples.extend(e)
            shuffle(trainExamples)

            tempfile =  'temp.pth.tar'
            bestfile =  'best.pth.tar'

            # training new network, keeping a copy of the old one
            self.nnet.save_checkpoint(folder=self.args.checkpoint, filename=tempfile)
            self.nnet.train(trainExamples)

            if self.arenaEnabled:
                self.pnet.load_checkpoint(folder=self.args.checkpoint, filename=tempfile)

                pmcts = MCTS(self.pnet, self.args)
                nmcts = MCTS(self.nnet, self.args)

                print('PITTING AGAINST PREVIOUS VERSION')
                # arena = Arena(lambda x: np.argmax(pmcts.getActionProb(x, temp=0)),
                #               lambda x: np.argmax(nmcts.getActionProb(x, temp=0)), self.game)
                arena = Arena(lambda x, y: pmcts.getActionProb(x, y, temp=0),
                           lambda x, y: nmcts.getActionProb(x, y, temp=0), self.game)
                pwins, nwins, draws = arena.playGames(self.args.arenaCompare)

                print('NEW/PREV WINS : %d / %d ; DRAWS : %d' % (nwins, pwins, draws))
                if pwins+nwins > 0 and float(nwins)/(pwins+nwins) < self.args.updateThreshold:
                    print('REJECTING NEW MODEL')
                    self.nnet.load_checkpoint(folder=self.args.checkpoint, filename=tempfile)
                else:
                    print('ACCEPTING NEW MODEL')
                    self.nnet.save_checkpoint(folder=self.args.checkpoint, filename=self.getCheckpointFile(i))
                    self.nnet.save_checkpoint(folder=self.args.checkpoint, filename=bestfile)
    def train(self, nReDataGeneration=1, nTrainingEpochs=20, batch_size=100):

        self.network.train()

        #        minCost = 100000.
        #        maxAccuracy = 0.
        elapsed_time = 0.
        #        current_epoch = 0

        # train my model
        print('Learning Started!')
        start_time = time.perf_counter()

        #############################################################

        self.data.getBlockImages(blockH=self.featureH,
                                 blockW=self.featureW,
                                 nOKperClass=40,
                                 nNGperClass=40,
                                 classNoList=self.args.classNoList,
                                 label_type='index',
                                 isTrain=False)
        #############################################################
        current_accuracy = 0
        max_accuracy = self.args.optimalAccuracyThreshold
        for i in range(nReDataGeneration):

            eps_time = AverageMeter()
            bar = Bar('Training ' + str(i), max=self.args.nTrainingEpochs)
            end = time.time()

            self.data.getBlockImages(blockH=self.featureH,
                                     blockW=self.featureW,
                                     nOKperClass=160,
                                     nNGperClass=160,
                                     classNoList=self.args.classNoList,
                                     label_type='index',
                                     isTrain=True)

            Xnp = self.data.train.images
            Ynp = self.data.train.labels
            x = torch.from_numpy(
                Xnp.reshape([-1, self.featureC, self.featureW, self.featureH]))
            y = torch.from_numpy(Ynp)
            dataset = TensorDataset(data_tensor=x, target_tensor=y)
            self.train_loader = DataLoader(dataset,
                                           batch_size=self.args.batch_size,
                                           shuffle=True)

            for epoch in range(1, nTrainingEpochs + 1):
                for k, [image, label] in enumerate(self.train_loader):

                    image = Variable(image)
                    label = Variable(label)

                    if self.args.isGPU:
                        image = image.cuda()
                        label = label.cuda()

                    self.optimizer.zero_grad()

                    output = self.network(image)
                    output = self.softmax(output)
                    #                    print(output.size())
                    #                    print(label.size())
                    loss = self.loss_func(output, label)

                    loss.backward()
                    self.optimizer.step()

                    # bookkeeping + plot progress
                eps_time.update(time.time() - end)
                end = time.time()
                bar.suffix = '({eps}/{maxeps}) Eps Time: {et:.3f}s | Total: {total:} | ETA: {eta:}'.format(
                    eps=epoch,
                    maxeps=self.args.nTrainingEpochs,
                    et=eps_time.avg,
                    total=bar.elapsed_td,
                    eta=bar.eta_td)
                bar.next()

                if epoch % 2 == 0:
                    print(
                        '--------------------------------------------------------------------'
                    )
                    torch.save(self.network, self.saveModelPath)
                    torch.save(
                        self.network.state_dict(), self.saveParamsPath
                    )  # It saves only the model parameters (recommended)

                    torch.save(self.network, self.backupModelPath)
                    torch.save(self.network.state_dict(),
                               self.backupParamsPath)

                    current_accuracy = commander.getCurrentAccuracy(
                        batch_size=256, numIter=1, isTrainData=False)
                    if self.args.isGPU:
                        loss = loss.cpu()
#                        current_accuracy = current_accuracy.cpu()
#                    current_accuracy = current_accuracy.data.numpy()[0]
                    self.network.train()
                    print(
                        '|=====================================================================|'
                    )
                    print(
                        '|===== Epoch : %04d' % (i * nTrainingEpochs + epoch),
                        "======================|")
                    print('|===== Loss : ',
                          loss.data.numpy()[0], "========================|")
                    print(
                        "|===== Current accuracy : %.1f" %
                        (current_accuracy * 100.), "% =====|")
                    print(
                        '|=====================================================================|'
                    )
                    if current_accuracy >= max_accuracy:
                        max_accuracy = current_accuracy
                        torch.save(self.network, self.optimalModelPath)
                        torch.save(self.network.state_dict(),
                                   self.optimalParamsPath)
                        break

            if current_accuracy >= max_accuracy:
                break

            bar.finish()

        elapsed_time = (time.perf_counter() - start_time)

        #        accuracy_train = self.getCurrentAccuracy(batch_size=self.args.batch_size, isTrainData=True)
        #        accuracy_test = self.getCurrentAccuracy(batch_size=self.args.batch_size, isTrainData=False)
        #        if args.isGPU:
        #            accuracy_train = accuracy_train.cpu()
        #            accuracy_test = accuracy_test.cpu()

        print(
            '====================================================================='
        )
        #        print("Accuracy for training data : %.1f" % (accuracy_train.data.numpy()[0]*100.), "%")
        #        print("Accuracy for test data : %.1f" % (accuracy_test.data.numpy()[0]*100.), "%")
        print('Elapsed %.3f seconds.' % elapsed_time)
        print('%.0f h' % (elapsed_time / 3600),
              '%.0f m' % ((elapsed_time % 3600) / 60),
              '%.0f s' % (elapsed_time % 60))
        print('Learning Finished!')
        print(
            '====================================================================='
        )
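
The training loop above uses the legacy `loss.data.numpy()[0]` access and pickles the whole model with `torch.save(self.network, ...)`; a hedged sketch of the same save-on-improvement step in current PyTorch style (the path and field names are placeholders):

import torch

def maybe_save(network, optimizer, accuracy, best_accuracy, path='optimal_params.pth'):
    """Checkpoint state_dicts when accuracy improves (preferred over pickling the whole model)."""
    if accuracy >= best_accuracy:
        torch.save({'model': network.state_dict(),
                    'optimizer': optimizer.state_dict(),
                    'accuracy': accuracy}, path)
        return accuracy
    return best_accuracy

# inside the epoch loop: loss_value = loss.item()   # replaces loss.data.numpy()[0]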