Example 1
def main():
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    use_cuda = torch.cuda.is_available()

    tcn = create_model(use_cuda)
    tcn = torch.nn.DataParallel(tcn, device_ids=range(torch.cuda.device_count()))
    triplet_builder = builder(args.n_views, args.train_directory,
                              args.train_directory_depth, IMAGE_SIZE, args,
                              sample_size=50)

    queue = multiprocessing.Queue(1)
    dataset_builder_process = multiprocessing.Process(target=build_set, args=(queue, triplet_builder, logger), daemon=True)
    dataset_builder_process.start()

    optimizer = optim.SGD(tcn.parameters(), lr=args.lr_start, momentum=0.9)
    # Halve the learning rate (gamma=0.5) at each of the milestones below.
    learning_rate_scheduler = lr_scheduler.MultiStepLR(optimizer, milestones=[30, 50, 100], gamma=0.5)

    criterion = nn.CrossEntropyLoss()

    trn_losses_ = []
    val_losses_= []
    val_acc_margin_ = []
    val_acc_no_margin_ = []

    for epoch in range(args.start_epoch, args.start_epoch + args.epochs):
        print("=" * 20)
        logger.info("Starting epoch: {0} learning rate: {1}".format(epoch,
            learning_rate_scheduler.get_lr()))
        learning_rate_scheduler.step()

        dataset = queue.get()
        data_loader = DataLoader(
            dataset=dataset,
            batch_size=args.minibatch_size, # batch_size(epoch, args.max_minibatch_size),
            shuffle=True,
            pin_memory=use_cuda,
        )


        for _ in range(0, ITERATE_OVER_TRIPLETS):
            losses = []

            for frames, features in data_loader:
                # frames = Variable(minibatch)
                if use_cuda:
                    frames = frames.cuda()
                    features = features.cuda()
                anchor_frames = frames[:, 0, :, :, :]
                positive_frames = frames[:, 1, :, :, :]
                negative_frames = frames[:, 2, :, :, :]
                anchor_features = features[:, 0, :, :, :]
                positive_features = features[:, 1, :, :, :]
                negative_features = features[:, 2, :, :, :]

                anchor_output, unnormalized, _ = tcn(anchor_frames, anchor_features)
                positive_output, _, _ = tcn(positive_frames, positive_features)
                negative_output, _, _ = tcn(negative_frames, negative_features)

                d_positive = distance(anchor_output, positive_output)
                d_negative = distance(anchor_output, negative_output)

                loss_triplet = torch.clamp(args.margin + d_positive - d_negative, min=0.0).mean()
                loss = loss_triplet
                losses.append(loss.data.cpu().numpy())


                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

        trn_losses_.append(np.mean(losses))
        logger.info('train loss: {}'.format(np.mean(losses)))

        if epoch % 1 == 0:
            acc_margin, acc_no_margin, loss = validate(tcn, use_cuda, args)
            val_losses_.append(loss)
            val_acc_margin_.append(acc_margin)
            val_acc_no_margin_.append(acc_no_margin)

        if epoch % args.save_every == 0 and epoch != 0:
            logger.info('Saving model.')
            save_model(tcn, model_filename(args.model_name, epoch), args.model_folder)
        plot_mean(trn_losses_, args.model_folder, 'train_loss')
        plot_mean(val_losses_, args.model_folder, 'validation_loss')
        # plot_mean(train_acc_, args.model_folder, 'train_acc')
        plot_mean(val_acc_margin_, args.model_folder, 'validation_accuracy_margin')
        plot_mean(val_acc_no_margin_, args.model_folder, 'validation_accuracy_no_margin')
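A minimal sketch of the distance() helper these examples call but never define. The name comes from the call sites; the squared-Euclidean choice is an assumption, so treat this as illustrative rather than the original implementation.

import torch

def distance(x1, x2):
    # Hypothetical: squared Euclidean distance between two batches of
    # embeddings, one scalar per row: (batch, dim) x 2 -> (batch,)
    return torch.sum((x1 - x2) ** 2, dim=1)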
Example 2
def main():
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    use_cuda = torch.cuda.is_available()

    tcn = create_model(use_cuda)
    tcn = torch.nn.DataParallel(tcn,
                                device_ids=range(torch.cuda.device_count()))
    triplet_builder = builder(args.n_views, args.train_directory, IMAGE_SIZE,
                              args, sample_size=SAMPLE_SIZE)

    queue = multiprocessing.Queue(1)
    dataset_builder_process = multiprocessing.Process(target=build_set,
                                                      args=(queue,
                                                            triplet_builder,
                                                            logger),
                                                      daemon=True)
    dataset_builder_process.start()

    optimizer = optim.SGD(tcn.parameters(), lr=args.lr_start, momentum=0.9)
    # This will diminish the learning rate at the milestones.
    # 0.1, 0.01, 0.001
    learning_rate_scheduler = lr_scheduler.MultiStepLR(
        optimizer, milestones=[200, 500, 1000], gamma=0.1)

    criterion = nn.CrossEntropyLoss()

    trn_losses_ = []
    val_losses_ = []

    n_iter = 0
    n_valid_iter = 0
    for epoch in range(args.start_epoch, args.start_epoch + args.epochs):
        print("=" * 20)
        logger.info("Starting epoch: {0} learning rate: {1}".format(
            epoch, learning_rate_scheduler.get_lr()))
        learning_rate_scheduler.step()

        dataset = queue.get()
        data_loader = DataLoader(
            dataset=dataset,
            batch_size=args.minibatch_size,  # batch_size(epoch, args.max_minibatch_size),
            shuffle=True,
            pin_memory=use_cuda,
        )

        for _ in range(0, ITERATE_OVER_TRIPLETS):
            losses = []
            for minibatch in data_loader:
                # frames = Variable(minibatch, require_grad=False)
                loss = loss_fn(tcn, minibatch)

                losses.append(loss.data.cpu().numpy())

                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
        writer.add_scalar('data/train_loss', np.mean(losses), n_iter)
        n_iter += 1
        trn_losses_.append(np.mean(losses))
        logger.info('train loss: {}'.format(np.mean(losses)))

        if epoch % 1 == 0:
            loss, n_valid_iter = validate(tcn, use_cuda, n_valid_iter)
            val_losses_.append(loss)

        if epoch % args.save_every == 0 and epoch != 0:
            logger.info('Saving model.')
            save_model(tcn, model_filename(args.model_name, epoch),
                       args.model_folder)
        plot_mean(trn_losses_, args.model_folder, 'train_loss')
        plot_mean(val_losses_, args.model_folder, 'validation_loss')
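Example 2 delegates the per-minibatch loss to a loss_fn helper that is not shown. A sketch of what it plausibly computes, modeled on Example 1's inline triplet loss; the triplet unpacking, the three-tuple model output, and the margin value are assumptions.

import torch

def loss_fn(tcn, minibatch, margin=2.0):
    # Hypothetical: embed anchor/positive/negative views and apply the
    # hinge-margin triplet loss, mirroring Example 1.
    frames = minibatch.cuda() if torch.cuda.is_available() else minibatch
    anchor_output, _, _ = tcn(frames[:, 0])
    positive_output, _, _ = tcn(frames[:, 1])
    negative_output, _, _ = tcn(frames[:, 2])
    d_positive = torch.sum((anchor_output - positive_output) ** 2, dim=1)
    d_negative = torch.sum((anchor_output - negative_output) ** 2, dim=1)
    return torch.clamp(margin + d_positive - d_negative, min=0.0).mean()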
Example 3
    def train(self):

        trn_losses_ = []
        val_losses_ = []
        val_acc_ = []
        trn_acc_ = []

        for epoch in range(self.args.start_epoch,
                           self.args.start_epoch + self.args.epochs):
            print("=" * 20)
            self.logger.info("Starting epoch: {0} ".format(epoch))

            dataset = self.training_queue.get()
            data_loader = DataLoader(
                dataset=dataset,
                batch_size=self.args.minibatch_size,  # batch_size(epoch, self.args.max_minibatch_size),
                shuffle=True,
                pin_memory=self.use_cuda,
            )

            train_embedding_features_buffer = []
            train_images_buffer = []

            correct = 0

            for _ in range(0, 1):
                losses = []

                for minibatch in data_loader:
                    if self.use_cuda:
                        anchor_frames = minibatch[0].cuda()
                        # anchor_euler_reparam = minibatch[1].cuda()
                        anchor_rots = minibatch[1].cuda()  # load as 3x3 rotation matrix
                    else:
                        anchor_frames, anchor_rots = minibatch[0], minibatch[1]
                    # frames = Variable(minibatch)
                    loss, a_pred = self.loss_fn(self.model, anchor_frames,
                                                anchor_rots)
                    losses.append(loss.data.cpu().numpy())
                    anchor_euler = euler_XYZ_to_reparam(
                        apply(rotationMatrixToEulerAngles, anchor_rots))
                    correct += (torch.norm(a_pred - anchor_euler, 2) <
                                0.1).data.cpu().numpy().sum()
                    self.optimizer.zero_grad()
                    loss.backward()
                    self.optimizer.step()

                    # Add embeddings
                    train_embedding_features_buffer.append(
                        apply(rotationMatrixToEulerAngles, anchor_rots))
                    train_images_buffer.append(anchor_frames)
            print("logging to {}".format(self.logdir))

            self.writer.add_scalar('data/train_loss', np.mean(losses),
                                   self.itr)
            self.writer.add_scalar('data/train_correct',
                                   correct / len(data_loader), self.itr)
            self.itr += 1
            trn_losses_.append(np.mean(losses))
            self.logger.info('train loss: {}'.format(np.mean(losses)))
            self.logger.info(
                "Training score correct {correct}/{total}".format(
                    correct=correct, total=len(data_loader)))
            trn_acc_.append(correct)

            self.writer.add_image('frame_1', minibatch[0][0], self.itr)

            # Get embeddings
            features = torch.cat(
                train_embedding_features_buffer[:30]).squeeze_()
            # features = train_embedding_features_buffer.view(train_embedding_features_buffer.shape[0]*train_embedding_features_buffer.shape[1], -1)
            # label = torch.Tensor(np.asarray(label_buffer))
            images = torch.cat(train_images_buffer[:30]).squeeze_()
            self.writer.add_embedding(features,
                                      label_img=images,
                                      global_step=epoch)

            if epoch % 1 == 0:
                loss, correct = self.validate()
                self.learning_rate_scheduler.step(loss)
                val_losses_.append(loss)
                val_acc_.append(correct)

            if epoch % self.args.save_every == 0 and epoch != 0:
                self.logger.info('Saving model.')
                self.save_model(
                    self.model, self.model_filename(self.args.model_name,
                                                    epoch),
                    join(self.model_folder, 'weight_files'))
                print("logging to {}".format(self.logdir))

            plot_mean(trn_losses_, self.model_folder, 'train_loss')
            plot_mean(val_losses_, self.model_folder, 'validation_loss')
            plot_mean(trn_acc_, self.model_folder, 'train_acc')
            plot_mean(val_acc_, self.model_folder, 'validation_accuracy')
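Example 3 calls apply() and rotationMatrixToEulerAngles() without defining them. The Euler extraction below is the standard XYZ decomposition of a 3x3 rotation matrix; the batching helper is an assumption about how apply() is meant to behave.

import math
import torch

def rotationMatrixToEulerAngles(R):
    # Standard XYZ Euler-angle extraction from a 3x3 rotation matrix,
    # with the usual guard against gimbal lock.
    sy = math.sqrt(float(R[0, 0]) ** 2 + float(R[1, 0]) ** 2)
    if sy > 1e-6:
        x = math.atan2(float(R[2, 1]), float(R[2, 2]))
        y = math.atan2(-float(R[2, 0]), sy)
        z = math.atan2(float(R[1, 0]), float(R[0, 0]))
    else:  # gimbal lock: pitch is at +/-90 degrees
        x = math.atan2(-float(R[1, 2]), float(R[1, 1]))
        y = math.atan2(-float(R[2, 0]), sy)
        z = 0.0
    return torch.tensor([x, y, z])

def apply(fn, batch):
    # Hypothetical helper: map fn over the batch dimension.
    return torch.stack([fn(item) for item in batch])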
Example 4
def main():
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    use_cuda = torch.cuda.is_available()

    tcn = create_model(use_cuda)
    tcn = torch.nn.DataParallel(tcn, device_ids=(range(torch.cuda.device_count()))) # Wrapper to distribute load on multiple GPUs
    attribute_classifier = DenseClassifier(num_classes=5).to(device) # load labeling network

    # triplet_builder = builder(args.n_views, \
    #     args.train_directory, args.labels_train_directory, IMAGE_SIZE, args, sample_size=200)

    # queue = multiprocessing.Queue(1)
    # dataset_builder_process = multiprocessing.Process(target=build_set, args=(queue, triplet_builder, logger), daemon=True)
    # dataset_builder_process.start()

    optimizer = optim.SGD(list(tcn.parameters()) + list(attribute_classifier.parameters()), lr=args.lr_start, momentum=0.9)
    # This will diminish the learning rate at the milestones.
    # 0.1, 0.01, 0.001
    learning_rate_scheduler = lr_scheduler.MultiStepLR(optimizer, milestones=[100, 200, 500], gamma=0.1)

    criterion = nn.CrossEntropyLoss()

    trn_losses_ = []
    val_losses_= []
    val_acc_margin_ = []
    val_acc_no_margin_ = []
    dataset = MultiViewTripletLabelDataset(args.n_views, args.train_directory,
        args.labels_train_directory, IMAGE_SIZE, sample_size=64)

    for epoch in range(args.start_epoch, args.start_epoch + args.epochs):
        losses = []

        print("=" * 20)
        logger.info("Starting epoch: {0} learning rate: {1}".format(epoch,
            learning_rate_scheduler.get_lr()))
        learning_rate_scheduler.step()

        # dataset = queue.get()
        data_loader = DataLoader(
            dataset=dataset,
            batch_size=args.minibatch_size, # batch_size(epoch, args.max_minibatch_size),
            shuffle=True,
            pin_memory=use_cuda,
        )

        for i, minibatch in enumerate(data_loader):
            frames = minibatch[0]
            caption = minibatch[1]
            seq_idx = minibatch[2]
            if use_cuda:
                frames = frames[0].to(device)
                captions = caption[0].to(device)
                seq_idx = seq_idx[0]
            else:
                frames, captions, seq_idx = frames[0], caption[0], seq_idx[0]
            snaps = dataset.get_videos(int(seq_idx[0]) * args.n_views)
            all_sel_imgs = []
            sel_imgs = snaps[0][::20]
            all_sel_imgs.append(sel_imgs)
            # all_sel_imgs = np.squeeze(np.asarray(all_sel_imgs)).reshape([sel_imgs.shape[0], 3, 299, 299])

            for _ in range(0, ITERATE_OVER_TRIPLETS):

                _, unnorm, all_sel_imgs_emb = tcn(torch.FloatTensor(sel_imgs).to(device))
                all_sel_imgs_emb = torch.mean(all_sel_imgs_emb, dim=0, keepdim=True)

                anchor_frames = frames[:, 0, :, :, :]
                positive_frames = frames[:, 1, :, :, :]
                negative_frames = frames[:, 2, :, :, :]

                anchor_output, unnormalized, _ = tcn(anchor_frames)
                positive_output, _, _ = tcn(positive_frames)
                negative_output, _, _ = tcn(negative_frames)

                d_positive = distance(anchor_output, positive_output)
                d_negative = distance(anchor_output, negative_output)
                # features = encoder(anchor_frames)
                loss_triplet = torch.clamp(args.margin + d_positive - d_negative, min=0.0).mean()
                label_outputs_1, label_outputs_2 = attribute_classifier(all_sel_imgs_emb)
                labels_1 = captions[0, 0].view(-1)
                labels_2 = captions[0, 1].view(-1)
                loss_1 = criterion(label_outputs_1, labels_1)
                loss_2 = criterion(label_outputs_2, labels_2) 
                loss_language = loss_1 + loss_2               
                loss = loss_triplet + args.alpha * loss_language
                # loss = loss_language

                losses.append(loss.data.cpu().numpy())


                tcn.zero_grad()
                attribute_classifier.zero_grad()
                loss.backward()
                optimizer.step()
            if (i + 1) % 5 == 0:
                print('Epoch [{}/{}], Step [{}/{}], Loss Triplet: {:.4f}, Loss Language: {:.4f}'
                      .format(epoch + 1, args.epochs, i + 1, len(data_loader),
                              loss_triplet.item(), loss_language.item()))


        trn_losses_.append(np.mean(losses))

        if epoch % 1 == 0:
            acc_margin, acc_no_margin, loss = validate(tcn, attribute_classifier, criterion, use_cuda, args)
            val_losses_.append(loss)
            val_acc_margin_.append(acc_margin)
            val_acc_no_margin_.append(acc_no_margin)

        if epoch % args.save_every == 0 and epoch != 0:
            logger.info('Saving model.')
            save_model(tcn, model_filename(args.model_name, epoch), args.model_folder)
        plot_mean(trn_losses_, args.model_folder, 'train_loss')
        plot_mean(val_losses_, args.model_folder, 'validation_loss')
        # plot_mean(train_acc_, args.model_folder, 'train_acc')
        plot_mean(val_acc_margin_, args.model_folder, 'validation_accuracy_margin')
        plot_mean(val_acc_no_margin_, args.model_folder, 'validation_accuracy_no_margin')





if __name__ == '__main__':
    main()
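Most of these examples hand a triplet_builder to a background build_set process that is never shown. A sketch of a producer consistent with how the training loops consume the queue; the number of sets concatenated per put and the builder's build_set() method are assumptions.

from torch.utils.data import ConcatDataset

def build_set(queue, triplet_builder, log):
    # Hypothetical producer: keep the bounded queue (maxsize=1) supplied
    # with freshly sampled triplet datasets so each queue.get() in the
    # training loop sees new samples.
    while True:
        datasets = [triplet_builder.build_set() for _ in range(5)]
        dataset = ConcatDataset(datasets)
        log.info('Created {0} triplets'.format(len(dataset)))
        queue.put(dataset)  # blocks until the consumer drains the queue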
Example 5
    def train(self):

        trn_losses_ = []
        val_losses_= []
        val_acc_margin_ = []
        val_acc_no_margin_ = []

        for epoch in range(self.args.start_epoch, self.args.start_epoch + self.args.epochs):
            print("=" * 20)
            self.logger.info("Starting epoch: {0} ".format(epoch))

            dataset = self.training_queue.get()
            data_loader = DataLoader(
                dataset=dataset,
                batch_size=self.args.minibatch_size, # batch_size(epoch, self.args.max_minibatch_size),
                shuffle=True,
                pin_memory=self.use_cuda,
            )
            
            train_embedding_features_buffer = []
            train_images_buffer = []
            
            for _ in range(0, ITERATE_OVER_TRIPLETS):
                losses = []

                for minibatch, _ in data_loader:
                    # frames = Variable(minibatch)
                    if self.use_cuda:
                        frames = minibatch.cuda()
                    else:
                        frames = minibatch
                    anchor_frames = frames[:, 0, :, :, :]
                    positive_frames = frames[:, 1, :, :, :]
                    negative_frames = frames[:, 2, :, :, :]
            
                    anchor_output, unnormalized, _ = self.model(anchor_frames)
                    positive_output, _, _ = self.model(positive_frames)
                    negative_output, _, _ = self.model(negative_frames)

                    d_positive = distance(anchor_output, positive_output)
                    d_negative = distance(anchor_output, negative_output)

                    loss_triplet = torch.clamp(self.args.margin + d_positive - d_negative, min=0.0).mean()
                    loss = loss_triplet
                    losses.append(loss.data.cpu().numpy())

                    self.optimizer.zero_grad()
                    loss.backward()
                    self.optimizer.step()

                    # Add embeddings
                    train_embedding_features_buffer.append(anchor_output)
                    train_images_buffer.append(anchor_frames)
            print("logging to {}".format(self.logdir))

            self.writer.add_scalar('data/train_triplet_loss', np.mean(losses), self.itr)
            self.itr += 1  
            trn_losses_.append(np.mean(losses))
            self.logger.info('train loss: {}'.format(np.mean(losses)))
            self.writer.add_image('frame_anchor', minibatch[0][0], 0) 
            self.writer.add_image('frame_positive', minibatch[0][1], 1) 
            self.writer.add_image('frame_negative', minibatch[0][2], 2) 

            # Get embeddings
            features = torch.cat(train_embedding_features_buffer).squeeze_()
            # features = train_embedding_features_buffer.view(train_embedding_features_buffer.shape[0]*train_embedding_features_buffer.shape[1], -1)
            # label = torch.Tensor(np.asarray(label_buffer))
            images = torch.cat(train_images_buffer).squeeze_()
            self.writer.add_embedding(features, label_img=images, global_step=epoch)
            
            if epoch % 1 == 0:
                acc_margin, acc_no_margin, loss  = self.validate()
                self.learning_rate_scheduler.step(loss)
                val_losses_.append(loss)
                val_acc_margin_.append(acc_margin)
                val_acc_no_margin_.append(acc_no_margin)

            if epoch % self.args.save_every == 0 and epoch != 0:
                self.logger.info('Saving model.')
                self.save_model(self.model, self.model_filename(self.args.model_name, epoch), join(self.model_folder, 'weight_files'))
                print("logging to {}".format(self.logdir))

            plot_mean(trn_losses_, self.model_folder, 'train_loss')
            plot_mean(val_losses_, self.model_folder, 'validation_loss')
            # plot_mean(train_acc_, self.args.model_folder, 'train_acc')
            plot_mean(val_acc_margin_, self.model_folder, 'validation_accuracy_margin')
            plot_mean(val_acc_no_margin_, self.model_folder, 'validation_accuracy_no_margin')
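Every example ends its epoch by calling plot_mean(values, folder, name), which is defined elsewhere. A minimal sketch consistent with that call signature; the PNG output and the Agg backend are assumptions.

import os
import matplotlib
matplotlib.use('Agg')  # render to files on headless training machines
import matplotlib.pyplot as plt

def plot_mean(values, folder, name):
    # Dump a simple line plot of a per-epoch metric into the model folder.
    os.makedirs(folder, exist_ok=True)
    plt.figure()
    plt.plot(values)
    plt.xlabel('epoch')
    plt.ylabel(name)
    plt.savefig(os.path.join(folder, name + '.png'))
    plt.close()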
Example 6
def main():
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    use_cuda = torch.cuda.is_available()

    tcn = create_model(use_cuda)
    tcn = torch.nn.DataParallel(
        tcn, device_ids=(range(torch.cuda.device_count())
                         ))  # Wrapper to distribute load on multiple GPUs
    attribute_classifier = DenseClassifier(num_classes=5).to(
        device)  # load labeling network

    triplet_builder = builder(args.n_views, args.train_directory,
                              args.labels_train_directory, IMAGE_SIZE, args,
                              sample_size=32)

    queue = multiprocessing.Queue(1)
    dataset_builder_process = multiprocessing.Process(target=build_set,
                                                      args=(queue,
                                                            triplet_builder,
                                                            logger),
                                                      daemon=True)
    dataset_builder_process.start()

    optimizer = optim.SGD(list(tcn.parameters()) +
                          list(attribute_classifier.parameters()),
                          lr=args.lr_start,
                          momentum=0.9)
    # This will diminish the learning rate at the milestones.
    # 0.1, 0.01, 0.001
    learning_rate_scheduler = lr_scheduler.MultiStepLR(
        optimizer, milestones=[100, 200, 500], gamma=0.1)

    criterion = nn.CrossEntropyLoss()

    trn_losses_ = []
    val_losses_ = []
    val_acc_margin_ = []
    val_acc_no_margin_ = []

    for epoch in range(args.start_epoch, args.start_epoch + args.epochs):
        losses = []

        print("=" * 20)
        logger.info("Starting epoch: {0} learning rate: {1}".format(
            epoch, learning_rate_scheduler.get_lr()))
        learning_rate_scheduler.step()

        dataset = queue.get()
        data_loader = DataLoader(
            dataset=dataset,
            batch_size=args.minibatch_size,  # batch_size(epoch, args.max_minibatch_size),
            shuffle=True,
            pin_memory=use_cuda,
        )
        for _ in range(0, ITERATE_OVER_TRIPLETS):

            for i, minibatch in enumerate(data_loader):

                frames = minibatch[0]
                captions = minibatch[1]
                if use_cuda:
                    frames = frames.cuda()
                    captions = captions.to(device)
                print(captions)
                print(len(data_loader))
                anchor_frames = frames[:, 0, :, :, :]
                positive_frames = frames[:, 1, :, :, :]
                negative_frames = frames[:, 2, :, :, :]
                anchor_output, unnormalized, mixed = tcn(anchor_frames)
                positive_output, _, _ = tcn(positive_frames)
                negative_output, _, _ = tcn(negative_frames)

                d_positive = distance(anchor_output, positive_output)
                d_negative = distance(anchor_output, negative_output)
                # features = encoder(anchor_frames)
                loss_triplet = torch.clamp(args.margin + d_positive -
                                           d_negative,
                                           min=0.0).mean()

                label_outputs_1, label_outputs_2 = attribute_classifier(mixed)
                labels_1 = captions[:, 0]
                # labels_2 = captions[:, 1]
                loss_1 = criterion(label_outputs_1, labels_1)
                # loss_2 = criterion(label_outputs_2, labels_2)
                loss_language = loss_1  #+ loss_2

                # loss = loss_triplet + args.alpha * loss_language
                loss = loss_language
                # loss = loss_triplet
                losses.append(loss.data.cpu().numpy())

                tcn.zero_grad()
                attribute_classifier.zero_grad()
                loss.backward()
                optimizer.step()
        trn_losses_.append(np.mean(losses))
        logger.info('train loss: {}'.format(np.mean(losses)))

        if epoch % 1 == 0:
            acc_margin, acc_no_margin, loss = validate(tcn,
                                                       attribute_classifier,
                                                       criterion, use_cuda,
                                                       args)
            val_losses_.append(loss)
            val_acc_margin_.append(acc_margin)
            val_acc_no_margin_.append(acc_no_margin)

        if epoch % args.save_every == 0 and epoch != 0:
            logger.info('Saving model.')
            save_model(tcn, model_filename(args.model_name, epoch),
                       args.model_folder)
        plot_mean(trn_losses_, args.model_folder, 'train_loss')
        plot_mean(val_losses_, args.model_folder, 'validation_loss')
        # plot_mean(train_acc_, args.model_folder, 'train_acc')
        plot_mean(val_acc_margin_, args.model_folder,
                  'validation_accuracy_margin')
        plot_mean(val_acc_no_margin_, args.model_folder,
                  'validation_accuracy_no_margin')
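Examples 4 and 6 route TCN features through a DenseClassifier with two output heads ("label_outputs_1, label_outputs_2 = attribute_classifier(...)"). A sketch of a module shape that satisfies both call sites; the 2048-dim input (a typical Inception/ResNet feature width) is an assumption.

import torch.nn as nn

class DenseClassifier(nn.Module):
    # Hypothetical two-head attribute classifier: one linear head per
    # predicted attribute, both reading the same input features.
    def __init__(self, num_classes=5, in_features=2048):
        super(DenseClassifier, self).__init__()
        self.head_1 = nn.Linear(in_features, num_classes)
        self.head_2 = nn.Linear(in_features, num_classes)

    def forward(self, x):
        return self.head_1(x), self.head_2(x)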
Example 7
def main():
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    use_cuda = torch.cuda.is_available()

    tcn = create_model(use_cuda)

    dummy_state = Variable(torch.rand(1, 2, 3, 299, 299).to(device))
    dummy_action = Variable(torch.rand(1, 3).to(device))
    writer.add_graph(tcn, (dummy_state, ))

    tcn = torch.nn.DataParallel(tcn,
                                device_ids=range(torch.cuda.device_count()))
    triplet_builder = builder(args.n_views, args.train_directory, IMAGE_SIZE,
                              args, sample_size=SAMPLE_SIZE,
                              n_seqs=TRAIN_SEQS_PER_EPOCH)

    queue = multiprocessing.Queue(1)
    dataset_builder_process = multiprocessing.Process(target=build_set,
                                                      args=(queue,
                                                            triplet_builder,
                                                            logger),
                                                      daemon=True)
    dataset_builder_process.start()

    #optimizer = optim.SGD(tcn.parameters(), lr=args.lr_start, momentum=0.9)
    optimizer = optim.Adam(tcn.parameters(), lr=0.001)
    # Reduce the learning rate when the validation loss plateaus.
    learning_rate_scheduler = lr_scheduler.ReduceLROnPlateau(optimizer, 'min')

    criterion = nn.CrossEntropyLoss()

    trn_losses_ = []
    val_losses_ = []

    n_iter = 0
    n_valid_iter = 0
    for epoch in range(args.start_epoch, args.start_epoch + args.epochs):
        print("=" * 20)
        logger.info("Starting epoch: {0}".format(epoch))

        dataset = queue.get()
        data_loader = DataLoader(
            dataset=dataset,
            batch_size=args.minibatch_size,  # batch_size(epoch, args.max_minibatch_size),
            shuffle=True,
            pin_memory=use_cuda,
        )

        losses = []
        for _ in range(0, ITERATE_OVER_TRIPLETS):
            for minibatch in data_loader:
                # frames = Variable(minibatch, require_grad=False)
                loss = loss_fn(tcn, minibatch)
                losses.append(loss.data.cpu().numpy())
                # print(gradcheck(loss_fn, (tcn, minibatch,)))
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
        writer.add_scalar('data/train_loss', np.mean(losses), n_iter)
        n_iter += 1
        trn_losses_.append(np.mean(losses))
        logger.info('train loss: {}'.format(np.mean(losses)))
        writer.add_image('frame_1', minibatch[0][0], 0)
        #writer.add_image('frame_2', minibatch[0][1],0)
        #writer.add_image('frame_3', minibatch[0][2],0)
        if epoch % 1 == 0:
            loss, n_valid_iter = validate(tcn, use_cuda, n_valid_iter)
            learning_rate_scheduler.step(loss)
            val_losses_.append(loss)

        if epoch % args.save_every == 0 and epoch != 0:
            logger.info('Saving model to {}'.format(
                join(args.model_folder, model_filename(args.model_name,
                                                       epoch))))
            save_model(tcn, model_filename(args.model_name, epoch),
                       args.model_folder)
        plot_mean(trn_losses_, args.model_folder, 'train_loss')
        plot_mean(val_losses_, args.model_folder, 'validation_loss')
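The checkpointing pair model_filename / save_model recurs in every example but is defined elsewhere. A sketch consistent with how they are called; the filename pattern and the choice to save state_dict() are assumptions.

import os
import torch

def model_filename(model_name, epoch):
    # Hypothetical naming scheme, e.g. 'tcn-epoch-50.pk'.
    return '{0}-epoch-{1}.pk'.format(model_name, epoch)

def save_model(model, filename, model_folder):
    if not os.path.exists(model_folder):
        os.makedirs(model_folder)
    torch.save(model.state_dict(), os.path.join(model_folder, filename))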
Example 8
def main():
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    use_cuda = torch.cuda.is_available()

    tcn = create_model(use_cuda)
    encoder = EncoderCNN(args.embed_size).to(device)
    decoder = DecoderRNN(args.embed_size, args.hidden_size, len(vocab),
                         args.num_layers).to(device)
    triplet_builder = builder(args.n_views, args.train_directory, IMAGE_SIZE,
                              vocab, args, sample_size=200)

    queue = multiprocessing.Queue(1)
    dataset_builder_process = multiprocessing.Process(target=build_set,
                                                      args=(queue,
                                                            triplet_builder,
                                                            logger),
                                                      daemon=True)
    dataset_builder_process.start()

    opt_params = list(tcn.parameters()) + list(decoder.parameters()) + list(
        encoder.parameters())
    optimizer = optim.SGD(opt_params, lr=args.lr_start, momentum=0.9)
    # This will diminish the learning rate at the milestones.
    # 0.1, 0.01, 0.001
    learning_rate_scheduler = lr_scheduler.MultiStepLR(
        optimizer, milestones=[100, 500, 1000], gamma=0.1)

    criterion = nn.CrossEntropyLoss()

    trn_losses_ = []
    for epoch in range(args.start_epoch, args.start_epoch + args.epochs):
        print("=" * 20)
        logger.info("Starting epoch: {0} learning rate: {1}".format(
            epoch, learning_rate_scheduler.get_lr()))
        learning_rate_scheduler.step()

        dataset = queue.get()
        logger.info("Got {0} triplets".format(len(dataset)))
        data_loader = DataLoader(
            dataset=dataset,
            batch_size=args.minibatch_size,  # batch_size(epoch, args.max_minibatch_size),
            shuffle=True,
            pin_memory=use_cuda,
            collate_fn=collate_fn)

        if epoch % 10 == 0:
            validate(tcn, decoder, use_cuda, args)
        for _ in range(0, ITERATE_OVER_TRIPLETS):
            losses = []
            for minibatch, captions, lengths in data_loader:
                frames = Variable(minibatch)
                if use_cuda:
                    frames = frames.cuda()
                    captions = captions.to(device)
                anchor_frames = frames[:, 0, :, :, :]
                positive_frames = frames[:, 1, :, :, :]
                negative_frames = frames[:, 2, :, :, :]

                anchor_output, unnormalized = tcn(anchor_frames)
                positive_output, _ = tcn(positive_frames)
                negative_output, _ = tcn(negative_frames)

                d_positive = distance(anchor_output, positive_output)
                d_negative = distance(anchor_output, negative_output)
                targets = pack_padded_sequence(captions,
                                               lengths,
                                               batch_first=True)[0]
                # features = encoder(anchor_frames)

                caption_outputs = decoder(unnormalized, captions, lengths)
                loss_triplet = torch.clamp(args.margin + d_positive -
                                           d_negative,
                                           min=0.0).mean()
                loss_language = criterion(caption_outputs, targets)
                loss = loss_triplet + args.alpha * loss_language
                losses.append(loss.data.cpu().numpy())

                tcn.zero_grad()
                decoder.zero_grad()
                encoder.zero_grad()
                loss.backward()
                optimizer.step()

            logger.info('loss: {}'.format(np.mean(losses)))
        # Generate a caption from the image
        _, sample_feature = tcn(frames[0, 0, :, :, :][None])
        sampled_ids = decoder.sample(sample_feature)
        sampled_ids = sampled_ids[0].cpu().numpy()  # (1, max_seq_length) -> (max_seq_length,)
        sampled_caption = []
        for word_id in captions[0, :].cpu().numpy():
            word = vocab.idx2word[word_id]
            sampled_caption.append(word)
        sentence = ' '.join(sampled_caption)
        print("Target: ", sentence)
        sampled_caption = []
        for word_id in sampled_ids:
            word = vocab.idx2word[word_id]
            sampled_caption.append(word)
            if word == '<end>':
                break
        sentence = ' '.join(sampled_caption)
        print("Prediction: ", sentence)

        if epoch % args.save_every == 0 and epoch != 0:
            logger.info('Saving model.')
            save_model(tcn, model_filename(args.model_name, epoch),
                       args.model_folder)
        trn_losses_.append(np.mean(losses))
        plot_mean(trn_losses_, args.model_folder, 'train_loss')
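Example 8's DataLoader passes a custom collate_fn so that pack_padded_sequence receives length-sorted, zero-padded caption batches. A sketch in the style of the standard PyTorch image-captioning tutorial; the exact layout of each dataset item (frames tensor, caption tensor) is an assumption.

import torch

def collate_fn(data):
    # Hypothetical collate for (frames, caption) pairs: sort by caption
    # length, descending, as pack_padded_sequence requires, then zero-pad.
    data.sort(key=lambda x: len(x[1]), reverse=True)
    frames, captions = zip(*data)
    frames = torch.stack(frames, 0)
    lengths = [len(cap) for cap in captions]
    targets = torch.zeros(len(captions), max(lengths)).long()
    for i, cap in enumerate(captions):
        targets[i, :len(cap)] = cap
    return frames, targets, lengths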