def main():
    """Train the TCN with a triplet margin loss on (frames, features) triplets.

    Datasets are produced asynchronously by a daemon process that feeds a
    single-slot queue; each epoch consumes one freshly built dataset.
    """
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    use_cuda = torch.cuda.is_available()

    tcn = create_model(use_cuda)
    # Wrapper to distribute the load on all visible GPUs.
    tcn = torch.nn.DataParallel(tcn,
                                device_ids=range(torch.cuda.device_count()))

    triplet_builder = builder(args.n_views, args.train_directory,
                              args.train_directory_depth, IMAGE_SIZE, args,
                              sample_size=50)

    # Single-slot queue: the builder process stays at most one dataset ahead.
    queue = multiprocessing.Queue(1)
    dataset_builder_process = multiprocessing.Process(
        target=build_set, args=(queue, triplet_builder, logger), daemon=True)
    dataset_builder_process.start()

    optimizer = optim.SGD(tcn.parameters(), lr=args.lr_start, momentum=0.9)
    # This will diminish the learning rate at the milestones.
    learning_rate_scheduler = lr_scheduler.MultiStepLR(
        optimizer, milestones=[30, 50, 100], gamma=0.5)

    trn_losses_ = []
    val_losses_ = []
    val_acc_margin_ = []
    val_acc_no_margin_ = []

    for epoch in range(args.start_epoch, args.start_epoch + args.epochs):
        print("=" * 20)
        logger.info("Starting epoch: {0} learning rate: {1}".format(
            epoch, learning_rate_scheduler.get_lr()))
        learning_rate_scheduler.step()

        dataset = queue.get()
        data_loader = DataLoader(
            dataset=dataset,
            batch_size=args.minibatch_size,
            shuffle=True,
            pin_memory=use_cuda,
        )

        for _ in range(0, ITERATE_OVER_TRIPLETS):
            losses = []
            for frames, features in data_loader:
                if use_cuda:
                    frames = frames.cuda()
                    features = features.cuda()
                # Dim 1 indexes the triplet: 0 = anchor, 1 = positive,
                # 2 = negative.
                anchor_frames = frames[:, 0, :, :, :]
                positive_frames = frames[:, 1, :, :, :]
                negative_frames = frames[:, 2, :, :, :]
                anchor_features = features[:, 0, :, :, :]
                positive_features = features[:, 1, :, :, :]
                negative_features = features[:, 2, :, :, :]

                anchor_output, unnormalized, _ = tcn(anchor_frames,
                                                     anchor_features)
                positive_output, _, _ = tcn(positive_frames,
                                            positive_features)
                negative_output, _, _ = tcn(negative_frames,
                                            negative_features)

                d_positive = distance(anchor_output, positive_output)
                d_negative = distance(anchor_output, negative_output)
                # Standard triplet hinge loss.
                loss_triplet = torch.clamp(
                    args.margin + d_positive - d_negative, min=0.0).mean()
                loss = loss_triplet
                losses.append(loss.data.cpu().numpy())

                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

            trn_losses_.append(np.mean(losses))
            # BUG FIX: logging uses lazy %-formatting; the original passed
            # the value as a bare extra argument with no placeholder, which
            # raises a formatting error when the record is emitted.
            logger.info('train loss: %s', np.mean(losses))

        if epoch % 1 == 0:
            acc_margin, acc_no_margin, loss = validate(tcn, use_cuda, args)
            val_losses_.append(loss)
            val_acc_margin_.append(acc_margin)
            val_acc_no_margin_.append(acc_no_margin)

        if epoch % args.save_every == 0 and epoch != 0:
            logger.info('Saving model.')
            save_model(tcn, model_filename(args.model_name, epoch),
                       args.model_folder)

    plot_mean(trn_losses_, args.model_folder, 'train_loss')
    plot_mean(val_losses_, args.model_folder, 'validation_loss')
    # plot_mean(train_acc_, args.model_folder, 'train_acc')
    plot_mean(val_acc_margin_, args.model_folder,
              'validation_accuracy_margin')
    plot_mean(val_acc_no_margin_, args.model_folder,
              'validation_accuracy_no_margin')
def main():
    """Train the TCN via ``loss_fn`` on minibatches drawn from datasets
    built asynchronously by a daemon builder process."""
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    use_cuda = torch.cuda.is_available()

    tcn = create_model(use_cuda)
    tcn = torch.nn.DataParallel(tcn,
                                device_ids=range(torch.cuda.device_count()))

    triplet_builder = builder(args.n_views, args.train_directory, IMAGE_SIZE,
                              args, sample_size=SAMPLE_SIZE)

    # Single-slot queue: the builder process stays at most one dataset ahead.
    queue = multiprocessing.Queue(1)
    dataset_builder_process = multiprocessing.Process(
        target=build_set, args=(queue, triplet_builder, logger), daemon=True)
    dataset_builder_process.start()

    optimizer = optim.SGD(tcn.parameters(), lr=args.lr_start, momentum=0.9)
    # This will diminish the learning rate at the milestones.
    learning_rate_scheduler = lr_scheduler.MultiStepLR(
        optimizer, milestones=[200, 500, 1000], gamma=0.1)

    trn_losses_ = []
    val_losses_ = []
    n_iter = 0
    n_valid_iter = 0

    for epoch in range(args.start_epoch, args.start_epoch + args.epochs):
        print("=" * 20)
        logger.info("Starting epoch: {0} learning rate: {1}".format(
            epoch, learning_rate_scheduler.get_lr()))
        learning_rate_scheduler.step()

        dataset = queue.get()
        data_loader = DataLoader(
            dataset=dataset,
            batch_size=args.minibatch_size,
            shuffle=True,
            pin_memory=use_cuda,
        )

        for _ in range(0, ITERATE_OVER_TRIPLETS):
            losses = []
            for minibatch in data_loader:
                loss = loss_fn(tcn, minibatch)
                losses.append(loss.data.cpu().numpy())

                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

            writer.add_scalar('data/train_loss', np.mean(losses), n_iter)
            n_iter += 1
            trn_losses_.append(np.mean(losses))
            # BUG FIX: use a %-placeholder; a bare extra argument with no
            # placeholder raises a formatting error when the record is
            # emitted.
            logger.info('train loss: %s', np.mean(losses))

        if epoch % 1 == 0:
            loss, n_valid_iter = validate(tcn, use_cuda, n_valid_iter)
            val_losses_.append(loss)

        if epoch % args.save_every == 0 and epoch != 0:
            logger.info('Saving model.')
            save_model(tcn, model_filename(args.model_name, epoch),
                       args.model_folder)

    plot_mean(trn_losses_, args.model_folder, 'train_loss')
    plot_mean(val_losses_, args.model_folder, 'validation_loss')
def train(self):
    """Train the model to regress rotations from anchor frames.

    Per epoch: pull a dataset from the training queue, run one pass of
    minibatch updates, log loss/accuracy scalars and an embedding
    projection to TensorBoard, validate, and periodically checkpoint.
    """
    trn_losses_ = []
    val_losses_ = []
    val_acc_ = []
    trn_acc_ = []

    for epoch in range(self.args.start_epoch,
                       self.args.start_epoch + self.args.epochs):
        print("=" * 20)
        self.logger.info("Starting epoch: {0} ".format(epoch))

        dataset = self.training_queue.get()
        data_loader = DataLoader(
            dataset=dataset,
            batch_size=self.args.minibatch_size,
            shuffle=True,
            pin_memory=self.use_cuda,
        )

        train_embedding_features_buffer = []
        train_images_buffer = []
        correct = 0

        for _ in range(0, 1):
            losses = []
            for minibatch in data_loader:
                anchor_frames = minibatch[0]
                anchor_rots = minibatch[1]  # load as 3x3 rotation matrix
                # BUG FIX: originally these were only assigned inside
                # `if self.use_cuda:`, raising NameError on CPU-only runs.
                if self.use_cuda:
                    anchor_frames = anchor_frames.cuda()
                    anchor_rots = anchor_rots.cuda()

                loss, a_pred = self.loss_fn(self.model, anchor_frames,
                                            anchor_rots)
                losses.append(loss.data.cpu().numpy())

                # A prediction counts as correct when its L2 distance to the
                # reparameterized Euler target is below 0.1.
                anchor_euler = euler_XYZ_to_reparam(
                    apply(rotationMatrixToEulerAngles, anchor_rots))
                correct += (torch.norm(a_pred - anchor_euler, 2) <
                            0.1).data.cpu().numpy().sum()

                self.optimizer.zero_grad()
                loss.backward()
                self.optimizer.step()

                # Buffer Euler-angle features and frames for the embedding
                # projector.
                train_embedding_features_buffer.append(
                    apply(rotationMatrixToEulerAngles, anchor_rots))
                train_images_buffer.append(anchor_frames)

            print("logging to {}".format(self.logdir))
            self.writer.add_scalar('data/train_loss', np.mean(losses),
                                   self.itr)
            # NOTE(review): denominator is the number of *batches*, not
            # samples — confirm this is the intended accuracy scale.
            self.writer.add_scalar('data/train_correct',
                                   correct / len(data_loader), self.itr)
            self.itr += 1
            trn_losses_.append(np.mean(losses))
            # BUG FIX: %-placeholder for logging's lazy formatting.
            self.logger.info('train loss: %s', np.mean(losses))
            self.logger.info(
                "Training score correct {correct}/{total}".format(
                    correct=correct, total=len(data_loader)))
            trn_acc_.append(correct)
            self.writer.add_image('frame_1', minibatch[0][0], self.itr)

        # Log at most the first 30 buffered batches to the projector.
        features = torch.cat(
            train_embedding_features_buffer[:30]).squeeze_()
        images = torch.cat(train_images_buffer[:30]).squeeze_()
        self.writer.add_embedding(features, label_img=images,
                                  global_step=epoch)

        if epoch % 1 == 0:
            loss, correct = self.validate()
            # Plateau scheduler steps on the validation loss.
            self.learning_rate_scheduler.step(loss)
            val_losses_.append(loss)
            val_acc_.append(correct)

        if epoch % self.args.save_every == 0 and epoch != 0:
            self.logger.info('Saving model.')
            self.save_model(
                self.model,
                self.model_filename(self.args.model_name, epoch),
                join(self.model_folder, 'weight_files'))
            print("logging to {}".format(self.logdir))

    plot_mean(trn_losses_, self.model_folder, 'train_loss')
    plot_mean(val_losses_, self.model_folder, 'validation_loss')
    plot_mean(trn_acc_, self.model_folder, 'train_acc')
    plot_mean(val_acc_, self.model_folder, 'validation_accuracy')
def main():
    """Jointly train the TCN (triplet loss) and an attribute classifier
    (cross-entropy over caption labels) on multi-view triplet data."""
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    use_cuda = torch.cuda.is_available()

    tcn = create_model(use_cuda)
    # Wrapper to distribute load on multiple GPUs.
    tcn = torch.nn.DataParallel(tcn,
                                device_ids=range(torch.cuda.device_count()))
    # Load labeling network.
    attribute_classifier = DenseClassifier(num_classes=5).to(device)

    # Both networks are optimized jointly.
    optimizer = optim.SGD(
        list(tcn.parameters()) + list(attribute_classifier.parameters()),
        lr=args.lr_start, momentum=0.9)
    # This will diminish the learning rate at the milestones.
    learning_rate_scheduler = lr_scheduler.MultiStepLR(
        optimizer, milestones=[100, 200, 500], gamma=0.1)
    criterion = nn.CrossEntropyLoss()

    trn_losses_ = []
    val_losses_ = []
    val_acc_margin_ = []
    val_acc_no_margin_ = []

    dataset = MultiViewTripletLabelDataset(
        args.n_views, args.train_directory, args.labels_train_directory,
        IMAGE_SIZE, sample_size=64)

    for epoch in range(args.start_epoch, args.start_epoch + args.epochs):
        losses = []
        print("=" * 20)
        logger.info("Starting epoch: {0} learning rate: {1}".format(
            epoch, learning_rate_scheduler.get_lr()))
        learning_rate_scheduler.step()

        data_loader = DataLoader(
            dataset=dataset,
            batch_size=args.minibatch_size,
            shuffle=True,
            pin_memory=use_cuda,
        )

        for i, minibatch in enumerate(data_loader):
            # BUG FIX: `captions` was only assigned inside `if use_cuda:`
            # (NameError on CPU). `.to(device)` is a no-op on CPU, so the
            # transfer can be unconditional.
            frames = minibatch[0][0].to(device)
            captions = minibatch[1][0].to(device)
            seq_idx = minibatch[2][0]

            # Sample every 20th frame of the sequence's first video as a
            # summary of the whole sequence.
            snaps = dataset.get_videos(int(seq_idx[0]) * args.n_views)
            all_sel_imgs = []
            sel_imgs = snaps[0][::20]
            all_sel_imgs.append(sel_imgs)
            # all_sel_imgs = np.squeeze(np.asarray(all_sel_imgs)).reshape(
            #     [sel_imgs.shape[0], 3, 299, 299])

            for _ in range(0, ITERATE_OVER_TRIPLETS):
                # Mean-pool the embeddings of the summary frames.
                _, unnorm, all_sel_imgs_emb = tcn(
                    torch.FloatTensor(sel_imgs).to(device))
                all_sel_imgs_emb = torch.mean(all_sel_imgs_emb, dim=0,
                                              keepdim=True)

                anchor_frames = frames[:, 0, :, :, :]
                positive_frames = frames[:, 1, :, :, :]
                negative_frames = frames[:, 2, :, :, :]

                anchor_output, unnormalized, _ = tcn(anchor_frames)
                positive_output, _, _ = tcn(positive_frames)
                negative_output, _, _ = tcn(negative_frames)

                d_positive = distance(anchor_output, positive_output)
                d_negative = distance(anchor_output, negative_output)
                loss_triplet = torch.clamp(
                    args.margin + d_positive - d_negative, min=0.0).mean()

                # BUG FIX: the original had a dangling `if i` fragment in
                # front of this statement (a syntax error); removed.
                label_outputs_1, label_outputs_2 = attribute_classifier(
                    all_sel_imgs_emb)
                labels_1 = captions[0, 0].view(-1)
                labels_2 = captions[0, 1].view(-1)
                loss_1 = criterion(label_outputs_1, labels_1)
                loss_2 = criterion(label_outputs_2, labels_2)
                loss_language = loss_1 + loss_2

                loss = loss_triplet + args.alpha * loss_language
                # loss = loss_language
                losses.append(loss.data.cpu().numpy())

                tcn.zero_grad()
                attribute_classifier.zero_grad()
                loss.backward()
                optimizer.step()

            if (i + 1) % 5 == 0:
                print('Epoch [{}/{}], Step [{}/{}], Loss Triplet: {:.4f}, '
                      'Loss Language: {:.4f}'.format(
                          epoch + 1, args.epochs, i + 1, len(dataset),
                          loss_triplet.item(), loss_language.item()))

        trn_losses_.append(np.mean(losses))

        if epoch % 1 == 0:
            acc_margin, acc_no_margin, loss = validate(
                tcn, attribute_classifier, criterion, use_cuda, args)
            val_losses_.append(loss)
            val_acc_margin_.append(acc_margin)
            val_acc_no_margin_.append(acc_no_margin)

        if epoch % args.save_every == 0 and epoch != 0:
            logger.info('Saving model.')
            save_model(tcn, model_filename(args.model_name, epoch),
                       args.model_folder)

    plot_mean(trn_losses_, args.model_folder, 'train_loss')
    plot_mean(val_losses_, args.model_folder, 'validation_loss')
    # plot_mean(train_acc_, args.model_folder, 'train_acc')
    plot_mean(val_acc_margin_, args.model_folder,
              'validation_accuracy_margin')
    plot_mean(val_acc_no_margin_, args.model_folder,
              'validation_accuracy_no_margin')


if __name__ == '__main__':
    main()
def train(self):
    """Triplet-loss training loop.

    Per epoch: pull a dataset from the training queue, run
    ITERATE_OVER_TRIPLETS passes over it, log scalars/images/embeddings to
    TensorBoard, validate, and periodically checkpoint.
    """
    trn_losses_ = []
    val_losses_ = []
    val_acc_margin_ = []
    val_acc_no_margin_ = []

    for epoch in range(self.args.start_epoch,
                       self.args.start_epoch + self.args.epochs):
        print("=" * 20)
        self.logger.info("Starting epoch: {0} ".format(epoch))

        dataset = self.training_queue.get()
        data_loader = DataLoader(
            dataset=dataset,
            batch_size=self.args.minibatch_size,
            shuffle=True,
            pin_memory=self.use_cuda,
        )

        train_embedding_features_buffer = []
        train_images_buffer = []

        for _ in range(0, ITERATE_OVER_TRIPLETS):
            losses = []
            for minibatch, _ in data_loader:
                # BUG FIX: `frames` was only assigned inside
                # `if self.use_cuda:`, raising NameError on CPU-only runs.
                frames = minibatch.cuda() if self.use_cuda else minibatch
                # Dim 1 indexes the triplet: 0 = anchor, 1 = positive,
                # 2 = negative.
                anchor_frames = frames[:, 0, :, :, :]
                positive_frames = frames[:, 1, :, :, :]
                negative_frames = frames[:, 2, :, :, :]

                anchor_output, unnormalized, _ = self.model(anchor_frames)
                positive_output, _, _ = self.model(positive_frames)
                negative_output, _, _ = self.model(negative_frames)

                d_positive = distance(anchor_output, positive_output)
                d_negative = distance(anchor_output, negative_output)
                loss_triplet = torch.clamp(
                    self.args.margin + d_positive - d_negative,
                    min=0.0).mean()
                loss = loss_triplet
                losses.append(loss.data.cpu().numpy())

                self.optimizer.zero_grad()
                loss.backward()
                self.optimizer.step()

                # Buffer anchors for the TensorBoard embedding projector.
                train_embedding_features_buffer.append(anchor_output)
                train_images_buffer.append(anchor_frames)

            print("logging to {}".format(self.logdir))
            self.writer.add_scalar('data/train_triplet_loss',
                                   np.mean(losses), self.itr)
            self.itr += 1
            trn_losses_.append(np.mean(losses))
            # BUG FIX: %-placeholder for logging's lazy formatting; a bare
            # extra argument raises a formatting error at emit time.
            self.logger.info('train loss: %s', np.mean(losses))
            self.writer.add_image('frame_anchor', minibatch[0][0], 0)
            self.writer.add_image('frame_positive', minibatch[0][1], 1)
            self.writer.add_image('frame_negative', minibatch[0][2], 2)

        # Log buffered embeddings for this epoch.
        features = torch.cat(train_embedding_features_buffer).squeeze_()
        images = torch.cat(train_images_buffer).squeeze_()
        self.writer.add_embedding(features, label_img=images,
                                  global_step=epoch)

        if epoch % 1 == 0:
            acc_margin, acc_no_margin, loss = self.validate()
            # Plateau scheduler steps on the validation loss.
            self.learning_rate_scheduler.step(loss)
            val_losses_.append(loss)
            val_acc_margin_.append(acc_margin)
            val_acc_no_margin_.append(acc_no_margin)

        if epoch % self.args.save_every == 0 and epoch != 0:
            self.logger.info('Saving model.')
            self.save_model(
                self.model,
                self.model_filename(self.args.model_name, epoch),
                join(self.model_folder, 'weight_files'))
            print("logging to {}".format(self.logdir))

    plot_mean(trn_losses_, self.model_folder, 'train_loss')
    plot_mean(val_losses_, self.model_folder, 'validation_loss')
    # plot_mean(train_acc_, self.args.model_folder, 'train_acc')
    plot_mean(val_acc_margin_, self.model_folder,
              'validation_accuracy_margin')
    plot_mean(val_acc_no_margin_, self.model_folder,
              'validation_accuracy_no_margin')
def main():
    """Train the TCN together with an attribute classifier; in this variant
    only the language (label) loss is optimized."""
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    use_cuda = torch.cuda.is_available()

    tcn = create_model(use_cuda)
    # Wrapper to distribute load on multiple GPUs.
    tcn = torch.nn.DataParallel(tcn,
                                device_ids=range(torch.cuda.device_count()))
    # Load labeling network.
    attribute_classifier = DenseClassifier(num_classes=5).to(device)

    triplet_builder = builder(args.n_views, args.train_directory,
                              args.labels_train_directory, IMAGE_SIZE, args,
                              sample_size=32)

    # Single-slot queue: the builder process stays at most one dataset ahead.
    queue = multiprocessing.Queue(1)
    dataset_builder_process = multiprocessing.Process(
        target=build_set, args=(queue, triplet_builder, logger), daemon=True)
    dataset_builder_process.start()

    optimizer = optim.SGD(
        list(tcn.parameters()) + list(attribute_classifier.parameters()),
        lr=args.lr_start, momentum=0.9)
    # This will diminish the learning rate at the milestones.
    learning_rate_scheduler = lr_scheduler.MultiStepLR(
        optimizer, milestones=[100, 200, 500], gamma=0.1)
    criterion = nn.CrossEntropyLoss()

    trn_losses_ = []
    val_losses_ = []
    val_acc_margin_ = []
    val_acc_no_margin_ = []

    for epoch in range(args.start_epoch, args.start_epoch + args.epochs):
        losses = []
        print("=" * 20)
        logger.info("Starting epoch: {0} learning rate: {1}".format(
            epoch, learning_rate_scheduler.get_lr()))
        learning_rate_scheduler.step()

        dataset = queue.get()
        data_loader = DataLoader(
            dataset=dataset,
            batch_size=args.minibatch_size,
            shuffle=True,
            pin_memory=use_cuda,
        )

        for _ in range(0, ITERATE_OVER_TRIPLETS):
            for i, minibatch in enumerate(data_loader):
                frames = minibatch[0]
                captions = minibatch[1]
                if use_cuda:
                    frames = frames.cuda()
                    captions = captions.to(device)
                # NOTE(review): per-minibatch debug prints — very noisy;
                # consider removing once the pipeline is trusted.
                print(captions)
                print(len(data_loader))

                anchor_frames = frames[:, 0, :, :, :]
                positive_frames = frames[:, 1, :, :, :]
                negative_frames = frames[:, 2, :, :, :]

                anchor_output, unnormalized, mixed = tcn(anchor_frames)
                positive_output, _, _ = tcn(positive_frames)
                negative_output, _, _ = tcn(negative_frames)

                d_positive = distance(anchor_output, positive_output)
                d_negative = distance(anchor_output, negative_output)
                loss_triplet = torch.clamp(
                    args.margin + d_positive - d_negative, min=0.0).mean()

                label_outputs_1, label_outputs_2 = attribute_classifier(
                    mixed)
                labels_1 = captions[:, 0]
                # labels_2 = captions[:, 1]
                loss_1 = criterion(label_outputs_1, labels_1)
                # loss_2 = criterion(label_outputs_2, labels_2)
                loss_language = loss_1  # + loss_2

                # loss = loss_triplet + args.alpha * loss_language
                loss = loss_language
                # loss = loss_triplet
                losses.append(loss.data.cpu().numpy())

                tcn.zero_grad()
                attribute_classifier.zero_grad()
                loss.backward()
                optimizer.step()

        trn_losses_.append(np.mean(losses))
        # BUG FIX: %-placeholder for logging's lazy formatting; a bare extra
        # argument raises a formatting error at emit time.
        logger.info('train loss: %s', np.mean(losses))

        if epoch % 1 == 0:
            acc_margin, acc_no_margin, loss = validate(
                tcn, attribute_classifier, criterion, use_cuda, args)
            val_losses_.append(loss)
            val_acc_margin_.append(acc_margin)
            val_acc_no_margin_.append(acc_no_margin)

        if epoch % args.save_every == 0 and epoch != 0:
            logger.info('Saving model.')
            save_model(tcn, model_filename(args.model_name, epoch),
                       args.model_folder)

    plot_mean(trn_losses_, args.model_folder, 'train_loss')
    plot_mean(val_losses_, args.model_folder, 'validation_loss')
    # plot_mean(train_acc_, args.model_folder, 'train_acc')
    plot_mean(val_acc_margin_, args.model_folder,
              'validation_accuracy_margin')
    plot_mean(val_acc_no_margin_, args.model_folder,
              'validation_accuracy_no_margin')
def main():
    """Train the TCN via ``loss_fn`` with Adam and a plateau LR scheduler;
    also logs the model graph and training curves to TensorBoard."""
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    use_cuda = torch.cuda.is_available()

    tcn = create_model(use_cuda)
    # BUG FIX: the dummy input used an unconditional .cuda(), which crashes
    # on CPU-only hosts; .to(device) handles both cases. (An unused
    # `dummy_action` tensor was also removed.)
    dummy_state = Variable(torch.rand(1, 2, 3, 299, 299).to(device))
    writer.add_graph(tcn, (dummy_state, ))

    tcn = torch.nn.DataParallel(tcn,
                                device_ids=range(torch.cuda.device_count()))

    triplet_builder = builder(args.n_views, args.train_directory, IMAGE_SIZE,
                              args, sample_size=SAMPLE_SIZE,
                              n_seqs=TRAIN_SEQS_PER_EPOCH)

    # Single-slot queue: the builder process stays at most one dataset ahead.
    queue = multiprocessing.Queue(1)
    dataset_builder_process = multiprocessing.Process(
        target=build_set, args=(queue, triplet_builder, logger), daemon=True)
    dataset_builder_process.start()

    # optimizer = optim.SGD(tcn.parameters(), lr=args.lr_start, momentum=0.9)
    optimizer = optim.Adam(tcn.parameters(), lr=0.001)
    # Reduce LR when the validation loss plateaus.
    learning_rate_scheduler = lr_scheduler.ReduceLROnPlateau(optimizer, 'min')

    trn_losses_ = []
    val_losses_ = []
    n_iter = 0
    n_valid_iter = 0

    for epoch in range(args.start_epoch, args.start_epoch + args.epochs):
        print("=" * 20)
        logger.info("Starting epoch: {0}".format(epoch))

        dataset = queue.get()
        data_loader = DataLoader(
            dataset=dataset,
            batch_size=args.minibatch_size,
            shuffle=True,
            pin_memory=use_cuda,
        )

        losses = []
        for _ in range(0, ITERATE_OVER_TRIPLETS):
            for minibatch in data_loader:
                loss = loss_fn(tcn, minibatch)
                losses.append(loss.data.cpu().numpy())

                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

            writer.add_scalar('data/train_loss', np.mean(losses), n_iter)
            n_iter += 1

        trn_losses_.append(np.mean(losses))
        # BUG FIX: %-placeholder for logging's lazy formatting; a bare extra
        # argument raises a formatting error at emit time.
        logger.info('train loss: %s', np.mean(losses))
        writer.add_image('frame_1', minibatch[0][0], 0)
        # writer.add_image('frame_2', minibatch[0][1], 0)
        # writer.add_image('frame_3', minibatch[0][2], 0)

        if epoch % 1 == 0:
            loss, n_valid_iter = validate(tcn, use_cuda, n_valid_iter)
            # Plateau scheduler steps on the validation loss.
            learning_rate_scheduler.step(loss)
            val_losses_.append(loss)

        if epoch % args.save_every == 0 and epoch != 0:
            logger.info('Saving model to {}'.format(
                join(args.model_folder,
                     model_filename(args.model_name, epoch))))
            save_model(tcn, model_filename(args.model_name, epoch),
                       args.model_folder)

    plot_mean(trn_losses_, args.model_folder, 'train_loss')
    plot_mean(val_losses_, args.model_folder, 'validation_loss')
def main():
    """Jointly train the TCN (triplet loss) and an RNN caption decoder
    (cross-entropy over packed caption tokens); prints a sampled caption
    each pass for qualitative inspection."""
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    use_cuda = torch.cuda.is_available()

    tcn = create_model(use_cuda)
    encoder = EncoderCNN(args.embed_size).to(device)
    decoder = DecoderRNN(args.embed_size, args.hidden_size, len(vocab),
                         args.num_layers).to(device)

    triplet_builder = builder(args.n_views, args.train_directory, IMAGE_SIZE,
                              vocab, args, sample_size=200)

    # Single-slot queue: the builder process stays at most one dataset ahead.
    queue = multiprocessing.Queue(1)
    dataset_builder_process = multiprocessing.Process(
        target=build_set, args=(queue, triplet_builder, logger), daemon=True)
    dataset_builder_process.start()

    # All three networks are optimized jointly.
    opt_params = (list(tcn.parameters()) + list(decoder.parameters()) +
                  list(encoder.parameters()))
    optimizer = optim.SGD(opt_params, lr=args.lr_start, momentum=0.9)
    # This will diminish the learning rate at the milestones.
    learning_rate_scheduler = lr_scheduler.MultiStepLR(
        optimizer, milestones=[100, 500, 1000], gamma=0.1)
    criterion = nn.CrossEntropyLoss()

    for epoch in range(args.start_epoch, args.start_epoch + args.epochs):
        print("=" * 20)
        logger.info("Starting epoch: {0} learning rate: {1}".format(
            epoch, learning_rate_scheduler.get_lr()))
        learning_rate_scheduler.step()

        dataset = queue.get()
        logger.info("Got {0} triplets".format(len(dataset)))
        data_loader = DataLoader(
            dataset=dataset,
            batch_size=args.minibatch_size,
            shuffle=True,
            pin_memory=use_cuda,
            collate_fn=collate_fn)

        if epoch % 10 == 0:
            validate(tcn, decoder, use_cuda, args)

        for _ in range(0, ITERATE_OVER_TRIPLETS):
            losses = []
            for minibatch, captions, lengths in data_loader:
                frames = Variable(minibatch)
                if use_cuda:
                    frames = frames.cuda()
                    captions = captions.to(device)
                anchor_frames = frames[:, 0, :, :, :]
                positive_frames = frames[:, 1, :, :, :]
                negative_frames = frames[:, 2, :, :, :]

                anchor_output, unnormalized = tcn(anchor_frames)
                positive_output, _ = tcn(positive_frames)
                negative_output, _ = tcn(negative_frames)

                d_positive = distance(anchor_output, positive_output)
                d_negative = distance(anchor_output, negative_output)

                # Flatten padded captions to the packed target layout.
                targets = pack_padded_sequence(captions, lengths,
                                               batch_first=True)[0]
                # features = encoder(anchor_frames)
                caption_outputs = decoder(unnormalized, captions, lengths)

                loss_triplet = torch.clamp(
                    args.margin + d_positive - d_negative, min=0.0).mean()
                loss_language = criterion(caption_outputs, targets)
                loss = loss_triplet + args.alpha * loss_language
                losses.append(loss.data.cpu().numpy())

                tcn.zero_grad()
                decoder.zero_grad()
                encoder.zero_grad()
                loss.backward()
                optimizer.step()

            # BUG FIX: %-placeholder for logging's lazy formatting; a bare
            # extra argument raises a formatting error at emit time.
            logger.info('loss: %s', np.mean(losses))

            # Generate a caption from the last batch's first anchor frame.
            _, sample_feature = tcn(frames[0, 0, :, :, :][None])
            sampled_ids = decoder.sample(sample_feature)
            # (1, max_seq_length) -> (max_seq_length)
            sampled_ids = sampled_ids[0].cpu().numpy()

            target_caption = []
            for word_id in captions[0, :].cpu().numpy():
                target_caption.append(vocab.idx2word[word_id])
            print("Target: ", ' '.join(target_caption))

            # BUG FIX: the original appended predictions to the same list
            # as the target (and initialized it twice), so the "Prediction"
            # line also contained the target words.
            predicted_caption = []
            for word_id in sampled_ids:
                word = vocab.idx2word[word_id]
                predicted_caption.append(word)
                if word == '<end>':
                    break
            print("Prediction: ", ' '.join(predicted_caption))

        if epoch % args.save_every == 0 and epoch != 0:
            logger.info('Saving model.')
            save_model(tcn, model_filename(args.model_name, epoch),
                       args.model_folder)

    # NOTE(review): train_loss_, test_loss_, train_acc_, test_acc_ and
    # save_dir are not defined anywhere in this function — presumably
    # module-level names; confirm they exist or remove these calls, as they
    # would otherwise raise NameError at the end of training.
    plot_mean(train_loss_, save_dir, 'train_loss')
    plot_mean(test_loss_, save_dir, 'test_loss')
    plot_mean(train_acc_, save_dir, 'train_acc')
    plot_mean(test_acc_, save_dir, 'test_acc')