) net = Net( video_dim=args.feature_dim, embd_dim=args.embd_dim, we_dim=args.we_dim, n_pair=args.n_pair, max_words=args.max_words, sentence_dim=args.sentence_dim, ) net.train() # Optimizers + Loss loss_op = MaxMarginRankingLoss( margin=args.margin, negative_weighting=args.negative_weighting, batch_size=args.batch_size, n_pair=args.n_pair, hard_negative_rate=args.hard_negative_rate, ) net.cuda() loss_op.cuda() if args.pretrain_path != '': net.load_checkpoint(args.pretrain_path) optimizer = optim.Adam(net.parameters(), lr=args.lr) if args.verbose: print('Starting training loop ...')
def splits(sources, activities, train_persons, test_persons):
    """Train and evaluate the model on one train/test person split.

    Builds the train and test ``BF`` datasets and their loaders, logs the
    hyper-parameters to a fresh TensorBoard run directory (named after the
    current Unix timestamp), then alternates ``train`` and ``test`` for
    ``args.epochs`` epochs and prints the best accuracies that were recorded.

    Relies on module-level names defined elsewhere in the file: ``args``,
    ``device``, ``visual_feat_path``, ``text_path``, ``map_path``,
    ``log_path``, ``utils``, ``model``, ``train`` and ``test``.

    Args:
        sources: Forwarded to the ``BF`` datasets and to ``train``/``test``.
        activities: Forwarded to the ``BF`` datasets.
        train_persons: Forwarded to the training ``BF`` dataset.
        test_persons: Forwarded to the test ``BF`` dataset.

    Returns:
        None. Results are printed and written through the summary writer.

    Raises:
        ValueError: If ``args.optimizer`` is neither ``'adam'`` nor ``'sgd'``.
    """
    # TODO: Hyper-param tuning code here. Ensure this block is before the
    # utils.print_hyperparams() fn call.
    # TODO: in hyperparameter tuning code, make sure you change the value of
    # args.X so that the correct values are printed in tensorboard with the
    # print_hyperparams fn.
    train_dataset = BF(
        os.path.join(visual_feat_path, "train"),
        os.path.join(text_path, "train"),
        map_path,
        sources,
        activities,
        train_persons,
        rm_SIL=True,
    )
    train_dataloader = DataLoader(
        train_dataset,
        batch_size=args.train_batch_size,
        shuffle=True,
        num_workers=0,
    )
    test_dataset = BF(
        os.path.join(visual_feat_path, "test"),
        os.path.join(text_path, "test"),
        map_path,
        sources,
        activities,
        test_persons,
        rm_SIL=True,
    )
    test_dataloader = DataLoader(
        test_dataset,
        batch_size=args.test_batch_size,
        shuffle=False,
        num_workers=0,
    )

    # w2v of numbers or text?
    train_uniq_w2v = train_dataset.labels_uniq["labels_num_w2v"]
    test_uniq_w2v = test_dataset.labels_uniq["labels_num_w2v"]
    print("Dataset Loaded")

    train_dataset_size = len(train_dataset)
    print("Size of training", train_dataset_size)
    test_dataset_size = len(test_dataset)
    print("Size of testing", test_dataset_size)

    log_dir = str(int(time.time()))
    writer = SummaryWriter(os.path.join(log_path, log_dir))
    # The writer is created and owned here, so it is closed here as well,
    # even if training raises; otherwise buffered events may never be written.
    try:
        utils.print_hyperparams_tb(
            writer, args, train_dataset_size, test_dataset_size
        )

        # Setup
        w2v_dim = 200
        mod_dim = train_dataset.mod_dim
        assert train_dataset.mod_dim == test_dataset.mod_dim
        net = model.Net(mod_dim, w2v_dim, args.latent_feat_dim, device).to(device)
        max_margin = MaxMarginRankingLoss(margin=args.margin).to(device)

        if args.optimizer == 'adam':
            optimizer = optim.Adam(net.parameters(), lr=args.lr)
        elif args.optimizer == 'sgd':
            optimizer = optim.SGD(
                net.parameters(), lr=args.lr, momentum=args.momentum
            )
        else:
            # Fail here with a clear message instead of a NameError on
            # ``optimizer`` at the first training step.
            raise ValueError("Unsupported optimizer: %r" % (args.optimizer,))

        # TODO: define scheduler here
        # learning rate scheduler

        # best_epoch starts at 0 ("nothing recorded yet") rather than -inf:
        # the final "%d" format cannot convert an infinite float.
        best_train_accu, best_test_accu, best_epoch = -np.inf, -np.inf, 0
        for epoch in range(args.epochs):
            train_accu = train(
                net, optimizer, max_margin, train_dataloader, sources,
                train_uniq_w2v, train_dataset_size, epoch, writer,
                train_dataset.stoi_map, train_dataset.itos_map,
            )
            # print(list(net.parameters()))
            test_accu = test(
                net, test_dataloader, sources, test_uniq_w2v,
                test_dataset_size, epoch, writer,
                test_dataset.stoi_map, test_dataset.itos_map,
            )
            # NOTE(review): the best result is only replaced when the test
            # accuracy improves by more than 0.05 and the epoch index is
            # below 155 -- confirm both thresholds are intentional.
            if (test_accu - best_test_accu > 0.05) and (epoch < 155):
                best_train_accu, best_test_accu, best_epoch = (
                    train_accu, test_accu, epoch + 1,
                )
    finally:
        writer.close()

    print("Best Train Accuracy: %.7f Test Accuracy: %.7f (Epoch: %d)"
          % (best_train_accu, best_test_accu, best_epoch))
# NOTE(review): the file name suggests the array marks clips *without* a
# face, so ``1 - x`` would turn it into a "has face" indicator -- confirm
# against the data before relying on this.
face_ind_test = np.load(os.path.join(root_feat, 'no_face_ind_retrieval.npy'))
face_ind_test = 1 - face_ind_test
print('Done.')

# Model
# One pair of sizes per video modality, passed straight to Net.
video_modality_dim = {
    'face': (128, 128),
    'audio': (128 * 16, 128),
    'visual': (2048, 2048),
    'motion': (1024, 1024),
}
# NOTE(review): 300 is presumably the text embedding size -- TODO confirm.
net = Net(
    video_modality_dim,
    300,
    audio_cluster=16,
    text_cluster=args.text_cluster_size,
)
net.train()

# The model is moved to the GPU before the optimizer is constructed.
if args.GPU:
    net.cuda()

# Optimizers + Loss
max_margin = MaxMarginRankingLoss(margin=args.margin)

if args.optimizer == 'adam':
    optimizer = optim.Adam(net.parameters(), lr=args.lr)
elif args.optimizer == 'sgd':
    optimizer = optim.SGD(net.parameters(), lr=args.lr, momentum=args.momentum)
else:
    # Without this branch ``optimizer`` would be left unbound and the script
    # would only fail later with a NameError.
    raise ValueError('Unsupported optimizer: %r' % (args.optimizer,))

if args.GPU:
    max_margin.cuda()

n_display = args.n_display
dataset_size = len(dataset)
lr_decay = args.lr_decay

print('Starting training loop ...')
# NOTE(review): judging by the file name, the stored array flags clips with
# no face; ``1 - x`` would then invert it -- confirm against the data.
face_ind_test = np.load(os.path.join(root_feat, 'no_face_ind_retrieval.npy'))
face_ind_test = 1 - face_ind_test
# Single-argument print(...) behaves the same on Python 2 and Python 3; the
# bare print statement used previously is a syntax error on Python 3.
print('Done.')

# Model
# One pair of sizes per video modality, passed straight to Net.
video_modality_dim = {
    'face': (128, 128),
    'audio': (128 * 16, 128),
    'visual': (2048, 2048),
    'motion': (1024, 1024),
}
# NOTE(review): 300 is presumably the text embedding size -- TODO confirm.
net = Net(
    video_modality_dim,
    300,
    audio_cluster=16,
    text_cluster=args.text_cluster_size,
)
net.train()

# The model is moved to the GPU before the optimizer is constructed.
if args.GPU:
    net.cuda()

# Optimizers + Loss
max_margin = MaxMarginRankingLoss(margin=args.margin)

if args.optimizer == 'adam':
    optimizer = optim.Adam(net.parameters(), lr=args.lr)
elif args.optimizer == 'sgd':
    optimizer = optim.SGD(net.parameters(), lr=args.lr, momentum=args.momentum)
else:
    # An unrecognised choice used to leave ``optimizer`` unbound, which only
    # showed up later as a NameError; report it here instead.
    raise ValueError('Unsupported optimizer: %r' % (args.optimizer,))

if args.GPU:
    max_margin.cuda()

n_display = args.n_display
dataset_size = len(dataset)
lr_decay = args.lr_decay

print('Starting training loop ...')