# Standard-library and third-party imports used by the routines below.
# Project-local names (args, parser, data, misc, optimizers, CNN, MLP, fit,
# custom_loss, AverageMeter, rampup, rampdown, step_rampup, CalcSim,
# generate_anchor_vectors, helpers, save_checkpoint) are defined elsewhere
# in this repo.
import os
import time
from pprint import pprint

import numpy as np
import torch
from torch.autograd import Variable
from torch.optim import Adam


def do_stuff(opt):
    print(f'\nTraining {opt} for {args.num_epochs} epochs...')
    net = CNN() if args.dataset == 'cifar' else MLP()
    _, kwargs = misc.split_optim_dict(misc.optim_dict[opt])
    optimizer = misc.task_to_optimizer(opt)(params=net.parameters(), **kwargs)
    optimizer = misc.wrap_optimizer(opt, optimizer)
    return fit(net, data, optimizer, num_epochs=args.num_epochs, lr_schedule=True)
# Variant of do_stuff that wraps the base optimizer in Lookahead explicitly
# instead of delegating to misc.wrap_optimizer.
def do_stuff(opt):
    print(f'\nTraining {opt} for {args.num_epochs} epochs...')
    net = CNN() if args.dataset == 'cifar' else MLP()
    _, kwargs = misc.split_optim_dict(misc.optim_dict[opt])
    optimizer = misc.task_to_optimizer(opt)(params=net.parameters(), **kwargs)
    if 'lookahead' in opt.lower():
        optimizer = optimizers.Lookahead(optimizer, k=5, alpha=0.5)
    return fit(net, data, optimizer, num_epochs=args.num_epochs, lr_schedule=True)
def pre_train(dataloader, test_loader, dict_loader, dataloader_test,
              mask_labels, total_epochs=50, learning_rate=1e-4,
              use_gpu=True, seed=123):
    args = parser.parse_args()
    pprint(args)

    num_bits = args.num_bits
    model = CNN(model_name='alexnet', bit=num_bits, class_num=args.num_class)
    criterion = custom_loss(num_bits=num_bits)

    arch = 'cnn_'
    filename = arch + args.dataset + '_' + str(num_bits) + 'bits'
    checkpoint_filename = os.path.join(args.checkpoint, filename + '.pt')

    if use_gpu:
        model = model.cuda()
        model = torch.nn.DataParallel(
            model, device_ids=range(torch.cuda.device_count()))
        criterion = criterion.cuda()
        torch.cuda.manual_seed(seed)

    running_loss = 0.0
    start_epoch = 0
    batch_time = AverageMeter()
    data_time = AverageMeter()
    end = time.time()
    best_prec = -99999
    k = 10500
    n_samples = 200000
    alpha = 0.4
    alpha_1 = 0.99

    mask_labels = torch.from_numpy(mask_labels).long().cuda()

    # Temporal-ensembling buffers for the training images ...
    Z_h1 = torch.zeros(n_samples, num_bits).float().cuda()  # intermediate values
    z_h1 = torch.zeros(n_samples, num_bits).float().cuda()  # temporal outputs
    h1 = torch.zeros(n_samples, num_bits).float().cuda()    # current outputs
    # ... and for the anchor set.
    Z_h2 = torch.zeros(args.anchor_num, num_bits).float().cuda()  # intermediate values
    z_h2 = torch.zeros(args.anchor_num, num_bits).float().cuda()  # temporal outputs
    h2 = torch.zeros(args.anchor_num, num_bits).float().cuda()    # current outputs

    for epoch in range(start_epoch, total_epochs):
        model.train(True)

        # Ramp the learning rate and Adam betas up/down over training.
        rampup_value = rampup(epoch)
        rampdown_value = rampdown(epoch)
        learning_rate = rampup_value * rampdown_value * 0.00005
        adam_beta1 = rampdown_value * 0.9 + (1.0 - rampdown_value) * 0.5
        adam_beta2 = step_rampup(epoch) * 0.99 + (1 - step_rampup(epoch)) * 0.999

        # Unsupervised-loss weight, ramped up from zero.
        if epoch == 0:
            u_w = 0.0
        else:
            u_w = rampup_value
        u_w_m = u_w * 5
        u_w_m = torch.autograd.Variable(
            torch.FloatTensor([u_w_m]).cuda(), requires_grad=False)

        optimizer = Adam(model.parameters(), lr=learning_rate,
                         betas=(adam_beta1, adam_beta2), eps=1e-8, amsgrad=True)

        anchors_data, anchor_Label = generate_anchor_vectors(dict_loader)

        for iteration, data in enumerate(dataloader, 0):
            # Sample a random batch of 100 anchors.
            anchor_index = np.arange(args.anchor_num)
            np.random.shuffle(anchor_index)
            anchor_index = anchor_index[:100]
            anchor_index = torch.from_numpy(anchor_index).long().cuda()

            anchor_inputs = anchors_data[anchor_index, :, :, :]
            anchor_labels = anchor_Label[anchor_index, :]

            inputs, labels, index = data['image'], data['labels'], data['index']
            labels = labels.float()

            # Mask selecting the labeled samples in this batch.
            mask_flag = Variable(mask_labels[index], requires_grad=False)
            idx = (mask_flag > 0)

            if index.shape[0] == args.batch_size:
                anchor_batch_S, anchor_batch_W = CalcSim(
                    labels[idx, :].cuda(), anchor_labels.cuda())

                # Convert NHWC images to NCHW.
                if inputs.size(3) == 3:
                    inputs = inputs.permute(0, 3, 1, 2)
                inputs = inputs.type(torch.FloatTensor)

                # Temporal-ensemble targets for this batch and anchor batch.
                zcomp_h1 = z_h1[index.cuda(), :]
                zcomp_h2 = z_h2[anchor_index, :]

                labeled_batch_S, labeled_batch_W = CalcSim(
                    labels[idx, :].cuda(), labels[idx, :].cuda())

                if use_gpu:
                    inputs = Variable(inputs.cuda(), requires_grad=False)
                    anchor_batch_S = Variable(anchor_batch_S.cuda(), requires_grad=False)
                    anchor_batch_W = Variable(anchor_batch_W.cuda(), requires_grad=False)
                    labeled_batch_S = Variable(labeled_batch_S.cuda(), requires_grad=False)
                    labeled_batch_W = Variable(labeled_batch_W.cuda(), requires_grad=False)

                # zero the parameter gradients
                optimizer.zero_grad()

                y_h1 = model(inputs)
                y_h2 = model(anchor_inputs)
                # Pairwise image-anchor similarities from scaled inner products
                # (torch.sigmoid replaces the removed F.sigmoid).
                y = torch.sigmoid(48 / num_bits * 0.4 *
                                  torch.matmul(y_h1, y_h2.permute(1, 0)))

                loss, l_batch_loss, m_loss = criterion(
                    y, y_h1, y_h2, anchor_batch_S, anchor_batch_W,
                    labeled_batch_S, labeled_batch_W, zcomp_h1, zcomp_h2,
                    mask_flag, u_w_m, epoch, num_bits)

                h1[index, :] = y_h1.data.clone()
                h2[anchor_index, :] = y_h2.data.clone()

                # backward + optimize
                loss.backward()
                optimizer.step()
                running_loss += loss.item()

                # Update the anchor-side temporal ensemble with bias correction.
                Z_h2 = alpha_1 * Z_h2 + (1. - alpha_1) * h2
                z_h2 = Z_h2 * (1. / (1. - alpha_1 ** (epoch + 1)))

                print("Epoch[{}]({}/{}): Time:(data {:.3f}/ batch {:.3f}) "
                      "Loss_H: {:.4f}/{:.4f}/{:.4f}".format(
                          epoch, iteration, len(dataloader), data_time.val,
                          batch_time.val, loss.item(), l_batch_loss.item(),
                          m_loss.item()))

        # Update the image-side temporal ensemble once per epoch.
        Z_h1 = alpha * Z_h1 + (1. - alpha) * h1
        z_h1 = Z_h1 * (1. / (1. - alpha ** (epoch + 1)))

        # Evaluate every epoch and checkpoint the best model by MAP.
        if epoch % 1 == 0:
            MAP = helpers.validate(model, dataloader_test, test_loader)
            print("Test image map is: {}".format(MAP))

            is_best = MAP > best_prec
            best_prec = max(best_prec, MAP)

            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'state_dict': model.state_dict(),
                    'optimizer': optimizer.state_dict(),
                },
                is_best,
                prefix=arch,
                num_bits=num_bits,
                filename=checkpoint_filename)

    return model