def main():
    train_dataset = MultiLabelDataset(DATAPATH_TRAIN[args.dataset])
    # train_dataset = MultiLabelDataset('/understand/learnLSH/data/deliciousLarge_shuf_train.txt')
    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=args.batch_size,
                                               pin_memory=True,
                                               num_workers=6,
                                               shuffle=True)
    test_dataset = MultiLabelDataset(DATAPATH_TEST[args.dataset])
    # test_dataset = MultiLabelDataset('/understand/learnLSH/data/deliciousLarge_shuf_test.txt')
    test_loader = torch.utils.data.DataLoader(test_dataset,
                                              batch_size=args.batch_size,
                                              pin_memory=True,
                                              num_workers=6,
                                              shuffle=True)
    print("Statistics:", train_dataset.N, train_dataset.D, train_dataset.L,
          train_dataset.max_D, train_dataset.max_L)

    model = Net(train_dataset.D, train_dataset.L).to(device)
    # Debug dumps of the embedding and softmax weights, kept commented out:
    # Embedding_weight = model.fc.weight.data.cpu().numpy()
    Embedding_bia = model.bias.data.cpu().numpy()  # currently unused
    # last_weight = model.smax.params.weight.data.cpu().numpy()
    # last_bias = model.smax.params.bias.data.cpu().numpy()
    # print("Embedding_weight", model.fc.weight.data.cpu().numpy()[0])
    # print("Embedding_bia", model.bias.data.cpu().numpy())
    # print("last_weight", model.smax.params.weight.data.cpu().numpy()[0])
    # np.save(".Embedding_weight", Embedding_weight)
    # np.save(".deli_Embedding_bia", Embedding_bia)
    # np.save(".last_weight", last_weight)
    # np.save(".last_bias", last_bias)
    # print('model.parameters:', model.parameters())

    optimizer = Adam(model.parameters(), lr=args.lr)
    freeze = False
    best_acc1 = 0.0
    best_acc5 = 0.0
    for epoch in range(0, args.epochs, 1):
        epoch_start_time = time.time()
        train(args, model, device, train_loader, test_loader, optimizer,
              epoch, freeze)
        _, _ = evaluate_slide(args, epoch, len(train_loader) + 1, model,
                              device, test_loader, training=False, k=5,
                              slide=True)
        top1_acc, top5_acc = evaluate(args, epoch, len(train_loader) + 1,
                                      model, device, test_loader,
                                      training=False, k=5, slide=False)
        is_best = (top1_acc > best_acc1) or (top5_acc > best_acc5)
        best_acc1 = max(top1_acc, best_acc1)
        best_acc5 = max(top5_acc, best_acc5)
        print('| end of epoch {:3d} | time: {:5.2f}s |'.format(
            epoch, (time.time() - epoch_start_time)))
        print('-' * 89)
        # Once top-1 accuracy crosses the dataset threshold, freeze the
        # embedding layer for the remaining epochs.
        if best_acc1 > FREEZE_ACC[args.dataset] and freeze == False:
            freeze = True
            print("Set freeze weight")
            model.fc.weight.requires_grad = False
            model.bias.requires_grad = False
            print("Current Parameters")
            for p in model.parameters():
                if p.requires_grad:
                    print(p.name, p.size())
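# Note on the freeze step above: `optimizer` was constructed over all model
# parameters, so Adam's momentum buffers can keep nudging the frozen embedding
# for a few steps even after `requires_grad` is cleared. A minimal sketch of a
# safer variant (hypothetical helper, not part of the original script) that
# rebuilds the optimizer over the still-trainable parameters only:
def rebuild_optimizer(model, lr):
    # Keep only parameters that still require gradients.
    trainable = [p for p in model.parameters() if p.requires_grad]
    return torch.optim.Adam(trainable, lr=lr)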
    # One triplet network (and two SGD optimizers) per hash table.
    triplet_dict = {}
    triplet_opt1_dict = {}
    triplet_opt2_dict = {}
    for l in range(args.L):
        classifier = Network(args.layer_dim, args.K)
        triplet_dict[l] = TripletNet(classifier, args.margin)
        triplet_opt1_dict[l] = optim.SGD(triplet_dict[l].parameters(),
                                         lr=args.lr, momentum=0.9)
        triplet_opt2_dict[l] = optim.SGD(triplet_dict[l].parameters(),
                                         lr=args.lr, momentum=0.9)
        # triplet_dict[l].to(device)
        # print("classifier %d" % (l))
        # print("network weight0 shape:", triplet_dict[l].classifier.dense1.weight.shape)

    # Collect the weights for the hash tables.
    hash_weight = getTripletWeight(triplet_dict).cpu()
    print("hash weight shape:", hash_weight.shape)

    print("\n============Set Up Network==================")
    model = Net(feature_dim, num_class, args.layer_dim, hash_weight, args.K,
                args.L).to(device)
    optimizer = Adam(model.parameters(), lr=0.0001)
    # optimizer = optim.SGD(model.parameters(), lr=args.lr, momentum=0.9)

    print("\n============Training start=================")
    with open(args.logfile, 'w') as out:
        for epoch in range(args.epoch_num):
            print("Epoch: ", epoch)
            train_network(args, model, device, train_loader, test_loader,
                          optimizer, epoch, triplet_dict, triplet_opt1_dict,
                          triplet_opt2_dict)
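# The TripletNet instances above are presumably trained with a margin-based
# triplet objective (the actual loss lives elsewhere in this repo). A minimal
# sketch of one update step using PyTorch's built-in criterion; `anchor`,
# `pos`, and `neg` are illustrative names, and `net.classifier` is the shared
# embedding network referenced in the commented-out debug prints above:
def triplet_step(net, opt, anchor, pos, neg, margin):
    criterion = torch.nn.TripletMarginLoss(margin=margin)
    opt.zero_grad()
    # Embed anchor, positive, and negative with the shared network, then pull
    # the positive within `margin` of the anchor relative to the negative.
    loss = criterion(net.classifier(anchor),
                     net.classifier(pos),
                     net.classifier(neg))
    loss.backward()
    opt.step()
    return loss.item()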
def main():
    torch.manual_seed(args.seed)
    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")

    train_dataset = MultiLabelDataset(DATAPATH_TRAIN[args.dataset])
    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=args.batch_size,
                                               pin_memory=True,
                                               num_workers=6,
                                               shuffle=True)
    test_dataset = MultiLabelDataset(DATAPATH_TEST[args.dataset])
    # test_dataset = MultiLabelDataset(DATAPATH_TRAIN[args.dataset])
    test_loader = torch.utils.data.DataLoader(test_dataset,
                                              batch_size=args.batch_size,
                                              pin_memory=True,
                                              num_workers=6,
                                              shuffle=False)
    print("Statistics:", train_dataset.N, train_dataset.D, train_dataset.L,
          train_dataset.max_D, train_dataset.max_L)

    model = Net(train_dataset.D, train_dataset.L).to(device)
    optimizer = Adam(model.parameters(), lr=args.lr)
    freeze = False
    best_acc1 = 0.0
    best_acc5 = 0.0
    start_epoch = 0

    # Optionally resume from a saved model.
    resume_path = args.resume_model_path
    if args.resume_full:
        if os.path.isfile(resume_path):
            checkpoint = torch.load(resume_path)
            start_epoch = checkpoint['epoch']
            best_acc1 = checkpoint['best_acc1']
            model.load_state_dict(checkpoint['model_state_dict'],
                                  strict=False)
            print("=> loaded checkpoint '{}' (epoch {})".format(
                resume_path, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(resume_path))

    for epoch in range(start_epoch, args.epochs, 1):
        print("epoch {}, freeze {}:".format(epoch, freeze))
        epoch_start_time = time.time()
        train(args, model, device, train_loader, test_loader, optimizer,
              epoch, freeze)
        print('| end of epoch {:3d} | time: {:5.2f}s |'.format(
            epoch, (time.time() - epoch_start_time)))
        print('-' * 89)
        top1_acc, top5_acc = evaluate(args, model, device, test_loader,
                                      training=False, k=5, slide=False)
        is_best = (top1_acc > best_acc1) or (top5_acc > best_acc5)
        best_acc1 = max(top1_acc, best_acc1)
        best_acc5 = max(top5_acc, best_acc5)
        for p in model.parameters():
            if p.requires_grad:
                print(p.name, p.size())
        if best_acc1 > FREEZE_ACC[args.dataset] and freeze == False:
            freeze = True  # freeze embedding
            print("Set freeze weight")
            model.fc.weight.requires_grad = False
            model.bias.requires_grad = False
            print("Current Parameters")
            for p in model.parameters():
                if p.requires_grad:
                    print(p.name, p.size())
        print('-' * 89)
        print()
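# The resume branch above expects a checkpoint dict with the keys 'epoch',
# 'best_acc1', and 'model_state_dict'. The matching save call is not shown in
# this section; a minimal sketch consistent with those keys:
def save_checkpoint(model, epoch, best_acc1, path):
    torch.save({'epoch': epoch,
                'best_acc1': best_acc1,
                'model_state_dict': model.state_dict()},
               path)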
np.random.seed(1234)
torch.manual_seed(1234)
print("device", device)

# Read in train and test data.
print("\n===========Read in data: " + args.dataset + "===================")
train_loader, test_loader, feature_dim, num_class, num_train, num_test = \
    get_networkDataLoader(args)
print("Dataset Statistics: feature dimension: %d, label dimension: %d, "
      "number of train data: %d, number of test data: %d" %
      (feature_dim, num_class, num_train, num_test))

print("\n============Set Up Network==================")
model = Net(feature_dim, num_class, args.layer_dim, args.K,
            args.L).to(device)
optimizer = Adam(model.parameters(), lr=args.lr_task)
# optimizer = optim.SparseAdam(model.parameters(), lr=args.lr)
freeze_flag = False

print("\n============Training start=================")
for epoch in range(args.epoch_num):
    if epoch > 5:
        freeze_flag = True
    print("Epoch: ", epoch)
    epoch_start_time = time.time()
    train_network(args, model, device, train_loader, test_loader, optimizer,
                  epoch, freeze_flag)
    print('| end of epoch {:3d} | time: {:5.2f}s |'.format(
        epoch, (time.time() - epoch_start_time)))
if args.nhid != args.emsize and not args.proj:
    raise ValueError(
        'When using the tied flag, hidden must be equal to embedding size')

twht = encoder.weight
D = args.emsize if args.proj else args.nhid
ss = SampledSoftmax(ntokens, nsampled, D, tied_weight=twht)

net.add_module("encoder", encoder)
net.add_module("decoder", ss)
net.cuda()

print("Batch Size:", args.batch_size * args.scale,
      "Initial LR:", args.lr * args.scale)
criterion = nn.CrossEntropyLoss()
optimizer = Adam(net.parameters(), args.lr * args.scale, betas=(0.9, 0.999))
scheduler = LinearLR(optimizer,
                     base_lr=args.lr * args.scale,
                     max_iters=train_corpus.batch_num * args.epochs,
                     last_iter=-1,
                     min_lr=1e-8)

###############################################################################
# Training code
###############################################################################


def repackage_hidden(h, device_id=0):
    """Wraps hidden states in new Variables, to detach them from their history."""
    if isinstance(h, Variable):
        return Variable(h.data).cuda(device_id)
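# `repackage_hidden` as shown only handles a bare Variable; for LSTMs the
# hidden state is an (h, c) tuple. A sketch of the conventional recursive
# variant (assumed here, since the original function is cut off at the end of
# this section):
def repackage_hidden_recursive(h, device_id=0):
    # Detach a single tensor, or recurse over a tuple of hidden states.
    if isinstance(h, Variable):
        return Variable(h.data).cuda(device_id)
    return tuple(repackage_hidden_recursive(v, device_id) for v in h)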
def main():
    # Training settings
    parser = argparse.ArgumentParser(
        description='PyTorch Multi-Label Classification Example')
    parser.add_argument('--dataset', type=str, default="wiki10",
                        choices=DATASET)
    parser.add_argument('--batch-size', type=int, default=128, metavar='N',
                        help='input batch size for training (default: 128)')
    parser.add_argument('--test-batch-size', type=int, default=1024,
                        metavar='N',
                        help='input batch size for testing (default: 1024)')
    parser.add_argument('--epochs', type=int, default=8, metavar='N',
                        help='number of epochs to train (default: 8)')
    parser.add_argument('--lr', type=float, default=0.001, metavar='LR',
                        help='learning rate (default: 0.001)')
    parser.add_argument('--seed', type=int, default=1, metavar='S',
                        help='random seed (default: 1)')
    parser.add_argument('--log-interval', type=int, default=200, metavar='N',
                        help='how many batches to wait before logging '
                             'training status')
    parser.add_argument('--scale', type=int, default=20, metavar='N',
                        help='batch size multiplier')
    parser.add_argument('--name', type=str, default="data/", metavar='N',
                        help='datapath')
    args = parser.parse_args()

    torch.manual_seed(args.seed)
    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")

    train_dataset = MultiLabelDataset(DATAPATH_TRAIN[args.dataset])
    # train_dataset = MultiLabelDataset('/understand/learnLSH/data/wiki10_train.txt')
    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=args.batch_size,
                                               pin_memory=True,
                                               num_workers=6,
                                               shuffle=True)
    test_dataset = MultiLabelDataset(DATAPATH_TEST[args.dataset])
    # test_dataset = MultiLabelDataset('/understand/learnLSH/data/wiki10_test.txt')
    test_loader = torch.utils.data.DataLoader(test_dataset,
                                              batch_size=args.batch_size,
                                              pin_memory=True,
                                              num_workers=6,
                                              shuffle=False)
    print("Statistics:", train_dataset.N, train_dataset.D, train_dataset.L,
          train_dataset.max_D, train_dataset.max_L)

    model = Net(train_dataset.D, train_dataset.L).to(device)
    optimizer = Adam(model.parameters(), args.lr)

    for epoch in range(1, args.epochs + 1):
        epoch_start_time = time.time()
        train(args, model, device, train_loader, optimizer, epoch)
        print('| end of epoch {:3d} | time: {:5.2f}s |'.format(
            epoch, (time.time() - epoch_start_time)))
        print('-' * 89)
        # if epoch % 10 == 0:
        #     evaluate(args, model, device, test_loader)
        #     print('-' * 89)

    evaluate(args, model, device, test_loader)
    print('-' * 89)
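# `evaluate` above is defined elsewhere in this repo. For reference, a minimal
# precision@k sketch over dense logits (hypothetical helper, not the original
# implementation): `logits` is a (batch, L) score tensor and `label_sets` is a
# list of sets of true label ids, one set per example.
def precision_at_k(logits, label_sets, k=5):
    _, topk = logits.topk(k, dim=1)  # indices of the k highest-scoring labels
    hits = sum(len(set(row.tolist()) & labels)
               for row, labels in zip(topk, label_sets))
    return hits / (k * len(label_sets))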