def main():
    """Train AlexNet on CIFAR-10/100 with SGD, reporting per-epoch metrics.

    Relies on module-level names not defined in this function: ``args``
    (CLI options), ``state`` (tracks the current LR for logging), and
    ``use_cuda`` — all presumably set up at script scope; TODO confirm.
    """
    start_epoch = args.start_epoch  # start from epoch 0 or last checkpoint epoch

    # Data
    print('==> Preparing dataset %s' % args.dataset)
    transform_train = transforms.Compose([
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
    ])
    transform_test = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
    ])
    if args.dataset == 'cifar10':
        dataloader = datasets.CIFAR10
        num_classes = 10
    else:
        dataloader = datasets.CIFAR100
        num_classes = 100

    trainset = dataloader(root=args.dataroot, train=True, download=True,
                          transform=transform_train)
    # BUG FIX: the training loader previously used shuffle=False and no
    # workers; SGD needs shuffled mini-batches, and the test loader already
    # used num_workers=args.workers — make the two loaders consistent.
    trainloader = data.DataLoader(dataset=trainset, batch_size=args.train_batch,
                                  shuffle=True, num_workers=args.workers)
    testset = dataloader(root=args.dataroot, train=False, download=False,
                         transform=transform_test)
    testloader = data.DataLoader(testset, batch_size=args.test_batch,
                                 shuffle=False, num_workers=args.workers)

    # Model
    print("==> creating model '{}'".format("Alexnet"))
    model = AlexNet(num_classes=num_classes)
    model = model.cuda()
    print('Model on cuda')
    cudnn.benchmark = True  # fixed input size: let cuDNN pick fastest kernels
    print(' Total params: %.2fM' % (sum(p.numel() for p in model.parameters()) / 1000000.0))

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(model.parameters(), lr=args.lr,
                          momentum=args.momentum, weight_decay=args.weight_decay)

    # Train and val
    for epoch in range(start_epoch, args.epochs):
        adjust_learning_rate(optimizer, epoch)
        train_loss, train_acc = train(trainloader, model, criterion, optimizer,
                                      epoch, use_cuda)
        test_loss, test_acc = test(testloader, model, criterion, epoch, use_cuda)
        print('Epoch[{}/{}]: LR: {:.3f}, Train loss: {:.5f}, Test loss: {:.5f}, '
              'Train acc: {:.2f}, Test acc: {:.2f}.'.format(
                  epoch + 1, args.epochs, state['lr'], train_loss, test_loss,
                  train_acc, test_acc))
def Test_phase(args, k):
    """Run the final test: ensemble four AlexNet checkpoints and write a CSV.

    For each episode, each model embeds the k support (shot) images and the
    query images; per-class prototypes are the mean shot embedding, and a
    query's logits are its squared euclidean distances to the prototypes
    (lower = closer). The four models' distance maps are averaged and the
    nearest prototype is the prediction.

    NOTE(review): train() calls this as ``Test_phase(model, args, k)`` even
    though the signature is ``(args, k)`` — the extra ``model`` argument at
    the call site is a bug; this function builds its own ensemble.

    Args:
        args: parsed CLI options (nway, kshot, query, dpath, ...).
        k: number of support images per episode (nway * kshot).
    """

    def load_model(ckpt_path):
        # One ensemble member: restore weights, wrap in DataParallel, eval mode.
        # NOTE(review): torch.load has no map_location — assumes a CUDA host.
        m = AlexNet()
        m.cuda()
        m.load_state_dict(torch.load(ckpt_path))
        m = torch.nn.DataParallel(m, device_ids=range(torch.cuda.device_count()))
        m.eval()
        return m

    def proto_logits(m, data_shot, data_query):
        # Distance of every query embedding to each class prototype
        # (mean of that class's kshot support embeddings).
        features_shot = m(data_shot)
        features_shot_mean = torch.zeros(args.nway, features_shot.size(1)).cuda()
        for c in range(int(args.nway)):
            start = c * args.kshot
            end = (c + 1) * args.kshot
            features_shot_mean[c] = features_shot[start:end].mean(dim=0)
        features_query = m(data_query)
        return square_euclidean_metric(features_query, features_shot_mean)

    # Previously this loading/eval code (and the per-model logit computation
    # below) was copy-pasted four times — folded into the helpers above.
    checkpoints = (
        'checkpoints/alexnet_63.pth',
        'checkpoints/alexnet_6240.pth',
        'checkpoints/alexnet_6215.pth',
        'checkpoints/alexnet_6085.pth',
    )
    models = [load_model(p) for p in checkpoints]

    csv = csv_write(args)
    dataset = Test_Dataset(args.dpath)
    test_sampler = Test_Sampler(dataset._labels, n_way=args.nway,
                                k_shot=args.kshot, query=args.query)
    test_loader = DataLoader(dataset=dataset, batch_sampler=test_sampler,
                             num_workers=8, pin_memory=True)

    print('Test start!')
    # BUG FIX: inference previously ran without no_grad(), building autograd
    # graphs for every episode and wasting GPU memory.
    with torch.no_grad():
        for i in range(TEST_SIZE):
            for episode in test_loader:
                data = episode.cuda()
                data_shot, data_query = data[:k], data[k:]
                # Average the four distance maps; the logits are distances,
                # so the prediction is the class with the minimum value.
                logits = sum(proto_logits(m, data_shot, data_query)
                             for m in models) / len(models)
                pred = torch.argmin(logits, dim=1)
                # save your prediction as StudentID_Name.csv file
                csv.add(pred)
    csv.close()
    print('Test finished, check the csv file!')
    exit()
def val(args):
    """Validate the four-checkpoint AlexNet ensemble on the validation split.

    Same prototypical-distance scheme as Test_phase: per-class prototypes
    from the support (shot) embeddings, squared euclidean distance for the
    queries, distance maps averaged over the four models. Accuracy is
    accumulated over VAL_TOTAL passes of the validation loader; predictions
    are also written to the CSV.

    Args:
        args: parsed CLI options (nway, kshot, query, dpath, ...).
    """
    # the number of N way, K shot images
    k = args.nway * args.kshot

    def load_model(ckpt_path):
        # One ensemble member: restore weights, wrap in DataParallel, eval mode.
        m = AlexNet()
        m.cuda()
        m.load_state_dict(torch.load(ckpt_path))
        m = torch.nn.DataParallel(m, device_ids=range(torch.cuda.device_count()))
        m.eval()
        return m

    def proto_logits(m, data_shot, data_query):
        # Distance of every query embedding to each class prototype
        # (mean of that class's kshot support embeddings).
        features_shot = m(data_shot)
        features_shot_mean = torch.zeros(args.nway, features_shot.size(1)).cuda()
        for c in range(int(args.nway)):
            start = c * args.kshot
            end = (c + 1) * args.kshot
            features_shot_mean[c] = features_shot[start:end].mean(dim=0)
        features_query = m(data_query)
        return square_euclidean_metric(features_query, features_shot_mean)

    # Previously this code (and the per-model logit computation) was
    # copy-pasted four times, and each episode's label remapping was
    # computed twice — de-duplicated here.
    checkpoints = (
        'checkpoints/alexnet_63.pth',
        'checkpoints/alexnet_6240.pth',
        'checkpoints/alexnet_6215.pth',
        'checkpoints/alexnet_6085.pth',
    )
    models = [load_model(p) for p in checkpoints]

    csv = csv_write(args)

    # Validation data loading
    val_dataset = Dataset(args.dpath, state='val')
    val_sampler = Sampler(val_dataset._labels, n_way=args.nway,
                          k_shot=args.kshot, query=args.query)
    val_data_loader = DataLoader(dataset=val_dataset, batch_sampler=val_sampler,
                                 num_workers=8, pin_memory=True)

    print('test start')
    # validation start
    print('validation start')
    with torch.no_grad():
        vl = Averager()  # save average loss
        va = Averager()  # save average accuracy
        for _ in range(VAL_TOTAL):  # renamed from j: was shadowed by inner loop
            for episode in val_data_loader:
                data, label = [x.cuda() for x in episode]
                # split an episode's images and labels into shot and query set;
                # note: data_shot is ( nway * kshot, 3, h, w )
                data_shot, data_query = data[:k], data[k:]
                label_shot, label_query = label[:k], label[k:]
                # convert raw class ids into 0..nway-1 episode labels
                support_classes = sorted(set(label_shot.tolist()))
                labels = torch.tensor(
                    [support_classes.index(q) for q in label_query.tolist()]
                ).cuda()

                # logits are distances: lower means closer to that prototype
                logits = sum(proto_logits(m, data_shot, data_query)
                             for m in models) / len(models)
                pred = torch.argmin(logits, dim=1)

                acc = count_acc(logits, labels)
                va.add(acc)
                csv.add(pred)

            print('val accuracy mean : %.4f' % va.item())
            # NOTE(review): nothing is ever added to `vl` (no loss is computed
            # in this eval-only path), so this prints the Averager's empty
            # value — verify Averager.item() tolerates zero samples.
            print('val loss mean : %.4f' % vl.item())
            # initialize loss and accuracy mean
            vl = Averager()
            va = Averager()

    csv.close()
    print('Test finished, check the csv file!')
    exit()
def train(args):
    """Episodic few-shot training of AlexNet with prototypical-distance loss.

    Each episode: embed the nway*kshot support images, average them per class
    into prototypes, embed the query images, and minimise the negative
    log-softmax (over classes) of the negated squared euclidean distances.
    Periodically logs training stats, runs validation, and checkpoints.

    Args:
        args: parsed CLI options (nway, kshot, query, dpath, se,
              restore_ckpt, test_mode, name, ...).
    """
    # the number of N way, K shot images
    k = args.nway * args.kshot

    # model setting
    model = AlexNet()
    model.cuda()

    # pretrained model load
    if args.restore_ckpt is not None:
        state_dict = torch.load(args.restore_ckpt)
        model.load_state_dict(state_dict)

    model = torch.nn.DataParallel(model, device_ids=range(torch.cuda.device_count()))

    if args.test_mode == 1:
        # BUG FIX: was Test_phase(model, args, k), but the function is
        # declared as Test_phase(args, k) — `model` landed in `args`.
        Test_phase(args, k)
        return

    def episode_labels(label_shot, label_query):
        # Remap raw class ids to 0..nway-1 by position in the sorted support set.
        support_classes = sorted(set(label_shot.tolist()))
        return torch.tensor(
            [support_classes.index(q) for q in label_query.tolist()]
        ).cuda()

    def episode_forward(data_shot, data_query, labels):
        # Prototype logits (squared euclidean distances) and the NLL loss on
        # the softmax over negated distances. Returns (logits, loss).
        features_shot = model(data_shot.cuda())
        n_sample = int(args.query / args.nway)
        features_shot_mean = torch.zeros(args.nway, features_shot.size(1)).cuda()
        for c in range(int(args.nway)):
            start = c * args.kshot
            end = (c + 1) * args.kshot
            features_shot_mean[c] = features_shot[start:end].mean(dim=0)
        features_query = model(data_query.cuda())
        logits = square_euclidean_metric(features_query, features_shot_mean)

        lsoft = F.log_softmax(-logits, dim=1).view(args.kshot, n_sample, -1)
        target = labels.view(args.query, 1, 1).expand(args.query, args.nway, 1)
        target = target.view(lsoft.size())
        loss = -lsoft.gather(2, target).squeeze().view(-1).mean()
        return logits, loss

    # Train data loading
    dataset = Dataset(args.dpath, state='train')
    train_sampler = Train_Sampler(dataset._labels, n_way=args.nway,
                                  k_shot=args.kshot, query=args.query)
    data_loader = DataLoader(dataset=dataset, batch_sampler=train_sampler,
                             num_workers=8, pin_memory=True)

    # Validation data loading
    val_dataset = Dataset(args.dpath, state='val')
    val_sampler = Sampler(val_dataset._labels, n_way=args.nway,
                          k_shot=args.kshot, query=args.query)
    val_data_loader = DataLoader(dataset=val_dataset, batch_sampler=val_sampler,
                                 num_workers=8, pin_memory=True)

    optimizer = torch.optim.SGD(model.parameters(), lr=0.001, momentum=0.9,
                                weight_decay=5e-4)

    tl = Averager()  # save average loss
    ta = Averager()  # save average accuracy

    # training start
    print('train start')
    model.train()
    for i in range(args.se + 1, TOTAL):
        for episode in data_loader:
            optimizer.zero_grad()
            data, label = episode  # load an episode (kept on CPU; moved in forward)
            # note: data_shot is ( nway * kshot, 3, h, w ), not ( kshot * nway, ... )
            data_shot, data_query = data[:k], data[k:]
            label_shot, label_query = label[:k], label[k:]
            labels = episode_labels(label_shot, label_query)

            logits, loss = episode_forward(data_shot, data_query, labels)

            acc = count_acc(logits, labels)
            tl.add(loss.item())
            ta.add(acc)

            loss.backward()
            optimizer.step()

        if (i + 1) % PRINT_FREQ == 0:
            print('train {}, loss={:.4f} acc={:.4f}'.format(i + 1, tl.item(), ta.item()))
            # initialize loss and accuracy mean
            tl = Averager()
            ta = Averager()

        # validation start
        if (i + 1) % VAL_FREQ == 0:
            print('validation start')
            model.eval()
            with torch.no_grad():
                vl = Averager()  # save average loss
                va = Averager()  # save average accuracy
                for _ in range(VAL_TOTAL):  # renamed from j: was shadowed below
                    for episode in val_data_loader:
                        # (previously the episode was unpacked and the labels
                        # remapped twice, with a stray optimizer.zero_grad()
                        # inside this no_grad block — de-duplicated)
                        data, label = [x.cuda() for x in episode]
                        data_shot, data_query = data[:k], data[k:]
                        label_shot, label_query = label[:k], label[k:]
                        labels = episode_labels(label_shot, label_query)

                        logits, loss = episode_forward(data_shot, data_query, labels)

                        acc = count_acc(logits, labels)
                        vl.add(loss.item())
                        va.add(acc)

                print('val accuracy mean : %.4f' % va.item())
                print('val loss mean : %.4f' % vl.item())
            # BUG FIX: the model was left in eval mode after validation, so
            # Dropout stayed disabled for all subsequent training iterations.
            model.train()

        if (i + 1) % SAVE_FREQ == 0:
            PATH = 'checkpoints/%d_%s.pth' % (i + 1, args.name)
            torch.save(model.module.state_dict(), PATH)
            print('model saved, iteration : %d' % i)
num_workers=8) t_loader = torch.utils.data.DataLoader(dataset.Office(t_list_path), batch_size=batch_size, shuffle=True, drop_last=True, num_workers=8) val_loader = torch.utils.data.DataLoader(dataset.Office(t_list_path, training=False), batch_size=1, num_workers=8) s_loader_len, t_loader_len = len(s_loader), len(t_loader) model = AlexNet(cudable=cuda, n_class=n_class) if cuda: model.cuda() opt, opt_D = model.get_optimizer(init_lr, lr_mult, lr_mult_D) # resume or init if not resume == '': pretrain = torch.load(pretrain_path) model.load_state_dict(pretrain['model']) opt.load_state_dict(pretrain['opt']) opt_D.load_state_dict(pretrain['opt_D']) epoch = pretrain['epoch'] # need change to 0 when DA else: model.load_state_dict(utils.load_pth_model(), strict=False) # model.load_state_dict(utils.load_pretrain_npy(), strict=False) epoch = 0
transforms.ToTensor(), transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]) ]) use_gpu = torch.cuda.is_available() batch_size = 32 num_workers = 8 test_file = '/Users/demerzel/PycharmProjects/cat-dog/data/test.txt' test_dataset = CatDogDataset(file_path=test_file, model='test', data_transforms=test_data_transforms) test_dataloader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=num_workers) test_datasize = len(test_dataset) num_classes = 2 model = AlexNet(num_classes=2) model = torch.load('./output/epoch_70.pkl') if use_gpu: model = model.cuda() eval_model(model=model, use_gpu=use_gpu)