def get_imagenet_models(model_name):
    """Build a pretrained ImageNet classifier selected by ``model_name``.

    The constructor is imported from the project's ``models`` module only in
    the branch that is actually taken, so unused architectures are never
    imported.

    Args:
        model_name: one of ``'model_vgg16bn'``, ``'model_resnet18_imgnet'``
            or ``'model_inception'``.

    Returns:
        Whatever the matching constructor returns when called with
        ``pretrained=True``.

    Raises:
        ValueError: if ``model_name`` is none of the names listed above.
    """
    if model_name == 'model_vgg16bn':
        from models import vgg16_bn
        return vgg16_bn(pretrained=True)

    if model_name == 'model_resnet18_imgnet':
        from models import resnet18
        return resnet18(pretrained=True)

    if model_name == 'model_inception':
        from models import inception_v3
        return inception_v3(pretrained=True)

    raise ValueError(f'Buggya no model named {model_name}')
def get_model_for_attack(model_name):
    """Return the model to attack, selected by ``model_name``.

    Supported names:

    * ``'model_vgg16bn'``, ``'model_resnet18'``, ``'model_inceptionv3'`` --
      call the matching constructor with ``pretrained=True``.
    * ``'model_vitb'`` -- a ``ViT('B_16_imagenet1k')`` wrapped in
      ``ImageNetRenormalize`` and ``MegaSizer``.
    * ``'model_hub:<repo>:<entry>'`` -- ``torch.hub.load(repo, entry)``
      wrapped in ``Cifar10Renormalize``.  ``<repo>`` may itself contain a
      ``:`` (the ``owner/name:ref`` form accepted by ``torch.hub``).
    * ``'model_mnist:<key>'`` -- entry ``<key>`` of the object stored in
      ``mnist.pt``.
    * ``'model_ex:<path>'`` -- the object stored at ``<path>``; the path may
      contain ``:``.

    Args:
        model_name: selector string in one of the forms above.

    Returns:
        The constructed or loaded model object.

    Raises:
        ValueError: if ``model_name`` matches none of the forms, or if a
            prefixed name does not have enough ``:``-separated fields.
    """
    if model_name == 'model_vgg16bn':
        model = vgg16_bn(pretrained=True)
    elif model_name == 'model_resnet18':
        model = resnet18(pretrained=True)
    elif model_name == 'model_inceptionv3':
        model = inception_v3(pretrained=True)
    elif model_name == 'model_vitb':
        from mnist_vit import ViT, MegaSizer
        model = MegaSizer(
            ImageNetRenormalize(ViT('B_16_imagenet1k', pretrained=True)))
    elif model_name.startswith('model_hub:'):
        # Strip the prefix, then take the entry point from the right so a
        # repo spec with a ':ref' suffix stays intact.
        _, spec = model_name.split(":", 1)
        repo, entry = spec.rsplit(":", 1)
        model = torch.hub.load(repo, entry, pretrained=True)
        model = Cifar10Renormalize(model)
    elif model_name.startswith('model_mnist:'):
        _, key = model_name.split(":")
        model = torch.load('mnist.pt')[key]
    elif model_name.startswith('model_ex:'):
        # Everything after the prefix is the path (it may contain ':').
        # NOTE: torch.load unpickles the file; only use trusted paths.
        _, path = model_name.split(":", 1)
        model = torch.load(path)
    else:
        raise ValueError(f'Model {model_name} does not exist.')
    return model
def main():
    """Run the trained classifier over the test split and save Grad-CAM maps.

    Reads its configuration from the module-level ``args`` and ``image_dir``:
    builds the network named by ``args.net_type``, restores the weights
    stored under ``"model"`` in the checkpoint at ``args.model_path``, then,
    for every test sample, prints the index, prediction and target and calls
    ``GradCAMmodule.saveGradCAM`` for the predicted class.  Output goes to
    ``./save_fig`` (created if missing).

    Raises:
        ValueError: if ``args.net_type`` is not one of the supported
            architectures.
    """
    output_dir = "./save_fig"

    # Device configuration
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    ### data config
    test_dataset = load_data.Dog_dataloader(image_dir=image_dir,
                                            num_class=args.num_classes,
                                            mode="test")
    test_loader = torch.utils.data.DataLoader(test_dataset,
                                              batch_size=1,
                                              shuffle=False,
                                              num_workers=2)

    ### novelty data
    # NOTE(review): the OOD loader is not consumed below; it is still built
    # in case constructing the dataset has side effects -- confirm and drop.
    out_test_dataset = load_data.Dog_dataloader(image_dir=image_dir,
                                                num_class=args.num_classes,
                                                mode="OOD")
    out_test_loader = torch.utils.data.DataLoader(out_test_dataset,
                                                  batch_size=1,
                                                  shuffle=False,
                                                  num_workers=2)

    ##### model config
    net_type = args.net_type
    if net_type == "resnet50":
        model = models.resnet50(num_c=args.num_classes, pretrained=True)
    elif net_type == "resnet34":
        model = models.resnet34(num_c=args.num_classes, num_cc=args.OOD_num_classes, pretrained=True)
    elif net_type == "vgg19":
        model = models.vgg19(num_c=args.num_classes, num_cc=args.OOD_num_classes, pretrained=True)
    elif net_type == "vgg16":
        model = models.vgg16(num_c=args.num_classes, num_cc=args.OOD_num_classes, pretrained=True)
    elif net_type == "vgg19_bn":
        model = models.vgg19_bn(num_c=args.num_classes, num_cc=args.OOD_num_classes, pretrained=True)
    elif net_type == "vgg16_bn":
        model = models.vgg16_bn(num_c=args.num_classes, num_cc=args.OOD_num_classes, pretrained=True)
    else:
        # Fail early with a clear message instead of a NameError further down.
        raise ValueError("Unsupported net_type: {!r}".format(net_type))

    print("load checkpoint_last")
    # map_location lets a checkpoint saved on GPU be restored on a CPU-only
    # host, matching the device fallback chosen above.
    checkpoint = torch.load(args.model_path, map_location=device)

    ##### load model
    model.load_state_dict(checkpoint["model"])

    #### create folder
    Path(output_dir).mkdir(exist_ok=True, parents=True)

    model = model.to(device).eval()

    # Start grad-CAM
    # NOTE(review): `bp`, `gcam` and `inv_normalize` are not used below; the
    # two constructors are kept because they may attach hooks to the model.
    bp = BackPropagation(model=model)
    # Inverse of Normalize(mean=[0.485, 0.456, 0.406],
    #                      std=[0.229, 0.224, 0.225]).
    inv_normalize = transforms.Normalize(
        mean=[-0.485 / 0.229, -0.456 / 0.224, -0.406 / 0.225],
        std=[1 / 0.229, 1 / 0.224, 1 / 0.225])
    target_layer = "layer4"
    gcam = GradCAM(model=model)
    grad_cam = GradCAMmodule(target_layer, output_dir)
    grad_cam.model_config(model)

    for j, test_data in enumerate(test_loader):
        #### initialized
        org_image = test_data['input'].to(device)
        target_class = test_data['label'].to(device)
        target_class = int(target_class.argmax().cpu().detach())

        result = model(org_image).argmax()
        print("number: {} pred: {} target: {}".format(j, result, target_class))
        result = int(result.cpu().detach())
        grad_cam.saveGradCAM(org_image, result, j)
def train_multiclass(train_file, test_file, stat_file,
                     model='mobilenet_v2',
                     classes=('artist_name', 'genre', 'style', 'technique', 'century'),
                     label_file='_user_labels.pkl',
                     im_path='/export/home/kschwarz/Documents/Data/Wikiart_artist49_images',
                     chkpt=None, weight_file=None,
                     triplet_selector='semihard', margin=0.2,
                     labels_per_class=4, samples_per_label=4,
                     use_gpu=True, device=0,
                     epochs=100, batch_size=32, lr=1e-4, momentum=0.9,
                     log_interval=10, log_dir='runs', exp_name=None, seed=123):
    """Train a ``MetricNet`` with one triplet loss per label category.

    Args:
        train_file, test_file: passed to ``create_trainset`` /
            ``create_valset`` to build the datasets.
        stat_file: pickle file holding ``'mean'`` and ``'std'`` used for
            input normalisation.
        model: body network; one of ``squeezenet``, ``mobilenet_v1``,
            ``mobilenet_v2``, ``vgg16_bn``, ``inception_v3``, ``alexnet``
            (case-insensitive).
        classes: label categories; one embedding output is expected per entry.
        label_file: pickle file whose ``'labels'`` entry is summarised into
            the written config.
        im_path: image root handed to the dataset factories.
        chkpt: optional checkpoint file to resume from.
        weight_file: optional file whose ``'state_dict'`` initialises the
            body network; when given, the body is built with
            ``pretrained=False``.
        triplet_selector: ``random``, ``semihard``, ``hardest``, ``khardest``
            or ``mixed`` (semihard, switched to hardest at epoch 26).
        margin: triplet margin.
        labels_per_class, samples_per_label: forwarded to
            ``BalancedBatchSamplerMulticlass``.
        use_gpu, device: CUDA is used when requested and available.
        epochs, lr, momentum: SGD training schedule.
        batch_size: recorded in the config only; batches are produced by the
            balanced batch sampler.
        log_interval: number of batches between progress prints.
        log_dir, exp_name: where logs and checkpoints are written.
        seed: RNG seed.

    Raises:
        AssertionError: for an unknown ``model`` or ``triplet_selector``.
    """
    argvars = locals().copy()  # must stay first: snapshot of the arguments only
    torch.manual_seed(seed)

    # LOAD DATASET
    # Pickle data is binary, so the file must be opened in 'rb' mode.
    with open(stat_file, 'rb') as f:
        data = pickle.load(f)
        mean, std = data['mean'], data['std']
        mean = [float(m) for m in mean]
        std = [float(s) for s in std]
    normalize = transforms.Normalize(mean=mean, std=std)
    train_transform = transforms.Compose([
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.RandomRotation(90),
        transforms.ToTensor(),
        normalize,
    ])
    val_transform = transforms.Compose([
        transforms.RandomResizedCrop(224),
        transforms.ToTensor(),
        normalize,
    ])

    model_key = model.lower()
    if model_key == 'inception_v3':  # change input size to 299
        train_transform.transforms[0].size = (299, 299)
        val_transform.transforms[0].size = (299, 299)

    trainset = create_trainset(train_file, label_file, im_path, train_transform, classes)
    for c in classes:
        if len(trainset.labels_to_ints[c]) < labels_per_class:
            print('less labels in class {} than labels_per_class, use all available labels ({})'
                  .format(c, len(trainset.labels_to_ints[c])))
    valset = create_valset(test_file, im_path, val_transform, trainset.labels_to_ints)

    # PARAMETERS
    use_cuda = use_gpu and torch.cuda.is_available()
    if use_cuda:
        torch.cuda.set_device(device)
        torch.cuda.manual_seed_all(seed)

    use_pretrained = weight_file is None
    if model_key not in ['squeezenet', 'mobilenet_v1', 'mobilenet_v2', 'vgg16_bn', 'inception_v3', 'alexnet']:
        # Explicit raise (not `assert`) so the check survives `python -O`.
        raise AssertionError('Unknown model {}\n\t+ Choose from: '
                             '[squeezenet, mobilenet_v1, mobilenet_v2, vgg16_bn, inception_v3, alexnet].'
                             .format(model))
    elif model_key == 'mobilenet_v1':
        bodynet = mobilenet_v1(pretrained=use_pretrained)
    elif model_key == 'mobilenet_v2':
        bodynet = mobilenet_v2(pretrained=use_pretrained)
    elif model_key == 'vgg16_bn':
        bodynet = vgg16_bn(pretrained=use_pretrained)
    elif model_key == 'inception_v3':
        bodynet = inception_v3(pretrained=use_pretrained)
    elif model_key == 'alexnet':
        bodynet = alexnet(pretrained=use_pretrained)
    else:  # squeezenet
        bodynet = squeezenet(pretrained=use_pretrained)

    # Load weights for the body network
    if weight_file is not None:
        print("=> loading weights from '{}'".format(weight_file))
        file_weights = torch.load(weight_file, map_location=lambda storage, loc: storage)['state_dict']
        state_dict = bodynet.state_dict()
        # in case of multilabel weight file
        file_weights = {k.replace('bodynet.', ''): v for k, v in file_weights.items()}
        # number of classes might have changed: keep only matching shapes
        pretrained_dict = {k: v for k, v in file_weights.items()
                           if k in state_dict and v.shape == state_dict[k].shape}

        # Report which weights will not be transferred.  This compares key
        # sets and shapes only; `==` on dicts of tensors is ambiguous.
        for k in sorted(set(state_dict) | set(file_weights)):
            if k not in file_weights:
                print('\tWeights for "{}" were not found in weight file.'.format(k))
            elif k not in state_dict:
                print('\tWeights for "{}" were are not part of the used model.'.format(k))
            elif state_dict[k].shape != file_weights[k].shape:
                print('\tShapes of "{}" are different in model ({}) and weight file ({}).'.
                      format(k, state_dict[k].shape, file_weights[k].shape))

        state_dict.update(pretrained_dict)
        bodynet.load_state_dict(state_dict)

    net = MetricNet(bodynet, len(classes))

    n_parameters = sum(p.data.nelement() for p in net.parameters() if p.requires_grad)
    if use_cuda:
        net = net.cuda()
    net_name = str(net).split('(', 1)[0]
    print('Using {}\n\t+ Number of params: {}'.format(net_name, n_parameters))

    if not os.path.isdir(log_dir):
        os.makedirs(log_dir)

    # tensorboard summary writer
    timestamp = time.strftime('%m-%d-%H-%M')
    expname = timestamp + '_' + net_name
    if exp_name is not None:
        expname = expname + '_' + exp_name
    log = TBPlotter(os.path.join(log_dir, 'tensorboard', expname))
    log.print_logdir()

    # allow auto-tuner to find best algorithm for the hardware
    cudnn.benchmark = True

    with open(label_file, 'rb') as f:
        labels = pickle.load(f)['labels']
        n_labeled = '\t'.join([str(Counter(l).items()) for l in labels.transpose()])

    write_config(argvars, os.path.join(log_dir, expname), extras={'n_labeled': n_labeled})

    # INITIALIZE TRAINING
    optimizer = optim.SGD(net.parameters(), lr=lr, momentum=momentum)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', patience=10, threshold=1e-1, verbose=True)

    selector_key = triplet_selector.lower()
    if selector_key not in ['random', 'semihard', 'hardest', 'mixed', 'khardest']:
        raise AssertionError('Unknown option {} for triplet selector. '
                             'Choose from "random", "semihard", "hardest", "khardest" or "mixed".'
                             .format(triplet_selector))
    elif selector_key == 'random':
        criterion = TripletLoss(margin=margin,
                                triplet_selector=RandomNegativeTripletSelector(margin, cpu=not use_cuda))
    elif selector_key == 'semihard' or selector_key == 'mixed':
        criterion = TripletLoss(margin=margin,
                                triplet_selector=SemihardNegativeTripletSelector(margin, cpu=not use_cuda))
    elif selector_key == 'khardest':
        criterion = TripletLoss(margin=margin,
                                triplet_selector=KHardestNegativeTripletSelector(margin, k=3, cpu=not use_cuda))
    else:
        criterion = TripletLoss(margin=margin,
                                triplet_selector=HardestNegativeTripletSelector(margin, cpu=not use_cuda))
    if use_cuda:
        criterion = criterion.cuda()

    kwargs = {'num_workers': 4} if use_cuda else {}
    multilabel_train = np.stack([trainset.df[c].values for c in classes]).transpose()
    train_batch_sampler = BalancedBatchSamplerMulticlass(multilabel_train, n_label=labels_per_class,
                                                         n_per_label=samples_per_label, ignore_label=None)
    trainloader = DataLoader(trainset, batch_sampler=train_batch_sampler, **kwargs)
    multilabel_val = np.stack([valset.df[c].values for c in classes]).transpose()
    val_batch_sampler = BalancedBatchSamplerMulticlass(multilabel_val, n_label=labels_per_class,
                                                       n_per_label=samples_per_label, ignore_label=None)
    valloader = DataLoader(valset, batch_sampler=val_batch_sampler, **kwargs)

    # optionally resume from a checkpoint
    start_epoch = 1
    best_acc = None
    if chkpt is not None:
        if os.path.isfile(chkpt):
            print("=> loading checkpoint '{}'".format(chkpt))
            checkpoint = torch.load(chkpt, map_location=lambda storage, loc: storage)
            start_epoch = checkpoint['epoch']
            # Checkpoints written below store 'best_acc'; fall back to 'acc'
            # for files that only carry that key.
            best_acc = checkpoint['best_acc'] if 'best_acc' in checkpoint else checkpoint['acc']
            net.load_state_dict(checkpoint['state_dict'])
            print("=> loaded checkpoint '{}' (epoch {})"
                  .format(chkpt, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(chkpt))

    def train(epoch):
        """Run one training epoch and log the running averages."""
        losses = AverageMeter()
        gtes = AverageMeter()
        non_zero_triplets = AverageMeter()
        distances_ap = AverageMeter()
        distances_an = AverageMeter()

        # switch to train mode
        net.train()
        for batch_idx, (data, target) in enumerate(trainloader):
            target = torch.stack(target)
            if use_cuda:
                data, target = Variable(data.cuda()), [Variable(t.cuda()) for t in target]
            else:
                data, target = Variable(data), [Variable(t) for t in target]

            # compute output
            outputs = net(data)

            # normalize features
            for i in range(len(classes)):
                outputs[i] = torch.nn.functional.normalize(outputs[i], p=2, dim=1)

            loss = Variable(torch.Tensor([0]), requires_grad=True).type_as(data[0])
            n_triplets = 0
            for op, tgt in zip(outputs, target):
                # filter unlabeled samples if there are any (have label -1)
                labeled = (tgt != -1).nonzero().view(-1)
                op, tgt = op[labeled], tgt[labeled]

                l, nt = criterion(op, tgt)
                # Out-of-place add: on CPU `loss` is a leaf that requires
                # grad, and in-place ops on such a leaf are rejected.
                loss = loss + l
                n_triplets += nt

            non_zero_triplets.update(n_triplets, target[0].size(0))
            # measure GTE and record loss
            gte, dist_ap, dist_an = GTEMulticlass(outputs, target)  # do not compute ap pairs for concealed classes
            gtes.update(gte.data, target[0].size(0))
            distances_ap.update(dist_ap.data, target[0].size(0))
            distances_an.update(dist_an.data, target[0].size(0))
            losses.update(loss.data[0], target[0].size(0))

            # compute gradient and do optimizer step
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            if batch_idx % log_interval == 0:
                print('Train Epoch: {} [{}/{}]\t'
                      'Loss: {:.4f} ({:.4f})\t'
                      'GTE: {:.2f}% ({:.2f}%)\t'
                      'Non-zero Triplets: {:d} ({:d})'.format(
                          epoch, batch_idx * len(target[0]), len(trainloader) * len(target[0]),
                          float(losses.val), float(losses.avg),
                          float(gtes.val) * 100., float(gtes.avg) * 100.,
                          int(non_zero_triplets.val), int(non_zero_triplets.avg)))

        # log avg values to somewhere
        log.write('loss', float(losses.avg), epoch, test=False)
        log.write('gte', float(gtes.avg), epoch, test=False)
        log.write('non-zero trplts', int(non_zero_triplets.avg), epoch, test=False)
        log.write('dist_ap', float(distances_ap.avg), epoch, test=False)
        log.write('dist_an', float(distances_an.avg), epoch, test=False)

    def test(epoch):
        """Evaluate on the validation set; return ``(avg loss, 1 - avg GTE)``."""
        losses = AverageMeter()
        gtes = AverageMeter()
        non_zero_triplets = AverageMeter()
        distances_ap = AverageMeter()
        distances_an = AverageMeter()

        # switch to evaluation mode
        net.eval()
        for batch_idx, (data, target) in enumerate(valloader):
            target = torch.stack(target)
            if use_cuda:
                data, target = Variable(data.cuda()), [Variable(t.cuda()) for t in target]
            else:
                data, target = Variable(data), [Variable(t) for t in target]
            # compute output
            outputs = net(data)

            # normalize features
            for i in range(len(classes)):
                outputs[i] = torch.nn.functional.normalize(outputs[i], p=2, dim=1)

            loss = Variable(torch.Tensor([0]), requires_grad=True).type_as(data[0])
            n_triplets = 0
            for op, tgt in zip(outputs, target):
                # filter unlabeled samples if there are any (have label -1)
                labeled = (tgt != -1).nonzero().view(-1)
                op, tgt = op[labeled], tgt[labeled]

                l, nt = criterion(op, tgt)
                loss = loss + l  # out-of-place, see train()
                n_triplets += nt

            non_zero_triplets.update(n_triplets, target[0].size(0))
            # measure GTE and record loss
            gte, dist_ap, dist_an = GTEMulticlass(outputs, target)
            gtes.update(gte.data.cpu(), target[0].size(0))
            distances_ap.update(dist_ap.data.cpu(), target[0].size(0))
            distances_an.update(dist_an.data.cpu(), target[0].size(0))
            losses.update(loss.data[0].cpu(), target[0].size(0))

        print('\nVal set: Average loss: {:.4f} Average GTE {:.2f}%, '
              'Average non-zero triplets: {:d} LR: {:.6f}'.format(float(losses.avg), float(gtes.avg) * 100.,
                                                                  int(non_zero_triplets.avg),
                                                                  optimizer.param_groups[-1]['lr']))
        log.write('loss', float(losses.avg), epoch, test=True)
        log.write('gte', float(gtes.avg), epoch, test=True)
        log.write('non-zero trplts', int(non_zero_triplets.avg), epoch, test=True)
        log.write('dist_ap', float(distances_ap.avg), epoch, test=True)
        log.write('dist_an', float(distances_an.avg), epoch, test=True)
        return losses.avg, 1 - gtes.avg

    if start_epoch == 1:  # compute baseline:
        _, best_acc = test(epoch=0)
    # otherwise a checkpoint was loaded and best_acc comes from it

    for epoch in range(start_epoch, epochs + 1):
        if selector_key == 'mixed' and epoch == 26:
            criterion.triplet_selector = HardestNegativeTripletSelector(margin, cpu=not use_cuda)
            print('Changed negative selection from semihard to hardest.')
        # train for one epoch
        train(epoch)
        # evaluate on validation set
        val_loss, val_acc = test(epoch)
        scheduler.step(val_loss)

        # remember best acc and save checkpoint
        is_best = val_acc > best_acc
        best_acc = max(val_acc, best_acc)
        save_checkpoint({
            'epoch': epoch,
            'state_dict': net.state_dict(),
            'best_acc': best_acc,
        }, is_best, expname, directory=log_dir)

        if optimizer.param_groups[-1]['lr'] < 1e-5:
            print('Learning rate reached minimum threshold. End training.')
            break

    # report best values
    best = torch.load(os.path.join(log_dir, expname + '_model_best.pth.tar'),
                      map_location=lambda storage, loc: storage)
    # The checkpoint dict saved above carries 'best_acc' (there is no 'acc').
    print('Finished training after epoch {}:\n\tbest acc score: {}'
          .format(best['epoch'], best['best_acc']))
    print('Best model mean accuracy: {}'.format(best_acc))
def main():
    """Compute per-layer Mahalanobis scores and save them as ``.npy`` files.

    Uses the module-level ``args``.  For every noise magnitude and every
    out-of-distribution dataset in the hard-coded list, scores of the
    in-distribution test set and of the OOD set are computed for each feature
    layer with ``lib_generation.get_Mahalanobis_score``, merged with their
    labels and written to
    ``<args.outf>/Mahalanobis_<magnitude>_<dataset>_<out_dist>.npy``.

    Side effects: appends ``<net_type>_<dataset>/`` to ``args.outf``, creates
    that directory, and selects/seeds the CUDA device ``args.gpu``.

    Raises:
        Exception: if ``args.net_type`` is not a supported network.
    """
    # set the path to pre-trained model and output
    args.outf = args.outf + args.net_type + '_' + args.dataset + '/'
    # exist_ok avoids the check-then-create race and also creates parents.
    os.makedirs(args.outf, exist_ok=True)

    # Select the device first: manual_seed() seeds the *current* device.
    torch.cuda.set_device(args.gpu)
    torch.cuda.manual_seed(0)

    out_dist_list = [
        'skin_cli', 'skin_derm', 'corrupted', 'corrupted_70', 'imgnet', 'nct',
        'final_test'
    ]

    def _restore(net, ckpt_path):
        """Load ``model_state_dict`` from ``ckpt_path`` into ``net`` (eval mode, on GPU)."""
        ckpt = torch.load(ckpt_path)
        net.load_state_dict(ckpt['model_state_dict'])
        net.eval()
        net.cuda()
        return net

    # load networks
    if args.net_type == 'densenet_121':
        model = _restore(
            densenet_121.Net(models.densenet121(pretrained=False), 8),
            "../checkpoints/densenet-121/checkpoint.pth")
    elif args.net_type == 'mobilenet':
        model = _restore(
            mobilenet.Net(models.mobilenet_v2(pretrained=False), 8),
            "../checkpoints/mobilenet/checkpoint.pth")
        print("Done!")
    elif args.net_type == 'resnet_50':
        model = _restore(
            resnet_50.Net(models.resnet50(pretrained=False), 8),
            "../checkpoints/resnet-50/checkpoint.pth")
        print("Done!")
    elif args.net_type == 'vgg_16':
        model = _restore(
            vgg_16.Net(models.vgg16_bn(pretrained=False), 8),
            "../checkpoints/vgg-16/checkpoint.pth")
        print("Done!")
    else:
        raise Exception(f"There is no net_type={args.net_type} available.")

    in_transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225])
    ])
    print('load model: ' + args.net_type)

    # load dataset
    print('load target data: ', args.dataset)
    train_loader, test_loader = data_loader.getTargetDataSet(
        args.dataset, args.batch_size, in_transform, args.dataroot)

    # set information about feature extraction
    model.eval()
    # A dummy batch is pushed through the network to read the size of
    # dimension 1 of every feature map returned by feature_list().
    temp_x = torch.rand(2, 3, 224, 224).cuda()
    temp_x = Variable(temp_x)
    temp_list = model.feature_list(temp_x)[1]
    num_output = len(temp_list)
    feature_list = np.array([out.size(1) for out in temp_list], dtype=float)

    print('get sample mean and covariance')
    sample_mean, precision = lib_generation.sample_estimator(
        model, args.num_classes, feature_list, train_loader)

    def _scores_per_layer(loader, is_in_distribution, magnitude):
        """Return a float32 matrix with one column block of scores per layer."""
        columns = []
        for layer_idx in range(num_output):
            scores = lib_generation.get_Mahalanobis_score(
                model, loader, args.num_classes, args.outf,
                is_in_distribution, args.net_type, sample_mean, precision,
                layer_idx, magnitude)
            scores = np.asarray(scores, dtype=np.float32)
            columns.append(scores.reshape((scores.shape[0], -1)))
        return np.concatenate(columns, axis=1)

    print('get Mahalanobis scores')
    m_list = [0.0, 0.01, 0.005, 0.002, 0.0014, 0.001, 0.0005]
    for magnitude in m_list:
        print('Noise: ' + str(magnitude))
        Mahalanobis_in = _scores_per_layer(test_loader, True, magnitude)

        for out_dist in out_dist_list:
            out_test_loader = data_loader.getNonTargetDataSet(
                out_dist, args.batch_size, in_transform, args.dataroot)
            print('Out-distribution: ' + out_dist)
            Mahalanobis_out = _scores_per_layer(out_test_loader, False, magnitude)

            Mahalanobis_data, Mahalanobis_labels = lib_generation.merge_and_generate_labels(
                Mahalanobis_out, Mahalanobis_in)
            file_name = os.path.join(
                args.outf, 'Mahalanobis_%s_%s_%s.npy' %
                (str(magnitude), args.dataset, out_dist))
            # Labels are appended as the trailing column(s) of the saved array.
            Mahalanobis_data = np.concatenate(
                (Mahalanobis_data, Mahalanobis_labels), axis=1)
            np.save(file_name, Mahalanobis_data)
def train_multiclass(
        train_file,
        test_file,
        stat_file,
        model='mobilenet_v2',
        classes=('artist_name', 'genre', 'style', 'technique', 'century'),
        im_path='/export/home/kschwarz/Documents/Data/Wikiart_artist49_images',
        label_file='_user_labels.pkl',
        chkpt=None,
        weight_file=None,
        use_gpu=True,
        device=0,
        epochs=100,
        batch_size=32,
        lr=1e-4,
        momentum=0.9,
        log_interval=10,
        log_dir='runs',
        exp_name=None,
        seed=123):
    """Train an ``OctopusNet`` with one cross-entropy head per label category.

    Args:
        train_file, test_file: passed to ``create_trainset`` /
            ``create_valset`` to build the datasets.
        stat_file: pickle file holding ``'mean'`` and ``'std'`` used for
            input normalisation.
        model: body network; one of ``squeezenet``, ``mobilenet_v1``,
            ``mobilenet_v2``, ``vgg16_bn``, ``inception_v3``, ``alexnet``
            (case-insensitive).
        classes: label categories; one classification output per entry.
        im_path: image root handed to the dataset factories.
        label_file: pickle file whose ``'labels'`` entry is summarised into
            the written config.
        chkpt: optional checkpoint file to resume from.
        weight_file: optional file whose ``'state_dict'`` initialises the
            body network; when given, the body is built with
            ``pretrained=False``.
        use_gpu, device: CUDA is used when requested and available.
        epochs, batch_size, lr, momentum: SGD training schedule.
        log_interval: number of batches between progress prints.
        log_dir, exp_name: where logs and checkpoints are written.
        seed: RNG seed.

    Raises:
        AssertionError: for an unknown ``model``.
    """
    argvars = locals().copy()  # must stay first: snapshot of the arguments only
    torch.manual_seed(seed)

    # LOAD DATASET
    # Pickle data is binary, so the file must be opened in 'rb' mode.
    with open(stat_file, 'rb') as f:
        data = pickle.load(f)
        mean, std = data['mean'], data['std']
        mean = [float(m) for m in mean]
        std = [float(s) for s in std]
    normalize = transforms.Normalize(mean=mean, std=std)
    train_transform = transforms.Compose([
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.RandomRotation(90),
        transforms.ToTensor(),
        normalize,
    ])
    val_transform = transforms.Compose([
        transforms.RandomResizedCrop(224),
        transforms.ToTensor(),
        normalize,
    ])

    model_key = model.lower()
    if model_key == 'inception_v3':  # change input size to 299
        train_transform.transforms[0].size = (299, 299)
        val_transform.transforms[0].size = (299, 299)

    trainset = create_trainset(train_file, label_file, im_path,
                               train_transform, classes)
    valset = create_valset(test_file, im_path, val_transform,
                           trainset.labels_to_ints)
    num_labels = [len(trainset.labels_to_ints[c]) for c in classes]

    # PARAMETERS
    use_cuda = use_gpu and torch.cuda.is_available()
    if use_cuda:
        torch.cuda.set_device(device)
        torch.cuda.manual_seed_all(seed)

    use_pretrained = weight_file is None
    if model_key not in [
            'squeezenet', 'mobilenet_v1', 'mobilenet_v2', 'vgg16_bn',
            'inception_v3', 'alexnet'
    ]:
        # Explicit raise (not `assert`) so the check survives `python -O`.
        raise AssertionError(
            'Unknown model {}\n\t+ Choose from: '
            '[squeezenet, mobilenet_v1, mobilenet_v2, vgg16_bn, inception_v3, alexnet].'
            .format(model))
    elif model_key == 'mobilenet_v1':
        bodynet = mobilenet_v1(pretrained=use_pretrained)
    elif model_key == 'mobilenet_v2':
        bodynet = mobilenet_v2(pretrained=use_pretrained)
    elif model_key == 'vgg16_bn':
        bodynet = vgg16_bn(pretrained=use_pretrained)
    elif model_key == 'inception_v3':
        bodynet = inception_v3(pretrained=use_pretrained)
    elif model_key == 'alexnet':
        bodynet = alexnet(pretrained=use_pretrained)
    else:  # squeezenet
        bodynet = squeezenet(pretrained=use_pretrained)

    # Load weights for the body network
    if weight_file is not None:
        print("=> loading weights from '{}'".format(weight_file))
        file_weights = torch.load(
            weight_file,
            map_location=lambda storage, loc: storage)['state_dict']
        state_dict = bodynet.state_dict()
        # in case of multilabel weight file
        file_weights = {
            k.replace('bodynet.', ''): v
            for k, v in file_weights.items()
        }
        # number of classes might have changed: keep only matching shapes
        pretrained_dict = {
            k: v
            for k, v in file_weights.items()
            if k in state_dict and v.shape == state_dict[k].shape
        }

        # Report which weights will not be transferred.  This compares key
        # sets and shapes only; `==` on dicts of tensors is ambiguous.
        for k in sorted(set(state_dict) | set(file_weights)):
            if k not in file_weights:
                print('\tWeights for "{}" were not found in weight file.'.
                      format(k))
            elif k not in state_dict:
                print(
                    '\tWeights for "{}" were are not part of the used model.'
                    .format(k))
            elif state_dict[k].shape != file_weights[k].shape:
                print(
                    '\tShapes of "{}" are different in model ({}) and weight file ({}).'
                    .format(k, state_dict[k].shape, file_weights[k].shape))

        state_dict.update(pretrained_dict)
        bodynet.load_state_dict(state_dict)

    net = OctopusNet(bodynet, n_labels=num_labels)

    n_parameters = sum(
        p.data.nelement() for p in net.parameters() if p.requires_grad)
    if use_cuda:
        net = net.cuda()
    net_name = str(net).split('(', 1)[0]
    print('Using {}\n\t+ Number of params: {}'.format(net_name, n_parameters))

    if not os.path.isdir(log_dir):
        os.makedirs(log_dir)

    # tensorboard summary writer
    timestamp = time.strftime('%m-%d-%H-%M')
    expname = timestamp + '_' + net_name
    if exp_name is not None:
        expname = expname + '_' + exp_name
    log = TBPlotter(os.path.join(log_dir, 'tensorboard', expname))
    log.print_logdir()

    # allow auto-tuner to find best algorithm for the hardware
    cudnn.benchmark = True

    with open(label_file, 'rb') as f:
        labels = pickle.load(f)['labels']
        n_labeled = '\t'.join(
            [str(Counter(l).items()) for l in labels.transpose()])

    write_config(argvars,
                 os.path.join(log_dir, expname),
                 extras={'n_labeled': n_labeled})

    # INITIALIZE TRAINING
    optimizer = optim.SGD(net.parameters(), lr=lr, momentum=momentum)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                     'min',
                                                     patience=10,
                                                     threshold=1e-1,
                                                     verbose=True)
    criterion = nn.CrossEntropyLoss()
    if use_cuda:
        criterion = criterion.cuda()

    kwargs = {'num_workers': 4} if use_cuda else {}
    trainloader = DataLoader(trainset,
                             batch_size=batch_size,
                             shuffle=True,
                             **kwargs)
    valloader = DataLoader(valset,
                           batch_size=batch_size,
                           shuffle=True,
                           **kwargs)

    # optionally resume from a checkpoint
    start_epoch = 1
    best_acc_score = None
    best_acc = None
    if chkpt is not None:
        if os.path.isfile(chkpt):
            print("=> loading checkpoint '{}'".format(chkpt))
            checkpoint = torch.load(chkpt,
                                    map_location=lambda storage, loc: storage)
            start_epoch = checkpoint['epoch']
            best_acc_score = checkpoint['best_acc_score']
            best_acc = checkpoint['acc']
            net.load_state_dict(checkpoint['state_dict'])
            print("=> loaded checkpoint '{}' (epoch {})".format(
                chkpt, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(chkpt))

    def _accumulate(outputs, target, preds, loss, class_acc):
        """Add each category's loss and update its accuracy meter.

        Categories without any labelled sample in the batch are skipped: a
        mean over an empty selection would make both the loss and the
        accuracy NaN.  Returns the summed loss and the indices of the
        categories that were updated.
        """
        updated = []
        for i, (o, t, p) in enumerate(zip(outputs, target, preds)):
            # filter unlabeled samples if there are any (have label -1)
            labeled = (t != -1).nonzero().view(-1)
            if labeled.numel() == 0:
                continue
            o, t, p = o[labeled], t[labeled], p[labeled]
            # Out-of-place add: on CPU `loss` is a leaf that requires grad,
            # and in-place ops on such a leaf are rejected.
            loss = loss + criterion(o, t)
            # measure class accuracy
            class_acc[i].update(
                (torch.sum(p == t).type(torch.FloatTensor) / t.size(0)).data)
            updated.append(i)
        return loss, updated

    def train(epoch):
        """Run one training epoch and log the running averages."""
        losses = AverageMeter()
        accs = AverageMeter()
        class_acc = [AverageMeter() for _ in classes]

        # switch to train mode
        net.train()
        for batch_idx, (data, target) in enumerate(trainloader):
            if use_cuda:
                data, target = Variable(
                    data.cuda()), [Variable(t.cuda()) for t in target]
            else:
                data, target = Variable(data), [Variable(t) for t in target]

            # compute output
            outputs = net(data)
            preds = [torch.max(outputs[i], 1)[1] for i in range(len(classes))]

            loss = Variable(torch.Tensor([0]),
                            requires_grad=True).type_as(data[0])
            loss, updated = _accumulate(outputs, target, preds, loss,
                                        class_acc)
            if updated:
                accs.update(
                    torch.mean(torch.stack([class_acc[i].val
                                            for i in updated])),
                    target[0].size(0))
            losses.update(loss.data, target[0].size(0))

            # compute gradient and do optimizer step
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            if batch_idx % log_interval == 0:
                # `target` is a list with one tensor per category, so its
                # length is not the batch size; use batch_size for progress.
                print('Train Epoch: {} [{}/{}]\t'
                      'Loss: {:.4f} ({:.4f})\t'
                      'Acc: {:.2f}% ({:.2f}%)'.format(epoch,
                                                      batch_idx * batch_size,
                                                      len(trainloader.dataset),
                                                      float(losses.val),
                                                      float(losses.avg),
                                                      float(accs.val) * 100.,
                                                      float(accs.avg) * 100.))
                print('\t' + '\n\t'.join([
                    '{}: {:.2f}%'.format(classes[i],
                                         float(class_acc[i].val) * 100.)
                    for i in range(len(classes))
                ]))

        # log avg values to somewhere
        log.write('loss', float(losses.avg), epoch, test=False)
        log.write('acc', float(accs.avg), epoch, test=False)
        # NOTE(review): every category is written under the same 'class_acc'
        # tag -- confirm that TBPlotter is meant to receive them this way.
        for i in range(len(classes)):
            log.write('class_acc', float(class_acc[i].avg), epoch, test=False)

    def test(epoch):
        """Evaluate on the validation set.

        Returns ``(avg loss, acc score, avg acc, per-class avg acc)`` as plain
        Python floats / list of floats.
        """
        losses = AverageMeter()
        accs = AverageMeter()
        class_acc = [AverageMeter() for _ in classes]

        # switch to evaluation mode
        net.eval()
        for batch_idx, (data, target) in enumerate(valloader):
            if use_cuda:
                data, target = Variable(
                    data.cuda()), [Variable(t.cuda()) for t in target]
            else:
                data, target = Variable(data), [Variable(t) for t in target]

            # compute output
            outputs = net(data)
            preds = [torch.max(outputs[i], 1)[1] for i in range(len(classes))]

            loss = Variable(torch.Tensor([0]),
                            requires_grad=True).type_as(data[0])
            loss, updated = _accumulate(outputs, target, preds, loss,
                                        class_acc)
            if updated:
                accs.update(
                    torch.mean(torch.stack([class_acc[i].val
                                            for i in updated])),
                    target[0].size(0))
            losses.update(loss.data, target[0].size(0))

        # compute mean - std/mean as measure for accuracy
        score = accs.avg - torch.std(
            torch.stack([class_acc[i].avg
                         for i in range(len(classes))])) / accs.avg

        print(
            '\nVal set: Average loss: {:.4f} Average acc {:.2f}% Acc score {:.2f} LR: {:.6f}'
            .format(float(losses.avg),
                    float(accs.avg) * 100., float(score),
                    optimizer.param_groups[-1]['lr']))
        print('\t' + '\n\t'.join([
            '{}: {:.2f}%'.format(classes[i],
                                 float(class_acc[i].avg) * 100.)
            for i in range(len(classes))
        ]))
        log.write('loss', float(losses.avg), epoch, test=True)
        log.write('acc', float(accs.avg), epoch, test=True)
        for i in range(len(classes)):
            log.write('class_acc', float(class_acc[i].avg), epoch, test=True)
        # A plain float (not a 1-element ndarray) is handed to the scheduler.
        return float(losses.avg), float(score), float(
            accs.avg), [float(class_acc[i].avg) for i in range(len(classes))]

    if start_epoch == 1:  # compute baseline:
        _, best_acc_score, best_acc, _ = test(epoch=0)
    # otherwise a checkpoint was loaded and both values come from it

    for epoch in range(start_epoch, epochs + 1):
        # train for one epoch
        train(epoch)
        # evaluate on validation set
        val_loss, val_acc_score, val_acc, val_class_accs = test(epoch)
        scheduler.step(val_loss)

        # remember best acc and save checkpoint
        is_best = val_acc_score > best_acc_score
        best_acc_score = max(val_acc_score, best_acc_score)
        save_checkpoint(
            {
                'epoch': epoch,
                'state_dict': net.state_dict(),
                'best_acc_score': best_acc_score,
                'acc': val_acc,
                'class_acc': dict(zip(classes, val_class_accs))
            },
            is_best,
            expname,
            directory=log_dir)

        if val_acc > best_acc:
            shutil.copyfile(
                os.path.join(log_dir, expname + '_checkpoint.pth.tar'),
                os.path.join(log_dir,
                             expname + '_model_best_mean_acc.pth.tar'))
        best_acc = max(val_acc, best_acc)

        if optimizer.param_groups[-1]['lr'] < 1e-5:
            print('Learning rate reached minimum threshold. End training.')
            break

    # report best values
    best = torch.load(os.path.join(log_dir, expname + '_model_best.pth.tar'),
                      map_location=lambda storage, loc: storage)
    print(
        'Finished training after epoch {}:\n\tbest acc score: {}\n\tacc: {}\n\t class acc: {}'
        .format(best['epoch'], best['best_acc_score'], best['acc'],
                best['class_acc']))
    print('Best model mean accuracy: {}'.format(best_acc))