コード例 #1
0
def get_imagenet_models(model_name):
    """Build the pretrained ImageNet classifier selected by ``model_name``.

    Recognised names are ``'model_vgg16bn'``, ``'model_resnet18_imgnet'`` and
    ``'model_inception'``.  The constructor is imported from the ``models``
    module only after a name has matched, so an unknown name is rejected
    without importing anything.

    Args:
        model_name: Key identifying the architecture to build.

    Returns:
        Whatever the matching ``models`` constructor returns when called with
        ``pretrained=True``.

    Raises:
        ValueError: If ``model_name`` is not one of the recognised names.
    """
    # Each branch only binds the constructor; the call happens once below.
    if model_name == 'model_vgg16bn':
        from models import vgg16_bn as build
    elif model_name == 'model_resnet18_imgnet':
        from models import resnet18 as build
    elif model_name == 'model_inception':
        from models import inception_v3 as build
    else:
        raise ValueError(f'Buggya no model named {model_name}')
    return build(pretrained=True)
コード例 #2
0
def get_model_for_attack(model_name):
    """Return the model to attack, selected by ``model_name``.

    Supported names:

    * ``'model_vgg16bn'``, ``'model_resnet18'``, ``'model_inceptionv3'`` --
      the corresponding constructor called with ``pretrained=True``.
    * ``'model_vitb'`` -- ``ViT('B_16_imagenet1k', pretrained=True)`` wrapped
      in ``ImageNetRenormalize`` and ``MegaSizer``.
    * ``'model_hub:<repo>:<entry>'`` -- ``torch.hub.load(repo, entry,
      pretrained=True)`` wrapped in ``Cifar10Renormalize``.
    * ``'model_mnist:<key>'`` -- entry ``key`` of the object stored in
      ``mnist.pt``.
    * ``'model_ex:<path>'`` -- the object stored at ``path``.

    Args:
        model_name: Model selector string, see above.

    Returns:
        The constructed or loaded model object.

    Raises:
        ValueError: If ``model_name`` matches none of the supported forms (or
            a ``model_hub:`` name does not have exactly two ``:``-separated
            fields after the prefix).
    """
    if model_name == 'model_vgg16bn':
        model = vgg16_bn(pretrained=True)
    elif model_name == 'model_resnet18':
        model = resnet18(pretrained=True)
    elif model_name == 'model_inceptionv3':
        model = inception_v3(pretrained=True)
    elif model_name == 'model_vitb':
        from mnist_vit import ViT, MegaSizer
        model = MegaSizer(
            ImageNetRenormalize(ViT('B_16_imagenet1k', pretrained=True)))
    elif model_name.startswith('model_hub:'):
        # Exactly "<prefix>:<repo>:<entry>"; a third colon is ambiguous
        # (it could belong to either field), so it is still rejected.
        _, a, b = model_name.split(":")
        model = torch.hub.load(a, b, pretrained=True)
        model = Cifar10Renormalize(model)
    elif model_name.startswith('model_mnist:'):
        # Split once so the key itself may contain ':'.
        _, a = model_name.split(":", 1)
        model = torch.load('mnist.pt')[a]
    elif model_name.startswith('model_ex:'):
        # Split once so paths containing ':' (e.g. "C:\\models\\x.pt") work.
        # NOTE(review): torch.load unpickles the file -- only pass paths to
        # checkpoints you trust.
        _, a = model_name.split(":", 1)
        model = torch.load(a)
    else:
        raise ValueError(f'Model {model_name} does not exist.')
    return model
コード例 #3
0
def main():
    """Run the checkpointed classifier over the test split and export Grad-CAMs.

    Builds the network named by ``args.net_type``, restores its weights from
    the checkpoint at ``args.model_path`` (key ``"model"``), then for every
    test sample prints the predicted and target class indices and hands the
    image and predicted class to ``GradCAMmodule.saveGradCAM``.  The output
    directory ``./save_fig`` is created if needed.

    Reads the module-level ``args`` and ``image_dir``.

    Raises:
        ValueError: If ``args.net_type`` is not a supported architecture.
    """
    output_dir = "./save_fig"

    # Device configuration
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    ### data config
    test_dataset = load_data.Dog_dataloader(image_dir=image_dir,
                                            num_class=args.num_classes,
                                            mode="test")
    test_loader = torch.utils.data.DataLoader(test_dataset,
                                              batch_size=1,
                                              shuffle=False,
                                              num_workers=2)

    ### novelty data
    # NOTE(review): the OOD loader is built but never iterated below; kept
    # because the side effects of Dog_dataloader are not visible from here.
    out_test_dataset = load_data.Dog_dataloader(image_dir=image_dir,
                                                num_class=args.num_classes,
                                                mode="OOD")
    out_test_loader = torch.utils.data.DataLoader(out_test_dataset,
                                                  batch_size=1,
                                                  shuffle=False,
                                                  num_workers=2)

    ##### model config
    net_type = args.net_type
    if net_type == "resnet50":
        # resnet50 is the only constructor called without ``num_cc``.
        model = models.resnet50(num_c=args.num_classes, pretrained=True)
    elif net_type in ("resnet34", "vgg19", "vgg16", "vgg19_bn", "vgg16_bn"):
        build = getattr(models, net_type)
        model = build(num_c=args.num_classes,
                      num_cc=args.OOD_num_classes,
                      pretrained=True)
    else:
        # Fail here with a clear message instead of hitting an unbound
        # ``model`` further down.
        raise ValueError("Unsupported net_type: {}".format(net_type))

    print("load checkpoint_last")
    # map_location lets a checkpoint saved on GPU be restored on a CPU-only
    # machine, matching the device fallback above.
    checkpoint = torch.load(args.model_path, map_location=device)

    ##### load model
    model.load_state_dict(checkpoint["model"])

    #### create folder
    Path(output_dir).mkdir(exist_ok=True, parents=True)

    model = model.to(device).eval()
    # Start grad-CAM
    # NOTE(review): ``bp``, ``gcam`` and ``inv_normalize`` are not used below;
    # the two wrappers are kept in case their constructors attach hooks to
    # ``model`` -- confirm before removing.
    bp = BackPropagation(model=model)
    # Inverse of Normalize(mean=[0.485, 0.456, 0.406],
    #                      std=[0.229, 0.224, 0.225]).
    inv_normalize = transforms.Normalize(
        mean=[-0.485 / 0.229, -0.456 / 0.224, -0.406 / 0.225],
        std=[1 / 0.229, 1 / 0.224, 1 / 0.225])
    target_layer = "layer4"

    gcam = GradCAM(model=model)

    grad_cam = GradCAMmodule(target_layer, output_dir)
    grad_cam.model_config(model)
    for j, test_data in enumerate(test_loader):
        #### initialized
        org_image = test_data['input'].to(device)
        target_class = test_data['label'].to(device)

        # The label's argmax is used as the target class index.
        target_class = int(target_class.argmax().cpu().detach())
        result = model(org_image).argmax()
        print("number: {} pred: {} target: {}".format(j, result, target_class))
        result = int(result.cpu().detach())
        grad_cam.saveGradCAM(org_image, result, j)
コード例 #4
0
def train_multiclass(train_file, test_file, stat_file,
                     model='mobilenet_v2',
                     classes=('artist_name', 'genre', 'style', 'technique', 'century'),
                     label_file='_user_labels.pkl',
                     im_path='/export/home/kschwarz/Documents/Data/Wikiart_artist49_images',
                     chkpt=None, weight_file=None,
                     triplet_selector='semihard', margin=0.2,
                     labels_per_class=4, samples_per_label=4,
                     use_gpu=True, device=0,
                     epochs=100, batch_size=32, lr=1e-4, momentum=0.9,
                     log_interval=10, log_dir='runs',
                     exp_name=None, seed=123):
    """Train a ``MetricNet`` with a per-class triplet loss and checkpoint it.

    A body network is built (optionally initialised from ``weight_file``),
    wrapped in ``MetricNet`` with one output per entry of ``classes`` and
    trained with SGD.  After every epoch the validation loss drives a
    ``ReduceLROnPlateau`` scheduler and a checkpoint is written through
    ``save_checkpoint``.  Training stops after ``epochs`` epochs or once the
    learning rate drops below ``1e-5``.

    Args:
        train_file: Passed to ``create_trainset``.
        test_file: Passed to ``create_valset``.
        stat_file: Pickle file providing ``'mean'`` and ``'std'`` used for
            input normalisation.
        model: Body network name; one of squeezenet, mobilenet_v1,
            mobilenet_v2, vgg16_bn, inception_v3, alexnet (case-insensitive).
        classes: Label columns to learn; one network output per entry.
        label_file: Pickle file with a ``'labels'`` entry; also passed to
            ``create_trainset``.
        im_path: Image directory passed to the dataset constructors.
        chkpt: Optional checkpoint path to resume from.
        weight_file: Optional weight file for the body network.  When it is
            ``None`` the body constructor is called with ``pretrained=True``.
        triplet_selector: 'random', 'semihard', 'hardest', 'mixed' or
            'khardest'.  'mixed' starts with semihard negatives and switches
            to hardest negatives at epoch 26.
        margin: Triplet-loss margin.
        labels_per_class: ``n_label`` of the balanced batch sampler.
        samples_per_label: ``n_per_label`` of the balanced batch sampler.
        use_gpu: Use CUDA when it is available.
        device: CUDA device index.
        epochs: Last epoch number to train (inclusive).
        batch_size: Not used by this function directly; it is only recorded
            in the written config (batches come from the balanced sampler).
        lr: SGD learning rate.
        momentum: SGD momentum.
        log_interval: Print training progress every this many batches.
        log_dir: Directory for logs and checkpoints.
        exp_name: Optional suffix for the experiment name.
        seed: Random seed for torch (and CUDA when used).

    Raises:
        AssertionError: If ``model`` or ``triplet_selector`` is unknown.
    """
    argvars = locals().copy()
    torch.manual_seed(seed)

    # LOAD DATASET
    # Pickle data is binary; text mode breaks pickle.load on Python 3.
    with open(stat_file, 'rb') as f:
        data = pickle.load(f)
        mean, std = data['mean'], data['std']
        mean = [float(m) for m in mean]
        std = [float(s) for s in std]
    normalize = transforms.Normalize(mean=mean, std=std)
    train_transform = transforms.Compose([
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.RandomRotation(90),
        transforms.ToTensor(),
        normalize,
    ])
    val_transform = transforms.Compose([
        transforms.RandomResizedCrop(224),
        transforms.ToTensor(),
        normalize,
    ])

    model_key = model.lower()
    if model_key == 'inception_v3':            # change input size to 299
        train_transform.transforms[0].size = (299, 299)
        val_transform.transforms[0].size = (299, 299)
    trainset = create_trainset(train_file, label_file, im_path, train_transform, classes)
    for c in classes:
        if len(trainset.labels_to_ints[c]) < labels_per_class:
            print('less labels in class {} than labels_per_class, use all available labels ({})'
                  .format(c, len(trainset.labels_to_ints[c])))
    valset = create_valset(test_file, im_path, val_transform, trainset.labels_to_ints)
    # PARAMETERS
    use_cuda = use_gpu and torch.cuda.is_available()
    if use_cuda:
        torch.cuda.set_device(device)
        torch.cuda.manual_seed_all(seed)

    if model_key not in ['squeezenet', 'mobilenet_v1', 'mobilenet_v2', 'vgg16_bn', 'inception_v3', 'alexnet']:
        # Raised explicitly (not via ``assert``) so the check survives ``python -O``.
        raise AssertionError('Unknown model {}\n\t+ Choose from: '
                             '[sqeezenet, mobilenet_v1, mobilenet_v2, vgg16_bn, inception_v3, alexnet].'.format(model))
    elif model_key == 'mobilenet_v1':
        bodynet = mobilenet_v1(pretrained=weight_file is None)
    elif model_key == 'mobilenet_v2':
        bodynet = mobilenet_v2(pretrained=weight_file is None)
    elif model_key == 'vgg16_bn':
        bodynet = vgg16_bn(pretrained=weight_file is None)
    elif model_key == 'inception_v3':
        bodynet = inception_v3(pretrained=weight_file is None)
    elif model_key == 'alexnet':
        bodynet = alexnet(pretrained=weight_file is None)
    else:       # squeezenet
        bodynet = squeezenet(pretrained=weight_file is None)

    # Load weights for the body network
    if weight_file is not None:
        print("=> loading weights from '{}'".format(weight_file))
        loaded = torch.load(weight_file, map_location=lambda storage, loc: storage)['state_dict']
        state_dict = bodynet.state_dict()
        # in case of multilabel weight file the body weights carry a 'bodynet.' prefix
        loaded = {k.replace('bodynet.', ''): v for k, v in loaded.items()}

        # Report which weights will not be transferred.  This runs on the
        # unfiltered weights so every message can actually be reached, and it
        # compares names and shapes only (comparing the dicts themselves would
        # compare tensors element-wise).
        for k in sorted(set(state_dict) | set(loaded)):
            if k not in loaded:
                print('\tWeights for "{}" were not found in weight file.'.format(k))
            elif k not in state_dict:
                print('\tWeights for "{}" were are not part of the used model.'.format(k))
            elif state_dict[k].shape != loaded[k].shape:
                print('\tShapes of "{}" are different in model ({}) and weight file ({}).'.
                      format(k, state_dict[k].shape, loaded[k].shape))

        # Transfer only weights whose name and shape match the model
        # (the number of classes might have changed).
        pretrained_dict = {k: v for k, v in loaded.items()
                           if k in state_dict and v.shape == state_dict[k].shape}
        state_dict.update(pretrained_dict)
        bodynet.load_state_dict(state_dict)

    net = MetricNet(bodynet, len(classes))

    n_parameters = sum(p.data.nelement() for p in net.parameters() if p.requires_grad)
    if use_cuda:
        net = net.cuda()
    print('Using {}\n\t+ Number of params: {}'.format(str(net).split('(', 1)[0], n_parameters))

    if not os.path.isdir(log_dir):
        os.makedirs(log_dir)

    # tensorboard summary writer
    timestamp = time.strftime('%m-%d-%H-%M')
    expname = timestamp + '_' + str(net).split('(', 1)[0]
    if exp_name is not None:
        expname = expname + '_' + exp_name
    log = TBPlotter(os.path.join(log_dir, 'tensorboard', expname))
    log.print_logdir()

    # allow auto-tuner to find best algorithm for the hardware
    cudnn.benchmark = True

    with open(label_file, 'rb') as f:
        labels = pickle.load(f)['labels']
        n_labeled = '\t'.join([str(Counter(l).items()) for l in labels.transpose()])

    write_config(argvars, os.path.join(log_dir, expname), extras={'n_labeled': n_labeled})

    # INITIALIZE TRAINING
    optimizer = optim.SGD(net.parameters(), lr=lr, momentum=momentum)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', patience=10, threshold=1e-1, verbose=True)

    selector_key = triplet_selector.lower()
    if selector_key not in ['random', 'semihard', 'hardest', 'mixed', 'khardest']:
        raise AssertionError('Unknown option {} for triplet selector. Choose from "random", "semihard", "hardest" or "mixed"'
                             '.'.format(triplet_selector))
    elif selector_key == 'random':
        criterion = TripletLoss(margin=margin,
                                triplet_selector=RandomNegativeTripletSelector(margin, cpu=not use_cuda))
    elif selector_key == 'semihard' or selector_key == 'mixed':
        criterion = TripletLoss(margin=margin,
                                triplet_selector=SemihardNegativeTripletSelector(margin, cpu=not use_cuda))
    elif selector_key == 'khardest':
        criterion = TripletLoss(margin=margin,
                                triplet_selector=KHardestNegativeTripletSelector(margin, k=3, cpu=not use_cuda))
    else:
        criterion = TripletLoss(margin=margin,
                                triplet_selector=HardestNegativeTripletSelector(margin, cpu=not use_cuda))
    if use_cuda:
        criterion = criterion.cuda()

    kwargs = {'num_workers': 4} if use_cuda else {}
    multilabel_train = np.stack([trainset.df[c].values for c in classes]).transpose()
    train_batch_sampler = BalancedBatchSamplerMulticlass(multilabel_train, n_label=labels_per_class,
                                                         n_per_label=samples_per_label, ignore_label=None)
    trainloader = DataLoader(trainset, batch_sampler=train_batch_sampler, **kwargs)
    multilabel_val = np.stack([valset.df[c].values for c in classes]).transpose()
    val_batch_sampler = BalancedBatchSamplerMulticlass(multilabel_val, n_label=labels_per_class,
                                                       n_per_label=samples_per_label, ignore_label=None)
    valloader = DataLoader(valset, batch_sampler=val_batch_sampler, **kwargs)

    # optionally resume from a checkpoint
    start_epoch = 1
    best_acc = None
    if chkpt is not None:
        if os.path.isfile(chkpt):
            print("=> loading checkpoint '{}'".format(chkpt))
            checkpoint = torch.load(chkpt, map_location=lambda storage, loc: storage)
            start_epoch = checkpoint['epoch']
            # Checkpoints written by this function store the score under
            # 'best_acc'; 'acc' is still honoured when it is present.
            best_acc = checkpoint['acc'] if 'acc' in checkpoint else checkpoint['best_acc']
            net.load_state_dict(checkpoint['state_dict'])
            print("=> loaded checkpoint '{}' (epoch {})"
                  .format(chkpt, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(chkpt))

    def forward_batch(data, target):
        """Run one batch through ``net``; return (outputs, targets, loss, n_triplets)."""
        target = torch.stack(target)
        if use_cuda:
            data, target = Variable(data.cuda()), [Variable(t.cuda()) for t in target]
        else:
            data, target = Variable(data), [Variable(t) for t in target]

        # compute output
        outputs = net(data)

        # normalize features
        for i in range(len(classes)):
            outputs[i] = torch.nn.functional.normalize(outputs[i], p=2, dim=1)

        loss = Variable(torch.Tensor([0]), requires_grad=True).type_as(data[0])
        n_triplets = 0
        for op, tgt in zip(outputs, target):
            # filter unlabeled samples if there are any (have label -1)
            labeled = (tgt != -1).nonzero().view(-1)
            op, tgt = op[labeled], tgt[labeled]

            class_loss, class_triplets = criterion(op, tgt)
            # Out-of-place add: ``loss`` can be a leaf tensor that requires
            # grad (type_as returns it unchanged when the type already
            # matches), and autograd rejects in-place updates of such leaves.
            loss = loss + class_loss
            n_triplets += class_triplets
        return outputs, target, loss, n_triplets

    def train(epoch):
        """Train for one epoch and log the averaged metrics."""
        losses = AverageMeter()
        gtes = AverageMeter()
        non_zero_triplets = AverageMeter()
        distances_ap = AverageMeter()
        distances_an = AverageMeter()

        # switch to train mode
        net.train()
        for batch_idx, (data, target) in enumerate(trainloader):
            outputs, target, loss, n_triplets = forward_batch(data, target)
            n_samples = target[0].size(0)

            non_zero_triplets.update(n_triplets, n_samples)
            # measure GTE and record loss
            gte, dist_ap, dist_an = GTEMulticlass(outputs, target)           # do not compute ap pairs for concealed classes
            gtes.update(gte.data, n_samples)
            distances_ap.update(dist_ap.data, n_samples)
            distances_an.update(dist_an.data, n_samples)
            losses.update(loss.data[0], n_samples)

            # compute gradient and do optimizer step
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            if batch_idx % log_interval == 0:
                print('Train Epoch: {} [{}/{}]\t'
                      'Loss: {:.4f} ({:.4f})\t'
                      'GTE: {:.2f}% ({:.2f}%)\t'
                      'Non-zero Triplets: {:d} ({:d})'.format(
                    epoch, batch_idx * len(target[0]), len(trainloader) * len(target[0]),
                    float(losses.val), float(losses.avg),
                    float(gtes.val) * 100., float(gtes.avg) * 100.,
                    int(non_zero_triplets.val), int(non_zero_triplets.avg)))

        # log avg values to somewhere
        log.write('loss', float(losses.avg), epoch, test=False)
        log.write('gte', float(gtes.avg), epoch, test=False)
        log.write('non-zero trplts', int(non_zero_triplets.avg), epoch, test=False)
        log.write('dist_ap', float(distances_ap.avg), epoch, test=False)
        log.write('dist_an', float(distances_an.avg), epoch, test=False)

    def test(epoch):
        """Evaluate on the validation loader; return (avg loss, 1 - avg GTE)."""
        losses = AverageMeter()
        gtes = AverageMeter()
        non_zero_triplets = AverageMeter()
        distances_ap = AverageMeter()
        distances_an = AverageMeter()

        # switch to evaluation mode
        net.eval()
        for batch_idx, (data, target) in enumerate(valloader):
            outputs, target, loss, n_triplets = forward_batch(data, target)
            n_samples = target[0].size(0)

            non_zero_triplets.update(n_triplets, n_samples)
            # measure GTE and record loss
            gte, dist_ap, dist_an = GTEMulticlass(outputs, target)
            gtes.update(gte.data.cpu(), n_samples)
            distances_ap.update(dist_ap.data.cpu(), n_samples)
            distances_an.update(dist_an.data.cpu(), n_samples)
            losses.update(loss.data[0].cpu(), n_samples)

        print('\nVal set: Average loss: {:.4f} Average GTE {:.2f}%, '
              'Average non-zero triplets: {:d} LR: {:.6f}'.format(float(losses.avg), float(gtes.avg) * 100.,
                                                                  int(non_zero_triplets.avg),
                                                                  optimizer.param_groups[-1]['lr']))
        log.write('loss', float(losses.avg), epoch, test=True)
        log.write('gte', float(gtes.avg), epoch, test=True)
        log.write('non-zero trplts', int(non_zero_triplets.avg), epoch, test=True)
        log.write('dist_ap', float(distances_ap.avg), epoch, test=True)
        log.write('dist_an', float(distances_an.avg), epoch, test=True)
        return losses.avg, 1 - gtes.avg

    if start_epoch == 1:         # compute baseline:
        _, best_acc = test(epoch=0)
    # otherwise a checkpoint was loaded and best_acc comes from it

    for epoch in range(start_epoch, epochs + 1):
        if selector_key == 'mixed' and epoch == 26:
            criterion.triplet_selector = HardestNegativeTripletSelector(margin, cpu=not use_cuda)
            print('Changed negative selection from semihard to hardest.')
        # train for one epoch
        train(epoch)
        # evaluate on validation set
        val_loss, val_acc = test(epoch)
        scheduler.step(val_loss)

        # remember best acc and save checkpoint
        is_best = val_acc > best_acc
        best_acc = max(val_acc, best_acc)
        save_checkpoint({
            'epoch': epoch,
            'state_dict': net.state_dict(),
            'best_acc': best_acc,
        }, is_best, expname, directory=log_dir)

        if optimizer.param_groups[-1]['lr'] < 1e-5:
            print('Learning rate reached minimum threshold. End training.')
            break

    # report best values
    # NOTE(review): assumes save_checkpoint writes '<expname>_model_best.pth.tar'
    # into log_dir when is_best is true -- the file is absent if no epoch
    # improved on the starting score, hence the existence check.
    best_path = os.path.join(log_dir, expname + '_model_best.pth.tar')
    if os.path.isfile(best_path):
        best = torch.load(best_path, map_location=lambda storage, loc: storage)
        best_score = best['acc'] if 'acc' in best else best['best_acc']
        print('Finished training after epoch {}:\n\tbest acc score: {}'
              .format(best['epoch'], best_score))
    else:
        print("=> no best-model checkpoint found at '{}'".format(best_path))
    print('Best model mean accuracy: {}'.format(best_acc))
コード例 #5
0
def main():
    """Compute Mahalanobis scores for in- and out-of-distribution data and save them.

    Loads the checkpointed network named by ``args.net_type``, estimates the
    sample mean and precision on the target training data, and then, for every
    noise magnitude and every out-of-distribution dataset, stacks the
    per-output scores returned by ``lib_generation.get_Mahalanobis_score`` for
    the in-distribution test set and the out-of-distribution set, merges them
    with labels and saves the result to
    ``<args.outf>/Mahalanobis_<magnitude>_<dataset>_<out_dist>.npy``.

    Reads the module-level ``args``; note that ``args.outf`` is extended in
    place with ``<net_type>_<dataset>/``.  Requires CUDA.

    Raises:
        Exception: If ``args.net_type`` is not a supported architecture.
    """
    # set the path to pre-trained model and output
    args.outf = args.outf + args.net_type + '_' + args.dataset + '/'
    # makedirs also creates missing parent directories and tolerates an
    # already existing output directory.
    os.makedirs(args.outf, exist_ok=True)
    torch.cuda.manual_seed(0)
    torch.cuda.set_device(args.gpu)

    out_dist_list = [
        'skin_cli', 'skin_derm', 'corrupted', 'corrupted_70', 'imgnet', 'nct',
        'final_test'
    ]

    # load networks
    # net_type -> (constructor, checkpoint path).  The constructors are
    # wrapped in lambdas so only the selected architecture's module is touched.
    networks = {
        'densenet_121': (
            lambda: densenet_121.Net(models.densenet121(pretrained=False), 8),
            "../checkpoints/densenet-121/checkpoint.pth"),
        'mobilenet': (
            lambda: mobilenet.Net(models.mobilenet_v2(pretrained=False), 8),
            "../checkpoints/mobilenet/checkpoint.pth"),
        'resnet_50': (
            lambda: resnet_50.Net(models.resnet50(pretrained=False), 8),
            "../checkpoints/resnet-50/checkpoint.pth"),
        'vgg_16': (
            lambda: vgg_16.Net(models.vgg16_bn(pretrained=False), 8),
            "../checkpoints/vgg-16/checkpoint.pth"),
    }
    if args.net_type not in networks:
        raise Exception(f"There is no net_type={args.net_type} available.")
    build_model, ckpt_path = networks[args.net_type]
    model = build_model()
    ckpt = torch.load(ckpt_path)
    model.load_state_dict(ckpt['model_state_dict'])
    model.eval()
    model.cuda()
    print("Done!")

    in_transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225])
    ])
    print('load model: ' + args.net_type)

    # load dataset
    print('load target data: ', args.dataset)
    train_loader, test_loader = data_loader.getTargetDataSet(
        args.dataset, args.batch_size, in_transform, args.dataroot)

    # set information about feature extraction: a dummy batch is pushed
    # through the model to learn how many outputs ``feature_list`` yields and
    # the size of dimension 1 of each of them.
    temp_x = torch.rand(2, 3, 224, 224).cuda()
    temp_x = Variable(temp_x)
    temp_list = model.feature_list(temp_x)[1]
    num_output = len(temp_list)
    feature_list = np.array([out.size(1) for out in temp_list], dtype=float)

    print('get sample mean and covariance')
    sample_mean, precision = lib_generation.sample_estimator(
        model, args.num_classes, feature_list, train_loader)

    def stacked_scores(loader, is_in_dist, magnitude):
        """Return the scores of all outputs for ``loader``, one column block per output."""
        columns = []
        for i in range(num_output):
            scores = lib_generation.get_Mahalanobis_score(
                model, loader, args.num_classes, args.outf, is_in_dist,
                args.net_type, sample_mean, precision, i, magnitude)
            scores = np.asarray(scores, dtype=np.float32)
            columns.append(scores.reshape((scores.shape[0], -1)))
        return np.concatenate(columns, axis=1)

    print('get Mahalanobis scores')
    m_list = [0.0, 0.01, 0.005, 0.002, 0.0014, 0.001, 0.0005]

    for magnitude in m_list:
        print('Noise: ' + str(magnitude))
        # In-distribution scores are computed once per magnitude and reused
        # for every out-of-distribution dataset.
        Mahalanobis_in = stacked_scores(test_loader, True, magnitude)

        for out_dist in out_dist_list:
            out_test_loader = data_loader.getNonTargetDataSet(
                out_dist, args.batch_size, in_transform, args.dataroot)
            print('Out-distribution: ' + out_dist)
            Mahalanobis_out = stacked_scores(out_test_loader, False, magnitude)

            Mahalanobis_data, Mahalanobis_labels = lib_generation.merge_and_generate_labels(
                Mahalanobis_out, Mahalanobis_in)
            file_name = os.path.join(
                args.outf, 'Mahalanobis_%s_%s_%s.npy' %
                (str(magnitude), args.dataset, out_dist))
            # Labels are appended as trailing column(s) of the saved array.
            Mahalanobis_data = np.concatenate(
                (Mahalanobis_data, Mahalanobis_labels), axis=1)
            np.save(file_name, Mahalanobis_data)
コード例 #6
0
def train_multiclass(
        train_file,
        test_file,
        stat_file,
        model='mobilenet_v2',
        classes=('artist_name', 'genre', 'style', 'technique', 'century'),
        im_path='/export/home/kschwarz/Documents/Data/Wikiart_artist49_images',
        label_file='_user_labels.pkl',
        chkpt=None,
        weight_file=None,
        use_gpu=True,
        device=0,
        epochs=100,
        batch_size=32,
        lr=1e-4,
        momentum=0.9,
        log_interval=10,
        log_dir='runs',
        exp_name=None,
        seed=123):
    argvars = locals().copy()
    torch.manual_seed(seed)

    # LOAD DATASET
    with open(stat_file, 'r') as f:
        data = pickle.load(f)
        mean, std = data['mean'], data['std']
        mean = [float(m) for m in mean]
        std = [float(s) for s in std]
    normalize = transforms.Normalize(mean=mean, std=std)
    train_transform = transforms.Compose([
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.RandomRotation(90),
        transforms.ToTensor(),
        normalize,
    ])
    val_transform = transforms.Compose([
        transforms.RandomResizedCrop(224),
        transforms.ToTensor(),
        normalize,
    ])

    if model.lower() == 'inception_v3':  # change input size to 299
        train_transform.transforms[0].size = (299, 299)
        val_transform.transforms[0].size = (299, 299)
    trainset = create_trainset(train_file, label_file, im_path,
                               train_transform, classes)
    valset = create_valset(test_file, im_path, val_transform,
                           trainset.labels_to_ints)
    num_labels = [len(trainset.labels_to_ints[c]) for c in classes]

    # PARAMETERS
    use_cuda = use_gpu and torch.cuda.is_available()
    if use_cuda:
        torch.cuda.set_device(device)
        torch.cuda.manual_seed_all(seed)

    if model.lower() not in [
            'squeezenet', 'mobilenet_v1', 'mobilenet_v2', 'vgg16_bn',
            'inception_v3', 'alexnet'
    ]:
        assert False, 'Unknown model {}\n\t+ Choose from: ' \
                      '[sqeezenet, mobilenet_v1, mobilenet_v2, vgg16_bn, inception_v3, alexnet].'.format(model)
    elif model.lower() == 'mobilenet_v1':
        bodynet = mobilenet_v1(pretrained=weight_file is None)
    elif model.lower() == 'mobilenet_v2':
        bodynet = mobilenet_v2(pretrained=weight_file is None)
    elif model.lower() == 'vgg16_bn':
        bodynet = vgg16_bn(pretrained=weight_file is None)
    elif model.lower() == 'inception_v3':
        bodynet = inception_v3(pretrained=weight_file is None)
    elif model.lower() == 'alexnet':
        bodynet = alexnet(pretrained=weight_file is None)
    else:  # squeezenet
        bodynet = squeezenet(pretrained=weight_file is None)

    # Load weights for the body network
    if weight_file is not None:
        print("=> loading weights from '{}'".format(weight_file))
        pretrained_dict = torch.load(
            weight_file,
            map_location=lambda storage, loc: storage)['state_dict']
        state_dict = bodynet.state_dict()
        pretrained_dict = {
            k.replace('bodynet.', ''): v
            for k, v in
            pretrained_dict.items()  # in case of multilabel weight file
            if (k.replace('bodynet.', '') in state_dict.keys()
                and v.shape == state_dict[k.replace('bodynet.', '')].shape)
        }  # number of classes might have changed
        # check which weights will be transferred
        if not pretrained_dict == state_dict:  # some changes were made
            for k in set(state_dict.keys() + pretrained_dict.keys()):
                if k in state_dict.keys() and k not in pretrained_dict.keys():
                    print('\tWeights for "{}" were not found in weight file.'.
                          format(k))
                elif k in pretrained_dict.keys() and k not in state_dict.keys(
                ):
                    print(
                        '\tWeights for "{}" were are not part of the used model.'
                        .format(k))
                elif state_dict[k].shape != pretrained_dict[k].shape:
                    print(
                        '\tShapes of "{}" are different in model ({}) and weight file ({}).'
                        .format(k, state_dict[k].shape,
                                pretrained_dict[k].shape))
                else:  # everything is good
                    pass

        state_dict.update(pretrained_dict)
        bodynet.load_state_dict(state_dict)

    net = OctopusNet(bodynet, n_labels=num_labels)

    n_parameters = sum(
        [p.data.nelement() for p in net.parameters() if p.requires_grad])
    if use_cuda:
        net = net.cuda()
    print('Using {}\n\t+ Number of params: {}'.format(
        str(net).split('(', 1)[0], n_parameters))

    if not os.path.isdir(log_dir):
        os.makedirs(log_dir)

    # tensorboard summary writer
    timestamp = time.strftime('%m-%d-%H-%M')
    expname = timestamp + '_' + str(net).split('(', 1)[0]
    if exp_name is not None:
        expname = expname + '_' + exp_name
    log = TBPlotter(os.path.join(log_dir, 'tensorboard', expname))
    log.print_logdir()

    # allow auto-tuner to find best algorithm for the hardware
    cudnn.benchmark = True

    with open(label_file, 'rb') as f:
        labels = pickle.load(f)['labels']
        n_labeled = '\t'.join(
            [str(Counter(l).items()) for l in labels.transpose()])

    write_config(argvars,
                 os.path.join(log_dir, expname),
                 extras={'n_labeled': n_labeled})

    # ININTIALIZE TRAINING
    optimizer = optim.SGD(net.parameters(), lr=lr, momentum=momentum)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                     'min',
                                                     patience=10,
                                                     threshold=1e-1,
                                                     verbose=True)
    criterion = nn.CrossEntropyLoss()
    if use_cuda:
        criterion = criterion.cuda()

    kwargs = {'num_workers': 4} if use_cuda else {}
    trainloader = DataLoader(trainset,
                             batch_size=batch_size,
                             shuffle=True,
                             **kwargs)
    valloader = DataLoader(valset,
                           batch_size=batch_size,
                           shuffle=True,
                           **kwargs)

    # optionally resume from a checkpoint
    start_epoch = 1
    if chkpt is not None:
        if os.path.isfile(chkpt):
            print("=> loading checkpoint '{}'".format(chkpt))
            checkpoint = torch.load(chkpt,
                                    map_location=lambda storage, loc: storage)
            start_epoch = checkpoint['epoch']
            best_acc_score = checkpoint['best_acc_score']
            best_acc = checkpoint['acc']
            net.load_state_dict(checkpoint['state_dict'])
            print("=> loaded checkpoint '{}' (epoch {})".format(
                chkpt, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(chkpt))

    def train(epoch):
        """Train ``net`` for one epoch on ``trainloader`` and log the averages.

        Every batch provides ``data`` and a list ``target`` holding one label
        tensor per entry of ``classes``; ``net`` is expected to return one
        output per entry as well. Samples labeled -1 count as unlabeled and
        are left out of the loss and accuracy of the corresponding head.

        Args:
            epoch: epoch number, used in the console output and passed as the
                step to ``log.write``.
        """
        losses = AverageMeter()
        accs = AverageMeter()
        class_acc = [AverageMeter() for _ in classes]

        # switch to train mode
        net.train()
        for batch_idx, (data, target) in enumerate(trainloader):
            if use_cuda:
                data = Variable(data.cuda())
                target = [Variable(t.cuda()) for t in target]
            else:
                data = Variable(data)
                target = [Variable(t) for t in target]
            n_samples = target[0].size(0)

            # compute output
            outputs = net(data)
            preds = [torch.max(outputs[i], 1)[1] for i in range(len(classes))]

            # Plain (non-grad) zero accumulator of shape (1,). It is summed
            # out of place: an in-place `+=` on a leaf tensor that requires
            # grad raises a RuntimeError whenever `type_as` returns the very
            # same tensor (i.e. when no type conversion is needed).
            loss = Variable(torch.Tensor([0])).type_as(data[0])
            batch_accs = []
            for meter, o, t, p in zip(class_acc, outputs, target, preds):
                # filter unlabeled samples if there are any (have label -1)
                labeled = (t != -1).nonzero().view(-1)
                if labeled.numel() == 0:
                    # Nothing labeled for this head in this batch: the loss
                    # and the accuracy would both be NaN (mean over zero
                    # elements / division by zero), so skip the head.
                    continue
                o, t, p = o[labeled], t[labeled], p[labeled]
                loss = loss + criterion(o, t)
                # measure class accuracy and record loss
                meter.update(
                    (torch.sum(p == t).type(torch.FloatTensor) /
                     t.size(0)).data)
                batch_accs.append(meter.val)

            if not batch_accs:
                # No head had a labeled sample, so there is no loss to
                # backpropagate for this batch.
                continue

            accs.update(torch.mean(torch.stack(batch_accs)), n_samples)
            losses.update(loss.data, n_samples)

            # compute gradient and do optimizer step
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            if batch_idx % log_interval == 0:
                # `target` is a list with one tensor per head, so its length
                # is not the batch size; count samples via the loader instead.
                print('Train Epoch: {} [{}/{}]\t'
                      'Loss: {:.4f} ({:.4f})\t'
                      'Acc: {:.2f}% ({:.2f}%)'.format(
                          epoch,
                          batch_idx * trainloader.batch_size,
                          len(trainloader.dataset),
                          float(losses.val),
                          float(losses.avg),
                          float(accs.val) * 100.,
                          float(accs.avg) * 100.))
                print('\t' + '\n\t'.join([
                    '{}: {:.2f}%'.format(name, float(meter.val) * 100.)
                    for name, meter in zip(classes, class_acc)
                ]))

        # log avg values to somewhere
        log.write('loss', float(losses.avg), epoch, test=False)
        log.write('acc', float(accs.avg), epoch, test=False)
        # NOTE(review): every head is written under the same 'class_acc' tag;
        # whether the plotter keeps them apart is not visible from here.
        for meter in class_acc:
            log.write('class_acc', float(meter.avg), epoch, test=False)

    def test(epoch):
        """Evaluate ``net`` on ``valloader`` and log the averaged metrics.

        Mirrors the training pass without any parameter update: one output
        and one label tensor per entry of ``classes``, with samples labeled
        -1 excluded from the loss and accuracy of the corresponding head.

        Args:
            epoch: epoch number passed as the step to ``log.write``.

        Returns:
            A tuple ``(loss, score, acc, class_accs)``: the average loss as a
            numpy array, the accuracy score ``mean - std / mean`` as a float,
            the average accuracy as a float, and a list with the average
            accuracy of every head.
        """
        losses = AverageMeter()
        accs = AverageMeter()
        class_acc = [AverageMeter() for _ in classes]
        # heads that received at least one labeled validation sample
        evaluated = [False] * len(classes)

        # switch to evaluation mode
        net.eval()
        # Nothing is backpropagated here, so do not record an autograd graph.
        with torch.no_grad():
            for data, target in valloader:
                if use_cuda:
                    data = Variable(data.cuda())
                    target = [Variable(t.cuda()) for t in target]
                else:
                    data = Variable(data)
                    target = [Variable(t) for t in target]
                n_samples = target[0].size(0)

                # compute output
                outputs = net(data)
                preds = [
                    torch.max(outputs[i], 1)[1] for i in range(len(classes))
                ]

                # Zero accumulator of shape (1,), summed out of place (an
                # in-place `+=` on a leaf that requires grad can raise).
                loss = Variable(torch.Tensor([0])).type_as(data[0])
                batch_accs = []
                for i, o, t, p in zip(range(len(classes)), outputs, target,
                                      preds):
                    # filter unlabeled samples (have label -1)
                    labeled = (t != -1).nonzero().view(-1)
                    if labeled.numel() == 0:
                        # An empty selection would turn both the loss and
                        # the accuracy of this head into NaN.
                        continue
                    o, t, p = o[labeled], t[labeled], p[labeled]
                    loss = loss + criterion(o, t)
                    # measure class accuracy and record loss
                    class_acc[i].update(
                        (torch.sum(p == t).type(torch.FloatTensor) /
                         t.size(0)).data)
                    batch_accs.append(class_acc[i].val)
                    evaluated[i] = True

                if not batch_accs:
                    # no labeled sample for any head in this batch
                    continue

                accs.update(torch.mean(torch.stack(batch_accs)), n_samples)
                losses.update(loss.data, n_samples)

        # compute mean - std/mean as measure for accuracy; heads that never
        # saw a labeled sample have no meaningful average and are left out
        head_avgs = torch.stack(
            [m.avg for m, seen in zip(class_acc, evaluated) if seen])
        score = accs.avg - torch.std(head_avgs) / accs.avg
        print(
            '\nVal set: Average loss: {:.4f} Average acc {:.2f}% Acc score {:.2f} LR: {:.6f}'
            .format(float(losses.avg),
                    float(accs.avg) * 100., float(score),
                    optimizer.param_groups[-1]['lr']))
        print('\t' + '\n\t'.join([
            '{}: {:.2f}%'.format(name, float(meter.avg) * 100.)
            for name, meter in zip(classes, class_acc)
        ]))
        log.write('loss', float(losses.avg), epoch, test=True)
        log.write('acc', float(accs.avg), epoch, test=True)
        # NOTE(review): every head is written under the same 'class_acc' tag;
        # whether the plotter keeps them apart is not visible from here.
        for meter in class_acc:
            log.write('class_acc', float(meter.avg), epoch, test=True)
        return losses.avg.cpu().numpy(), float(score), float(
            accs.avg), [float(meter.avg) for meter in class_acc]

    if start_epoch == 1:  # compute baseline:
        _, best_acc_score, best_acc, _ = test(epoch=0)
    else:  # checkpoint was loaded
        best_acc_score = best_acc_score
        best_acc = best_acc

    for epoch in range(start_epoch, epochs + 1):
        # train for one epoch
        train(epoch)
        # evaluate on validation set
        val_loss, val_acc_score, val_acc, val_class_accs = test(epoch)
        scheduler.step(val_loss)

        # remember best acc and save checkpoint
        is_best = val_acc_score > best_acc_score
        best_acc_score = max(val_acc_score, best_acc_score)
        save_checkpoint(
            {
                'epoch': epoch,
                'state_dict': net.state_dict(),
                'best_acc_score': best_acc_score,
                'acc': val_acc,
                'class_acc': {c: a
                              for c, a in zip(classes, val_class_accs)}
            },
            is_best,
            expname,
            directory=log_dir)

        if val_acc > best_acc:
            shutil.copyfile(
                os.path.join(log_dir, expname + '_checkpoint.pth.tar'),
                os.path.join(log_dir,
                             expname + '_model_best_mean_acc.pth.tar'))
        best_acc = max(val_acc, best_acc)

        if optimizer.param_groups[-1]['lr'] < 1e-5:
            print('Learning rate reached minimum threshold. End training.')
            break

    # report best values
    best = torch.load(os.path.join(log_dir, expname + '_model_best.pth.tar'),
                      map_location=lambda storage, loc: storage)
    print(
        'Finished training after epoch {}:\n\tbest acc score: {}\n\tacc: {}\n\t class acc: {}'
        .format(best['epoch'], best['best_acc_score'], best['acc'],
                best['class_acc']))
    print('Best model mean accuracy: {}'.format(best_acc))