Esempio n. 1
0
def get_dataset(dataset):
    """Build ImageNet train/validation datasets with their transforms.

    Returns a ``(trainset, testset1, testset2)`` triple. ``testset1`` is the
    validation split under the *training* transform, ``testset2`` under the
    deterministic test transform.
    """
    if dataset != 'imagenet':
        raise ValueError(dataset)

    transform_train = Compose([
        RandomResizedCrop(C.get()['target_size'] + 32,
                          scale=(0.9, 1.0),
                          interpolation=PIL.Image.BICUBIC),
    ])
    transform_test = Compose([
        Resize(C.get()['target_size'] + 32,
               interpolation=PIL.Image.BICUBIC)
    ])

    trainset = ImageNet(root=imagenet_path, split='train',
                        transform=transform_train)
    # NOTE(review): testset1 applies the *train* transform to the validation
    # split — presumably intentional (augmented evaluation); confirm.
    testset1 = ImageNet(root=imagenet_path, split='val',
                        transform=transform_train)
    testset2 = ImageNet(root=imagenet_path, split='val',
                        transform=transform_test)

    # All three splits expose the same class count; cache targets for
    # downstream stratified sampling.
    trainset.num_class = testset1.num_class = testset2.num_class = 1000
    trainset.targets = [lb for _, lb in trainset.samples]
    return trainset, testset1, testset2
Esempio n. 2
0
    def __init__(self, data, target, n_outputs, gpu=-1, index2name=None):
        """Set up the model, optimizer and a 90/10 train/test split.

        Args:
            data: input samples.
            target: labels aligned with ``data``.
            n_outputs: number of output classes for the ImageNet model.
            gpu: device id; a negative value keeps the model on the CPU.
            index2name: optional mapping from class index to readable name.
        """
        self.model = ImageNet(n_outputs)
        self.model_name = 'cnn_model'
        self.gpu = gpu

        # Move the model to the GPU only when a device id was supplied.
        if gpu >= 0:
            self.model.to_gpu()

        # Hold out 10% of the data for evaluation.
        split = train_test_split(data, target, test_size=0.1)
        self.x_train, self.x_test, self.y_train, self.y_test = split

        self.n_train = len(self.y_train)
        self.n_test = len(self.y_test)

        self.optimizer = optimizers.Adam()
        self.optimizer.setup(self.model)

        # Per-epoch metric history, filled during training.
        self.train_loss = []
        self.test_loss = []

        self.index2name = index2name
def load_dataset(dataset_name, download=False):
    """Load the test/val split of a dataset and subsample ~64 images.

    Args:
        dataset_name: one of ``'mnist'``, ``'cifar10'`` or ``'imagenet'``.
        download: forwarded to the dataset constructor.

    Returns:
        A ``Subset`` of at most 64 evenly spaced samples.

    Raises:
        ValueError: if ``dataset_name`` is unknown. (The original if-chain
        fell through to a confusing ``NameError`` on ``dataset`` instead.)
    """
    def _transform(pil):
        # PIL image -> float32 array scaled to [0, 1].
        return np.array(pil).astype('float32') / 255.

    if dataset_name == 'mnist':
        dataset = MNIST(root='./datasets/',
                        train=False,
                        transform=_transform,
                        download=download)
    elif dataset_name == 'cifar10':
        dataset = CIFAR10(root='./datasets/',
                          train=False,
                          transform=_transform,
                          download=download)
    elif dataset_name == 'imagenet':
        dataset = ImageNet(root='./datasets/',
                           split='val',
                           transform=_transform,
                           download=download)
    else:
        raise ValueError('unknown dataset: %s' % dataset_name)

    # Pick n_sample evenly spaced indices; max(1, ...) keeps the step valid
    # when the dataset holds fewer than n_sample items (step 0 would raise).
    n_sample = 64
    step = max(1, len(dataset) // n_sample)
    dataset = Subset(dataset,
                     indices=list(range(0, len(dataset), step))[:n_sample])

    return dataset
Esempio n. 4
0
def main():
    """Extract resnet50 features for every image of the test split and
    write one text file of feature vectors per class label (359 classes)."""
    set_gpu('0')
    save_path = 'features/test_new_domain_miniimagenet/'
    # Fix: the output directory was never created, so np.savetxt at the end
    # failed on a fresh checkout.
    os.makedirs(save_path, exist_ok=True)
    test_set = ImageNet(root='../cross-domain-fsl/dataset/mini-imagenet/test_new_domain')
    val_loader = DataLoader(dataset=test_set, batch_size=1, shuffle=False, num_workers=8,
                            pin_memory=True)
    model = resnet50()
    model = torch.nn.DataParallel(model).cuda()
    model.load_state_dict(torch.load('save/proto-5/max-acc.pth'))
    model = model.cuda()
    model.eval()

    # One bucket of feature vectors per class id.
    features = [[] for _ in range(359)]
    for (image, label) in val_loader:
        image = image.cuda()
        label = label.numpy()
        feature = model(image)
        feature = feature.data.cpu().numpy()
        # Batch dimension may be >1 in general; route each row by its label.
        for j in range(feature.shape[0]):
            features[int(label[j])].append(feature[j])
    for i in range(359):
        save_file = os.path.join(save_path, str(i) + '.txt')
        feature_np = np.asarray(features[i])
        np.savetxt(save_file, feature_np)
Esempio n. 5
0
def image():
    """Flask endpoint: persist the uploaded image and return a prediction."""
    uploaded = request.files['file']
    image_path = os.path.join(UPLOAD_FOLDER, uploaded.filename)
    uploaded.save(image_path)

    # ImageNet(...) is assumed to return the predicted class name as a string.
    pred = "It can be a " + ImageNet(image_path)

    return jsonify({'image_path': image_path, 'reply': pred})
Esempio n. 6
0
    # Fragment of a larger script: CLI flags plus data/model setup for a
    # targeted adversarial-example evaluation.
    parser.add_argument('--start_index', '-st', default=0, type=int)
    parser.add_argument('--batch_size', default=1000, type=int)
    parser.add_argument('--out_dir', default='../results/tar_adv_img/')
    parser.add_argument('--defense', default=0, type=int)
    args = parser.parse_known_args()[0]
    imgPath = '../results/tar_adv_img/'  # load the original images
    pretrained_path = '../results/adv_border_chk/'
    out_dir = args.out_dir  # save targeted-attack (defended or undefended) adversarial samples
    os.makedirs(out_dir, exist_ok=True)

    # Use GPUs
    device = torch.device("cpu")
    # device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    data_loader = ImageNet.get_data_loaders(data_path=imgPath,
                                            batch_size=1,
                                            num_workers=0,
                                            normalize=False)

    classifier = ImageNet.get_classifier().to(device)
    classifier.eval()
    chk_name = os.listdir(pretrained_path)
    chk_name.sort()  # necessary in linux

    img_names = os.listdir('../results/adv_border_png/')
    img_names.sort()

    # Running counters for attack effectiveness statistics.
    cnt = 0
    cnt_effect = 0
    cnt_total = 0
    cnt_temp = 0
    # NOTE(review): the loop body looks truncated in this snippet — it only
    # assigns corrupt_idx; the real evaluation presumably follows elsewhere.
    for i, (input, target) in enumerate(data_loader):
        corrupt_idx = 2

    # Fragment of a larger function: optionally inject an imagenet_c
    # corruption into the transform list `ts` (defined in enclosing scope),
    # then evaluate on a stratified 20% validation subset.
    if args.corrupt != '':
        # Flag format is "<type>:<level>", e.g. "gaussian_noise:3".
        corrupt_type, corrupt_level = args.corrupt.split(':')
        corrupt_level = int(corrupt_level)
        print(f'corruption {corrupt_type} : {corrupt_level}')

        from imagenet_c import corrupt
        # A numeric corrupt_type selects the corruption by index rather
        # than by name.
        if not corrupt_type.isdigit():
            ts.insert(corrupt_idx, lambda img: PIL.Image.fromarray(corrupt(np.array(img), corrupt_level, corrupt_type)))
        else:
            ts.insert(corrupt_idx, lambda img: PIL.Image.fromarray(corrupt(np.array(img), corrupt_level, None, int(corrupt_type))))

    transform_test = transforms.Compose(ts)

    testset = ImageNet(root='/data/public/rw/datasets/imagenet-pytorch', split='val', transform=transform_test)
    # Take the first stratified split's validation indices (20% of val).
    sss = StratifiedShuffleSplit(n_splits=5, test_size=0.2, random_state=0)
    for _ in range(1):
        sss = sss.split(list(range(len(testset))), testset.targets)
    train_idx, valid_idx = next(sss)
    testset = Subset(testset, valid_idx)

    testloader = torch.utils.data.DataLoader(testset, batch_size=args.test_batch, shuffle=False, num_workers=32, pin_memory=True, drop_last=False)

    metric = Accumulator()
    dl_test = tqdm(testloader)
    data_id = 0
    tta_rule_cnt = [0] * tta_num
    for data, label in dl_test:
        # Flatten any TTA replica dimension into the batch dimension.
        data = data.view(-1, data.shape[-3], data.shape[-2], data.shape[-1])
        data = data.cuda()
Esempio n. 8
0
class CNN:
    """Chainer-style CNN wrapper (Python 2 code: print statements, xrange).

    Bundles an ImageNet model, an Adam optimizer and a 90/10 train/test
    split, plus helpers to train, evaluate, predict and persist the model.
    """

    def __init__(self, data, target, n_outputs, gpu=-1, index2name=None):
        # Build the model; gpu < 0 keeps it on the CPU.
        self.model = ImageNet(n_outputs)
        self.model_name = 'cnn_model'

        if gpu >= 0:
            self.model.to_gpu()

        self.gpu = gpu

        # Hold out 10% of the data for evaluation.
        self.x_train,\
        self.x_test,\
        self.y_train,\
        self.y_test = train_test_split(data, target, test_size=0.1)

        self.n_train = len(self.y_train)
        self.n_test = len(self.y_test)

        self.optimizer = optimizers.Adam()
        self.optimizer.setup(self.model)

        # Per-epoch history. NOTE(review): train_and_test appends *accuracy*
        # values to these lists despite the "loss" names — confirm intent.
        self.train_loss = []
        self.test_loss = []

        self.index2name = index2name

    def predict(self, x_data, gpu=-1):
        """Return the predicted class index, or its readable name when an
        index2name mapping was supplied."""
        index = self.model.predict(x_data, gpu)
        if self.index2name is None:
            return index
        else:
            return self.index2name[index]

    def train_and_test(self, n_epoch=100, batchsize=100):
        """Run n_epoch epochs of minibatch SGD, evaluate after each epoch,
        and save a plot of the recorded histories to result.png."""
        epoch = 1
        best_accuracy = 0  # NOTE(review): never updated or read.
        while epoch <= n_epoch:
            print 'epoch', epoch

            # Shuffle the training set each epoch.
            perm = np.random.permutation(self.n_train)
            sum_train_accuracy = 0
            sum_train_loss = 0
            for i in xrange(0, self.n_train, batchsize):
                x_batch = self.x_train[perm[i:i+batchsize]]
                y_batch = self.y_train[perm[i:i+batchsize]]

                # The last batch may be smaller than batchsize.
                real_batchsize = len(x_batch)

                self.optimizer.zero_grads()
                loss, acc = self.model.forward(x_batch, y_batch, train=True, gpu=self.gpu)
                loss.backward()
                self.optimizer.update()
                # print(type(loss))
                # .data.get() copies the value back from the GPU.
                sum_train_loss += float(loss.data.get()) * real_batchsize
                sum_train_accuracy += float(acc.data.get()) * real_batchsize

            print 'train mean loss={}, accuracy={}'.format(sum_train_loss/self.n_train, sum_train_accuracy/self.n_train)
            self.train_loss.append(sum_train_accuracy/self.n_train)

            # evaluation
            sum_test_accuracy = 0
            sum_test_loss = 0
            for i in xrange(0, self.n_test, batchsize):
                x_batch = self.x_test[i:i+batchsize]
                y_batch = self.y_test[i:i+batchsize]

                real_batchsize = len(x_batch)

                loss, acc = self.model.forward(x_batch, y_batch, train=False, gpu=self.gpu)

                sum_test_loss += float(cuda.to_cpu(loss.data)) * real_batchsize
                sum_test_accuracy += float(cuda.to_cpu(acc.data)) * real_batchsize

            print 'test mean loss={}, accuracy={}'.format(sum_test_loss/self.n_test, sum_test_accuracy/self.n_test)
            self.test_loss.append(sum_test_accuracy/self.n_test)
            epoch += 1
        plt.plot(self.test_loss)
        plt.plot(self.train_loss)
        print('...save graph')
        plt.savefig('result.png')

    def dump_model(self):
        """Pickle the model to disk (moved to CPU first so it unpickles
        on machines without a GPU)."""
        self.model.to_cpu()
        pickle.dump(self.model, open(self.model_name, 'wb'),-1)

    def load_model(self):
        """Restore a pickled model and re-attach the optimizer."""
        self.model = pickle.load(open(self.model_name,'rb'))
        if self.gpu >= 0:
            self.model.to_gpu()
        self.optimizer.setup(self.model)
Esempio n. 9
0
#!/usr/bin/env python
# Python 2 script: look up each of the 1000 ImageNet class names in
# WordNet and write "<name> <definition>" lines to definitions.txt.

from nltk.corpus import wordnet as wn


from imagenet import ImageNet

CLASSES = '../../Classes/classes.txt'  # ImageNet class-name list
OUTFILE = 'definitions.txt'  # output file, one definition per line

inet = ImageNet(CLASSES)

with open(OUTFILE, 'w') as f:
    for i in range(0, 1000):
        w = inet.name(i)
        # WordNet lemmas use underscores instead of spaces.
        w_ = w.replace(' ', '_')
        ss = wn.synsets(w_)
        print w
        if not ss:
            print "ERR: %d %s not found" % (i, w)
        for s in ss:
            f.write(w_ + " " + s.definition() + "\n")

        
Esempio n. 10
0
MODELS = {
    'googlenet': 'bvlc_googlenet',
    'places': 'googlenet_places205',
    'oxford': 'oxford102',
    'cnn_age': 'cnn_age',
    'cnn_gender': 'cnn_gender',
    'caffenet': 'bvlc_reference_caffenet',
    'ilsvrc13': 'bvlc_reference_rcnn_ilsvrc13',
    'flickr_style': 'finetune_flickr_style'
    #    'cars' : 'cars'
}

MODEL = 'caffenet'
model_name = MODELS[MODEL]

classes = ImageNet(CLASSES)

parser = argparse.ArgumentParser()
parser.add_argument("n", type=str, help="Number of classes")
parser.add_argument("sample", type=str, help="Sample size for next iter")
parser.add_argument("image", type=str, help="The image to classify")
parser.add_argument("output", type=str, help="Output json config")

args = parser.parse_args()

if not os.path.isfile(args.image):
    print "%s is not a readable file" % args.input
    sys.exit(-1)

caffe.set_mode_cpu()
model_d = os.path.join(CAFFE_MODELS, model_name)
Esempio n. 11
0
def get_dataloaders(dataset,
                    batch,
                    num_workers,
                    dataroot,
                    ops_names,
                    magnitudes,
                    cutout,
                    cutout_length,
                    split=0.5,
                    split_idx=0,
                    target_lb=-1):
    """Build augmentation-search train/valid dataloaders for a dataset.

    Args:
        dataset: str, one of cifar10 / reduced_cifar10 / cifar100 /
            reduced_cifar100 / svhn / reduced_svhn / imagenet /
            reduced_imagenet
        batch: int, batch size for both loaders
        num_workers: int
        dataroot: the dataset dir
        ops_names: list[tuple], [N=105, K=2], str — augmentation op names
        magnitudes: tensor, shape [N, k] — op magnitudes
        cutout: boolean, whether to append CutoutDefault
        cutout_length: int
        split: float, default 0.5 — validation fraction; <= 0 disables the
            train/valid split (empty valid sampler)
        split_idx: int, the number of the next(StratifiedShuffleSplit.split)
            function is called is equal `split_idx` + 1
        target_lb: int, target label, if `target_lb` > 0, the train_label
            only include the `target_lb`

    Returns:
        (trainloader, validloader) — both wrap AugmentDataset over the same
        underlying training set, differing only in sampler and augment flag.
    """
    # Pre/post transforms are kept separate so the searched augmentation
    # ops can be applied between them by AugmentDataset.
    if 'cifar' in dataset or 'svhn' in dataset:
        transform_train_pre = transforms.Compose([
            transforms.RandomCrop(32, padding=4),
            transforms.RandomHorizontalFlip(),
        ])
        transform_train_after = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize(_CIFAR_MEAN, _CIFAR_STD),
        ])
        transform_test = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize(_CIFAR_MEAN, _CIFAR_STD),
        ])
    elif 'imagenet' in dataset:
        transform_train_pre = transforms.Compose([
            transforms.RandomResizedCrop(224,
                                         scale=(0.08, 1.0),
                                         interpolation=Image.BICUBIC),
            transforms.RandomHorizontalFlip(),
            transforms.ColorJitter(
                brightness=0.4,
                contrast=0.4,
                saturation=0.4,
            ),
        ])
        transform_train_after = transforms.Compose([
            transforms.ToTensor(),
            Lighting(0.1, _IMAGENET_PCA['eigval'], _IMAGENET_PCA['eigvec']),
            transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])
        ])
        transform_test = transforms.Compose([
            transforms.Resize(256, interpolation=Image.BICUBIC),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])
        ])
    else:
        raise ValueError('dataset=%s' % dataset)

    if cutout and cutout_length != 0:
        transform_train_after.transforms.append(CutoutDefault(cutout_length))

    if dataset == 'cifar10':
        total_trainset = torchvision.datasets.CIFAR10(root=dataroot,
                                                      train=True,
                                                      download=True,
                                                      transform=None)
        # NOTE(review): truncates to the first 100 samples — looks like a
        # debugging leftover; confirm before relying on this branch.
        total_trainset.train_data = total_trainset.train_data[:100]
        total_trainset.train_labels = total_trainset.train_labels[:100]
        # testset = torchvision.datasets.CIFAR10(root=dataroot, train=False, download=True, transform=None)
        total_trainset.targets = total_trainset.train_labels
    elif dataset == 'reduced_cifar10':
        total_trainset = torchvision.datasets.CIFAR10(root=dataroot,
                                                      train=True,
                                                      download=True,
                                                      transform=None)
        sss = StratifiedShuffleSplit(n_splits=1,
                                     test_size=46000,
                                     random_state=0)  # 4000 trainset
        sss = sss.split(list(range(len(total_trainset))),
                        total_trainset.train_labels)
        train_idx, valid_idx = next(sss)
        targets = [total_trainset.train_labels[idx] for idx in train_idx]
        total_trainset = Subset(total_trainset, train_idx)
        total_trainset.targets = targets

        # testset = torchvision.datasets.CIFAR10(root=dataroot, train=False, download=True, transform=None)
    elif dataset == 'cifar100':
        total_trainset = torchvision.datasets.CIFAR100(root=dataroot,
                                                       train=True,
                                                       download=True,
                                                       transform=None)
        # NOTE(review): reads test_labels on a train=True dataset — the
        # parallel cifar10 branch uses train_labels; confirm this attribute.
        total_trainset.targets = total_trainset.test_labels
        # testset = torchvision.datasets.CIFAR100(root=dataroot, train=False, download=True, transform=transform_test)
    elif dataset == 'reduced_cifar100':
        total_trainset = torchvision.datasets.CIFAR100(root=dataroot,
                                                       train=True,
                                                       download=True,
                                                       transform=None)
        sss = StratifiedShuffleSplit(n_splits=1,
                                     test_size=46000,
                                     random_state=0)  # 4000 trainset
        sss = sss.split(list(range(len(total_trainset))),
                        total_trainset.targets)
        train_idx, valid_idx = next(sss)
        targets = [total_trainset.targets[idx] for idx in train_idx]
        total_trainset = Subset(total_trainset, train_idx)
        total_trainset.targets = targets

        # testset = torchvision.datasets.CIFAR10(root=dataroot, train=False, download=True, transform=None)
    elif dataset == 'svhn':
        trainset = torchvision.datasets.SVHN(root=dataroot,
                                             split='train',
                                             download=True,
                                             transform=None)
        extraset = torchvision.datasets.SVHN(root=dataroot,
                                             split='extra',
                                             download=True,
                                             transform=None)
        total_trainset = ConcatDataset([trainset, extraset])
        # testset = torchvision.datasets.SVHN(root=dataroot, split='test', download=True, transform=transform_test)
    elif dataset == 'reduced_svhn':
        total_trainset = torchvision.datasets.SVHN(root=dataroot,
                                                   split='train',
                                                   download=True,
                                                   transform=None)
        sss = StratifiedShuffleSplit(n_splits=1,
                                     test_size=73257 - 1000,
                                     random_state=0)  # 1000 trainset
        # sss = sss.split(list(range(len(total_trainset))), total_trainset.targets)
        sss = sss.split(list(range(len(total_trainset))),
                        total_trainset.labels)
        train_idx, valid_idx = next(sss)
        # targets = [total_trainset.targets[idx] for idx in train_idx]
        targets = [total_trainset.labels[idx] for idx in train_idx]
        total_trainset = Subset(total_trainset, train_idx)
        # total_trainset.targets = targets
        total_trainset.labels = targets
        total_trainset.targets = targets

        # testset = torchvision.datasets.SVHN(root=dataroot, split='test', download=True, transform=transform_test)
    elif dataset == 'imagenet':
        total_trainset = ImageNet(root=os.path.join(dataroot,
                                                    'imagenet-pytorch'),
                                  download=True,
                                  transform=None)
        # testset = ImageNet(root=os.path.join(dataroot, 'imagenet-pytorch'), split='val', transform=transform_test)

        # compatibility
        total_trainset.targets = [lb for _, lb in total_trainset.samples]
    elif dataset == 'reduced_imagenet':
        # randomly chosen indices
        # NOTE(review): idx120 contains duplicates (e.g. 884, 385), so fewer
        # than 120 distinct classes are actually selected.
        idx120 = [
            904, 385, 759, 884, 784, 844, 132, 214, 990, 786, 979, 582, 104,
            288, 697, 480, 66, 943, 308, 282, 118, 926, 882, 478, 133, 884,
            570, 964, 825, 656, 661, 289, 385, 448, 705, 609, 955, 5, 703, 713,
            695, 811, 958, 147, 6, 3, 59, 354, 315, 514, 741, 525, 685, 673,
            657, 267, 575, 501, 30, 455, 905, 860, 355, 911, 24, 708, 346, 195,
            660, 528, 330, 511, 439, 150, 988, 940, 236, 803, 741, 295, 111,
            520, 856, 248, 203, 147, 625, 589, 708, 201, 712, 630, 630, 367,
            273, 931, 960, 274, 112, 239, 463, 355, 955, 525, 404, 59, 981,
            725, 90, 782, 604, 323, 418, 35, 95, 97, 193, 690, 869, 172
        ]
        total_trainset = ImageNet(root=os.path.join(dataroot,
                                                    'imagenet-pytorch'),
                                  transform=None)
        # testset = ImageNet(root=os.path.join(dataroot, 'imagenet-pytorch'), split='val', transform=transform_test)

        # compatibility
        total_trainset.targets = [lb for _, lb in total_trainset.samples]

        # sss = StratifiedShuffleSplit(n_splits=1, test_size=len(total_trainset) - 6000, random_state=0)  # 4000 trainset
        # sss = StratifiedShuffleSplit(n_splits=1, test_size=0, random_state=0)  # 4000 trainset
        # sss = sss.split(list(range(len(total_trainset))), total_trainset.targets)
        # train_idx, valid_idx = next(sss)
        # print(len(train_idx), len(valid_idx))

        # filter out
        # train_idx = list(filter(lambda x: total_trainset.labels[x] in idx120, train_idx))
        # valid_idx = list(filter(lambda x: total_trainset.labels[x] in idx120, valid_idx))
        # # test_idx = list(filter(lambda x: testset.samples[x][1] in idx120, range(len(testset))))
        train_idx = list(range(len(total_trainset)))

        # Keep only samples whose label is one of the 120 chosen classes.
        filter_train_idx = list(
            filter(lambda x: total_trainset.targets[x] in idx120, train_idx))
        # valid_idx = list(filter(lambda x: total_trainset.targets[x] in idx120, valid_idx))
        # test_idx = list(filter(lambda x: testset.samples[x][1] in idx120, range(len(testset))))
        # print(len(filter_train_idx))

        # Re-label the kept samples to contiguous class ids 0..len(idx120)-1.
        targets = [
            idx120.index(total_trainset.targets[idx])
            for idx in filter_train_idx
        ]
        sss = StratifiedShuffleSplit(n_splits=1,
                                     test_size=len(filter_train_idx) - 6000,
                                     random_state=0)  # 4000 trainset
        sss = sss.split(list(range(len(filter_train_idx))), targets)
        train_idx, valid_idx = next(sss)
        # Map split positions back to indices in the full dataset.
        train_idx = [filter_train_idx[x] for x in train_idx]
        valid_idx = [filter_train_idx[x] for x in valid_idx]

        targets = [
            idx120.index(total_trainset.targets[idx]) for idx in train_idx
        ]
        # Rewrite the samples list in place so labels match the new ids.
        for idx in range(len(total_trainset.samples)):
            if total_trainset.samples[idx][1] not in idx120:
                continue
            total_trainset.samples[idx] = (total_trainset.samples[idx][0],
                                           idx120.index(
                                               total_trainset.samples[idx][1]))
        total_trainset = Subset(total_trainset, train_idx)
        total_trainset.targets = targets

        # for idx in range(len(testset.samples)):
        #     if testset.samples[idx][1] not in idx120:
        #         continue
        #     testset.samples[idx] = (testset.samples[idx][0], idx120.index(testset.samples[idx][1]))
        # testset = Subset(testset, test_idx)
        print('reduced_imagenet train=', len(total_trainset))
    else:
        raise ValueError('invalid dataset name=%s' % dataset)

    train_sampler = None
    if split > 0.0:
        # Advance the stratified splitter split_idx+1 times so different
        # folds can be selected deterministically.
        sss = StratifiedShuffleSplit(n_splits=5,
                                     test_size=split,
                                     random_state=0)
        sss = sss.split(list(range(len(total_trainset))),
                        total_trainset.targets)
        for _ in range(split_idx + 1):
            train_idx, valid_idx = next(sss)

        # Optionally restrict both splits to a single target class.
        if target_lb >= 0:
            train_idx = [
                i for i in train_idx if total_trainset.targets[i] == target_lb
            ]
            valid_idx = [
                i for i in valid_idx if total_trainset.targets[i] == target_lb
            ]

        train_sampler = SubsetRandomSampler(train_idx)
        valid_sampler = SubsetSampler(valid_idx)

        # if horovod:
        #     import horovod.torch as hvd
        #     train_sampler = torch.utils.data.distributed.DistributedSampler(train_sampler, num_replicas=hvd.size(), rank=hvd.rank())
    else:
        valid_sampler = SubsetSampler([])

        # if horovod:
        #     import horovod.torch as hvd
        #     train_sampler = torch.utils.data.distributed.DistributedSampler(valid_sampler, num_replicas=hvd.size(), rank=hvd.rank())
    # Same underlying dataset; the boolean flag toggles augmentation on/off.
    train_data = AugmentDataset(total_trainset, transform_train_pre,
                                transform_train_after, transform_test,
                                ops_names, True, magnitudes)
    valid_data = AugmentDataset(total_trainset, transform_train_pre,
                                transform_train_after, transform_test,
                                ops_names, False, magnitudes)

    trainloader = torch.utils.data.DataLoader(train_data,
                                              batch_size=batch,
                                              shuffle=False,
                                              sampler=train_sampler,
                                              drop_last=False,
                                              pin_memory=True,
                                              num_workers=num_workers)

    validloader = torch.utils.data.DataLoader(
        valid_data,
        batch_size=batch,
        # sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[split:num_train]),
        sampler=valid_sampler,
        drop_last=False,
        pin_memory=True,
        num_workers=num_workers)

    # trainloader = torch.utils.data.DataLoader(
    #     total_trainset, batch_size=batch, shuffle=True if train_sampler is None else False, num_workers=32, pin_memory=True,
    #     sampler=train_sampler, drop_last=True)
    # validloader = torch.utils.data.DataLoader(
    #     total_trainset, batch_size=batch, shuffle=False, num_workers=16, pin_memory=True,
    #     sampler=valid_sampler, drop_last=False)

    # testloader = torch.utils.data.DataLoader(
    #     testset, batch_size=batch, shuffle=False, num_workers=32, pin_memory=True,
    #     drop_last=False
    # )
    print(len(train_data))
    return trainloader, validloader
Esempio n. 12
0
if __name__ == '__main__':
    # Render a batch of ImageNet validation images with a pretrained
    # adversarial "framing" border applied, for qualitative inspection.
    parser = argparse.ArgumentParser(
        description='Draw examples of attacked ImageNet examples')
    parser.add_argument('--framing',
                        required=True,
                        help='Path to pretrained framing')
    parser.add_argument('--output',
                        '-o',
                        default='examples.png',
                        help='Output file')
    args = parser.parse_args()

    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    # Only the validation loader is used; images stay unnormalized so
    # they can be drawn directly.
    _, data_loader = ImageNet.get_data_loaders(batch_size=BATCH_SIZE,
                                               num_workers=0,
                                               normalize=False,
                                               shuffle_val=True)

    framing = load_pretrained_imagenet_framing(args.framing).to(device)
    classifier = resnet50(pretrained=True).to(device)
    classifier.eval()

    # Take a single batch of examples.
    input, target = next(iter(data_loader))
    input = input.to(device)

    with torch.no_grad():
        input_att, _ = framing(input, normalize=False)

    normalized_input = input.clone()
    normalized_input_att = input_att.clone()
    # NOTE(review): the loop body is truncated in this snippet.
    for id in range(BATCH_SIZE):
Esempio n. 13
0
def get_imagenet(data_dir, upscale_factor, patch_size, data_augmentation=False, split="train", dataset_ratio=1):
    """Thin factory wrapper: build an ImageNet dataset with the given options."""
    dataset = ImageNet(data_dir, upscale_factor, patch_size,
                       data_augmentation, split, dataset_ratio)
    return dataset
Esempio n. 14
0
# SCALE = "8"
# BLEND = "90"

# Script fragment: iteratively classify the last generated image and
# render the next one from the predicted classes (deep-dream style loop).
outfile = os.path.join(OUTDIR, "neuralgia6.txt")

# Pick NSTART random ImageNet class ids as the seed targets.
start_targets = random.sample(range(0, 1000), NSTART)

#conffile = os.path.join(OUTDIR, 'conf0.json')

#neuralgia.write_config(start_targets, conffile)

#subprocess.call(["./neuralgia.sh", OUTDIR, 'image0', conffile, SIZE, SCALE, BLEND])

# NOTE(review): resumes from a previously generated image rather than
# rendering image0 (the bootstrap calls above are commented out).
lastimage = "./Neuralgia/Chapter5/image199.jpg"

imagen = ImageNet("./Classes/classes.txt")

classes = ', '.join([imagen.name(c) for c in start_targets])

with open(outfile, 'w') as f:
    f.write("%s: %s\n" % (lastimage, classes))




# Each iteration: classify lastimage to produce the next config, then
# draw a new image from it.
for i in range(0,1):
    jsonfile = "./Neuralgia/conf%d.json" % i
    subprocess.call(["./classify.py", str(NTWEEN), str(NSAMPLE), lastimage, jsonfile])
    subprocess.call(["./neuralgia.sh", OUTDIR, "image%d" % i, jsonfile, SIZE, SCALE, BLEND])
    lastimage = "./Neuralgia/image%d.jpg" % i
    t = neuralgia.read_config(jsonfile)
Esempio n. 15
0
# BLEND = "90"

# Script fragment: variant of the neuralgia loop that bootstraps image0
# itself and runs 20 classify/draw iterations.
outfile = os.path.join(OUTDIR, "twitter1.txt")

# Pick NSTART random ImageNet class ids as the seed targets.
start_targets = random.sample(range(0, 1000), NSTART)

conffile = os.path.join(OUTDIR, 'conf0.json')

neuralgia.write_config(start_targets, conffile)

# Render the first image from the seed config.
subprocess.call(["./neuralgae_draw.sh", OUTDIR, 'image0', conffile, SIZE, SCALE, BLEND])

#lastimage = "./Neuralgia/Chapter5/image199.jpg"
lastimage = os.path.join(OUTDIR, 'image0.jpg')

imagen = ImageNet("./Classes/classes.txt")

classes = ', '.join([imagen.name(c) for c in start_targets])

with open(outfile, 'w') as f:
    f.write("%s: %s\n" % (lastimage, classes))




# Each iteration: classify lastimage to produce the next config, then
# draw a new image from it.
for i in range(0,20):
    jsonfile = os.path.join(OUTDIR, ("conf%d.json" % i))
    subprocess.call(["./classify.py", str(NTWEEN), str(NSAMPLE), lastimage, jsonfile])
    subprocess.call(["./neuralgae_draw.sh", OUTDIR, "image%d" % i, jsonfile, SIZE, SCALE, BLEND])
    lastimage = os.path.join(OUTDIR, ("image%d.jpg" % i))
    t = neuralgia.read_config(jsonfile)
Esempio n. 16
0
MODELS = {
    'googlenet': 'bvlc_googlenet',
    'places': 'googlenet_places205',
    'oxford': 'oxford102',
    'cnn_age': 'cnn_age',
    'cnn_gender': 'cnn_gender',
    'caffenet': 'bvlc_reference_caffenet',
    'ilsvrc13': 'bvlc_reference_rcnn_ilsvrc13',
    'flickr_style': 'finetune_flickr_style'
#    'cars' : 'cars'
}

MODEL = 'caffenet'
model_name = MODELS[MODEL]

classes = ImageNet(CLASSES)

parser = argparse.ArgumentParser()
parser.add_argument("n",            type=str, help="Number of classes")
parser.add_argument("sample",       type=str, help="Sample size for next iter")
parser.add_argument("image",        type=str, help="The image to classify")
parser.add_argument("output",       type=str, help="Output json config")

args = parser.parse_args()

if not os.path.isfile(args.image):
    print "%s is not a readable file" % args.input
    sys.exit(-1)

caffe.set_mode_cpu()
model_d = os.path.join(CAFFE_MODELS, model_name)
Esempio n. 17
0
def main_worker(gpu, ngpus_per_node, args):
    """Per-process worker for MoCo-style self-supervised pre-training.

    Builds a checkpoint id from the run configuration, sets up (optionally
    distributed) training for a MoCo model, constructs the augmentation
    pipeline and dataset selected by ``args.dataid``, then runs the epoch
    loop, checkpointing to ``args.checkpoint_fp`` and reporting to wandb
    from the master process.

    Args:
        gpu: GPU index assigned to this process (or None for CPU-less setup).
        ngpus_per_node: number of GPUs on this node; used to derive the
            global rank and to split batch size / workers per process.
        args: parsed command-line namespace; several fields are mutated in
            place (gpu, rank, batch_size, workers, start_epoch, id, name).
    """
    # Encode the salient hyperparameters and augmentation flags into a
    # human-readable checkpoint/run identifier.
    CHECKPOINT_ID = "{}_{}epochs_{}bsz_{:0.4f}lr" \
        .format(args.id[:5], args.epochs, args.batch_size, args.lr)
    if args.mlp:
        CHECKPOINT_ID += "_mlp"
    if args.aug_plus:
        CHECKPOINT_ID += "_augplus"
    if args.cos:
        CHECKPOINT_ID += "_cos"
    if args.faa_aug:
        CHECKPOINT_ID += "_faa"
    if args.randomcrop:
        CHECKPOINT_ID += "_randcrop"
    if args.rotnet:
        CHECKPOINT_ID += "_rotnet"
    if args.rand_aug:
        CHECKPOINT_ID += "_randaug"
    if not (args.kfold == None):
        CHECKPOINT_ID += "_fold_%d" % (args.kfold)
    if not (args.custom_aug_name == None):
        CHECKPOINT_ID += "_custom_aug_" + args.custom_aug_name

    CHECKPOINT_ID += args.dataid

    args.gpu = gpu

    # suppress printing if not master
    if args.multiprocessing_distributed and args.gpu != 0:

        def print_pass(*args):
            pass

        builtins.print = print_pass

    if args.gpu is not None:
        print("Use GPU: {} for training".format(args.gpu))

    if args.distributed:
        if args.dist_url == "env://" and args.rank == -1:
            args.rank = int(os.environ["RANK"])
        if args.multiprocessing_distributed:
            # For multiprocessing distributed training, rank needs to be the
            # global rank among all the processes
            args.rank = args.rank * ngpus_per_node + gpu
        dist.init_process_group(backend=args.dist_backend,
                                init_method=args.dist_url,
                                world_size=args.world_size,
                                rank=args.rank)
    # create model
    # Each head is an extra projection/classification output on the encoder:
    # "moco" for the contrastive embedding, "rotnet" for 4-way rotation
    # prediction when that auxiliary task is enabled.
    heads = {}
    if not args.nomoco:
        heads["moco"] = {"num_classes": args.moco_dim}
    if args.rotnet:
        heads["rotnet"] = {"num_classes": 4}
    model = moco.builder.MoCo(models.__dict__[args.arch],
                              K=args.moco_k,
                              m=args.moco_m,
                              T=args.moco_t,
                              mlp=args.mlp,
                              dataid=args.dataid,
                              multitask_heads=heads)
    print(model)

    # setup file structure for saving
    pathlib.Path(args.checkpoint_fp).mkdir(parents=True, exist_ok=True)

    if args.distributed:
        # For multiprocessing distributed, DistributedDataParallel constructor
        # should always set the single device scope, otherwise,
        # DistributedDataParallel will use all available devices.
        if args.gpu is not None:
            torch.cuda.set_device(args.gpu)
            model.cuda(args.gpu)
            # When using a single GPU per process and per
            # DistributedDataParallel, we need to divide the batch size
            # ourselves based on the total number of GPUs we have
            args.batch_size = int(args.batch_size / ngpus_per_node)
            args.workers = int(
                (args.workers + ngpus_per_node - 1) / ngpus_per_node)
            model = torch.nn.parallel.DistributedDataParallel(
                model, device_ids=[args.gpu])
        else:
            model.cuda()
            # DistributedDataParallel will divide and allocate batch_size to all
            # available GPUs if device_ids are not set
            model = torch.nn.parallel.DistributedDataParallel(model)
    elif args.gpu is not None:
        torch.cuda.set_device(args.gpu)
        model = model.cuda(args.gpu)
        # comment out the following line for debugging
        # raise NotImplementedError("Only DistributedDataParallel is supported.")
    else:
        # AllGather implementation (batch shuffle, queue update, etc.) in
        # this code only supports DistributedDataParallel.
        raise NotImplementedError("Only DistributedDataParallel is supported.")

    # define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss().cuda(args.gpu)

    optimizer = torch.optim.SGD(model.parameters(),
                                args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    # optionally resume from a checkpoint
    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            if args.gpu is None:
                checkpoint = torch.load(args.resume)
            else:
                # Map model to be loaded to specified single gpu.
                loc = 'cuda:{}'.format(args.gpu)
                checkpoint = torch.load(args.resume, map_location=loc)
            args.start_epoch = checkpoint['epoch']
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            # Resuming also restores the wandb run id and checkpoint name so
            # logging continues under the original run.
            args.id = checkpoint['id']
            args.name = checkpoint['name']
            CHECKPOINT_ID = checkpoint['name']
            print("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    cudnn.benchmark = True

    # Data loading code

    # Set up crops and normalization depending on the dataset.
    # NOTE(review): for a dataid other than cifar10/svhn/imagenet/logos,
    # `normalize` and `random_resized_crop` are never assigned, so the
    # augmentation code below would raise NameError before reaching the
    # explicit NotImplementedError in the dataset selection — confirm
    # whether unknown dataids are expected here.

    # Cifar 10 crops and normalization.
    if args.dataid == "cifar10" or args.dataid == "svhn":
        _CIFAR_MEAN, _CIFAR_STD = (0.4914, 0.4822, 0.4465), (0.2023, 0.1994,
                                                             0.2010)
        normalize = transforms.Normalize(mean=_CIFAR_MEAN, std=_CIFAR_STD)
        if not args.randomcrop:
            random_resized_crop = transforms.RandomResizedCrop(
                28, scale=(args.rrc_param, 1.))
        else:
            # Use the crop they were using in Fast AutoAugment.
            random_resized_crop = transforms.RandomCrop(32, padding=4)

    # Use the imagenet parameters.
    elif args.dataid == "imagenet" or args.dataid == "logos":
        normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                         std=[0.229, 0.224, 0.225])
        random_resized_crop = transforms.RandomResizedCrop(224,
                                                           scale=(0.2, 1.))

    # The augmentation flags are mutually exclusive with aug_plus.
    if args.aug_plus and (args.faa_aug or args.rand_aug or args.rand_aug_orig
                          or not (args.custom_aug_name == None)):
        raise Exception("Cannot have multiple augs on command line")

    if args.aug_plus:
        # MoCo v2's aug: similar to SimCLR https://arxiv.org/abs/2002.05709
        augmentation = [
            random_resized_crop,
            transforms.RandomApply(
                [
                    transforms.ColorJitter(0.4, 0.4, 0.4,
                                           0.1)  # not strengthened
                ],
                p=0.8),
            transforms.RandomGrayscale(p=0.2),
            transforms.RandomApply(
                [moco.loader.GaussianBlur([args.sigma / 20, args.sigma])],
                p=0.5),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            normalize
        ]
    elif args.faa_aug:
        # Fast AutoAugment transforms already come composed, so the
        # TwoCropsTransform is built here instead of in the shared path below.
        augmentation, _ = slm_utils.get_faa_transforms.get_faa_transforms_cifar_10(
            args.randomcrop, args.gauss)
        transformations = moco.loader.TwoCropsTransform(augmentation)
    elif args.rand_aug_orig:
        print("Using random aug original")
        augmentation = [
            transforms.RandomCrop(32, padding=4),
            transforms.RandomHorizontalFlip(),
            RandAugment(args.rand_aug_n, args.rand_aug_m),
            transforms.ToTensor(), normalize
        ]
    elif args.rand_aug:
        randaug_n = args.rand_aug_n
        if args.rand_aug_linear_m:
            # Magnitude starts at the minimum and is ramped up per-epoch in
            # the training loop below.
            print("Using random aug with linear m")
            randaug_m = args.rand_aug_m_min
        else:
            randaug_m = args.rand_aug_m
            print("Using random aug")
        if args.rand_aug_top_k > 0:
            randaug = TopRandAugment(randaug_n, randaug_m, args.rand_aug_top_k)
        else:
            randaug = RandAugment(randaug_n, randaug_m)
        augmentation = [
            random_resized_crop,
            transforms.RandomHorizontalFlip(), randaug,
            transforms.ToTensor(), normalize
        ]
    elif args.rand_resize_only and args.custom_aug_name == None:
        print("Using random resize only")
        augmentation = [
            random_resized_crop,
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(), normalize
        ]
    elif not args.custom_aug_name == None:
        augmentation, _ = slm_utils.get_faa_transforms.load_custom_transforms(
            name=args.custom_aug_name,
            randomcrop=args.randomcrop,
            aug_idx=args.single_aug_idx,
            dataid=args.dataid)

        print('using custom augs', augmentation)

        transformations = moco.loader.TwoCropsTransform(augmentation)
    else:
        # MoCo v1's aug: the same as InstDisc https://arxiv.org/abs/1805.01978
        print('using v1 augs')
        augmentation = [
            random_resized_crop,
            transforms.RandomGrayscale(p=0.2),
            transforms.ColorJitter(0.4, 0.4, 0.4, 0.4),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(), normalize
        ]

    # faa_aug and custom_aug branches built `transformations` themselves;
    # every other branch left a list in `augmentation` to be composed here.
    if not args.faa_aug and args.custom_aug_name == None:
        print('using augmentation', augmentation)
        transformations = moco.loader.TwoCropsTransform(
            transforms.Compose(augmentation))

    print('xforms', transformations)

    # Dataset selection, keyed on args.dataid / args.reduced_imgnet.
    if args.dataid == "imagenet" and not args.reduced_imgnet:
        train_dataset = datasets.ImageFolder(args.data, transformations)

    elif args.dataid == "logos" and not args.reduced_imgnet:
        train_dataset = data_loader.GetLoader(
            data_root=args.data,
            data_list='train_images_root.txt',
            transform=transformations)

    elif (args.dataid == "imagenet"
          or args.dataid == 'logos') and args.reduced_imgnet:
        # Reduced-imagenet mode: shuffle deterministically, keep 50k samples,
        # and hold out one 10k fold (args.kfold) for validation.
        # idx120 = [16, 23, 52, 57, 76, 93, 95, 96, 99, 121, 122, 128, 148, 172, 181, 189, 202, 210, 232, 238, 257, 258, 259, 277, 283, 289, 295, 304, 307, 318, 322, 331, 337, 338, 345, 350, 361, 375, 376, 381, 388, 399, 401, 408, 424, 431, 432, 440, 447, 462, 464, 472, 483, 497, 506, 512, 530, 541, 553, 554, 557, 564, 570, 584, 612, 614, 619, 626, 631, 632, 650, 657, 658, 660, 674, 675, 680, 682, 691, 695, 699, 711, 734, 736, 741, 754, 757, 764, 769, 770, 780, 781, 787, 797, 799, 811, 822, 829, 830, 835, 837, 842, 843, 845, 873, 883, 897, 900, 902, 905, 913, 920, 925, 937, 938, 940, 941, 944, 949, 959]

        if args.dataid == "imagenet":
            total_trainset = ImageNet(
                root=args.data, transform=transformations
            )  # TODO for LINCLS, make this train and test xforms.

        else:
            total_trainset = data_loader.GetLoader(
                data_root=args.data,
                data_list='train_images_root.txt',
                transform=transformations)

        train_idx = np.arange(len(total_trainset))

        np.random.seed(1337)  #fingers crossed.
        np.random.shuffle(train_idx)
        train_idx = train_idx[:50000]

        kfold = args.kfold

        print('KFOLD BEING USED', kfold)
        subset = np.arange(kfold * 10000, (kfold + 1) * 10000)
        print('start', 'end', kfold * 10000, (kfold + 1) * 10000)
        valid_idx = train_idx[subset]
        train_idx = np.delete(train_idx, subset)

        print('first val_idx', valid_idx[:10])

        train_dataset = total_trainset

        train_dataset = Subset(train_dataset, train_idx)
        # NOTE(review): train_sampler/valid_sampler assigned here are
        # immediately overwritten — the DistributedSampler below is the one
        # actually used; valid_sampler is never used.
        train_sampler = SubsetRandomSampler(train_idx)
        valid_sampler = SubsetSampler(valid_idx)

        print(len(train_dataset))
        train_sampler = torch.utils.data.distributed.DistributedSampler(
            train_dataset)
        print(len(train_dataset))

        print('first 10 train', train_idx[:10])
        print('first 10 valid', valid_idx[:10])
        print('len train', len(train_idx))
        print('len valid', len(valid_idx))

        # Sanity check: the validation fold must not leak into training.
        for i in valid_idx:
            if i in train_idx:
                raise Exception("Valid idx in train idx: this is unexpected")
        print('train_sampler', train_sampler)

    elif args.dataid == "cifar10":
        train_dataset = torchvision.datasets.CIFAR10(args.data,
                                                     transform=transformations,
                                                     download=True)

    elif args.dataid == "svhn":
        train_dataset = torchvision.datasets.SVHN(args.data,
                                                  transform=transformations,
                                                  download=True)
    else:
        raise NotImplementedError(
            "Support for the following dataset is not yet implemented: {}".
            format(args.dataid))

    # k-fold handling for the non-reduced datasets: split into 5 folds with
    # a fixed seed and drop the held-out fold from the training set.
    if not args.kfold == None and not args.reduced_imgnet:
        torch.manual_seed(1337)
        print('before: K FOLD', args.kfold, len(train_dataset))
        lengths = [len(train_dataset) // 5] * 5
        print(lengths)
        # Put the division remainder into the last fold so lengths sum to
        # the dataset size.
        lengths[-1] = int(lengths[-1] + (len(train_dataset) - np.sum(lengths)))
        print(lengths)
        folds = torch.utils.data.random_split(train_dataset, lengths)
        print(len(folds))
        folds.pop(args.kfold)
        print(len(folds))
        train_dataset = torch.utils.data.ConcatDataset(folds)
        print(len(train_dataset))

    else:
        print("NO KFOLD ARG", args.kfold, ' or ', args.reduced_imgnet)

    # In reduced_imgnet mode the sampler was already built above.
    if args.distributed and not args.reduced_imgnet:
        train_sampler = torch.utils.data.distributed.DistributedSampler(
            train_dataset)
    elif not args.reduced_imgnet:
        train_sampler = None
    print('train sampler', train_sampler)

    torch.manual_seed(1337)
    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=args.batch_size,
                                               shuffle=(train_sampler is None),
                                               num_workers=args.workers,
                                               pin_memory=True,
                                               sampler=train_sampler,
                                               drop_last=True)

    print(len(train_loader))

    # CR: only the master will report to wandb for now
    if not args.multiprocessing_distributed or args.rank % ngpus_per_node == 0:
        wandb.init(project=args.wandbproj,
                   name=CHECKPOINT_ID,
                   id=args.id,
                   resume=args.resume,
                   config=args.__dict__,
                   notes=args.notes)
        print(model)

    # Main epoch loop: adjust LR, optionally ramp RandAugment magnitude,
    # train one epoch, then save the "latest" and periodic checkpoints.
    for epoch in range(args.start_epoch, args.epochs):
        if args.distributed:
            train_sampler.set_epoch(epoch)
        adjust_learning_rate(optimizer, epoch, args)

        if args.rand_aug_linear_m:
            # Linearly interpolate magnitude from m_min to m_max over training.
            mval = args.rand_aug_m_min + math.floor(
                float(epoch) / float(args.epochs) *
                (args.rand_aug_m_max - args.rand_aug_m_min + 1))
            print("Rand aug m: {}".format(mval))
            randaug.m = mval

        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch, args,
              CHECKPOINT_ID)

        # save current epoch
        if not args.nosave_latest and (not args.multiprocessing_distributed
                                       or args.rank % ngpus_per_node == 0):
            print("saving latest epoch")
            cp_filename = "{}_latest.tar".format(CHECKPOINT_ID[:5])
            cp_fullpath = os.path.join(args.checkpoint_fp, cp_filename)
            torch.save(
                {
                    'epoch': epoch + 1,
                    'arch': args.arch,
                    'state_dict': model.state_dict(),
                    'optimizer': optimizer.state_dict(),
                    'id': args.id,
                    'name': CHECKPOINT_ID,
                }, cp_fullpath)
            print("saved latest epoch")


        # Periodic (and final-epoch) checkpoint, saved only by the master
        # process; optionally uploaded to wandb.
        if (epoch % args.checkpoint_interval == 0 or epoch == args.epochs-1) \
           and (not args.multiprocessing_distributed or
                (args.multiprocessing_distributed and args.rank % ngpus_per_node == 0)):
            cp_filename = "{}_{:04d}.tar".format(CHECKPOINT_ID, epoch)
            cp_fullpath = os.path.join(args.checkpoint_fp, cp_filename)
            torch.save(
                {
                    'epoch': epoch + 1,
                    'arch': args.arch,
                    'state_dict': model.state_dict(),
                    'optimizer': optimizer.state_dict(),
                    'id': args.id,
                    'name': CHECKPOINT_ID,
                }, cp_fullpath)
            if args.upload_checkpoints:
                print("Uploading wandb checkpoint")
                wandb.save(cp_fullpath)
            if epoch == args.epochs - 1:
                print("Saving final results to wandb")
                wandb.save(cp_fullpath)

    print("Done - wrapping up")
Esempio n. 18
0
def main_worker(gpu, ngpus_per_node, args):
    global best_acc1
    args.gpu = gpu

    # suppress printing if not master
    if args.multiprocessing_distributed and args.gpu != 0:
        def print_pass(*args):
            pass
        builtins.print = print_pass

    if args.gpu is not None:
        print("Use GPU: {} for training".format(args.gpu))

    if args.distributed:
        if args.dist_url == "env://" and args.rank == -1:
            args.rank = int(os.environ["RANK"])
        if args.multiprocessing_distributed:
            # For multiprocessing distributed training, rank needs to be the
            # global rank among all the processes
            args.rank = args.rank * ngpus_per_node + gpu
        dist.init_process_group(backend=args.dist_backend, init_method=args.dist_url,
                                world_size=args.world_size, rank=args.rank)
    # create model
    print("=> creating model '{}'".format(args.arch))
    model = models.__dict__[args.arch]()

    # CIFAR 10 mod

    if args.dataid =="cifar10" or args.dataid =="svhn":
    # use the layer the SIMCLR authors used for cifar10 input conv, checked all padding/strides too.
        model.conv1 = nn.Conv2d(3, 64, kernel_size=(3, 3), stride=(1,1), padding=(1,1), bias=False)
        model.maxpool = nn.Identity()
        n_output_classes = 10
        model.fc = torch.nn.Linear(model.fc.in_features, n_output_classes)

    if args.dataid == "logos": 
        n_output_classes = 2341

        print('in feats', model.fc.in_features)
        model.fc = torch.nn.Linear(model.fc.in_features, n_output_classes)
        print(model.avgpool)
        model.avgpool = torch.nn.AdaptiveAvgPool2d(1)



    if args.task == "rotation":
        print("Using 4 output classes for rotation")
        n_output_classes = 4
        model.fc = torch.nn.Linear(model.fc.in_features, n_output_classes)

    # freeze all layers but the last fc
    if not args.finetune:
        print("\n\n\nNOT finetuning\n\n\n")
        for name, param in model.named_parameters():
            if name not in ['fc.weight', 'fc.bias']:
                param.requires_grad = False
    else:
        print("\n\n\nFINETUNING ALL PARAMS\n\n\n")        



    # Initialize the weights and biases in the way they did in the paper.
    model.fc.weight.data.normal_(mean=0.0, std=0.01)
    model.fc.bias.data.zero_()

    # hack the mlp into the final layer    
    if args.mlp:
        print('training mlp final layer')
        model.fc = nn.Sequential(nn.Linear(model.fc.in_features, model.fc.in_features), model.fc)
        # model.fc[0].weight.data.normal_(mean=0.0, std=0.01)
        model.fc[0].bias.data.zero_()

    # random checkpointing
    # savefile = os.path.join(args.checkpoint_fp, "resnet50-random-weights.tar")
    # torch.save({
    #     'epoch': 0,
    #     'arch': args.arch,
    #     'state_dict': model.state_dict(),
    # }, savefile)
    # import sys
    # sys.exit(0)

    # load from pre-trained, before DistributedDataParallel constructor
    wandb_resume = args.resume
    name = args.id
    if args.pretrained:
        if os.path.isfile(args.pretrained):
            print("=> loading checkpoint '{}'".format(args.pretrained))
            checkpoint = torch.load(args.pretrained, map_location="cpu")
            
            # rename moco pre-trained keys
            state_dict = checkpoint['state_dict']
            only_encoder = 'encoder' in state_dict
            if only_encoder:
                state_dict = state_dict['encoder']
            if checkpoint.get('id'):
                # sync the ids for wandb
                if args.newid:
                    args.id = checkpoint['id'] + "_{}".format(args.id[:5])
                else:
                    args.id = checkpoint['id']
                name = args.id
                wandb_resume = True
            if checkpoint.get('name'):
                name = checkpoint['name']
            
            for k in list(state_dict.keys()):
                # retain only encoder_q up to before the embedding layer
                if only_encoder:
                    state_dict[k[len("module."):]] = state_dict[k]
                elif k.startswith('module.model.encoder'):
                    # remove prefix
                    state_dict[k[len("module.model.encoder."):]] = state_dict[k]
                elif k.startswith('module.encoder_q'):
                    if k.find("fc") < 0:
                        state_dict[k[len("module.encoder_q."):]] = state_dict[k]
                # delete renamed or unused k
                del state_dict[k]
            args.start_epoch = 0

            msg = model.load_state_dict(state_dict, strict=False)
            

            if args.mlp:
                assert set(msg.missing_keys) == {"fc.0.weight", "fc.0.bias", "fc.1.weight", "fc.1.bias"}
            else:
                assert set(msg.missing_keys) == {"fc.weight", "fc.bias"}

            print("=> loaded pre-trained model '{}'".format(args.pretrained))
        else:
            print("=> no checkpoint found at '{}'".format(args.pretrained))

    if args.distributed:
        # For multiprocessing distributed, DistributedDataParallel constructor
        # should always set the single device scope, otherwise,
        # DistributedDataParallel will use all available devices.
        if args.gpu is not None:
            torch.cuda.set_device(args.gpu)
            model.cuda(args.gpu)
            # When using a single GPU per process and per
            # DistributedDataParallel, we need to divide the batch size
            # ourselves based on the total number of GPUs we have
            args.batch_size = int(args.batch_size / ngpus_per_node)
            args.workers = int((args.workers + ngpus_per_node - 1) / ngpus_per_node)
            model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.gpu])
        else:
            model.cuda()
            # DistributedDataParallel will divide and allocate batch_size to all
            # available GPUs if device_ids are not set
            model = torch.nn.parallel.DistributedDataParallel(model)
    elif args.gpu is not None:
        torch.cuda.set_device(args.gpu)
        model = model.cuda(args.gpu)
    else:
        # DataParallel will divide and allocate batch_size to all available GPUs
        if args.arch.startswith('alexnet') or args.arch.startswith('vgg'):
            model.features = torch.nn.DataParallel(model.features)
            model.cuda()
        else:
            model = torch.nn.DataParallel(model).cuda()

    # define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss().cuda(args.gpu)

    # optimize only the linear classifier
    parameters = list(filter(lambda p: p.requires_grad, model.parameters()))
    if not args.finetune:
        if args.mlp:
            assert len(parameters) == 4  # fc.{0,1}.weight, fc.{0,1}.bias
        else:
            assert len(parameters) == 2  # fc.weight, fc.bias
    optimizer = torch.optim.SGD(parameters, args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    # optionally resume from a checkpoint
    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            if args.gpu is None:
                checkpoint = torch.load(args.resume)
            else:
                # Map model to be loaded to specified single gpu.
                loc = 'cuda:{}'.format(args.gpu)
                checkpoint = torch.load(args.resume, map_location=loc)
            args.start_epoch = checkpoint['epoch']
            best_acc1 = checkpoint['best_acc1']
            if args.gpu is not None:
                # best_acc1 may be from a checkpoint from a different GPU
                best_acc1 = best_acc1.to(args.gpu)
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {})"
                  .format(args.resume, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    cudnn.benchmark = True

    # Data loading code


    # Chanigng this for CIFAR10.

    if args.dataid =="cifar10" or args.dataid=="svhn":
        _CIFAR_MEAN, _CIFAR_STD = (0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)
        normalize = transforms.Normalize(mean=_CIFAR_MEAN, std=_CIFAR_STD)
    #  Original normalization
    else:
        normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                         std=[0.229, 0.224, 0.225])

    # Readded some data augmentations for training this part.

    if args.dataid == "cifar10" or args.dataid=="svhn":
        crop_size = 28
        orig_size = 32
    else:
        crop_size = 224
        orig_size = 256

    if not args.randomcrop:
        crop_transform = transforms.RandomResizedCrop(crop_size)
    else:
        crop_transform = transforms.RandomCrop(crop_size)


    print(args.data)

    if args.dataid == "cifar10":
        train_dataset = torchvision.datasets.CIFAR10(args.data,
                                                     transform= transforms.Compose([
                                                         crop_transform,
                                                         transforms.RandomHorizontalFlip(),
                                                         transforms.ToTensor(),
                                                         normalize,
                                                     ]), download=False)
        if args.percent < 100:
            train_size = math.floor(50000 * (args.percent / 100.0))
            print("Using {} percent of cifar training data: {} samples".format(args.percent, train_size))
            sss = StratifiedShuffleSplit(n_splits=1, test_size=50000-train_size, random_state=0)
            sss = sss.split(list(range(len(train_dataset))), train_dataset.targets)
            train_idx, valid_idx = next(sss)
            targets = [train_dataset.targets[idx] for idx in train_idx]
            train_dataset = torch.utils.data.Subset(train_dataset, train_idx)
            train_dataset.targets = targets

    elif args.dataid == "svhn": 
        train_dataset = torchvision.datasets.SVHN(args.data,
                                                     transform= transforms.Compose([
                                                         crop_transform,
                                                         transforms.RandomHorizontalFlip(),
                                                         transforms.ToTensor(),
                                                         normalize,
                                                     ]), download=False)

        if args.percent < 100:
            raise Exception("Percent setting not yet implemented for svhn")

    elif args.dataid == 'imagenet' and not args.reduced_imgnet:
        train_dataset = datasets.ImageFolder(
            os.path.join(args.data, "train"),
            transforms.Compose([
                crop_transform,
                transforms.RandomHorizontalFlip(),
                transforms.ToTensor(),
                normalize,
        ]))
        if args.percent < 100:
            raise Exception("Percent setting not yet implemented for imagenet")




    elif args.dataid == 'logos' and not args.reduced_imgnet: 
        train_dataset = data_loader.GetLoader(data_root=args.data + '/train/',
                                data_list='train_images_root.txt',
                                transform=transforms.Compose([
                                crop_transform,
                                # transforms.RandomHorizontalFlip(),
                                transforms.ToTensor(),
                                normalize,
                        ]))



    elif (args.dataid == "imagenet" or args.dataid =="logos") and args.reduced_imgnet: 

        import numpy as np
        idx120 = [16, 23, 52, 57, 76, 93, 95, 96, 99, 121, 122, 128, 148, 172, 181, 189, 202, 210, 232, 238, 257, 258, 259, 277, 283, 289, 295, 304, 307, 318, 322, 331, 337, 338, 345, 350, 361, 375, 376, 381, 388, 399, 401, 408, 424, 431, 432, 440, 447, 462, 464, 472, 483, 497, 506, 512, 530, 541, 553, 554, 557, 564, 570, 584, 612, 614, 619, 626, 631, 632, 650, 657, 658, 660, 674, 675, 680, 682, 691, 695, 699, 711, 734, 736, 741, 754, 757, 764, 769, 770, 780, 781, 787, 797, 799, 811, 822, 829, 830, 835, 837, 842, 843, 845, 873, 883, 897, 900, 902, 905, 913, 920, 925, 937, 938, 940, 941, 944, 949, 959]
        
        if args.dataid == "imagenet":
            total_trainset = ImageNet(root=args.data, transform=transforms.Compose([
                    crop_transform,
                    transforms.RandomHorizontalFlip(),
                    transforms.ToTensor(),
                    normalize,
            ])) 

            total_valset = ImageNet(root=args.data, transform=transforms.Compose([
                transforms.Resize(orig_size),
                transforms.CenterCrop(crop_size),
                transforms.ToTensor(),
                normalize,
            ])) 

        else:

            total_trainset = data_loader.GetLoader(data_root=args.data,
                    data_list='train_images_root.txt',
                    transform=transforms.Compose([
                    crop_transform,
                    transforms.RandomHorizontalFlip(),
                    transforms.ToTensor(),
                    normalize,
            ]))

            total_valset = data_loader.GetLoader(data_root=args.data, 
                data_list='train_images_root.txt', transform=transforms.Compose([
                transforms.Resize(orig_size),
                transforms.CenterCrop(crop_size),
                transforms.ToTensor(),
                normalize,
            ]))

        train_idx = np.arange(len(total_trainset))

        np.random.seed(1337) #fingers crossed. 
        np.random.shuffle(train_idx)
        train_idx = train_idx[:50000]

        kfold = args.kfold

        print('KFOLD BEING USED', kfold)
        subset = np.arange(kfold*10000, (kfold+1)*10000)
        print('start', 'end', kfold*10000, (kfold+1)*10000)
        valid_idx = train_idx[subset]
        train_idx = np.delete(train_idx, subset)

        print('first val_idx', valid_idx[:10])
        print('firstidx', valid_idx[:10])

        train_dataset = total_trainset
        valid_dataset = total_valset

        train_dataset = Subset(train_dataset, train_idx)
        valid_dataset = Subset(valid_dataset, valid_idx)

        val_dataset = valid_dataset

        # train_sampler = SubsetRandomSampler(train_idx)
        # valid_sampler = SubsetSampler(valid_idx)

        print('len train', len(train_dataset))

        print('len valid', len(val_dataset))
        # train_sampler = torch.utils.data.distributed.DistributedSampler(train_dataset)
        print(len(train_dataset))


        print('first 10 train', train_idx[:10])
        print('first 10 valid', valid_idx[:10])
        print('len train', len(train_idx))
        print('len valid', len(valid_idx))
        print('first val_idx', valid_idx[:10])


    val_transform = transforms.Compose([
            transforms.Resize(orig_size),
            transforms.CenterCrop(crop_size),
            transforms.ToTensor(),
            normalize,
            ])

    if args.kfold == None:
        if args.dataid == "cifar10":
            val_dataset = torchvision.datasets.CIFAR10(args.data, transform=val_transform,
                                                       download=True, train=False)
        elif args.dataid == "svhn": 
            val_dataset = torchvision.datasets.SVHN(args.data, transform=val_transform,
                                                       download=True, split='test')
        else:
            if not args.reduced_imgnet and args.dataid == 'imagenet':
                valdir = os.path.join(args.data, 'val')
                print('loaded full validation set')
                val_dataset = datasets.ImageFolder(valdir, transforms.Compose([
                    transforms.Resize(256),
                    transforms.CenterCrop(224),
                    transforms.ToTensor(),
                    normalize,
                ]))

            if args.dataid == 'logos': 

                print('using logos VAL')
                val_dataset = data_loader.GetLoader(data_root=args.data + '/test/', 
                data_list='test_images_root.txt', transform=transforms.Compose([
                transforms.Resize(orig_size),
                transforms.CenterCrop(crop_size),
                transforms.ToTensor(),
                normalize]))


    else: 
        # use the held out train data as the validation data. 
        if args.dataid == "cifar10": 
            val_dataset = torchvision.datasets.CIFAR10(args.data,
                transform= val_transform, download=True)
        elif args.dataid == "svhn": 
            val_dataset = torchvision.datasets.SVHN(args.data,
            transform= val_transform, download=True)

    if args.dataid == 'logos':
        assert val_dataset.label_dict == train_dataset.label_dict

    if not args.kfold == None and not args.reduced_imgnet: 
        torch.manual_seed(1337)
        print('before: K FOLD', args.kfold, len(train_dataset))
        lengths = [len(train_dataset)//5]*5
        import numpy as np
        lengths[-1] = int(lengths[-1] + (len(train_dataset)-np.sum(lengths)))
        print(lengths)
        folds = torch.utils.data.random_split(train_dataset, lengths)
        folds.pop(args.kfold)
        train_dataset = torch.utils.data.ConcatDataset(folds)

        # Get the validation split
        print('pre split val', val_dataset)
        torch.manual_seed(1337)
        lengths = [len(val_dataset)//5]*5
        lengths[-1] = int(lengths[-1] + (len(val_dataset)-np.sum(lengths)))
        print(lengths)
        folds = torch.utils.data.random_split(val_dataset, lengths)
        val_dataset = folds[args.kfold]
        print('len val', len(val_dataset))

    else: 
        print("NO KFOLD ARG", args.kfold, 'or ur using reduced imgnet', args.reduced_imgnet)


    if args.distributed:
        train_sampler = torch.utils.data.distributed.DistributedSampler(train_dataset)
    else:
        train_sampler = None

    print('train sampler', train_sampler)

    # CR: only the master will report to wandb for now
    is_main_node = not args.multiprocessing_distributed or args.gpu == 0
    if is_main_node:
        # use lcls prefix so we don't overwrite the training args
        wandb_args = {"lcls_{}".format(key): val for key, val in args.__dict__.items()}
        wandb.init(project=args.wandbproj,
                   name=name,
                   id=args.id, resume=wandb_resume,
                   config=wandb_args, job_type='linclass')

    train_loader = torch.utils.data.DataLoader(
        train_dataset, batch_size=args.batch_size, shuffle=(train_sampler is None),
        num_workers=args.workers, pin_memory=True, sampler=train_sampler)

    # if not args.reduced_imgnet:

    print('length of val dataset', len(val_dataset))
    val_loader = torch.utils.data.DataLoader(
        val_dataset,
        batch_size=args.batch_size, shuffle=False,
        num_workers=args.workers, pin_memory=True)
    # else: # we add the sampler
    #     val_loader = torch.utils.data.DataLoader(
    #         val_dataset,
    #         batch_size=args.batch_size, shuffle=False,
    #         num_workers=args.workers, pin_memory=True, sampler=valid_sampler)

    if args.evaluate:
        validate(val_loader, model, criterion, args, is_main_node)
        return
    print("Doing task: {}".format(args.task))
    for epoch in range(args.start_epoch, args.epochs):

        print(epoch)
        if args.distributed:
            train_sampler.set_epoch(epoch)
        adjust_learning_rate(optimizer, epoch, args)

        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch, args, is_main_node, args.id[:5])

        # evaluate on validation set
        if epoch % args.evaluate_interval == 0 or epoch >= (args.epochs-1):
            acc1, acc5 = validate(val_loader, model, criterion, args)
            if is_main_node:
                val_str = "val-{}"
                if args.mlp:
                    val_str = "val-mlp-{}"
                if args.loss_prefix:
                    val_str = args.loss_prefix + "-" + val_str
                acc1str = val_str.format(args.task)
                acc5str = val_str.format(args.task) + "-top5"
                wandb.log({acc1str: acc1, acc5str: acc5})

        # remember best acc@1 and save checkpoint
        if not args.multiprocessing_distributed or (args.multiprocessing_distributed and args.gpu == 0):
            is_best = acc1 > best_acc1
            best_acc1 = max(acc1, best_acc1)
            if is_best:
                if args.task == "rotation": 
                    savefile = os.path.join(args.checkpoint_fp, "{}_lincls_best_rotation.tar".format(args.id[:5]))
                else: 
                    savefile = os.path.join(args.checkpoint_fp, "{}_lincls_best.tar".format(args.id[:5]))
                torch.save({
                    'epoch': epoch + 1,
                    'arch': args.arch,
                    'state_dict': model.state_dict(),
                    'best_acc1': best_acc1,
                    'optimizer' : optimizer.state_dict(),
                }, savefile)
                wandb.save(savefile)

            # save the current epoch
            if args.task == "rotation": 
                savefile = os.path.join(args.checkpoint_fp, "{}_lincls_rotation_current.tar".format(args.id[:5]))
            else: 
                savefile = os.path.join(args.checkpoint_fp, "{}_lincls_current.tar".format(args.id[:5]))
            torch.save({
                'epoch': epoch + 1,
                'arch': args.arch,
                'state_dict': model.state_dict(),
                'best_acc1': best_acc1,
                'optimizer' : optimizer.state_dict(),
            }, savefile)
Esempio n. 19
0
#!/usr/bin/env python
"""Dump WordNet definitions for the 1000 ImageNet classes.

For each class index, look up the class name's synsets in WordNet and
write one "<name> <definition>" line per synset to OUTFILE. Classes with
no WordNet match are reported on stdout.

Modernized from Python 2: ``print`` statements became ``print()`` calls;
runtime output strings are unchanged.
"""

from nltk.corpus import wordnet as wn

from imagenet import ImageNet

CLASSES = '../../Classes/classes.txt'
OUTFILE = 'definitions.txt'

inet = ImageNet(CLASSES)

with open(OUTFILE, 'w') as f:
    for i in range(1000):
        w = inet.name(i)
        # WordNet lemma names use underscores instead of spaces.
        w_ = w.replace(' ', '_')
        ss = wn.synsets(w_)
        print(w)  # progress indicator
        if not ss:
            print("ERR: %d %s not found" % (i, w))
        for s in ss:
            f.write(w_ + " " + s.definition() + "\n")
Esempio n. 20
0
def main() -> None:
    """Distributed ImageNet training entry point (Horovod + tf.keras).

    Initializes Horovod, pins one GPU per process, scales the learning
    rate by the worker count, binds a Horovod-distributed train step onto
    a ResNet50, and fits it on the ImageNet dataset. Checkpointing,
    TensorBoard, and the progress bar run on rank 0 only so workers do
    not clobber each other's files.
    """

    # Horovod init; `size` scales the LR and divides the per-epoch steps.
    hvd.init()
    size = hvd.size()

    # Config GPUs: enable memory growth, then pin this process to its
    # local-rank GPU so each worker uses exactly one device.
    gpus = tf.config.experimental.list_physical_devices('GPU')
    for gpu in gpus:
        tf.config.experimental.set_memory_growth(gpu, True)

    if gpus:
        tf.config.experimental.set_visible_devices(gpus[hvd.local_rank()],
                                                   'GPU')

    # get optimizer & loss function. Linear LR scaling with worker count.
    # NOTE: `learning_rate` replaces the deprecated `lr` kwarg (removed in
    # Keras 3); behavior is identical.
    loss_function = tf.keras.losses.SparseCategoricalCrossentropy()
    opt = tf.keras.optimizers.Adam(learning_rate=Config.LEARNING_RATE * size)

    # Data (take=20 limits the dataset — presumably a smoke-test/debug
    # setting; confirm before a full run).
    imagenet = ImageNet(take=20)
    train_ds, val_ds = imagenet.train_ds, imagenet.val_ds
    n_train_batches = train_ds.cardinality().numpy()
    n_val_batches = val_ds.cardinality().numpy()

    # Callbacks shared by all workers: broadcast initial weights from
    # rank 0, average metrics across workers, warm up then adapt the LR.
    callbacks = []
    callbacks.append(hvdK.callbacks.BroadcastGlobalVariablesCallback(0))
    callbacks.append(hvdK.callbacks.MetricAverageCallback())
    callbacks.append(
        hvdK.callbacks.LearningRateWarmupCallback(
            warmup_epochs=5, initial_lr=Config.LEARNING_RATE))
    callbacks.append(
        tf.keras.callbacks.ReduceLROnPlateau(patience=10, verbose=1))

    if hvd.rank() == 0:

        ckpt_dir = Config.SAVED_WEIGHTS_DIR + "/" + Config.RUN_NAME
        # exist_ok avoids the exists()/makedirs() race between processes.
        os.makedirs(ckpt_dir, exist_ok=True)

        ckpt = tf.keras.callbacks.ModelCheckpoint(filepath=ckpt_dir+ \
                                                    "/epoch-{epoch:02d}-loss={val_loss:.2f}.h5",
                                                    monitor='val_loss', save_best_only=True, mode='min')

        log_dir = Config.LOG_DIR + "/" + Config.RUN_NAME
        tensorboard = tf.keras.callbacks.TensorBoard(log_dir=log_dir)

        callbacks.append(ckpt)
        callbacks.append(tensorboard)
        callbacks.append(tfa.callbacks.TQDMProgressBar())

    # Model: ResNet50 with the distributed train step bound as a method.
    model = ResNet50()
    model.loss_function = loss_function
    model.train_step = types.MethodType(distributed_train_step, model)
    model.compile(optimizer=opt, loss=loss_function)

    # Train; each worker processes 1/size of the batches per epoch.
    model.fit(train_ds,
              steps_per_epoch=n_train_batches // size,
              validation_data=val_ds,
              validation_steps=n_val_batches // size,
              epochs=Config.EPOCHS,
              verbose=0,
              callbacks=callbacks)