#train_imgs.mean(), train_imgs.std()
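# The commented line above hints at deriving the Normalize() statistics from
# the training data. A minimal, self-contained sketch with a toy array standing
# in for `train_imgs` (assumed shape N x H x W x C, dtype uint8):
import numpy as np
_toy_imgs = np.random.randint(0, 256, size=(8, 32, 32, 3), dtype=np.uint8)
_means = (_toy_imgs / 255.0).mean(axis=(0, 1, 2))
_stds = (_toy_imgs / 255.0).std(axis=(0, 1, 2))
print(_means, _stds)  # per-channel values to plug into transforms.Normalize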

# In[17]:

#group_kfold = GroupShuffleSplit(n_splits=5, random_state = 4321)
group_kfold = GroupKFold(n_splits=5)
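# A minimal sketch (toy data) of how the splitter above is consumed: `groups`
# carries one id per sample (e.g. a patient id) so that no group spans both
# the train and validation folds.
import numpy as np
_X = np.arange(20).reshape(10, 2)
_y = np.zeros(10)
_groups = np.repeat(np.arange(5), 2)  # 5 groups, 2 samples each
for _fold, (_tr, _va) in enumerate(group_kfold.split(_X, _y, groups=_groups)):
    print(f'fold {_fold}: train={_tr}, val={_va}')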

# In[18]:

data_transforms = {
    'train':
    transforms.Compose([
        transforms.Resize(224),
        #transforms.Grayscale(3),
        transforms.RandomAffine(degrees=45, scale=(0.9, 1.1)),
        transforms.RandomHorizontalFlip(p=0.5),
        transforms.RandomVerticalFlip(p=0.5),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
    'val':
    transforms.Compose([
        transforms.Resize(224),
        #transforms.Grayscale(3),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
}
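# Note: transforms.Resize(224) scales the *shorter* side to 224 and keeps the
# aspect ratio, so non-square images stay non-square and cannot be stacked into
# a batch; pairing it with CenterCrop(224) (or using Resize((224, 224))) avoids
# that. A hedged usage sketch, assuming an ImageFolder layout under ./data:
# image_datasets = {x: datasets.ImageFolder(f'./data/{x}', data_transforms[x])
#                   for x in ['train', 'val']}
# dataloaders = {x: torch.utils.data.DataLoader(image_datasets[x], batch_size=32,
#                                               shuffle=(x == 'train'))
#                for x in ['train', 'val']}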

# In[19]:
# The enclosing class statement was lost in extraction; a hypothetical header
# (the name is illustrative only) so the __init__ below parses:
class WrappedVisionDataset(torch.utils.data.Dataset):
    def __init__(self,
                 which_set='Cifar-10',
                 root=None,
                 train=True,
                 download=True,
                 return_idxs=False,
                 num_classes=10,
                 aug=('random_order', 'random_h_flip', 'random_crop', 'random_rot_10', 'random_scale_0.9_1.1', 'random_shear_5', 'cutout'),
                 cut_n_holes=1,
                 cut_length=16,
                 dataset_norm_type='standardize'
                 ):

        image_length = 28 if 'MNIST' in which_set else 32
        self.norm_means, self.norm_stds = (0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)
        if dataset_norm_type == 'zeroone':
            self.norm_means, self.norm_stds = (0.5, 0.5, 0.5), (0.5, 0.5, 0.5)
        if 'MNIST' in which_set:
            self.norm_means, self.norm_stds = (0.1307,), (0.3081,)
            if dataset_norm_type == 'zeroone':
                self.norm_means, self.norm_stds = (0.5,), (0.5,)
        normalizer = transforms.Normalize(self.norm_means, self.norm_stds)

        transforms_list = []
        for augment in aug:
            # First do the things that don't change where the image is in the box
            if augment == 'random_h_flip':
                transforms_list.append(transforms.RandomHorizontalFlip())
            if augment == 'random_v_flip':
                transforms_list.append(transforms.RandomVerticalFlip())
            # Then mess with brightness etc.
            if augment == 'color_jitter':
                transforms_list.append(transforms.ColorJitter(brightness=0.3, contrast=0.3, hue=0.0))
            # Now do some shearing/cropping/rotation that changes where the image is
            if augment == 'affine':
                rot_degrees = 0
                scale_low = None
                scale_high = None
                shear_degrees = None
                for augment_inner in aug:
                    if 'random_rot' in augment_inner:
                        rot_degrees = int(augment_inner.split('_')[-1])
                    if 'random_scale' in augment_inner:
                        scale_low = float(augment_inner.split('_')[-2])
                        scale_high = float(augment_inner.split('_')[-1])
                    if 'random_shear' in augment_inner:
                        shear_degrees = int(augment_inner.split('_')[-1])

                transforms_list.append(transforms.RandomAffine(degrees=rot_degrees,
                                                               scale=None if (scale_low is None) or (scale_high is None) else (scale_low, scale_high),
                                                               shear=shear_degrees))
            if augment == 'random_crop':
                transforms_list.append(transforms.RandomCrop(size=[image_length, image_length], padding=4))

        transform = transforms.Compose(transforms_list) if 'random_order' not in aug else \
            transforms.Compose([transforms.RandomOrder(transforms=transforms_list)])

        transform.transforms.append(transforms.ToTensor())
        transform.transforms.append(normalizer)

        for augment in aug:
            # Finally do things that are related to regularisation
            if augment == 'cutout':
                transform.transforms.append(Cutout(n_holes=cut_n_holes, length=cut_length))

        if which_set == 'MNIST':
            self.dataset = datasets.MNIST(root='../data/MNIST' if root is None else root,
                                          train=train,
                                          download=download,
                                          transform=transform)

        elif which_set == 'Fashion-MNIST':
            self.dataset = datasets.FashionMNIST(root='../data/Fashion-MNIST' if root is None else root,
                                                 train=train,
                                                 download=download,
                                                 transform=transform)

        elif which_set == 'Cifar-100':
            self.dataset = datasets.CIFAR100(root='../data/Cifar-100' if root is None else root,
                                             train=train, download=download,
                                             transform=transform)
        elif 'Cinic-10' in which_set:
            root_to_cinic = '../data/Cinic-10' if root is None else root
            if download:
                download_cinic(root_to_cinic.replace('-enlarged',''))
                if '-enlarged' in which_set:
                    enlarge_cinic_10(root_to_cinic.replace('-enlarged',''))
            self.dataset = datasets.ImageFolder(root=root_to_cinic + ('/train' if train else '/test'),
                                                transform=transform)
        else:
            self.dataset = datasets.CIFAR10(root='../data/Cifar-10' if root is None else root,
                                            train=train, download=download,
                                            transform=transform)

        self.return_idxs = return_idxs
        self.num_classes = num_classes
        self.device = 'cuda' if torch.cuda.is_available() else 'cpu'
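# The Cutout transform appended above is not defined anywhere in this snippet.
# A minimal sketch following the commonly used implementation (DeVries &
# Taylor, 2017): zero out `n_holes` square patches of side `length` on a CHW
# tensor, applied after ToTensor().
import torch

class Cutout(object):
    def __init__(self, n_holes, length):
        self.n_holes = n_holes
        self.length = length

    def __call__(self, img):
        h, w = img.size(1), img.size(2)
        mask = torch.ones(h, w)
        for _ in range(self.n_holes):
            # pick a random center and clamp the patch to the image bounds
            y = torch.randint(h, (1,)).item()
            x = torch.randint(w, (1,)).item()
            y1, y2 = max(0, y - self.length // 2), min(h, y + self.length // 2)
            x1, x2 = max(0, x - self.length // 2), min(w, x + self.length // 2)
            mask[y1:y2, x1:x2] = 0.0
        return img * mask.expand_as(img)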
Example #3
    normalize = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225])
    ])
    augcolor = [
        transforms.ColorJitter(brightness=0.5,
                               contrast=0.5,
                               saturation=0.5,
                               hue=0.5)
    ]
    augaffine = [
        transforms.RandomAffine(20,
                                scale=(0.9, 1.1),
                                shear=20,
                                resample=PIL.Image.BICUBIC,
                                fillcolor=(100, 100, 100))
    ]
    augtrans = transforms.Compose([
        transforms.RandomApply(augcolor, p=0.8),
        transforms.RandomApply(augaffine, p=0.8),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225])
    ])

    bulk = ResNet_Bulk()
    head = ResNet_Head()

    curltrainer = curl.CURL(datasets.STL10,
Example #4
def get_datasets(data_dir, cfg, mode="train"):

    common_transforms = []
    train_transforms = []
    test_transforms = []
    #if cfg.transform.transform_resize_match:
    common_transforms.append(transforms.Resize((cfg.transform.transform_resize,cfg.transform.transform_resize)))
    
    if cfg.transform.transform_random_resized_crop:
        train_transforms.append(transforms.RandomResizedCrop(cfg.transform.transform_resize))
    if cfg.transform.transform_random_horizontal_flip:
        train_transforms.append(torchvision.transforms.RandomHorizontalFlip(p=0.5))
    if cfg.transform.transform_random_rotation:
        train_transforms.append(transforms.RandomRotation(cfg.transform.transform_random_rotation_degrees))#, fill=255))
    if cfg.transform.transform_random_shear:
        train_transforms.append(torchvision.transforms.RandomAffine(0,
                                                                    shear=(
                                                                        cfg.transform.transform_random_shear_x1,
                                                                        cfg.transform.transform_random_shear_x2,
                                                                        cfg.transform.transform_random_shear_y1,
                                                                        cfg.transform.transform_random_shear_y2
                                                                        ),
                                                                    fillcolor=255)) 
    if cfg.transform.transform_random_perspective:
        train_transforms.append(transforms.RandomPerspective(distortion_scale=cfg.transform.transform_perspective_scale, 
                                     p=0.5, 
                                     interpolation=3)
                                )
    if cfg.transform.transform_random_affine:
        train_transforms.append(transforms.RandomAffine(degrees=(cfg.transform.transform_degrees_min,
                                                                 cfg.transform.transform_degrees_max),
                                                        translate=(cfg.transform.transform_translate_a,
                                                                   cfg.transform.transform_translate_b),
                                                        fillcolor=255))
    data_transforms = {
            'train': transforms.Compose(common_transforms+train_transforms+[transforms.ToTensor()]),
            'test': transforms.Compose(common_transforms+[transforms.ToTensor()]),
            }

    train_dataset = datasets.ImageFolder(os.path.join(data_dir, "train"),
            data_transforms["train"])





    # for the final model we can join train, validation, validation samples datasets
    print(mode)
    if mode == "final_train":
        #train_dataset = torch.utils.data.ConcatDataset([train_dataset,
        #        val_dataset,
        #        val_samples_dataset])

        test_dataset = datasets.ImageFolder(os.path.join(data_dir, "test"),
                data_transforms["test"])

        samples_dataset = datasets.ImageFolder(os.path.join(data_dir, "samples"),
                data_transforms["test"])
        return train_dataset, test_dataset, samples_dataset
    else:
        if mode == "train":
            val_dataset = datasets.ImageFolder(os.path.join(data_dir, "val"),
                    data_transforms["test"])

            val_samples_dataset = datasets.ImageFolder(os.path.join(data_dir, "val_samples"),
                    data_transforms["test"])
            return train_dataset, val_dataset, val_samples_dataset

        if mode == "test":
            return train_dataset, test_dataset, samples_dataset
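# Hedged usage sketch (a directory layout with train/, val/, val_samples/,
# test/ and samples/ subfolders is assumed, plus a cfg as consumed above):
# train_ds, val_ds, val_samples_ds = get_datasets('./data', cfg, mode='train')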
Example #5
    out_dir = '../result'
    data_dir = '../data'
    test_interval = 1
    resume_interval = 1
    unit_num = 1000

    use_cuda = not no_cuda and torch.cuda.is_available()
    torch.manual_seed(seed)
    device = torch.device('cuda:{}'.format(gpu_id) if use_cuda else 'cpu')
    kwargs = {'num_workers': 1, 'pin_memory': True} if use_cuda else {}

    ## Data augmentation and normalization
    transform = transforms.Compose([
        transforms.RandomRotation(20),
        transforms.RandomAffine(degrees=0, translate=(0.2, 0.2)),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((0.5, ), (0.5, ))
    ])

    print('start to load train dataset')
    trainset = datasets.CIFAR10(root=data_dir,
                                train=True,
                                download=True,
                                transform=transform)
    trainloader = torch.utils.data.DataLoader(trainset,
                                              batch_size=batch_size,
                                              shuffle=True,
                                              **kwargs)
Example #6
# log the augmentation settings (one file handle instead of re-opening per line)
with open(filename, "a") as f:
    print('Resize 256', file=f)
    print('randomcrop 224', file=f)
    print('batchsize - 50', file=f)
    print('transforms.RandomHorizontalFlip()', file=f)
    print('transforms.RandomRotation(10)', file=f)
    print('transforms.RandomAffine(0,shear=10,scale=(0.8,1.6)),', file=f)
    print('transforms.ColorJitter(brightness=0.2,contrast=0.2,saturation=0.2),', file=f)

transform_train = transforms.Compose([
    transforms.Resize((256, 256)),
    transforms.RandomCrop(224),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(10),
    transforms.RandomAffine(0, shear=10, scale=(0.8, 1.6)),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2),
    transforms.ToTensor(),
    transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])
])

transform = transforms.Compose([
    transforms.Resize((256, 256)),
    transforms.ToTensor(),
    transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])
])

training_dataset = datasets.ImageFolder(root=PATHTrain,
                                        transform=transform_train)
validation_dataset = datasets.ImageFolder(root=PATHVal, transform=transform)
training_loader = torch.utils.data.DataLoader(dataset=training_dataset,
Example #7
import torchvision.transforms as transforms

data_transforms = transforms.Compose([
    transforms.Resize((96, 96)),
    transforms.ColorJitter(0.8, contrast=0.3),
    transforms.RandomAffine(10, scale=(0.8, 1.2), translate=(0.2, 0.2)),
    transforms.RandomHorizontalFlip(),  #flip transform
    transforms.ToTensor(),
    transforms.Normalize((0.3337, 0.3064, 0.3171), (0.2672, 0.2564, 0.2629))
])

validation_data_transforms = transforms.Compose([
    transforms.Resize((96, 96)),
    transforms.ToTensor(),
    transforms.Normalize((0.3337, 0.3064, 0.3171), (0.2672, 0.2564, 0.2629))
])
Example #8
                 dc_num_capsules, dc_num_routes, dc_in_channels, dc_out_channels, dc_routing_iters,
                 dec_caps_size, dec_num_caps, dec_img_size, dec_img_channels).cuda()

optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
lr_scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer,0.95)
# loss and optimizer
# nn.CrossEntropyLoss() computes softmax internally
# criterion = nn.CrossEntropyLoss()
# optimizer = torch.optim.SGD(model.parameters(), lr = learning_rate)

optimizer = torch.optim.Adam(model.parameters())  # NOTE: overrides the Adam defined above, while lr_scheduler still references the old optimizer
f = open('./checkpoint.txt','w')
f.close()

train_loader = torch.utils.data.DataLoader(
    datasets.MNIST(root='./MNIST', train=True, download=True,
                   transform=transforms.Compose([
                       transforms.RandomAffine(degrees=0, translate=(0.075, 0.075)),
                       transforms.ToTensor()])),
    batch_size=batch_size, shuffle=True)
test_loader = torch.utils.data.DataLoader(
    datasets.MNIST(root='./MNIST', train=False, download=True,
                   transform=transforms.ToTensor()),
    batch_size=batch_size, shuffle=True)

def test(model, test_loader, batch_size):
    test_loss = 0.0
    correct = 0.0
    for batch_idx, (data, labels) in enumerate(test_loader):
        data, labels = data.cuda(), one_hot(labels.cuda())
        output, masked_output, recnstrcted = model(data)
        #loss = model.loss(outputs, recnstrcted, data, labels)
        #test_loss += loss.data
        masked_cpu, labels_cpu = masked_output.cpu(), labels.cpu()
#     print(masked_output.shape)
#     print("\n",sum(np.argmax(masked_cpu.data.numpy(), 1) == np.argmax(labels_cpu.data.numpy(), 1)).shape)
        correct += sum(np.argmax(masked_cpu.data.numpy(), 1) == np.argmax(labels_cpu.data.numpy(), 1))
Example #9
def loadDataset(dataset, batch_size, train, transform=True, val=False):
    oargs = {}
    if dataset in ["MNIST", "CIFAR10", "CIFAR100", "FashionMNIST", "PhotoTour"]:
        oargs['train'] = train
    elif dataset in ["STL10", "SVHN"]:
        oargs['split'] = 'train' if train else 'test'
    elif dataset in ["LSUN"]:
        oargs['classes'] = 'train' if train else 'test'
    elif dataset in ["Imagenet12"]:
        pass
    elif dataset in ["AG"]:
        pass
    else:
        raise Exception(dataset + " is not yet supported")

    if dataset in ["MNIST"]:
        transformer = transforms.Compose([transforms.ToTensor()]
                                         + ([transforms.Normalize((0.1307,), (0.3081,))] if transform else []))
    elif dataset in ["CIFAR10", "CIFAR100"]:
        transformer = transforms.Compose(([  # transforms.RandomCrop(32, padding=4),
                                              transforms.RandomAffine(0, (0.125, 0.125), resample=PIL.Image.BICUBIC),
                                              transforms.RandomHorizontalFlip(),
                                              # transforms.RandomRotation(15, resample = PIL.Image.BILINEAR)
                                          ] if train else [])
                                         + [transforms.ToTensor()]
                                         + ([transforms.Normalize((0.4914, 0.4822, 0.4465),
                                                                  (0.2023, 0.1994, 0.2010))] if transform else []))
    elif dataset in ["SVHN"]:
        transformer = transforms.Compose([
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            transforms.Normalize((0.5, 0.5, 0.5), (0.2, 0.2, 0.2))])
    else:
        transformer = transforms.ToTensor()

    if dataset in ["Imagenet12"]:
        # https://github.com/facebook/fb.resnet.torch/blob/master/INSTALL.md#download-the-imagenet-dataset
        train_set = datasets.ImageFolder(
            '../data/Imagenet12/train' if train else '../data/Imagenet12/val',
            transforms.Compose([
                transforms.RandomResizedCrop(224),
                transforms.RandomHorizontalFlip(),
                transforms.ToTensor(),
                # `normalize` was referenced but never defined here; use the
                # standard ImageNet statistics
                transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
            ]))
    elif dataset in ["AG", "SST"]:
        X = np.load("./dataset/%s/X_%s.npy" % (dataset, 'train' if train else 'test'))
        y = np.load("./dataset/%s/y_%s.npy" % (dataset, 'train' if train else 'test'))
        if val:
            X = X[-1000:]
            y = y[-1000:]
        elif train:
            X = X[:-1000]
            y = y[:-1000]

        x = torch.from_numpy(X)
        train_set = torch.utils.data.TensorDataset(x, torch.from_numpy(y))
    else:
        train_set = getattr(datasets, dataset)('../data', download=True, transform=transformer, **oargs)
    return torch.utils.data.DataLoader(
        train_set,
        batch_size=batch_size,
        shuffle=True,
        # `use_cuda` was undefined in this snippet; derive it from the runtime
        **({'num_workers': 1, 'pin_memory': True} if torch.cuda.is_available() else {}))
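# Hedged usage sketch; the torchvision datasets download into ../data on first use:
# train_loader = loadDataset('CIFAR10', batch_size=128, train=True)
# test_loader = loadDataset('CIFAR10', batch_size=128, train=False)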
Example #10
    def __init__(self,
                 svhn_path,
                 curlfrac=0.5,
                 supfrac=0.5,
                 k=1,
                 shuffle=True,
                 augment=False,
                 use_cuda=False,
                 dload_dataset=False):
        self.k = k
        self.softplus = nn.Softplus()
        self.bulk = Net_Bulk()
        self.head = Net_Head()
        normalize = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
        ])
        augcolor = [
            transforms.ColorJitter(brightness=0.5,
                                   contrast=0.5,
                                   saturation=0.5,
                                   hue=0.5)
        ]
        augaffine = [
            transforms.RandomAffine(20,
                                    scale=(0.9, 1.1),
                                    shear=20,
                                    resample=PIL.Image.BICUBIC,
                                    fillcolor=(100, 100, 100))
        ]
        augtrans = transforms.Compose([
            transforms.RandomApply(augcolor, p=0.8),
            transforms.RandomApply(augaffine, p=0.8),
            transforms.ToTensor(),
            transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
        ])
        contrasttrans = transforms.Compose([
            transforms.ColorJitter(brightness=0.5,
                                   contrast=0.5,
                                   saturation=0.5,
                                   hue=0.5),
            transforms.RandomAffine(20,
                                    scale=(0.9, 1.1),
                                    shear=20,
                                    resample=PIL.Image.BICUBIC,
                                    fillcolor=(100, 100, 100)),
            transforms.ToTensor(),
            transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
        ])
        if augment:
            transform = augtrans
        else:
            transform = normalize
        self.suptrainset = datasets.SVHN(svhn_path,
                                         split='train',
                                         transform=transform,
                                         target_transform=None,
                                         download=dload_dataset)
        self.testset = datasets.SVHN(svhn_path,
                                     split='test',
                                     transform=normalize,
                                     target_transform=None,
                                     download=dload_dataset)

        if curlfrac + supfrac > 1.0:
            print("CURL fraction plus SUP fraction cannot exceed 1")
            print("Setting to defaults")
            curlfrac, supfrac = 0.5, 0.5
        trainset_size = len(self.suptrainset)
        indices = list(range(trainset_size))
        end = int(np.floor((curlfrac + supfrac) * trainset_size))
        curlend = int(np.floor(curlfrac / (supfrac + curlfrac) * end))
        if shuffle:
            np.random.shuffle(indices)
        curltrain_indices = indices[:curlend]
        suptrain_indices = indices[curlend:end]
        print(f"Number of labeled images: {len(suptrain_indices)}")
        print(f"Number of unlabeled images: {len(curltrain_indices)}")
        self.suptrain_sampler = SubsetRandomSampler(suptrain_indices)
        self.curltrain_sampler = SubsetRandomSampler(curltrain_indices)

        #self.curltrainset = ContrastedData(svhn_path, split='train', accepted_indices=curltrain_indices, contrast_transform=contrasttrans, k=k, transform=transform, download=dload_dataset)
        self.curltrainset = ApproxContrastedData(
            svhn_path,
            split='train',
            accepted_indices=curltrain_indices,
            contrast_transform=contrasttrans,
            k=k,
            transform=transform,
            download=dload_dataset)

        if use_cuda:
            if torch.cuda.is_available():
                self.device = torch.device('cuda')
            else:
                print("CUDA not available")
                self.device = torch.device('cpu')
        else:
            self.device = torch.device('cpu')
        self.bulk.to(self.device)
        self.head.to(self.device)
Example #11
    def __init__(self,
                 svhn_path,
                 frac=0.5,
                 shuffle=True,
                 augment=True,
                 use_cuda=False,
                 dload_dataset=False):
        """
        frac : float
            fraction of dataset to use for training
        """

        self.net = Net_Full()
        normalize = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
        ])
        augcolor = [
            transforms.ColorJitter(brightness=0.5,
                                   contrast=0.5,
                                   saturation=0.5,
                                   hue=0.5)
        ]
        augaffine = [
            transforms.RandomAffine(20,
                                    scale=(0.9, 1.1),
                                    shear=20,
                                    resample=PIL.Image.BICUBIC,
                                    fillcolor=(100, 100, 100))
        ]
        augtrans = transforms.Compose([
            transforms.RandomApply(augcolor, p=0.8),
            transforms.RandomApply(augaffine, p=0.8),
            transforms.ToTensor(),
            transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
        ])
        # the original branches were inverted (augment=True selected the plain
        # normalize pipeline); apply the augmentations when augment is True
        if augment:
            transform = augtrans
        else:
            transform = normalize
        trainset = datasets.SVHN(svhn_path,
                                 split='train',
                                 transform=transform,
                                 target_transform=None,
                                 download=dload_dataset)
        self.trainset = trainset

        testset = datasets.SVHN(svhn_path,
                                split='test',
                                transform=normalize,
                                target_transform=None,
                                download=dload_dataset)
        self.testset = testset

        trainset_size = len(self.trainset)
        indices = list(range(trainset_size))
        end = int(np.floor(frac * trainset_size))
        if shuffle:
            np.random.shuffle(indices)
        train_indices = indices[:end]
        self.train_sampler = SubsetRandomSampler(train_indices)

        if use_cuda:
            if torch.cuda.is_available():
                self.device = torch.device('cuda')
            else:
                print("CUDA not available")
                self.device = torch.device('cpu')
        else:
            self.device = torch.device('cpu')
        self.net.to(self.device)
Example #12
    def __init__(self, root, add_labeled=0, advanced_transforms=True, remove_classes=False,
                 expand_labeled=0, expand_unlabeled=0, unlabeled_subset_ratio=1, oversampling=True, stratified=False,
                 merged=True, unlabeled_augmentations=False, seed=9999, k_medoids=False, k_medoids_model=None,
                 k_medoids_n_clusters=10, start_labeled=300):
        self.root = root
        self.train_path = os.path.join(self.root, "isic", "train")
        self.test_path = os.path.join(self.root, "isic", "test")
        self.isic_mean = (0.6679, 0.5297, 0.5246)
        self.isic_std = (0.1338, 0.1470, 0.1577)
        self.input_size = 128
        self.crop_size = 128
        self.expand_labeled = expand_labeled
        self.expand_unlabeled = expand_unlabeled
        self.oversampling = oversampling
        self.stratified = stratified
        self.merged = merged
        self.merge_classes = []

        if advanced_transforms:
            self.transform_train = transforms.Compose([
                transforms.RandomCrop(self.crop_size),
                transforms.RandomAffine(degrees=90, translate=(0.2, 0.2)),
                transforms.Resize(size=self.input_size),
                transforms.RandomHorizontalFlip(),
                transforms.RandomVerticalFlip(),
                transforms.ToTensor(),
                transforms.RandomErasing(scale=(0.02, 0.2), ratio=(0.3, 0.9)),
            ])
            self.transform_test = transforms.Compose([
                transforms.Resize(size=self.input_size),
                transforms.ToTensor(),
            ])

        else:
            self.transform_train = transforms.Compose([
                transforms.Resize(size=self.input_size),
                transforms.ToTensor(),
            ])
            self.transform_test = transforms.Compose([
                transforms.Resize(size=self.input_size),
                transforms.ToTensor(),
            ])
        self.transform_autoencoder = transforms.Compose([
            transforms.RandomCrop(self.crop_size),
            transforms.RandomAffine(degrees=90, translate=(0.2, 0.2)),
            transforms.Resize(size=self.input_size),
            transforms.RandomHorizontalFlip(),
            transforms.RandomVerticalFlip(),
            transforms.ToTensor(),
            transforms.RandomErasing(scale=(0.02, 0.2), ratio=(0.3, 0.9)),
        ])
        self.transform_simclr = TransformsSimCLR(size=self.input_size)
        self.transform_fixmatch = TransformFix(crop_size=self.crop_size, input_size=self.input_size)
        self.merged_classes = 0 if self.merged else 0  # NOTE: both branches are 0 in the original
        self.num_classes = 8 - self.merged_classes
        self.add_labeled = add_labeled
        self.unlabeled_subset_ratio = unlabeled_subset_ratio
        self.unlabeled_subset_num = None
        self.remove_classes = remove_classes
        self.unlabeled_augmentations = unlabeled_augmentations
        self.labeled_class_samples = None
        self.classes_to_remove = [2, 3, 4, 5, 6, 7]
        self.seed = seed
        self.labeled_amount = self.num_classes
        self.k_medoids = k_medoids
        self.k_medoids_model = k_medoids_model
        self.k_medoids_n_clusters = k_medoids_n_clusters
        self.start_labeled = start_labeled
Example #13
def _main_(device=""):
    print("Import Complete")

    n_epochs = 16
    batch_size_train = 4
    batch_size_test = 4
    learning_rate = 0.01
    momentum = 0.5

    log_interval = 5

    torch.backends.cudnn.enabled = True
    # Set fixed random number seed
    #random_seed = 42
    #torch.manual_seed(random_seed)
    #print("Seeded Torch")

    data_transforms = {
        'train':
        transforms.Compose([
            transforms.RandomRotation(15),
            transforms.RandomAffine(15),
            #transforms.Resize(224),
            transforms.CenterCrop(64),
            transforms.ColorJitter(0.1, 0.1, 0.1),
            transforms.Grayscale(),
            transforms.ToTensor(),
            #transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
        ]),
        'test':
        transforms.Compose([
            #transforms.Resize(224),
            transforms.CenterCrop(64),
            transforms.Grayscale(),
            transforms.ToTensor(),
            #transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
        ])
    }

    location = r"Images"

    image_datasets = {
        x: ImageFolder(os.path.join(location, x), data_transforms[x])
        for x in ['train', 'test']
    }

    train_loader = torch.utils.data.DataLoader(image_datasets['train'],
                                               batch_size=batch_size_train,
                                               shuffle=True,
                                               num_workers=1)

    test_loader = torch.utils.data.DataLoader(image_datasets['test'],
                                              batch_size=batch_size_test,
                                              shuffle=True,
                                              num_workers=1)

    class_names = image_datasets['train'].classes

    print("Loaded Dataset")

    if device == "":
        # Work out if we can use the GPU
        device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    print(f"Running on {device}")

    ## Transfer Learning Test
    import torchvision.models as models

    ## Other Models
    #model = models.resnet18(pretrained = True)
    #model = models.googlenet(pretrained= True)

    #num_features = model.fc.in_features
    #model.fc = nn.Linear(num_features, 6)

    # Original Model
    model = MLP()
    model.to(device)  # the unconditional model.cuda() was dropped; it crashes on CPU-only runs
    # Define the loss function and optimizer
    optimizer = torch.optim.SGD(model.parameters(),
                                lr=learning_rate,
                                momentum=momentum)

    # Data
    train_losses = []
    train_counter = []
    test_losses = []

    train_losses_once = []
    test_losses_once = []

    def train(epoch, device, model):
        model.train()  # Set model to training mode

        len_dataset = len(train_loader.dataset)
        # Iterate over the DataLoader for training data
        for batch_no, data in enumerate(train_loader, 0):

            # Get inputs
            inputs, targets = data
            # Send to the right place
            inputs, targets = inputs.to(device), targets.to(device)

            # Zero the gradients
            optimizer.zero_grad()

            # Perform forward pass
            outputs = model(inputs)

            # Compute loss
            #loss = criterion(outputs, targets)
            loss = F.nll_loss(outputs, targets)

            # Perform backward pass
            loss.backward()

            # Perform optimization
            optimizer.step()

            # Print statistics
            #current_loss += loss.item()
            if batch_no % log_interval == 0:
                print(f"Epoch: {epoch}, Batch: {batch_no}")
                #print('Loss after mini-batch %5d: %.3f' %
                #      (i + 1, current_loss / 500))
                train_losses.append(loss.item())
                train_counter.append((batch_no * batch_size_train) +
                                     ((epoch - 1) * len_dataset))
                torch.save(model.state_dict(), r'.\results\model.pth')
                torch.save(optimizer.state_dict(), r'.\results\optimizer.pth')

    def test(device, model):
        model.eval()  # Set model to evaluation mode

        test_loss = 0
        correct = 0

        len_dataset = len(test_loader.dataset)
        with torch.no_grad():
            for inputs, targets in test_loader:
                inputs, targets = inputs.to(device), targets.to(device)
                output = model(inputs)
                test_loss += F.nll_loss(output, targets,
                                        reduction='sum').item()
                prediction = output.data.max(1, keepdim=True)[1]
                correct += prediction.eq(
                    targets.data.view_as(prediction)).sum()
        test_loss /= len_dataset
        test_losses.append(test_loss)
        print(
            f'\nTest set: Avg. loss: {test_loss:.4f}, Accuracy: {correct}/{len_dataset} ({100. * correct / len_dataset:.0f}%)\n'
        )

    def imshow(inp, title=None):
        """Imshow for Tensor."""
        inp = inp.numpy().transpose((1, 2, 0))
        mean = np.array([0.485, 0.456, 0.406])
        std = np.array([0.229, 0.224, 0.225])
        inp = std * inp + mean
        inp = np.clip(inp, 0, 1)
        plt.imshow(inp)
        if title is not None:
            plt.title(title)
        plt.pause(0.001)  # pause a bit so that plots are updated

    def visualize_model(model, num_images=6):
        was_training = model.training
        model.eval()
        images_so_far = 0
        fig = plt.figure()

        with torch.no_grad():
            for i, (inputs, labels) in enumerate(test_loader):
                inputs, labels = inputs.to(device), labels.to(device)

                outputs = model(inputs)
                _, preds = torch.max(outputs, 1)

                for j in range(inputs.size()[0]):
                    images_so_far += 1
                    ax = plt.subplot(num_images // 2, 2, images_so_far)
                    ax.axis('off')
                    ax.set_title('predicted: {}'.format(class_names[preds[j]]))
                    imshow(inputs.cpu().data[j])

                    if images_so_far == num_images:
                        model.train(mode=was_training)
                        return
            model.train(mode=was_training)

    # Baseline test: how good is the raw, untrained model?
    test(device, model)

    for epoch in range(1, n_epochs + 1):
        train(epoch, device, model)
        test(device, model)

        train_losses_once.append(train_losses[-1])
        test_losses_once.append(test_losses[-1])
    print("Training Complete")

    visualize_model(model)

    ## Loss Graph
    plt.figure()

    plt.axis([0, len(train_losses), 0, ceil(max(train_losses))])
    plt.xlabel('Number of Batches')
    plt.ylabel('Loss')

    plt.plot(train_losses, label="Training")

    print(len(train_losses))

    plt.plot(test_losses, label="Testing"
             )  #[x for x in range(0, len(train_losses), 10)], test_losses)

    plt.legend()
    plt.figure()

    plt.xlabel('Epochs')
    plt.ylabel('Loss')
    plt.axis([1, 16, 0, 4])

    plt.plot([x for x in range(1, n_epochs + 1)],
             train_losses_once,
             label="Training")
    plt.plot([x for x in range(1, n_epochs + 1)],
             test_losses_once,
             label="Testing")
    plt.legend()
Example #14

# This example survives only as a fragment of a Gaussian-noise transform; the
# class header and __call__ opening are reconstructed (hedged) from the
# surviving `return tensor` / __repr__ lines and the AddGaussianNoise() usage
# below (torch is assumed imported in the original script):
class AddGaussianNoise(object):
    def __init__(self, mean=0., std=0.1):
        self.mean = mean
        self.std = std

    def __call__(self, tensor):
        tensor = tensor + torch.randn(tensor.size()) * self.std + self.mean
        return tensor

    def __repr__(self):
        return self.__class__.__name__ + '(mean={0}, std={1})'.format(
            self.mean, self.std)


### transform.Normalize(torch.mean(

### Set Augmentations:

data_transforms = {
    'train':
    transforms.Compose([
        transforms.RandomResizedCrop(224, scale=(0.5, 1.0)),
        transforms.RandomAffine(30),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        AddGaussianNoise(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
    'val':
    transforms.Compose([
        transforms.Resize(224),
        #transforms.CenterCrop(100),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
}

# %% ----------------------------------- Helper Functions --------------------------------------------------------------
Example #15

# Fragment of an RMB-banknote classification tutorial; the opening of the
# Compose was lost in extraction and is reconstructed here (norm_mean and
# norm_std are assumed to be defined earlier in the original script):
train_transform = transforms.Compose([
    # transforms.ToTensor(),
    # transforms.RandomErasing(p=1, scale=(0.02, 0.33), ratio=(0.3, 3.3), value=(254/255, 0, 0)),
    # transforms.RandomErasing(p=.3, scale=(0.02, 0.33), ratio=(0.3, 3.3), value='1234'),

    # 1 RandomChoice
    # transforms.RandomChoice([transforms.RandomVerticalFlip(p=1), transforms.RandomHorizontalFlip(p=1)]),

    # 2 RandomApply
    # transforms.RandomApply([transforms.RandomAffine(degrees=0, shear=45, fillcolor=(255, 0, 0)),
    #                         transforms.Grayscale(num_output_channels=3)], p=0.5),
    # 3 RandomOrder
    transforms.RandomOrder([
        transforms.RandomRotation(15),
        transforms.Pad(padding=32),
        transforms.RandomAffine(degrees=0,
                                translate=(0.01, 0.1),
                                scale=(0.9, 1.1))
    ]),
    transforms.ToTensor(),
    transforms.Normalize(norm_mean, norm_std),
])

train_data = RMBDataset(data_dir=train_dir, transform=train_transform)
train_loader = DataLoader(dataset=train_data,
                          batch_size=BATCH_SIZE,
                          shuffle=True)

# ============================ step 5/5 training ============================
for i, data in enumerate(train_loader):
    inputs, labels = data  # B C H W
Example #16

# note: the flip/rotation/crop/blur/jitter/affine ops below run *after*
# ToTensor(), which requires the tensor-transform support added in torchvision 0.8
data_transforms_train = transforms.Compose([
    transforms.Resize((456, 456)),
    transforms.ToTensor(),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomRotation(25),
    transforms.RandomResizedCrop((456, 456),
                                 scale=(0.7, 1.3),
                                 ratio=(0.8, 1.2)),
    transforms.GaussianBlur(5, sigma=(0.1, 2.0)),
    transforms.ColorJitter(brightness=0.15,
                           contrast=0.15,
                           saturation=0.15,
                           hue=0.15),
    transforms.RandomAffine(0, translate=(0.07, 0.07), scale=(0.6, 1.4)),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

dirName = args.dirname  #'crop_dataset/train_images' # 'bird_dataset/train_images'

listOfFile = os.listdir(dirName)
completeFileList = list()
for file in tqdm(listOfFile):
    completePath = os.path.join(dirName, file)
    image_paths = os.listdir(completePath)
    for image in image_paths:
        try:
            img_path = os.path.join(completePath, image)
            img = Image.open(img_path)
            img_aug = data_transforms_train(img)
        except (IOError, OSError):
            # the except clause was lost when this example was truncated;
            # skip files that fail to load or augment
            continue
Example #17

# augment['1'] = transforms.Compose(
#     [
#         transforms.ToPILImage(),
#         transforms.RandomAffine(degrees=10, translate=(0.1, 0.1), scale=(0.8, 1.2), shear=20),
#         transforms.ToTensor(),
#         transforms.Normalize((0.5, ), (0.5, ))])  # change to [C, H, W]

# augment['2'] = transforms.Compose(
#     [
#         transforms.ToTensor(),
#         transforms.Normalize((0.5, ), (0.5, ))])  # change to [C, H, W]

augment = {}  # the dict was never initialised in this fragment
augment['1'] = transforms.Compose(
    [
        transforms.ToPILImage(),
        transforms.RandomAffine(degrees=10, translate=(
            0.1, 0.1), scale=(0.8, 1.2), shear=20),
        transforms.ToTensor()])  # change to [C, H, W]

augment['2'] = transforms.Compose(
    [
        transforms.ToTensor()])  # change to [C, H, W]

train_dataset = RegularDataset(opt, augment)

train_dataloader = DataLoader(train_dataset,
                              batch_size=opt.batchSize,
                              shuffle=True,
                              num_workers=int(opt.nThreads),
                              pin_memory=True)

dataset_size = len(train_dataset)
Example #18
def Prepare_DataLoaders(Results_parameters, split, input_size=224):

    Dataset = Results_parameters['Dataset']
    data_dir = Results_parameters['data_dir']

    # Data augmentation and normalization for training
    # Just normalization and resize for test
    # Data transformations as described in:
    # http://openaccess.thecvf.com/content_cvpr_2018/papers/Xue_Deep_Texture_Manifold_CVPR_2018_paper.pdf
    if not (Results_parameters['rotation']):
        data_transforms = {
            'train':
            transforms.Compose([
                transforms.Resize(Results_parameters['resize_size']),
                transforms.RandomResizedCrop(input_size, scale=(.8, 1.0)),
                transforms.RandomHorizontalFlip(),
                transforms.ToTensor(),
                transforms.Normalize([0.485, 0.456, 0.406],
                                     [0.229, 0.224, 0.225])
            ]),
            'test':
            transforms.Compose([
                transforms.Resize(Results_parameters['center_size']),
                transforms.CenterCrop(input_size),
                transforms.ToTensor(),
                transforms.Normalize([0.485, 0.456, 0.406],
                                     [0.229, 0.224, 0.225])
            ]),
        }
    else:
        data_transforms = {
            'train':
            transforms.Compose([
                transforms.Resize(Results_parameters['resize_size']),
                transforms.RandomResizedCrop(input_size, scale=(.8, 1.0)),
                transforms.RandomHorizontalFlip(),
                transforms.ToTensor(),
                transforms.Normalize([0.485, 0.456, 0.406],
                                     [0.229, 0.224, 0.225])
            ]),
            'test':
            transforms.Compose([
                transforms.Resize(Results_parameters['center_size']),
                transforms.CenterCrop(input_size),
                transforms.RandomAffine(Results_parameters['degrees']),
                transforms.ToTensor(),
                transforms.Normalize([0.485, 0.456, 0.406],
                                     [0.229, 0.224, 0.225])
            ]),
        }

    # Create training and test datasets; for these results, apply the test
    # transforms to both the training and test datasets
    if Dataset == 'DTD':
        train_dataset = DTD_data(data_dir,
                                 data='train',
                                 numset=split + 1,
                                 img_transform=data_transforms['test'])
        validation_dataset = DTD_data(data_dir,
                                      data='val',
                                      numset=split + 1,
                                      img_transform=data_transforms['test'])

        test_dataset = DTD_data(data_dir,
                                data='test',
                                numset=split + 1,
                                img_transform=data_transforms['test'])
        #Combine training and validation datasets
        train_dataset = torch.utils.data.ConcatDataset(
            (train_dataset, validation_dataset))

    elif Dataset == 'MINC_2500':
        train_dataset = MINC_2500_data(data_dir,
                                       data='train',
                                       numset=split + 1,
                                       img_transform=data_transforms['test'])

        test_dataset = MINC_2500_data(data_dir,
                                      data='test',
                                      numset=split + 1,
                                      img_transform=data_transforms['test'])
    else:
        # Create training and test datasets
        train_dataset = GTOS_mobile_single_data(
            data_dir,
            train=True,
            image_size=Results_parameters['resize_size'],
            img_transform=data_transforms['test'])
        test_dataset = GTOS_mobile_single_data(
            data_dir, train=False, img_transform=data_transforms['test'])

    image_datasets = {'train': train_dataset, 'test': test_dataset}

    #If training dataset is larger than number of images for TSNE, subsample
    if len(image_datasets['train']) > Results_parameters['Num_TSNE_images']:
        indices = np.arange(len(image_datasets['train']))
        y = image_datasets['train'].targets
        #Use stratified split to balance training validation splits,
        #set random state to be same for each encoding method
        _, _, _, _, _, TSNE_indices = train_test_split(
            y,
            y,
            indices,
            stratify=y,
            test_size=Results_parameters['Num_TSNE_images'],
            random_state=split + 1)

        # Creating PT data samplers and loaders:
        TSNE_sampler = {
            'train': SubsetRandomSampler(TSNE_indices),
            'test': None
        }
    else:
        TSNE_sampler = {'train': None, 'test': None}
    # Create training and test dataloaders
    dataloaders_dict = {
        x: torch.utils.data.DataLoader(
            image_datasets[x],
            batch_size=Results_parameters['batch_size'][x],
            shuffle=False,
            sampler=TSNE_sampler[x],
            num_workers=Results_parameters['num_workers'],
            pin_memory=Results_parameters['pin_memory'])
        for x in ['train', 'test']
    }

    return dataloaders_dict
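# Hedged usage sketch, with Results_parameters carrying the keys consumed above
# ('Dataset', 'data_dir', 'resize_size', 'center_size', 'batch_size', ...):
# dataloaders = Prepare_DataLoaders(Results_parameters, split=0)
# train_loader, test_loader = dataloaders['train'], dataloaders['test']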
Example #19
def train(args):
    if not os.path.exists('checkpoints'):
        os.mkdir('checkpoints')

    # Setup Augmentations
    data_aug = transforms.Compose([
        transforms.ToPILImage(),
        transforms.RandomHorizontalFlip(p=0.5),
        transforms.RandomAffine(degrees=10,
                                translate=(0.05, 0.05),
                                scale=(0.95, 1.05)),
    ])

    # Setup Dataloader
    data_loader = get_loader(args.dataset)
    data_path = get_data_path(args.dataset)
    t_loader = data_loader(data_path,
                           is_transform=True,
                           split='train',
                           version='simplified',
                           img_size=(args.img_rows, args.img_cols),
                           augmentations=data_aug,
                           train_fold_num=args.train_fold_num,
                           num_train_folds=args.num_train_folds,
                           seed=args.seed)
    v_loader = data_loader(data_path,
                           is_transform=True,
                           split='val',
                           version='simplified',
                           img_size=(args.img_rows, args.img_cols),
                           num_val=args.num_val,
                           seed=args.seed)

    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed(args.seed)

    n_classes = t_loader.n_classes
    trainloader = data.DataLoader(t_loader,
                                  batch_size=args.batch_size,
                                  num_workers=2,
                                  shuffle=True,
                                  pin_memory=True,
                                  drop_last=True)
    valloader = data.DataLoader(v_loader,
                                batch_size=args.batch_size,
                                num_workers=2,
                                pin_memory=True)

    # Setup Metrics
    running_metrics = runningScore(n_classes)

    # Setup Model
    model = get_model(args.arch, n_classes, use_cbam=args.use_cbam)
    model.cuda()

    optimizer = torch.optim.Adam(filter(lambda p: p.requires_grad,
                                        model.parameters()),
                                 lr=args.l_rate,
                                 weight_decay=args.weight_decay)

    if args.num_cycles > 0:
        len_trainloader = int(5e6)  # 4960414
        scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
            optimizer,
            T_max=args.num_train_folds * len_trainloader // args.num_cycles,
            eta_min=args.l_rate * 1e-1)
    else:
        scheduler = torch.optim.lr_scheduler.MultiStepLR(
            optimizer, milestones=[2, 4, 6, 8], gamma=0.5)

    loss_fn = F.cross_entropy

    start_epoch = 0
    if args.resume is not None:
        if os.path.isfile(args.resume):
            print("Loading model and optimizer from checkpoint '{}'".format(
                args.resume))
            checkpoint = torch.load(args.resume)

            model_dict = model.state_dict()
            if checkpoint.get('model_state', -1) == -1:
                model_dict.update(
                    convert_state_dict(checkpoint,
                                       load_classifier=args.load_classifier))
            else:
                model_dict.update(
                    convert_state_dict(checkpoint['model_state'],
                                       load_classifier=args.load_classifier))

                print(
                    "Loaded checkpoint '{}' (epoch {}, mapk {:.5f}, top1_acc {:7.3f}, top2_acc {:7.3f} top3_acc {:7.3f})"
                    .format(args.resume, checkpoint['epoch'],
                            checkpoint['mapk'], checkpoint['top1_acc'],
                            checkpoint['top2_acc'], checkpoint['top3_acc']))
            model.load_state_dict(model_dict)

            if checkpoint.get('optimizer_state', None) is not None:
                optimizer.load_state_dict(checkpoint['optimizer_state'])
                start_epoch = checkpoint['epoch']
        else:
            print("No checkpoint found at '{}'".format(args.resume))

    loss_sum = 0.0
    for epoch in range(start_epoch, args.n_epoch):
        start_train_time = timeit.default_timer()

        if args.num_cycles == 0:
            scheduler.step(epoch)

        model.train()
        optimizer.zero_grad()
        for i, (images, labels, recognized, _) in enumerate(trainloader):
            if args.num_cycles > 0:
                iter_num = i + epoch * len_trainloader
                scheduler.step(
                    iter_num %
                    (args.num_train_folds * len_trainloader //
                     args.num_cycles))  # Cosine Annealing with Restarts

            images = images.cuda()
            labels = labels.cuda()
            recognized = recognized.cuda()

            outputs = model(images)

            loss = (loss_fn(outputs,
                            labels.view(-1),
                            ignore_index=t_loader.ignore_index,
                            reduction='none') * recognized.view(-1)).mean()
            loss = loss / float(args.iter_size)  # Accumulated gradients
            loss_sum = loss_sum + loss.item()  # .item(): don't retain the autograd graph across iterations

            loss.backward()

            if (i + 1) % args.print_train_freq == 0:
                print("Epoch [%d/%d] Iter [%6d/%6d] Loss: %.4f" %
                      (epoch + 1, args.n_epoch, i + 1, len(trainloader),
                       loss_sum))

            if (i + 1) % args.iter_size == 0 or i == len(trainloader) - 1:
                optimizer.step()
                optimizer.zero_grad()
                loss_sum = 0.0

        mapk_val = AverageMeter()
        top1_acc_val = AverageMeter()
        top2_acc_val = AverageMeter()
        top3_acc_val = AverageMeter()
        mean_loss_val = AverageMeter()
        model.eval()
        with torch.no_grad():
            for i_val, (images_val, labels_val, recognized_val,
                        _) in tqdm(enumerate(valloader)):
                images_val = images_val.cuda()
                labels_val = labels_val.cuda()
                recognized_val = recognized_val.cuda()

                outputs_val = model(images_val)

                loss_val = (loss_fn(outputs_val,
                                    labels_val.view(-1),
                                    ignore_index=v_loader.ignore_index,
                                    reduction='none') *
                            recognized_val.view(-1)).mean()
                mean_loss_val.update(loss_val, n=images_val.size(0))

                _, pred = outputs_val.topk(k=3,
                                           dim=1,
                                           largest=True,
                                           sorted=True)
                running_metrics.update(labels_val, pred[:, 0])

                acc1, acc2, acc3 = accuracy(outputs_val,
                                            labels_val,
                                            topk=(1, 2, 3))
                top1_acc_val.update(acc1, n=images_val.size(0))
                top2_acc_val.update(acc2, n=images_val.size(0))
                top3_acc_val.update(acc3, n=images_val.size(0))

                mapk_v = mapk(labels_val, pred, k=3)
                mapk_val.update(mapk_v, n=images_val.size(0))

        print('Mean Average Precision (MAP) @ 3: {:.5f}'.format(mapk_val.avg))
        print('Top 3 accuracy: {:7.3f} / {:7.3f} / {:7.3f}'.format(
            top1_acc_val.avg, top2_acc_val.avg, top3_acc_val.avg))
        print('Mean val loss: {:.4f}'.format(mean_loss_val.avg))

        score, class_iou = running_metrics.get_scores()

        for k, v in score.items():
            print(k, v)

        state = {
            'epoch': epoch + 1,
            'model_state': model.state_dict(),
            'optimizer_state': optimizer.state_dict(),
            'mapk': mapk_val.avg,
            'top1_acc': top1_acc_val.avg,
            'top2_acc': top2_acc_val.avg,
            'top3_acc': top3_acc_val.avg,
        }
        torch.save(
            state, "checkpoints/{}_{}_{}_{}x{}_{}-{}-{}_model.pth".format(
                args.arch, args.dataset, epoch + 1, args.img_rows,
                args.img_cols, args.train_fold_num, args.num_train_folds,
                args.num_val))

        running_metrics.reset()
        mapk_val.reset()
        top1_acc_val.reset()
        top2_acc_val.reset()
        top3_acc_val.reset()
        mean_loss_val.reset()

        elapsed_train_time = timeit.default_timer() - start_train_time
        print('Training time (epoch {0:5d}): {1:10.5f} seconds'.format(
            epoch + 1, elapsed_train_time))
Example #20
def get_transforms_test():
    transforms_test_list = []

    if config['transforms'] == 'pytorch':
        if config['pytorch']['resize']['test']:
            transforms_test_list.append(
                transforms.Resize(
                    size=(config['pytorch']['resize']['test_size'],
                          config['pytorch']['resize']['test_size'])))
        if config['pytorch']['centerCrop']['test']:
            transforms_test_list.append(
                # the original appended Resize here; CenterCrop matches the flag
                transforms.CenterCrop(
                    size=(config['pytorch']['centerCrop']['test_size'],
                          config['pytorch']['centerCrop']['test_size'])))
        if config['pytorch']['colorJitter']['test']:
            transforms_test_list.append(
                transforms.RandomApply([
                    transforms.ColorJitter(
                        config['pytorch']['colorJitter']['brightness'])
                ],
                                       p=0.75))
        if config['pytorch']['randomCrop']['test']:
            transforms_test_list.append(
                transforms.RandomCrop(
                    size=config['pytorch']['randomCrop']['test_size'],
                    padding=config['pytorch']['randomCrop']['padding']))
        if config['pytorch']['randomResizedCrop']['test']:
            transforms_test_list.append(
                transforms.RandomResizedCrop(
                    size=config['pytorch']['randomResizedCrop']['test_size']))
        if config['pytorch']['randomHorizontalFlip']['test']:
            transforms_test_list.append(transforms.RandomHorizontalFlip())
        if config['pytorch']['randomAffine']['test']:
            transforms_test_list.append(
                transforms.RandomAffine(
                    degrees=config['pytorch']['randomAffine']['degrees'],
                    scale=config['pytorch']['randomAffine']['scale']))
        if config['pytorch']['randomRotation']['test']:
            transforms_test_list.append(
                transforms.RandomRotation(
                    degrees=config['pytorch']['randomRotation']['degrees']))
        if config['pytorch']['toTensor']['test']:
            transforms_test_list.append(transforms.ToTensor())
        if config['pytorch']['normalize']['test']:
            transforms_test_list.append(
                transforms.Normalize([0.485, 0.456, 0.406],
                                     [0.229, 0.224, 0.225]))
        if config['pytorch']['randomErasing']['test']:
            transforms_test_list.append(
                transforms.RandomErasing(
                    p=config['pytorch']['randomErasing']['p'],
                    value=config['pytorch']['randomErasing']['value']))
        test_transforms = transforms.Compose(transforms_test_list)
    else:
        if config['albumentations']['resize']['test']:
            transforms_test_list.append(
                A.Resize(config['albumentations']['resize']['test_size'],
                         config['albumentations']['resize']['test_size']))
        if config['albumentations']['centerCrop']['test']:
            transforms_test_list.append(
                A.CenterCrop(
                    config['albumentations']['centerCrop']['test_size'],
                    config['albumentations']['centerCrop']['test_size']))
        if config['albumentations']['horizontalFlip']['test']:
            transforms_test_list.append(A.HorizontalFlip())
        if config['albumentations']['rotate']['test']:
            transforms_test_list.append(
                A.Rotate(config['albumentations']['rotate']['limit']))
        if config['albumentations']['clahe']['test']:
            transforms_test_list.append(A.CLAHE())
        if config['albumentations']['gaussNoise']['test']:
            transforms_test_list.append(A.GaussNoise())
        if config['albumentations']['randomBrightness']['test']:
            transforms_test_list.append(A.RandomBrightness())
        if config['albumentations']['randomContrast']['test']:
            transforms_test_list.append(A.RandomContrast())
        if config['albumentations']['randomBrightnessContrast']['test']:
            transforms_test_list.append(A.RandomBrightnessContrast())
        if config['albumentations']['hueSaturationValue']['test']:
            transforms_test_list.append(A.HueSaturationValue())
        if config['albumentations']['normalize']['test']:
            transforms_test_list.append(
                A.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]))
        if config['albumentations']['toTensor']['test']:
            transforms_test_list.append(AT.ToTensor())
        test_transforms = A.Compose(transforms_test_list)

    return test_transforms
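For reference, a minimal, hypothetical config covering the keys the torchvision branch reads (all values are illustrative; the project's actual config file may differ):

config = {
    'transforms': 'pytorch',
    'pytorch': {
        'resize': {'test': True, 'test_size': 256},
        'centerCrop': {'test': True, 'test_size': 224},
        'colorJitter': {'test': False, 'brightness': 0.2},
        'randomCrop': {'test': False, 'test_size': 224, 'padding': 4},
        'randomResizedCrop': {'test': False, 'test_size': 224},
        'randomHorizontalFlip': {'test': False},
        'randomAffine': {'test': False, 'degrees': 10, 'scale': (0.9, 1.1)},
        'randomRotation': {'test': False, 'degrees': 10},
        'toTensor': {'test': True},
        'normalize': {'test': True},
        'randomErasing': {'test': False, 'p': 0.5, 'value': 0},
    },
}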
Example #21
    gflags.DEFINE_float("lr", 0.00006, "learning rate")
    gflags.DEFINE_integer("show_every", 10,
                          "show result after each show_every iter.")
    gflags.DEFINE_integer("save_every", 100,
                          "save model after each save_every iter.")
    gflags.DEFINE_integer("test_every", 100,
                          "test model after each test_every iter.")
    gflags.DEFINE_integer("max_iter", 50000,
                          "number of iterations before stopping")
    gflags.DEFINE_string("model_path", "/model", "path to store model")
    gflags.DEFINE_string("gpu_ids", "0,1,2,3", "gpu ids used to train")

    Flags(sys.argv)

    data_transforms = transforms.Compose(
        [transforms.RandomAffine(15),
         transforms.ToTensor()])
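    # Note: transforms.RandomAffine(15) only rotates, by a random angle in
    # [-15, 15] degrees; translation, scaling, and shear stay disabled
    # (equivalent to RandomAffine(degrees=15, translate=None, scale=None,
    # shear=None)).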

    # train_dataset = dset.ImageFolder(root=Flags.train_path)
    # test_dataset = dset.ImageFolder(root=Flags.test_path)

    os.environ["CUDA_VISIBLE_DEVICES"] = Flags.gpu_ids
    print("use gpu:", Flags.gpu_ids, "to train.")

    trainSet = OmniglotTrain(Flags.train_path, transform=data_transforms)
    testSet = OmniglotTest(Flags.test_path,
                           transform=transforms.ToTensor(),
                           times=Flags.times,
                           way=Flags.way)
    testLoader = DataLoader(testSet,
                            batch_size=Flags.way,
Example #22
def main():
  parser = argparse.ArgumentParser(description="LSTM VAE Agents: ST-GS Language Emergence.")
  parser.add_argument("--seed", type=int, default=0)
  parser.add_argument("--parent_folder", type=str, help="folder to save into.",default="")
  parser.add_argument("--symbolic", action="store_true", default=False)
  parser.add_argument("--use_cuda", action="store_true", default=False)
  parser.add_argument("--dataset", type=str, 
    choices=["Sort-of-CLEVR",
             "tiny-Sort-of-CLEVR",
             "XSort-of-CLEVR",
             "tiny-XSort-of-CLEVR",
             "dSprites",
             ], 
    help="dataset to train on.",
    default="dSprites")
  parser.add_argument("--arch", type=str, 
    choices=["CNN",
             "CNN3x3",
             "BN+CNN",
             "BN+CNN3x3",
             "BN+BetaVAE3x3",
             "BN+Coord2CNN3x3",
             "BN+Coord4CNN3x3",
             ], 
    help="model architecture to train",
    default="BN+BetaVAE3x3")
  parser.add_argument("--graphtype", type=str,
    choices=["straight_through_gumbel_softmax",
             "reinforce",
             "baseline_reduced_reinforce",
             "normalized_reinforce",
             "baseline_reduced_normalized_reinforce",
             "max_entr_reinforce",
             "baseline_reduced_normalized_max_entr_reinforce",
             "argmax_reinforce",
             "obverter"],
    help="type of graph to use during training of the speaker and listener.",
    default="straight_through_gumbel_softmax")
  parser.add_argument("--max_sentence_length", type=int, default=20)
  parser.add_argument("--vocab_size", type=int, default=100)
  parser.add_argument("--optimizer_type", type=str, 
    choices=[
      "adam",
      "sgd"
      ],
    default="adam")
  parser.add_argument("--agent_loss_type", type=str,
    choices=[
      "Hinge",
      "NLL",
      "CE",
      "BCE",
      ],
    default="Hinge")
  parser.add_argument("--agent_type", type=str,
    choices=[
      "Baseline",
      ],
    default="Baseline")
  parser.add_argument("--rnn_type", type=str,
    choices=[
      "LSTM",
      "GRU",
      ],
    default="LSTM")
  parser.add_argument("--lr", type=float, default=1e-4)
  parser.add_argument("--epoch", type=int, default=1875)
  parser.add_argument("--metric_epoch_period", type=int, default=20)
  parser.add_argument("--dataloader_num_worker", type=int, default=4)
  parser.add_argument("--metric_fast", action="store_true", default=False)
  parser.add_argument("--batch_size", type=int, default=8)
  parser.add_argument("--mini_batch_size", type=int, default=128)
  parser.add_argument("--dropout_prob", type=float, default=0.0)
  parser.add_argument("--emb_dropout_prob", type=float, default=0.8)
  parser.add_argument("--nbr_experience_repetition", type=int, default=1)
  parser.add_argument("--nbr_train_dataset_repetition", type=int, default=1)
  parser.add_argument("--nbr_test_dataset_repetition", type=int, default=1)
  parser.add_argument("--nbr_test_distractors", type=int, default=63)
  parser.add_argument("--nbr_train_distractors", type=int, default=47)
  parser.add_argument("--resizeDim", default=32, type=int,help="input image resize")
  #TODO: note that with action="store_true" and default=True, this flag is effectively always True.
  parser.add_argument("--shared_architecture", action="store_true", default=True)
  parser.add_argument("--with_baseline", action="store_true", default=False)
  parser.add_argument("--homoscedastic_multitasks_loss", action="store_true", default=False)
  parser.add_argument("--use_curriculum_nbr_distractors", action="store_true", default=False)
  parser.add_argument("--use_feat_converter", action="store_true", default=False)
  parser.add_argument("--descriptive", action="store_true", default=False)
  parser.add_argument("--descriptive_ratio", type=float, default=0.0)
  parser.add_argument("--egocentric", action="store_true", default=False)
  parser.add_argument("--distractor_sampling", type=str,
    choices=[ "uniform",
              "similarity-0.98",
              "similarity-0.90",
              "similarity-0.75",
              ],
    default="uniform")
  # Obverter Hyperparameters:
  parser.add_argument("--use_sentences_one_hot_vectors", action="store_true", default=False)
  parser.add_argument("--differentiable", action="store_true", default=False)
  parser.add_argument("--obverter_threshold_to_stop_message_generation", type=float, default=0.95)
  parser.add_argument("--obverter_nbr_games_per_round", type=int, default=4)
  # Cultural Bottleneck:
  parser.add_argument("--iterated_learning_scheme", action="store_true", default=False)
  parser.add_argument("--iterated_learning_period", type=int, default=4)
  parser.add_argument("--iterated_learning_rehearse_MDL", action="store_true", default=False)
  parser.add_argument("--iterated_learning_rehearse_MDL_factor", type=float, default=1.0)
  
  # Dataset Hyperparameters:
  parser.add_argument("--train_test_split_strategy", type=str, 
    choices=["combinatorial2-Y-2-8-X-2-8-Orientation-40-N-Scale-6-N-Shape-3-N", # Exp : DoRGsFurtherDise interweaved split simple XY normal             
             "combinatorial2-Y-2-S8-X-2-S8-Orientation-40-N-Scale-4-N-Shape-1-N",
             "combinatorial2-Y-32-N-X-32-N-Orientation-5-S4-Scale-1-S3-Shape-3-N",  #Sparse 2 Attributes: Orient.+Scale 64 imgs, 48 train, 16 test
             "combinatorial2-Y-2-S8-X-2-S8-Orientation-40-N-Scale-6-N-Shape-3-N",  # 4x Denser 2 Attributes: 256 imgs, 192 train, 64 test,
             
             # Heart shape: interpolation:
             "combinatorial2-Y-4-2-X-4-2-Orientation-40-N-Scale-6-N-Shape-3-N",  #Sparse 2 Attributes: X+Y 64 imgs, 48 train, 16 test
             "combinatorial2-Y-2-2-X-2-2-Orientation-40-N-Scale-6-N-Shape-3-N",  #Dense 2 Attributes: X+Y 256 imgs, 192 train, 64 test
             "combinatorial2-Y-8-2-X-8-2-Orientation-10-2-Scale-1-2-Shape-3-N", #COMB2:Sparser 4 Attributes: 264 test / 120 train
             "combinatorial2-Y-4-2-X-4-2-Orientation-5-2-Scale-1-2-Shape-3-N", #COMB2:Sparse 4 Attributes: 2112 test / 960 train
             "combinatorial2-Y-2-2-X-2-2-Orientation-2-2-Scale-1-2-Shape-3-N", #COMB2:Dense 4 Attributes: ? test / ? train
             "combinatorial2-Y-4-2-X-4-2-Orientation-5-2-Scale-6-N-Shape-3-N",  #COMB2 Sparse: 3 Attributes: XYOrientation 256 test / 256 train
             # Heart shape: Extrapolation:
             "combinatorial2-Y-4-S4-X-4-S4-Orientation-40-N-Scale-6-N-Shape-3-N",  #Sparse 2 Attributes: X+Y 64 imgs, 48 train, 16 test
             "combinatorial2-Y-8-S2-X-8-S2-Orientation-10-S2-Scale-1-S3-Shape-3-N", #COMB2:Sparser 4 Attributes: 264 test / 120 train
             "combinatorial2-Y-4-S4-X-4-S4-Orientation-5-S4-Scale-1-S3-Shape-3-N", #COMB2:Sparse 4 Attributes: 2112 test / 960 train
             "combinatorial2-Y-2-S8-X-2-S8-Orientation-2-S10-Scale-1-S3-Shape-3-N", #COMB2:Dense 4 Attributes: ? test / ? train
             "combinatorial2-Y-4-S4-X-4-S4-Orientation-5-S4-Scale-6-N-Shape-3-N",  #COMB2 Sparse: 3 Attributes: XYOrientation 256 test / 256 train

             # Oval shape:
             "combinatorial2-Y-1-S16-X-1-S16-Orientation-40-N-Scale-6-N-Shape-2-N", # Denser 2 Attributes X+Y X 16/ Y 16/ --> 256 test / 768 train 
             "combinatorial2-Y-8-S2-X-8-S2-Orientation-10-S2-Scale-1-S3-Shape-2-N", #COMB2:Sparser 4 Attributes: 264 test / 120 train
             "combinatorial2-Y-4-S4-X-4-S4-Orientation-5-S4-Scale-1-S3-Shape-2-N", #COMB2:Sparse 4 Attributes: 2112 test / 960 train
             "combinatorial2-Y-2-S8-X-2-S8-Orientation-2-S10-Scale-1-S3-Shape-2-N", #COMB2:Dense 4 Attributes: ? test / ? train
             
             #3 Attributes: denser 2 attributes(X+Y) with the sample size of Dense 4 attributes:
             "combinatorial2-Y-1-S16-X-1-S16-Orientation-2-S10-Scale-6-N-Shape-2-N", 
  
             "combinatorial4-Y-4-S4-X-4-S4-Orientation-5-S4-Scale-1-S3-Shape-3-N", #Sparse 4 Attributes: 192 test / 1344 train
            ],
    help="train/test split strategy",
    # INTER:
    #default="combinatorial2-Y-4-2-X-4-2-Orientation-40-N-Scale-6-N-Shape-3-N")
    # EXTRA:
    #default="combinatorial2-Y-4-S4-X-4-S4-Orientation-40-N-Scale-6-N-Shape-3-N")
    # EXTRA-3:
    default="combinatorial2-Y-4-S4-X-4-S4-Orientation-5-S4-Scale-6-N-Shape-3-N")
  parser.add_argument("--fast", action="store_true", default=False, 
    help="Disable the deterministic CuDNN. It is likely to make the computation faster.")
  
  #--------------------------------------------------------------------------
  #--------------------------------------------------------------------------
  # VAE Hyperparameters:
  #--------------------------------------------------------------------------
  #--------------------------------------------------------------------------
  parser.add_argument("--vae_detached_featout", action="store_true", default=False)

  parser.add_argument("--vae_lambda", type=float, default=1.0)
  parser.add_argument("--vae_use_mu_value", action="store_true", default=False)
  
  parser.add_argument("--vae_nbr_latent_dim", type=int, default=32)
  parser.add_argument("--vae_decoder_nbr_layer", type=int, default=3)
  parser.add_argument("--vae_decoder_conv_dim", type=int, default=32)
  
  parser.add_argument("--vae_gaussian", action="store_true", default=False)
  parser.add_argument("--vae_gaussian_sigma", type=float, default=0.25)
  
  parser.add_argument("--vae_beta", type=float, default=1.0)
  parser.add_argument("--vae_factor_gamma", type=float, default=0.0)
  
  parser.add_argument("--vae_constrained_encoding", action="store_true", default=False)
  parser.add_argument("--vae_max_capacity", type=float, default=1e3)
  parser.add_argument("--vae_nbr_epoch_till_max_capacity", type=int, default=10)

  #--------------------------------------------------------------------------
  #--------------------------------------------------------------------------
  #--------------------------------------------------------------------------
  #--------------------------------------------------------------------------
  
  
  args = parser.parse_args()
  print(args)

  gaussian = args.vae_gaussian 
  vae_observation_sigma = args.vae_gaussian_sigma
  
  vae_beta = args.vae_beta 
  factor_vae_gamma = args.vae_factor_gamma
  
  vae_constrainedEncoding = args.vae_constrained_encoding
  maxCap = args.vae_max_capacity #1e2
  nbrepochtillmaxcap = args.vae_nbr_epoch_till_max_capacity

  monet_gamma = 5e-1
  
  #--------------------------------------------------------------------------
  #--------------------------------------------------------------------------
  #--------------------------------------------------------------------------
  #--------------------------------------------------------------------------
  
  seed = args.seed 

  # Following: https://pytorch.org/docs/stable/notes/randomness.html
  torch.manual_seed(seed)
  if hasattr(torch.backends, "cudnn") and not(args.fast):
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

  np.random.seed(seed)
  random.seed(seed)
  # # Hyperparameters:

  nbr_epoch = args.epoch
  
  cnn_feature_size = -1 #600 #128 #256 #
  # Except for VAEs...!
  
  stimulus_resize_dim = args.resizeDim #64 #28
  
  normalize_rgb_values = False 
  
  rgb_scaler = 1.0 #255.0
  from ReferentialGym.datasets.utils import ResizeNormalize
  transform = ResizeNormalize(size=stimulus_resize_dim, 
                              normalize_rgb_values=normalize_rgb_values,
                              rgb_scaler=rgb_scaler)

  from ReferentialGym.datasets.utils import AddEgocentricInvariance
  ego_inv_transform = AddEgocentricInvariance()

  transform_degrees = 25
  transform_translate = (0.0625, 0.0625)

  default_descriptive_ratio = 1-(1/(args.nbr_train_distractors+2))
  # Default: 1-(1/(nbr_distractors+2)), 
  # otherwise the agent finds the local minimum
  # where it only predicts "no-target"...
  if args.descriptive_ratio <=0.001:
    descriptive_ratio = default_descriptive_ratio
  else:
    descriptive_ratio = args.descriptive_ratio
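  # e.g. with the default --nbr_train_distractors 47, the default ratio is
  # 1 - 1/(47+2) = 48/49 ≈ 0.98, i.e. roughly 98% of descriptive rounds
  # actually contain the target.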

  rg_config = {
      "observability":            "partial",
      "max_sentence_length":      args.max_sentence_length,
      "nbr_communication_round":  1,
      "nbr_distractors":          {"train":args.nbr_train_distractors, "test":args.nbr_test_distractors},
      "distractor_sampling":      args.distractor_sampling,
      # Default: use "similarity-0.5"
      # otherwise the emerging language 
      # will have very high ambiguity...
      # Speakers seemingly settle on the strategy of
      # uttering a word that is relevant to the
      # class/label of the target.
      
      "descriptive":              args.descriptive,
      "descriptive_target_ratio": descriptive_ratio,

      "object_centric":           False,
      "nbr_stimulus":             1,

      "graphtype":                args.graphtype,
      "tau0":                     0.2,
      "gumbel_softmax_eps":       1e-6,
      "vocab_size":               args.vocab_size,
      "symbol_embedding_size":    256, #64

      "agent_architecture":       args.arch, #"CoordResNet18AvgPooled-2", #"BetaVAE", #"ParallelMONet", #"BetaVAE", #"CNN[-MHDPA]"/"[pretrained-]ResNet18[-MHDPA]-2"
      "agent_learning":           "learning",  #"transfer_learning" : CNN"s outputs are detached from the graph...
      "agent_loss_type":          args.agent_loss_type, #"NLL"

      "cultural_pressure_it_period": None,
      "cultural_speaker_substrate_size":  1,
      "cultural_listener_substrate_size":  1,
      "cultural_reset_strategy":  "oldestL", # "uniformSL" #"meta-oldestL-SGD"
      "cultural_reset_meta_learning_rate":  1e-3,

      # Obverter"s Cultural Bottleneck:
      "iterated_learning_scheme": args.iterated_learning_scheme,
      "iterated_learning_period": args.iterated_learning_period,
      "iterated_learning_rehearse_MDL": args.iterated_learning_rehearse_MDL,
      "iterated_learning_rehearse_MDL_factor": args.iterated_learning_rehearse_MDL_factor,
      
      "obverter_stop_threshold":  args.obverter_threshold_to_stop_message_generation,  #0.0 if not in use.
      "obverter_nbr_games_per_round": args.obverter_nbr_games_per_round,

      "obverter_least_effort_loss": False,
      "obverter_least_effort_loss_weights": [1.0 for x in range(0, 10)],

      "batch_size":               args.batch_size,
      "dataloader_num_worker":    args.dataloader_num_worker,
      "stimulus_depth_dim":       1 if "dSprites" in args.dataset else 3,
      "stimulus_resize_dim":      stimulus_resize_dim, 
      
      "learning_rate":            args.lr, #1e-3,
      "adam_eps":                 1e-8,
      "dropout_prob":             args.dropout_prob,
      "embedding_dropout_prob":   args.emb_dropout_prob,
      
      "with_gradient_clip":       False,
      "gradient_clip":            1e0,
      
      "use_homoscedastic_multitasks_loss": args.homoscedastic_multitasks_loss,

      "use_feat_converter":       args.use_feat_converter,

      "use_curriculum_nbr_distractors": args.use_curriculum_nbr_distractors,
      "curriculum_distractors_window_size": 25, #100,

      "unsupervised_segmentation_factor": None, #1e5
      "nbr_experience_repetition":  args.nbr_experience_repetition,
      
      "with_utterance_penalization":  False,
      "with_utterance_promotion":     False,
      "utterance_oov_prob":  0.5,  # Expected penalty of observing out-of-vocabulary words. 
                                                # The greater this value, the greater the loss/cost.
      "utterance_factor":    1e-2,

      "with_speaker_entropy_regularization":  False,
      "with_listener_entropy_regularization":  False,
      "entropy_regularization_factor":    -1e-2,

      "with_mdl_principle":       False,
      "mdl_principle_factor":     5e-2,

      "with_weight_maxl1_loss":   False,

      "use_cuda":                 args.use_cuda,
  
      "train_transform":            transform,
      "test_transform":             transform,
  }

  if args.egocentric:
    rg_config["train_transform"]= T.Compose(
      [
        ego_inv_transform,
        T.RandomAffine(degrees=transform_degrees, 
                     translate=transform_translate, 
                     scale=None, 
                     shear=None, 
                     resample=False, 
                     fillcolor=0),
        transform
      ]
    )
    rg_config["test_transform"]=  T.Compose(
      [
        ego_inv_transform,
        T.RandomAffine(degrees=transform_degrees, 
                     translate=transform_translate, 
                     scale=None, 
                     shear=None, 
                     resample=False, 
                     fillcolor=0),
        transform
      ]
    )
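    # Note: torchvision >= 0.9 deprecates RandomAffine's `resample` and
    # `fillcolor` arguments in favour of `interpolation` and `fill`.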
  
  ## Train set:
  train_split_strategy = args.train_test_split_strategy
  test_split_strategy = train_split_strategy
  
  ## Agent Configuration:
  agent_config = copy.deepcopy(rg_config)
  agent_config["use_cuda"] = rg_config["use_cuda"]
  agent_config["homoscedastic_multitasks_loss"] = rg_config["use_homoscedastic_multitasks_loss"]
  agent_config["use_feat_converter"] = rg_config["use_feat_converter"]
  agent_config["max_sentence_length"] = rg_config["max_sentence_length"]
  agent_config["nbr_distractors"] = rg_config["nbr_distractors"]["train"] if rg_config["observability"] == "full" else 0
  agent_config["nbr_stimulus"] = rg_config["nbr_stimulus"]
  agent_config["nbr_communication_round"] = rg_config["nbr_communication_round"]
  agent_config["descriptive"] = rg_config["descriptive"]
  agent_config["gumbel_softmax_eps"] = rg_config["gumbel_softmax_eps"]
  agent_config["agent_learning"] = rg_config["agent_learning"]

  # Obverter:
  agent_config["use_obverter_threshold_to_stop_message_generation"] = args.obverter_threshold_to_stop_message_generation
  
  agent_config["symbol_embedding_size"] = rg_config["symbol_embedding_size"]

  # Recurrent Convolutional Architecture:
  agent_config["architecture"] = rg_config["agent_architecture"]
  agent_config["decoder_architecture"] = "DCNN"
  if args.symbolic:
    agent_config["decoder_architecture"] = "BN+MLP"
    
  agent_config["dropout_prob"] = rg_config["dropout_prob"]
  agent_config["embedding_dropout_prob"] = rg_config["embedding_dropout_prob"]
  
  if "BetaVAE" in agent_config["architecture"]:
    agent_config['VAE_lambda'] = args.vae_lambda
    agent_config['vae_beta'] = args.vae_beta
    agent_config['factor_vae_gamma'] = args.vae_factor_gamma
    agent_config['vae_constrainedEncoding'] =  args.vae_constrained_encoding
    agent_config['vae_use_gaussian_observation_model'] = args.vae_gaussian 
    agent_config['vae_observation_sigma'] = args.vae_gaussian_sigma
    agent_config['vae_max_capacity'] = args.vae_max_capacity #1e2
    agent_config['vae_nbr_epoch_till_max_capacity'] = args.vae_nbr_epoch_till_max_capacity

    agent_config['vae_decoder_conv_dim'] = args.vae_decoder_conv_dim
    agent_config['vae_decoder_nbr_layer'] = args.vae_decoder_nbr_layer
    agent_config['vae_nbr_latent_dim'] = args.vae_nbr_latent_dim
    agent_config['vae_detached_featout'] = args.vae_detached_featout
    agent_config['vae_use_mu_value'] = args.vae_use_mu_value

    rg_config["use_feat_converter"] = False
    agent_config["use_feat_converter"] = False
    
    if "BN" in args.arch:
      agent_config["cnn_encoder_channels"] = ["BN32","BN32","BN64","BN64"]
    else:
      agent_config["cnn_encoder_channels"] = [32,32,64,64]
    
    if "3x3" in agent_config["architecture"]:
      agent_config["cnn_encoder_kernels"] = [3,3,3,3]
    elif "7x4x4x3" in agent_config["architecture"]:
      agent_config["cnn_encoder_kernels"] = [7,4,4,3]
    else:
      agent_config["cnn_encoder_kernels"] = [4,4,4,4]
    agent_config["cnn_encoder_strides"] = [2,2,2,2]
    agent_config["cnn_encoder_paddings"] = [1,1,1,1]
    agent_config["cnn_encoder_fc_hidden_units"] = []#[128,] 
    # the last FC layer is provided by the cnn_encoder_feature_dim parameter below...
    
    # For a fair comparison between CNNs and VAEs:
    agent_config["cnn_encoder_feature_dim"] = args.vae_nbr_latent_dim
    #agent_config["cnn_encoder_feature_dim"] = cnn_feature_size
    # N.B.: if cnn_encoder_fc_hidden_units is [],
    # then this last parameter does not matter.
    # The cnn encoder is not topped by a FC network.

    agent_config["cnn_encoder_mini_batch_size"] = args.mini_batch_size
    #agent_config["feat_converter_output_size"] = cnn_feature_size
    agent_config["feat_converter_output_size"] = 256

    if "MHDPA" in agent_config["architecture"]:
      agent_config["mhdpa_nbr_head"] = 4
      agent_config["mhdpa_nbr_rec_update"] = 1
      agent_config["mhdpa_nbr_mlp_unit"] = 256
      agent_config["mhdpa_interaction_dim"] = 128

    agent_config["temporal_encoder_nbr_hidden_units"] = 0
    agent_config["temporal_encoder_nbr_rnn_layers"] = 0
    agent_config["temporal_encoder_mini_batch_size"] = args.mini_batch_size
    agent_config["symbol_processing_nbr_hidden_units"] = agent_config["temporal_encoder_nbr_hidden_units"]
    agent_config["symbol_processing_nbr_rnn_layers"] = 1

    ## Decoder:
    ### CNN:
    if "BN" in agent_config["decoder_architecture"]:
      agent_config["cnn_decoder_channels"] = ["BN64","BN64","BN32","BN32"]
    else:
      agent_config["cnn_decoder_channels"] = [64,64,32,32]
    
    if "3x3" in agent_config["decoder_architecture"]:
      agent_config["cnn_decoder_kernels"] = [3,3,3,3]
    elif "3x4x4x7" in agent_config["decoder_architecture"]:
      agent_config["cnn_decoder_kernels"] = [3,4,4,7]
    else:
      agent_config["cnn_decoder_kernels"] = [4,4,4,4]
    agent_config["cnn_decoder_strides"] = [2,2,2,2]
    agent_config["cnn_decoder_paddings"] = [1,1,1,1]
    
    ### MLP:
    if "BN" in agent_config["decoder_architecture"]:
      agent_config['mlp_decoder_fc_hidden_units'] = ["BN256", "BN256"]
    else:
      agent_config['mlp_decoder_fc_hidden_units'] = [256, 256]
    agent_config['mlp_decoder_fc_hidden_units'].append(40*6)
                                                   
  else:
    raise NotImplementedError


  save_path = "./"
  if args.parent_folder != '':
    save_path += args.parent_folder+'/'
  save_path += f"{args.dataset}+DualLabeled/"
  if args.symbolic:
    save_path += f"Symbolic/"
  save_path += f"{nbr_epoch}Ep_Emb{rg_config['symbol_embedding_size']}_CNN{cnn_feature_size}to{args.vae_nbr_latent_dim}"
  if args.shared_architecture:
    save_path += "/shared_architecture"
  save_path += f"Dropout{rg_config['dropout_prob']}_DPEmb{rg_config['embedding_dropout_prob']}"
  save_path += f"_BN_{rg_config['agent_learning']}/"
  save_path += f"{rg_config['agent_loss_type']}"
  
  if 'dSprites' in args.dataset: 
    train_test_strategy = f"-{test_split_strategy}"
    if test_split_strategy != train_split_strategy:
      train_test_strategy = f"/train_{train_split_strategy}/test_{test_split_strategy}"
    save_path += f"/dSprites{train_test_strategy}"
  
  save_path += f"/OBS{rg_config['stimulus_resize_dim']}X{rg_config['stimulus_depth_dim']}C-Rep{rg_config['nbr_experience_repetition']}"
  
  if rg_config['use_curriculum_nbr_distractors']:
    save_path += f"+W{rg_config['curriculum_distractors_window_size']}Curr"
  if rg_config['with_utterance_penalization']:
    save_path += "+Tau-10-OOV{}PenProb{}".format(rg_config['utterance_factor'], rg_config['utterance_oov_prob'])  
  if rg_config['with_utterance_promotion']:
    save_path += "+Tau-10-OOV{}ProProb{}".format(rg_config['utterance_factor'], rg_config['utterance_oov_prob'])  
  
  if rg_config['with_gradient_clip']:
    save_path += '+ClipGrad{}'.format(rg_config['gradient_clip'])
  
  if rg_config['with_speaker_entropy_regularization']:
    save_path += 'SPEntrReg{}'.format(rg_config['entropy_regularization_factor'])
  if rg_config['with_listener_entropy_regularization']:
    save_path += 'LSEntrReg{}'.format(rg_config['entropy_regularization_factor'])
  
  if rg_config['iterated_learning_scheme']:
    save_path += f"-ILM{rg_config['iterated_learning_period']}{'+RehearseMDL{}'.format(rg_config['iterated_learning_rehearse_MDL_factor']) if rg_config['iterated_learning_rehearse_MDL'] else ''}"
  
  if rg_config['with_mdl_principle']:
    save_path += '-MDL{}'.format(rg_config['mdl_principle_factor'])
  
  if rg_config['cultural_pressure_it_period'] is not None:
    save_path += '-S{}L{}-{}-Reset{}'.\
      format(rg_config['cultural_speaker_substrate_size'], 
      rg_config['cultural_listener_substrate_size'],
      rg_config['cultural_pressure_it_period'],
      rg_config['cultural_reset_strategy']+str(rg_config['cultural_reset_meta_learning_rate']) if 'meta' in rg_config['cultural_reset_strategy'] else rg_config['cultural_reset_strategy'])
  
  save_path += '-{}{}CulturalAgent-SEED{}-{}-obs_b{}_minib{}_lr{}-{}-tau0-{}-{}DistrTrain{}Test{}-stim{}-vocab{}over{}_{}{}'.\
    format(
    'ObjectCentric' if rg_config['object_centric'] else '',
    'Descriptive{}'.format(rg_config['descriptive_target_ratio']) if rg_config['descriptive'] else '',
    seed,
    rg_config['observability'], 
    rg_config['batch_size'], 
    args.mini_batch_size,
    rg_config['learning_rate'],
    rg_config['graphtype'], 
    rg_config['tau0'], 
    rg_config['distractor_sampling'],
    *rg_config['nbr_distractors'].values(), 
    rg_config['nbr_stimulus'], 
    rg_config['vocab_size'], 
    rg_config['max_sentence_length'], 
    rg_config['agent_architecture'],
    f"/{'Detached' if args.vae_detached_featout else ''}beta{vae_beta}-factor{factor_vae_gamma}" if 'BetaVAE' in rg_config['agent_architecture'] else ''
  )

  if 'MONet' in rg_config['agent_architecture'] or 'BetaVAE' in rg_config['agent_architecture']:
    save_path += f"beta{vae_beta}-factor{factor_vae_gamma}-gamma{monet_gamma}-sigma{vae_observation_sigma}" if 'MONet' in rg_config['agent_architecture'] else ''
    save_path += f"CEMC{maxCap}over{nbrepochtillmaxcap}" if vae_constrainedEncoding else ''
    save_path += f"UnsupSeg{rg_config['unsupervised_segmentation_factor']}" if rg_config['unsupervised_segmentation_factor'] is not None else ''
    save_path += f"LossVAECoeff{args.vae_lambda}_{'UseMu' if args.vae_use_mu_value else ''}"

  if rg_config['use_feat_converter']:
    save_path += f"+FEATCONV"
  
  if rg_config['use_homoscedastic_multitasks_loss']:
    save_path += '+Homo'
  
  save_path += f"/{args.optimizer_type}/"

  if 'reinforce' in args.graphtype:
    save_path += f'/REINFORCE_EntropyCoeffNeg1m3/UnnormalizedDetLearningSignalHavrylovLoss/NegPG/'

  if 'obverter' in args.graphtype:
    save_path += f"Obverter{args.obverter_threshold_to_stop_message_generation}-{args.obverter_nbr_games_per_round}GPR/DEBUG/"
  else:
    save_path += f"STGS-{args.agent_type}-{args.rnn_type}-CNN-Agent/"

  save_path += f"Periodic{args.metric_epoch_period}TS+DISComp-{'fast-' if args.metric_fast else ''}/"#TestArchTanh/"
  
  save_path += f'DatasetRepTrain{args.nbr_train_dataset_repetition}Test{args.nbr_test_dataset_repetition}'
  
  rg_config['save_path'] = save_path
  
  print(save_path)

  from ReferentialGym.utils import statsLogger
  logger = statsLogger(path=save_path,dumpPeriod=100)
  
  # # Agents
  batch_size = 4
  nbr_distractors = 1 if "partial" in rg_config["observability"] else agent_config["nbr_distractors"]["train"]
  nbr_stimulus = agent_config["nbr_stimulus"]
  obs_shape = [nbr_distractors+1,nbr_stimulus, rg_config["stimulus_depth_dim"],rg_config["stimulus_resize_dim"],rg_config["stimulus_resize_dim"]]
  vocab_size = rg_config["vocab_size"]
  max_sentence_length = rg_config["max_sentence_length"]

  if "obverter" in args.graphtype:
    from ReferentialGym.agents import DifferentiableObverterAgent
    speaker = DifferentiableObverterAgent(
      kwargs=agent_config, 
      obs_shape=obs_shape, 
      vocab_size=vocab_size, 
      max_sentence_length=max_sentence_length,
      agent_id="s0",
      logger=logger,
      use_sentences_one_hot_vectors=args.use_sentences_one_hot_vectors,
      differentiable=args.differentiable
    )
  else:
    if "Baseline" in args.agent_type:
      if 'lstm' in args.rnn_type.lower():
        from ReferentialGym.agents import LSTMCNNSpeaker
        speaker = LSTMCNNSpeaker(
          kwargs=agent_config, 
          obs_shape=obs_shape, 
          vocab_size=vocab_size, 
          max_sentence_length=max_sentence_length,
          agent_id="s0",
          logger=logger
        )
      elif 'gru' in args.rnn_type.lower():
        from ReferentialGym.agents import GRUCNNSpeaker
        speaker = GRUCNNSpeaker(
          kwargs=agent_config, 
          obs_shape=obs_shape, 
          vocab_size=vocab_size, 
          max_sentence_length=max_sentence_length,
          agent_id="s0",
          logger=logger
        )
      else:
        raise NotImplementedError
    elif "EoSPriored" in args.agent_type:
      from ReferentialGym.agents import EoSPrioredLSTMCNNSpeaker
      speaker = EoSPrioredLSTMCNNSpeaker(
        kwargs=agent_config, 
        obs_shape=obs_shape, 
        vocab_size=vocab_size, 
        max_sentence_length=max_sentence_length,
        agent_id="s0",
        logger=logger
      )
  print("Speaker:", speaker)

  listener_config = copy.deepcopy(agent_config)
  if args.shared_architecture:
    listener_config["cnn_encoder"] = speaker.cnn_encoder 
  listener_config["nbr_distractors"] = rg_config["nbr_distractors"]["train"]
  batch_size = 4
  nbr_distractors = listener_config["nbr_distractors"]
  nbr_stimulus = listener_config["nbr_stimulus"]
  obs_shape = [nbr_distractors+1,nbr_stimulus, rg_config["stimulus_depth_dim"],rg_config["stimulus_resize_dim"],rg_config["stimulus_resize_dim"]]
  vocab_size = rg_config["vocab_size"]
  max_sentence_length = rg_config["max_sentence_length"]

  if "obverter" in args.graphtype:
    raise NotImplementedError
  else:
    if 'lstm' in args.rnn_type.lower():
      from ReferentialGym.agents import LSTMCNNListener
      listener = LSTMCNNListener(
        kwargs=listener_config, 
        obs_shape=obs_shape, 
        vocab_size=vocab_size, 
        max_sentence_length=max_sentence_length,
        agent_id="l0",
        logger=logger
      )
    elif 'gru' in args.rnn_type.lower():
      from ReferentialGym.agents import GRUCNNListener
      listener = GRUCNNListener(
        kwargs=listener_config, 
        obs_shape=obs_shape, 
        vocab_size=vocab_size, 
        max_sentence_length=max_sentence_length,
        agent_id="l0",
        logger=logger
      )
    else:
      raise NotImplementedError

  if args.symbolic:
    assert args.agent_loss_type.lower() == 'ce'
    listener.input_stream_ids["listener"]["target_output"] = "current_dataloader:sample:speaker_exp_latents"
        
  print("Listener:", listener)

  # # Dataset:
  need_dict_wrapping = {}

  if "dSprites" in args.dataset:
    root = "./datasets/dsprites-dataset"
    train_dataset = ReferentialGym.datasets.dSpritesDataset(root=root, train=True, transform=rg_config["train_transform"], split_strategy=train_split_strategy)
    test_dataset = ReferentialGym.datasets.dSpritesDataset(root=root, train=False, transform=rg_config["test_transform"], split_strategy=test_split_strategy)
  else:
    raise NotImplementedError
  
  
  ## Modules:
  modules = {}

  from ReferentialGym import modules as rg_modules

  # Population:
  population_handler_id = "population_handler_0"
  population_handler_config = rg_config
  population_handler_stream_ids = {
    "current_speaker_streams_dict":"modules:current_speaker",
    "current_listener_streams_dict":"modules:current_listener",
    "epoch":"signals:epoch",
    "mode":"signals:mode",
    "global_it_datasample":"signals:global_it_datasample",
  }

  # Current Speaker:
  current_speaker_id = "current_speaker"

  # Current Listener:
  current_listener_id = "current_listener"

  modules[population_handler_id] = rg_modules.build_PopulationHandlerModule(
      id=population_handler_id,
      prototype_speaker=speaker,
      prototype_listener=listener,
      config=population_handler_config,
      input_stream_ids=population_handler_stream_ids)

  modules[current_speaker_id] = rg_modules.CurrentAgentModule(id=current_speaker_id,role="speaker")
  modules[current_listener_id] = rg_modules.CurrentAgentModule(id=current_listener_id,role="listener")
  
  homo_id = "homo0"
  homo_config = {"use_cuda":args.use_cuda}
  if args.homoscedastic_multitasks_loss:
    modules[homo_id] = rg_modules.build_HomoscedasticMultiTasksLossModule(
      id=homo_id,
      config=homo_config,
    )
  
  ## Pipelines:
  pipelines = {}

  # 0) Now that all the modules are known, let us build the optimization module:
  optim_id = "global_optim"
  optim_config = {
    "modules":modules,
    "learning_rate":args.lr,
    "optimizer_type":args.optimizer_type,
    "with_gradient_clip":rg_config["with_gradient_clip"],
    "adam_eps":rg_config["adam_eps"],
  }

  optim_module = rg_modules.build_OptimizationModule(
    id=optim_id,
    config=optim_config,
  )
  modules[optim_id] = optim_module

  grad_recorder_id = "grad_recorder"
  grad_recorder_module = rg_modules.build_GradRecorderModule(id=grad_recorder_id)
  modules[grad_recorder_id] = grad_recorder_module

  topo_sim_metric_id = "topo_sim_metric"
  topo_sim_metric_module = rg_modules.build_TopographicSimilarityMetricModule(id=topo_sim_metric_id,
    config = {
      "parallel_TS_computation_max_workers":16,
      "epoch_period":args.metric_epoch_period,
      "fast":args.metric_fast,
      "verbose":False,
      "vocab_size":rg_config["vocab_size"],
    }
  )
  modules[topo_sim_metric_id] = topo_sim_metric_module

  inst_coord_metric_id = "inst_coord_metric"
  inst_coord_metric_module = rg_modules.build_InstantaneousCoordinationMetricModule(id=inst_coord_metric_id,
    config = {
      "epoch_period":1,
    }
  )
  modules[inst_coord_metric_id] = inst_coord_metric_module
  
  dsprites_latent_metric_id = "dsprites_latent_metric"
  dsprites_latent_metric_module = rg_modules.build_dSpritesPerLatentAccuracyMetricModule(id=dsprites_latent_metric_id,
    config = {
      "epoch_period":1,
    }
  )
  modules[dsprites_latent_metric_id] = dsprites_latent_metric_module
  
  speaker_factor_vae_disentanglement_metric_id = "speaker_factor_vae_disentanglement_metric"
  speaker_factor_vae_disentanglement_metric_input_stream_ids = {
    "model":"modules:current_speaker:ref:ref_agent:cnn_encoder",
    "representations":"modules:current_speaker:ref:ref_agent:features",
    "experiences":"current_dataloader:sample:speaker_experiences", 
    "latent_representations":"current_dataloader:sample:speaker_exp_latents", 
    "latent_values_representations":"current_dataloader:sample:speaker_exp_latents_values",
    "indices":"current_dataloader:sample:speaker_indices", 
  }
  speaker_factor_vae_disentanglement_metric_module = rg_modules.build_FactorVAEDisentanglementMetricModule(
    id=speaker_factor_vae_disentanglement_metric_id,
    input_stream_ids=speaker_factor_vae_disentanglement_metric_input_stream_ids,
    config = {
      "epoch_period":args.metric_epoch_period,
      "batch_size":64,#5,
      "nbr_train_points":10000,#3000,
      "nbr_eval_points":5000,#2000,
      "resample":False,
      "threshold":5e-2,#0.0,#1.0,
      "random_state_seed":args.seed,
      "verbose":False,
      "active_factors_only":True,
    }
  )
  modules[speaker_factor_vae_disentanglement_metric_id] = speaker_factor_vae_disentanglement_metric_module

  listener_factor_vae_disentanglement_metric_id = "listener_factor_vae_disentanglement_metric"
  listener_factor_vae_disentanglement_metric_input_stream_ids = {
    "model":"modules:current_listener:ref:ref_agent:cnn_encoder",
    "representations":"modules:current_listener:ref:ref_agent:rnn_outputs",
    "experiences":"current_dataloader:sample:speaker_experiences", 
    "latent_representations":"current_dataloader:sample:speaker_exp_latents", 
    "latent_values_representations":"current_dataloader:sample:speaker_exp_latents_values",
    "indices":"current_dataloader:sample:speaker_indices", 
  }
  listener_factor_vae_disentanglement_metric_module = rg_modules.build_FactorVAEDisentanglementMetricModule(
    id=listener_factor_vae_disentanglement_metric_id,
    input_stream_ids=listener_factor_vae_disentanglement_metric_input_stream_ids,
    config = {
      "epoch_period":args.metric_epoch_period,
      "batch_size":64,#5,
      "nbr_train_points":10000,#3000,
      "nbr_eval_points":5000,#2000,
      "resample":False,
      "threshold":5e-2,#0.0,#1.0,
      "random_state_seed":args.seed,
      "verbose":False,
      "active_factors_only":True,
    }
  )
  modules[listener_factor_vae_disentanglement_metric_id] = listener_factor_vae_disentanglement_metric_module

  logger_id = "per_epoch_logger"
  logger_module = rg_modules.build_PerEpochLoggerModule(id=logger_id)
  modules[logger_id] = logger_module

  pipelines["referential_game"] = [
    population_handler_id,
    current_speaker_id,
    current_listener_id
  ]

  pipelines[optim_id] = []
  if args.homoscedastic_multitasks_loss:
    pipelines[optim_id].append(homo_id)
  pipelines[optim_id].append(optim_id)
  """
  # Add gradient recorder module for debugging purposes:
  pipelines[optim_id].append(grad_recorder_id)
  """
  pipelines[optim_id].append(speaker_factor_vae_disentanglement_metric_id)
  pipelines[optim_id].append(listener_factor_vae_disentanglement_metric_id)
  pipelines[optim_id].append(topo_sim_metric_id)
  pipelines[optim_id].append(inst_coord_metric_id)
  pipelines[optim_id].append(dsprites_latent_metric_id)
  pipelines[optim_id].append(logger_id)

  rg_config["modules"] = modules
  rg_config["pipelines"] = pipelines


  dataset_args = {
      "dataset_class":            "DualLabeledDataset",
      "modes": {"train": train_dataset,
                "test": test_dataset,
                },
      "need_dict_wrapping":       need_dict_wrapping,
      "nbr_stimulus":             rg_config["nbr_stimulus"],
      "distractor_sampling":      rg_config["distractor_sampling"],
      "nbr_distractors":          rg_config["nbr_distractors"],
      "observability":            rg_config["observability"],
      "object_centric":           rg_config["object_centric"],
      "descriptive":              rg_config["descriptive"],
      "descriptive_target_ratio": rg_config["descriptive_target_ratio"],
  }

  refgame = ReferentialGym.make(config=rg_config, dataset_args=dataset_args)

  # In[22]:

  refgame.train(nbr_epoch=nbr_epoch,
                logger=logger,
                verbose_period=1)

  logger.flush()
Example #23
parser.add_argument('--GPU_ids', default=0)

if __name__ == '__main__':
    # Dataset
    img_size = 224
    resize_img = 300
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    transform_train = transforms.Compose([
        transforms.Resize(resize_img),
        transforms.RandomHorizontalFlip(),
        transforms.RandomVerticalFlip(),
        transforms.ColorJitter(0.02, 0.02, 0.02, 0.01),
        transforms.RandomRotation([-180, 180]),
        transforms.RandomAffine([-180, 180],
                                translate=[0.1, 0.1],
                                scale=[0.7, 1.3]),
        transforms.RandomCrop(img_size),
        transforms.ToTensor(), normalize
    ])
    print('==> Preparing data..')
    trainset = dataloader(train=True, transform=transform_train)
    trainloader = torch.utils.data.DataLoader(trainset,
                                              batch_size=1,
                                              num_workers=50,
                                              shuffle=True)

    model = args.model
    # Use args.model as pretrain model
    if model == 'resnet152':
        net = resnet.resnet152().to(device)
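For evaluation, a matching deterministic pipeline (a sketch, not part of the original snippet) would drop the random augmentations and center-crop instead:

transform_test = transforms.Compose([
    transforms.Resize(resize_img),
    transforms.CenterCrop(img_size),
    transforms.ToTensor(), normalize
])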
Example #24
    net_D = Discriminator().to(device)

    optim_G = optim.RMSprop(net_G.parameters(), lr=args.lr)
    optim_D = optim.RMSprop(net_D.parameters(), lr=args.lr)

    train_writer = SummaryWriter(os.path.join(log_dir, 'train'))
    valid_writer = SummaryWriter(os.path.join(log_dir, 'valid'))

    os.makedirs(os.path.join(log_dir, 'sample'), exist_ok=True)
    sample_z = torch.randn(args.sample_size, args.z_dim).to(device)

    valid_dataset = GenerativeDataset(net_G, args.z_dim, 10000, device)
    looper = loop(dataloader)
    consistency_transforms = transforms.Compose([
        transforms.ToPILImage(mode='RGB'),
        transforms.RandomAffine(0, translate=(0.1, 0.1)),
        transforms.RandomHorizontalFlip(p=0.5),
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
    ])

    def consistency_transform_func(images):
        images = deepcopy(images)
        for idx, img in enumerate(images):
            images[idx] = consistency_transforms(img)
        return images

    cs_lambda = args.consistency
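    # A sketch of how a consistency term is typically applied with the
    # pieces above (the actual loss computation is outside this excerpt):
    #   d_real = net_D(images)
    #   d_aug = net_D(consistency_transform_func(images))
    #   cs_loss = cs_lambda * ((d_real - d_aug) ** 2).mean()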

    with trange(args.iterations, dynamic_ncols=True) as pbar:
        for step in pbar:
Example #25
def display_images(img_list, row, col):
    if (len(img_list) > 0):
        images = {}
        n = 0
        for img in img_list:
            n += 1
            images[str(n)] = img
        plot_images(images, row, col, cmap='gray')


train_data = torchvision.datasets.ImageFolder(
    root='custom_dataset/',
    transform=transforms.Compose([
        transforms.Grayscale(num_output_channels=1),
        transforms.RandomApply(
            [transforms.RandomAffine(degrees=(-30, 30), shear=(-30, 30))],
            p=1.0),
        transforms.ToTensor()
    ]))
print(f'dataset size: {len(train_data)}')

NUM_IMAGES = 36

groundtruth = [
    '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E',
    'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T',
    'U', 'V', 'W', 'X', 'Y', 'Z'
]

data_loader = torch.utils.data.DataLoader(train_data,
                                          batch_size=NUM_IMAGES,
Example #26
    def __init__(self,
                 dataset_dir,
                 images_path,
                 list=[],
                 numpatches=900,
                 numneg=3,
                 pos_thr=50.0,
                 reject=True,
                 mode='train',
                 rejection_radius=3000,
                 dist_type='3D',
                 patch_radius=None,
                 use_depth=False,
                 use_normals=False,
                 use_silhouettes=False,
                 color_jitter=False,
                 greyscale=False,
                 maxres=4096,
                 scale_jitter=False,
                 photo_jitter=False,
                 uniform_negatives=False,
                 needles=0,
                 render_only=False):
        """Loads the patches dataset.
           @param dataset_dir String directory where the dataset of sampled
           points is located
           @param images_path path to the images to sample patches from
           @param list List of subdirectory names to be loaded with this
           loader. Use this to specify train/test/val splits.
           @param numneg Int number of generated negatives per positive pair.
           @param pos_thr Float threshold in meters used to define negatives.
           If the distance of two 3D points exceeds this threshold, the
           correspondence is considered negative. The lower the threshold, the
           harder the negatives are.
           @param reject [bool] True turns on rejection sampling - for each
           patch we calculate density of 3D reprojected point cloud within 1km
           radius. Then the probability of rejection is calculated as
           num_points_1km_radius/max_num_points, where max_num_points is
           maximum taken across all queried samples until the current one.
           @param mode options: train|eval, default: train. If train is used,
           the additional per-patch metadata (which are used for some plots
           during validation) are not generated, and therefore training
           is faster.
           @type string
           @param dist_type type of the distance used to generate positives and
           negatives. Can be `2D` or `3D`. Default: 3D.
           @type int
           @param patch_radius when set to None, the patch radius will be
           loaded from the patches dataset. Otherwise the defined patch radius
           will be used. Please note that if you use larger patch_radius than
           the one defined within the patches dataset, the source image will be
           padded automatically and so the patch may contain black edges.
           @param needles If number greater than zero is used, then instead of
           a single patch a whole needle of patches will be extracted. Our
           network then takes several patches in a form of a needle encoded to
           channels of the input. This approach is described here:
           Lotan and Irani: Needle-Match: Reliable Patch Matching under
           High Uncertainty, CVPR 2016.
        """
        self.item_idx = -1
        self.dataset_dir = dataset_dir
        self.images_path = images_path
        self.numneg = numneg
        self.pos_thr = pos_thr
        self.loaded_imgs_pts = []
        self.all_coords3d = []
        self.max_num_points = 0
        self.reject = reject
        self.query_radius = rejection_radius
        self.dist_type = dist_type
        self.use_depth = use_depth
        self.use_normals = use_normals
        self.use_silhouettes = use_silhouettes
        self.color_jitter = color_jitter
        self.greyscale = greyscale
        self.left_maxres = maxres
        self.right_maxres = maxres
        self.scale_jitter = scale_jitter
        self.photo_jitter = photo_jitter
        self.uniform_negatives = uniform_negatives
        self.needles = needles
        self.render_only = render_only

        scene_info_file = os.path.join(os.path.dirname(images_path),
                                       "scene_info.txt")
        self.scene_center = MultimodalPatchesDataset.getSceneCenter(
            scene_info_file)

        self.numch_1 = 3
        self.numch_2 = 3
        if self.greyscale:
            self.numch_1 = 1
            self.numch_2 = 1

        if self.use_depth:
            self.numch_2 += 1
        if self.use_normals:
            self.numch_2 += 3
        if self.use_silhouettes:
            self.numch_2 += 1

        self.transform = transforms.Compose([
            transforms.ToPILImage(),
            transforms.ColorJitter(0.5, 0.5, 1.0, 0.5),
            transforms.ToTensor()
        ])

        print("Rejection radius: ", self.query_radius, "mode", mode)
        self.mode = mode
        if len(list) == 0:
            self.dataset_items = [
                d for d in os.listdir(self.dataset_dir)
                if os.path.isdir(os.path.join(self.dataset_dir, d))
            ]
        else:
            self.dataset_items = []
            if self.mode == 'eval':
                # choose only pairs where left view does not repeat
                print("Choosing non-repeating photographs for validation...")
                keyset = set()
                for item in tqdm(list):
                    item_path = os.path.join(self.dataset_dir, item)
                    info_path = os.path.join(item_path, "info.npy")
                    info = np.load(info_path,
                                   encoding='latin1',
                                   allow_pickle=True).flatten()[0]
                    img1_base = os.path.basename(info['img1_name'])
                    key = os.path.splitext(img1_base)[0]
                    if key in keyset:
                        continue
                    keyset.add(key)
                    self.dataset_items.append(item)
            else:
                self.dataset_items = list

        if (len(self.dataset_items) > 0):
            item_path = os.path.join(self.dataset_dir, self.dataset_items[0])
            info_path = os.path.join(item_path, "info.npy")
            self.info = np.load(info_path,
                                encoding='latin1',
                                allow_pickle=True).flatten()[0]
            self.numpatches = self.info['coords2d_1'].shape[0]
            if patch_radius is not None:
                self.patch_radius = patch_radius
            else:
                self.patch_radius = self.info['patch_radius']
            if numpatches != self.numpatches:
                raise RuntimeError("Wrong number of patches in the first \
                        item of the dataset. Expected: " + str(numpatches) +
                                   ", obtained: " + str(self.numpatches))
            self.load3DPoints()
            self.kdt = KDTree(self.all_coords3d[:, :3],
                              leaf_size=40,
                              metric='euclidean')

            translation_frac = np.sqrt(5) / (self.patch_radius * 2)  # at most 5px
            self.photo_transform = transforms.Compose([
                transforms.ToPILImage(),
                transforms.ColorJitter(0.2, (0.9, 1.001), 0.2, 0.2),
                transforms.RandomAffine(22.5,
                                        (translation_frac, translation_frac),
                                        shear=5),
                transforms.CenterCrop(self.patch_radius * 2),
                transforms.ToTensor()
            ])

            if self.photo_jitter:
                self.prcoef = 1.25
            else:
                self.prcoef = 1

            # FIXME: remove since this is unneeded for training and is slow. Just for research.
            #self.saveDensityPointcloud()

        else:
            raise RuntimeError("No dataset items at specified location.")
Example #27
File: train_.py Project: AUGGEI/RL-Mnist
def rotation(degree):
    return transforms.RandomAffine(degrees=degree)
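Usage is a one-liner; each call draws a fresh angle in [-degree, degree] (pil_img below stands for any PIL image you supply):

rotated = rotation(30)(pil_img)  # random rotation in [-30, 30] degrees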
Example #28
 def __init__(self):
     # TODO: try RandomPerspective and Normalize
     self.affine = transforms.RandomAffine(degrees=45, scale=(0.8, 1.6))
     self.flip = transforms.RandomHorizontalFlip(0.5)
     self.perspective = transforms.RandomPerspective()
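A plausible `__call__` chaining these transforms (a sketch; the original method is not shown in this excerpt):

 def __call__(self, img):
     # Each transform draws its own random parameters per call.
     return self.perspective(self.flip(self.affine(img)))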
Example #29
    accuracy = 100. * correct.to(torch.float32) / len(loader.dataset)

    print('Average loss: {:.4f}, Accuracy: {}/{} ({:.2f}%)\n'.format(
        loss, correct, len(loader.dataset), accuracy))

    if log is not None and epoch is not None:
        log.add_scalar('val_loss', loss, epoch-1)
        log.add_scalar('val_acc', accuracy, epoch-1)


input_image_size = (150, 150)

data_transform = transforms.Compose([
        transforms.Resize(input_image_size),
        transforms.RandomAffine(degrees=0, translate=None,
                                scale=(0.8, 1.2), shear=0.2),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor()
    ])

noop_transform = transforms.Compose([
        transforms.Resize(input_image_size),
        transforms.ToTensor()
    ])
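Note that with degrees=0 the RandomAffine step above never rotates: it only rescales by a random factor in [0.8, 1.2] and shears by up to 0.2 degrees, while noop_transform keeps evaluation images unaugmented apart from the resize.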


def get_train_loader(batch_size=25):
    print('Train: ', end="")
    train_dataset = datasets.ImageFolder(root=datapath+'/train',
                                         transform=data_transform)
    train_loader = DataLoader(train_dataset, batch_size=batch_size,
Example #30
                               track_git=False)

    if torch.cuda.is_available() and not args.use_gpu:
        logger.info(
            'You have a GPU device so you should probably run with --use_gpu')
        device = torch.device('cpu')
    elif torch.cuda.is_available() and args.use_gpu:
        device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    else:
        device = torch.device('cpu')

    logger.info('Running with device %s', device)
    logger.info('Creates datasets')

    train_transform = transforms.Compose([
        transforms.RandomAffine(0, translate=(0, 0.1), scale=(1, 1.10)),
        transforms.RandomRotation((-20, 20)),
        transforms.ToTensor(),
    ])

    transform = torchvision.transforms.Compose(
        [torchvision.transforms.ToTensor()])

    train_dataset = XrayImageFolder(os.path.join(args.root_dir, 'train'),
                                    transform=train_transform)
    val_dataset = XrayImageFolder(os.path.join(args.root_dir, 'val'),
                                  transform=transform)
    test_dataset = XrayImageFolder(os.path.join(args.root_dir, 'test'),
                                   transform=transform)

    train_dataloader = DataLoader(train_dataset,