# train_imgs.mean(), train_imgs.std()

# In[17]:

# Grouped cross-validation: samples sharing a group id never span folds.
# group_kfold = GroupShuffleSplit(n_splits=5, random_state = 4321)
group_kfold = GroupKFold(n_splits=5)

# In[18]:

# Train: geometric augmentation (affine + flips); val: resize only.
# Both pipelines use the standard ImageNet channel statistics.
data_transforms = {
    'train': transforms.Compose([
        transforms.Resize(224),
        # transforms.Grayscale(3),
        transforms.RandomAffine(degrees=45, scale=(0.9, 1.1)),
        transforms.RandomHorizontalFlip(p=0.5),
        transforms.RandomVerticalFlip(p=0.5),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
    ]),
    'val': transforms.Compose([
        transforms.Resize(224),
        # transforms.Grayscale(3),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
    ]),
}

# In[19]:
def __init__(self, which_set='Cifar-10', root=None, train=True, download=True, return_idxs=False,
             num_classes=10,
             aug=('random_order', 'random_h_flip', 'random_crop', 'random_rot_10',
                  'random_scale_0.9_1.1', 'random_shear_5', 'cutout'),
             cut_n_holes=1, cut_length=16, dataset_norm_type='standardize'
             ):
    """Wrap a torchvision dataset with an augmentation pipeline assembled from `aug`.

    which_set: one of 'MNIST', 'Fashion-MNIST', 'Cifar-10', 'Cifar-100', or a
        'Cinic-10'(-enlarged) variant; anything else falls back to CIFAR-10.
    root: dataset directory; defaults to '../data/<which_set>'.
    aug: tuple of augmentation tags parsed below.
    dataset_norm_type: 'standardize' uses dataset stats; 'zeroone' uses 0.5/0.5.

    NOTE(review): the 'random_rot_*', 'random_scale_*_*' and 'random_shear_*'
    tags are only consumed inside the `augment == 'affine'` branch, so with the
    default `aug` (which has no 'affine' entry) they are silently ignored —
    confirm whether 'affine' should be in the default tuple.
    """
    # MNIST-family images are 28x28, CIFAR-family 32x32 (used for RandomCrop).
    image_length = 28 if 'MNIST' in which_set else 32
    # CIFAR channel statistics by default; overridden below per dataset/mode.
    self.norm_means, self.norm_stds = (0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)
    if dataset_norm_type == 'zeroone':
        self.norm_means, self.norm_stds = (0.5, 0.5, 0.5), (0.5, 0.5, 0.5)
    if 'MNIST' in which_set:
        # Single-channel statistics for MNIST / Fashion-MNIST.
        self.norm_means, self.norm_stds = (0.1307,), (0.3081,)
        if dataset_norm_type == 'zeroone':
            self.norm_means, self.norm_stds = (0.5,), (0.5,)
    normalizer = transforms.Normalize(self.norm_means, self.norm_stds)
    transforms_list = []
    for augment in aug:
        # First do the things that don't change where the image is in the box
        if augment == 'random_h_flip':
            transforms_list.append(transforms.RandomHorizontalFlip())
        if augment == 'random_v_flip':
            transforms_list.append(transforms.RandomVerticalFlip())
        # Then mess with brightness etc.
        if augment == 'color_jitter':
            transforms_list.append(transforms.ColorJitter(brightness=0.3, contrast=0.3, hue=0.0))
        # Now do some sheering/cropping/rotation that changes where the images is
        if augment == 'affine':
            # Gather rotation/scale/shear settings from the other aug tags.
            rot_degrees = 0
            scale_low = None
            scale_high = None
            shear_degrees = None
            for augment_inner in aug:
                if 'random_rot' in augment_inner:
                    rot_degrees = int(augment_inner.split('_')[-1])
                if 'random_scale' in augment_inner:
                    scale_low = float(augment_inner.split('_')[-2])
                    scale_high = float(augment_inner.split('_')[-1])
                if 'random_shear' in augment_inner:
                    shear_degrees = int(augment_inner.split('_')[-1])
            transforms_list.append(transforms.RandomAffine(
                degrees=rot_degrees,
                scale=None if (scale_low is None) or (scale_high is None) else (scale_low, scale_high),
                shear=shear_degrees))
        if augment == 'random_crop':
            transforms_list.append(transforms.RandomCrop(size=[image_length, image_length], padding=4))
    # Optionally shuffle the order in which the augmentations are applied.
    transform = transforms.Compose(transforms_list) if 'random_order' not in aug else \
        transforms.Compose([transforms.RandomOrder(transforms=transforms_list)])
    # ToTensor + Normalize always run after the PIL-level augmentations.
    transform.transforms.append(transforms.ToTensor())
    transform.transforms.append(normalizer)
    for augment in aug:
        # Finally do things that are related to regularisation
        if augment == 'cutout':
            transform.transforms.append(Cutout(n_holes=cut_n_holes, length=cut_length))
    if which_set == 'MNIST':
        self.dataset = datasets.MNIST(root='../data/MNIST' if root is None else root,
                                      train=train, download=download, transform=transform)
    elif which_set == 'Fashion-MNIST':
        self.dataset = datasets.FashionMNIST(root='../data/Fashion-MNIST' if root is None else root,
                                             train=train, download=download, transform=transform)
    elif which_set == 'Cifar-100':
        self.dataset = datasets.CIFAR100(root='../data/Cifar-100' if root is None else root,
                                         train=train, download=download, transform=transform)
    elif 'Cinic-10' in which_set:
        root_to_cinic = '../data/Cinic-10' if root is None else root
        if download:
            download_cinic(root_to_cinic.replace('-enlarged', ''))
        # NOTE(review): reconstructed nesting — assumed the enlarge step runs
        # whenever the '-enlarged' variant is requested; confirm it is not
        # meant to run only when download is True.
        if '-enlarged' in which_set:
            enlarge_cinic_10(root_to_cinic.replace('-enlarged', ''))
        self.dataset = datasets.ImageFolder(
            root=('../data/Cinic-10' if root is None else root) + ('/train' if train else '/test'),
            transform=transform)
    else:
        # Fallback: any unrecognised name gets CIFAR-10.
        self.dataset = datasets.CIFAR10(root='../data/Cifar-10' if root is None else root,
                                        train=train, download=download, transform=transform)
    self.return_idxs = return_idxs
    self.num_classes = num_classes
    self.device = 'cuda' if torch.cuda.is_available() else 'cpu'
normalize = transforms.Compose([ transforms.ToTensor(), transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) ]) augcolor = [ transforms.ColorJitter(brightness=0.5, contrast=0.5, saturation=0.5, hue=0.5) ] augaffine = [ transforms.RandomAffine(20, scale=(0.9, 1.1), shear=20, resample=PIL.Image.BICUBIC, fillcolor=(100, 100, 100)) ] augtrans = transforms.Compose([ transforms.RandomApply(augcolor, p=0.8), transforms.RandomApply(augaffine, p=0.8), transforms.ToTensor(), transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) ]) bulk = ResNet_Bulk() head = ResNet_Head() curltrainer = curl.CURL(datasets.STL10,
def get_datasets(data_dir, cfg, mode="train"):
    """Build (train, eval, samples) ImageFolder datasets under ``data_dir``.

    mode == "train":       returns (train, val, val_samples)
    mode == "final_train": returns (train, test, samples)
    mode == "test":        returns (train, test, samples)

    Transform options are read from ``cfg.transform``; augmentations are
    applied to the train split only, after a shared deterministic resize.
    """
    common_transforms = []
    train_transforms = []
    # Shared resize applied to every split.
    # if cfg.transform.transform_resize_match:
    common_transforms.append(transforms.Resize((cfg.transform.transform_resize,
                                                cfg.transform.transform_resize)))
    if cfg.transform.transform_random_resized_crop:
        train_transforms.append(transforms.RandomResizedCrop(cfg.transform.transform_resize))
    if cfg.transform.transform_random_horizontal_flip:
        train_transforms.append(torchvision.transforms.RandomHorizontalFlip(p=0.5))
    if cfg.transform.transform_random_rotation:
        train_transforms.append(transforms.RandomRotation(
            cfg.transform.transform_random_rotation_degrees))  # , fill=255))
    if cfg.transform.transform_random_shear:
        train_transforms.append(torchvision.transforms.RandomAffine(0, shear=(
            cfg.transform.transform_random_shear_x1,
            cfg.transform.transform_random_shear_x2,
            cfg.transform.transform_random_shear_y1,
            cfg.transform.transform_random_shear_y2
        ), fillcolor=255))
    if cfg.transform.transform_random_perspective:
        train_transforms.append(transforms.RandomPerspective(
            distortion_scale=cfg.transform.transform_perspective_scale, p=0.5, interpolation=3))
    if cfg.transform.transform_random_affine:
        train_transforms.append(transforms.RandomAffine(
            degrees=(cfg.transform.transform_degrees_min, cfg.transform.transform_degrees_max),
            translate=(cfg.transform.transform_translate_a, cfg.transform.transform_translate_b),
            fillcolor=255))
    data_transforms = {
        'train': transforms.Compose(common_transforms + train_transforms + [transforms.ToTensor()]),
        'test': transforms.Compose(common_transforms + [transforms.ToTensor()]),
    }

    train_dataset = datasets.ImageFolder(os.path.join(data_dir, "train"), data_transforms["train"])
    print(mode)
    if mode in ("final_train", "test"):
        # BUG FIX: mode == "test" previously referenced test_dataset /
        # samples_dataset that were only created in the "final_train" branch,
        # raising NameError. Both modes now build and return the same triple.
        # For the final model we can join train, validation, validation samples datasets:
        # train_dataset = torch.utils.data.ConcatDataset([train_dataset,
        #                                                 val_dataset,
        #                                                 val_samples_dataset])
        test_dataset = datasets.ImageFolder(os.path.join(data_dir, "test"),
                                            data_transforms["test"])
        samples_dataset = datasets.ImageFolder(os.path.join(data_dir, "samples"),
                                               data_transforms["test"])
        return train_dataset, test_dataset, samples_dataset
    if mode == "train":
        val_dataset = datasets.ImageFolder(os.path.join(data_dir, "val"),
                                           data_transforms["test"])
        val_samples_dataset = datasets.ImageFolder(os.path.join(data_dir, "val_samples"),
                                                   data_transforms["test"])
        return train_dataset, val_dataset, val_samples_dataset
out_dir = '../result'
data_dir = '../data'
test_interval = 1
resume_interval = 1
unit_num = 1000

use_cuda = not no_cuda and torch.cuda.is_available()
torch.manual_seed(seed)
# BUG FIX: the device was previously created unconditionally as
# torch.device('cuda:{}'.format(gpu_id)), which crashes on CPU-only machines
# even though use_cuda is computed above (the correct line was commented out).
device = torch.device('cuda:{}'.format(gpu_id) if use_cuda else 'cpu')
kwargs = {'num_workers': 1, 'pin_memory': True} if use_cuda else {}

## Data augmentation and normalization
transform = transforms.Compose([
    transforms.RandomRotation(20),
    transforms.RandomAffine(degrees=0, translate=(0.2, 0.2)),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize((0.5, ), (0.5, ))
])

print('start to load train dataset')
trainset = datasets.CIFAR10(root=data_dir, train=True, download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size, shuffle=True, **kwargs)
print('Resize 256', file=open(filename, "a")) print('randomcrop 224', file=open(filename, "a")) print('batchsize - 50', file=open(filename, "a")) print('transforms.RandomHorizontalFlip()', file=open(filename, "a")) print('transforms.RandomRotation(10)', file=open(filename, "a")) print('transforms.RandomAffine(0,shear=10,scale=(0.8,1.6)),', file=open(filename, "a")) print('transforms.ColorJitter(brightness=0.2,contrast=0.2,saturation=0.2),', file=open(filename, "a")) transform_train = transforms.Compose([ transforms.Resize((256, 256)), transforms.RandomCrop(224), transforms.RandomHorizontalFlip(), transforms.RandomRotation(10), transforms.RandomAffine(0, shear=10, scale=(0.8, 1.6)), transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2), transforms.ToTensor(), transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5]) ]) transform = transforms.Compose([ transforms.Resize((256, 256)), transforms.ToTensor(), transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5]) ]) training_dataset = datasets.ImageFolder(root=PATHTrain, transform=transform_train) validation_dataset = datasets.ImageFolder(root=PATHVal, transform=transform) training_loader = torch.utils.data.DataLoader(dataset=training_dataset,
import torchvision.transforms as transforms

# Training pipeline: resize to 96x96, photometric jitter, mild affine jitter,
# random flip, then tensor conversion and per-channel normalisation with
# dataset-specific statistics.
data_transforms = transforms.Compose([
    transforms.Resize((96, 96)),
    transforms.ColorJitter(0.8, contrast=0.3),
    transforms.RandomAffine(10, scale=(0.8, 1.2), translate=(0.2, 0.2)),
    transforms.RandomHorizontalFlip(),  # flip transform
    transforms.ToTensor(),
    transforms.Normalize((0.3337, 0.3064, 0.3171), (0.2672, 0.2564, 0.2629)),
])

# Validation pipeline: deterministic resize + the same normalisation only.
validation_data_transforms = transforms.Compose([
    transforms.Resize((96, 96)),
    transforms.ToTensor(),
    transforms.Normalize((0.3337, 0.3064, 0.3171), (0.2672, 0.2564, 0.2629)),
])
dc_num_capsules, dc_num_routes, dc_in_channels, dc_out_channels, dc_routing_iters, dec_caps_size, dec_num_caps, dec_img_size, dec_img_channels).cuda() optimizer = torch.optim.Adam(model.parameters(), lr=0.001) lr_scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer,0.95) # loss and optimizer # nn.CrossEntropyLoss() computes softmax internally # criterion = nn.CrossEntropyLoss() # optimizer = torch.optim.SGD(model.parameters(), lr = learning_rate) optimizer = torch.optim.Adam(model.parameters()) f = open('./checkpoint.txt','w') f.close() train_loader = torch.utils.data.DataLoader(datasets.MNIST(root='./MNIST',train=True,download=True,transform=transforms.Compose([ transforms.RandomAffine(degrees=0,translate=(0.075,0.075)), transforms.ToTensor()])),batch_size=batch_size,shuffle=True) test_loader = torch.utils.data.DataLoader(datasets.MNIST(root='./MNIST',train=False,download=True,transform=transforms.ToTensor()),batch_size=batch_size,shuffle=True) def test(model, test_loader, batch_size): test_loss = 0.0 correct = 0.0 for batch_idx, (data, labels) in enumerate(test_loader): data, labels = data.cuda(), one_hot(labels.cuda()) output, masked_output, recnstrcted = model(data) #loss = model.loss(outputs, recnstrcted, data, labels) #test_loss += loss.data masked_cpu, labels_cpu = masked_output.cpu(), labels.cpu() # print(masked_output.shape) # print("\n",sum(np.argmax(masked_cpu.data.numpy(), 1) == np.argmax(labels_cpu.data.numpy(), 1)).shape) correct += sum(np.argmax(masked_cpu.data.numpy(), 1) == np.argmax(labels_cpu.data.numpy(), 1))
def loadDataset(dataset, batch_size, train, transform=True, val=False):
    """Return a shuffled DataLoader for the named dataset's train or test split.

    dataset: torchvision dataset class name, or 'Imagenet12' / 'AG' / 'SST'.
    transform: when True, append dataset-specific normalisation (MNIST/CIFAR).
    val: for AG/SST only — use the last 1000 rows as a validation slice.

    NOTE(review): `use_cuda` (and, for Imagenet12, `normalize`) are read from
    module scope and are not visible in this chunk — confirm they are defined.
    """
    # Per-dataset constructor kwargs (split naming differs across datasets).
    oargs = {}
    if dataset in ["MNIST", "CIFAR10", "CIFAR100", "FashionMNIST", "PhotoTour"]:
        oargs['train'] = train
    elif dataset in ["STL10", "SVHN"]:
        oargs['split'] = 'train' if train else 'test'
    elif dataset in ["LSUN"]:
        oargs['classes'] = 'train' if train else 'test'
    elif dataset in ["Imagenet12"]:
        pass
    elif dataset in ["AG"]:
        pass
    else:
        # NOTE(review): "SST" is handled further down but is not whitelisted
        # here, so dataset="SST" raises before reaching its branch — confirm.
        raise Exception(dataset + " is not yet supported")
    if dataset in ["MNIST"]:
        transformer = transforms.Compose(
            [transforms.ToTensor()] +
            ([transforms.Normalize((0.1307,), (0.3081,))] if transform else []))
    elif dataset in ["CIFAR10", "CIFAR100"]:
        # Train-only augmentation (random translate + horizontal flip), then
        # tensor conversion and optional CIFAR channel normalisation.
        transformer = transforms.Compose(([
            # transforms.RandomCrop(32, padding=4),
            transforms.RandomAffine(0, (0.125, 0.125), resample=PIL.Image.BICUBIC),
            transforms.RandomHorizontalFlip(),
            # transforms.RandomRotation(15, resample = PIL.Image.BILINEAR)
        ] if train else []) + [transforms.ToTensor()] +
            ([transforms.Normalize((0.4914, 0.4822, 0.4465),
                                   (0.2023, 0.1994, 0.2010))] if transform else []))
    elif dataset in ["SVHN"]:
        transformer = transforms.Compose([
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            transforms.Normalize((0.5, 0.5, 0.5), (0.2, 0.2, 0.2))])
    else:
        transformer = transforms.ToTensor()
    if dataset in ["Imagenet12"]:
        # https://github.com/facebook/fb.resnet.torch/blob/master/INSTALL.md#download-the-imagenet-dataset
        # NOTE(review): this pipeline has no ToTensor() and `normalize` is not
        # defined in this chunk — presumably a module-level transform; verify.
        train_set = datasets.ImageFolder(
            '../data/Imagenet12/train' if train else '../data/Imagenet12/val',
            transforms.Compose([
                transforms.RandomResizedCrop(224),
                transforms.RandomHorizontalFlip(),
                normalize,
            ]))
    elif dataset in ["AG", "SST"]:
        # Pre-computed numpy feature/label arrays; the last 1000 rows are
        # reserved as the validation slice.
        X = np.load("./dataset/%s/X_%s.npy" % (dataset, 'train' if train else 'test'))
        y = np.load("./dataset/%s/y_%s.npy" % (dataset, 'train' if train else 'test'))
        if val:
            X = X[-1000:]
            y = y[-1000:]
        elif train:
            X = X[:-1000]
            y = y[:-1000]
        x = torch.from_numpy(X)
        train_set = torch.utils.data.TensorDataset(x, torch.from_numpy(y))
    else:
        train_set = getattr(datasets, dataset)('../data', download=True,
                                               transform=transformer, **oargs)
    return torch.utils.data.DataLoader(
        train_set, batch_size=batch_size, shuffle=True,
        **({'num_workers': 1, 'pin_memory': True} if use_cuda else {}))
def __init__(self, svhn_path, curlfrac=0.5, supfrac=0.5, k=1, shuffle=True, augment=False,
             use_cuda=False, dload_dataset=False):
    """Semi-supervised (CURL) SVHN trainer setup.

    Splits the SVHN train split into an unlabeled contrastive ("curl") subset
    and a labeled supervised subset, sized by curlfrac/supfrac (which must sum
    to at most 1.0; otherwise both are reset to 0.5).

    svhn_path: directory holding (or to receive) the SVHN data.
    k: number of contrastive views, forwarded to ApproxContrastedData.
    augment: apply the stochastic color/affine pipeline to supervised training.
    dload_dataset: download SVHN if missing.
    """
    self.k = k
    self.softplus = nn.Softplus()
    self.bulk = Net_Bulk()
    self.head = Net_Head()
    # Plain pipeline: tensor conversion + (-1, 1) normalisation.
    normalize = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
    ])
    augcolor = [
        transforms.ColorJitter(brightness=0.5, contrast=0.5, saturation=0.5, hue=0.5)
    ]
    augaffine = [
        transforms.RandomAffine(20, scale=(0.9, 1.1), shear=20,
                                resample=PIL.Image.BICUBIC, fillcolor=(100, 100, 100))
    ]
    # Stochastic augmentation: each component applied with p=0.8.
    augtrans = transforms.Compose([
        transforms.RandomApply(augcolor, p=0.8),
        transforms.RandomApply(augaffine, p=0.8),
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
    ])
    # Always-on augmentation used to generate the contrastive views.
    contrasttrans = transforms.Compose([
        transforms.ColorJitter(brightness=0.5, contrast=0.5, saturation=0.5, hue=0.5),
        transforms.RandomAffine(20, scale=(0.9, 1.1), shear=20,
                                resample=PIL.Image.BICUBIC, fillcolor=(100, 100, 100)),
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
    ])
    if augment:
        transform = augtrans
    else:
        transform = normalize
    self.suptrainset = datasets.SVHN(svhn_path, split='train', transform=transform,
                                     target_transform=None, download=dload_dataset)
    self.testset = datasets.SVHN(svhn_path, split='test', transform=normalize,
                                 target_transform=None, download=dload_dataset)
    # Clamp invalid fraction combinations back to the defaults.
    if curlfrac + supfrac > 1.0:
        print("CURL fraction plus SUP fraction cannot exceed 1")
        print("Setting to defaults")
        curlfrac, supfrac = 0.5, 0.5
    trainset_size = len(self.suptrainset)
    indices = list(range(trainset_size))
    # `end` = number of examples used in total; the first `curlend` of the
    # (optionally shuffled) indices become unlabeled, the rest labeled.
    end = int(np.floor((curlfrac + supfrac) * trainset_size))
    curlend = int(np.floor(curlfrac / (supfrac + curlfrac) * end))
    if shuffle:
        np.random.shuffle(indices)
    curltrain_indices = indices[:curlend]
    suptrain_indices = indices[curlend:end]
    print(f"Number of labeled images: {len(suptrain_indices)}")
    print(f"Number of unlabeled images: {len(curltrain_indices)}")
    self.suptrain_sampler = SubsetRandomSampler(suptrain_indices)
    self.curltrain_sampler = SubsetRandomSampler(curltrain_indices)
    # self.curltrainset = ContrastedData(svhn_path, split='train', accepted_indices=curltrain_indices, contrast_transform=contrasttrans, k=k, transform=transform, download=dload_dataset)
    self.curltrainset = ApproxContrastedData(
        svhn_path, split='train', accepted_indices=curltrain_indices,
        contrast_transform=contrasttrans, k=k, transform=transform,
        download=dload_dataset)
    # Fall back to CPU when CUDA was requested but is unavailable.
    if use_cuda:
        if torch.cuda.is_available():
            self.device = torch.device('cuda')
        else:
            print("CUDA not available")
            self.device = torch.device('cpu')
    else:
        self.device = torch.device('cpu')
    self.bulk.to(self.device)
    self.head.to(self.device)
def __init__(self, svhn_path, frac=0.5, shuffle=True, augment=True, use_cuda=False,
             dload_dataset=False):
    """Supervised SVHN trainer setup.

    svhn_path: directory holding (or to receive) the SVHN data.
    frac : float
        fraction of dataset to use for training
    shuffle: shuffle indices before taking the training subset.
    augment: apply the stochastic color/affine pipeline to the train split.
    use_cuda: request GPU training (falls back to CPU when unavailable).
    dload_dataset: download SVHN if missing.
    """
    self.net = Net_Full()
    # Plain pipeline: tensor conversion + (-1, 1) normalisation.
    normalize = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
    ])
    augcolor = [
        transforms.ColorJitter(brightness=0.5, contrast=0.5, saturation=0.5, hue=0.5)
    ]
    augaffine = [
        transforms.RandomAffine(20, scale=(0.9, 1.1), shear=20,
                                resample=PIL.Image.BICUBIC, fillcolor=(100, 100, 100))
    ]
    # Stochastic augmentation: each component applied with p=0.8.
    augtrans = transforms.Compose([
        transforms.RandomApply(augcolor, p=0.8),
        transforms.RandomApply(augaffine, p=0.8),
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
    ])
    # BUG FIX: the branches were inverted — augment=True previously selected
    # the plain `normalize` pipeline and augment=False the augmented one
    # (the parallel CURL trainer in this file has it the right way round).
    if augment:
        transform = augtrans
    else:
        transform = normalize
    trainset = datasets.SVHN(svhn_path, split='train', transform=transform,
                             target_transform=None, download=dload_dataset)
    self.trainset = trainset
    testset = datasets.SVHN(svhn_path, split='test', transform=normalize,
                            target_transform=None, download=dload_dataset)
    self.testset = testset
    # Draw a random subset of size frac * |trainset| for training.
    trainset_size = len(self.trainset)
    indices = list(range(trainset_size))
    end = int(np.floor(frac * trainset_size))
    if shuffle:
        np.random.shuffle(indices)
    train_indices = indices[:end]
    self.train_sampler = SubsetRandomSampler(train_indices)
    # Fall back to CPU when CUDA was requested but is unavailable.
    if use_cuda:
        if torch.cuda.is_available():
            self.device = torch.device('cuda')
        else:
            print("CUDA not available")
            self.device = torch.device('cpu')
    else:
        self.device = torch.device('cpu')
    self.net.to(self.device)
def __init__(self, root, add_labeled=0, advanced_transforms=True, remove_classes=False,
             expand_labeled=0, expand_unlabeled=0, unlabeled_subset_ratio=1, oversampling=True,
             stratified=False, merged=True, unlabeled_augmentations=False, seed=9999,
             k_medoids=False, k_medoids_model=None, k_medoids_n_clusters=10, start_labeled=300):
    """Configuration holder for the ISIC skin-lesion dataset.

    Sets up file paths under <root>/isic/{train,test}, the train/test/
    autoencoder/SimCLR/FixMatch transform pipelines, and the bookkeeping
    attributes used by the active-learning loop (labeled/unlabeled splits,
    class removal/merging, k-medoids selection).
    """
    self.root = root
    self.train_path = os.path.join(self.root, "isic", "train")
    self.test_path = os.path.join(self.root, "isic", "test")
    # Per-channel statistics of the ISIC images.
    # NOTE(review): these stats are not used in any pipeline below (no
    # Normalize transform) — confirm whether that is intentional.
    self.isic_mean = (0.6679, 0.5297, 0.5246)
    self.isic_std = (0.1338, 0.1470, 0.1577)
    self.input_size = 128
    self.crop_size = 128
    self.expand_labeled = expand_labeled
    self.expand_unlabeled = expand_unlabeled
    self.oversampling = oversampling
    self.stratified = stratified
    self.merged = merged
    self.merge_classes = []
    if advanced_transforms:
        # Heavy train-time augmentation; RandomErasing operates on the tensor.
        self.transform_train = transforms.Compose([
            transforms.RandomCrop(self.crop_size),
            transforms.RandomAffine(degrees=90, translate=(0.2, 0.2)),
            transforms.Resize(size=self.input_size),
            transforms.RandomHorizontalFlip(),
            transforms.RandomVerticalFlip(),
            transforms.ToTensor(),
            transforms.RandomErasing(scale=(0.02, 0.2), ratio=(0.3, 0.9)),
        ])
        self.transform_test = transforms.Compose([
            transforms.Resize(size=self.input_size),
            transforms.ToTensor(),
        ])
    else:
        # Minimal pipelines: resize + tensor conversion for both splits.
        self.transform_train = transforms.Compose([
            transforms.Resize(size=self.input_size),
            transforms.ToTensor(),
        ])
        self.transform_test = transforms.Compose([
            transforms.Resize(size=self.input_size),
            transforms.ToTensor(),
        ])
    # Autoencoder pretraining reuses the heavy augmentation pipeline.
    self.transform_autoencoder = transforms.Compose([
        transforms.RandomCrop(self.crop_size),
        transforms.RandomAffine(degrees=90, translate=(0.2, 0.2)),
        transforms.Resize(size=self.input_size),
        transforms.RandomHorizontalFlip(),
        transforms.RandomVerticalFlip(),
        transforms.ToTensor(),
        transforms.RandomErasing(scale=(0.02, 0.2), ratio=(0.3, 0.9)),
    ])
    self.transform_simclr = TransformsSimCLR(size=self.input_size)
    self.transform_fixmatch = TransformFix(crop_size=self.crop_size, input_size=self.input_size)
    # NOTE(review): both branches evaluate to 0, so `merged` currently has no
    # effect on num_classes — the merged branch presumably should be non-zero
    # (e.g. the number of merged classes); confirm intended value.
    self.merged_classes = 0 if self.merged else 0
    self.num_classes = 8 - self.merged_classes
    self.add_labeled = add_labeled
    self.unlabeled_subset_ratio = unlabeled_subset_ratio
    self.unlabeled_subset_num = None
    self.remove_classes = remove_classes
    self.unlabeled_augmentations = unlabeled_augmentations
    self.labeled_class_samples = None
    # Class indices dropped when remove_classes is enabled.
    self.classes_to_remove = [2, 3, 4, 5, 6, 7]
    self.seed = seed
    self.labeled_amount = self.num_classes
    self.k_medoids = k_medoids
    self.k_medoids_model = k_medoids_model
    self.k_medoids_n_clusters = k_medoids_n_clusters
    self.start_labeled = start_labeled
def _main_(device=""):
    """Train and evaluate an MLP on the ImageFolder dataset under ./Images, then plot losses.

    device: torch device string; auto-detected when empty.
    NOTE(review): model.cuda() is called unconditionally below, so despite the
    auto-detection this function currently requires a CUDA machine — confirm.
    """
    print("Import Complete")
    # Hyper-parameters.
    n_epochs = 16
    batch_size_train = 4
    batch_size_test = 4
    learning_rate = 0.01
    momentum = 0.5
    log_interval = 5
    torch.backends.cudnn.enabled = True
    # Set fixed random number seed
    # random_seed = 42
    # torch.manual_seed(random_seed)
    # print("Seeded Torch")

    # Grayscale 64x64 pipelines: train adds rotation/affine/jitter; test is deterministic.
    data_transforms = {
        'train': transforms.Compose([
            transforms.RandomRotation(15),
            transforms.RandomAffine(15)  # ,transforms.Resize(224)
            , transforms.CenterCrop(64),
            transforms.ColorJitter(0.1, 0.1, 0.1),
            transforms.Grayscale(),
            transforms.ToTensor()
            # ,transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
        ]),
        'test': transforms.Compose([
            # transforms.Resize(224)
            transforms.CenterCrop(64),
            transforms.Grayscale(),
            transforms.ToTensor()
            # ,transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
        ])
    }
    location = r"Images"
    image_datasets = {
        x: ImageFolder(os.path.join(location, x), data_transforms[x])
        for x in ['train', 'test']
    }
    train_loader = torch.utils.data.DataLoader(image_datasets['train'],
                                               batch_size=batch_size_train,
                                               shuffle=True, num_workers=1)
    test_loader = torch.utils.data.DataLoader(image_datasets['test'],
                                              batch_size=batch_size_test,
                                              shuffle=True, num_workers=1)
    class_names = image_datasets['train'].classes
    print("Loaded Dataset")
    if device == "":
        # Work out if we can use the GPU
        device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    print(f"Running on {device}")

    ## Transfer Learning Test
    import torchvision.models as models
    ## Other Models
    # model = models.resnet18(pretrained = True)
    # model = models.googlenet(pretrained= True)
    # num_features = model.fc.in_features
    # model.fc = nn.Linear(num_features, 6)

    # Original Model
    model = MLP()
    model.to(device)
    # NOTE(review): unconditional .cuda() crashes on CPU-only machines and
    # defeats the auto-detection above — confirm whether it can be removed.
    model.cuda()

    # Define the loss function and optimizer
    optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate, momentum=momentum)

    # Data — per-batch and per-epoch loss histories (closed over by train/test).
    train_losses = []
    train_counter = []
    test_losses = []
    train_losses_once = []
    test_losses_once = []

    def train(epoch, device, model):
        # One epoch over train_loader; every `log_interval` batches record the
        # loss and checkpoint model + optimizer state.
        model.train()  # Set model to training mode
        len_dataset = len(train_loader.dataset)
        # Iterate over the DataLoader for training data
        for batch_no, data in enumerate(train_loader, 0):
            # Get inputs
            inputs, targets = data
            # Send to the right place
            inputs, targets = inputs.to(device), targets.to(device)
            # Zero the gradients
            optimizer.zero_grad()
            # Perform forward pass
            outputs = model(inputs)
            # Compute loss
            # loss = criterion(outputs, targets)
            loss = F.nll_loss(outputs, targets)
            # Perform backward pass
            loss.backward()
            # Perform optimization
            optimizer.step()
            # Print statistics
            # current_loss += loss.item()
            if batch_no % log_interval == 0:
                print(f"Epoch: {epoch}, Batch: {batch_no}")
                # print('Loss after mini-batch %5d: %.3f' %
                #       (i + 1, current_loss / 500))
                train_losses.append(loss.item())
                train_counter.append((batch_no * batch_size_train) + ((epoch - 1) * len_dataset))
                torch.save(model.state_dict(), r'.\results\model.pth')
                torch.save(optimizer.state_dict(), r'.\results\optimizer.pth')

    def test(device, model):
        # Full pass over test_loader: accumulates summed NLL loss and accuracy.
        model.eval()  # Set model to evaluation mode
        test_loss = 0
        correct = 0
        len_dataset = len(test_loader.dataset)
        with torch.no_grad():
            for inputs, targets in test_loader:
                inputs, targets = inputs.to(device), targets.to(device)
                output = model(inputs)
                # NOTE(review): size_average is deprecated in modern PyTorch;
                # reduction='sum' is the equivalent.
                test_loss += F.nll_loss(output, targets, size_average=False).item()
                prediction = output.data.max(1, keepdim=True)[1]
                correct += prediction.eq(
                    targets.data.view_as(prediction)).sum()
        test_loss /= len_dataset
        test_losses.append(test_loss)
        print(
            f'\nTest set: Avg. loss: {test_loss:.4f}, Accuracy: {correct}/{len_dataset} ({100. * correct / len_dataset:.0f}%)\n'
        )

    def imshow(inp, title=None):
        """Imshow for Tensor."""
        inp = inp.numpy().transpose((1, 2, 0))
        # Undo ImageNet normalisation before display.
        # NOTE(review): the pipelines above never applied Normalize (it is
        # commented out), so this de-normalisation shifts the display — confirm.
        mean = np.array([0.485, 0.456, 0.406])
        std = np.array([0.229, 0.224, 0.225])
        inp = std * inp + mean
        inp = np.clip(inp, 0, 1)
        plt.imshow(inp)
        if title is not None:
            plt.title(title)
        plt.pause(0.001)  # pause a bit so that plots are updated

    def visualize_model(model, num_images=6):
        # Plot predictions for the first `num_images` test images, restoring
        # the model's original train/eval mode afterwards.
        was_training = model.training
        model.eval()
        images_so_far = 0
        fig = plt.figure()
        with torch.no_grad():
            for i, (inputs, labels) in enumerate(test_loader):
                inputs, labels = inputs.to(device), labels.to(device)
                outputs = model(inputs)
                _, preds = torch.max(outputs, 1)
                for j in range(inputs.size()[0]):
                    images_so_far += 1
                    ax = plt.subplot(num_images // 2, 2, images_so_far)
                    ax.axis('off')
                    ax.set_title('predicted: {}'.format(class_names[preds[j]]))
                    imshow(inputs.cpu().data[j])
                    if images_so_far == num_images:
                        model.train(mode=was_training)
                        return
            model.train(mode=was_training)

    # Starting Test. How good is the Model RAW
    test(device, model)
    for epoch in range(1, n_epochs + 1):
        train(epoch, device, model)
        test(device, model)
        # Record one loss value per epoch for the epoch-level plot below.
        train_losses_once.append(train_losses[-1])
        test_losses_once.append(test_losses[-1])
    print("Training Complete")
    visualize_model(model)

    ## Loss Graph
    plt.figure()
    plt.axis([0, len(train_losses), 0, ceil(max(train_losses))])
    plt.xlabel('Number of Batches')
    plt.ylabel('Loss')
    plt.plot(train_losses, label="Training")
    print(len(train_losses))
    plt.plot(test_losses, label="Testing")  # [x for x in range(0, len(train_losses), 10)], test_losses)
    plt.legend()
    plt.figure()
    plt.xlabel('Epochs')
    plt.ylabel('Loss')
    plt.axis([1, 16, 0, 4])
    plt.plot([x for x in range(1, n_epochs + 1)], train_losses_once, label="Training")
    plt.plot([x for x in range(1, n_epochs + 1)], test_losses_once, label="Testing")
    plt.legend()
return tensor def __repr__(self): return self.__class__.__name__ + '(mean={0}, std={1})'.format( self.mean, self.std) ### transform.Normalize(torch.mean( ### Set Augmentations: data_transforms = { 'train': transforms.Compose([ transforms.RandomResizedCrop(224, scale=(0.5, 1.0)), transforms.RandomAffine(30), transforms.RandomHorizontalFlip(), transforms.ToTensor(), AddGaussianNoise(), transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]) ]), 'val': transforms.Compose([ transforms.Resize(224), #transforms.CenterCrop(100), transforms.ToTensor(), transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]) ]), } # %% ----------------------------------- Helper Functions --------------------------------------------------------------
# transforms.ToTensor(), # transforms.RandomErasing(p=1, scale=(0.02, 0.33), ratio=(0.3, 3.3), value=(254/255, 0, 0)), # transforms.RandomErasing(p=.3, scale=(0.02, 0.33), ratio=(0.3, 3.3), value='1234'), # 1 RandomChoice # transforms.RandomChoice([transforms.RandomVerticalFlip(p=1), transforms.RandomHorizontalFlip(p=1)]), # 2 RandomApply # transforms.RandomApply([transforms.RandomAffine(degrees=0, shear=45, fillcolor=(255, 0, 0)), # transforms.Grayscale(num_output_channels=3)], p=0.5), # 3 RandomOrder transforms.RandomOrder([ transforms.RandomRotation(15), transforms.Pad(padding=32), transforms.RandomAffine(degrees=0, translate=(0.01, 0.1), scale=(0.9, 1.1)) ]), transforms.ToTensor(), transforms.Normalize(norm_mean, norm_std), ]) train_data = RMBDataset(data_dir=train_dir, transform=train_transform) train_loader = DataLoader(dataset=train_data, batch_size=BATCH_SIZE, shuffle=True) # ============================ step 5/5 训练 ============================ for i, data in enumerate(train_loader): inputs, labels = data # B C H W
data_transforms_train = transforms.Compose([ transforms.Resize((456, 456)), transforms.ToTensor(), transforms.RandomHorizontalFlip(p=0.5), transforms.RandomRotation(25), transforms.RandomResizedCrop((456, 456), scale=(0.7, 1.3), ratio=(0.8, 1.2)), transforms.GaussianBlur(5, sigma=(0.1, 2.0)), transforms.ColorJitter(brightness=0.15, contrast=0.15, saturation=0.15, hue=0.15), transforms.RandomAffine(0, translate=(0.07, 0.07), scale=(0.6, 1.4)), transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) ]) dirName = args.dirname #'crop_dataset/train_images' # 'bird_dataset/train_images' listOfFile = os.listdir(dirName) completeFileList = list() for file in tqdm(listOfFile): completePath = os.path.join(dirName, file) image_paths = os.listdir(completePath) for image in image_paths: try: img_path = os.path.join(completePath, image) img = Image.open(img_path) img_aug = data_transforms_train(img)
# augment['1'] = transforms.Compose( # [ # transforms.ToPILImage(), # transforms.RandomAffine(degrees=10, translate=(0.1, 0.1), scale=(0.8, 1.2), shear=20), # transforms.ToTensor(), # transforms.Normalize((0.5, ), (0.5, ))]) # change to [C, H, W] # augment['2'] = transforms.Compose( # [ # transforms.ToTensor(), # transforms.Normalize((0.5, ), (0.5, ))]) # change to [C, H, W] augment['1'] = transforms.Compose( [ transforms.ToPILImage(), transforms.RandomAffine(degrees=10, translate=( 0.1, 0.1), scale=(0.8, 1.2), shear=20), transforms.ToTensor()]) # change to [C, H, W] augment['2'] = transforms.Compose( [ transforms.ToTensor()]) # change to [C, H, W] train_dataset = RegularDataset(opt, augment) train_dataloader = DataLoader(train_dataset, batch_size=opt.batchSize, shuffle=True, num_workers=int(opt.nThreads), pin_memory=True) dataset_size = len(train_dataset)
def Prepare_DataLoaders(Results_parameters, split, input_size=224):
    """Build train/test DataLoaders for DTD, MINC-2500 or GTOS-mobile.

    Results_parameters: dict of experiment settings (dataset name, paths,
        transform sizes, batch sizes, TSNE subsample size, loader options).
    split: zero-based fold index (datasets use 1-based `numset`).
    Returns {'train': DataLoader, 'test': DataLoader}; the train loader may be
    subsampled via a stratified sampler when the dataset exceeds
    Num_TSNE_images. Both loaders use shuffle=False (sampler-driven / ordered),
    and BOTH splits use the deterministic 'test' transform here — this function
    prepares data for result/TSNE extraction, not for training.
    """
    Dataset = Results_parameters['Dataset']
    data_dir = Results_parameters['data_dir']
    # Data augmentation and normalization for training
    # Just normalization and resize for test
    # Data transformations as described in:
    # http://openaccess.thecvf.com/content_cvpr_2018/papers/Xue_Deep_Texture_Manifold_CVPR_2018_paper.pdf
    if not (Results_parameters['rotation']):
        data_transforms = {
            'train': transforms.Compose([
                transforms.Resize(Results_parameters['resize_size']),
                transforms.RandomResizedCrop(input_size, scale=(.8, 1.0)),
                transforms.RandomHorizontalFlip(),
                transforms.ToTensor(),
                transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
            ]),
            'test': transforms.Compose([
                transforms.Resize(Results_parameters['center_size']),
                transforms.CenterCrop(input_size),
                transforms.ToTensor(),
                transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
            ]),
        }
    else:
        # Identical to the branch above except the test pipeline additionally
        # applies a random rotation (RandomAffine with translate/scale unset).
        data_transforms = {
            'train': transforms.Compose([
                transforms.Resize(Results_parameters['resize_size']),
                transforms.RandomResizedCrop(input_size, scale=(.8, 1.0)),
                transforms.RandomHorizontalFlip(),
                transforms.ToTensor(),
                transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
            ]),
            'test': transforms.Compose([
                transforms.Resize(Results_parameters['center_size']),
                transforms.CenterCrop(input_size),
                transforms.RandomAffine(Results_parameters['degrees']),
                transforms.ToTensor(),
                transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
            ]),
        }
    # Create training and test datasets, for results, apply test transforms
    # to both training and test datasets
    if Dataset == 'DTD':
        train_dataset = DTD_data(data_dir, data='train',
                                 numset=split + 1,
                                 img_transform=data_transforms['test'])
        validation_dataset = DTD_data(data_dir, data='val',
                                      numset=split + 1,
                                      img_transform=data_transforms['test'])
        test_dataset = DTD_data(data_dir, data='test',
                                numset=split + 1,
                                img_transform=data_transforms['test'])
        # Combine training and validation datasets into one training set.
        train_dataset = torch.utils.data.ConcatDataset(
            (train_dataset, validation_dataset))
    elif Dataset == 'MINC_2500':
        train_dataset = MINC_2500_data(data_dir, data='train',
                                       numset=split + 1,
                                       img_transform=data_transforms['test'])
        test_dataset = MINC_2500_data(data_dir, data='test',
                                      numset=split + 1,
                                      img_transform=data_transforms['test'])
    else:
        # Create training and test datasets
        train_dataset = GTOS_mobile_single_data(
            data_dir, train=True,
            image_size=Results_parameters['resize_size'],
            img_transform=data_transforms['test'])
        test_dataset = GTOS_mobile_single_data(
            data_dir, train=False,
            img_transform=data_transforms['test'])
    image_datasets = {'train': train_dataset, 'test': test_dataset}
    # If training dataset is larger than number of images for TSNE, subsample
    if len(image_datasets['train']) > Results_parameters['Num_TSNE_images']:
        indices = np.arange(len(image_datasets['train']))
        y = image_datasets['train'].targets
        # Use stratified split to balance training validation splits,
        # set random state to be same for each encoding method
        _, _, _, _, _, TSNE_indices = train_test_split(
            y, y, indices, stratify=y,
            test_size=Results_parameters['Num_TSNE_images'],
            random_state=split + 1)
        # Creating PT data samplers and loaders:
        TSNE_sampler = {
            'train': SubsetRandomSampler(TSNE_indices),
            'test': None
        }
    else:
        TSNE_sampler = {'train': None, 'test': None}
    # Create training and test dataloaders
    dataloaders_dict = {
        x: torch.utils.data.DataLoader(
            image_datasets[x],
            batch_size=Results_parameters['batch_size'][x],
            shuffle=False,
            sampler=TSNE_sampler[x],
            num_workers=Results_parameters['num_workers'],
            pin_memory=Results_parameters['pin_memory'])
        for x in ['train', 'test']
    }
    return dataloaders_dict
def train(args):
    """Train a classifier with gradient accumulation and per-epoch validation.

    Reads hyperparameters from the argparse namespace `args`; saves a
    checkpoint to ./checkpoints after every epoch. Dataset items are
    (image, label, recognized, _) tuples; the `recognized` flag weights each
    sample's loss (unrecognized drawings contribute 0 when the flag is 0).
    """
    if not os.path.exists('checkpoints'):
        os.mkdir('checkpoints')

    # Setup Augmentations (applied inside the dataset via `augmentations=`).
    data_aug = transforms.Compose([
        transforms.ToPILImage(),
        transforms.RandomHorizontalFlip(p=0.5),
        transforms.RandomAffine(degrees=10, translate=(0.05, 0.05),
                                scale=(0.95, 1.05)),
    ])

    # Setup Dataloader
    data_loader = get_loader(args.dataset)
    data_path = get_data_path(args.dataset)
    t_loader = data_loader(data_path, is_transform=True, split='train',
                           version='simplified',
                           img_size=(args.img_rows, args.img_cols),
                           augmentations=data_aug,
                           train_fold_num=args.train_fold_num,
                           num_train_folds=args.num_train_folds,
                           seed=args.seed)
    v_loader = data_loader(data_path, is_transform=True, split='val',
                           version='simplified',
                           img_size=(args.img_rows, args.img_cols),
                           num_val=args.num_val, seed=args.seed)

    # Seed all RNGs for reproducibility (NOTE: seeding happens *after* the
    # loaders above were constructed, so any randomness in their __init__
    # is not covered by these seeds).
    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed(args.seed)

    n_classes = t_loader.n_classes
    trainloader = data.DataLoader(t_loader, batch_size=args.batch_size,
                                  num_workers=2, shuffle=True,
                                  pin_memory=True, drop_last=True)
    valloader = data.DataLoader(v_loader, batch_size=args.batch_size,
                                num_workers=2, pin_memory=True)

    # Setup Metrics
    running_metrics = runningScore(n_classes)

    # Setup Model
    model = get_model(args.arch, n_classes, use_cbam=args.use_cbam)
    model.cuda()

    # Only optimize parameters that require gradients (supports frozen layers).
    optimizer = torch.optim.Adam(
        filter(lambda p: p.requires_grad, model.parameters()),
        lr=args.l_rate, weight_decay=args.weight_decay)
    if args.num_cycles > 0:
        # Per-iteration cosine annealing with warm restarts; len_trainloader
        # is a hard-coded upper bound on iterations per fold.
        len_trainloader = int(5e6)  # 4960414
        scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
            optimizer,
            T_max=args.num_train_folds * len_trainloader // args.num_cycles,
            eta_min=args.l_rate * 1e-1)
    else:
        # Per-epoch step decay otherwise.
        scheduler = torch.optim.lr_scheduler.MultiStepLR(
            optimizer, milestones=[2, 4, 6, 8], gamma=0.5)
    loss_fn = F.cross_entropy

    # Optionally resume from a checkpoint. Two formats are supported: a raw
    # state_dict, and a dict with 'model_state'/'optimizer_state'/metrics.
    start_epoch = 0
    if args.resume is not None:
        if os.path.isfile(args.resume):
            print("Loading model and optimizer from checkpoint '{}'".format(
                args.resume))
            checkpoint = torch.load(args.resume)
            model_dict = model.state_dict()
            if checkpoint.get('model_state', -1) == -1:
                # Raw state_dict checkpoint (no metadata available).
                model_dict.update(
                    convert_state_dict(checkpoint,
                                       load_classifier=args.load_classifier))
            else:
                model_dict.update(
                    convert_state_dict(checkpoint['model_state'],
                                       load_classifier=args.load_classifier))
                print(
                    "Loaded checkpoint '{}' (epoch {}, mapk {:.5f}, top1_acc {:7.3f}, top2_acc {:7.3f} top3_acc {:7.3f})"
                    .format(args.resume, checkpoint['epoch'],
                            checkpoint['mapk'], checkpoint['top1_acc'],
                            checkpoint['top2_acc'], checkpoint['top3_acc']))
            model.load_state_dict(model_dict)
            if checkpoint.get('optimizer_state', None) is not None:
                optimizer.load_state_dict(checkpoint['optimizer_state'])
                start_epoch = checkpoint['epoch']
        else:
            print("No checkpoint found at '{}'".format(args.resume))

    loss_sum = 0.0
    for epoch in range(start_epoch, args.n_epoch):
        start_train_time = timeit.default_timer()

        if args.num_cycles == 0:
            # Epoch-level schedule only when restarts are disabled.
            scheduler.step(epoch)

        model.train()
        optimizer.zero_grad()
        for i, (images, labels, recognized, _) in enumerate(trainloader):
            if args.num_cycles > 0:
                iter_num = i + epoch * len_trainloader
                scheduler.step(
                    iter_num % (args.num_train_folds * len_trainloader //
                                args.num_cycles))  # Cosine Annealing with Restarts

            images = images.cuda()
            labels = labels.cuda()
            recognized = recognized.cuda()

            outputs = model(images)
            # Per-sample CE, masked/weighted by the `recognized` flag.
            loss = (loss_fn(outputs, labels.view(-1),
                            ignore_index=t_loader.ignore_index,
                            reduction='none') * recognized.view(-1)).mean()
            loss = loss / float(args.iter_size)  # Accumulated gradients
            # NOTE(review): loss_sum accumulates live tensors (it is only used
            # for printing); calling .item()/detach() here would avoid
            # retaining autograd history between optimizer steps — confirm.
            loss_sum = loss_sum + loss
            loss.backward()

            if (i + 1) % args.print_train_freq == 0:
                print("Epoch [%d/%d] Iter [%6d/%6d] Loss: %.4f" %
                      (epoch + 1, args.n_epoch, i + 1, len(trainloader),
                       loss_sum))

            # Apply the accumulated gradients every iter_size batches (and at
            # the end of the epoch so no gradient is left unapplied).
            if (i + 1) % args.iter_size == 0 or i == len(trainloader) - 1:
                optimizer.step()
                optimizer.zero_grad()
                loss_sum = 0.0

        # ---- Validation ----
        mapk_val = AverageMeter()
        top1_acc_val = AverageMeter()
        top2_acc_val = AverageMeter()
        top3_acc_val = AverageMeter()
        mean_loss_val = AverageMeter()
        model.eval()
        with torch.no_grad():
            for i_val, (images_val, labels_val, recognized_val,
                        _) in tqdm(enumerate(valloader)):
                images_val = images_val.cuda()
                labels_val = labels_val.cuda()
                recognized_val = recognized_val.cuda()

                outputs_val = model(images_val)
                loss_val = (loss_fn(outputs_val, labels_val.view(-1),
                                    ignore_index=v_loader.ignore_index,
                                    reduction='none') *
                            recognized_val.view(-1)).mean()
                mean_loss_val.update(loss_val, n=images_val.size(0))

                # Top-3 predictions feed both MAP@3 and the confusion metrics.
                _, pred = outputs_val.topk(k=3, dim=1, largest=True,
                                           sorted=True)
                running_metrics.update(labels_val, pred[:, 0])
                acc1, acc2, acc3 = accuracy(outputs_val, labels_val,
                                            topk=(1, 2, 3))
                top1_acc_val.update(acc1, n=images_val.size(0))
                top2_acc_val.update(acc2, n=images_val.size(0))
                top3_acc_val.update(acc3, n=images_val.size(0))
                mapk_v = mapk(labels_val, pred, k=3)
                mapk_val.update(mapk_v, n=images_val.size(0))

        print('Mean Average Precision (MAP) @ 3: {:.5f}'.format(mapk_val.avg))
        print('Top 3 accuracy: {:7.3f} / {:7.3f} / {:7.3f}'.format(
            top1_acc_val.avg, top2_acc_val.avg, top3_acc_val.avg))
        print('Mean val loss: {:.4f}'.format(mean_loss_val.avg))
        score, class_iou = running_metrics.get_scores()
        for k, v in score.items():
            print(k, v)

        # Checkpoint every epoch (model + optimizer + validation metrics).
        state = {
            'epoch': epoch + 1,
            'model_state': model.state_dict(),
            'optimizer_state': optimizer.state_dict(),
            'mapk': mapk_val.avg,
            'top1_acc': top1_acc_val.avg,
            'top2_acc': top2_acc_val.avg,
            'top3_acc': top3_acc_val.avg,
        }
        torch.save(
            state, "checkpoints/{}_{}_{}_{}x{}_{}-{}-{}_model.pth".format(
                args.arch, args.dataset, epoch + 1, args.img_rows,
                args.img_cols, args.train_fold_num, args.num_train_folds,
                args.num_val))

        # Reset all running metrics for the next epoch.
        running_metrics.reset()
        mapk_val.reset()
        top1_acc_val.reset()
        top2_acc_val.reset()
        top3_acc_val.reset()
        mean_loss_val.reset()

        elapsed_train_time = timeit.default_timer() - start_train_time
        print('Training time (epoch {0:5d}): {1:10.5f} seconds'.format(
            epoch + 1, elapsed_train_time))
def get_transforms_test():
    """Build the test-time transform pipeline described by the global `config`.

    Depending on config['transforms'], composes either torchvision
    ('pytorch') or albumentations transforms, in the fixed order coded below.
    Each step is included only when its config flag under [...]['test'] is
    truthy.

    Returns:
        A torchvision transforms.Compose (pytorch branch) or an
        albumentations A.Compose (otherwise).
    """
    transforms_test_list = []
    if config['transforms'] == 'pytorch':
        if config['pytorch']['resize']['test']:
            transforms_test_list.append(
                transforms.Resize(
                    size=(config['pytorch']['resize']['test_size'],
                          config['pytorch']['resize']['test_size'])))
        if config['pytorch']['centerCrop']['test']:
            # BUGFIX: this branch previously appended a second Resize, so the
            # "centerCrop" option never actually center-cropped.
            transforms_test_list.append(
                transforms.CenterCrop(
                    size=(config['pytorch']['centerCrop']['test_size'],
                          config['pytorch']['centerCrop']['test_size'])))
        if config['pytorch']['colorJitter']['test']:
            # Jitter is applied stochastically even at test time (p=0.75).
            transforms_test_list.append(
                transforms.RandomApply([
                    transforms.ColorJitter(
                        config['pytorch']['colorJitter']['brightness'])
                ], p=0.75))
        if config['pytorch']['randomCrop']['test']:
            transforms_test_list.append(
                transforms.RandomCrop(
                    size=config['pytorch']['randomCrop']['test_size'],
                    padding=config['pytorch']['randomCrop']['padding']))
        if config['pytorch']['randomResizedCrop']['test']:
            transforms_test_list.append(
                transforms.RandomResizedCrop(
                    size=config['pytorch']['randomResizedCrop']['test_size']))
        if config['pytorch']['randomHorizontalFlip']['test']:
            transforms_test_list.append(transforms.RandomHorizontalFlip())
        if config['pytorch']['randomAffine']['test']:
            transforms_test_list.append(
                transforms.RandomAffine(
                    degrees=config['pytorch']['randomAffine']['degrees'],
                    scale=config['pytorch']['randomAffine']['scale']))
        if config['pytorch']['randomRotation']['test']:
            transforms_test_list.append(
                transforms.RandomRotation(
                    degrees=config['pytorch']['randomRotation']['degrees']))
        if config['pytorch']['toTensor']['test']:
            transforms_test_list.append(transforms.ToTensor())
        if config['pytorch']['normalize']['test']:
            # ImageNet channel statistics.
            transforms_test_list.append(
                transforms.Normalize([0.485, 0.456, 0.406],
                                     [0.229, 0.224, 0.225]))
        if config['pytorch']['randomErasing']['test']:
            # Must come after ToTensor: RandomErasing operates on tensors.
            transforms_test_list.append(
                transforms.RandomErasing(
                    p=config['pytorch']['randomErasing']['p'],
                    value=config['pytorch']['randomErasing']['value']))
        test_transforms = transforms.Compose(transforms_test_list)
    else:
        if config['albumentations']['resize']['test']:
            transforms_test_list.append(
                A.Resize(config['albumentations']['resize']['test_size'],
                         config['albumentations']['resize']['test_size']))
        if config['albumentations']['centerCrop']['test']:
            # BUGFIX: this branch previously appended a *torchvision* Resize
            # inside the albumentations pipeline, which both skipped the crop
            # and mixed incompatible transform APIs (albumentations passes
            # named numpy arrays, torchvision expects PIL images/tensors).
            transforms_test_list.append(
                A.CenterCrop(
                    config['albumentations']['centerCrop']['test_size'],
                    config['albumentations']['centerCrop']['test_size']))
        if config['albumentations']['horizontalFlip']['test']:
            transforms_test_list.append(A.HorizontalFlip())
        if config['albumentations']['rotate']['test']:
            transforms_test_list.append(
                A.Rotate(config['albumentations']['rotate']['limit']))
        if config['albumentations']['clahe']['test']:
            transforms_test_list.append(A.CLAHE())
        if config['albumentations']['gaussNoise']['test']:
            transforms_test_list.append(A.GaussNoise())
        if config['albumentations']['randomBrightness']['test']:
            transforms_test_list.append(A.RandomBrightness())
        if config['albumentations']['randomContrast']['test']:
            transforms_test_list.append(A.RandomContrast())
        # NOTE: 'randomBrightnrssContrast' is a typo, but it is the key that
        # existing config files use — do not rename without migrating configs.
        if config['albumentations']['randomBrightnrssContrast']['test']:
            transforms_test_list.append(A.RandomBrightnessContrast())
        if config['albumentations']['hueSaturationValue']['test']:
            transforms_test_list.append(A.HueSaturationValue())
        if config['albumentations']['normalize']['test']:
            transforms_test_list.append(
                A.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]))
        if config['albumentations']['toTensor']['test']:
            transforms_test_list.append(AT.ToTensor())
        test_transforms = A.Compose(transforms_test_list)
    return test_transforms
gflags.DEFINE_float("lr", 0.00006, "learning rate") gflags.DEFINE_integer("show_every", 10, "show result after each show_every iter.") gflags.DEFINE_integer("save_every", 100, "save model after each save_every iter.") gflags.DEFINE_integer("test_every", 100, "test model after each test_every iter.") gflags.DEFINE_integer("max_iter", 50000, "number of iterations before stopping") gflags.DEFINE_string("model_path", "/model", "path to store model") gflags.DEFINE_string("gpu_ids", "0,1,2,3", "gpu ids used to train") Flags(sys.argv) data_transforms = transforms.Compose( [transforms.RandomAffine(15), transforms.ToTensor()]) # train_dataset = dset.ImageFolder(root=Flags.train_path) # test_dataset = dset.ImageFolder(root=Flags.test_path) os.environ["CUDA_VISIBLE_DEVICES"] = Flags.gpu_ids print("use gpu:", Flags.gpu_ids, "to train.") trainSet = OmniglotTrain(Flags.train_path, transform=data_transforms) testSet = OmniglotTest(Flags.test_path, transform=transforms.ToTensor(), times=Flags.times, way=Flags.way) testLoader = DataLoader(testSet, batch_size=Flags.way,
def main(): parser = argparse.ArgumentParser(description="LSTM VAE Agents: ST-GS Language Emergence.") parser.add_argument("--seed", type=int, default=0) parser.add_argument("--parent_folder", type=str, help="folder to save into.",default="") parser.add_argument("--symbolic", action="store_true", default=False) parser.add_argument("--use_cuda", action="store_true", default=False) parser.add_argument("--dataset", type=str, choices=["Sort-of-CLEVR", "tiny-Sort-of-CLEVR", "XSort-of-CLEVR", "tiny-XSort-of-CLEVR", "dSprites", ], help="dataset to train on.", default="dSprites") parser.add_argument("--arch", type=str, choices=["CNN", "CNN3x3", "BN+CNN", "BN+CNN3x3", "BN+BetaVAE3x3", "BN+Coord2CNN3x3", "BN+Coord4CNN3x3", ], help="model architecture to train", default="BN+BetaVAE3x3") parser.add_argument("--graphtype", type=str, choices=["straight_through_gumbel_softmax", "reinforce", "baseline_reduced_reinforce", "normalized_reinforce", "baseline_reduced_normalized_reinforce", "max_entr_reinforce", "baseline_reduced_normalized_max_entr_reinforce", "argmax_reinforce", "obverter"], help="type of graph to use during training of the speaker and listener.", default="straight_through_gumbel_softmax") parser.add_argument("--max_sentence_length", type=int, default=20) parser.add_argument("--vocab_size", type=int, default=100) parser.add_argument("--optimizer_type", type=str, choices=[ "adam", "sgd" ], default="adam") parser.add_argument("--agent_loss_type", type=str, choices=[ "Hinge", "NLL", "CE", "BCE", ], default="Hinge") parser.add_argument("--agent_type", type=str, choices=[ "Baseline", ], default="Baseline") parser.add_argument("--rnn_type", type=str, choices=[ "LSTM", "GRU", ], default="LSTM") parser.add_argument("--lr", type=float, default=1e-4) parser.add_argument("--epoch", type=int, default=1875) parser.add_argument("--metric_epoch_period", type=int, default=20) parser.add_argument("--dataloader_num_worker", type=int, default=4) parser.add_argument("--metric_fast", 
action="store_true", default=False) parser.add_argument("--batch_size", type=int, default=8) parser.add_argument("--mini_batch_size", type=int, default=128) parser.add_argument("--dropout_prob", type=float, default=0.0) parser.add_argument("--emb_dropout_prob", type=float, default=0.8) parser.add_argument("--nbr_experience_repetition", type=int, default=1) parser.add_argument("--nbr_train_dataset_repetition", type=int, default=1) parser.add_argument("--nbr_test_dataset_repetition", type=int, default=1) parser.add_argument("--nbr_test_distractors", type=int, default=63) parser.add_argument("--nbr_train_distractors", type=int, default=47) parser.add_argument("--resizeDim", default=32, type=int,help="input image resize") #TODO: make sure it is understood....! parser.add_argument("--shared_architecture", action="store_true", default=True) parser.add_argument("--with_baseline", action="store_true", default=False) parser.add_argument("--homoscedastic_multitasks_loss", action="store_true", default=False) parser.add_argument("--use_curriculum_nbr_distractors", action="store_true", default=False) parser.add_argument("--use_feat_converter", action="store_true", default=False) parser.add_argument("--descriptive", action="store_true", default=False) parser.add_argument("--descriptive_ratio", type=float, default=0.0) parser.add_argument("--egocentric", action="store_true", default=False) parser.add_argument("--distractor_sampling", type=str, choices=[ "uniform", "similarity-0.98", "similarity-0.90", "similarity-0.75", ], default="uniform") # Obverter Hyperparameters: parser.add_argument("--use_sentences_one_hot_vectors", action="store_true", default=False) parser.add_argument("--differentiable", action="store_true", default=False) parser.add_argument("--obverter_threshold_to_stop_message_generation", type=float, default=0.95) parser.add_argument("--obverter_nbr_games_per_round", type=int, default=4) # Cultural Bottleneck: parser.add_argument("--iterated_learning_scheme", 
action="store_true", default=False) parser.add_argument("--iterated_learning_period", type=int, default=4) parser.add_argument("--iterated_learning_rehearse_MDL", action="store_true", default=False) parser.add_argument("--iterated_learning_rehearse_MDL_factor", type=float, default=1.0) # Dataset Hyperparameters: parser.add_argument("--train_test_split_strategy", type=str, choices=["combinatorial2-Y-2-8-X-2-8-Orientation-40-N-Scale-6-N-Shape-3-N", # Exp : DoRGsFurtherDise interweaved split simple XY normal "combinatorial2-Y-2-S8-X-2-S8-Orientation-40-N-Scale-4-N-Shape-1-N", "combinatorial2-Y-32-N-X-32-N-Orientation-5-S4-Scale-1-S3-Shape-3-N", #Sparse 2 Attributes: Orient.+Scale 64 imgs, 48 train, 16 test "combinatorial2-Y-2-S8-X-2-S8-Orientation-40-N-Scale-6-N-Shape-3-N", # 4x Denser 2 Attributes: 256 imgs, 192 train, 64 test, # Heart shape: interpolation: "combinatorial2-Y-4-2-X-4-2-Orientation-40-N-Scale-6-N-Shape-3-N", #Sparse 2 Attributes: X+Y 64 imgs, 48 train, 16 test "combinatorial2-Y-2-2-X-2-2-Orientation-40-N-Scale-6-N-Shape-3-N", #Dense 2 Attributes: X+Y 256 imgs, 192 train, 64 test "combinatorial2-Y-8-2-X-8-2-Orientation-10-2-Scale-1-2-Shape-3-N", #COMB2:Sparser 4 Attributes: 264 test / 120 train "combinatorial2-Y-4-2-X-4-2-Orientation-5-2-Scale-1-2-Shape-3-N", #COMB2:Sparse 4 Attributes: 2112 test / 960 train "combinatorial2-Y-2-2-X-2-2-Orientation-2-2-Scale-1-2-Shape-3-N", #COMB2:Dense 4 Attributes: ? test / ? 
train "combinatorial2-Y-4-2-X-4-2-Orientation-5-2-Scale-6-N-Shape-3-N", #COMB2 Sparse: 3 Attributes: XYOrientation 256 test / 256 train # Heart shape: Extrapolation: "combinatorial2-Y-4-S4-X-4-S4-Orientation-40-N-Scale-6-N-Shape-3-N", #Sparse 2 Attributes: X+Y 64 imgs, 48 train, 16 test "combinatorial2-Y-8-S2-X-8-S2-Orientation-10-S2-Scale-1-S3-Shape-3-N", #COMB2:Sparser 4 Attributes: 264 test / 120 train "combinatorial2-Y-4-S4-X-4-S4-Orientation-5-S4-Scale-1-S3-Shape-3-N", #COMB2:Sparse 4 Attributes: 2112 test / 960 train "combinatorial2-Y-2-S8-X-2-S8-Orientation-2-S10-Scale-1-S3-Shape-3-N", #COMB2:Dense 4 Attributes: ? test / ? train "combinatorial2-Y-4-S4-X-4-S4-Orientation-5-S4-Scale-6-N-Shape-3-N", #COMB2 Sparse: 3 Attributes: XYOrientation 256 test / 256 train # Ovale shape: "combinatorial2-Y-1-S16-X-1-S16-Orientation-40-N-Scale-6-N-Shape-2-N", # Denser 2 Attributes X+Y X 16/ Y 16/ --> 256 test / 768 train "combinatorial2-Y-8-S2-X-8-S2-Orientation-10-S2-Scale-1-S3-Shape-2-N", #COMB2:Sparser 4 Attributes: 264 test / 120 train "combinatorial2-Y-4-S4-X-4-S4-Orientation-5-S4-Scale-1-S3-Shape-2-N", #COMB2:Sparse 4 Attributes: 2112 test / 960 train "combinatorial2-Y-2-S8-X-2-S8-Orientation-2-S10-Scale-1-S3-Shape-2-N", #COMB2:Dense 4 Attributes: ? test / ? train #3 Attributes: denser 2 attributes(X+Y) with the sample size of Dense 4 attributes: "combinatorial2-Y-1-S16-X-1-S16-Orientation-2-S10-Scale-6-N-Shape-2-N", "combinatorial4-Y-4-S4-X-4-S4-Orientation-5-S4-Scale-1-S3-Shape-3-N", #Sparse 4 Attributes: 192 test / 1344 train ], help="train/test split strategy", # INTER: #default="combinatorial2-Y-4-2-X-4-2-Orientation-40-N-Scale-6-N-Shape-3-N") # EXTRA: #default="combinatorial2-Y-4-S4-X-4-S4-Orientation-40-N-Scale-6-N-Shape-3-N") # EXTRA-3: default="combinatorial2-Y-4-S4-X-4-S4-Orientation-5-S4-Scale-6-N-Shape-3-N") parser.add_argument("--fast", action="store_true", default=False, help="Disable the deterministic CuDNN. 
It is likely to make the computation faster.") #-------------------------------------------------------------------------- #-------------------------------------------------------------------------- # VAE Hyperparameters: #-------------------------------------------------------------------------- #-------------------------------------------------------------------------- parser.add_argument("--vae_detached_featout", action="store_true", default=False) parser.add_argument("--vae_lambda", type=float, default=1.0) parser.add_argument("--vae_use_mu_value", action="store_true", default=False) parser.add_argument("--vae_nbr_latent_dim", type=int, default=32) parser.add_argument("--vae_decoder_nbr_layer", type=int, default=3) parser.add_argument("--vae_decoder_conv_dim", type=int, default=32) parser.add_argument("--vae_gaussian", action="store_true", default=False) parser.add_argument("--vae_gaussian_sigma", type=float, default=0.25) parser.add_argument("--vae_beta", type=float, default=1.0) parser.add_argument("--vae_factor_gamma", type=float, default=0.0) parser.add_argument("--vae_constrained_encoding", action="store_true", default=False) parser.add_argument("--vae_max_capacity", type=float, default=1e3) parser.add_argument("--vae_nbr_epoch_till_max_capacity", type=int, default=10) #-------------------------------------------------------------------------- #-------------------------------------------------------------------------- #-------------------------------------------------------------------------- #-------------------------------------------------------------------------- args = parser.parse_args() print(args) gaussian = args.vae_gaussian vae_observation_sigma = args.vae_gaussian_sigma vae_beta = args.vae_beta factor_vae_gamma = args.vae_factor_gamma vae_constrainedEncoding = args.vae_constrained_encoding maxCap = args.vae_max_capacity #1e2 nbrepochtillmaxcap = args.vae_nbr_epoch_till_max_capacity monet_gamma = 5e-1 
#-------------------------------------------------------------------------- #-------------------------------------------------------------------------- #-------------------------------------------------------------------------- #-------------------------------------------------------------------------- seed = args.seed # Following: https://pytorch.org/docs/stable/notes/randomness.html torch.manual_seed(seed) if hasattr(torch.backends, "cudnn") and not(args.fast): torch.backends.cudnn.deterministic = True torch.backends.cudnn.benchmark = False np.random.seed(seed) random.seed(seed) # # Hyperparameters: nbr_epoch = args.epoch cnn_feature_size = -1 #600 #128 #256 # # Except for VAEs...! stimulus_resize_dim = args.resizeDim #64 #28 normalize_rgb_values = False rgb_scaler = 1.0 #255.0 from ReferentialGym.datasets.utils import ResizeNormalize transform = ResizeNormalize(size=stimulus_resize_dim, normalize_rgb_values=normalize_rgb_values, rgb_scaler=rgb_scaler) from ReferentialGym.datasets.utils import AddEgocentricInvariance ego_inv_transform = AddEgocentricInvariance() transform_degrees = 25 transform_translate = (0.0625, 0.0625) default_descriptive_ratio = 1-(1/(args.nbr_train_distractors+2)) # Default: 1-(1/(nbr_distractors+2)), # otherwise the agent find the local minimum # where it only predicts "no-target"... if args.descriptive_ratio <=0.001: descriptive_ratio = default_descriptive_ratio else: descriptive_ratio = args.descriptive_ratio rg_config = { "observability": "partial", "max_sentence_length": args.max_sentence_length, "nbr_communication_round": 1, "nbr_distractors": {"train":args.nbr_train_distractors, "test":args.nbr_test_distractors}, "distractor_sampling": args.distractor_sampling, # Default: use "similarity-0.5" # otherwise the emerging language # will have very high ambiguity... # Speakers find the strategy of uttering # a word that is relevant to the class/label # of the target, seemingly. 
"descriptive": args.descriptive, "descriptive_target_ratio": descriptive_ratio, "object_centric": False, "nbr_stimulus": 1, "graphtype": args.graphtype, "tau0": 0.2, "gumbel_softmax_eps": 1e-6, "vocab_size": args.vocab_size, "symbol_embedding_size": 256, #64 "agent_architecture": args.arch, #"CoordResNet18AvgPooled-2", #"BetaVAE", #"ParallelMONet", #"BetaVAE", #"CNN[-MHDPA]"/"[pretrained-]ResNet18[-MHDPA]-2" "agent_learning": "learning", #"transfer_learning" : CNN"s outputs are detached from the graph... "agent_loss_type": args.agent_loss_type, #"NLL" "cultural_pressure_it_period": None, "cultural_speaker_substrate_size": 1, "cultural_listener_substrate_size": 1, "cultural_reset_strategy": "oldestL", # "uniformSL" #"meta-oldestL-SGD" "cultural_reset_meta_learning_rate": 1e-3, # Obverter"s Cultural Bottleneck: "iterated_learning_scheme": args.iterated_learning_scheme, "iterated_learning_period": args.iterated_learning_period, "iterated_learning_rehearse_MDL": args.iterated_learning_rehearse_MDL, "iterated_learning_rehearse_MDL_factor": args.iterated_learning_rehearse_MDL_factor, "obverter_stop_threshold": args.obverter_threshold_to_stop_message_generation, #0.0 if not in use. 
"obverter_nbr_games_per_round": args.obverter_nbr_games_per_round, "obverter_least_effort_loss": False, "obverter_least_effort_loss_weights": [1.0 for x in range(0, 10)], "batch_size": args.batch_size, "dataloader_num_worker": args.dataloader_num_worker, "stimulus_depth_dim": 1 if "dSprites" in args.dataset else 3, "stimulus_resize_dim": stimulus_resize_dim, "learning_rate": args.lr, #1e-3, "adam_eps": 1e-8, "dropout_prob": args.dropout_prob, "embedding_dropout_prob": args.emb_dropout_prob, "with_gradient_clip": False, "gradient_clip": 1e0, "use_homoscedastic_multitasks_loss": args.homoscedastic_multitasks_loss, "use_feat_converter": args.use_feat_converter, "use_curriculum_nbr_distractors": args.use_curriculum_nbr_distractors, "curriculum_distractors_window_size": 25, #100, "unsupervised_segmentation_factor": None, #1e5 "nbr_experience_repetition": args.nbr_experience_repetition, "with_utterance_penalization": False, "with_utterance_promotion": False, "utterance_oov_prob": 0.5, # Expected penalty of observing out-of-vocabulary words. # The greater this value, the greater the loss/cost. 
"utterance_factor": 1e-2, "with_speaker_entropy_regularization": False, "with_listener_entropy_regularization": False, "entropy_regularization_factor": -1e-2, "with_mdl_principle": False, "mdl_principle_factor": 5e-2, "with_weight_maxl1_loss": False, "use_cuda": args.use_cuda, "train_transform": transform, "test_transform": transform, } if args.egocentric: rg_config["train_transform"]= T.Compose( [ ego_inv_transform, T.RandomAffine(degrees=transform_degrees, translate=transform_translate, scale=None, shear=None, resample=False, fillcolor=0), transform ] ) rg_config["test_transform"]= T.Compose( [ ego_inv_transform, T.RandomAffine(degrees=transform_degrees, translate=transform_translate, scale=None, shear=None, resample=False, fillcolor=0), transform ] ) ## Train set: train_split_strategy = args.train_test_split_strategy test_split_strategy = train_split_strategy ## Agent Configuration: agent_config = copy.deepcopy(rg_config) agent_config["use_cuda"] = rg_config["use_cuda"] agent_config["homoscedastic_multitasks_loss"] = rg_config["use_homoscedastic_multitasks_loss"] agent_config["use_feat_converter"] = rg_config["use_feat_converter"] agent_config["max_sentence_length"] = rg_config["max_sentence_length"] agent_config["nbr_distractors"] = rg_config["nbr_distractors"]["train"] if rg_config["observability"] == "full" else 0 agent_config["nbr_stimulus"] = rg_config["nbr_stimulus"] agent_config["nbr_communication_round"] = rg_config["nbr_communication_round"] agent_config["descriptive"] = rg_config["descriptive"] agent_config["gumbel_softmax_eps"] = rg_config["gumbel_softmax_eps"] agent_config["agent_learning"] = rg_config["agent_learning"] # Obverter: agent_config["use_obverter_threshold_to_stop_message_generation"] = args.obverter_threshold_to_stop_message_generation agent_config["symbol_embedding_size"] = rg_config["symbol_embedding_size"] # Recurrent Convolutional Architecture: agent_config["architecture"] = rg_config["agent_architecture"] 
agent_config["decoder_architecture"] = "DCNN" if args.symbolic: agent_config["decoder_architecture"] = "BN+MLP" agent_config["dropout_prob"] = rg_config["dropout_prob"] agent_config["embedding_dropout_prob"] = rg_config["embedding_dropout_prob"] if "BetaVAE" in agent_config["architecture"]: agent_config['VAE_lambda'] = args.vae_lambda agent_config['vae_beta'] = args.vae_beta agent_config['factor_vae_gamma'] = args.vae_factor_gamma agent_config['vae_constrainedEncoding'] = args.vae_constrained_encoding agent_config['vae_use_gaussian_observation_model'] = args.vae_gaussian agent_config['vae_observation_sigma'] = args.vae_gaussian_sigma agent_config['vae_max_capacity'] = args.vae_max_capacity #1e2 agent_config['vae_nbr_epoch_till_max_capacity'] = args.vae_nbr_epoch_till_max_capacity agent_config['vae_decoder_conv_dim'] = args.vae_decoder_conv_dim agent_config['vae_decoder_nbr_layer'] = args.vae_decoder_nbr_layer agent_config['vae_nbr_latent_dim'] = args.vae_nbr_latent_dim agent_config['vae_detached_featout'] = args.vae_detached_featout agent_config['vae_use_mu_value'] = args.vae_use_mu_value rg_config["use_feat_converter"] = False agent_config["use_feat_converter"] = False if "BN" in args.arch: agent_config["cnn_encoder_channels"] = ["BN32","BN32","BN64","BN64"] else: agent_config["cnn_encoder_channels"] = [32,32,64,64] if "3x3" in agent_config["architecture"]: agent_config["cnn_encoder_kernels"] = [3,3,3,3] elif "7x4x4x3" in agent_config["architecture"]: agent_config["cnn_encoder_kernels"] = [7,4,4,3] else: agent_config["cnn_encoder_kernels"] = [4,4,4,4] agent_config["cnn_encoder_strides"] = [2,2,2,2] agent_config["cnn_encoder_paddings"] = [1,1,1,1] agent_config["cnn_encoder_fc_hidden_units"] = []#[128,] # the last FC layer is provided by the cnn_encoder_feature_dim parameter below... 
# For a fair comparison between CNN an VAEs: agent_config["cnn_encoder_feature_dim"] = args.vae_nbr_latent_dim #agent_config["cnn_encoder_feature_dim"] = cnn_feature_size # N.B.: if cnn_encoder_fc_hidden_units is [], # then this last parameter does not matter. # The cnn encoder is not topped by a FC network. agent_config["cnn_encoder_mini_batch_size"] = args.mini_batch_size #agent_config["feat_converter_output_size"] = cnn_feature_size agent_config["feat_converter_output_size"] = 256 if "MHDPA" in agent_config["architecture"]: agent_config["mhdpa_nbr_head"] = 4 agent_config["mhdpa_nbr_rec_update"] = 1 agent_config["mhdpa_nbr_mlp_unit"] = 256 agent_config["mhdpa_interaction_dim"] = 128 agent_config["temporal_encoder_nbr_hidden_units"] = 0 agent_config["temporal_encoder_nbr_rnn_layers"] = 0 agent_config["temporal_encoder_mini_batch_size"] = args.mini_batch_size agent_config["symbol_processing_nbr_hidden_units"] = agent_config["temporal_encoder_nbr_hidden_units"] agent_config["symbol_processing_nbr_rnn_layers"] = 1 ## Decoder: ### CNN: if "BN" in agent_config["decoder_architecture"]: agent_config["cnn_decoder_channels"] = ["BN64","BN64","BN32","BN32"] else: agent_config["cnn_decoder_channels"] = [64,64,32,32] if "3x3" in agent_config["decoder_architecture"]: agent_config["cnn_decoder_kernels"] = [3,3,3,3] elif "3x4x4x7" in agent_config["decoder_architecture"]: agent_config["cnn_decoder_kernels"] = [3,4,4,7] else: agent_config["cnn_decoder_kernels"] = [4,4,4,4] agent_config["cnn_decoder_strides"] = [2,2,2,2] agent_config["cnn_decoder_paddings"] = [1,1,1,1] ### MLP: if "BN" in agent_config["decoder_architecture"]: agent_config['mlp_decoder_fc_hidden_units'] = ["BN256", "BN256"] else: agent_config['mlp_decoder_fc_hidden_units'] = [256, 256] agent_config['mlp_decoder_fc_hidden_units'].append(40*6) else: raise NotImplementedError save_path = "./" if args.parent_folder != '': save_path += args.parent_folder+'/' save_path += f"{args.dataset}+DualLabeled/" if args.symbolic: 
save_path += f"Symbolic/" save_path += f"{nbr_epoch}Ep_Emb{rg_config['symbol_embedding_size']}_CNN{cnn_feature_size}to{args.vae_nbr_latent_dim}" if args.shared_architecture: save_path += "/shared_architecture" save_path += f"Dropout{rg_config['dropout_prob']}_DPEmb{rg_config['embedding_dropout_prob']}" save_path += f"_BN_{rg_config['agent_learning']}/" save_path += f"{rg_config['agent_loss_type']}" if 'dSprites' in args.dataset: train_test_strategy = f"-{test_split_strategy}" if test_split_strategy != train_split_strategy: train_test_strategy = f"/train_{train_split_strategy}/test_{test_split_strategy}" save_path += f"/dSprites{train_test_strategy}" save_path += f"/OBS{rg_config['stimulus_resize_dim']}X{rg_config['stimulus_depth_dim']}C-Rep{rg_config['nbr_experience_repetition']}" if rg_config['use_curriculum_nbr_distractors']: save_path += f"+W{rg_config['curriculum_distractors_window_size']}Curr" if rg_config['with_utterance_penalization']: save_path += "+Tau-10-OOV{}PenProb{}".format(rg_config['utterance_factor'], rg_config['utterance_oov_prob']) if rg_config['with_utterance_promotion']: save_path += "+Tau-10-OOV{}ProProb{}".format(rg_config['utterance_factor'], rg_config['utterance_oov_prob']) if rg_config['with_gradient_clip']: save_path += '+ClipGrad{}'.format(rg_config['gradient_clip']) if rg_config['with_speaker_entropy_regularization']: save_path += 'SPEntrReg{}'.format(rg_config['entropy_regularization_factor']) if rg_config['with_listener_entropy_regularization']: save_path += 'LSEntrReg{}'.format(rg_config['entropy_regularization_factor']) if rg_config['iterated_learning_scheme']: save_path += f"-ILM{rg_config['iterated_learning_period']}{'+RehearseMDL{}'.format(rg_config['iterated_learning_rehearse_MDL_factor']) if rg_config['iterated_learning_rehearse_MDL'] else ''}" if rg_config['with_mdl_principle']: save_path += '-MDL{}'.format(rg_config['mdl_principle_factor']) if rg_config['cultural_pressure_it_period'] != 'None': save_path += 
'-S{}L{}-{}-Reset{}'.\ format(rg_config['cultural_speaker_substrate_size'], rg_config['cultural_listener_substrate_size'], rg_config['cultural_pressure_it_period'], rg_config['cultural_reset_strategy']+str(rg_config['cultural_reset_meta_learning_rate']) if 'meta' in rg_config['cultural_reset_strategy'] else rg_config['cultural_reset_strategy']) save_path += '-{}{}CulturalAgent-SEED{}-{}-obs_b{}_minib{}_lr{}-{}-tau0-{}-{}DistrTrain{}Test{}-stim{}-vocab{}over{}_{}{}'.\ format( 'ObjectCentric' if rg_config['object_centric'] else '', 'Descriptive{}'.format(rg_config['descriptive_target_ratio']) if rg_config['descriptive'] else '', seed, rg_config['observability'], rg_config['batch_size'], args.mini_batch_size, rg_config['learning_rate'], rg_config['graphtype'], rg_config['tau0'], rg_config['distractor_sampling'], *rg_config['nbr_distractors'].values(), rg_config['nbr_stimulus'], rg_config['vocab_size'], rg_config['max_sentence_length'], rg_config['agent_architecture'], f"/{'Detached' if args.vae_detached_featout else ''}beta{vae_beta}-factor{factor_vae_gamma}" if 'BetaVAE' in rg_config['agent_architecture'] else '' ) if 'MONet' in rg_config['agent_architecture'] or 'BetaVAE' in rg_config['agent_architecture']: save_path += f"beta{vae_beta}-factor{factor_vae_gamma}-gamma{monet_gamma}-sigma{vae_observation_sigma}" if 'MONet' in rg_config['agent_architecture'] else '' save_path += f"CEMC{maxCap}over{nbrepochtillmaxcap}" if vae_constrainedEncoding else '' save_path += f"UnsupSeg{rg_config['unsupervised_segmentation_factor']}" if rg_config['unsupervised_segmentation_factor'] is not None else '' save_path += f"LossVAECoeff{args.vae_lambda}_{'UseMu' if args.vae_use_mu_value else ''}" if rg_config['use_feat_converter']: save_path += f"+FEATCONV" if rg_config['use_homoscedastic_multitasks_loss']: save_path += '+H**o' save_path += f"/{args.optimizer_type}/" if 'reinforce' in args.graphtype: save_path += 
f'/REINFORCE_EntropyCoeffNeg1m3/UnnormalizedDetLearningSignalHavrylovLoss/NegPG/' if 'obverter' in args.graphtype: save_path += f"Obverter{args.obverter_threshold_to_stop_message_generation}-{args.obverter_nbr_games_per_round}GPR/DEBUG/" else: save_path += f"STGS-{args.agent_type}-{args.rnn_type}-CNN-Agent/" save_path += f"Periodic{args.metric_epoch_period}TS+DISComp-{'fast-' if args.metric_fast else ''}/"#TestArchTanh/" save_path += f'DatasetRepTrain{args.nbr_train_dataset_repetition}Test{args.nbr_test_dataset_repetition}' rg_config['save_path'] = save_path print(save_path) from ReferentialGym.utils import statsLogger logger = statsLogger(path=save_path,dumpPeriod=100) # # Agents batch_size = 4 nbr_distractors = 1 if "partial" in rg_config["observability"] else agent_config["nbr_distractors"]["train"] nbr_stimulus = agent_config["nbr_stimulus"] obs_shape = [nbr_distractors+1,nbr_stimulus, rg_config["stimulus_depth_dim"],rg_config["stimulus_resize_dim"],rg_config["stimulus_resize_dim"]] vocab_size = rg_config["vocab_size"] max_sentence_length = rg_config["max_sentence_length"] if "obverter" in args.graphtype: from ReferentialGym.agents import DifferentiableObverterAgent speaker = DifferentiableObverterAgent( kwargs=agent_config, obs_shape=obs_shape, vocab_size=vocab_size, max_sentence_length=max_sentence_length, agent_id="s0", logger=logger, use_sentences_one_hot_vectors=args.use_sentences_one_hot_vectors, differentiable=args.differentiable ) else: if "Baseline" in args.agent_type: if 'lstm' in args.rnn_type.lower(): from ReferentialGym.agents import LSTMCNNSpeaker speaker = LSTMCNNSpeaker( kwargs=agent_config, obs_shape=obs_shape, vocab_size=vocab_size, max_sentence_length=max_sentence_length, agent_id="s0", logger=logger ) elif 'gru' in args.rnn_type.lower(): from ReferentialGym.agents import GRUCNNSpeaker speaker = GRUCNNSpeaker( kwargs=agent_config, obs_shape=obs_shape, vocab_size=vocab_size, max_sentence_length=max_sentence_length, agent_id="s0", logger=logger 
) else: raise NotImplementedError elif "EoSPriored" in args.agent_type: from ReferentialGym.agents import EoSPrioredLSTMCNNSpeaker speaker = EoSPrioredLSTMCNNSpeaker( kwargs=agent_config, obs_shape=obs_shape, vocab_size=vocab_size, max_sentence_length=max_sentence_length, agent_id="s0", logger=logger ) print("Speaker:", speaker) listener_config = copy.deepcopy(agent_config) if args.shared_architecture: listener_config["cnn_encoder"] = speaker.cnn_encoder listener_config["nbr_distractors"] = rg_config["nbr_distractors"]["train"] batch_size = 4 nbr_distractors = listener_config["nbr_distractors"] nbr_stimulus = listener_config["nbr_stimulus"] obs_shape = [nbr_distractors+1,nbr_stimulus, rg_config["stimulus_depth_dim"],rg_config["stimulus_resize_dim"],rg_config["stimulus_resize_dim"]] vocab_size = rg_config["vocab_size"] max_sentence_length = rg_config["max_sentence_length"] if "obverter" in args.graphtype: raise NotImplementedError else: if 'lstm' in args.rnn_type.lower(): from ReferentialGym.agents import LSTMCNNListener listener = LSTMCNNListener( kwargs=listener_config, obs_shape=obs_shape, vocab_size=vocab_size, max_sentence_length=max_sentence_length, agent_id="l0", logger=logger ) elif 'gru' in args.rnn_type.lower(): from ReferentialGym.agents import GRUCNNListener listener = GRUCNNListener( kwargs=listener_config, obs_shape=obs_shape, vocab_size=vocab_size, max_sentence_length=max_sentence_length, agent_id="l0", logger=logger ) else: raise NotImplementedError if args.symbolic: assert args.agent_loss_type.lower() == 'ce' listener.input_stream_ids["listener"]["target_output"] = "current_dataloader:sample:speaker_exp_latents" print("Listener:", listener) # # Dataset: need_dict_wrapping = {} if "dSprites" in args.dataset: root = "./datasets/dsprites-dataset" train_dataset = ReferentialGym.datasets.dSpritesDataset(root=root, train=True, transform=rg_config["train_transform"], split_strategy=train_split_strategy) test_dataset = 
ReferentialGym.datasets.dSpritesDataset(root=root, train=False, transform=rg_config["test_transform"], split_strategy=test_split_strategy) else: raise NotImplementedError ## Modules: modules = {} from ReferentialGym import modules as rg_modules # Population: population_handler_id = "population_handler_0" population_handler_config = rg_config population_handler_stream_ids = { "current_speaker_streams_dict":"modules:current_speaker", "current_listener_streams_dict":"modules:current_listener", "epoch":"signals:epoch", "mode":"signals:mode", "global_it_datasample":"signals:global_it_datasample", } # Current Speaker: current_speaker_id = "current_speaker" # Current Listener: current_listener_id = "current_listener" modules[population_handler_id] = rg_modules.build_PopulationHandlerModule( id=population_handler_id, prototype_speaker=speaker, prototype_listener=listener, config=population_handler_config, input_stream_ids=population_handler_stream_ids) modules[current_speaker_id] = rg_modules.CurrentAgentModule(id=current_speaker_id,role="speaker") modules[current_listener_id] = rg_modules.CurrentAgentModule(id=current_listener_id,role="listener") homo_id = "homo0" homo_config = {"use_cuda":args.use_cuda} if args.homoscedastic_multitasks_loss: modules[homo_id] = rg_modules.build_HomoscedasticMultiTasksLossModule( id=homo_id, config=homo_config, ) ## Pipelines: pipelines = {} # 0) Now that all the modules are known, let us build the optimization module: optim_id = "global_optim" optim_config = { "modules":modules, "learning_rate":args.lr, "optimizer_type":args.optimizer_type, "with_gradient_clip":rg_config["with_gradient_clip"], "adam_eps":rg_config["adam_eps"], } optim_module = rg_modules.build_OptimizationModule( id=optim_id, config=optim_config, ) modules[optim_id] = optim_module grad_recorder_id = "grad_recorder" grad_recorder_module = rg_modules.build_GradRecorderModule(id=grad_recorder_id) modules[grad_recorder_id] = grad_recorder_module topo_sim_metric_id = 
"topo_sim_metric" topo_sim_metric_module = rg_modules.build_TopographicSimilarityMetricModule(id=topo_sim_metric_id, config = { "parallel_TS_computation_max_workers":16, "epoch_period":args.metric_epoch_period, "fast":args.metric_fast, "verbose":False, "vocab_size":rg_config["vocab_size"], } ) modules[topo_sim_metric_id] = topo_sim_metric_module inst_coord_metric_id = "inst_coord_metric" inst_coord_metric_module = rg_modules.build_InstantaneousCoordinationMetricModule(id=inst_coord_metric_id, config = { "epoch_period":1, } ) modules[inst_coord_metric_id] = inst_coord_metric_module dsprites_latent_metric_id = "dsprites_latent_metric" dsprites_latent_metric_module = rg_modules.build_dSpritesPerLatentAccuracyMetricModule(id=dsprites_latent_metric_id, config = { "epoch_period":1, } ) modules[dsprites_latent_metric_id] = dsprites_latent_metric_module speaker_factor_vae_disentanglement_metric_id = "speaker_factor_vae_disentanglement_metric" speaker_factor_vae_disentanglement_metric_input_stream_ids = { "model":"modules:current_speaker:ref:ref_agent:cnn_encoder", "representations":"modules:current_speaker:ref:ref_agent:features", "experiences":"current_dataloader:sample:speaker_experiences", "latent_representations":"current_dataloader:sample:speaker_exp_latents", "latent_values_representations":"current_dataloader:sample:speaker_exp_latents_values", "indices":"current_dataloader:sample:speaker_indices", } speaker_factor_vae_disentanglement_metric_module = rg_modules.build_FactorVAEDisentanglementMetricModule( id=speaker_factor_vae_disentanglement_metric_id, input_stream_ids=speaker_factor_vae_disentanglement_metric_input_stream_ids, config = { "epoch_period":args.metric_epoch_period, "batch_size":64,#5, "nbr_train_points":10000,#3000, "nbr_eval_points":5000,#2000, "resample":False, "threshold":5e-2,#0.0,#1.0, "random_state_seed":args.seed, "verbose":False, "active_factors_only":True, } ) modules[speaker_factor_vae_disentanglement_metric_id] = 
speaker_factor_vae_disentanglement_metric_module listener_factor_vae_disentanglement_metric_id = "listener_factor_vae_disentanglement_metric" listener_factor_vae_disentanglement_metric_input_stream_ids = { "model":"modules:current_listener:ref:ref_agent:cnn_encoder", "representations":"modules:current_listener:ref:ref_agent:rnn_outputs", "experiences":"current_dataloader:sample:speaker_experiences", "latent_representations":"current_dataloader:sample:speaker_exp_latents", "latent_values_representations":"current_dataloader:sample:speaker_exp_latents_values", "indices":"current_dataloader:sample:speaker_indices", } listener_factor_vae_disentanglement_metric_module = rg_modules.build_FactorVAEDisentanglementMetricModule( id=listener_factor_vae_disentanglement_metric_id, input_stream_ids=listener_factor_vae_disentanglement_metric_input_stream_ids, config = { "epoch_period":args.metric_epoch_period, "batch_size":64,#5, "nbr_train_points":10000,#3000, "nbr_eval_points":5000,#2000, "resample":False, "threshold":5e-2,#0.0,#1.0, "random_state_seed":args.seed, "verbose":False, "active_factors_only":True, } ) modules[listener_factor_vae_disentanglement_metric_id] = listener_factor_vae_disentanglement_metric_module logger_id = "per_epoch_logger" logger_module = rg_modules.build_PerEpochLoggerModule(id=logger_id) modules[logger_id] = logger_module pipelines["referential_game"] = [ population_handler_id, current_speaker_id, current_listener_id ] pipelines[optim_id] = [] if args.homoscedastic_multitasks_loss: pipelines[optim_id].append(homo_id) pipelines[optim_id].append(optim_id) """ # Add gradient recorder module for debugging purposes: pipelines[optim_id].append(grad_recorder_id) """ pipelines[optim_id].append(speaker_factor_vae_disentanglement_metric_id) pipelines[optim_id].append(listener_factor_vae_disentanglement_metric_id) pipelines[optim_id].append(topo_sim_metric_id) pipelines[optim_id].append(inst_coord_metric_id) pipelines[optim_id].append(dsprites_latent_metric_id) 
pipelines[optim_id].append(logger_id) rg_config["modules"] = modules rg_config["pipelines"] = pipelines dataset_args = { "dataset_class": "DualLabeledDataset", "modes": {"train": train_dataset, "test": test_dataset, }, "need_dict_wrapping": need_dict_wrapping, "nbr_stimulus": rg_config["nbr_stimulus"], "distractor_sampling": rg_config["distractor_sampling"], "nbr_distractors": rg_config["nbr_distractors"], "observability": rg_config["observability"], "object_centric": rg_config["object_centric"], "descriptive": rg_config["descriptive"], "descriptive_target_ratio": rg_config["descriptive_target_ratio"], } refgame = ReferentialGym.make(config=rg_config, dataset_args=dataset_args) # In[22]: refgame.train(nbr_epoch=nbr_epoch, logger=logger, verbose_period=1) logger.flush()
parser.add_argument('--GPU_ids', default=0) if __name__ == '__main__': # Dataset img_size = 224 resize_img = 300 normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) transform_train = transforms.Compose([ transforms.Resize(resize_img), transforms.RandomHorizontalFlip(), transforms.RandomVerticalFlip(), transforms.ColorJitter(0.02, 0.02, 0.02, 0.01), transforms.RandomRotation([-180, 180]), transforms.RandomAffine([-180, 180], translate=[0.1, 0.1], scale=[0.7, 1.3]), transforms.RandomCrop(img_size), transforms.ToTensor(), normalize ]) print('==> Preparing data..') trainset = dataloader(train=True, transform=transform_train) trainloader = torch.utils.data.DataLoader(trainset, batch_size=1, num_workers=50, shuffle=True) model = args.model # Use args.model as pretrain model if model == 'resnet152': net = resnet.resnet152().to(device)
net_D = Discriminator().to(device) optim_G = optim.RMSprop(net_G.parameters(), lr=args.lr) optim_D = optim.RMSprop(net_D.parameters(), lr=args.lr) train_writer = SummaryWriter(os.path.join(log_dir, 'train')) valid_writer = SummaryWriter(os.path.join(log_dir, 'valid')) os.makedirs(os.path.join(log_dir, 'sample'), exist_ok=True) sample_z = torch.randn(args.sample_size, args.z_dim).to(device) valid_dataset = GenerativeDataset(net_G, args.z_dim, 10000, device) looper = loop(dataloader) consistency_transforms = transforms.Compose([ transforms.ToPILImage(mode='RGB'), transforms.RandomAffine(0, translate=(0.1, 0.1)), transforms.RandomHorizontalFlip(p=0.5), transforms.ToTensor(), transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)), ]) def consistency_transform_func(images): images = deepcopy(images) for idx, img in enumerate(images): images[idx] = consistency_transforms(img) return images cs_lambda = args.consistency with trange(args.iterations, dynamic_ncols=True) as pbar: for step in pbar:
def display_images(img_list, row, col): if (len(img_list) > 0): images = {} n = 0 for img in img_list: n += 1 images[str(n)] = img plot_images(images, row, col, cmap='gray') train_data = torchvision.datasets.ImageFolder( root='custom_dataset/', transform=transforms.Compose([ transforms.Grayscale(num_output_channels=1), transforms.RandomApply( [transforms.RandomAffine(degrees=(-30, 30), shear=(-30, 30))], p=1.0), transforms.ToTensor() ])) print(f'dataset size: {len(train_data)}') NUM_IMAGES = 36 groundtruth = [ '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z' ] data_loader = torch.utils.data.DataLoader(train_data, batch_size=NUM_IMAGES,
def __init__(self, dataset_dir, images_path, list=None, numpatches=900,
             numneg=3, pos_thr=50.0, reject=True, mode='train',
             rejection_radius=3000, dist_type='3D', patch_radius=None,
             use_depth=False, use_normals=False, use_silhouettes=False,
             color_jitter=False, greyscale=False, maxres=4096,
             scale_jitter=False, photo_jitter=False, uniform_negatives=False,
             needles=0, render_only=False):
    """Loads the patches dataset.

    @param dataset_dir String directory where the dataset of sampled
        points is located
    @param images_path path to the images to sample patches from
    @param list List of subdirectory names to be loaded with this loader.
        Use this to specify train/test/val splits. Defaults to loading
        every subdirectory of dataset_dir when empty or None.
    @param numneg Int number of generated negatives per positive pair.
    @param pos_thr Float threshold in meters used to define negatives. If
        the distance of two 3D points exceeds this threshold, the
        correspondence is considered negative. The lower the threshold,
        the harder the negatives are.
    @param reject [bool] True turns on rejection sampling - for each patch
        we calculate density of 3D reprojected point cloud within 1km
        radius. Then the probability of rejection is calculated as
        num_points_1km_radius/max_num_points, where max_num_points is
        maximum taken across all queried samples until the current one.
    @param mode options: train|eval, default: train. If train is used,
        then the additional metadata per patch (which are used for some
        plots during validation) are not generated and therefore the
        training shall be faster. @type string
    @param dist_type type of the distance used to generate positives and
        negatives. Can be `2D` or `3D`. Default: 3D. @type int
    @param patch_radius when set to None, the patch radius will be loaded
        from the patches dataset. Otherwise the defined patch radius will
        be used. Please note that if you use larger patch_radius than the
        one defined within the patches dataset, the source image will be
        padded automatically and so the patch may contain black edges.
    @param needles If number greater than zero is used, then instead of a
        single patch a whole needle of patches will be extracted. Our
        network then takes several patches in a form of a needle encoded
        to channels of the input. This approach is described here:
        Lotan and Irani: Needle-Match: Reliable Patch Matching under High
        Uncertainty, CVPR 2016.
    """
    # The default used to be the mutable literal `list=[]`; use a None
    # sentinel instead so no default object can ever be shared (and an
    # explicit None argument is now accepted too).
    if list is None:
        list = []
    self.item_idx = -1
    self.dataset_dir = dataset_dir
    self.images_path = images_path
    self.numneg = numneg
    self.pos_thr = pos_thr
    self.loaded_imgs_pts = []
    self.all_coords3d = []
    self.max_num_points = 0
    self.reject = reject
    self.query_radius = rejection_radius
    self.dist_type = dist_type
    self.use_depth = use_depth
    self.use_normals = use_normals
    self.use_silhouettes = use_silhouettes
    self.color_jitter = color_jitter
    self.greyscale = greyscale
    self.left_maxres = maxres
    self.right_maxres = maxres
    self.scale_jitter = scale_jitter
    self.photo_jitter = photo_jitter
    self.uniform_negatives = uniform_negatives
    self.needles = needles
    self.render_only = render_only
    # Scene center is stored next to the images directory.
    scene_info_file = os.path.join(
        os.path.dirname(images_path), "scene_info.txt")
    self.scene_center = MultimodalPatchesDataset.getSceneCenter(
        scene_info_file)
    # Channel counts for the two branches; extra modalities are appended
    # as additional channels of the second branch.
    self.numch_1 = 3
    self.numch_2 = 3
    if self.greyscale:
        self.numch_1 = 1
        self.numch_2 = 1
    if self.use_depth:
        self.numch_2 += 1
    if self.use_normals:
        self.numch_2 += 3
    if self.use_silhouettes:
        self.numch_2 += 1
    self.transform = transforms.Compose([
        transforms.ToPILImage(),
        transforms.ColorJitter(0.5, 0.5, 1.0, 0.5),
        transforms.ToTensor()
    ])
    print("Rejection radius: ", self.query_radius, "mode", mode)
    self.mode = mode
    if len(list) == 0:
        # No explicit split given: load every subdirectory of dataset_dir.
        self.dataset_items = [
            d for d in os.listdir(self.dataset_dir)
            if os.path.isdir(os.path.join(self.dataset_dir, d))
        ]
    else:
        self.dataset_items = []
        if self.mode == 'eval':
            # choose only pairs where left view does not repeat
            print("Choosing non-repeating photographs for validation...")
            keyset = set()
            for item in tqdm(list):
                item_path = os.path.join(self.dataset_dir, item)
                info_path = os.path.join(item_path, "info.npy")
                info = np.load(info_path, encoding='latin1',
                               allow_pickle=True).flatten()[0]
                img1_base = os.path.basename(info['img1_name'])
                key = os.path.splitext(img1_base)[0]
                if key in keyset:
                    continue
                keyset.add(key)
                self.dataset_items.append(item)
        else:
            self.dataset_items = list
    if (len(self.dataset_items) > 0):
        # Peek at the first item to learn per-item metadata (patch count,
        # patch radius).
        item_path = os.path.join(self.dataset_dir, self.dataset_items[0])
        info_path = os.path.join(item_path, "info.npy")
        self.info = np.load(info_path, encoding='latin1',
                            allow_pickle=True).flatten()[0]
        self.numpatches = self.info['coords2d_1'].shape[0]
        if patch_radius is not None:
            self.patch_radius = patch_radius
        else:
            self.patch_radius = self.info['patch_radius']
        if numpatches != self.numpatches:
            raise RuntimeError("Wrong number of patches in the first \
item of the dataset. Expected: " + str(numpatches) + ", obtained: "
                               + str(self.numpatches))
        self.load3DPoints()
        self.kdt = KDTree(self.all_coords3d[:, :3], leaf_size=40,
                          metric='euclidean')
        translation_frac = np.sqrt(5) / (self.patch_radius * 2
                                         )  # at most 5px
        self.photo_transform = transforms.Compose([
            transforms.ToPILImage(),
            transforms.ColorJitter(0.2, (0.9, 1.001), 0.2, 0.2),
            transforms.RandomAffine(22.5,
                                    (translation_frac, translation_frac),
                                    shear=5),
            transforms.CenterCrop(self.patch_radius * 2),
            transforms.ToTensor()
        ])
        if self.photo_jitter:
            self.prcoef = 1.25
        else:
            self.prcoef = 1
        # FIXME: remove since this is unneeded for training and is slow.
        # Just for research.
        #self.saveDensityPointcloud()
    else:
        raise RuntimeError("No dataset items at specified location.")
def rotation(degree):
    """Return a transform that randomly rotates an image by up to ``degree``.

    Thin wrapper around transforms.RandomAffine configured with rotation
    only (no translation, scaling, or shear).
    """
    rotation_transform = transforms.RandomAffine(degrees=degree)
    return rotation_transform
def __init__(self):
    """Instantiate the torchvision transforms used for augmentation.

    NOTE(review): the enclosing class header is outside this view, so how
    these transforms are combined (order, application probability) is
    presumably defined by sibling methods — confirm against the full class.
    """
    # TODO: try Normalize as well (RandomPerspective is instantiated
    # below, but verify it is actually wired into the pipeline).
    # Random rotation of up to +/-45 degrees with 0.8x-1.6x scaling.
    self.affine = transforms.RandomAffine(degrees=45, scale=(0.8, 1.6))
    # Horizontal flip with 50% probability.
    self.flip = transforms.RandomHorizontalFlip(0.5)
    # Perspective distortion with torchvision's default parameters.
    self.perspective = transforms.RandomPerspective()
accuracy = 100. * correct.to(torch.float32) / len(loader.dataset) print('Average loss: {:.4f}, Accuracy: {}/{} ({:.2f}%)\n'.format( loss, correct, len(loader.dataset), accuracy)) if log is not None and epoch is not None: log.add_scalar('val_loss', loss, epoch-1) log.add_scalar('val_acc', accuracy, epoch-1) input_image_size = (150, 150) data_transform = transforms.Compose([ transforms.Resize(input_image_size), transforms.RandomAffine(degrees=0, translate=None, scale=(0.8, 1.2), shear=0.2), transforms.RandomHorizontalFlip(), transforms.ToTensor() ]) noop_transform = transforms.Compose([ transforms.Resize(input_image_size), transforms.ToTensor() ]) def get_train_loader(batch_size=25): print('Train: ', end="") train_dataset = datasets.ImageFolder(root=datapath+'/train', transform=data_transform) train_loader = DataLoader(train_dataset, batch_size=batch_size,
track_git=False) if torch.cuda.is_available() and not args.use_gpu: logger.info( 'You have a GPU device so you should probably run with --use_gpu') device = torch.device('cpu') elif torch.cuda.is_available() and args.use_gpu: device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu') else: device = torch.device('cpu') logger.info('Running with device %s', device) logger.info('Creates datasets') train_transform = transforms.Compose([ transforms.RandomAffine(0, translate=(0, 0.1), scale=(1, 1.10)), transforms.RandomRotation((-20, 20)), transforms.ToTensor(), ]) transform = torchvision.transforms.Compose( [torchvision.transforms.ToTensor()]) train_dataset = XrayImageFolder(os.path.join(args.root_dir, 'train'), transform=train_transform) val_dataset = XrayImageFolder(os.path.join(args.root_dir, 'val'), transform=transform) test_dataset = XrayImageFolder(os.path.join(args.root_dir, 'test'), transform=transform) train_dataloader = DataLoader(train_dataset,