def create_numpy_dataset(num_images, train_loader, take_count=-1):
    """Collect batches from ``train_loader`` into stacked numpy arrays.

    Args:
        num_images: number of batches to consume; ``None`` means the
            whole loader.
        train_loader: iterable yielding ``(image, label)`` tensor pairs.
        take_count: if not -1, truncate both arrays to this many rows.

    Returns:
        Tuple ``(images, labels)`` of numpy arrays. When the stacked
        images come out 3-D (grayscale), a channel axis is re-inserted.
    """
    limit = len(train_loader) if num_images is None else num_images
    images, targets = [], []
    for batch_idx, batch in enumerate(train_loader):
        images.append(np.squeeze(batch[0].numpy()))
        targets.append(np.squeeze(batch[1].numpy()))
        if batch_idx == limit - 1:
            break
    images = np.array(images)
    targets = np.array(targets)
    # A 3-D stack means single-channel input: restore the channel axis.
    if images.ndim == 3:
        images = np.expand_dims(images, axis=1)
    print('Numpy dataset shape is {}'.format(images.shape))
    if take_count == -1:
        return images, targets
    return images[:take_count], targets[:take_count]
def load_dataset(self, split, **kwargs):
    """Load a given dataset split.

    Args:
        split (str): name of the split (e.g., train, valid, test)
    """
    path = self.args.data
    if not os.path.exists(path):
        raise FileNotFoundError("Dataset not found: ({})".format(path))
    # A directory means "every file inside"; otherwise the path is the file.
    if os.path.isdir(path):
        candidates = [os.path.join(path, name) for name in os.listdir(path)]
    else:
        candidates = [path]
    matched = sorted(f for f in candidates if split in f)
    # # debug: optionally cap the number of files
    if self.args.num_file > 0:
        matched = matched[:self.args.num_file]
    assert len(matched) > 0, "no suitable file in split ***{}***".format(
        split)
    parts = [BertH5pyData(f) for f in matched]
    dataset = ConBertH5pyData(parts)
    print('| loaded {} sentences from: {}'.format(len(dataset), path),
          flush=True)
    self.datasets[split] = dataset
    print('| loading finished')
def generate_curves(
        self,
        kernel_name,
        noise,
        num_instances_train=None,
        num_instances_vali=None,
        num_instances_test=None,
        purpose=None,
        **kwargs):
    """Sample random curves from a sklearn pairwise kernel.

    Args:
        kernel_name: metric name accepted by
            ``sklearn.metrics.pairwise.pairwise_kernels``.
        noise: diagonal jitter added to the kernel matrix.
        num_instances_*: number of curves for each purpose.
        purpose: one of ``'train'``, ``'vali'``, ``'test'``.
        **kwargs: kernel hyper-parameters forwarded to the kernel.

    Returns:
        Tuple ``(x_values, datasets)`` with x_values repeated per curve.

    Raises:
        ValueError: if ``purpose`` is not one of the three known values
            (previously this fell through and raised a NameError).
    """
    x_values = self._create_shuffled_linspace()
    # BUG FIX: kernel hyper-parameters must be forwarded as keyword
    # arguments. Passing the ``kwargs`` dict positionally bound it to
    # ``filter_params`` and the parameters were silently dropped.
    # ``filter_params=True`` keeps the call robust to extra kwargs.
    kernel = torch.tensor(
        pairwise.pairwise_kernels(
            x_values, x_values, kernel_name, filter_params=True, **kwargs),
        dtype=torch.float64)
    kernel = kernel + torch.eye(self._steps) * noise
    if purpose == 'train':
        num_instances = num_instances_train
    elif purpose == 'vali':
        num_instances = num_instances_vali
    elif purpose == 'test':
        num_instances = num_instances_test
    else:
        raise ValueError(
            "purpose must be 'train', 'vali' or 'test', got {!r}".format(
                purpose))
    # NOTE(review): the sibling RBF variant multiplies by a Cholesky
    # factor before sampling; here the raw kernel matrix is used —
    # confirm that is intended.
    datasets = []
    for _ in range(num_instances):  # creating as many standard
        standard_normals = torch.normal(0, 1, (self._steps, 1))
        func_x = kernel.float() @ standard_normals.float()
        datasets.append(func_x)
    datasets = Helper.list_np_to_tensor(datasets)
    x_values = x_values.repeat(datasets.shape[0], 1, 1)
    return x_values, datasets
def generate_curves(self,
                    noise=1e-4,
                    length_scale=0.4,
                    gamma=1,
                    num_instances_train=None,
                    num_instances_vali=None,
                    num_instances_test=None,
                    purpose=None):
    """Sample GP curves from an RBF kernel via its Cholesky factor.

    Args:
        noise: diagonal jitter keeping the kernel positive definite.
        length_scale, gamma: RBF kernel hyper-parameters.
        num_instances_*: number of curves for each purpose.
        purpose: one of ``'train'``, ``'vali'``, ``'test'``. Train draws
            x uniformly at random; vali/test use a shuffled linspace.

    Returns:
        Tuple ``(x_values, datasets)`` with x_values repeated per curve.

    Raises:
        ValueError: if ``purpose`` is unknown (previously this fell
            through and raised a NameError on ``num_instances``).
    """
    if purpose == 'train':
        num_instances = num_instances_train
        x_values = Helper.scale_shift_uniform(
            self._xmin, self._xmax, *(self._steps, self._xdim)).float()
    elif purpose == 'vali':
        num_instances = num_instances_vali
        x_values = self._create_shuffled_linspace()
    elif purpose == 'test':
        num_instances = num_instances_test
        x_values = self._create_shuffled_linspace()
    else:
        raise ValueError(
            "purpose must be 'train', 'vali' or 'test', got {!r}".format(
                purpose))
    kernel = self._rbf_kernel(length_scale, gamma, x_values)
    # Jitter the diagonal so the Cholesky decomposition succeeds.
    kernel = kernel + torch.eye(self._steps) * noise
    # NOTE(review): torch.cholesky is deprecated in favor of
    # torch.linalg.cholesky — switch once the minimum torch version allows.
    cholesky_decomp = torch.cholesky(kernel)
    datasets = []
    for _ in range(num_instances):  # creating as many standard
        standard_normals = torch.normal(0, 1, (self._steps, 1))
        func_x = cholesky_decomp @ standard_normals
        datasets.append(func_x)
    datasets = Helper.list_np_to_tensor(datasets)
    x_values = x_values.repeat(datasets.shape[0], 1, 1)
    return x_values, datasets
def split_dataset_by_labels(X, y, task_labels, nb_classes=None,
                            multihead=False):
    """Split dataset by labels.

    Args:
        X: data
        y: labels
        task_labels: list of list of labels, one for each dataset
        nb_classes: number of classes (used to convert to one-hot)
    Returns:
        List of (X, y) tuples representing each dataset
    """
    if nb_classes is None:
        nb_classes = len(np.unique(y))
    datasets = []
    for labels in task_labels:
        mask = np.in1d(y, labels)
        if multihead:
            # Remap this task's global label ids onto 0..len(labels)-1
            # so each head sees a compact label space.
            remap = np.arange(nb_classes)
            remap[labels] = np.arange(len(labels))
            one_hot = np_utils.to_categorical(remap[y[mask]], len(labels))
        else:
            one_hot = np_utils.to_categorical(y[mask], nb_classes)
        datasets.append((X[mask], one_hot))
    return datasets
def build_set(queue, triplet_builder, log):
    """Endlessly build groups of 15 triplet datasets and push their
    concatenation onto ``queue`` (intended to run as a worker)."""
    while True:
        built = [triplet_builder.build_set() for _ in range(15)]
        queue.put(ConcatDataset(built))
def build_set(self, queue, triplet_builder, log):
    """Endlessly build TRAIN_SEQS_PER_EPOCH triplet datasets and push
    their concatenation onto ``queue`` (intended to run as a worker)."""
    while True:
        built = [
            triplet_builder.build_set()
            for _ in range(TRAIN_SEQS_PER_EPOCH)
        ]
        queue.put(ConcatDataset(built))
def create_numpy_dataset(loader=None, take_count=1000):
    """Stack images from a data loader into a single numpy array.

    Generalized backward-compatibly: calling with no arguments keeps the
    original behavior (module-level ``train_loader``, first 1000 images).

    Args:
        loader: iterable of ``(image, ...)`` tensor batches; defaults to
            the module-level ``train_loader``.
        take_count: number of leading images to return.

    Returns:
        numpy array with a channel axis re-inserted after squeezing.
    """
    if loader is None:
        loader = train_loader  # module-level loader (original behavior)
    images = [np.squeeze(batch[0].numpy()) for batch in loader]
    images = np.array(images)
    images = np.expand_dims(images, axis=1)
    print('Numpy dataset shape is {}'.format(images.shape))
    return images[:take_count]
def split_dataset_by_labels(dataset, task_labels):
    """Split a dataset object into one deep copy per task.

    Args:
        dataset: object exposing index-able ``targets`` and ``data``
            attributes (e.g. a torchvision dataset).
        task_labels: list of label collections, one per task.

    Returns:
        List of deep-copied datasets, each restricted to the samples
        whose target belongs to that task's labels.
    """
    # Fixed: removed the unused ``task_idx`` counter (dead local).
    datasets = []
    for labels in task_labels:
        # Boolean mask of samples whose target belongs to this task.
        mask = np.in1d(dataset.targets, labels)
        task_dataset = copy.deepcopy(dataset)
        task_dataset.targets = task_dataset.targets[mask]
        task_dataset.data = task_dataset.data[mask]
        datasets.append(task_dataset)
    return datasets
def load_data(path='data/pan19-author-profiling-20200229/training/en/',
              num_classes=3):
    '''
    Reads tweets and truth from the given path.
    Returns a list of lists with tweets of a users and a list of the
    gender of the corresponding user (labels)

            Parameters:
                    path (str): path of the dataset

            Returns:
                    (list, list): tuple of texts and genders
    '''
    xml_files = [f for f in os.listdir(path) if f.endswith(".xml")]

    # user_id -> list of tweet texts, parsed from each user's XML file
    tweets = {}
    for xml_file in xml_files:
        root = ET.parse(os.path.join(path, xml_file)).getroot()
        texts = [node.text for node in root.findall('documents/document')]
        user_id = re.findall(r"(.*)\.xml", xml_file)[0]
        tweets[user_id] = texts

    # user_id -> gender string, parsed from the truth file
    labels = {}
    with open(os.path.join(path, 'truth.txt')) as f:
        for line in f:
            user_id, _, gender = re.findall(
                r'([A-Za-z0-9]*):::(human|bot):::([a-z]*)', line)[0]
            labels[user_id] = gender

    # torch needs integer as output class
    class_to_idx = {"bot": 0, "female": 1, "male": 2}
    x, y = [], []
    for user_id, texts in tweets.items():
        x.append(texts)
        y.append(class_to_idx[labels[user_id]])
    return x, y
def append_adversarial_samples(args, data_loader, adv_data, adv_labels):
    """Return a new DataLoader whose dataset also contains the
    adversarial samples.

    NOTE(review): this appends to the existing ConcatDataset's member
    list in place, so the original ``data_loader``'s dataset grows as a
    side effect — confirm that is intended.
    """
    member_datasets = data_loader.dataset.datasets
    member_datasets.append(AdvDataset(adv_data, adv_labels))
    combined = ConcatDataset(member_datasets)
    return torch.utils.data.DataLoader(combined,
                                       batch_size=args.batch_size,
                                       shuffle=True,
                                       num_workers=4,
                                       pin_memory=True,
                                       drop_last=True)
def create_numpy_dataset(num_images, train_loader):
    """Stack up to ``num_images`` image batches from ``train_loader``
    into one numpy array.

    Args:
        num_images: number of batches to take; ``None`` means the whole
            loader.
        train_loader: iterable yielding ``(image, ...)`` tensor batches.

    Returns:
        numpy array; grayscale (3-D) stacks gain a channel axis.
    """
    limit = len(train_loader) if num_images is None else num_images
    images = []
    for batch_idx, batch in enumerate(train_loader):
        images.append(np.squeeze(batch[0].numpy()))
        if batch_idx == limit - 1:
            break
    stacked = np.array(images)
    # the input image is grayscale image: restore the channel axis
    if stacked.ndim == 3:
        stacked = np.expand_dims(stacked, axis=1)
    return stacked
def create_numpy_dataset(opt):
    '''
    @ Original 28x28 is rescaled to 32x32 to meet 2^P size
    @ batch_size and workders can be increased for faster loading
    '''
    # print() with a single argument is valid in both Python 2 and 3.
    print(torch.__version__)
    train_batch_size = opt.train_batch_size
    test_batch_size = opt.test_batch_size
    kwargs = {}
    train_loader = data_utils.DataLoader(MNIST(root='./data',
                                               train=True,
                                               process=False,
                                               transform=transforms.Compose([
                                                   transforms.Scale((32, 32)),
                                                   transforms.ToTensor(),
                                               ])),
                                         batch_size=train_batch_size,
                                         shuffle=True,
                                         **kwargs)
    # NOTE(review): test_loader is built but never used here — confirm
    # whether it is needed or can be dropped.
    test_loader = data_utils.DataLoader(MNIST(root='./data',
                                              train=False,
                                              process=False,
                                              transform=transforms.Compose([
                                                  transforms.Scale((32, 32)),
                                                  transforms.ToTensor(),
                                              ])),
                                        batch_size=test_batch_size,
                                        shuffle=True,
                                        **kwargs)

    # create numpy dataset
    datasets = []
    labels = []
    for data, label in train_loader:
        # BUG FIX: the batch was previously appended to `datasets` twice
        # per iteration, doubling the images while labels were appended
        # once — the returned arrays were misaligned.
        datasets.append(data.numpy())
        labels.append(label.numpy())
    datasets = np.concatenate(datasets, axis=0)
    labels = np.concatenate(labels, axis=0)
    print('Create numpy dataset done, size: {}'.format(datasets.shape))
    return datasets[:opt.loadSize], labels[:opt.loadSize]
def get_train_dataloader(args):
    """Build train/val DataLoaders over the concatenation of all source
    datasets listed in ``args.source``.

    Digit-style sources short-circuit to the dedicated digits loader.
    """
    source_names = args.source
    assert isinstance(source_names, list)
    train_transform = get_train_transformers(args)
    sample_limit = args.limit_source
    train_parts = []
    val_parts = []
    for source in source_names:
        # Digit datasets use a completely different pipeline.
        if source in digits_datasets:
            return get_digital_train_dataloader(args, source)
        txt_path = join(dirname(__file__), 'txt_lists',
                        '%s_train.txt' % source)
        name_train, name_val, labels_train, labels_val = \
            get_split_dataset_info(txt_path, args.val_size)
        train_part = JigsawDataset(name_train,
                                   labels_train,
                                   img_transformer=train_transform)
        if sample_limit:
            train_part = Subset(train_part, sample_limit)
        train_parts.append(train_part)
        val_parts.append(
            JigsawDataset(name_val,
                          labels_val,
                          img_transformer=get_val_transformer(args)))
    loader = torch.utils.data.DataLoader(ConcatDataset(train_parts),
                                         batch_size=args.batch_size,
                                         shuffle=True,
                                         num_workers=4,
                                         pin_memory=True,
                                         drop_last=True)
    val_loader = torch.utils.data.DataLoader(ConcatDataset(val_parts),
                                             batch_size=args.batch_size,
                                             shuffle=False,
                                             num_workers=4,
                                             pin_memory=True,
                                             drop_last=False)
    return loader, val_loader
def _split_dataset(self, dataset, class_split_order):
    """Split an ImageFolder-style dataset into ``args.num_tasks``
    class-disjoint tasks.

    Args:
        dataset: dataset exposing ``samples`` as (path, class_idx) pairs.
        class_split_order: permutation of all class indices; consecutive
            chunks of ``task_length`` classes define the tasks.

    Returns:
        List of shallow-copied datasets, one per task, whose per-sample
        targets are remapped to ``0..task_length-1``.
    """
    # Generalized: derive the class count from the split order instead
    # of hard-coding 1000 (ImageNet). Behavior is unchanged when
    # len(class_split_order) == 1000.
    num_classes = len(class_split_order)
    task_length = num_classes // args.num_tasks
    # Used to map from random task_length classes in {0...num_classes}
    # -> {0,1...task_length}
    tiled_class_map = np.tile(np.arange(task_length), args.num_tasks)
    inv_class_split_order = np.argsort(class_split_order)
    class_map = tiled_class_map[inv_class_split_order]

    # Constructing class splits
    paths, targets = zip(*dataset.samples)
    paths = np.array(paths)
    targets = np.array(targets)

    print("==> Extracting per class paths")
    class_samples = [
        list(zip(paths[targets == c], class_map[targets[targets == c]]))
        for c in range(num_classes)
    ]

    datasets = []
    print(f"==> Splitting dataset into {args.num_tasks} tasks")
    for start in range(0, num_classes, task_length):
        task_classes = class_split_order[start:start + task_length]
        samples = [class_samples[c] for c in task_classes]
        redataset = copy(dataset)
        redataset.samples = list(chain.from_iterable(samples))
        datasets.append(redataset)
    return datasets
def load_dataset(path):
    """Load dataset in local.

    Load test image in local.
    Folder Tree
        Path
        ├─ Label 1
        |    ├─ Image 1
        |    ├─ Image 2
        |    └─ Image 3
        |
        └─ Label 2
             └─ Image 1

    Args:
        path (str): Directory path of dataset in Local

    Returns:
        List[(tensor.torch, int)]: dataset

    Example::

        get_dataset(./hoge)
    """
    datasets = []
    # Hoisted: one converter instance for all images instead of
    # constructing transforms.ToTensor() inside the inner loop.
    to_tensor = transforms.ToTensor()
    for label in tqdm(os.listdir(path), desc=" Label ", ascii=True):
        # os.path.join over manual "+" concatenation for portable paths.
        label_dir = os.path.join(path, label)
        for file in tqdm(os.listdir(label_dir), desc=" Data ", ascii=True):
            # "L" = 8-bit grayscale
            img = Image.open(os.path.join(label_dir, file)).convert("L")
            datasets.append((to_tensor(img), int(label)))
    print()
    return datasets
def separate_datasets(loader, dataset_type, Ktasks, folder,
                      use_task_ids=False):
    """Partition features from ``loader`` into per-task TensorDatasets
    and save the list to ``<folder><dataset_type>.t``.

    Args:
        loader: iterable of (features, targets) or
            (features, targets, task_ids) tensor batches.
        dataset_type: base name of the saved file.
        Ktasks: number of task ids (0..Ktasks-1).
        folder: output directory prefix; created if missing.
        use_task_ids: when True, batches carry explicit task ids;
            otherwise the class target doubles as the task id.
    """
    fts = []
    labels = []
    task_labels = []
    for batch in loader:
        if use_task_ids:
            ft, tar, task = batch
        else:
            ft, tar = batch
            task = tar
        fts.append(ft)
        labels.append(tar)
        task_labels.append(task)
    all_fts = torch.cat(fts, dim=0)
    all_labels = torch.cat(labels, dim=0)
    all_task_labels = torch.cat(task_labels, dim=0)
    datasets = []
    for task_lb in range(Ktasks):
        mask = torch.eq(all_task_labels, task_lb)
        # BUG FIX: .squeeze() turned a single-match index tensor into a
        # 0-d tensor, which index_select rejects; reshape(-1) keeps the
        # index 1-D for any number of matches.
        inds = torch.nonzero(mask).reshape(-1)
        dt = torch.index_select(all_fts, dim=0, index=inds)
        lbls = torch.index_select(all_labels, dim=0, index=inds)
        datasets.append(data_utils.TensorDataset(dt, lbls))
    # makedirs(exist_ok=True) is robust to nested paths and races
    # (previously a bare os.mkdir behind an exists() check).
    os.makedirs(folder, exist_ok=True)
    torch.save(datasets, folder + dataset_type + '.t')
# Per-client models, optimizers, and data loaders (one per data subset).
opts = []
datasets = []  # NOTE(review): actually holds DataLoaders, not datasets
datasets_plain = []
for i in range(num_models):
    model = ResNet(kernel_size=filter_size,
                   config=config,
                   batch_size=batch,
                   device=device)
    model.to(device)
    # model = torch.nn.DataParallel(model)
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
    dataloader = torch.utils.data.DataLoader(subsets[i],
                                             batch_size=batch,
                                             shuffle=True,
                                             num_workers=2)
    models.append(model)  # `models` list is defined outside this chunk
    opts.append(optimizer)
    datasets.append(dataloader)

# Central model trained alongside the per-client ones.
central_model = ResNet(kernel_size=filter_size,
                       config=config,
                       batch_size=batch,
                       device=device)
central_model.to(device)
central_opt = torch.optim.Adam(central_model.parameters(), lr=1e-4)

# Shadow models (set A): same architecture/optimizer setup as above.
shadow_models_set_A = []
# NOTE(review): shadow_opts_set_A is never filled within this chunk —
# confirm the optimizers are appended later in the file.
shadow_opts_set_A = []
for i in range(num_models):
    model = ResNet(kernel_size=filter_size,
                   config=config,
                   batch_size=batch,
                   device=device)
    model.to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
    shadow_models_set_A.append(model)
def main_worker(gpu, args):
    """Single-process training worker.

    Builds the chosen classifier with its final layer resized to the
    class subset, optionally resumes from a checkpoint, assembles
    (optionally distortion-augmented) ImageNet-subset loaders, then runs
    the train/validate loop with a cosine LR schedule.
    """
    global best_acc1
    args.gpu = gpu

    if args.gpu is not None:
        print("Use GPU: {} for training".format(args.gpu))

    # --- Model: swap the final classifier layer so the output dimension
    # matches the chosen class subset. ---
    print("=> creating model '{}'".format(args.arch))
    if args.arch == 'vgg16':
        model = vgg16(pretrained=args.pretrained,
                      use_deepaugment_realtime=True)
        model.classifier[-1] = torch.nn.Linear(4096, len(classes_chosen))
        print(model)
    elif args.arch == 'vgg11':
        model = vgg11(pretrained=args.pretrained,
                      use_deepaugment_realtime=True)
        model.classifier[-1] = torch.nn.Linear(4096, len(classes_chosen))
        print(model)
    elif args.arch == 'resnet18':
        model = resnet18(pretrained=args.pretrained)
        model.fc = torch.nn.Linear(512, len(classes_chosen))
        print(model)
    else:
        raise NotImplementedError()

    model = torch.nn.DataParallel(model).cuda()

    # define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss().cuda(args.gpu)
    optimizer = torch.optim.SGD(model.parameters(),
                                args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay,
                                nesterov=True)

    # optionally resume from a checkpoint
    args.start_epoch = 0
    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            if args.gpu is None:
                checkpoint = torch.load(args.resume)
            else:
                # Map model to be loaded to specified single gpu.
                loc = 'cuda:{}'.format(args.gpu)
                checkpoint = torch.load(args.resume, map_location=loc)
            args.start_epoch = checkpoint['epoch']
            print('Start epoch:', args.start_epoch)
            best_acc1 = checkpoint['best_acc1']
            if args.gpu is not None:
                # best_acc1 may be from a checkpoint from a different GPU
                best_acc1 = best_acc1.to(args.gpu)
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    cudnn.benchmark = True

    # --- Data loading: standard data, distorted data, or both merged. ---
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    if args.data_standard == None:
        print("No Standard Data! Only using --data-distorted datasets")
    if args.data_distorted != None:
        if args.symlink_distorted_data_dirs:
            # Distorted dirs are merged (symlinked) into one folder dataset.
            print("Mixing together data directories: ", args.data_distorted)
            train_dataset = torch.utils.data.ConcatDataset([
                CombinedDistortedDatasetFolder(
                    args.data_distorted,
                    transform=transforms.Compose([
                        transforms.RandomResizedCrop(224),
                        transforms.RandomHorizontalFlip(),
                        transforms.ToTensor(),
                        normalize,
                    ])),
                ImageNetSubsetDataset(args.data_standard,
                                      transform=transforms.Compose([
                                          transforms.RandomResizedCrop(224),
                                          transforms.RandomHorizontalFlip(),
                                          transforms.ToTensor(),
                                          normalize,
                                      ])) if args.data_standard != None else []
            ])
        else:
            # One sub-dataset per distorted directory, concatenated with
            # the (optional) standard data.
            print(
                f"Concatenating Datasets {args.data_standard} and {args.data_distorted}"
            )
            datasets = [
                # args.data_standard
                ImageNetSubsetDataset(args.data_standard,
                                      transform=transforms.Compose([
                                          transforms.RandomResizedCrop(224),
                                          transforms.RandomHorizontalFlip(),
                                          transforms.ToTensor(),
                                          normalize,
                                      ])) if args.data_standard != None else []
            ]
            for distorted_data_dir in args.data_distorted:
                datasets.append(
                    ImageNetSubsetDataset(
                        distorted_data_dir,
                        transform=transforms.Compose([
                            transforms.RandomResizedCrop(224),
                            transforms.RandomHorizontalFlip(),
                            transforms.ToTensor(),
                            normalize,
                        ])))
            train_dataset = torch.utils.data.ConcatDataset(datasets)
    else:
        print(f"Only using Dataset {args.data_standard}")
        train_dataset = ImageNetSubsetDataset(
            args.data_standard,
            transform=transforms.Compose([
                transforms.RandomResizedCrop(224),
                transforms.RandomHorizontalFlip(),
                transforms.ToTensor(),
                normalize,
            ]))

    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               num_workers=args.workers,
                                               pin_memory=True,
                                               sampler=None)
    # Validation uses deterministic resize + center-crop (no augmentation).
    val_loader = torch.utils.data.DataLoader(ImageNetSubsetDataset(
        args.data_val,
        transform=transforms.Compose([
            transforms.Resize(256),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            normalize,
        ])),
                                             batch_size=args.batch_size_val,
                                             shuffle=False,
                                             num_workers=args.workers,
                                             pin_memory=True)

    def cosine_annealing(step, total_steps, lr_max, lr_min):
        # Cosine decay from lr_max to lr_min over total_steps.
        return lr_min + (lr_max - lr_min) * 0.5 * (
            1 + np.cos(step / total_steps * np.pi))

    # Per-step cosine schedule expressed as a multiplicative LR factor.
    scheduler = torch.optim.lr_scheduler.LambdaLR(
        optimizer,
        lr_lambda=lambda step: cosine_annealing(
            step,
            args.epochs * len(train_loader),
            1,  # since lr_lambda computes multiplicative factor
            1e-6 / (args.lr * args.batch_size / 256.)))

    if args.start_epoch != 0:
        # Fast-forward the schedule when resuming mid-training.
        scheduler.step(args.start_epoch * len(train_loader))

    if args.evaluate:
        validate(val_loader, model, criterion, args)
        return

    ###########################################################################
    ##### Main Training Loop
    ###########################################################################

    # Fresh runs start a new log; resumed runs append to the existing one.
    if not args.resume:
        with open(os.path.join(args.save, 'training_log.csv'), 'w') as f:
            f.write(
                'epoch,train_loss,train_acc1,train_acc5,val_loss,val_acc1,val_acc5\n'
            )
    for epoch in range(args.start_epoch, args.epochs):
        # train for one epoch
        train_losses_avg, train_top1_avg, train_top5_avg = train(
            train_loader, model, criterion, optimizer, scheduler, epoch,
            args)

        print("Evaluating on validation set")
        # evaluate on validation set
        val_losses_avg, val_top1_avg, val_top5_avg = validate(
            val_loader, model, criterion, args)
        print("Finished Evaluating on validation set")

        # Save results in log file
        with open(os.path.join(args.save, 'training_log.csv'), 'a') as f:
            f.write('%03d,%0.5f,%0.5f,%0.5f,%0.5f,%0.5f,%0.5f\n' %
                    ((epoch + 1), train_losses_avg, train_top1_avg,
                     train_top5_avg, val_losses_avg, val_top1_avg,
                     val_top5_avg))

        # remember best acc@1 and save checkpoint
        is_best = val_top1_avg > best_acc1
        best_acc1 = max(val_top1_avg, best_acc1)
        save_checkpoint(
            {
                'epoch': epoch + 1,
                'arch': args.arch,
                'state_dict': model.state_dict(),
                'best_acc1': best_acc1,
                'optimizer': optimizer.state_dict(),
            }, is_best)
# Make Syft federated dataset client_datapair_dict = {} datasets = [] logging.info("Load federated dataset") for client_id in client_ids: tmp_path = federated_path + '/hospital_' + str(client_id) + '.csv' x, y = eICU_data.get_train_data_from_hopital(client_id) client_datapair_dict["hospital_{}".format(client_id)] = (x, y) # client_data_list.append((pd.read_csv(federated_path + '/hospital_' + str(client_id) + '.csv')[predictive_attributes], ) for client_id in client_ids: tmp_tuple = client_datapair_dict["hospital_{}".format(client_id)] datasets.append( fl.BaseDataset( torch.tensor(tmp_tuple[0], dtype=torch.float32), torch.tensor(tmp_tuple[1].squeeze(), dtype=torch.long)).send( virtual_workers["hospital_{}".format(client_id)])) fed_dataset = sy.FederatedDataset(datasets) fdataloader = sy.FederatedDataLoader(fed_dataset, batch_size=args["batch_size"]) # Load test data if args['split_strategy'] == 'trainN_testN': x, y = eICU_data.get_full_test_data() if args['split_strategy'] == 'trainNminus1_test1': x, y = eICU_data.get_test_data_from_hopital(args['test_hospital_id']) x_pt = torch.tensor(x, dtype=torch.float32) # transform to torch tensor y_pt = torch.tensor(y.squeeze(), dtype=torch.long) my_dataset = TensorDataset(x_pt, y_pt) # create your datset
def main_worker(gpu, ngpus_per_node, args):
    """Distributed-capable training worker.

    Builds the classifier plus the per-class adversarial network
    (advnet, one optimizer per block), assembles the (optionally
    distortion-augmented) data pipeline, and runs the train/validate
    loop, checkpointing on rank 0.
    """
    global best_acc1
    args.gpu = gpu

    if args.gpu is not None:
        print("Use GPU: {} for training".format(args.gpu))

    if args.distributed:
        if args.dist_url == "env://" and args.rank == -1:
            args.rank = int(os.environ["RANK"])
        if args.multiprocessing_distributed:
            # For multiprocessing distributed training, rank needs to be the
            # global rank among all the processes
            args.rank = args.rank * ngpus_per_node + gpu
        dist.init_process_group(backend=args.dist_backend,
                                init_method=args.dist_url,
                                world_size=args.world_size,
                                rank=args.rank)
    print("=> creating model '{}'".format(args.arch))
    model = models.__dict__[args.arch](pretrained=True)
    # NOTE(review): output dim is hard-coded to 100 here, while sibling
    # workers use len(classes_chosen) — confirm this is intentional.
    model.fc = torch.nn.Linear(512, 100)

    if args.distributed:
        # For multiprocessing distributed, DistributedDataParallel constructor
        # should always set the single device scope, otherwise,
        # DistributedDataParallel will use all available devices.
        if args.gpu is not None:
            torch.cuda.set_device(args.gpu)
            model.cuda(args.gpu)
            # When using a single GPU per process and per
            # DistributedDataParallel, we need to divide the batch size
            # ourselves based on the total number of GPUs we have
            args.batch_size = int(args.batch_size / ngpus_per_node)
            args.workers = int(
                (args.workers + ngpus_per_node - 1) / ngpus_per_node)
            model = torch.nn.parallel.DistributedDataParallel(
                model, device_ids=[args.gpu])
        else:
            model.cuda()
            # DistributedDataParallel will divide and allocate batch_size to all
            # available GPUs if device_ids are not set
            model = torch.nn.parallel.DistributedDataParallel(model)
    elif args.gpu is not None:
        torch.cuda.set_device(args.gpu)
        model = model.cuda(args.gpu)
    else:
        # DataParallel will divide and allocate batch_size to all available GPUs
        if args.arch.startswith('alexnet') or args.arch.startswith('vgg'):
            model.features = torch.nn.DataParallel(model.features)
            model.cuda()
        else:
            model = torch.nn.DataParallel(model).cuda()

    # Define advnet resnet
    # advnet = ResNet(
    #     epsilon=args.advnet_epsilon,
    #     advnet_norm_factor=args.advnet_norm_factor
    # ).cuda()
    # advnet = torch.nn.DataParallel(advnet).cuda()
    advnet = ParallelResNet(epsilon=args.advnet_epsilon,
                            advnet_norm_factor=args.advnet_norm_factor)

    # define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss().cuda(args.gpu)
    optimizer = torch.optim.SGD(model.parameters(),
                                args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay,
                                nesterov=True)

    # Not all parameters are trainable
    advnet_trainable_params = dict(list(advnet.named_parameters()))
    # del advnet_trainable_params['module.block1.0.weight']
    # del advnet_trainable_params['module.block1.0.bias']
    # del advnet_trainable_params['module.block1.9.weight']
    # del advnet_trainable_params['module.block1.9.bias']
    # del advnet_trainable_params['module.block2.0.weight']
    # del advnet_trainable_params['module.block2.0.bias']
    # del advnet_trainable_params['module.block2.9.weight']
    # del advnet_trainable_params['module.block2.9.bias']

    # One SGD optimizer per advnet block (one block per chosen class).
    optimizer_advnet = [
        torch.optim.SGD(advnet.blocks[i].parameters(),
                        args.lr_advnet,
                        momentum=args.momentum_advnet,
                        weight_decay=args.weight_decay_advnet,
                        nesterov=True) for i in range(len(classes_chosen))
    ]

    # optionally resume from a checkpoint
    args.start_epoch = 0
    # NOTE(review): resume is disabled via `if False:` — confirm this is
    # intentional and not a leftover debugging change.
    if False:  #args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            if args.gpu is None:
                checkpoint = torch.load(args.resume)
            else:
                # Map model to be loaded to specified single gpu.
                loc = 'cuda:{}'.format(args.gpu)
                checkpoint = torch.load(args.resume, map_location=loc)
            args.start_epoch = checkpoint['epoch']
            print('Start epoch:', args.start_epoch)
            best_acc1 = checkpoint['best_acc1']
            if args.gpu is not None:
                # best_acc1 may be from a checkpoint from a different GPU
                best_acc1 = best_acc1.to(args.gpu)
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    cudnn.benchmark = True

    # Data loading code
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    if args.data_standard == None:
        print("No Standard Data! Only using --data-distorted datasets")
    if args.data_distorted != None:
        if args.symlink_distorted_data_dirs:
            # Distorted dirs are merged (symlinked) into one folder dataset.
            print("Mixing together data directories: ", args.data_distorted)
            train_dataset = torch.utils.data.ConcatDataset([
                CombinedDistortedDatasetFolder(
                    args.data_distorted,
                    transform=transforms.Compose([
                        transforms.RandomResizedCrop(224),
                        transforms.RandomHorizontalFlip(),
                        transforms.ToTensor(),
                        normalize,
                    ])),
                ImageNetSubsetDataset(args.data_standard,
                                      transform=transforms.Compose([
                                          transforms.RandomResizedCrop(224),
                                          transforms.RandomHorizontalFlip(),
                                          transforms.ToTensor(),
                                          normalize,
                                      ])) if args.data_standard != None else []
            ])
        else:
            # One sub-dataset per distorted directory, concatenated with
            # the (optional) standard data.
            print(
                f"Concatenating Datasets {args.data_standard} and {args.data_distorted}"
            )
            datasets = [
                # args.data_standard
                ImageNetSubsetDataset(args.data_standard,
                                      transform=transforms.Compose([
                                          transforms.RandomResizedCrop(224),
                                          transforms.RandomHorizontalFlip(),
                                          transforms.ToTensor(),
                                          normalize,
                                      ])) if args.data_standard != None else []
            ]
            for distorted_data_dir in args.data_distorted:
                datasets.append(
                    ImageNetSubsetDataset(
                        distorted_data_dir,
                        transform=transforms.Compose([
                            transforms.RandomResizedCrop(224),
                            transforms.RandomHorizontalFlip(),
                            transforms.ToTensor(),
                            normalize,
                        ])))
            train_dataset = torch.utils.data.ConcatDataset(datasets)
    else:
        print(f"Only using Dataset {args.data_standard}")
        train_dataset = ImageNetSubsetDataset(
            args.data_standard,
            transform=transforms.Compose([
                transforms.RandomResizedCrop(224),
                transforms.RandomHorizontalFlip(),
                transforms.ToTensor(),
                normalize,
            ]))

    if args.distributed:
        train_sampler = torch.utils.data.distributed.DistributedSampler(
            train_dataset)
    else:
        train_sampler = None

    # NOTE(review): shuffle=True combined with a non-None sampler raises
    # ValueError in DataLoader — the distributed path looks broken here;
    # confirm against the intended usage.
    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               num_workers=args.workers,
                                               pin_memory=True,
                                               sampler=train_sampler)

    # Validation uses deterministic resize + center-crop (no augmentation).
    val_loader = torch.utils.data.DataLoader(ImageNetSubsetDataset(
        args.data_val,
        transform=transforms.Compose([
            transforms.Resize(256),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            normalize,
        ])),
                                             batch_size=args.batch_size_val,
                                             shuffle=False,
                                             num_workers=args.workers,
                                             pin_memory=True)

    def cosine_annealing(step, total_steps, lr_max, lr_min):
        # Cosine decay from lr_max to lr_min over total_steps.
        return lr_min + (lr_max - lr_min) * 0.5 * (
            1 + np.cos(step / total_steps * np.pi))

    # Per-step cosine schedule expressed as a multiplicative LR factor.
    scheduler = torch.optim.lr_scheduler.LambdaLR(
        optimizer,
        lr_lambda=lambda step: cosine_annealing(
            step,
            args.epochs * len(train_loader),
            1,  # since lr_lambda computes multiplicative factor
            1e-6 / (args.lr * args.batch_size / 256.)))
    scheduler_advnet = None

    if args.start_epoch != 0:
        # Fast-forward the schedule when resuming mid-training.
        scheduler.step(args.start_epoch * len(train_loader))
        # scheduler_advnet.step(args.start_epoch * len(train_loader))

    if args.evaluate:
        validate(val_loader, model, criterion, args)
        return

    ###########################################################################
    ##### Main Training Loop
    ###########################################################################

    with open(os.path.join(args.save, 'training_log.csv'), 'w') as f:
        f.write(
            'epoch,train_loss,train_acc1,train_acc5,val_loss,val_acc1,val_acc5\n'
        )
    # Record the exact run configuration for reproducibility.
    with open(os.path.join(args.save, 'command.txt'), 'w') as f:
        import pprint
        pprint.pprint(vars(args), stream=f)

    for epoch in range(args.start_epoch, args.epochs):
        if args.distributed:
            train_sampler.set_epoch(epoch)

        # train for one epoch
        train_losses_avg, train_top1_avg, train_top5_avg = train(
            train_loader, model, advnet, criterion, optimizer, scheduler,
            optimizer_advnet, scheduler_advnet, epoch, args)

        print("Evaluating on validation set")
        # evaluate on validation set
        val_losses_avg, val_top1_avg, val_top5_avg = validate(
            val_loader, model, criterion, args)
        print("Finished Evaluating on validation set")

        # Save results in log file
        with open(os.path.join(args.save, 'training_log.csv'), 'a') as f:
            f.write('%03d,%0.5f,%0.5f,%0.5f,%0.5f,%0.5f,%0.5f\n' %
                    ((epoch + 1), train_losses_avg, train_top1_avg,
                     train_top5_avg, val_losses_avg, val_top1_avg,
                     val_top5_avg))

        # remember best acc@1 and save checkpoint
        is_best = val_top1_avg > best_acc1
        best_acc1 = max(val_top1_avg, best_acc1)
        # Only rank 0 (or the single process) writes checkpoints.
        if not args.multiprocessing_distributed or (
                args.multiprocessing_distributed
                and args.rank % ngpus_per_node == 0):
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'arch': args.arch,
                    'state_dict': model.state_dict(),
                    'best_acc1': best_acc1,
                    'optimizer': optimizer.state_dict(),
                    'advnet_state_dict': advnet.state_dict(),
                }, is_best)
# print("omg") image = self.color_jitter(image) img_yuv = image.convert('YCbCr') img_yuv = transforms.functional.resize(img_yuv, (224, 224)) img_yuv = transforms.functional.to_tensor(img_yuv) img_yuv = img_yuv.numpy()[::-1].copy() img_yuv = torch.from_numpy(img_yuv) img_yuv = transforms.functional.normalize(img_yuv, [0.485, 0.456, 0.406], [0.229, 0.224, 0.225]) return img_yuv, torch.tensor([x, y]).float() datasets = [] datasets.append(XYDataset("Final dataset/test/dataset_xy", random_hflips=False)) # dataset = XYDataset("Final dataset/dataset_xy", random_hflips=False) for folder in os.listdir("Final dataset/test/Augmentation"): # print(folder) datasets.append( XYDataset("Final dataset/test/Augmentation/" + folder, random_hflips=False)) # read in our file if (entry.path.endswith(".jpg") dataset = D.ConcatDataset(datasets) # dataset = XYDataset("Final dataset/test/dataset_xy", random_hflips=False) if __name__ == '__main__':
def main_worker(gpu, ngpus_per_node, args):
    """Per-process training entry point.

    Builds the model (distributed / DataParallel / single-GPU), optionally
    loads a pretrained classifier checkpoint, assembles train/val loaders
    from standard and distorted ImageNet subsets, then runs the
    train/validate loop, logging to CSV and checkpointing the best top-1.

    Args:
        gpu: GPU index for this process, or None to use all visible GPUs.
        ngpus_per_node: GPUs on this node (used for global-rank math and
            for deciding which rank saves checkpoints).
        args: parsed argparse namespace (mutated: gpu, rank, start_epoch).
    """
    global best_acc1
    args.gpu = gpu

    if args.gpu is not None:
        print("Use GPU: {} for training".format(args.gpu))

    if args.distributed:
        if args.dist_url == "env://" and args.rank == -1:
            args.rank = int(os.environ["RANK"])
        if args.multiprocessing_distributed:
            # For multiprocessing distributed training, rank needs to be the
            # global rank among all the processes
            args.rank = args.rank * ngpus_per_node + gpu
        dist.init_process_group(backend=args.dist_backend,
                                init_method=args.dist_url,
                                world_size=args.world_size,
                                rank=args.rank)

    print("=> creating model '{}'".format(args.arch))
    model = models.__dict__[args.arch](pretrained=True)
    # Replace the classifier head for the chosen class subset.
    # NOTE(review): in_features=2048 assumes a ResNet-50-style backbone —
    # confirm for other values of args.arch.
    model.fc = torch.nn.Linear(2048, len(classes_chosen))

    if args.distributed:
        # For multiprocessing distributed, DistributedDataParallel constructor
        # should always set the single device scope, otherwise
        # DistributedDataParallel will use all available devices.
        if args.gpu is not None:
            # Single-GPU-per-process DDP is deliberately unsupported here.
            raise NotImplementedError()
        else:
            # DistributedDataParallel will divide and allocate batch_size to
            # all available GPUs if device_ids are not set.
            model.cuda()
            model = torch.nn.parallel.DistributedDataParallel(model)
    elif args.gpu is not None:
        torch.cuda.set_device(args.gpu)
        model = model.cuda(args.gpu)
    else:
        # DataParallel will divide and allocate batch_size to all available GPUs
        if args.arch.startswith('alexnet') or args.arch.startswith('vgg'):
            raise NotImplementedError()
        else:
            model = torch.nn.DataParallel(model).cuda()

    args.start_epoch = 0
    if args.classifier_pretrained_path is not None:
        if os.path.isfile(args.classifier_pretrained_path):
            print("=> loading checkpoint '{}'".format(args.classifier_pretrained_path))
            sd = torch.load(args.classifier_pretrained_path)['state_dict']
            # Drop the first conv and the (replaced) classifier head so their
            # shapes cannot clash; strict=False loads everything else.
            del sd['module.conv1.weight']
            del sd['module.fc.weight']
            del sd['module.fc.bias']
            model.load_state_dict(sd, strict=False)
            print("=> loaded checkpoint '{}'".format(args.classifier_pretrained_path))
        else:
            print("=> no checkpoint found at '{}'".format(args.classifier_pretrained_path))
            # A requested-but-missing checkpoint is fatal; raise a precise type
            # (subclass of Exception, so existing catchers still work).
            raise FileNotFoundError(args.classifier_pretrained_path)

    # define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss().cuda(args.gpu)

    optimizer = torch.optim.SGD(model.parameters(), args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay,
                                nesterov=True)

    cudnn.benchmark = True

    # Data loading code
    if args.data_standard is None:
        print("No Standard Data! Only using --data-distorted datasets")

    transform_train_standard = transforms.Compose([
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
    ])

    if args.distorted_data_simple_transform:
        print("Using SIMPLE train transform for distorted data")
        transform_train_distorted = transforms.Compose([
            transforms.Resize(256),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
        ])
    else:
        print("Using STANDARD train transform for distorted data")
        transform_train_distorted = transforms.Compose([
            transforms.RandomResizedCrop(224),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
        ])

    if args.data_distorted is not None:
        if args.symlink_distorted_data_dirs:
            print("Mixing together data directories: ", args.data_distorted)
            train_dataset = torch.utils.data.ConcatDataset([
                CombinedDistortedDatasetFolder(
                    args.data_distorted,
                    transform=transform_train_distorted
                ),
                # An empty list is a valid zero-length dataset for ConcatDataset.
                ImageNetSubsetDataset(
                    args.data_standard,
                    transform=transform_train_standard
                ) if args.data_standard is not None else []
            ])
        else:
            print(f"Concatenating Datasets {args.data_standard} and {args.data_distorted}")
            datasets = [
                ImageNetSubsetDataset(
                    args.data_standard,
                    transform=transform_train_standard
                ) if args.data_standard is not None else []
            ]
            for distorted_data_dir in args.data_distorted:
                datasets.append(
                    ImageNetSubsetDataset(
                        distorted_data_dir,
                        transform=transform_train_distorted
                    )
                )
            train_dataset = torch.utils.data.ConcatDataset(datasets)
    else:
        print(f"Only using Dataset {args.data_standard}")
        train_dataset = ImageNetSubsetDataset(
            args.data_standard,
            transform=transform_train_standard
        )

    if args.distributed:
        train_sampler = torch.utils.data.distributed.DistributedSampler(train_dataset)
    else:
        train_sampler = None

    # BUG FIX: DataLoader's `shuffle` and `sampler` options are mutually
    # exclusive — passing shuffle=True together with a DistributedSampler
    # raises ValueError. Shuffle only when no sampler is supplied; the
    # DistributedSampler already shuffles per epoch via set_epoch().
    train_loader = torch.utils.data.DataLoader(
        train_dataset, batch_size=args.batch_size,
        shuffle=(train_sampler is None),
        num_workers=args.workers, pin_memory=True, sampler=train_sampler)

    val_loader = torch.utils.data.DataLoader(
        ImageNetSubsetDataset(
            args.data_val,
            transform=transforms.Compose([
                transforms.Resize(256),
                transforms.CenterCrop(224),
                transforms.ToTensor(),
            ])
        ),
        batch_size=args.batch_size_val, shuffle=False,
        num_workers=args.workers, pin_memory=True)

    def cosine_annealing(step, total_steps, lr_max, lr_min):
        # Cosine decay from lr_max to lr_min over total_steps steps.
        return lr_min + (lr_max - lr_min) * 0.5 * (
            1 + np.cos(step / total_steps * np.pi))

    scheduler = torch.optim.lr_scheduler.LambdaLR(
        optimizer,
        lr_lambda=lambda step: cosine_annealing(
            step,
            args.epochs * len(train_loader),
            1,  # since lr_lambda computes multiplicative factor
            1e-6 / (args.lr * args.batch_size / 256.)))

    if args.start_epoch != 0:
        # Fast-forward the LR schedule when resuming mid-training.
        scheduler.step(args.start_epoch * len(train_loader))

    if args.evaluate:
        validate(val_loader, model, criterion, args)
        return

    ###########################################################################
    ##### Main Training Loop
    ###########################################################################

    # Truncate and (re)write the CSV header for this run.
    with open(os.path.join(args.save, 'training_log.csv'), 'w') as f:
        f.write('epoch,train_loss,train_acc1,train_acc5,val_loss,val_acc1,val_acc5\n')

    for epoch in range(args.start_epoch, args.epochs):
        if args.distributed:
            train_sampler.set_epoch(epoch)

        # train for one epoch
        train_losses_avg, train_top1_avg, train_top5_avg = train(
            train_loader, model, criterion, optimizer, scheduler, epoch, args)

        print("Evaluating on validation set")
        # evaluate on validation set
        val_losses_avg, val_top1_avg, val_top5_avg = validate(
            val_loader, model, criterion, args)
        print("Finished Evaluating on validation set")

        # Save results in log file
        with open(os.path.join(args.save, 'training_log.csv'), 'a') as f:
            f.write('%03d,%0.5f,%0.5f,%0.5f,%0.5f,%0.5f,%0.5f\n' % (
                (epoch + 1), train_losses_avg, train_top1_avg, train_top5_avg,
                val_losses_avg, val_top1_avg, val_top5_avg
            ))

        # remember best acc@1 and save checkpoint
        is_best = val_top1_avg > best_acc1
        best_acc1 = max(val_top1_avg, best_acc1)

        # Only the first rank per node writes checkpoints in multiprocessing
        # distributed mode; otherwise every process saves.
        if not args.multiprocessing_distributed or (
                args.multiprocessing_distributed
                and args.rank % ngpus_per_node == 0):
            save_checkpoint({
                'epoch': epoch + 1,
                'arch': args.arch,
                'state_dict': model.state_dict(),
                'best_acc1': best_acc1,
                'optimizer': optimizer.state_dict(),
            }, is_best)
def test_all_datasets(args):
    """Run saliency testing over a fixed set of benchmark datasets.

    Each enabled dataset is described by a dict with its name, Parameters
    directory, the prefix of its ***_names.txt data list, and the test
    batch size. Per-dataset results (MAE / F / precision / recall) are
    printed and appended to <root_dir>/Doc/Test_all/Test_Results.txt.

    Args:
        args: argparse namespace; mutated per dataset (dataset_name,
            data_dir, test_data, batch_size) and args.resume is defaulted
            if unset.
    """
    test_dir = join(args.root_dir, 'Doc/Test_all/')
    if not exists(test_dir):
        os.makedirs(test_dir)

    # Toggle individual benchmarks here.
    MSRAB = True
    ECSSD = True
    DUT = True
    SED2 = True
    THUR = False  # disabled by default (was commented out)

    if not args.resume:
        # BUG FIX: this previously used join(args.root_dir + '...') — string
        # concatenation, not path joining — which silently built a wrong path
        # whenever root_dir lacked a trailing slash. Join the components.
        args.resume = join(args.root_dir,
                           'Doc/Phase_II_Fusion/checkpoint_latest.pth.tar')

    # Shared root for all benchmark Parameters directories.
    base = '/media/bigData/_80_User/Dax/UnsupSD/SD_beta/Data/'
    configs = [
        (MSRAB, '01_MSRAB', 'test'),
        (ECSSD, '02_ECSSD', 'all'),
        (DUT, '03_DUT', 'all'),
        (SED2, '04_SED2', 'all'),
        (THUR, '06_THUR', 'GT'),
    ]
    datasets = [
        {
            'name': name,
            'param_dir': base + name + '/Parameters/',
            'data_prefix': prefix,
            'batch_size': 1,
        }
        for enabled, name, prefix in configs if enabled
    ]

    # Iterate through the dictionaries and test each dataset
    for dataset in datasets:
        # set correct arguments
        args.dataset_name = dataset['name']
        args.data_dir = dataset['param_dir']
        args.test_data = dataset['data_prefix']
        args.batch_size = dataset['batch_size']
        DOC = test_saliency(args)
        dataset['Result'] = DOC

    print("\n\n\t\t\tMAE\t\tF\t\tprecision\trecall")
    row_fmt = ("{name}: \t\t{DOC.L1_GT.avg:.3f}\t\t{DOC.F_GT.avg:.3f}"
               "\t\t{DOC.prec_GT.avg:.3f}\t\t{DOC.recall_GT.avg:.3f}")
    for dataset in datasets:
        print(row_fmt.format(name=dataset['name'], DOC=dataset['Result']))

    result_file = join(test_dir, 'Test_Results.txt')
    # Context manager guarantees the results file is closed even on error
    # (the original used open()/close() without try/finally).
    with open(result_file, 'a') as f:
        f.write("\t\t\tMAE\t\tF\t\tprecision\trecall\n")
        file_fmt = ("{name}: \t\t{DOC.L1_GT.avg:.5f}\t\t{DOC.F_GT.avg:.5f}"
                    "\t\t{DOC.prec_GT.avg:.5f}\t\t{DOC.recall_GT.avg:.5f}\n")
        for dataset in datasets:
            f.write(file_fmt.format(name=dataset['name'], DOC=dataset['Result']))