# optuna.logging.disable_default_handler()
from tqdm import tqdm_notebook as tqdm

BATCHSIZE = 128
transform = transforms.Compose(
    [transforms.ToTensor(), transforms.Normalize((0.5,), (0.5,))])

train_set = MNIST(root='./data', train=True, download=True, transform=transform)
subset1_indices = list(range(0, 6000))
train_set = Subset(train_set, subset1_indices)
train_loader = DataLoader(train_set, batch_size=BATCHSIZE, shuffle=True, num_workers=2)

subset2_indices = list(range(0, 1000))
test_set = MNIST(root='./data', train=False, download=True, transform=transform)
test_set = Subset(test_set, subset2_indices)
test_loader = DataLoader(test_set, batch_size=BATCHSIZE, shuffle=False, num_workers=2)
override_sample_function_name=flags.override_sample_function_name, ) if flags.include_valid: train_agent_dataset2 = FasterAgentDataset( cfg, valid_zarr, rasterizer, min_frame_history=flags.min_frame_history, min_frame_future=flags.min_frame_future, override_sample_function_name=flags.override_sample_function_name, ) print("Before Concat: ", len(train_agent_dataset), len(train_agent_dataset2)) train_agent_dataset = ConcatDataset([train_agent_dataset, train_agent_dataset2]) print("After Concat: ", len(train_agent_dataset)) train_dataset = TransformDataset(train_agent_dataset, transform) if debug: # Only use 1000 dataset for fast check... train_dataset = Subset(train_dataset, np.arange(1000)) if is_mpi: if flags.scene_sampler: assert isinstance(train_agent_dataset, FasterAgentDataset) train_sampler = DistributedSceneSampler( get_frame_arguments=train_agent_dataset.get_frame_arguments, min_state_index=flags.scene_sampler_min_state_index, shuffle=bool(train_cfg["shuffle"]) ) else: train_sampler = ResumableDistributedSampler(train_dataset, shuffle=bool(train_cfg["shuffle"])) else: if flags.scene_sampler: assert isinstance(train_agent_dataset, FasterAgentDataset) assert bool(train_cfg["shuffle"]) train_sampler = SceneSampler(
def get_train_val_loaders( summary_data_filepath: Path, train_transforms: Callable, val_transforms: Callable, train_preprocessing: Optional[Callable] = None, val_preprocessing: Optional[Callable] = None, batch_size: int = 16, num_workers: int = 8, limit_train_num_samples: Optional[int] = None, limit_val_num_samples: Optional[int] = None, drop_empty_images: Optional[bool] = True ) -> Tuple[DataLoader, DataLoader, DataLoader]: summary_data_df = pd.read_csv(summary_data_filepath) print(summary_data_df.shape) if drop_empty_images: summary_data_df = summary_data_df[summary_data_df.has_building] print(summary_data_df.shape) train_image_filepaths = summary_data_df[summary_data_df.train_val_test == "train"].image_filepath.values train_mask_filepaths = summary_data_df[summary_data_df.train_val_test == "train"].mask_filepath.values valid_image_filepaths = summary_data_df[summary_data_df.train_val_test == "valid"].image_filepath.values valid_mask_filepaths = summary_data_df[summary_data_df.train_val_test == "valid"].mask_filepath.values test_image_filepaths = summary_data_df[summary_data_df.train_val_test == "test"].image_filepath.values test_mask_filepaths = summary_data_df[summary_data_df.train_val_test == "test"].mask_filepath.values train_ds = SatelliteSegmentationDataset( image_filepath_list=train_image_filepaths, mask_filepath_list=train_mask_filepaths, transform=train_transforms, preprocessing=train_preprocessing) val_ds = SatelliteSegmentationDataset( image_filepath_list=valid_image_filepaths, mask_filepath_list=valid_mask_filepaths, transform=val_transforms, preprocessing=val_preprocessing) test_ds = SatelliteSegmentationDataset( image_filepath_list=test_image_filepaths, mask_filepath_list=test_mask_filepaths, transform=val_transforms, preprocessing=val_preprocessing) if limit_train_num_samples is not None: np.random.seed(limit_train_num_samples) train_indices = np.random.permutation( len(train_ds))[:limit_train_num_samples] train_ds = Subset(train_ds, train_indices) if limit_val_num_samples is not None: np.random.seed(limit_val_num_samples) val_indices = np.random.permutation( len(val_ds))[:limit_val_num_samples] val_ds = Subset(val_ds, val_indices) # random samples for evaluation on training dataset # if len(val_ds) < len(train_ds): # np.random.seed(len(val_ds)) # train_eval_indices = np.random.permutation(len(train_ds))[: len(val_ds)] # test_ds = Subset(train_ds, train_eval_indices) # else: # test_ds = test_ds train_loader = DataLoader( train_ds, shuffle=True, batch_size=batch_size, num_workers=num_workers, drop_last=True, ) val_loader = DataLoader( val_ds, shuffle=False, batch_size=batch_size, num_workers=num_workers, drop_last=False, ) test_loader = DataLoader( test_ds, shuffle=False, batch_size=batch_size, num_workers=num_workers, drop_last=False, ) return train_loader, val_loader, test_loader # def get_inference_dataloader( # image_dir: Path, # mask_dir: Optional[Path] = None, # summary_data_filepath: Optional[Path] = None, # transforms: Optional[Callable] = None, # preprocessing: Optional[Callable] = None, # batch_size: int = 16, # num_workers: int = 8, # drop_empty_images: Optional[bool] = True, # pin_memory: bool = True, # limit_num_samples: Optional[int] = None, # ) -> DataLoader: # # summary_data_df = pd.read_csv(summary_data_filepath) # if drop_empty_images: # summary_data_df = summary_data_df[summary_data_df.PolygonWKT_Geo != "POLYGON EMPTY"] # image_ids = summary_data_df.ImageId.unique() # # ds = SatelliteSegmentationDataset( # image_dir, mask_dir, image_id_list=image_ids, # 
transform=transforms, preprocessing=preprocessing) # # if limit_num_samples is not None: # indices = np.random.permutation(len(ds))[:limit_num_samples] # dataset = Subset(ds, indices) # # loader = DataLoader( # dataset, shuffle=False, batch_size=batch_size, num_workers=num_workers, pin_memory=pin_memory, drop_last=False # ) # return loader
def subset(self, skorch_ds):
    from torch.utils.data.dataset import Subset
    return Subset(skorch_ds, [1, 3])
os.environ["L5KIT_DATA_FOLDER"] = flags.l5kit_data_folder dm = LocalDataManager(None) print('=' * 10 + 'Loading Training Data' + '=' * 10) train_cfg = cfg["train_data_loader"] # Rasterizer rasterizer = build_rasterizer(cfg, dm) # Train dataset/dataloader train_path = "scenes/sample.zarr" if debug else train_cfg["key"] train_zarr = ChunkedDataset(dm.require(train_path)).open() train_agent_dataset = AgentDataset(cfg, train_zarr, rasterizer) train_dataset = TransformDataset(train_agent_dataset, transform) if debug: # Only use subset dataset for fast check... train_dataset = Subset( train_dataset, np.arange(cfg["train_data_loader"]["batch_size"] * 40)) train_loader = DataLoader(train_dataset, shuffle=train_cfg["shuffle"], batch_size=train_cfg["batch_size"], num_workers=train_cfg["num_workers"]) print(train_agent_dataset) # GENERATE AND LOAD CHOPPED DATASET print('=' * 10 + 'Loading Validation' + '=' * 10) valid_cfg = cfg["valid_data_loader"] valid_path = "scenes/sample.zarr" if debug else valid_cfg["key"] num_frames_to_chop = 100 MIN_FUTURE_STEPS = 10 valid_base_path = create_chopped_dataset( dm.require(valid_path),
print(f'corruption {corrupt_type} : {corrupt_level}') from imagenet_c import corrupt if not corrupt_type.isdigit(): ts.insert(corrupt_idx, lambda img: PIL.Image.fromarray(corrupt(np.array(img), corrupt_level, corrupt_type))) else: ts.insert(corrupt_idx, lambda img: PIL.Image.fromarray(corrupt(np.array(img), corrupt_level, None, int(corrupt_type)))) transform_test = transforms.Compose(ts) testset = ImageNet(root='/data/public/rw/datasets/imagenet-pytorch', split='val', transform=transform_test) sss = StratifiedShuffleSplit(n_splits=5, test_size=0.2, random_state=0) for _ in range(1): sss = sss.split(list(range(len(testset))), testset.targets) train_idx, valid_idx = next(sss) testset = Subset(testset, valid_idx) testloader = torch.utils.data.DataLoader(testset, batch_size=args.test_batch, shuffle=False, num_workers=32, pin_memory=True, drop_last=False) metric = Accumulator() dl_test = tqdm(testloader) data_id = 0 tta_rule_cnt = [0] * tta_num for data, label in dl_test: data = data.view(-1, data.shape[-3], data.shape[-2], data.shape[-1]) data = data.cuda() with torch.no_grad(): preds = model_target(data) preds = torch.softmax(preds, dim=1)
def gen_val_datasets(self, transform=None, target_transform=None) -> Dataset:
    ds = cifar.CIFAR100(root=CIFAR100PATH, train=True, download=True,
                        transform=transform, target_transform=target_transform)
    return Subset(OrderDataset(ds), list(range(40000, 50000)))
[len(dataset) - num_test, num_test]) train_set, validation_set = random_split( train_set, [len(train_set) - num_test, num_test]) loader_train, loader_validation, loader_test = [ StructureLoader(d, batch_size=hyperparams['batch_size']) for d in [train_set, validation_set, test_set] ] else: # Split the dataset print('Structural split') dataset_indices = {d['name']: i for i, d in enumerate(dataset)} with open(args.file_splits) as f: dataset_splits = json.load(f) train_set, validation_set, test_set = [ Subset(dataset, [ dataset_indices[chain_name] for chain_name in dataset_splits[key] ]) for key in ['train', 'validation', 'test'] ] loader_train, loader_validation, loader_test = [ StructureLoader(d, batch_size=hyperparams['batch_size']) for d in [train_set, validation_set, test_set] ] print('Training:{}, Validation:{}, Test:{}'.format(len(train_set), len(validation_set), len(test_set))) # Build basepath for experiment base_folder = strftime("log/%y%b%d_%I%M%p/", localtime()) if not os.path.exists(base_folder): os.makedirs(base_folder)
def get_train_val_loaders( root_path: str, train_transforms: Callable, val_transforms: Callable, batch_size: int = 16, num_workers: int = 8, val_batch_size: Optional[int] = None, pin_memory: bool = True, random_seed: Optional[int] = None, train_sampler: Optional[Union[Sampler, str]] = None, val_sampler: Optional[Union[Sampler, str]] = None, limit_train_num_samples: Optional[int] = None, limit_val_num_samples: Optional[int] = None, ) -> Tuple[DataLoader, DataLoader, DataLoader]: train_ds = ImageNet( root_path, split="train", transform=lambda sample: train_transforms(image=sample)["image"], loader=opencv_loader) val_ds = ImageNet( root_path, split="val", transform=lambda sample: val_transforms(image=sample)["image"], loader=opencv_loader) if limit_train_num_samples is not None: if random_seed is not None: np.random.seed(random_seed) train_indices = np.random.permutation( len(train_ds))[:limit_train_num_samples] train_ds = Subset(train_ds, train_indices) if limit_val_num_samples is not None: val_indices = np.random.permutation( len(val_ds))[:limit_val_num_samples] val_ds = Subset(val_ds, val_indices) # random samples for evaluation on training dataset if len(val_ds) < len(train_ds): train_eval_indices = np.random.permutation(len(train_ds))[:len(val_ds)] train_eval_ds = Subset(train_ds, train_eval_indices) else: train_eval_ds = train_ds if isinstance(train_sampler, str): assert train_sampler == "distributed" train_sampler = data_dist.DistributedSampler(train_ds) train_eval_sampler = None if isinstance(val_sampler, str): assert val_sampler == "distributed" val_sampler = data_dist.DistributedSampler(val_ds, shuffle=False) train_eval_sampler = data_dist.DistributedSampler(train_eval_ds, shuffle=False) train_loader = DataLoader( train_ds, shuffle=train_sampler is None, batch_size=batch_size, num_workers=num_workers, sampler=train_sampler, pin_memory=pin_memory, drop_last=True, ) val_batch_size = batch_size * 4 if val_batch_size is None else val_batch_size val_loader = DataLoader( val_ds, shuffle=False, sampler=val_sampler, batch_size=val_batch_size, num_workers=num_workers, pin_memory=pin_memory, drop_last=False, ) train_eval_loader = DataLoader( train_eval_ds, shuffle=False, sampler=train_eval_sampler, batch_size=val_batch_size, num_workers=num_workers, pin_memory=pin_memory, drop_last=False, ) return train_loader, val_loader, train_eval_loader
resize_transform = transforms.Compose(
    [transforms.Resize((32, 32)), transforms.ToTensor()])

train_and_valid = datasets.MNIST(root='../data', train=True,
                                 transform=resize_transform, download=True)
test_dataset = datasets.MNIST(root='../data', train=False,
                              transform=resize_transform, download=True)

train_dataset = Subset(train_and_valid, train_indices)
valid_dataset = Subset(train_and_valid, valid_indices)

train_loader = DataLoader(dataset=train_dataset, batch_size=BATCH_SIZE,
                          num_workers=4, shuffle=True)
valid_loader = DataLoader(dataset=valid_dataset, batch_size=BATCH_SIZE,
                          num_workers=4, shuffle=False)
test_loader = DataLoader(dataset=test_dataset, batch_size=BATCH_SIZE,
                         num_workers=4,
transforms.RandomHorizontalFlip(p=0.5), transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))]) #create trainset and validation set train_set = GetData(TRAIN_DIR, X_Train, Y_Train, Transform) sample_num = len(train_set) train1_size_ind = int(sample_num * 0.45) train2_size_ind = int(sample_num * 0.9) subset1_indices = list(range(0, train1_size_ind)) subset2_indices = list(range(train1_size_ind, train2_size_ind)) subset3_indices = list(range(train2_size_ind, sample_num)) trainset1 = Subset(train_set, subset1_indices) trainset2 = Subset(train_set, subset2_indices) valset = Subset(train_set, subset3_indices) trainloader1 = DataLoader(trainset1, batch_size=BATCH, shuffle=True, num_workers=16) trainloader2 = DataLoader(trainset2, batch_size=BATCH, shuffle=True, num_workers=16) valloader = DataLoader(valset, batch_size=1, shuffle=False, num_workers=16) testset = GetData(TEST_DIR, X_Test, None, Transform) testloader = DataLoader(testset, batch_size=1, shuffle=False, num_workers=16) #define neural network models class Cnn_2(nn.Module): def __init__(self): super(Cnn_2, self).__init__() self.conv1 = nn.Conv2d(3, 32, 3)
def data_split(dataset, val_ratio=0.1, test_ratio=0.1, seed=1234): """Splits a dataset into separate training, validation, and testing sets, where the size of the val and test sets are (roughly) given by val_ratio and test_ratio. Splitting is done so that class priors of training and validation sets match the original data (this is also known as 'stratified sampling'). Note: size of val/test set may be bigger than ratio*original_data_size since this value is rounded up to an integer Note: still need to update this function to be more efficient (i.e. remove for-loops) Note: still need to add checks to make sure val and test sets don't exceed data size for extreme case where val_ratio+test_ratio=1 Args: dataset = The dataset to split val_ratio = The amount of the given dataset to use for the validation set, must be 0-1 (default 0.1) test_ratio = The amount of the given data set to use for the test set, must be 0-1 (default 0.1) * Also require: val_ratio+test_ratio<=1 (if =1, then there will be no training set!) seed = Seed for random shuffling of dataset (used for reproducibility) (default 1234) Returns: train = The training dataset val = The validation dataset test = The test dataset """ # How you grab the labels will depend on what type of Pytorch Dataset object 'dataset' is # (i.e. ImageFolder/DatasetFolder or not) # For fun, check the method resolution order (MRO) of 'dataset' print('Dataset object\'s inheritance: ', type(dataset).__mro__) # Determine what kind of Dataset object it is, then grab labels # Warning: currently this will break for anything other than an ImageFolder or CIFAR10 train set if isinstance(dataset, datasets.CIFAR10): labels = dataset.train_labels elif isinstance(dataset, datasets.ImageFolder): labels = [img[1] for img in dataset.imgs] else: error('Dataset not supported yet') # Calculate class priors, (number in class)/(size of dataset) idcs = [i for i in range(len(dataset))] samples_per_class = np.bincount(np.array(labels)) priors = samples_per_class / len(labels) # Number of samples in each class for val and test set val_per_class = np.ceil(samples_per_class * val_ratio).astype(np.int) test_per_class = np.ceil(samples_per_class * test_ratio).astype(np.int) # Copy and shuffle the labels and corresponding indices to randomize before splitting shuffled_labels = list(labels) shuffled_idcs = list(idcs) random.Random(seed).shuffle(shuffled_labels) random.Random(seed).shuffle(shuffled_idcs) # Iterate through, grabbing indices for each class to place in validation set # until the desired number is reached val_idcs = [] val_counts = np.zeros(val_per_class.shape) for i, l in zip(shuffled_idcs, shuffled_labels): # Check if validation set quota has been reached yet for this class if val_counts[l] < val_per_class[l]: val_idcs.append(i) val_counts[l] += 1 # Check if stopping point is reached if (val_counts == val_per_class).all(): break # Repeat for test set test_idcs = [] test_counts = np.zeros(test_per_class.shape) for i, l in zip(shuffled_idcs, shuffled_labels): # Check if this index is already in val set if i in val_idcs: continue # Check if test set quota has been reached yet for this class if test_counts[l] < test_per_class[l]: test_idcs.append(i) test_counts[l] += 1 # Check if stopping point is reached if (test_counts == test_per_class).all(): break # Get train indices too (all the remaining samples not in val or test) train_idcs = [j for j in idcs if j not in val_idcs + test_idcs] # Split the data train = Subset(dataset, train_idcs) val = Subset(dataset, val_idcs) test = 
Subset(dataset, test_idcs)

    return train, val, test
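# A hypothetical usage sketch for data_split (not part of the original code):
# the dataset root, transform, and batch size below are placeholders. Note that
# data_split as written reads dataset.train_labels, so this assumes an older
# torchvision release; newer versions expose the same data as `targets`.
from torch.utils.data import DataLoader
from torchvision import datasets, transforms

cifar_train = datasets.CIFAR10(root='./data', train=True, download=True,
                               transform=transforms.ToTensor())
train, val, test = data_split(cifar_train, val_ratio=0.1, test_ratio=0.1, seed=1234)
train_loader = DataLoader(train, batch_size=64, shuffle=True)
val_loader = DataLoader(val, batch_size=64, shuffle=False)
test_loader = DataLoader(test, batch_size=64, shuffle=False)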
]) test_transform = transforms.Compose([ transforms.ToTensor(), transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) ]) no_normalize_test_transform = transforms.Compose([ #transforms.RandomCrop(32, padding=4), transforms.ToTensor(), ]) BATCH_SIZE = 50 fullset = datasets.CIFAR10(root=".data", train=True, transform=image_transform, download=True) trainset = Subset(fullset, range(40000)) valset = Subset(fullset, range(40000,50000)) testset = datasets.CIFAR10(root=".data", train=False, transform=test_transform, download=True) fullloader = ch.utils.data.DataLoader(fullset, batch_size=32, shuffle=True) trainloader = ch.utils.data.DataLoader(trainset, batch_size=32, shuffle=True) validationloader = ch.utils.data.DataLoader(valset, batch_size=32, shuffle=True) testloader = ch.utils.data.DataLoader(testset, batch_size=32, shuffle=False) no_norm_fullset = datasets.CIFAR10(root=".data", train=True, transform=no_normalize_transform, download=True) no_norm_testset = datasets.CIFAR10(root=".data", train=False, transform=no_normalize_test_transform, download=True) no_norm_trainset = Subset(no_norm_fullset, range(40000)) no_norm_valset = Subset(no_norm_fullset, range(40000,50000)) no_norm_fullloader = ch.utils.data.DataLoader(no_norm_fullset, batch_size=BATCH_SIZE, shuffle=True) no_norm_trainloader = ch.utils.data.DataLoader(no_norm_trainset, batch_size=BATCH_SIZE, shuffle=True) no_norm_validationloader = ch.utils.data.DataLoader(no_norm_valset, batch_size=BATCH_SIZE, shuffle=True)
def train(epoch): epoch_loss = 0 epoch_rloss = 0 epoch_ploss = 0 startIter = 1 # keep track of batch iter across subsets for logging if opt.cacheRefreshRate > 0: subsetN = ceil(len(train_set) / opt.cacheRefreshRate) #TODO randomise the arange before splitting? subsetIdx = np.array_split(np.arange(len(train_set)), subsetN) else: subsetN = 1 subsetIdx = [np.arange(len(train_set))] nBatches = (len(train_set) + opt.batchSize - 1) // opt.batchSize for subIter in range(subsetN): print('====> Building Cache') model.eval() train_set.cache = join( opt.cachePath, train_set.whichSet + '1n_' + str(opt.ratio) + str(opt.nEpochs) + str(opt.mul) + str(opt.p_margin) + str(opt.beta) + str(opt.random_pos_level) + str(opt.freeze) + opt.optim + str(opt.random_crop) + '_' + str(opt.casa) + str(opt.pooling) + str(opt.margin) + str(opt.relu) + str(opt.fromscratch) + 'i_' + str(opt.num_PCA) + '_' + str(opt.cacheRefreshRate) + '_' + str(opt.lr) + str(opt.arch) + str(opt.atten_type) + '_feat_cache.hdf5') # if isfile(train_set.cache): # print('Cache already existed') if True: start_time = time.time() with h5py.File(train_set.cache, mode='w') as h5: pool_size = opt.num_PCA if pool_size == -1: pool_size = encoder_dim * opt.num_clusters h5feat = h5.create_dataset("features", [len(whole_train_set), pool_size], dtype=np.float32) with torch.no_grad(): for iteration, (input, indices) in enumerate( whole_training_data_loader, 1): input = input.to(device=device, dtype=torch.float) # print('input size {}'.format(input.size())) vlad_encoding = model(input).cuda() # print(vlad_encoding.size(),pool_size,len(whole_train_set)) h5feat[indices.detach().numpy( ), :] = vlad_encoding.detach().cpu().numpy() del input, vlad_encoding #end_time = time.time()-start_time # print('building cache elasped ', end_time) sub_train_set = Subset(dataset=train_set, indices=subsetIdx[subIter]) training_data_loader = DataLoader(dataset=sub_train_set, num_workers=opt.threads, batch_size=opt.batchSize, shuffle=True, collate_fn=dataset.collate_fn, pin_memory=cuda) del sub_train_set model.train() for iteration, (query, positives, negatives, negCounts, indices) in enumerate(training_data_loader, startIter): # some reshaping to put query, pos, negs in a single (N, 3, H, W) tensor # where N = batchSize * (nQuery + nPos + nNeg) if query is None: continue # in case we get an empty batch B, C, H, W = query.shape nNeg = torch.sum(negCounts) #start_time = time.time() input = torch.cat([query, positives, negatives]) del query, positives, negatives input = input.to(device=device, dtype=torch.float) vlad_encoding = model(input) del input vladQ, vladP, vladN = torch.split(vlad_encoding, [B, B, nNeg]) del vlad_encoding optimizer.zero_grad() # calculate loss for each Query, Positive, Negative triplet # due to potential difference in number of negatives have to # do it per query, per negative r_loss = 0 for i, negCount in enumerate(negCounts): for n in range(negCount): negIx = (torch.sum(negCounts[:i]) + n).item() r_loss += criterion(vladQ[i:i + 1], vladP[i:i + 1], vladN[negIx:negIx + 1]).cuda() #if opt.loss in ['impr_triplet']: #p_loss = p_criterion(vladQ, vladP).cuda() #del vladQ, vladP, vladN if opt.beta < 0: beta = epoch / opt.nEpochs p_loss = p_criterion(vladQ, vladP).cuda() elif opt.beta > 1: beta = ((1 - epoch) / opt.nEpochs) * 0.5 p_loss = p_criterion(vladQ, vladP).cuda() elif opt.beta == 0.0: beta = opt.beta p_loss = 0 else: p_loss = p_criterion(vladQ, vladP).cuda() beta = opt.beta r_loss /= nNeg.float().to( device) # normalise by actual number of negatives loss 
            = beta * p_loss + r_loss
            # del r_loss, p_loss
            loss.backward()
            optimizer.step()
            # del query, positives, negatives
            batch_loss = loss.item()
            epoch_loss += batch_loss
            epoch_rloss += r_loss
            epoch_ploss += p_loss
            if iteration % 500 == 0 or nBatches <= 10:
                # print("==> Epoch[{}]({}/{}): Loss: {:.4f}".format(epoch, iteration,
                #       nBatches, batch_loss), flush=True)
                print(
                    "==> Epoch[{}]({}/{}): sum loss: {:.4f} p_loss: {:.4f} r_loss: {:.4f}"
                    .format(epoch, iteration, nBatches, loss, p_loss, r_loss),
                    flush=True)
                writer.add_scalar('Train/Loss', batch_loss,
                                  ((epoch - 1) * nBatches) + iteration)
                writer.add_scalar('Train/nNeg', nNeg,
                                  ((epoch - 1) * nBatches) + iteration)
                # print('Allocated:', torch.cuda.memory_allocated())
                # print('Cached:', torch.cuda.memory_cached())
            del loss, batch_loss

        # print('train finished')
        startIter += len(training_data_loader)
        del training_data_loader
        optimizer.zero_grad()
        torch.cuda.empty_cache()
        # remove(train_set.cache)  # delete HDF5 cache

    avg_loss = epoch_loss / nBatches
    avg_rloss = epoch_rloss / nBatches
    avg_ploss = epoch_ploss / nBatches
    print(
        "===> Epoch {} Complete: Avg. Loss: {:.4f} rloss:{:.4f} ploss:{:.4f}".format(
            epoch, avg_loss, avg_rloss, avg_ploss),
        flush=True)
    print(beta)
    writer.add_scalar('Train/AvgLoss', avg_loss, epoch)
def load_datasets(): """Create data loaders for the CIFAR-10 dataset. Returns: Dict containing data loaders. """ ## Normalizes the data we use. normalize = transforms.Normalize( mean=[x / 255.0 for x in [125.3, 123.0, 113.9]], std=[x / 255.0 for x in [63.0, 62.1, 66.7]]) ## Transform the training data. train_transform = transforms.Compose([ transforms.RandomCrop(32, padding=4), transforms.RandomHorizontalFlip(), transforms.ToTensor(), normalize ]) ## Cutout will randomly take parts of the image and cut them out. if args.cutout > 0: train_transform.transforms.append(Cutout(length=args.cutout)) ## Transform the validation data. valid_transform = transforms.Compose([ transforms.RandomCrop(32, padding=4), transforms.RandomHorizontalFlip(), transforms.ToTensor(), normalize ]) ## Transform the testing data. test_transform = transforms.Compose([transforms.ToTensor(), normalize]) train_dataset = datasets.CIFAR10(root=args.data_path, train=True, transform=train_transform, download=True) valid_dataset = datasets.CIFAR10(root=args.data_path, train=True, transform=valid_transform, download=True) test_dataset = datasets.CIFAR10(root=args.data_path, train=False, transform=test_transform, download=True) train_indices = list(range(0, 45000)) valid_indices = list(range(45000, 50000)) train_subset = Subset(train_dataset, train_indices) valid_subset = Subset(valid_dataset, valid_indices) data_loaders = {} data_loaders['train_subset'] = torch.utils.data.DataLoader( dataset=train_subset, batch_size=args.batch_size, shuffle=True, pin_memory=True, num_workers=2) data_loaders['valid_subset'] = torch.utils.data.DataLoader( dataset=valid_subset, batch_size=args.batch_size, shuffle=True, pin_memory=True, num_workers=2, drop_last=True) data_loaders['train_dataset'] = torch.utils.data.DataLoader( dataset=train_dataset, batch_size=args.batch_size, shuffle=True, pin_memory=True, num_workers=2) data_loaders['test_dataset'] = torch.utils.data.DataLoader( dataset=test_dataset, batch_size=args.batch_size, shuffle=False, pin_memory=True, num_workers=2) return data_loaders
def get_data_loaders(): test_dataset = datasets.CIFAR10( './', train=False, transform=transforms.Compose([ transforms.ToTensor(), #transforms.Normalize((0.1307,), (0.3081,)) transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)), ]), download=False) train_dataset = datasets.CIFAR10( './', train=True, download=False, transform=transforms.Compose([ transforms.ToTensor(), transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)), #transforms.Normalize((0.1307,), (0.3081,)) ])) if (args.admode == 'testing'): n_samples = int(len(test_dataset) * args.proportion) subset1_indices = list(range(0, n_samples)) subset2_indices = list(range(n_samples, len(test_dataset))) adversarial_loader = torch.utils.data.DataLoader( Subset(test_dataset, subset1_indices), batch_size=args.batch_size, shuffle=True, num_workers=1, pin_memory=True) rtest_loader = torch.utils.data.DataLoader( Subset(test_dataset, subset2_indices), batch_size=args.test_batch_size, shuffle=True, num_workers=1, pin_memory=True) train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=args.batch_size, shuffle=True, num_workers=3, pin_memory=True) elif (args.admode == 'training'): if (args.proportion != 0): n_samples = int(len(train_dataset) * args.proportion) subset1_indices = list(range(0, n_samples)) subset2_indices = list(range(n_samples, len(train_dataset))) adversarial_loader = torch.utils.data.DataLoader( Subset(train_dataset, subset1_indices), batch_size=args.batch_size, shuffle=True, num_workers=1, pin_memory=True) rtest_loader = torch.utils.data.DataLoader( test_dataset, batch_size=args.test_batch_size, shuffle=True, num_workers=2, pin_memory=True) train_loader = torch.utils.data.DataLoader( Subset(train_dataset, subset2_indices), batch_size=args.batch_size, shuffle=True, num_workers=3, pin_memory=True) else: n_samples = 0 adversarial_loader = None rtest_loader = torch.utils.data.DataLoader( test_dataset, batch_size=args.test_batch_size, shuffle=True, num_workers=1, pin_memory=True) train_loader = torch.utils.data.DataLoader( train_dataset, batch_size=args.batch_size, shuffle=True, num_workers=3, pin_memory=True) else: n_samples = int(len(test_dataset) * args.proportion) subset1_indices = list(range(0, n_samples)) subset2_indices = list(range(n_samples, len(test_dataset))) adversarial_loader = torch.utils.data.DataLoader( Subset(test_dataset, subset1_indices), batch_size=args.batch_size, shuffle=True, num_workers=1, pin_memory=True) rtest_loader = torch.utils.data.DataLoader( Subset(test_dataset, subset2_indices), batch_size=args.test_batch_size, shuffle=True, num_workers=1, pin_memory=True) train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=args.batch_size, shuffle=True, num_workers=3, pin_memory=True) #print('number of total samples', n_samples) return adversarial_loader, train_loader, rtest_loader
model=param["model_directory"], machine_type=machine_type) com.logger.info("============== DATASET_GENERATOR ==============") dataset = dcaseDataset( target_dir, n_mels=param["feature"]["n_mels"], frames=param["feature"]["frames"], n_fft=param["feature"]["n_fft"], hop_length=param["feature"]["hop_length"], power=param["feature"]["power"], ) n_samples = len(dataset) train_size = int(n_samples * (1.0 - param["fit"]["validation_split"])) subset1_indices = list(range(0, train_size)) subset2_indices = list(range(train_size, n_samples)) train_dataset = Subset(dataset, subset1_indices) val_dataset = Subset(dataset, subset2_indices) com.logger.info("============== DATALOADER_GENERATOR ==============") train_loader = torch.utils.data.DataLoader( train_dataset, batch_size=param["fit"]["batch_size"], shuffle=param["fit"]["shuffle"], drop_last=True) val_loader = torch.utils.data.DataLoader( val_dataset, batch_size=param["fit"]["batch_size"], shuffle=False, drop_last=False)
def setup_fold_index(self, fold_index: int) -> None:
    train_indices, val_indices = self.splits[fold_index]
    self.train_fold = Subset(self.train_dataset, train_indices)
    self.val_fold = Subset(self.train_dataset, val_indices)
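# A minimal sketch (an assumption, not taken from the original module) of how
# `self.splits` could be prepared for setup_fold_index, e.g. with scikit-learn:
import numpy as np
from sklearn.model_selection import KFold

def make_kfold_splits(dataset, n_splits=5, seed=42):
    """Return a list of (train_indices, val_indices) pairs over dataset positions."""
    kfold = KFold(n_splits=n_splits, shuffle=True, random_state=seed)
    return list(kfold.split(np.arange(len(dataset))))

# e.g. in the datamodule's setup: self.splits = make_kfold_splits(self.train_dataset)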
train_ids = torch.arange(0, 44000)
valid_ids = torch.arange(44000, 50000)
tr0 = (.5, .5, .5)
train_transform = transforms.Compose(
    [transforms.Resize((70, 70)),
     transforms.RandomCrop((64, 64)),
     transforms.ToTensor(),
     transforms.Normalize(tr0, tr0)])
test_transform = transforms.Compose(
    [transforms.Resize((70, 70)),
     transforms.CenterCrop((64, 64)),
     transforms.ToTensor(),
     transforms.Normalize(tr0, tr0)])
train_valid = tcifar10(root='data', train=True, download=True,
                       transform=train_transform)
train = Subset(train_valid, train_ids)
valid = Subset(train_valid, valid_ids)
test = tcifar10(root='data', train=False, transform=test_transform)
train_loader = tdl(dataset=train, shuffle=True, batch_size=batch_size)
valid_loader = tdl(dataset=valid, shuffle=True, batch_size=batch_size)
test_loader = tdl(dataset=test, shuffle=False, batch_size=batch_size)
classes = ('plane', 'car', 'bird', 'cat', 'deer',
           'dog', 'frog', 'horse', 'ship', 'truck')
show_examples(valid_loader, classes, 7)
fpath = 'https://olgabelitskaya.github.io/'
def datasetSplit(dataset, split_info: str = ''): ''' return two subsets of data, where the validation set has each image duplicated 5 times. This is for alignment with related work. ''' if len(split_info) > 0: # load an existing dataset split and generate mappings with open(split_info, 'r') as f: split_info = json.load(f) # create dictionaries for remapping sid2idx = {} for i, (iid, sid) in enumerate(dataset.idpairs()): sid2idx[sid] = i validx = [sid2idx[x] for (x, _) in split_info['val']] tstidx = [sid2idx[x] for (x, _) in split_info['test']] trnidx = [sid2idx[x] for (x, _) in split_info['train']] print( f'preSplit: train({len(trnidx)}), val({len(validx)}), test({len(tstidx)})' ) valset = Subset(dataset, list(validx)) testset = Subset(dataset, list(tstidx)) trainset = Subset(dataset, list(trnidx)) return trainset, valset, testset, split_info valsize = 5000 # 5k images <-> 25k sentences tstsize = 5000 # 5k images <-> 25k sentences if len(dataset) < 155100: print('DATASET SPLIT: detected f30k') valsize = 1000 tstsize = 1000 trainidxs, validxs, testidxs = list(), list(), list() # assign images for val set and test set valiids, testiids = set(), set() for (_, iid, sid) in dataset.lingual: if len(valiids) < valsize: valiids.update([iid]) elif len(testiids) < tstsize and iid not in valiids: testiids.update([iid]) if len(valiids) >= valsize and len(testiids) >= tstsize: break # fill in indexes and backup dataset split information valset = collections.defaultdict(list) tstset = collections.defaultdict(list) trnset = collections.defaultdict(list) for idx, (iid, sid) in enumerate(dataset.idpairs()): if iid in valiids: valset[iid].append(idx) elif iid in testiids: tstset[iid].append(idx) else: trnset[iid].append(idx) # force i:s ratio in validation set to 1:5 for k, v in valset.items(): if len(valset[k]) == 5: continue elif len(valset[k]) > 5: valset[k] = v[:5] else: while len(valset[k]) < 5: valset[k].append(v[0]) # for ce i:s ratio in test set to 1:5 for k, v in tstset.items(): if len(tstset[k]) == 5: continue elif len(tstset[k]) > 5: tstset[k] = v[:5] else: while len(tstset[k]) < 5: tstset[k].append(v[0]) # save dataset split info splitinfo = {'train': [], 'val': [], 'test': []} for iid, idxs in trnset.items(): for idx in idxs: _, iid_, sid = dataset.lingual[idx] assert (iid_ == iid) splitinfo['train'].append([sid, iid]) for iid, idxs in valset.items(): for idx in idxs: _, iid_, sid = dataset.lingual[idx] assert (iid_ == iid) splitinfo['val'].append([sid, iid]) for iid, idxs in tstset.items(): for idx in idxs: _, iid_, sid = dataset.lingual[idx] assert (iid_ == iid) splitinfo['test'].append([sid, iid]) # flatten #validx = reduce(list.__add__, map(list, valset.values())) #tstidx = reduce(list.__add__, map(list, tstset.values())) #trnidx = reduce(list.__add__, map(list, trnset.values())) validx, tstidx, trnidx = [], [], [] for x in valset.values(): validx.extend(x) for x in tstset.values(): tstidx.extend(x) for x in trnset.values(): trnidx.extend(x) assert (len(validx) == 5 * valsize) # Split! print( f'datasetSplit: train({len(trnidx)}), val({len(validx)}), test({len(tstidx)})' ) valset = Subset(dataset, list(validx)) testset = Subset(dataset, list(tstidx)) trainset = Subset(dataset, list(trnidx)) return trainset, valset, testset, splitinfo
def get_train_val_loaders( root_path: str, train_transforms: Callable, val_transforms: Callable, batch_size: int = 16, num_workers: int = 8, val_batch_size: Optional[int] = None, pin_memory: bool = True, random_seed: Optional[int] = None, train_sampler: Optional[Union[Sampler, str]] = None, val_sampler: Optional[Union[Sampler, str]] = None, with_sbd: Optional[str] = None, limit_train_num_samples: Optional[int] = None, limit_val_num_samples: Optional[int] = None, ) -> Tuple[DataLoader, DataLoader, DataLoader]: train_ds = get_train_dataset(root_path) val_ds = get_val_dataset(root_path) if with_sbd is not None: sbd_train_ds = get_train_noval_sbdataset(with_sbd) train_ds = ConcatDataset([train_ds, sbd_train_ds]) if random_seed is not None: np.random.seed(random_seed) if limit_train_num_samples is not None: train_indices = np.random.permutation( len(train_ds))[:limit_train_num_samples] train_ds = Subset(train_ds, train_indices) if limit_val_num_samples is not None: val_indices = np.random.permutation( len(val_ds))[:limit_val_num_samples] val_ds = Subset(val_ds, val_indices) # random samples for evaluation on training dataset if len(val_ds) < len(train_ds): train_eval_indices = np.random.permutation(len(train_ds))[:len(val_ds)] train_eval_ds = Subset(train_ds, train_eval_indices) else: train_eval_ds = train_ds train_ds = TransformedDataset(train_ds, transform_fn=train_transforms) val_ds = TransformedDataset(val_ds, transform_fn=val_transforms) train_eval_ds = TransformedDataset(train_eval_ds, transform_fn=val_transforms) if isinstance(train_sampler, str): assert train_sampler == "distributed" train_sampler = data_dist.DistributedSampler(train_ds) if isinstance(val_sampler, str): assert val_sampler == "distributed" val_sampler = data_dist.DistributedSampler(val_ds, shuffle=False) train_loader = DataLoader( train_ds, shuffle=train_sampler is None, batch_size=batch_size, num_workers=num_workers, sampler=train_sampler, pin_memory=pin_memory, drop_last=True, ) val_batch_size = batch_size * 4 if val_batch_size is None else val_batch_size val_loader = DataLoader( val_ds, shuffle=False, sampler=val_sampler, batch_size=val_batch_size, num_workers=num_workers, pin_memory=pin_memory, drop_last=False, ) train_eval_loader = DataLoader( train_eval_ds, shuffle=False, sampler=val_sampler, batch_size=val_batch_size, num_workers=num_workers, pin_memory=pin_memory, drop_last=False, ) return train_loader, val_loader, train_eval_loader
def sequence_split(dataset, lengths):
    indices = torch.arange(0, sum(lengths)).tolist()
    return [
        Subset(dataset, indices[offset - length:offset])
        for offset, length in zip(_accumulate(lengths), lengths)
    ]
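# Hypothetical usage sketch for sequence_split: take the first 900 samples of a
# dataset as three contiguous, non-overlapping subsets (the sizes are placeholders).
from torchvision import datasets, transforms

mnist = datasets.MNIST(root='./data', train=True, download=True,
                       transform=transforms.ToTensor())
train_part, val_part, test_part = sequence_split(mnist, [700, 100, 100])
print(len(train_part), len(val_part), len(test_part))  # 700 100 100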
def subset_subset(self, subset):
    from torch.utils.data.dataset import Subset
    return Subset(subset, [0])
def compute(model_string, dataset_string, highlighted_set_path, non_highlighted_set_path, validation_set_path, test_set_path, no_Lfs, train_size, boostrap_split, train_split, validation_split, unlabelled_split, results_folder, score_to_optimize, dim_target=2, rationale_noise=0): if dataset_string == 'NREHatespeechDataset': dataset_class = NREHatespeechDataset elif dataset_string == 'NRESpouseDataset': dataset_class = NRESpouseDataset elif dataset_string == 'NREMovieReviewDataset': dataset_class = NREMovieReviewDataset else: raise highlighted_set = dataset_class(highlighted_set_path, rationale_noise=rationale_noise) non_highlighted_set = dataset_class(non_highlighted_set_path, rationale_noise=rationale_noise) test_set = dataset_class(test_set_path) all_train_dataset = ConcatDataset((highlighted_set, non_highlighted_set)) train_set = Subset(all_train_dataset, train_split) if validation_set_path is not None: validation_set = dataset_class(validation_set_path) # Validation split is not interesting if we have an explicit validation set unlabelled_set = Subset(all_train_dataset, validation_split + unlabelled_split) else: validation_set = Subset(all_train_dataset, validation_split) unlabelled_set = Subset(all_train_dataset, unlabelled_split) best_vl_scores = {'accuracy': 0., 'precision': 0., 'recall': 0., 'f1': 0.} te_scores = { 'best_params': {}, 'best_vl_scores': {}, 'test_scores': { 'accuracy': 0, 'precision': 0, 'recall': 0, 'f1': 0 } } if 'fasttext' in highlighted_set_path: embedding_dim = 300 else: embedding_dim = 768 for hpb in [float(np.exp(1)), 5, 10]: for lr in [1e-2]: for l1 in [1e-2, 1e-3]: for l2 in [1e-3, 1e-4]: for no_prototypes in [5, 10]: for num_epochs in [500]: for gating_param in [10, 100]: models = bagging(model_string, no_Lfs, train_set, \ lr=lr, l1_coeff=l1, l2=l2, max_epochs=num_epochs, \ no_prototypes=no_prototypes, embedding_dim=embedding_dim, gating_param=gating_param, batch_size=32, save_path=None, highlights_pow_base=hpb) # Compute prediction for each NRE for dataset_type, dataset in [ ('train', train_set), ('validation', validation_set), ('test', test_set), ('unlabelled', unlabelled_set) ]: if dataset_type == 'unlabelled' and no_Lfs == 1: continue # IMPORTANT: SHUFFLE MUST STAY FALSE (SEE TARGETS LATER) loader = DataLoader( dataset, batch_size=256, collate_fn=custom_collate, shuffle=False, num_workers=0) predictions, _ = compute_predictions_for_DP( models, loader, save_path=Path( results_folder, 'stored_results', f'predictions_{train_size}_size_{dataset_type}_{no_Lfs}_rules_bootstrap_{boostrap_split}.torch' )) # Compute and store targets on different files for dataset, dataset_type in [ (train_set, 'train'), (validation_set, 'validation'), (test_set, 'test') ]: dataset_loader = DataLoader( dataset, batch_size=256, collate_fn=custom_collate, shuffle=False, num_workers=2) all_targets = None for _, _, _, targets, _ in dataset_loader: targets, _ = targets if all_targets is None: all_targets = targets else: all_targets = torch.cat( (all_targets, targets), dim=0) if not os.path.exists( Path(results_folder, 'stored_results')): os.makedirs( Path(results_folder, 'stored_results')) torch.save( all_targets, Path( results_folder, 'stored_results', f'all_targets_{dataset_type}_{train_size}_size_{no_Lfs}_rules_bootstrap_{boostrap_split}.torch' )) all_targets_valid = torch.load( Path( results_folder, 'stored_results', f'all_targets_validation_{train_size}_size_{no_Lfs}_rules_bootstrap_{boostrap_split}.torch' )) all_targets_test = torch.load( Path( results_folder, 'stored_results', 
f'all_targets_test_{train_size}_size_{no_Lfs}_rules_bootstrap_{boostrap_split}.torch' )) # For Data Programming all_targets_valid_score = np.copy( all_targets_valid ) # It will be used to compute scores all_targets_valid[all_targets_valid == 0] = 2 targets_valid = all_targets_valid.numpy() all_targets_test_score = np.copy( all_targets_test ) # It will be used to compute scores all_targets_test[all_targets_test == 0] = 2 train_predictions = torch.load(Path( results_folder, 'stored_results', f'predictions_{train_size}_size_train_{no_Lfs}_rules_bootstrap_{boostrap_split}.torch' ), map_location= 'cpu') if no_Lfs != 1: unlabelled_predictions = torch.load( Path( results_folder, 'stored_results', f'predictions_{train_size}_size_unlabelled_{no_Lfs}_rules_bootstrap_{boostrap_split}.torch' ), map_location='cpu') valid_predictions = torch.load(Path( results_folder, 'stored_results', f'predictions_{train_size}_size_validation_{no_Lfs}_rules_bootstrap_{boostrap_split}.torch' ), map_location= 'cpu') test_predictions = torch.load(Path( results_folder, 'stored_results', f'predictions_{train_size}_size_test_{no_Lfs}_rules_bootstrap_{boostrap_split}.torch' ), map_location='cpu' ) train_predictions = train_predictions.cpu( ).reshape(-1, dim_target) if no_Lfs != 1: unlabelled_predictions = unlabelled_predictions.cpu( ).reshape(-1, dim_target) valid_predictions = valid_predictions.cpu( ).reshape(-1, dim_target) test_predictions = test_predictions.cpu( ).reshape(-1, dim_target) for threshold in [0.01, 0.05]: Ls_train = process_outputs( train_predictions, no_Lfs, threshold) if no_Lfs != 1: Ls_unlabelled = process_outputs( unlabelled_predictions, no_Lfs, threshold) Ls_valid = process_outputs( valid_predictions, no_Lfs, threshold) Ls_test = process_outputs( test_predictions, no_Lfs, threshold) if no_Lfs != 1: # Concatenate train and "unlabelled" data Ls_dataset = np.concatenate( (Ls_train, Ls_unlabelled), axis=0) search_space = { 'n_epochs': [100, 500], 'lr': { 'range': [0.01, 0.001], 'scale': 'log' }, 'show_plots': True, } tuner = RandomSearchTuner( LabelModelNoSeed) # , seed=123) # ------------ DANGER ZONE: be careful here! 
------------ # # Train on train+unlabelled because it is unsupervised (exploit unlabelled data), and "optimize" on # small validation set label_aggregator = tuner.search( search_space, train_args=[Ls_dataset], X_dev=Ls_valid, Y_dev=targets_valid.squeeze(), max_search=10, verbose=False, metric=score_to_optimize, shuffle=False # Leave it False, ow gen_splits generates different splits compared to linear baseline ) Y_vl = label_aggregator.predict( Ls_valid) Y_test = label_aggregator.predict( Ls_test) # ------------ END OF DANGER ZONE ------------ # else: Y_vl = Ls_valid[:, 0] Y_test = Ls_test[:, 0] Y_vl[Y_vl == 2] = 0 Y_test[Y_test == 2] = 0 vl_pr = precision_score( all_targets_valid_score, Y_vl) * 100 vl_rec = recall_score( all_targets_valid_score, Y_vl) * 100 vl_acc = accuracy_score( all_targets_valid_score, Y_vl) * 100 vl_f1 = f1_score(all_targets_valid_score, Y_vl) * 100 te_pr = precision_score( all_targets_test_score, Y_test) * 100 te_rec = recall_score( all_targets_test_score, Y_test) * 100 te_acc = accuracy_score( all_targets_test_score, Y_test) * 100 te_f1 = f1_score(all_targets_test_score, Y_test) * 100 vl_scores = { 'accuracy': float(vl_acc), 'precision': float(vl_pr), 'recall': float(vl_rec), 'f1': float(vl_f1) } if vl_scores[ score_to_optimize] > best_vl_scores[ score_to_optimize]: best_vl_scores = deepcopy(vl_scores) best_params = deepcopy({ 'learning_rate': lr, 'l1': l1, 'l2': l2, 'train_split': train_split, 'validation_split': validation_split, 'no_prototypes': no_prototypes, 'gating_param': gating_param, 'threshold': threshold, 'error_multiplier': hpb, 'epochs': num_epochs }) te_scores['best_params'] = best_params te_scores[ 'best_vl_scores'] = best_vl_scores te_scores['test_scores'] = { 'accuracy': float(te_acc), 'precision': float(te_pr), 'recall': float(te_rec), 'f1': float(te_f1) } print(f'Best VL scores found is {best_vl_scores}') print(f'Best TE scores found is {te_scores["test_scores"]}') print( f'End of model assessment for train size {train_size} and {no_Lfs} rules, test results are {te_scores}' ) if not os.path.exists(results_folder): os.makedirs(results_folder) with open( Path( results_folder, f'NRE_size_{train_size}_rules_{no_Lfs}_test_results_bootstrap_{boostrap_split}.json' ), 'w') as f: json.dump(te_scores, f)
def tune_hyperparams(args, task, preprocess_func, model, device): ''' Tune hyperparameters Given task, preprocess function, and model, this method returns tuned hyperparameters. ''' # TODO: Implement hyperparameter tuning kwargs = {'num_workers': 1, 'pin_memory': False} if use_cuda else {} train_loader = torch.utils.data.DataLoader(datasets.CIFAR100( '../data', train=True, download=True, transform=preprocess_func), batch_size=args.batch_size, shuffle=True, **kwargs) test_loader = torch.utils.data.DataLoader(datasets.CIFAR100( '../data', train=False, transform=preprocess_func), batch_size=args.test_batch_size, shuffle=True, **kwargs) n_samples = len(train_loader.dataset) train_size = int(n_samples / 5) subset1_indices = list(range(0, train_size)) subset2_indices = list(range(train_size, n_samples)) train_dataset = Subset(train_loader.dataset, subset1_indices) val_dataset = Subset(train_loader.dataset, subset2_indices) train_dataset_loader = torch.utils.data.DataLoader( train_dataset, batch_size=args.batch_size, shuffle=True) val_dataset_loader = torch.utils.data.DataLoader( val_dataset, batch_size=args.batch_size, shuffle=False) torch.save(model.state_dict(), 'init_model') init_lr_list = [0.001, 0.01] first_time = [0.05, 0.1, 0.2] second_time = [0.4, 0.5, 0.6] third_time = [0.7, 0.8, 0.9] accuracy = 0 pre_accuracy = 0 for i in init_lr_list: for j in first_time: for k in second_time: for l in third_time: model.load_state_dict(torch.load('init_model')) for epoch in range(1, args.epochs + 1): warmup_lr = wrap_scheduler(i, j, k, l, epoch) optimizer = optim.Adadelta(model.parameters(), lr=warmup_lr) train(args, model, device, train_loader, optimizer, epoch) accuracy = test(args, model, device, test_loader) if accuracy > pre_accuracy: result_init_lr = i result_first_time = j result_second_time = k result_third_time = l pre_accuracy = accuracy torch.save(model.state_dict(), 'learned_model') Hyperparams.hyperparam0 = result_init_lr Hyperparams.hyperparam1 = result_first_time Hyperparams.hyperparam2 = result_second_time Hyperparams.hyperparam3 = result_third_time param_list = [ result_init_lr, result_first_time, result_second_time, result_third_time ] return param_list
trans = transforms.Compose([transforms.ToTensor(),
                            transforms.Normalize((0.0,), (1.0,))])

# MNIST Dataset
train_dataset = datasets.MNIST(root='./mnist_data/', train=True,
                               transform=trans, download=True)
test_dataset = datasets.MNIST(root='./mnist_data/', train=False, transform=trans)

train_dataset = Subset(train_dataset, range(n))
test_dataset = Subset(test_dataset, range(1000))
print(len(train_dataset))
print(len(test_dataset))

train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                           batch_size=batch_size, shuffle=True)
test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
                                          batch_size=batch_size, shuffle=False)


class GaussianProcessLayer(gpytorch.models.AdditiveGridInducingVariationalGP):
def train(epoch): epoch_loss = 0 startIter = 1 # keep track of batch iter across subsets for logging if opt.cacheRefreshRate > 0: subsetN = ceil(len(train_set) / opt.cacheRefreshRate) #TODO randomise the arange before splitting? subsetIdx = np.array_split(np.arange(len(train_set)), subsetN) else: subsetN = 1 subsetIdx = [np.arange(len(train_set))] nBatches = (len(train_set) + opt.batchSize - 1) // opt.batchSize for subIter in range(subsetN): print('====> Building Cache') model.eval() train_set.cache = join(opt.cachePath, train_set.whichSet + '_feat_cache.hdf5') with h5py.File(train_set.cache, mode='w') as h5: pool_size = encoder_dim if opt.pooling.lower() == 'netvlad': pool_size *= opt.num_clusters h5feat = h5.create_dataset("features", [len(whole_train_set), pool_size], dtype=np.float32) with torch.no_grad(): for iteration, (input, indices) in enumerate(whole_training_data_loader, 1): input = input.to(device) image_encoding = model.encoder(input) vlad_encoding = model.pool(image_encoding) h5feat[indices.detach().numpy(), :] = vlad_encoding.detach().cpu().numpy() del input, image_encoding, vlad_encoding sub_train_set = Subset(dataset=train_set, indices=subsetIdx[subIter]) training_data_loader = DataLoader(dataset=sub_train_set, num_workers=opt.threads, batch_size=opt.batchSize, shuffle=True, collate_fn=dataset.collate_fn, pin_memory=cuda) print('Allocated:', torch.cuda.memory_allocated()) print('Cached:', torch.cuda.memory_cached()) model.train() for iteration, (query, positives, negatives, negCounts, indices) in enumerate(training_data_loader, startIter): # some reshaping to put query, pos, negs in a single (N, 3, H, W) tensor # where N = batchSize * (nQuery + nPos + nNeg) if query is None: continue # in case we get an empty batch B, C, H, W = query.shape nNeg = torch.sum(negCounts) input = torch.cat([query, positives, negatives]) input = input.to(device) image_encoding = model.encoder(input) vlad_encoding = model.pool(image_encoding) vladQ, vladP, vladN = torch.split(vlad_encoding, [B, B, nNeg]) optimizer.zero_grad() # calculate loss for each Query, Positive, Negative triplet # due to potential difference in number of negatives have to # do it per query, per negative loss = 0 for i, negCount in enumerate(negCounts): for n in range(negCount): negIx = (torch.sum(negCounts[:i]) + n).item() loss += criterion(vladQ[i:i+1], vladP[i:i+1], vladN[negIx:negIx+1]) loss /= nNeg.float().to(device) # normalise by actual number of negatives loss.backward() optimizer.step() del input, image_encoding, vlad_encoding, vladQ, vladP, vladN del query, positives, negatives batch_loss = loss.item() epoch_loss += batch_loss if iteration % 50 == 0 or nBatches <= 10: print("==> Epoch[{}]({}/{}): Loss: {:.4f}".format(epoch, iteration, nBatches, batch_loss), flush=True) writer.add_scalar('Train/Loss', batch_loss, ((epoch-1) * nBatches) + iteration) writer.add_scalar('Train/nNeg', nNeg, ((epoch-1) * nBatches) + iteration) print('Allocated:', torch.cuda.memory_allocated()) print('Cached:', torch.cuda.memory_cached()) startIter += len(training_data_loader) del training_data_loader, loss optimizer.zero_grad() torch.cuda.empty_cache() remove(train_set.cache) # delete HDF5 cache avg_loss = epoch_loss / nBatches print("===> Epoch {} Complete: Avg. Loss: {:.4f}".format(epoch, avg_loss), flush=True) writer.add_scalar('Train/AvgLoss', avg_loss, epoch)
def train(args):
    logger.log('Training processes started at {}.'.format(time.ctime()))

    # Load files
    print('Loading files...')
    ref_bim = pd.read_table(args.ref + '.bim', sep='\t|\s+',
                            names=['chr', 'id', 'dist', 'pos', 'a1', 'a2'],
                            header=None, engine='python')
    ref_phased = pd.read_table(args.ref + '.bgl.phased', sep='\t|\s+',
                               header=None, engine='python', skiprows=5).iloc[:, 1:]
    ref_phased = ref_phased.set_index(1)
    sample_bim = pd.read_table(args.sample + '.bim', sep='\t|\s+',
                               names=['chr', 'id', 'dist', 'pos', 'a1', 'a2'],
                               header=None, engine='python')
    with open(args.model + '.model.json', 'r') as f:
        model_config = json.load(f)
    with open(args.hla + '.hla.json', 'r') as f:
        hla_info = json.load(f)
    model_dir = args.model_dir
    if args.max_digit == '2-digit':
        digit_list = ['2-digit']
    elif args.max_digit == '4-digit':
        digit_list = ['2-digit', '4-digit']
    elif args.max_digit == '6-digit':
        digit_list = ['2-digit', '4-digit', '6-digit']

    # Extract only SNPs which exist both in reference and sample data
    concord_snp = ref_bim.pos.isin(sample_bim.pos)
    for i in range(len(concord_snp)):
        if concord_snp.iloc[i]:
            tmp = np.where(sample_bim.pos == ref_bim.iloc[i].pos)[0][0]
            if set((ref_bim.iloc[i].a1, ref_bim.iloc[i].a2)) != \
                    set((sample_bim.iloc[tmp].a1, sample_bim.iloc[tmp].a2)):
                concord_snp.iloc[i] = False
    num_ref = ref_phased.shape[1] // 2
    num_concord = np.sum(concord_snp)
    logger.log('{} people loaded from reference.'.format(num_ref))
    logger.log('{} SNPs loaded from reference.'.format(len(ref_bim)))
    logger.log('{} SNPs loaded from sample.'.format(len(sample_bim)))
    logger.log('{} SNPs matched in position and used for training.'.format(num_concord))
    ref_concord_phased = ref_phased.iloc[np.where(concord_snp)[0]]
    model_bim = ref_bim.iloc[np.where(concord_snp)[0]]
    if not os.path.exists(os.path.join(BASE_DIR, model_dir)):
        os.mkdir(os.path.join(BASE_DIR, model_dir))
    else:
        print('Warning: Directory for saving models already exists.')
    model_bim.to_csv(os.path.join(BASE_DIR, model_dir, 'model.bim'),
                     sep='\t', header=False, index=False)

    # Encode reference SNP data
    snp_encoded = np.zeros((2 * num_ref, num_concord, 2))
    for i in range(num_concord):
        a1 = model_bim.iloc[i].a1
        a2 = model_bim.iloc[i].a2
        snp_encoded[ref_concord_phased.iloc[i, :] == a1, i, 0] = 1
        snp_encoded[ref_concord_phased.iloc[i, :] == a2, i, 1] = 1

    # Encode reference HLA data
    hla_encoded = {}
    for hla in hla_info:
        for i in range(2 * num_ref):
            hla_encoded[hla] = {}
        for digit in digit_list:
            hla_encoded[hla][digit] = np.zeros(2 * num_ref)
            for j in range(len(hla_info[hla][digit])):
                hla_encoded[hla][digit][np.where(ref_phased.loc[hla_info[hla][digit][j]] == 'P')[0]] = j

    # Parameters for training
    val_split = args.val_split
    batch_size = 64
    num_epoch = args.num_epoch
    patience = args.patience
    result_best_val = pd.DataFrame(index=hla_info.keys(), columns=digit_list)

    for g in model_config:
        hla_list = model_config[g]['HLA']
        w = model_config[g]['w'] * 1000
        st = int(hla_info[hla_list[0]]['pos']) - w
        ed = int(hla_info[hla_list[-1]]['pos']) + w
        st_index = max(0, np.sum(model_bim.pos < st) - 1)
        ed_index = min(num_concord, num_concord - np.sum(model_bim.pos > ed))
        for digit in digit_list:
            logger.log('Training models for {} at {} level.'.format(', '.join(hla_list), digit))

            # Count HLA alleles
            allele_cnts = {}
            all_one_allele = True
            skip_hlas = []
            for hla in hla_list:
                allele_cnts[hla] = len(hla_info[hla][digit])
                if allele_cnts[hla] == 1:
                    skip_hlas.append(hla)
                else:
                    all_one_allele = False
            if all_one_allele:
                logger.log('Skipped group {} at {} level because all genes have only one allele.'.format(g, digit))
                continue
            elif len(skip_hlas) != 0:
                logger.log('Skipped {} at {} level because of only one allele.'.format(', '.join(skip_hlas), digit))

            # Generate training data
            train_data = []
            for i in range(2 * num_ref):
                tmp = [snp_encoded[i, st_index:ed_index]]
                for hla in hla_list:
                    if not allele_cnts[hla] == 1:
                        tmp.append(hla_encoded[hla][digit][i])
                train_data.append(tmp)
            num_task = len(train_data[0]) - 1

            # Spare the first part of the data for validation
            train_index = np.arange(int(2 * num_ref * val_split), 2 * num_ref)
            val_index = np.arange(int(2 * num_ref * val_split))
            train_loader = torch.utils.data.DataLoader(Subset(train_data, train_index), batch_size=batch_size)
            val_loader = torch.utils.data.DataLoader(Subset(train_data, val_index), batch_size=batch_size, shuffle=False)

            # Generate models
            model = {'shared': SharedNet(model_config[g], ed_index - st_index, input_collapse=False)}
            # Transfer parameters of shared net from those of the upper digit
            if not digit == '2-digit':
                try:
                    model['shared'].load_state_dict(torch.load(os.path.join(
                        BASE_DIR, model_dir,
                        '{}_{}_shared_model.pickle'.format(g, digit_list[digit_list.index(digit) - 1]))))
                    logger.log('Transferred parameters from shared net of upper digit at {} level.'.format(digit))
                except FileNotFoundError:
                    logger.log('Shared net of upper digit not found at {} level.'.format(digit))
            t = 0
            for hla in hla_list:
                if not allele_cnts[hla] == 1:
                    model[t] = EachNet(model_config[g], allele_cnts[hla])
                    t += 1
            for m in model:
                model[m] = model[m].float()
                model[m].train()
            model_params = []
            for m in model:
                model_params += model[m].parameters()
            optimizer = torch.optim.Adam(model_params)
            loss_fn = get_loss(num_task)
            metric = get_metrics(num_task)
            best_epoch = 1
            best_ave_val_acc = 0
            patience_cnt = 0

            # Training iteration
            for epoch in range(1, num_epoch + 1):
                model = train_model(train_loader, hla_list, allele_cnts, num_task,
                                    model, optimizer, loss_fn, metric, epoch)
                val_acc = test_model(val_loader, hla_list, allele_cnts, num_task, model, metric, 'val')
                ave_val_acc = np.mean(val_acc)
                logger.log('Average validation accuracy: {}'.format(ave_val_acc))
                # Save the current model if it is equal to or better than the best one so far
                if ave_val_acc >= best_ave_val_acc:
                    torch.save(model['shared'].state_dict(),
                               os.path.join(BASE_DIR, model_dir,
                                            '{}_{}_epoch{}_shared_model.pickle'.format(g, digit, epoch)))
                    t = 0
                    for hla in hla_list:
                        if not allele_cnts[hla] == 1:
                            torch.save(model[t].state_dict(),
                                       os.path.join(BASE_DIR, model_dir,
                                                    '{}_{}_epoch{}_{}_model.pickle'.format(g, digit, epoch, hla)))
                            t += 1
                    if not epoch == 1:
                        os.remove(os.path.join(BASE_DIR, model_dir,
                                               '{}_{}_epoch{}_shared_model.pickle'.format(g, digit, best_epoch)))
                        for hla in hla_list:
                            if not allele_cnts[hla] == 1:
                                os.remove(os.path.join(BASE_DIR, model_dir,
                                                       '{}_{}_epoch{}_{}_model.pickle'.format(g, digit, best_epoch, hla)))
                    best_epoch = epoch
                if ave_val_acc > best_ave_val_acc:
                    best_ave_val_acc = ave_val_acc
                    patience_cnt = 0
                # Increase patience count if the current model is not better than the best one
                if ave_val_acc <= best_ave_val_acc:
                    patience_cnt += 1
                # Early stopping when the patience count reaches the upper limit
                if patience_cnt >= patience:
                    logger.log('Early stopping.')
                    break
                if epoch == num_epoch:
                    logger.log('All epochs finished without early stopping.')
            logger.log('The best model is at epoch {}.'.format(best_epoch))

            # Rename models
            os.rename(os.path.join(BASE_DIR, model_dir,
                                   '{}_{}_epoch{}_shared_model.pickle'.format(g, digit, best_epoch)),
                      os.path.join(BASE_DIR, model_dir, '{}_{}_shared_model.pickle'.format(g, digit)))
            for hla in hla_list:
                if not allele_cnts[hla] == 1:
                    os.rename(os.path.join(BASE_DIR, model_dir,
                                           '{}_{}_epoch{}_{}_model.pickle'.format(g, digit, best_epoch, hla)),
                              os.path.join(BASE_DIR, model_dir, '{}_{}_{}_model.pickle'.format(g, digit, hla)))

            # Load the best models
            model['shared'].load_state_dict(torch.load(os.path.join(
                BASE_DIR, model_dir, '{}_{}_shared_model.pickle'.format(g, digit))))
            t = 0
            for hla in hla_list:
                if not allele_cnts[hla] == 1:
                    model[t].load_state_dict(torch.load(os.path.join(
                        BASE_DIR, model_dir, '{}_{}_{}_model.pickle'.format(g, digit, hla))))
                    t += 1
            for m in model:
                model[m] = model[m].float()
                model[m].eval()
            # Evaluate the best models on the validation split without tracking gradients
            with torch.no_grad():
                best_val_acc = test_model(val_loader, hla_list, allele_cnts, num_task, model, metric, 'best_val')
            result_best_val.loc[hla_list, digit] = best_val_acc
            result_best_val.to_csv(os.path.join(BASE_DIR, model_dir, 'best_val.txt'),
                                   header=True, index=True, sep='\t')

    print('The processes have been finished at {}.'.format(time.ctime()))
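# The holdout above simply reserves the first `val_split` fraction of the reference
# haplotypes for validation: np.arange builds the two index ranges and Subset wraps
# them before handing the data to DataLoader. A minimal self-contained sketch of that
# pattern follows; the toy list of (x, y) pairs and the 0.1 split ratio are
# assumptions for illustration only, not part of the code above.
import numpy as np
import torch
from torch.utils.data import DataLoader, Subset

samples = [(torch.randn(4), torch.tensor(i % 2)) for i in range(20)]   # toy (x, y) pairs
val_split = 0.1

val_index = np.arange(int(len(samples) * val_split))                   # first fraction -> validation
train_index = np.arange(int(len(samples) * val_split), len(samples))   # the rest -> training

train_loader = DataLoader(Subset(samples, train_index), batch_size=4)
val_loader = DataLoader(Subset(samples, val_index), batch_size=4, shuffle=False)
print(len(train_loader.dataset), len(val_loader.dataset))              # 18 2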
def train(args):
    yolo_dataset = YoloDataset(DATASET, "faces.csv", input_size=208,
                               transform=transforms.Compose([ToTensor()]))
    dataset_indices = set(range(len(yolo_dataset)))
    # random.sample needs a sequence (not a set) on newer Python versions
    test_indices = random.sample(sorted(dataset_indices), int(args.test_size * len(yolo_dataset)))
    train_indices = list(dataset_indices - set(test_indices))
    trainset = Subset(yolo_dataset, train_indices)
    # The held-out indices form the validation subset
    testset = Subset(yolo_dataset, test_indices)
    trainloader = torch.utils.data.DataLoader(trainset, batch_size=args.batch_size, shuffle=True, num_workers=4)
    validloader = torch.utils.data.DataLoader(testset, batch_size=args.batch_size, shuffle=True, num_workers=4)
    dataloaders = {TRAIN: trainloader, VALID: validloader}

    model = MyResnet()
    if CUDA:
        model.cuda()
    optimizer = optim.Adam(model.conv_addition.parameters(), lr=args.lr)
    saver = CheckpointSaver(args.save_dir_name, max_checkpoints=3)
    scheduler = lr_scheduler.StepLR(optimizer, step_size=50, gamma=0.5)
    # writer = SummaryWriter(os.path.join(args.save_dir_name, "log"))

    train_losses, test_losses = [], []
    for epoch in range(args.epochs):
        for phase in dataloaders:
            if phase == TRAIN:
                scheduler.step()
                model.train()
            else:
                model.eval()
            epoch_avg = AVG()
            logger.info(f"-----------------{phase.upper()}-----------------")
            for i, data in enumerate(dataloaders[phase]):
                img, y = data['img'], data['y']
                if CUDA:
                    img = img.cuda()
                    y = y.cuda()
                optimizer.zero_grad()
                with torch.set_grad_enabled(phase == TRAIN):
                    pred = model(img)
                    y_ = pred.permute((0, 2, 3, 1))
                    y = y.permute((0, 2, 3, 1))
                    loss = loss_function(y_, y)
                    epoch_avg.add(loss.item())
                    # backward + optimize only if in training phase
                    if phase == TRAIN:
                        loss.backward()
                        optimizer.step()
                logger.info(f"Epoch: {epoch}, batch: {i}, loss {loss.item()}")
            logger.info(f"Epoch: {epoch}, average loss: {epoch_avg}")
            if phase == TRAIN:
                train_losses.append(str(epoch_avg))
            else:
                test_losses.append(str(epoch_avg))
            # writer.add_scalar(f'{phase}_data/average_loss', str(epoch_avg), epoch)
        if epoch % 20 == 0:
            saver.save(model, optimizer, epoch)
    with open(os.path.join(args.save_dir_name, "losses.txt"), 'w') as file:
        file.write(str(train_losses))
        file.write(str(test_losses))
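# The split above draws random test indices with random.sample and wraps both halves
# of the dataset in Subset. A minimal stand-alone sketch of the same idea is shown
# below; the dummy TensorDataset standing in for YoloDataset, the 0.2 test fraction,
# and the fixed seed are assumptions for the example only.
import torch
from torch.utils.data import DataLoader, Subset, TensorDataset, random_split

dummy = TensorDataset(torch.randn(100, 3, 208, 208), torch.zeros(100))  # stand-in dataset

# Option 1: manual index split with Subset (mirrors the snippet above)
g = torch.Generator().manual_seed(0)
perm = torch.randperm(len(dummy), generator=g).tolist()
n_test = int(0.2 * len(dummy))
test_set = Subset(dummy, perm[:n_test])
train_set = Subset(dummy, perm[n_test:])

# Option 2: random_split does the same bookkeeping in one call
train_set2, test_set2 = random_split(dummy, [len(dummy) - n_test, n_test], generator=g)

train_loader = DataLoader(train_set, batch_size=16, shuffle=True)
valid_loader = DataLoader(test_set, batch_size=16, shuffle=False)
print(len(train_set), len(test_set))  # 80 20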
def main():
    torch.cuda.set_device(args.gpu)
    cudnn.benchmark = True
    cudnn.enabled = True
    logging.info('gpu device = %d' % args.gpu)
    logging.info("args = %s", args)

    cur_epoch = 0
    net = eval(args.arch)
    print(net)
    code = gen_code_from_list(net, node_num=int((len(net) / 4)))
    genotype = translator([code, code], max_node=int((len(net) / 4)))
    print(genotype)

    model_ema = None
    if not continue_train:
        print('train from scratch')
        model = Network(args.init_ch, 10, args.layers, args.auxiliary, genotype).cuda()
        print("model init params values:", flatten_params(model))
        logging.info("param size = %fMB", utils.count_parameters_in_MB(model))
        criterion = CutMixCrossEntropyLoss(True).cuda()
        optimizer = torch.optim.SGD(model.parameters(), args.lr, momentum=args.momentum, weight_decay=args.wd)
        if args.model_ema:
            model_ema = ModelEma(
                model,
                decay=args.model_ema_decay,
                device='cpu' if args.model_ema_force_cpu else '')
    else:
        print('continue training from checkpoint')
        model = Network(args.init_ch, 10, args.layers, args.auxiliary, genotype).cuda()
        criterion = CutMixCrossEntropyLoss(True).cuda()
        optimizer = torch.optim.SGD(model.parameters(), args.lr, momentum=args.momentum, weight_decay=args.wd)
        logging.info("param size = %fMB", utils.count_parameters_in_MB(model))
        checkpoint = torch.load(args.save + '/model.pt')
        model.load_state_dict(checkpoint['model_state_dict'])
        cur_epoch = checkpoint['epoch']
        optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
        if args.model_ema:
            model_ema = ModelEma(
                model,
                decay=args.model_ema_decay,
                device='cpu' if args.model_ema_force_cpu else '',
                resume=args.save + '/model.pt')

    train_transform, valid_transform = utils._auto_data_transforms_cifar10(args)
    ds_train = dset.CIFAR10(root=args.data, train=True, download=True, transform=train_transform)

    args.cv = -1
    if args.cv >= 0:
        sss = StratifiedShuffleSplit(n_splits=5, test_size=0.2, random_state=0)
        sss = sss.split(list(range(len(ds_train))), ds_train.targets)
        for _ in range(args.cv + 1):
            train_idx, valid_idx = next(sss)
        ds_valid = Subset(ds_train, valid_idx)
        ds_train = Subset(ds_train, train_idx)
    else:
        ds_valid = Subset(ds_train, [])

    train_queue = torch.utils.data.DataLoader(
        CutMix(ds_train, 10, beta=1.0, prob=0.5, num_mix=2),
        batch_size=args.batch_size, shuffle=True, num_workers=2, pin_memory=True)
    valid_queue = torch.utils.data.DataLoader(
        dset.CIFAR10(root=args.data, train=False, transform=valid_transform),
        batch_size=args.batch_size, shuffle=True, num_workers=2, pin_memory=True)

    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, float(args.epochs))

    best_acc = 0.0
    if continue_train:
        for i in range(cur_epoch + 1):
            scheduler.step()

    for epoch in range(cur_epoch, args.epochs):
        print('cur_epoch is', epoch)
        scheduler.step()
        logging.info('epoch %d lr %e', epoch, scheduler.get_lr()[0])
        model.drop_path_prob = args.drop_path_prob * epoch / args.epochs
        if model_ema is not None:
            model_ema.ema.drop_path_prob = args.drop_path_prob * epoch / args.epochs

        train_acc, train_obj = train(train_queue, model, criterion, optimizer, epoch, model_ema)
        logging.info('train_acc: %f', train_acc)

        if model_ema is not None and not args.model_ema_force_cpu:
            valid_acc_ema, valid_obj_ema = infer(valid_queue, model_ema.ema, criterion, ema=True)
            logging.info('valid_acc_ema %f', valid_acc_ema)

        valid_acc, valid_obj = infer(valid_queue, model, criterion)
        logging.info('valid_acc: %f', valid_acc)

        if valid_acc > best_acc:
            best_acc = valid_acc
            print('this model is the best')
            torch.save(
                {
                    'epoch': epoch,
                    'model_state_dict': model.state_dict(),
                    'optimizer_state_dict': optimizer.state_dict()
                }, os.path.join(args.save, 'top1.pt'))
        print('current best acc is', best_acc)
        logging.info('best_acc: %f', best_acc)

        if model_ema is not None:
            torch.save(
                {
                    'epoch': epoch,
                    'model_state_dict': model.state_dict(),
                    'optimizer_state_dict': optimizer.state_dict(),
                    'state_dict_ema': get_state_dict(model_ema)
                }, os.path.join(args.save, 'model.pt'))
        else:
            torch.save(
                {
                    'epoch': epoch,
                    'model_state_dict': model.state_dict(),
                    'optimizer_state_dict': optimizer.state_dict()
                }, os.path.join(args.save, 'model.pt'))
        print('saved to: model.pt')
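# The args.cv >= 0 branch in main() carves a stratified validation fold out of the
# CIFAR-10 training set with sklearn's StratifiedShuffleSplit and wraps both halves
# in Subset. A small self-contained sketch of that pattern follows; the synthetic
# labels (used instead of downloading CIFAR-10) and the fold index cv = 0 are
# assumptions for illustration only.
import numpy as np
import torch
from sklearn.model_selection import StratifiedShuffleSplit
from torch.utils.data import Subset, TensorDataset

labels = np.repeat(np.arange(10), 100)                 # 1000 samples, 10 balanced classes
data = torch.randn(len(labels), 3, 32, 32)
ds_train = TensorDataset(data, torch.as_tensor(labels))

cv = 0                                                 # which of the 5 folds to use
sss = StratifiedShuffleSplit(n_splits=5, test_size=0.2, random_state=0)
splits = sss.split(np.arange(len(ds_train)), labels)
for _ in range(cv + 1):
    train_idx, valid_idx = next(splits)

ds_valid = Subset(ds_train, valid_idx)
ds_train = Subset(ds_train, train_idx)
print(len(ds_train), len(ds_valid))                    # 800 200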