def get_data(transform, mode='train'):
    print('Loading data for "%s" ...' % mode)
    global dataset
    if args.dataset == 'ucf101':
        dataset = UCF101Dataset(mode=mode,
                                transform=transform,
                                seq_len=args.seq_len,
                                num_seq=args.num_seq,
                                downsample=args.ds,
                                which_split=args.split,
                                return_label=True)
    elif args.dataset == 'hmdb51':
        dataset = HMDB51Dataset(mode=mode,
                                transform=transform,
                                seq_len=args.seq_len,
                                num_seq=args.num_seq,
                                downsample=args.ds,
                                which_split=args.split,
                                return_label=True)
    elif args.dataset.split('_')[0] == 'CATER':
        dataset = CATERDataset(mode=mode,
                               task=args.dataset.split('_', 1)[1],
                               transform=transform,
                               seq_len=args.seq_len,
                               num_seq=args.num_seq,
                               downsample=args.ds,
                               # which_split=args.split,
                               return_label=True)
    else:
        raise ValueError('dataset not supported')
    my_sampler = data.RandomSampler(dataset)
    if mode == 'train':
        data_loader = data.DataLoader(dataset,
                                      batch_size=args.batch_size,
                                      sampler=my_sampler,
                                      shuffle=False,
                                      num_workers=args.workers,
                                      pin_memory=True,
                                      drop_last=True)
    elif mode == 'val':
        data_loader = data.DataLoader(dataset,
                                      batch_size=args.batch_size,
                                      sampler=my_sampler,
                                      shuffle=False,
                                      num_workers=args.workers,
                                      pin_memory=True,
                                      drop_last=True)
    elif mode == 'test':
        data_loader = data.DataLoader(dataset,
                                      batch_size=1,
                                      sampler=my_sampler,
                                      shuffle=False,
                                      num_workers=args.workers,
                                      pin_memory=True)
    print('"%s" dataset size: %d' % (mode, len(dataset)))
    return data_loader, dataset
def make_data_sampler(dataset, is_train=True, shuffle=True, is_distributed=False):
    if is_train:
        sampler = dutils.RandomSampler(dataset)
    else:
        sampler = dutils.SequentialSampler(dataset)
    return sampler
def get_random_data_loader(opt):
    '''
    Sample random batch. Used for evaluation during training.
    '''
    dset = get_dataset(opt)
    # Random sampler
    sampler = data.RandomSampler(dset)
    dloader = data.DataLoader(dset, batch_size=opt.batch_size, sampler=sampler)
    return dloader
def data_sampler(dataset, shuffle, distributed):
    if distributed:
        return data.distributed.DistributedSampler(dataset, shuffle=shuffle)
    if shuffle:
        return data.RandomSampler(dataset)
    else:
        return data.SequentialSampler(dataset)
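A minimal usage sketch for a helper like the one above (the names my_dataset, distributed, and num_epochs are placeholders, not part of the original): the returned sampler is handed to a DataLoader with shuffle left unset, and a DistributedSampler additionally needs set_epoch() called once per epoch so its shuffling order changes between epochs.

from torch.utils import data

loader_sampler = data_sampler(my_dataset, shuffle=True, distributed=distributed)
loader = data.DataLoader(my_dataset, batch_size=32, sampler=loader_sampler,
                         num_workers=4)

for epoch in range(num_epochs):
    if distributed:
        # DistributedSampler derives its shuffling seed from the epoch number.
        loader_sampler.set_epoch(epoch)
    for batch in loader:
        pass  # training step goes here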
def __init__(self, dataset, batch_size, device='cpu'):
    super().__init__()
    self._dataset = dataset
    self.batch_size = batch_size
    self.device = device
    self._sampler = data.BatchSampler(
        data.RandomSampler(self._dataset, replacement=False),
        self.batch_size, False)
def prepare_loaders(args, datasets):
    collate_fn = collate_fns[args.task_type]
    loaders = {'source': {}, 'target': {}}
    loaders['source']['train'] = torchdata.DataLoader(
        datasets['source']['train'],
        batch_size=args.batch_size,
        num_workers=args.nthreads,
        sampler=torchdata.RandomSampler(datasets['source']['train'],
                                        replacement=True),
        collate_fn=collate_fn,
        drop_last=True)
    loaders['target']['labeled'] = torchdata.DataLoader(
        datasets['target']['labeled'],
        batch_size=args.batch_size,
        num_workers=args.nthreads,
        sampler=torchdata.RandomSampler(datasets['target']['labeled'],
                                        replacement=True),
        collate_fn=collate_fn,
        drop_last=True)
    loaders['target']['unlabeled'] = torchdata.DataLoader(
        datasets['target']['unlabeled'],
        batch_size=args.batch_size,
        num_workers=args.nthreads,
        sampler=torchdata.RandomSampler(datasets['target']['unlabeled'],
                                        replacement=True),
        collate_fn=collate_fn,
        drop_last=True)
    loaders['source']['validation'] = torchdata.DataLoader(
        datasets['source']['validation'],
        batch_size=1,
        num_workers=args.nthreads,
        collate_fn=collate_fn,
    )
    loaders['target']['validation'] = torchdata.DataLoader(
        datasets['target']['validation'],
        batch_size=1,
        num_workers=args.nthreads,
        collate_fn=collate_fn,
    )
    return loaders
def init_train_loader(args, path_to_source_train, path_to_sampled_train):
    fmri_train = fastMRIData(path_to_source_train, path_to_sampled_train)
    if args.random_subset:
        sampler = torch_data.RandomSampler(fmri_train, replacement=True,
                                           num_samples=args.random_subset)
        train_loader = torch_data.DataLoader(fmri_train,
                                             sampler=sampler,
                                             batch_size=args.train_batch_size,
                                             shuffle=False,
                                             num_workers=args.loader_workers)
    else:
        train_loader = torch_data.DataLoader(fmri_train,
                                             batch_size=args.train_batch_size,
                                             shuffle=True,
                                             num_workers=args.loader_workers)
    return train_loader
def run_with_epoch(self):
    progress_bar = progressbar.ProgressBar(max_value=self.num_epoch)
    sampler = data.RandomSampler(torch.arange(self.num_data),
                                 replacement=True, num_samples=self.num_epoch)
    self.data_generator = data.DataLoader(mdp_dataset(self),
                                          batch_size=self.batch_size,
                                          sampler=sampler,
                                          num_workers=self.num_workers,
                                          drop_last=False)
    for batch_A_t, batch_b_t, batch_C_t, batch_t_m in self.data_generator:
        batch_size = batch_t_m.shape[0]
        for j in range(batch_size):
            A_t, b_t, C_t, t_m = svrg.get_stoc_data(self, batch_A_t, batch_b_t,
                                                    batch_C_t, batch_t_m, j)
            self.theta.sub_(torch.mul(
                mspbe.mspbe_grad_theta(self.theta, self.omega, A_t, rho=self.rho),
                self.sigma_theta))
            self.omega.sub_(torch.mul(
                mspbe.mspbe_grad_omega(self.theta, self.omega, A_t, b_t, C_t,
                                       self.rho_omega),
                self.sigma_omega))
            self.end_of_epoch()
            progress_bar.update(self.cur_epoch)
def data_sampler(dataset, shuffle, distributed, weights=None):
    if distributed:
        return data.distributed.DistributedSampler(dataset, shuffle=shuffle)
    if weights is not None:
        return data.WeightedRandomSampler(weights, len(weights), replacement=True)
    if shuffle:
        return data.RandomSampler(dataset)
    else:
        return data.SequentialSampler(dataset)
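For the weights branch above, the per-sample weights are commonly the inverse class frequencies; a small sketch of that convention (the labels array and my_dataset are illustrative assumptions, not from the original code):

import numpy as np

# Assumed: `labels` holds one integer class id per sample of `my_dataset`.
labels = np.array([0, 0, 0, 1, 2, 2])
class_counts = np.bincount(labels)            # samples per class
sample_weights = 1.0 / class_counts[labels]   # rarer classes get larger weights

balanced_sampler = data_sampler(my_dataset, shuffle=True, distributed=False,
                                weights=sample_weights)
loader = data.DataLoader(my_dataset, batch_size=4, sampler=balanced_sampler)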
def _split(self, valid_rate, shuffle_seed):
    self.indices = list(range(self.dataset_size))
    random.seed(shuffle_seed)
    random.shuffle(self.indices)
    split = int(np.floor((1 - valid_rate) * self.dataset_size))
    self.train_indices, self.valid_indices = self.indices[:split], self.indices[split:]
    self.train_dataset = data.Subset(self, self.train_indices)
    self.valid_dataset = data.Subset(self, self.valid_indices)
    self.train_sampler = data.RandomSampler(self.train_dataset)
    self.valid_sampler = data.SequentialSampler(self.valid_dataset)
    self.test_sampler = data.SequentialSampler(self)
def _run(self):
    svrg.load_mdp_data(self)
    svrg.init_alg(self)
    full_dataset = mdp_dataset(self)
    scsg_batch_size = int(self.num_data * self.scsg_batch_size_ratio)
    geom_dist_p = 1 / (scsg_batch_size + 1)
    # rho = 1e-2*mspbe.calc_L_rho(self)
    if self.terminate_if_less_than_epsilon == False:
        progress_bar = progressbar.ProgressBar(max_value=self.num_epoch + 50)
    while self.check_termination_cond():
        theta_tilde = self.theta.clone()
        omega_tilde = self.omega.clone()
        theta_tilde_grad, omega_tilde_grad = self.get_grad_theta_omega_from_batch_abc(
            self.theta, self.omega, full_dataset,
            torch.randperm(self.num_data)[:scsg_batch_size],
            scsg_batch_size, self.rho)
        torch.cuda.empty_cache()
        self.num_grad_eval += scsg_batch_size
        if self.record_per_dataset_pass:
            self.check_complete_data_pass()
        if self.use_geometric_dist:
            inner_loop_epoch = np.random.geometric(geom_dist_p)
        else:
            inner_loop_epoch = int(self.num_data * self.scsg_batch_size_ratio)
        sampler = data.RandomSampler(torch.arange(self.num_data),
                                     replacement=True, num_samples=inner_loop_epoch)
        data_generator = data.DataLoader(full_dataset,
                                         batch_size=self.batch_size,
                                         sampler=sampler,
                                         num_workers=self.num_workers,
                                         drop_last=False)
        for batch_A_t, batch_b_t, batch_C_t, batch_t_m in data_generator:
            batch_size = batch_t_m.shape[0]
            for j in range(batch_size):
                A_t, b_t, C_t, t_m = svrg.get_stoc_data(self, batch_A_t, batch_b_t,
                                                        batch_C_t, batch_t_m, j)
                theta_grad = (mspbe.mspbe_grad_theta(self.theta, self.omega, A_t, rho=self.rho)
                              + theta_tilde_grad
                              - mspbe.mspbe_grad_theta(theta_tilde, omega_tilde, A_t, rho=self.rho))
                omega_grad = (mspbe.mspbe_grad_omega(self.theta, self.omega, A_t, b_t, C_t, self.rho_omega)
                              + omega_tilde_grad
                              - mspbe.mspbe_grad_omega(theta_tilde, omega_tilde, A_t, b_t, C_t, self.rho_omega))
                self.theta.sub_(torch.mul(theta_grad, self.sigma_theta))
                self.omega.sub_(torch.mul(omega_grad, self.sigma_omega))
        self.num_grad_eval += inner_loop_epoch
        if self.record_per_dataset_pass:
            self.check_complete_data_pass()
        if self.record_before_one_pass:
            self.record_value_before_one_pass()
        # Temporary
        mspbe_at_epoch = float(mspbe.calc_mspbe_torch(self, self.rho).cpu().numpy())
        print('scsg ratio = ' + str(self.scsg_batch_size_ratio)
              + ' sigma_theta =' + str(self.sigma_theta)
              + ' sigma_omega = ' + str(self.sigma_omega)
              + ' scsg mspbe = %.5f' % (mspbe_at_epoch))
        self.end_of_epoch()
        if self.terminate_if_less_than_epsilon == False:
            progress_bar.update(self.num_pass) if self.record_per_dataset_pass else progress_bar.update(self.cur_epoch)
    svrg.end_of_exp(self)
    # Temporary
    if self.record_before_one_pass:
        return {'record_points_before_one_pass': self.record_points_before_one_pass,
                'use_geom_dist': self.use_geometric_dist,
                'theta': self.theta, 'omega': self.omega, 'result': self.result,
                'sigma_theta': self.sigma_theta, 'sigma_omega': self.sigma_omega,
                'name': self.name,
                'scsg_batch_size_ratio': self.scsg_batch_size_ratio,
                'record_per_dataset_pass': self.record_per_dataset_pass,
                'record_per_epoch': self.record_per_epoch,
                'comp_cost': self.num_pass,
                'rho': self.rho, 'rho_ac': self.rho_ac}
    else:
        return {'use_geom_dist': self.use_geometric_dist,
                'theta': self.theta, 'omega': self.omega, 'result': self.result,
                'sigma_theta': self.sigma_theta, 'sigma_omega': self.sigma_omega,
                'name': self.name,
                'scsg_batch_size_ratio': self.scsg_batch_size_ratio,
                'record_per_dataset_pass': self.record_per_dataset_pass,
                'record_per_epoch': self.record_per_epoch,
                'comp_cost': self.num_pass,
                'rho': self.rho, 'rho_ac': self.rho_ac}
def data_loader(root, phase, batch_size, tokenizer, config):
    dataset = load_and_cache_examples(root, tokenizer, config=config, mode=phase)
    if phase == 'train':
        sampler = data.RandomSampler(dataset)
    else:
        sampler = data.SequentialSampler(dataset)
    dataloader = data.DataLoader(dataset=dataset, sampler=sampler,
                                 batch_size=batch_size)
    return dataloader

# from transformers import AutoTokenizer
# dataloader = data_loader('/home/ubuntu/aikorea/sbs/data', 'train', 32,
#                          AutoTokenizer.from_pretrained(config.bert_model_name))
# print(len(dataloader))
def get_data(transform, mode='test'):
    print('Loading data for "%s" ...' % mode)
    dataset = deepfake_3d(out_dir=args.out_dir, mode=mode, transform=transform)
    sampler = data.RandomSampler(dataset)
    if mode == 'test':
        data_loader = data.DataLoader(dataset,
                                      batch_size=1,
                                      sampler=sampler,
                                      shuffle=False,
                                      num_workers=32,
                                      pin_memory=True,
                                      collate_fn=my_collate)
    print('"%s" dataset size: %d' % (mode, len(dataset)))
    return data_loader
def train_network(self, X_inputs, Y_labels):
    optimizer = optim.Adam(self.neural_network.parameters())
    X_inputs = torch.from_numpy(X_inputs).double()
    Y_labels = torch.from_numpy(Y_labels).double().view(len(Y_labels), 1)
    self.neural_network.train(True)
    for iteration in range(self.nb_iters):
        for batch in tdata.BatchSampler(
                tdata.RandomSampler(range(len(X_inputs)), replacement=False),
                batch_size=self.batch_size, drop_last=False):
            optimizer.zero_grad()
            with torch.set_grad_enabled(True):
                outputs = self.neural_network(X_inputs[batch])
                loss = nn.MSELoss(reduction="mean")(outputs, Y_labels[batch])
                loss.backward()
                optimizer.step()
def main(self, clevr_dir, preproc_dir, results_loc, log_loc=None):
    logging.basicConfig(level=logging.INFO)
    utils.cuda_message()
    np.printoptions(linewidth=139)
    clevr_fs = open_fs(clevr_dir, create=False)
    preproc_fs = open_fs(preproc_dir, create=True)
    dataset = datasets.TaskDataset(clevr_fs, preproc_fs, "train")
    total_words = len(dataset.word_ix) + 1
    logging.info("Total words: %s", total_words)
    sampler = data.BatchSampler(data.RandomSampler(dataset), 32, False)
    net = mac.MACNet(mac.MACRec(12, 512), total_words).to(config.torch_device())
    opt = torch.optim.Adam(net.parameters())
    if log_loc:
        now = datetime.datetime.now()
        log_dir = f"{log_loc}/new-{now}"
        writer = tensorboardX.SummaryWriter(log_dir)
    else:
        writer = None
    step = 0
    rolling_accuracy = 0
    for epoch in range(10):
        bar = tqdm(sampler)
        for batch_ix in bar:
            opt.zero_grad()
            images, qns, qn_lens, answers = dataset[batch_ix]
            predictions = net(images, qns, qn_lens)
            loss = functional.cross_entropy(predictions, answers)
            loss.backward()
            opt.step()
            hard_preds = np.argmax(predictions.detach().cpu().numpy(), 1)
            accuracy = (hard_preds == answers.detach().cpu().numpy()).mean()
            if writer is not None:
                writer.add_scalar("loss", loss.item(), step)
                writer.add_scalar("accuracy", accuracy, step)
            rolling_accuracy = rolling_accuracy * 0.95 + accuracy * 0.05
            bar.set_description("Accuracy: {}".format(rolling_accuracy))
            step += 1
def get_data(transform, mode='train'):
    print('Loading data for "%s" ...' % mode)
    if args.dataset == 'k400':
        use_big_K400 = args.img_dim > 140
        dataset = Kinetics400_full_3d(mode=mode,
                                      transform=transform,
                                      seq_len=args.seq_len,
                                      num_seq=args.num_seq,
                                      downsample=5,
                                      big=use_big_K400)
    elif args.dataset == 'ucf101':
        dataset = UCF101_3d(mode=mode,
                            transform=transform,
                            seq_len=args.seq_len,
                            num_seq=args.num_seq,
                            downsample=args.ds)
    elif args.dataset == 'nturgbd':
        dataset = NTURGBD_3D(mode=mode,
                             transform=transform,
                             seq_len=args.seq_len,
                             num_seq=args.num_seq,
                             downsample=args.ds,
                             train_csv=args.train_csv,
                             val_csv=args.test_csv)
    else:
        raise ValueError('dataset not supported')
    sampler = data.RandomSampler(dataset)
    if mode == 'train':
        data_loader = data.DataLoader(dataset,
                                      batch_size=args.batch_size,
                                      sampler=sampler,
                                      shuffle=False,
                                      num_workers=32,
                                      pin_memory=True,
                                      drop_last=True)
    elif mode == 'val':
        data_loader = data.DataLoader(dataset,
                                      batch_size=args.batch_size,
                                      sampler=sampler,
                                      shuffle=False,
                                      num_workers=32,
                                      pin_memory=True,
                                      drop_last=True)
    print('"%s" dataset size: %d' % (mode, len(dataset)))
    return data_loader
def data_iterator(self):
    while True:
        if self.training:
            random.shuffle(self.files)
        for f_id in range(self.num_files):
            data_file = self.files[f_id]
            train_data = BertPretrainingPreprocessedDataset(
                input_file=data_file, max_pred_length=self.max_pred_length)
            train_sampler = pt_data.RandomSampler(train_data)
            train_dataloader = pt_data.DataLoader(
                dataset=train_data,
                batch_size=self.batch_size,
                collate_fn=self._collate_fn,
                shuffle=train_sampler is None,
                sampler=train_sampler)
            for x in train_dataloader:
                yield x
def data_loader_from_dataset(dset, batch_size=64, num_workers=2,
                             batches_per_epoch=None, random_sample=True,
                             shuffle=False, **kwargs):
    if random_sample:
        if batches_per_epoch is None:
            batches_per_epoch = len(dset) // batch_size
        dataloader = data.DataLoader(
            dset,
            batch_size=batch_size,
            sampler=data.RandomSampler(dset, replacement=True,
                                       num_samples=batches_per_epoch * batch_size),
            shuffle=shuffle,
            num_workers=num_workers,
            **kwargs)
    else:
        dataloader = data.DataLoader(dset,
                                     batch_size=batch_size,
                                     shuffle=shuffle,
                                     num_workers=num_workers,
                                     **kwargs)
    return dataloader
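A hedged usage note for the factory above (the dataset and sizes are illustrative): with random_sample=True the epoch length is decoupled from the dataset size, because RandomSampler draws indices with replacement and num_samples fixes how many indices one pass over the loader yields.

# One pass over this loader yields 100 batches (100 * 64 indices drawn with
# replacement), regardless of len(my_dataset).
loader = data_loader_from_dataset(my_dataset, batch_size=64, batches_per_epoch=100)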
def train_dataset(self, dataset=None, batch_size=32, iters_per_validation=1000,
                  early_stopping=True, early_stopping_patience=10,
                  validation_fraction=0.1, num_workers=2):
    validation_size = int(len(dataset[0]) * validation_fraction) + 1
    training_set = Dataset([d[:-validation_size] for d in dataset])
    training_loader = D.DataLoader(
        training_set,
        batch_size=batch_size,
        sampler=D.RandomSampler(training_set, replacement=True),
        pin_memory=True,
        num_workers=2)
    validation_set = Dataset([d[-validation_size:] for d in dataset])
    validation_loader = D.DataLoader(
        validation_set, batch_size=1024, num_workers=2)
    fail = 0  # states, values, variance, policy, weights
    loss_avg = 0
    idx = 0
    while True:
        for batch in training_loader:
            idx += 1
            l = self.train(batch)
            loss_avg += l[0]
            if (idx + 1) % iters_per_validation == 0:
                l_val = 0
                """
                for b in validation_loader:
                    l = self.compute_loss(b)
                    l_val += l[0] * len(b[0])
                l_val /= validation_size
                """
                print(loss_avg / iters_per_validation, l_val)
def _make_batch_loader(self, batch_size=None, shuffle=None, num_samples=200000):
    nb_threads = self.nb_threads
    batch_size = self.batch_size if batch_size is None else batch_size
    shuffle = self.shuffle if shuffle is None else shuffle
    if shuffle:
        sampler = data.RandomSampler(self, replacement=True,
                                     num_samples=min(num_samples, len(self)))
        shuffle = None
    else:
        sampler = None
    batch_loader = data.DataLoader(
        dataset=self,
        batch_size=batch_size,
        shuffle=shuffle,
        pin_memory=self.pin_memory,
        num_workers=nb_threads,
        collate_fn=self.collate_fn,
        sampler=sampler)
    return batch_loader
def get_tinyImgNet_train_loader(batch_size, shuffle=True, transform_type='none',
                                bootstrap=-1):
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    if transform_type == 'all':
        train_transform = transforms.Compose([
            transforms.RandomResizedCrop(size=64, scale=(0.2, 1), ratio=(0.8, 1.2)),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            normalize
        ])
    elif transform_type == 'flip':
        train_transform = transforms.Compose([
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            normalize
        ])
    elif transform_type == 'none':
        train_transform = transforms.Compose([transforms.ToTensor(), normalize])
    else:
        raise ValueError(
            "'transform_type' should be 'none', 'flip', or 'all'. Got {}.".format(
                transform_type))
    dset = TINDataset(is_train=True, transform=train_transform)
    if bootstrap > 0:
        # NOTE: when using a sampler, 'shuffle' has to be False.
        sampler = data.RandomSampler(dset, replacement=True,
                                     num_samples=int(min(1, bootstrap) * len(dset)))
        return data.DataLoader(dset, batch_size=batch_size, shuffle=False,
                               sampler=sampler)
    else:
        return data.DataLoader(dset, batch_size=batch_size, shuffle=shuffle)
def get_dataloader(dataset, mode, args):
    print("Creating data loaders")
    train_sampler = data.RandomSampler(dataset)
    val_sampler = None
    if mode == 'train':
        data_loader = data.DataLoader(
            dataset, batch_size=args.batch_size,
            shuffle=(train_sampler is None),
            num_workers=args.workers, pin_memory=True,
            sampler=train_sampler, drop_last=True)
    elif mode == 'val':
        data_loader = data.DataLoader(
            dataset, batch_size=args.batch_size,
            shuffle=(val_sampler is None),
            num_workers=args.workers, pin_memory=True,
            sampler=val_sampler, drop_last=True)
    elif mode == 'test':
        data_loader = data.DataLoader(
            dataset, batch_size=1, shuffle=True,
            num_workers=args.workers, pin_memory=True)
    print('"%s" dataset size: %d' % (mode, len(dataset)))
    return data_loader
def __init__(self, dataset: data.Dataset, mask: bool, batch_size: int,
             initial_temperature: float, drop_last: bool = False, device='cpu'):
    super().__init__()
    self._dataset = dataset
    self.mask = mask
    self.batch_size = batch_size
    self.drop_last = drop_last
    self.device = device
    self._temperature = initial_temperature
    # * TODO: a sampler better than random that takes sample length into account
    self._sampler = data.BatchSampler(
        data.RandomSampler(self._dataset, replacement=False),
        batch_size=self.batch_size, drop_last=self.drop_last)
def _setup_dataloader_from_config(self, cfg: DictConfig):
    if cfg.get("load_from_cached_dataset", False):
        logging.info('Loading from cached dataset %s' % (cfg.src_file_name))
        if cfg.src_file_name != cfg.tgt_file_name:
            raise ValueError("src must be equal to target for cached dataset")
        dataset = pickle.load(open(cfg.src_file_name, 'rb'))
        dataset.reverse_lang_direction = cfg.get("reverse_lang_direction", False)
    else:
        dataset = TranslationDataset(
            dataset_src=str(Path(cfg.src_file_name).expanduser()),
            dataset_tgt=str(Path(cfg.tgt_file_name).expanduser()),
            tokens_in_batch=cfg.tokens_in_batch,
            clean=cfg.get("clean", False),
            max_seq_length=cfg.get("max_seq_length", 512),
            min_seq_length=cfg.get("min_seq_length", 1),
            max_seq_length_diff=cfg.get("max_seq_length_diff", 512),
            max_seq_length_ratio=cfg.get("max_seq_length_ratio", 512),
            cache_ids=cfg.get("cache_ids", False),
            cache_data_per_node=cfg.get("cache_data_per_node", False),
            use_cache=cfg.get("use_cache", False),
            reverse_lang_direction=cfg.get("reverse_lang_direction", False),
        )
        dataset.batchify(self.encoder_tokenizer, self.decoder_tokenizer)
    if cfg.shuffle:
        sampler = pt_data.RandomSampler(dataset)
    else:
        sampler = pt_data.SequentialSampler(dataset)
    return torch.utils.data.DataLoader(
        dataset=dataset,
        batch_size=1,
        sampler=sampler,
        num_workers=cfg.get("num_workers", 2),
        pin_memory=cfg.get("pin_memory", False),
        drop_last=cfg.get("drop_last", False),
    )
def get_data_loader(self, examples, args):
    features_0 = bert.convert_examples_to_features(
        [x[0] for x in examples], self.get_labels(), args.max_seq_length,
        self.tokenizer)
    features_1 = bert.convert_examples_to_features(
        [x[1] for x in examples], self.get_labels(), args.max_seq_length,
        self.tokenizer)
    features = list(zip(features_0, features_1))
    input_ids_0 = torch.tensor([f[0].input_ids for f in features], dtype=torch.long)
    input_mask_0 = torch.tensor([f[0].input_mask for f in features], dtype=torch.long)
    segment_ids_0 = torch.tensor([f[0].segment_ids for f in features], dtype=torch.long)
    input_ids_1 = torch.tensor([f[1].input_ids for f in features], dtype=torch.long)
    input_mask_1 = torch.tensor([f[1].input_mask for f in features], dtype=torch.long)
    segment_ids_1 = torch.tensor([f[1].segment_ids for f in features], dtype=torch.long)
    label_ids = torch.tensor([f[0].label_id for f in features], dtype=torch.long)
    ids = [x[0].guid for x in examples]
    tensors = td.TensorDataset(
        input_ids_0, input_mask_0, segment_ids_0,
        input_ids_1, input_mask_1, segment_ids_1, label_ids)
    train_data = ARCTDataset(ids, tensors)
    if args.local_rank == -1:
        train_sampler = td.RandomSampler(train_data)
    else:
        train_sampler = td.DistributedSampler(train_data)
    data_loader = td.DataLoader(
        dataset=train_data,
        sampler=train_sampler,
        batch_size=args.train_batch_size,
        collate_fn=collate)
    return data_loader
def verka_300w_w2_boot(enc):
    sum_loss = 0
    n = len(LazyLoader.w300().test_dataset)
    loader = torch_data.DataLoader(
        LazyLoader.w300().test_dataset,
        batch_size=16,
        drop_last=False,
        sampler=torch_data.RandomSampler(LazyLoader.w300().test_dataset,
                                         replacement=True, num_samples=n),
        num_workers=20)
    for i, batch in enumerate(loader):
        data = batch['data'].cuda()
        landmarks = batch["meta"]["keypts_normalized"].cuda()
        pred = enc(data)["mes"].coord
        eye_dist = landmarks[:, 45] - landmarks[:, 36]
        eye_dist = eye_dist.pow(2).sum(dim=1).sqrt()
        sum_loss += (OTWasDist().forward(pred, landmarks) / eye_dist).sum().item()
    # print("test brule_loss: ", sum_loss / n)
    return sum_loss / n
def create_data_loaders(train_dataset: data.Dataset, val_dataset: data.Dataset,
                        num_workers: int, batch_size: int):
    logging.info(
        f'creating dataloaders with {num_workers} workers and a batch-size of {batch_size}'
    )
    fn_dataloader = functools.partial(
        data.DataLoader,
        batch_size=batch_size,
        num_workers=num_workers,
        collate_fn=collate_fn,
        pin_memory=True,
    )
    train_loader = fn_dataloader(train_dataset, shuffle=True)
    train_metrics_sampler = data.RandomSampler(train_dataset, replacement=True,
                                               num_samples=len(val_dataset))
    train_metrics_loader = fn_dataloader(train_dataset,
                                         sampler=train_metrics_sampler)
    val_metrics_loader = fn_dataloader(val_dataset)
    return train_loader, train_metrics_loader, val_metrics_loader
def main(args):
    utils.init_distributed_mode(args)
    device = torch.device(args.gpus)

    in_chns = 3
    if args.vision_type == 'monochromat':
        in_chns = 1
    elif 'dichromat' in args.vision_type:
        in_chns = 2

    data_reading_kwargs = {
        'target_size': args.target_size,
        'colour_vision': args.vision_type,
        'colour_space': args.colour_space
    }
    dataset, num_classes = utils.get_dataset(args.dataset, args.data_dir, 'train',
                                             **data_reading_kwargs)

    json_file_name = os.path.join(args.out_dir, 'args.json')
    with open(json_file_name, 'w') as fp:
        json.dump(dict(args._get_kwargs()), fp, sort_keys=True, indent=4)

    dataset_test, _ = utils.get_dataset(args.dataset, args.data_dir, 'val',
                                        **data_reading_kwargs)

    if args.distributed:
        train_sampler = torch_dist.DistributedSampler(dataset)
        test_sampler = torch_dist.DistributedSampler(dataset_test)
    else:
        train_sampler = torch_data.RandomSampler(dataset)
        test_sampler = torch_data.SequentialSampler(dataset_test)

    data_loader = torch_data.DataLoader(dataset,
                                        batch_size=args.batch_size,
                                        sampler=train_sampler,
                                        num_workers=args.workers,
                                        collate_fn=utils.collate_fn,
                                        drop_last=True)
    data_loader_test = torch_data.DataLoader(dataset_test,
                                             batch_size=1,
                                             sampler=test_sampler,
                                             num_workers=args.workers,
                                             collate_fn=utils.collate_fn)

    if args.network_name == 'unet':
        model = segmentation_models.unet.model.Unet(
            encoder_weights=args.backbone, classes=num_classes)
        if args.pretrained:
            print('Loading %s' % args.pretrained)
            checkpoint = torch.load(args.pretrained, map_location='cpu')
            remove_keys = []
            for key_ind, key in enumerate(checkpoint['state_dict'].keys()):
                if 'segmentation_head' in key:
                    remove_keys.append(key)
            for key in remove_keys:
                del checkpoint['state_dict'][key]
            model.load_state_dict(checkpoint['state_dict'], strict=False)
    elif args.custom_arch:
        print('Custom model!')
        backbone_name, customs = model_utils.create_custom_resnet(args.backbone,
                                                                  None)
        if customs is not None:
            args.backbone = {'arch': backbone_name, 'customs': customs}
        model = custom_models.__dict__[args.network_name](
            args.backbone, num_classes=num_classes, aux_loss=args.aux_loss)
        if args.pretrained:
            print('Loading %s' % args.pretrained)
            checkpoint = torch.load(args.pretrained, map_location='cpu')
            num_all_keys = len(checkpoint['state_dict'].keys())
            remove_keys = []
            for key_ind, key in enumerate(checkpoint['state_dict'].keys()):
                if key_ind > (num_all_keys - 3):
                    remove_keys.append(key)
            for key in remove_keys:
                del checkpoint['state_dict'][key]
            pretrained_weights = OrderedDict(
                (k.replace('segmentation_model.', ''), v)
                for k, v in checkpoint['state_dict'].items())
            model.load_state_dict(pretrained_weights, strict=False)
    else:
        model = seg_models.__dict__[args.network_name](
            num_classes=num_classes, aux_loss=args.aux_loss,
            pretrained=args.pretrained)

    model.to(device)
    if args.distributed:
        model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model)

    best_iou = 0
    model_progress = []
    model_progress_path = os.path.join(args.out_dir, 'model_progress.csv')
    # loading the model if to be resumed
    if args.resume is not None:
        checkpoint = torch.load(args.resume, map_location='cpu')
        model.load_state_dict(checkpoint['model'])
        best_iou = checkpoint['best_iou']
        # if model progress exists, load it
        if os.path.exists(model_progress_path):
            model_progress = np.loadtxt(model_progress_path, delimiter=',')
            model_progress = model_progress.tolist()

    master_model = model
    if args.distributed:
        model = torch.nn.parallel.DistributedDataParallel(model,
                                                          device_ids=[args.gpus])
        master_model = model.module

    if args.network_name == 'unet':
        params_to_optimize = model.parameters()
    else:
        params_to_optimize = [
            {'params': [p for p in master_model.backbone.parameters()
                        if p.requires_grad]},
            {'params': [p for p in master_model.classifier.parameters()
                        if p.requires_grad]},
        ]
        if args.aux_loss:
            params = [p for p in master_model.aux_classifier.parameters()
                      if p.requires_grad]
            params_to_optimize.append({'params': params, 'lr': args.lr * 10})

    optimizer = torch.optim.SGD(params_to_optimize, lr=args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    lr_lambda = lambda x: (1 - x / (len(data_loader) * args.epochs)) ** 0.9
    lr_scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer, lr_lambda)

    criterion = select_criterion(args.dataset)

    start_time = time.time()
    for epoch in range(args.initial_epoch, args.epochs):
        if args.distributed:
            train_sampler.set_epoch(epoch)
        train_log = train_one_epoch(model, criterion, optimizer, data_loader,
                                    lr_scheduler, device, epoch, args.print_freq)
        val_confmat = utils.evaluate(model, data_loader_test, device=device,
                                     num_classes=num_classes)
        val_log = val_confmat.get_log_dict()
        is_best = val_log['iou'] > best_iou
        best_iou = max(best_iou, val_log['iou'])
        model_data = {
            'epoch': epoch + 1,
            'arch': args.network_name,
            'customs': {
                'aux_loss': args.aux_loss,
                'pooling_type': args.pooling_type,
                'in_chns': in_chns,
                'num_classes': num_classes,
                'backbone': args.backbone
            },
            'state_dict': master_model.state_dict(),
            'optimizer': optimizer.state_dict(),
            'target_size': args.target_size,
            'args': args,
            'best_iou': best_iou,
        }
        utils.save_on_master(model_data,
                             os.path.join(args.out_dir, 'checkpoint.pth'))
        if is_best:
            utils.save_on_master(model_data,
                                 os.path.join(args.out_dir, 'model_best.pth'))

        epoch_prog, header = add_to_progress(train_log, [], '')
        epoch_prog, header = add_to_progress(val_log, epoch_prog, header,
                                             prefix='v_')
        model_progress.append(epoch_prog)
        np.savetxt(model_progress_path, np.array(model_progress),
                   delimiter=';', header=header, fmt='%s')

    total_time = time.time() - start_time
    total_time_str = str(datetime.timedelta(seconds=int(total_time)))
    print('Training time {}'.format(total_time_str))
def _setup_dataloader_from_config(self, cfg: DictConfig, predict_last_k=0):
    if cfg.get("use_tarred_dataset", False):
        if cfg.get("metadata_file") is None:
            raise FileNotFoundError(
                "Trying to use tarred data set but could not find metadata path in config."
            )
        else:
            metadata_file = cfg.get('metadata_file')
            with open(metadata_file) as metadata_reader:
                metadata = json.load(metadata_reader)
            if cfg.get('tar_files') is None:
                tar_files = metadata.get('tar_files')
                if tar_files is not None:
                    logging.info(f'Loading from tarred dataset {tar_files}')
                else:
                    raise FileNotFoundError(
                        "Could not find tarred dataset in config or metadata."
                    )
            else:
                tar_files = cfg.get('tar_files')
                if metadata.get('tar_files') is not None:
                    raise ValueError(
                        'Tar files specified in config and in metadata file. Tar files should only be specified once.'
                    )
        dataset = TarredSentenceDataset(
            text_tar_filepaths=tar_files,
            metadata_path=metadata_file,
            tokenizer=self.tokenizer,
            shuffle_n=cfg.get("tar_shuffle_n", 100),
            shard_strategy=cfg.get("shard_strategy", "scatter"),
            global_rank=self.global_rank,
            world_size=self.world_size,
        )
        return torch.utils.data.DataLoader(
            dataset=dataset,
            batch_size=1,
            num_workers=cfg.get("num_workers", 2),
            pin_memory=cfg.get("pin_memory", False),
            drop_last=cfg.get("drop_last", False),
        )
    else:
        dataset = SentenceDataset(
            tokenizer=self.tokenizer,
            dataset=cfg.file_name,
            tokens_in_batch=cfg.tokens_in_batch,
            clean=cfg.get("clean", False),
            max_seq_length=cfg.get("max_seq_length", 512),
            min_seq_length=cfg.get("min_seq_length", 1),
            cache_ids=cfg.get("cache_ids", False),
        )
        if cfg.shuffle:
            sampler = pt_data.RandomSampler(dataset)
        else:
            sampler = pt_data.SequentialSampler(dataset)
        return torch.utils.data.DataLoader(
            dataset=dataset,
            batch_size=1,
            sampler=sampler,
            num_workers=cfg.get("num_workers", 2),
            pin_memory=cfg.get("pin_memory", False),
            drop_last=cfg.get("drop_last", False),
        )
print('System start to load data...')
t0 = time()
train_data, val_data = data_utils.load_all()
t1 = time()
print('Data has been loaded successfully, cost:%.4fs' % (t1 - t0))

########################### TRAINING STAGE ##################################
check_dir('%s/train_log' % conf.out_path)
log = Logging('%s/train_%s_nrms.log' % (conf.out_path, conf.data_name))
train_model_path = '%s/train_%s_nrms.mod' % (conf.out_path, conf.data_name)

# prepare data for the training stage
train_dataset = data_utils.TrainData(train_data)
val_dataset = data_utils.TestData(val_data)

train_batch_sampler = data.BatchSampler(
    data.RandomSampler(range(train_dataset.length)),
    batch_size=conf.batch_size, drop_last=False)
val_batch_sampler = data.BatchSampler(
    data.SequentialSampler(range(val_dataset.length)),
    batch_size=conf.batch_size, drop_last=True)

# Start Training !!!
max_auc = 0
for epoch in range(1, conf.train_epochs + 1):
    t0 = time()
    model.train()

    train_loss = []
    count = 0
    for batch_idx_list in train_batch_sampler:
        his_input_title, pred_input_title, labels = \
            train_dataset._get_batch(batch_idx_list)