def test_concat_two_non_singletons(self): result = ConcatDataset([[0, 1, 2, 3, 4], [5, 6, 7, 8, 9]]) self.assertEqual(10, len(result)) self.assertEqual(0, result[0]) self.assertEqual(5, result[5])
return BenchmarkTasksets(train_tasks, valid_tasks, test_tasks) if __name__ == '__main__': tasks = get_few_shot_tasksets(dataset='cifar-fs') tasks = get_normal_tasksets(dataset='cifar-fs') # tasks = get_normal_tasksets(dataset='cifar-fc100') # batch = tasks.train.sample() # x, y = batch # print(x.size()) # print(y.size()) # print(y) # x, y = tasks.train[0] # print(x.size()) # print(y.size()) from torch.utils.data import ConcatDataset import torch.utils.data dataset = ConcatDataset([tasks[1], tasks[0]]) # print(len(tasks[0])) # print(len(tasks[1])) # print(len(dataset)) # loader = torch.utils.data.DataLoader(dataset, batch_size=100) # for x, y in loader: # print(x.size(), y.size()) dset = l2l.data.MetaDataset(dataset) tsk = l2l.data.TaskDataset(dset) batch = tsk.sample() print(batch)
#x, y, d, numPedsList, PedsList, target_ids return seq_data, seq_num_persons_list, seq_persons_list, folder_name def __len__(self): # Returns sequence length return len(self.person_to_frames) # Test Block from os import listdir from os.path import isfile, join path = '../data/dataloader/' files_list = [f for f in listdir(path) if isfile(join(path, f))] all_datasets = ConcatDataset([PedTrajectoryDataset(join(path, file)) for file in files_list]) train_loader = DataLoader(all_datasets, batch_size=2, shuffle=False, num_workers=0, pin_memory=False, collate_fn=lambda x: x) #print(len(train_loader.dataset.datasets)) for i, tuple in enumerate(train_loader): print(i) print(tuple) print('************************') print("reached") ''' d = PedTrajectoryDataset('../data/train/overfit/x.txt') batch_size = 1
def kitti_zhou_train(resize_height, resize_width, crop_height, crop_width, batch_size, num_workers): """A loader that loads image sequences for depth training from the kitti training set. This loader returns sequences from the left camera, as well as from the right camera. """ transforms_common = [ tf.RandomHorizontalFlip(), tf.CreateScaledImage(), tf.Resize((resize_height, resize_width), image_types=('color', 'depth', 'camera_intrinsics', 'K')), tf.ConvertDepth(), tf.CreateColoraug(new_element=True), tf.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1, gamma=0.0, fraction=0.5), tf.RemoveOriginals(), tf.ToTensor(), tf.NormalizeZeroMean(), tf.AddKeyValue('domain', 'kitti_zhou_train_depth'), tf.AddKeyValue('purposes', ('depth', 'domain')), ] dataset_name = 'kitti' cfg_common = { 'dataset': dataset_name, 'trainvaltest_split': 'train', 'video_mode': 'video', 'stereo_mode': 'mono', 'split': 'zhou_split', 'video_frames': (0, -1, 1), 'disable_const_items': False } cfg_left = {'keys_to_load': ('color', ), 'keys_to_video': ('color', )} cfg_right = { 'keys_to_load': ('color_right', ), 'keys_to_video': ('color_right', ) } dataset_left = StandardDataset(data_transforms=transforms_common, **cfg_left, **cfg_common) dataset_right = StandardDataset(data_transforms=[tf.ExchangeStereo()] + transforms_common, **cfg_right, **cfg_common) dataset = ConcatDataset((dataset_left, dataset_right)) loader = DataLoader(dataset, batch_size, True, num_workers=num_workers, pin_memory=True, drop_last=True) print( f" - Can use {len(dataset)} images from the kitti (zhou_split) train split for depth training", flush=True) return loader
def train(vocabs, char_vocab, tag_vocab, train_sets, dev_sets, test_sets, unlabeled_sets): """ train_sets, dev_sets, test_sets: dict[lang] -> AmazonDataset For unlabeled langs, no train_sets are available """ # dataset loaders train_loaders, unlabeled_loaders = {}, {} train_iters, unlabeled_iters, d_unlabeled_iters = {}, {}, {} dev_loaders, test_loaders = {}, {} my_collate = utils.sorted_collate if opt.model == 'lstm' else utils.unsorted_collate for lang in opt.langs: train_loaders[lang] = DataLoader(train_sets[lang], opt.batch_size, shuffle=True, collate_fn=my_collate) train_iters[lang] = iter(train_loaders[lang]) for lang in opt.dev_langs: dev_loaders[lang] = DataLoader(dev_sets[lang], opt.batch_size, shuffle=False, collate_fn=my_collate) test_loaders[lang] = DataLoader(test_sets[lang], opt.batch_size, shuffle=False, collate_fn=my_collate) for lang in opt.all_langs: if lang in opt.unlabeled_langs: uset = unlabeled_sets[lang] else: # for labeled langs, consider which data to use as unlabeled set if opt.unlabeled_data == 'both': uset = ConcatDataset([train_sets[lang], unlabeled_sets[lang]]) elif opt.unlabeled_data == 'unlabeled': uset = unlabeled_sets[lang] elif opt.unlabeled_data == 'train': uset = train_sets[lang] else: raise Exception( f'Unknown options for the unlabeled data usage: {opt.unlabeled_data}' ) unlabeled_loaders[lang] = DataLoader(uset, opt.batch_size, shuffle=True, collate_fn=my_collate) unlabeled_iters[lang] = iter(unlabeled_loaders[lang]) d_unlabeled_iters[lang] = iter(unlabeled_loaders[lang]) # embeddings emb = MultiLangWordEmb(vocabs, char_vocab, opt.use_wordemb, opt.use_charemb).to(opt.device) # models F_s = None F_p = None C, D = None, None num_experts = len(opt.langs) + 1 if opt.expert_sp else len(opt.langs) if opt.model.lower() == 'lstm': if opt.shared_hidden_size > 0: F_s = LSTMFeatureExtractor(opt.total_emb_size, opt.F_layers, opt.shared_hidden_size, opt.word_dropout, opt.dropout, opt.bdrnn) if opt.private_hidden_size > 0: if not opt.concat_sp: assert opt.shared_hidden_size == opt.private_hidden_size, "shared dim != private dim when using add_sp!" 
F_p = nn.Sequential( LSTMFeatureExtractor(opt.total_emb_size, opt.F_layers, opt.private_hidden_size, opt.word_dropout, opt.dropout, opt.bdrnn), MixtureOfExperts(opt.MoE_layers, opt.private_hidden_size, len(opt.langs), opt.private_hidden_size, opt.private_hidden_size, opt.dropout, opt.MoE_bn, False)) else: raise Exception(f'Unknown model architecture {opt.model}') if opt.C_MoE: C = SpMixtureOfExperts( opt.C_layers, opt.shared_hidden_size, opt.private_hidden_size, opt.concat_sp, num_experts, opt.shared_hidden_size + opt.private_hidden_size, len(tag_vocab), opt.mlp_dropout, opt.C_bn) else: C = SpMlpTagger(opt.C_layers, opt.shared_hidden_size, opt.private_hidden_size, opt.concat_sp, opt.shared_hidden_size + opt.private_hidden_size, len(tag_vocab), opt.mlp_dropout, opt.C_bn) if opt.shared_hidden_size > 0 and opt.n_critic > 0: if opt.D_model.lower() == 'lstm': d_args = { 'num_layers': opt.D_lstm_layers, 'input_size': opt.shared_hidden_size, 'hidden_size': opt.shared_hidden_size, 'word_dropout': opt.D_word_dropout, 'dropout': opt.D_dropout, 'bdrnn': opt.D_bdrnn, 'attn_type': opt.D_attn } elif opt.D_model.lower() == 'cnn': d_args = { 'num_layers': 1, 'input_size': opt.shared_hidden_size, 'hidden_size': opt.shared_hidden_size, 'kernel_num': opt.D_kernel_num, 'kernel_sizes': opt.D_kernel_sizes, 'word_dropout': opt.D_word_dropout, 'dropout': opt.D_dropout } else: d_args = None if opt.D_model.lower() == 'mlp': D = MLPLanguageDiscriminator(opt.D_layers, opt.shared_hidden_size, opt.shared_hidden_size, len(opt.all_langs), opt.loss, opt.D_dropout, opt.D_bn) else: D = LanguageDiscriminator(opt.D_model, opt.D_layers, opt.shared_hidden_size, opt.shared_hidden_size, len(opt.all_langs), opt.D_dropout, opt.D_bn, d_args) F_s, C, D = F_s.to(opt.device) if F_s else None, C.to( opt.device), D.to(opt.device) if D else None if F_p: F_p = F_p.to(opt.device) # optimizers optimizer = optim.Adam(filter(lambda p: p.requires_grad, itertools.chain(*map(list, [emb.parameters(), F_s.parameters() if F_s else [], \ C.parameters(), F_p.parameters() if F_p else []]))), lr=opt.learning_rate, weight_decay=opt.weight_decay) if D: optimizerD = optim.Adam(D.parameters(), lr=opt.D_learning_rate, weight_decay=opt.D_weight_decay) # testing if opt.test_only: log.info(f'Loading model from {opt.model_save_file}...') if F_s: F_s.load_state_dict( torch.load(os.path.join(opt.model_save_file, f'netF_s.pth'))) for lang in opt.all_langs: F_p.load_state_dict( torch.load(os.path.join(opt.model_save_file, f'net_F_p.pth'))) C.load_state_dict( torch.load(os.path.join(opt.model_save_file, f'netC.pth'))) if D: D.load_state_dict( torch.load(os.path.join(opt.model_save_file, f'netD.pth'))) log.info('Evaluating validation sets:') acc = {} log.info(dev_loaders) log.info(vocabs) for lang in opt.all_langs: acc[lang] = evaluate(f'{lang}_dev', dev_loaders[lang], vocabs[lang], tag_vocab, emb, lang, F_s, F_p, C) avg_acc = sum([acc[d] for d in opt.dev_langs]) / len(opt.dev_langs) log.info(f'Average validation accuracy: {avg_acc}') log.info('Evaluating test sets:') test_acc = {} for lang in opt.all_langs: test_acc[lang] = evaluate(f'{lang}_test', test_loaders[lang], vocabs[lang], tag_vocab, emb, lang, F_s, F_p, C) avg_test_acc = sum([test_acc[d] for d in opt.dev_langs]) / len(opt.dev_langs) log.info(f'Average test accuracy: {avg_test_acc}') return {'valid': acc, 'test': test_acc} # training best_acc, best_avg_acc = defaultdict(float), 0.0 epochs_since_decay = 0 # lambda scheduling if opt.lambd > 0 and opt.lambd_schedule: opt.lambd_orig = opt.lambd num_iter = 
int(utils.gmean([len(train_loaders[l]) for l in opt.langs])) # adapt max_epoch if opt.max_epoch > 0 and num_iter * opt.max_epoch < 15000: opt.max_epoch = 15000 // num_iter log.info(f"Setting max_epoch to {opt.max_epoch}") for epoch in range(opt.max_epoch): emb.train() if F_s: F_s.train() C.train() if D: D.train() if F_p: F_p.train() # lambda scheduling if hasattr(opt, 'lambd_orig') and opt.lambd_schedule: if epoch == 0: opt.lambd = opt.lambd_orig elif epoch == 5: opt.lambd = 10 * opt.lambd_orig elif epoch == 15: opt.lambd = 100 * opt.lambd_orig log.info(f'Scheduling lambda = {opt.lambd}') # training accuracy correct, total = defaultdict(int), defaultdict(int) gate_correct = defaultdict(int) c_gate_correct = defaultdict(int) # D accuracy d_correct, d_total = 0, 0 for i in tqdm(range(num_iter), ascii=True): # D iterations if opt.shared_hidden_size > 0: utils.freeze_net(emb) utils.freeze_net(F_s) utils.freeze_net(F_p) utils.freeze_net(C) utils.unfreeze_net(D) # WGAN n_critic trick since D trains slower n_critic = opt.n_critic if opt.wgan_trick: if opt.n_critic > 0 and ((epoch == 0 and i < 25) or i % 500 == 0): n_critic = 100 for _ in range(n_critic): D.zero_grad() loss_d = {} lang_features = {} # train on both labeled and unlabeled langs for lang in opt.all_langs: # targets not used d_inputs, _ = utils.endless_get_next_batch( unlabeled_loaders, d_unlabeled_iters, lang) d_inputs, d_lengths, mask, d_chars, d_char_lengths = d_inputs d_embeds = emb(lang, d_inputs, d_chars, d_char_lengths) shared_feat = F_s((d_embeds, d_lengths)) if opt.grad_penalty != 'none': lang_features[lang] = shared_feat.detach() if opt.D_model.lower() == 'mlp': d_outputs = D(shared_feat) # if token-level D, we can reuse the gate label generator d_targets = utils.get_gate_label(d_outputs, lang, mask, False, all_langs=True) d_total += torch.sum(d_lengths).item() else: d_outputs = D((shared_feat, d_lengths)) d_targets = utils.get_lang_label( opt.loss, lang, len(d_lengths)) d_total += len(d_lengths) # D accuracy _, pred = torch.max(d_outputs, -1) # d_total += len(d_lengths) d_correct += (pred == d_targets).sum().item() l_d = functional.nll_loss(d_outputs.view( -1, D.num_langs), d_targets.view(-1), ignore_index=-1) l_d.backward() loss_d[lang] = l_d.item() # gradient penalty if opt.grad_penalty != 'none': gp = utils.calc_gradient_penalty( D, lang_features, onesided=opt.onesided_gp, interpolate=(opt.grad_penalty == 'wgan')) gp.backward() optimizerD.step() # F&C iteration utils.unfreeze_net(emb) if opt.use_wordemb and opt.fix_emb: for lang in emb.langs: emb.wordembs[lang].weight.requires_grad = False if opt.use_charemb and opt.fix_charemb: emb.charemb.weight.requires_grad = False utils.unfreeze_net(F_s) utils.unfreeze_net(F_p) utils.unfreeze_net(C) utils.freeze_net(D) emb.zero_grad() if F_s: F_s.zero_grad() if F_p: F_p.zero_grad() C.zero_grad() # optimizer.zero_grad() for lang in opt.langs: inputs, targets = utils.endless_get_next_batch( train_loaders, train_iters, lang) inputs, lengths, mask, chars, char_lengths = inputs bs, seq_len = inputs.size() embeds = emb(lang, inputs, chars, char_lengths) shared_feat, private_feat = None, None if opt.shared_hidden_size > 0: shared_feat = F_s((embeds, lengths)) if opt.private_hidden_size > 0: private_feat, gate_outputs = F_p((embeds, lengths)) if opt.C_MoE: c_outputs, c_gate_outputs = C((shared_feat, private_feat)) else: c_outputs = C((shared_feat, private_feat)) # targets are padded with -1 l_c = functional.nll_loss(c_outputs.view(bs * seq_len, -1), targets.view(-1), ignore_index=-1) # gate 
loss if F_p: gate_targets = utils.get_gate_label( gate_outputs, lang, mask, False) l_gate = functional.cross_entropy(gate_outputs.view( bs * seq_len, -1), gate_targets.view(-1), ignore_index=-1) l_c += opt.gate_loss_weight * l_gate _, gate_pred = torch.max( gate_outputs.view(bs * seq_len, -1), -1) gate_correct[lang] += ( gate_pred == gate_targets.view(-1)).sum().item() if opt.C_MoE and opt.C_gate_loss_weight > 0: c_gate_targets = utils.get_gate_label( c_gate_outputs, lang, mask, opt.expert_sp) _, c_gate_pred = torch.max( c_gate_outputs.view(bs * seq_len, -1), -1) if opt.expert_sp: l_c_gate = functional.binary_cross_entropy_with_logits( mask.unsqueeze(-1) * c_gate_outputs, c_gate_targets) c_gate_correct[lang] += torch.index_select( c_gate_targets.view(bs * seq_len, -1), -1, c_gate_pred.view(bs * seq_len)).sum().item() else: l_c_gate = functional.cross_entropy( c_gate_outputs.view(bs * seq_len, -1), c_gate_targets.view(-1), ignore_index=-1) c_gate_correct[lang] += (c_gate_pred == c_gate_targets. view(-1)).sum().item() l_c += opt.C_gate_loss_weight * l_c_gate l_c.backward() _, pred = torch.max(c_outputs, -1) total[lang] += torch.sum(lengths).item() correct[lang] += (pred == targets).sum().item() # update F with D gradients on all langs if D: for lang in opt.all_langs: inputs, _ = utils.endless_get_next_batch( unlabeled_loaders, unlabeled_iters, lang) inputs, lengths, mask, chars, char_lengths = inputs embeds = emb(lang, inputs, chars, char_lengths) shared_feat = F_s((embeds, lengths)) # d_outputs = D((shared_feat, lengths)) if opt.D_model.lower() == 'mlp': d_outputs = D(shared_feat) # if token-level D, we can reuse the gate label generator d_targets = utils.get_gate_label(d_outputs, lang, mask, False, all_langs=True) else: d_outputs = D((shared_feat, lengths)) d_targets = utils.get_lang_label( opt.loss, lang, len(lengths)) l_d = functional.nll_loss(d_outputs.view(-1, D.num_langs), d_targets.view(-1), ignore_index=-1) if opt.lambd > 0: l_d *= -opt.lambd l_d.backward() optimizer.step() # end of epoch log.info('Ending epoch {}'.format(epoch + 1)) if d_total > 0: log.info('D Training Accuracy: {}%'.format(100.0 * d_correct / d_total)) log.info('Training accuracy:') log.info('\t'.join(opt.langs)) log.info('\t'.join( [str(100.0 * correct[d] / total[d]) for d in opt.langs])) log.info('Gate accuracy:') log.info('\t'.join( [str(100.0 * gate_correct[d] / total[d]) for d in opt.langs])) log.info('Tagger Gate accuracy:') log.info('\t'.join( [str(100.0 * c_gate_correct[d] / total[d]) for d in opt.langs])) log.info('Evaluating validation sets:') acc = {} for lang in opt.dev_langs: acc[lang] = evaluate(f'{lang}_dev', dev_loaders[lang], vocabs[lang], tag_vocab, emb, lang, F_s, F_p, C) avg_acc = sum([acc[d] for d in opt.dev_langs]) / len(opt.dev_langs) log.info(f'Average validation accuracy: {avg_acc}') log.info('Evaluating test sets:') test_acc = {} for lang in opt.dev_langs: test_acc[lang] = evaluate(f'{lang}_test', test_loaders[lang], vocabs[lang], tag_vocab, emb, lang, F_s, F_p, C) avg_test_acc = sum([test_acc[d] for d in opt.dev_langs]) / len(opt.dev_langs) log.info(f'Average test accuracy: {avg_test_acc}') if avg_acc > best_avg_acc: epochs_since_decay = 0 log.info(f'New best average validation accuracy: {avg_acc}') best_acc['valid'] = acc best_acc['test'] = test_acc best_avg_acc = avg_acc with open(os.path.join(opt.model_save_file, 'options.pkl'), 'wb') as ouf: pickle.dump(opt, ouf) if F_s: torch.save(F_s.state_dict(), '{}/netF_s.pth'.format(opt.model_save_file)) torch.save(emb.state_dict(), 
'{}/net_emb.pth'.format(opt.model_save_file)) if F_p: torch.save(F_p.state_dict(), '{}/net_F_p.pth'.format(opt.model_save_file)) torch.save(C.state_dict(), '{}/netC.pth'.format(opt.model_save_file)) if D: torch.save(D.state_dict(), '{}/netD.pth'.format(opt.model_save_file)) else: epochs_since_decay += 1 if opt.lr_decay < 1 and epochs_since_decay >= opt.lr_decay_epochs: epochs_since_decay = 0 old_lr = optimizer.param_groups[0]['lr'] optimizer.param_groups[0]['lr'] = old_lr * opt.lr_decay log.info(f'Decreasing LR to {old_lr * opt.lr_decay}') # end of training log.info(f'Best average validation accuracy: {best_avg_acc}') return best_acc
def train(self, net, samples, optimizer, e): alpha = 2 * max(0, ((50 - e) / 50)) criterion = losses.ELULovaszFocalWithLogitsLoss(alpha, 2 - alpha) transforms = generator.TransformationsGenerator([ random.RandomFlipLr(), random.RandomAffine(image_size=101, translation=lambda rs: (rs.randint(-20, 20), rs.randint(-20, 20)), scale=lambda rs: (rs.uniform(0.85, 1.15), 1), **utils.transformations_options) ]) samples_aux = list( set(samples).intersection(set(utils.get_aux_samples()))) dataset_aux = datasets.ImageDataset(samples_aux, settings.train, transforms) dataset_pseudo = datasets.SemiSupervisedImageDataset( samples_test, settings.test, transforms, size=len(samples_test), test_predictions=self.test_predictions, momentum=0.0) dataset = datasets.ImageDataset(samples, settings.train, transforms) weight_train = len(dataset_pseudo) / len(dataset) * 2 weight_aux = weight_train / 2 weights = [weight_train] * len(dataset) + [weight_aux] * len( dataset_aux) + [1] * len(dataset_pseudo) dataloader = DataLoader( ConcatDataset([dataset, dataset_aux, dataset_pseudo]), num_workers=10, batch_size=16, sampler=WeightedRandomSampler(weights=weights, num_samples=3200)) average_meter_train = meters.AverageMeter() with tqdm(total=len(dataloader), leave=False, ascii=True) as pbar, torch.enable_grad(): net.train() padding = tta.Pad((13, 14, 13, 14)) for images, masks_targets in dataloader: masks_targets = masks_targets.to(gpu) masks_predictions = padding.transform_backward( net(padding.transform_forward(images))).contiguous() loss = criterion(masks_predictions, masks_targets) loss.backward() optimizer.step() optimizer.zero_grad() average_meter_train.add('loss', loss.item()) self.update_pbar(torch.sigmoid(masks_predictions), masks_targets, pbar, average_meter_train, 'Training epoch {}'.format(e)) train_stats = { 'train_' + k: v for k, v in average_meter_train.get_all().items() } return train_stats
set1 = subDataSet(wav_filename, meta_filename, crop_duration_s=crop_duration_s, transform=composed) specific_name = "split1_ir0_ov1_2" wav_filename = os.path.join(wav_dir, specific_name + ".wav") meta_filename = os.path.join(meta_dir, specific_name + ".csv") set2 = subDataSet(wav_filename, meta_filename, crop_duration_s=crop_duration_s, transform=composed) datasets.append(set1) datasets.append(set2) dataset = ConcatDataset(datasets) print(dataset[0]['specgram'].size()) print(dataset[23]['specgram'].size()) print(os.listdir(wav_dir).__len__()) # end region # region fulltest wav_dir = "C:\\Users\\jgohj\\PycharmProjects\\Jon\\data\\mic_dev_test" meta_dir = "C:\\Users\\jgohj\\PycharmProjects\\Jon\\data\\metadata_dev" wav_list = os.listdir((wav_dir)) composed = transforms.Compose([Spectrogram1(), Binarize()]) melcomposed = transforms.Compose([MelSpectrogram(), Binarize()]) datasets = [] meldatasets = [] print("Creating Full Data set...")
def _get_dataset(self, filename, dicts=None): if not filename and not dicts: raise ValueError("You must either supply `filename` or `dicts`") # loading dicts from file (default) if dicts is None: dicts = list(self.processor.file_to_dicts(filename)) #shuffle list of dicts here if we later want to have a random dev set splitted from train set if str(self.processor.train_filename) in str(filename): if not self.processor.dev_filename: if self.processor.dev_split > 0.0: random.shuffle(dicts) num_dicts = len(dicts) multiprocessing_chunk_size, num_cpus_used = calc_chunksize( num_dicts=num_dicts, max_processes=self.max_processes, max_chunksize=self.max_multiprocessing_chunksize, ) with ExitStack() as stack: if self.max_processes > 1: # use multiprocessing only when max_processes > 1 p = stack.enter_context(mp.Pool(processes=num_cpus_used)) logger.info( f"Got ya {num_cpus_used} parallel workers to convert {num_dicts} dictionaries " f"to pytorch datasets (chunksize = {multiprocessing_chunk_size})..." ) log_ascii_workers(num_cpus_used, logger) results = p.imap( partial(self._dataset_from_chunk, processor=self.processor), grouper(dicts, multiprocessing_chunk_size), chunksize=1, ) else: logger.info( f"Multiprocessing disabled, using a single worker to convert {num_dicts}" f"dictionaries to pytorch datasets.") results = map( partial(self._dataset_from_chunk, processor=self.processor), grouper(dicts, num_dicts)) datasets = [] desc = f"Preprocessing Dataset" if filename: desc += f" {filename}" with tqdm(total=len(dicts), unit=' Dicts', desc=desc) as pbar: for dataset, tensor_names in results: datasets.append(dataset) # update progress bar (last step can have less dicts than actual chunk_size) pbar.update( min(multiprocessing_chunk_size, pbar.total - pbar.n)) # _dataset_from_chunk can return a None in cases where downsampling has occurred datasets = [d for d in datasets if d] concat_datasets = ConcatDataset(datasets) return concat_datasets, tensor_names
def _make_question_answering(cls, datasilo, sets=["train", "dev", "test"], n_splits=5, shuffle=True, random_state=None, n_neg_answers_per_question=1): """ Create number of folds data-silo-like objects which can be used for training from the original data silo passed on. This function takes into account the characteristics of the data for question-answering- :param datasilo: the data silo that contains the original data :type datasilo: DataSilo :param sets: which sets to use to create the xval folds (strings) :type sets: list :param n_splits: number of folds to create :type n_splits: int :param shuffle: shuffle each class' samples before splitting :type shuffle: bool :param random_state: random state for shuffling :type random_state: int :param n_neg_answers_per_question: number of negative answers per question to include for training :type n_neg_answers_per_question: int """ assert "id" in datasilo.tensor_names, f"Expected tensor 'id' in tensor names, found {datasilo.tensor_names}" assert "labels" in datasilo.tensor_names, f"Expected tensor 'labels' in tensor names, found {datasilo.tensor_names}" id_index = datasilo.tensor_names.index("id") label_index = datasilo.tensor_names.index("labels") sets_to_concat = [] for setname in sets: if datasilo.data[setname]: sets_to_concat.extend(datasilo.data[setname]) all_data = ConcatDataset(sets_to_concat) documents = [] keyfunc = lambda x: x[id_index][0] all_data = sorted(all_data.datasets, key=keyfunc) for key, document in groupby(all_data, key=keyfunc): documents.append(list(document)) xval_split = cls._split_for_qa( documents=documents, id_index=id_index, n_splits=n_splits, shuffle=shuffle, random_state=random_state, ) silos = [] for train_set, test_set in xval_split: # Each training set is further divided into actual train and dev set if datasilo.processor.dev_split > 0: dev_split = datasilo.processor.dev_split n_dev = int(np.ceil(dev_split * len(train_set))) assert n_dev > 0, f"dev split of {dev_split} is not large enough to split away a development set" n_actual_train = len(train_set) - n_dev actual_train_set = train_set[:n_actual_train] dev_set = train_set[n_actual_train:] ds_dev = [ sample for document in dev_set for sample in document ] else: ds_dev = None actual_train_set = train_set train_samples = [] for doc in actual_train_set: keyfunc = lambda x: x[id_index][1] doc = sorted(doc, key=keyfunc) for key, question in groupby(doc, key=keyfunc): # add all available answrs to train set sample_list = list(question) neg_answer_idx = [] for index, sample in enumerate(sample_list): if sample[label_index][0][0] or sample[label_index][0][ 1]: train_samples.append(sample) else: neg_answer_idx.append(index) # add random n_neg_answers_per_question samples to train set if len(neg_answer_idx) <= n_neg_answers_per_question: train_samples.extend( [sample_list[idx] for idx in neg_answer_idx]) else: neg_answer_idx = random.sample( neg_answer_idx, n_neg_answers_per_question) train_samples.extend( [sample_list[idx] for idx in neg_answer_idx]) ds_train = train_samples ds_test = [sample for document in test_set for sample in document] silos.append( DataSiloForCrossVal(datasilo, ds_train, ds_dev, ds_test)) return silos
def main(): parser = argparse.ArgumentParser() # Required parameters parser.add_argument( "--data_dir", default=None, type=str, required=True, help= "The input data dir. Should contain the .tsv files (or other data files) for the task.", ) parser.add_argument( "--pkl_dir", default=None, type=str, help="The pkl data dir for training set logits.", ) parser.add_argument( "--model_type", default=None, type=str, required=True, help="Model type selected in the list: " + ", ".join(MODEL_CLASSES.keys()), ) parser.add_argument( "--model_name_or_path", default=None, type=str, required=True, help="Path to pre-trained model or shortcut name selected in the list: " + ", ".join(ALL_MODELS), ) parser.add_argument( "--data_cache_name", default=None, type=str, help="The name of cached data", ) parser.add_argument( "--language", default=None, type=str, required=True, help= "Evaluation language. Also train language if `train_language` is set to None.", ) parser.add_argument("--benchmark", default='xtreme', type=str, help="benchmark, xglue/xtreme") parser.add_argument( "--train_language", default=None, type=str, help="Train language if is different of the evaluation language.") parser.add_argument("--sample_ratio", default=0.0, type=float, help="The training sample ratio of each language") parser.add_argument( "--task_name", default=None, type=str, required=True, help="The name of the task to train selected in the list: " + ", ".join(processors.keys()), ) parser.add_argument( "--output_dir", default=None, type=str, required=True, help= "The output directory where the model predictions and checkpoints will be written.", ) # Other parameters parser.add_argument("--log_dir", default=None, type=str, help="The output log dir.") # Other parameters parser.add_argument( "--config_name", default="", type=str, help="Pretrained config name or path if not the same as model_name") parser.add_argument("--gpu_id", default=None, type=str, help="GPU id") parser.add_argument("--filter_k", type=int, default=0) parser.add_argument("--filter_m", type=int, default=0) parser.add_argument("--first_loss_only", action='store_true') parser.add_argument("--use_eng_logits", action='store_true', help='use english soft logits for other language') parser.add_argument("--alpha", type=float, default=0, help='alpha for kd loss') parser.add_argument("--temperature", type=float, default=0.1, help="temprature to soft logits") parser.add_argument( "--tokenizer_name", default="", type=str, help="Pretrained tokenizer name or path if not the same as model_name", ) parser.add_argument( "--cache_dir", default="", type=str, help= "Where do you want to store the pre-trained models downloaded from s3", ) parser.add_argument( "--hidden_dropout_prob", default=0.1, type=float, help= "When splitting up a long document into chunks, how much stride to take between chunks.", ) parser.add_argument( "--max_seq_length", default=128, type=int, help= "The maximum total input sequence length after tokenization. 
Sequences longer " "than this will be truncated, sequences shorter will be padded.", ) parser.add_argument("--do_train", action='store_true', help="Whether to run training.") parser.add_argument("--do_eval", action='store_true', help="Whether to run eval on the test set.") parser.add_argument("--eval_checkpoints", type=str, default=None, help="evaluation checkpoints") parser.add_argument("--eval_splits", default='valid', type=str, help="eval splits") parser.add_argument("--eval_train", action='store_true', help="eval splits") parser.add_argument("--pkl_index", default="0", type=str, help="pickle index for dumping training logits") parser.add_argument( "--evaluate_during_training", action="store_true", help="Rul evaluation during training at each logging step.") parser.add_argument( "--do_lower_case", action="store_true", help="Set this flag if you are using an uncased model.") parser.add_argument("--per_gpu_train_batch_size", default=8, type=int, help="Batch size per GPU/CPU for training.") parser.add_argument("--per_gpu_eval_batch_size", default=8, type=int, help="Batch size per GPU/CPU for evaluation.") parser.add_argument( "--gradient_accumulation_steps", type=int, default=1, help= "Number of updates steps to accumulate before performing a backward/update pass.", ) parser.add_argument("--learning_rate", default=5e-5, type=float, help="The initial learning rate for Adam.") parser.add_argument("--weight_decay", default=0.0, type=float, help="Weight deay if we apply some.") parser.add_argument("--adam_epsilon", default=1e-8, type=float, help="Epsilon for Adam optimizer.") parser.add_argument("--max_grad_norm", default=1.0, type=float, help="Max gradient norm.") parser.add_argument("--num_train_epochs", default=3.0, type=float, help="Total number of training epochs to perform.") parser.add_argument("--use_all_samples_per_epoch", type=boolean_string, default='true', help="Use all samples for per epoch training") parser.add_argument("--max_train_samples_per_epoch", default=None, type=int, help="Total number of training epochs to perform.") parser.add_argument( "--max_steps", default=-1, type=int, help= "If > 0: set total number of training steps to perform. 
Override num_train_epochs.", ) parser.add_argument("--warmup_steps", default=0, type=int, help="Linear warmup over warmup_steps.") parser.add_argument("--logging_steps", type=int, default=-1, help="Log every X updates steps.") parser.add_argument("--logging_each_epoch", action="store_true", help="Whether to log after each epoch.") parser.add_argument("--logging_steps_in_sample", type=int, default=-1, help="log every X samples.") parser.add_argument("--save_steps", type=int, default=-1, help="Save checkpoint every X updates steps.") parser.add_argument( "--eval_all_checkpoints", action="store_true", help= "Evaluate all checkpoints starting with the same prefix as model_name ending and ending with step number", ) parser.add_argument("--no_cuda", action="store_true", help="Avoid using CUDA when available") parser.add_argument("--overwrite_output_dir", action="store_true", help="Overwrite the content of the output directory") parser.add_argument( "--overwrite_cache", action="store_true", help="Overwrite the cached training and evaluation sets") parser.add_argument("--seed", type=int, default=52, help="random seed for initialization") parser.add_argument( "--fp16", action="store_true", help= "Whether to use 16-bit (mixed) precision (through NVIDIA apex) instead of 32-bit", ) parser.add_argument( "--fp16_opt_level", type=str, default="O1", help= "For fp16: Apex AMP optimization level selected in ['O0', 'O1', 'O2', and 'O3']." "See details at https://nvidia.github.io/apex/amp.html", ) parser.add_argument("--local_rank", type=int, default=-1, help="For distributed training: local_rank") parser.add_argument("--server_ip", type=str, default="", help="For distant debugging.") parser.add_argument("--server_port", type=str, default="", help="For distant debugging.") args = parser.parse_args() if (os.path.exists(args.output_dir) and os.listdir(args.output_dir) and args.do_train and not args.overwrite_output_dir): raise ValueError( "Output directory ({}) already exists and is not empty. Use --overwrite_output_dir to overcome." 
.format(args.output_dir)) # Setup distant debugging if needed if args.server_ip and args.server_port: # Distant debugging - see https://code.visualstudio.com/docs/python/debugging#_attach-to-a-local-script import ptvsd print("Waiting for debugger attach") ptvsd.enable_attach(address=(args.server_ip, args.server_port), redirect_output=True) ptvsd.wait_for_attach() # Setup CUDA, GPU & distributed training if args.gpu_id: os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu_id if args.local_rank == -1 or args.no_cuda: device = torch.device("cuda" if torch.cuda.is_available() and not args.no_cuda else "cpu") args.n_gpu = torch.cuda.device_count() else: # Initializes the distributed backend which will take care of sychronizing nodes/GPUs torch.cuda.set_device(args.local_rank) device = torch.device("cuda", args.local_rank) torch.distributed.init_process_group(backend="nccl") args.n_gpu = 1 args.device = device if args.pkl_dir is None: args.pkl_dir = args.data_dir # Setup logging logging.basicConfig( format="%(asctime)s - %(levelname)s - %(name)s - %(message)s", datefmt="%m/%d/%Y %H:%M:%S", level=logging.INFO if args.local_rank in [-1, 0] else logging.WARN, ) logger.warning( "Process rank: %s, device: %s, n_gpu: %s, distributed training: %s, 16-bits training: %s", args.local_rank, device, args.n_gpu, bool(args.local_rank != -1), args.fp16, ) logger.info("Training/evaluation parameters %s", args) # preprocess args assert not (args.logging_steps != -1 and args.logging_steps_in_sample != -1 ), "these two parameters can't both be setted" if args.logging_steps == -1 and args.logging_steps_in_sample != -1: total_batch_size = args.n_gpu * args.per_gpu_train_batch_size * args.gradient_accumulation_steps args.logging_steps = args.logging_steps_in_sample // total_batch_size # Set seed set_seed(args) if args.task_name not in processors: raise ValueError("Task not found: %s" % (args.task_name)) processor = processors[args.task_name](language=args.language, train_language=args.train_language, benchmark=args.benchmark) args.output_mode = output_modes[args.task_name] label_list = processor.get_labels() num_labels = len(label_list) # Load pretrained model and tokenizer if args.local_rank not in [-1, 0]: torch.distributed.barrier( ) # Make sure only the first process in distributed training will download model & vocab args.model_type = args.model_type.lower() config_class, model_class, tokenizer_class = MODEL_CLASSES[args.model_type] config = config_class.from_pretrained( args.config_name if args.config_name else args.model_name_or_path, num_labels=num_labels, finetuning_task=args.task_name, cache_dir=args.cache_dir if args.cache_dir else None, ) if args.filter_k > 0: # there is cross attention layer config.first_loss_only = args.first_loss_only config.alpha = args.alpha config.temperature = args.temperature config.filter_m = args.filter_m config.hidden_dropout_prob = args.hidden_dropout_prob config.output_hidden_states = True config.filter_k = min(args.filter_k, config.num_hidden_layers - args.filter_m) config.num_hidden_layers = args.filter_m tokenizer = tokenizer_class.from_pretrained( args.tokenizer_name if args.tokenizer_name else args.model_name_or_path, do_lower_case=args.do_lower_case, cache_dir=args.cache_dir if args.cache_dir else None, ) model = model_class.from_pretrained( args.model_name_or_path, from_tf=bool(".ckpt" in args.model_name_or_path), config=config, cache_dir=args.cache_dir if args.cache_dir else None, filter_m=args.filter_m, filter_k=args.filter_k, ) if args.local_rank == 0: 
torch.distributed.barrier( ) # Make sure only the first process in distributed training will download model & vocab model.to(args.device) # Training if args.do_train: # Create output directory if needed if not os.path.exists(args.output_dir) and args.local_rank in [-1, 0]: os.makedirs(args.output_dir) if args.filter_k > 0: # FILTER train_langs = [ "en-{}".format(lang) for lang in args.language.split(',') ] else: train_langs = args.train_language.split(',') dataset_list = [] for lang in train_langs: lg_train_dataset, guids = load_and_cache_examples(args, args.task_name, tokenizer, lang, split="train") dataset_list.append(lg_train_dataset) if args.filter_k > 0: train_dataset = AlignDataset( dataset_list, train_langs.index('en-en'), is_training=True, use_all_samples=args.use_all_samples_per_epoch) else: train_dataset = ConcatDataset(dataset_list) global_step, tr_loss = train(args, train_dataset, label_list, model, tokenizer) logger.info(" global_step = %s, average loss = %s", global_step, tr_loss) # Saving best-practices: if you use defaults names for the model, you can reload it using from_pretrained() if args.do_train and (args.local_rank == -1 or torch.distributed.get_rank() == 0): logger.info("Saving model checkpoint to %s", args.output_dir) # Save a trained model, configuration and tokenizer using `save_pretrained()`. # They can then be reloaded using `from_pretrained()` model_to_save = (model.module if hasattr(model, "module") else model ) # Take care of distributed/parallel training model_to_save.save_pretrained(args.output_dir) tokenizer.save_pretrained(args.output_dir) # Good practice: save your training arguments together with the trained model torch.save(args, os.path.join(args.output_dir, "training_args.bin")) # Evaluation if args.do_eval and args.local_rank in [-1, 0]: results = {} if args.eval_checkpoints: checkpoints = [ os.path.join(args.output_dir, ckpt) for ckpt in args.eval_checkpoints.split(',') ] else: checkpoints = [args.output_dir] if args.eval_all_checkpoints: checkpoints = list( os.path.dirname(c) for c in sorted( glob.glob(args.output_dir + "/**/" + WEIGHTS_NAME, recursive=True))) logging.getLogger("transformers.modeling_utils").setLevel( logging.WARN) # Reduce logging logger.info("Evaluate the following checkpoints: %s", checkpoints) best_avg, best_checkpoint = 0, None task_metric = "acc" if args.task_name != "rel" else "ndcg" for checkpoint in checkpoints: prefix = checkpoint.split("-")[-1] if len(checkpoints) > 1 else "" tokenizer = tokenizer_class.from_pretrained( checkpoint, do_lower_case=args.do_lower_case) model = model_class.from_pretrained(checkpoint) model.to(args.device) result = evaluate(args, model, tokenizer, label_list, prefix=prefix, splits=args.eval_splits.split(',')) results[os.path.basename(checkpoint)] = result logger.info("{}\t{}".format(checkpoint, result)) if best_avg < result["valid_avg"][task_metric]: best_avg = result["valid_avg"][task_metric] best_checkpoint = checkpoint with open(os.path.join(args.output_dir, "eval_logs.txt"), 'w') as log_writer: for key, val in results.items(): log_writer.write("{}\t{}\n".format(key, json.dumps(val))) if args.eval_train and args.local_rank in [-1, 0]: if args.eval_checkpoints: # use the first one checkpoint = [ os.path.join(args.output_dir, ckpt) for ckpt in args.eval_checkpoints.split(',') ][0] else: checkpoint = os.path.join(args.output_dir, 'checkpoint-best') assert os.path.exists(checkpoint) model = model_class.from_pretrained(checkpoint) model.to(args.device) evaluate(args, model, tokenizer, 
label_list, prefix="", splits=['train']) logger.info("Task {0} finished!".format(args.task_name))
os.makedirs(args.checkpoint_folder) logging.info("Prepare training datasets.") datasets = [] for dataset_path in args.datasets: if args.dataset_type == 'voc': dataset = VOCDataset(dataset_path, transform=train_transform, target_transform=target_transform) label_file = os.path.join(args.checkpoint_folder, "voc-model-labels.txt") store_labels(label_file, dataset.class_names) num_classes = len(dataset.class_names) else: raise ValueError(f"Dataset tpye {args.dataset_type} is not supported.") datasets.append(dataset) logging.info(f"Stored labels into file {label_file}.") train_dataset = ConcatDataset(datasets) logging.info("Train dataset size: {}".format(len(train_dataset))) train_loader = DataLoader(train_dataset, args.batch_size, num_workers=args.num_workers, shuffle=True) logging.info("Prepare Validation datasets.") if args.dataset_type == "voc": val_dataset = VOCDataset(args.validation_dataset, transform=test_transform, target_transform=target_transform, is_test=True) logging.info("validation dataset size: {}".format(len(val_dataset))) val_loader = DataLoader(val_dataset, args.batch_size, num_workers=args.num_workers, shuffle=False) logging.info("Build network.") net = create_net(num_classes)
def train_cl(model, train_datasets, replay_mode="none", scenario="class",classes_per_task=None,iters=2000,batch_size=32, generator=None, gen_iters=0, gen_loss_cbs=list(), loss_cbs=list(), eval_cbs=list(), sample_cbs=list(), use_exemplars=True, add_exemplars=False, eval_cbs_exemplars=list()): '''Train a model (with a "train_a_batch" method) on multiple tasks, with replay-strategy specified by [replay_mode]. [model] <nn.Module> main model to optimize across all tasks [train_datasets] <list> with for each task the training <DataSet> [replay_mode] <str>, choice from "generative", "exact", "current", "offline" and "none" [scenario] <str>, choice from "task", "domain" and "class" [classes_per_task] <int>, # of classes per task [iters] <int>, # of optimization-steps (i.e., # of batches) per task [generator] None or <nn.Module>, if a seperate generative model should be trained (for [gen_iters] per task) [*_cbs] <list> of call-back functions to evaluate training-progress''' # Set model in training-mode model.train() # Use cuda? cuda = model._is_on_cuda() device = model._device() # Initiate possible sources for replay (no replay for 1st task) Exact = Generative = Current = False previous_model = None # Register starting param-values (needed for "intelligent synapses"). if isinstance(model, ContinualLearner) and (model.si_c>0): for n, p in model.named_parameters(): if p.requires_grad: n = n.replace('.', '__') model.register_buffer('{}_SI_prev_task'.format(n), p.data.clone()) # Loop over all tasks. for task, train_dataset in enumerate(train_datasets, 1): # If offline replay-setting, create large database of all tasks so far if replay_mode=="offline" and (not scenario=="task"): train_dataset = ConcatDataset(train_datasets[:task]) # -but if "offline"+"task"-scenario: all tasks so far included in 'exact replay' & no current batch if replay_mode=="offline" and scenario == "task": Exact = True previous_datasets = train_datasets # Add exemplars (if available) to current dataset (if requested) if add_exemplars and task > 1: # ---------- ADHOC SOLUTION: permMNIST needs transform to tensor, while splitMNIST does not ---------- # if len(train_datasets) > 6: target_transform = (lambda y, x=classes_per_task: torch.tensor(y % x)) if ( scenario == "domain" ) else (lambda y: torch.tensor(y)) else: target_transform = (lambda y, x=classes_per_task: y % x) if scenario == "domain" else None # ---------------------------------------------------------------------------------------------------- # exemplar_dataset = ExemplarDataset(model.exemplar_sets, target_transform=target_transform) training_dataset = ConcatDataset([train_dataset, exemplar_dataset]) else: training_dataset = train_dataset # Prepare <dicts> to store running importance estimates and param-values before update ("Synaptic Intelligence") if isinstance(model, ContinualLearner) and (model.si_c>0): W = {} p_old = {} for n, p in model.named_parameters(): if p.requires_grad: n = n.replace('.', '__') W[n] = p.data.clone().zero_() p_old[n] = p.data.clone() # Find [active_classes] active_classes = None # -> for Domain-IL scenario, always all classes are active if scenario == "task": # -for Task-IL scenario, create <list> with for all tasks so far a <list> with the active classes active_classes = [list(range(classes_per_task * i, classes_per_task * (i + 1))) for i in range(task)] elif scenario == "class": # -for Class-IL scenario, create one <list> with active classes of all tasks so far active_classes = list(range(classes_per_task * task)) # Reset state of 
optimizer(s) for every task (if requested) if model.optim_type == "adam_reset": model.optimizer = optim.Adam(model.optim_list, betas=(0.9, 0.999)) if (generator is not None) and generator.optim_type == "adam_reset": generator.optimizer = optim.Adam(model.optim_list, betas=(0.9, 0.999)) # Initialize # iters left on current data-loader(s) iters_left = iters_left_previous = 1 if scenario == "task": up_to_task = task if replay_mode == "offline" else task-1 iters_left_previous = [1]*up_to_task data_loader_previous = [None]*up_to_task # Define tqdm progress bar(s) progress = tqdm.tqdm(range(1, iters+1)) if generator is not None: progress_gen = tqdm.tqdm(range(1, gen_iters+1)) # Loop over all iterations iters_to_use = iters if (generator is None) else max(iters, gen_iters) for batch_index in range(1, iters_to_use+1): # Update # iters left on current data-loader(s) and, if needed, create new one(s) iters_left -= 1 if iters_left==0: data_loader = iter(utils.get_data_loader(training_dataset, batch_size, cuda=cuda, drop_last=True)) # NOTE: [train_dataset] is training-set of current task # [training_dataset] is training-set of current task with stored exemplars added (if requested) iters_left = len(data_loader) if Exact: if scenario == "task": up_to_task = task if replay_mode == "offline" else task-1 batch_size_replay = int(np.ceil(batch_size/up_to_task)) if (up_to_task>1) else batch_size # -in Task-IL scenario, need separate replay for each task for task_id in range(up_to_task): batch_size_to_use = min(batch_size_replay, len(previous_datasets[task_id])) iters_left_previous[task_id] -= 1 if iters_left_previous[task_id]==0: data_loader_previous[task_id] = iter(utils.get_data_loader( train_datasets[task_id], batch_size_to_use, cuda=cuda, drop_last=True )) iters_left_previous[task_id] = len(data_loader_previous[task_id]) else: iters_left_previous -= 1 if iters_left_previous == 0: batch_size_to_use = min(batch_size, len(ConcatDataset(previous_datasets))) data_loader_previous = iter(utils.get_data_loader(ConcatDataset(previous_datasets), batch_size_to_use, cuda=cuda, drop_last=True)) iters_left_previous = len(data_loader_previous) # -----------------Collect data------------------# # -----CURRENT BATCH----- # if replay_mode == "offline" and scenario == "task": x = y = scores = None else: x, y = next(data_loader) # --> sample training data of current task y = y-classes_per_task*(task-1) if scenario == "task" else y # --> ITL: adjust y-targets to 'active range' x, y = x.to(device), y.to(device) # --> transfer them to correct device # If --bce, --bce-distill & scenario=="class", calculate scores of current batch with previous model binary_distillation = hasattr(model, "binaryCE") and model.binaryCE and model.binaryCE_distill if binary_distillation and scenario == "class" and (previous_model is not None): with torch.no_grad(): scores = previous_model(x)[:, :(classes_per_task * (task - 1))] else: scores = None # -----REPLAYED BATCH----- # if not Exact and not Generative and not Current: x_ = y_ = scores_ = None # -> if no replay # -->> Exact Replay <<-- ## if Exact: scores_ = None if scenario in ("domain", "class"): # Sample replayed training data, move to correct device x_, y_ = next(data_loader_previous) x_ = x_.to(device) y_ = y_.to(device) if (model.replay_targets == "hard") else None # If required, get target scores (i.e, [scores_] -- using previous model, with no_grad() if model.replay_targets == "soft": with torch.no_grad(): scores_ = previous_model(x_) scores_ = scores_[:, :(classes_per_task*(task-1))] 
if scenario == "class" else scores_ # -> when scenario=="class", zero probabilities will be added in the [utils.loss_fn_kd]-function elif scenario == "task": # Sample replayed training data, wrap in (cuda-)Variables and store in lists x_ = list() y_ = list() up_to_task = task if replay_mode=="offline" else task-1 for task_id in range(up_to_task): x_temp, y_temp = next(data_loader_previous[task_id]) x_.append(x_temp.to(device)) # -only keep [y_] if required (as otherwise unnecessary computations will be done) if model.replay_targets == "hard": y_temp = y_temp - (classes_per_task*task_id) #-> adjust y-targets to 'active range' y_.append(y_temp.to(device)) else: y_.append(None) # If required, get target scores (i.e, [scores_] -- using previous model if (model.replay_targets == "soft") and (previous_model is not None): scores_ = list() for task_id in range(up_to_task): with torch.no_grad(): scores_temp = previous_model(x_[task_id]) scores_temp = scores_temp[:, (classes_per_task*task_id):(classes_per_task*(task_id+1))] scores_.append(scores_temp) # -->> Generative / Current Replay <<--## if Generative or Current: # Get replayed data (i.e., [x_]) -- either current data or use previous generator x_ = x if Current else previous_generator.sample(batch_size) # Get target scores and labels (i.e., [scores_] / [y_]) -- using previous model, with no_grad() # -if there are no task-specific mask, obtain all predicted scores at once if (not hasattr(previous_model, "mask_dict")) or (previous_model.mask_dict is None): with torch.no_grad(): all_scores_ = previous_model(x_) # -depending on chosen scenario, collect relevant predicted scores (per task, if required) if scenario in ("domain", "class") and ( (not hasattr(previous_model, "mask_dict")) or (previous_model.mask_dict is None) ): scores_ = all_scores_[:,:(classes_per_task * (task - 1))] if scenario == "class" else all_scores_ _, y_ = torch.max(scores_, dim=1) else: # NOTE: it's possible to have scenario=domain with task-mask (so actually it's the Task-IL scenario) # -[x_] needs to be evaluated according to each previous task, so make list with entry per task scores_ = list() y_ = list() for task_id in range(task - 1): # -if there is a task-mask (i.e., XdG is used), obtain predicted scores for each task separately if hasattr(previous_model, "mask_dict") and previous_model.mask_dict is not None: previous_model.apply_XdGmask(task=task_id + 1) with torch.no_grad(): all_scores_ = previous_model(x_) if scenario == "domain": temp_scores_ = all_scores_ else: temp_scores_ = all_scores_[:, (classes_per_task * task_id):(classes_per_task * (task_id + 1))] _, temp_y_ = torch.max(temp_scores_, dim=1) scores_.append(temp_scores_) y_.append(temp_y_) # Only keep predicted y/scores if required (as otherwise unnecessary computations will be done) y_ = y_ if (model.replay_targets == "hard") else None scores_ = scores_ if (model.replay_targets == "soft") else None # ---> Train MAIN MODEL if batch_index <= iters: # Train the main model with this batch loss_dict = model.train_a_batch(x, y, x_=x_, y_=y_, scores=scores, scores_=scores_, active_classes=active_classes, task=task, rnt=1./task) # Update running parameter importance estimates in W if isinstance(model, ContinualLearner) and (model.si_c>0): for n, p in model.named_parameters(): if p.requires_grad: n = n.replace('.', '__') if p.grad is not None: W[n].add_(-p.grad*(p.detach()-p_old[n])) p_old[n] = p.detach().clone() # Fire callbacks (for visualization of training-progress / evaluating performance after each task) for 
loss_cb in loss_cbs: if loss_cb is not None: loss_cb(progress, batch_index, loss_dict, task=task) for eval_cb in eval_cbs: if eval_cb is not None: eval_cb(model, batch_index, task=task) if model.label == "VAE": for sample_cb in sample_cbs: if sample_cb is not None: sample_cb(model, batch_index, task=task) #---> Train GENERATOR if generator is not None and batch_index <= gen_iters: # Train the generator with this batch loss_dict = generator.train_a_batch(x, y, x_=x_, y_=y_, scores_=scores_, active_classes=active_classes, task=task, rnt=1./task) # Fire callbacks on each iteration for loss_cb in gen_loss_cbs: if loss_cb is not None: loss_cb(progress_gen, batch_index, loss_dict, task=task) for sample_cb in sample_cbs: if sample_cb is not None: sample_cb(generator, batch_index, task=task) ##----------> UPON FINISHING EACH TASK... # Close progres-bar(s) progress.close() if generator is not None: progress_gen.close() # EWC: estimate Fisher Information matrix (FIM) and update term for quadratic penalty if isinstance(model, ContinualLearner) and (model.ewc_lambda>0): # -find allowed classes allowed_classes = list( range(classes_per_task*(task-1), classes_per_task*task) ) if scenario=="task" else (list(range(classes_per_task*task)) if scenario=="class" else None) # -if needed, apply correct task-specific mask if model.mask_dict is not None: model.apply_XdGmask(task=task) # -estimate FI-matrix model.estimate_fisher(training_dataset, allowed_classes=allowed_classes) # SI: calculate and update the normalized path integral if isinstance(model, ContinualLearner) and (model.si_c>0): model.update_omega(W, model.epsilon) # EXEMPLARS: update exemplar sets if (add_exemplars or use_exemplars) or replay_mode=="exemplars": exemplars_per_class = int(np.floor(model.memory_budget / (classes_per_task*task))) # reduce examplar-sets model.reduce_exemplar_sets(exemplars_per_class) # for each new class trained on, construct examplar-set new_classes = list(range(classes_per_task)) if scenario=="domain" else list(range(classes_per_task*(task-1), classes_per_task*task)) for class_id in new_classes: start = time.time() # create new dataset containing only all examples of this class class_dataset = SubDataset(original_dataset=train_dataset, sub_labels=[class_id]) # based on this dataset, construct new exemplar-set for this class model.construct_exemplar_set(dataset=class_dataset, n=exemplars_per_class) print("Constructed exemplar-set for class {}: {} seconds".format(class_id, round(time.time()-start))) model.compute_means = True # evaluate this way of classifying on test set for eval_cb in eval_cbs_exemplars: if eval_cb is not None: eval_cb(model, iters, task=task) # REPLAY: update source for replay previous_model = copy.deepcopy(model).eval() if replay_mode == 'generative': Generative = True previous_generator = copy.deepcopy(generator).eval() if generator is not None else previous_model elif replay_mode == 'current': Current = True elif replay_mode in ('exemplars', 'exact'): Exact = True if replay_mode == "exact": previous_datasets = train_datasets[:task] else: if scenario == "task": previous_datasets = [] for task_id in range(task): previous_datasets.append( ExemplarDataset( model.exemplar_sets[ (classes_per_task * task_id):(classes_per_task * (task_id + 1))], target_transform=lambda y, x=classes_per_task * task_id: y + x) ) else: target_transform = (lambda y, x=classes_per_task: y % x) if scenario == "domain" else None previous_datasets = [ ExemplarDataset(model.exemplar_sets, target_transform=target_transform)]
def create_pretokenized_dataset(paths): datasets = [PreTokenizedFileDataset(p) for p in paths] dataset = ConcatDataset(datasets) return dataset
def test_concat_raises_index_error(self): result = ConcatDataset([[0, 1, 2, 3, 4], [5, 6, 7, 8, 9]]) with self.assertRaises(IndexError): # this one goes to 11 result[11]
verbose=False, ) val_dataset = CSGODataset( transform=transform_multichannel, dataset_split='val', verbose=False, ) test_dataset = CSGODataset( transform=transform_multichannel, dataset_split='test', verbose=False, ) train_val_dataset = ConcatDataset([train_dataset, val_dataset]) # implicit else train_loader = torch.utils.data.DataLoader( train_val_dataset, batch_size=64, shuffle=True, num_workers=0, ) test_loader = torch.utils.data.DataLoader( test_dataset, batch_size=64, shuffle=False, num_workers=0, )
csv_file = filenames[0].split('/')[-1] #Créer data set pour un csv file en particulier # essai=DoodlesDataset(csv_file, path,nrows=select_nrows, size=size_image,skiprows=range(1,10)) # loader=DataLoader(essai,batch_size=10) # for image, label in loader: # print(image) # t1=image[0,0,:,:] # #imshow(t1) # print(label) doodles = ConcatDataset([ DoodlesDataset(fn.split('/')[-1], path, nrows=select_nrows, size=size_image) for fn in filenames ]) loader = DataLoader(doodles, batch_size=2, shuffle=True) i = 0 for image, label in loader: # print(image) t1 = image[0, 0, :, :] t2 = image[1, 0, :, :] # imshow(t1) # imshow(t2) i += 2 print(i) print(label)
def get_train_loaders(config): """ Returns dictionary containing the training and validation loaders (torch.utils.data.DataLoader) backed by the datasets.hdf5.HDF5Dataset. :param config: a top level configuration object containing the 'loaders' key :return: dict { 'train': <train_loader> 'val': <val_loader> } """ assert 'loaders' in config, 'Could not find data loaders configuration' loaders_config = config['loaders'] logger = get_logger('HDF5Dataset') logger.info('Creating training and validation set loaders...') # get train and validation files train_paths = loaders_config['train_path'] val_paths = loaders_config['val_path'] assert isinstance(train_paths, list) assert isinstance(val_paths, list) # get h5 internal paths for raw and label raw_internal_path = loaders_config['raw_internal_path'] label_internal_path = loaders_config['label_internal_path'] weight_internal_path = loaders_config.get('weight_internal_path', None) # get train/validation patch size and stride train_patch = tuple(loaders_config['train_patch']) train_stride = tuple(loaders_config['train_stride']) val_patch = tuple(loaders_config['val_patch']) val_stride = tuple(loaders_config['val_stride']) # get slice_builder_cls slice_builder_str = loaders_config.get('slice_builder', 'SliceBuilder') logger.info(f'Slice builder class: {slice_builder_str}') slice_builder_cls = _get_slice_builder_cls(slice_builder_str) train_datasets = [] for train_path in train_paths: try: logger.info(f'Loading training set from: {train_path}...') # create H5 backed training and validation dataset with data augmentation train_dataset = HDF5Dataset( train_path, train_patch, train_stride, phase='train', transformer_config=loaders_config['transformer'], raw_internal_path=raw_internal_path, label_internal_path=label_internal_path, weight_internal_path=weight_internal_path, slice_builder_cls=slice_builder_cls) train_datasets.append(train_dataset) except Exception: logger.info(f'Skipping training set: {train_path}', exc_info=True) val_datasets = [] for val_path in val_paths: try: logger.info(f'Loading validation set from: {val_path}...') val_dataset = HDF5Dataset( val_path, val_patch, val_stride, phase='val', transformer_config=loaders_config['transformer'], raw_internal_path=raw_internal_path, label_internal_path=label_internal_path, weight_internal_path=weight_internal_path) val_datasets.append(val_dataset) except Exception: logger.info(f'Skipping validation set: {val_path}', exc_info=True) num_workers = loaders_config.get('num_workers', 1) logger.info(f'Number of workers for train/val datasets: {num_workers}') # when training with volumetric data use batch_size of 1 due to GPU memory constraints return { 'train': DataLoader(ConcatDataset(train_datasets), batch_size=1, shuffle=True, num_workers=num_workers), 'val': DataLoader(ConcatDataset(val_datasets), batch_size=1, shuffle=True, num_workers=num_workers) }
def generate_random_dataset(path, nb_row_valid, nb_rows_test, nb_rows, dict_nb_lignes, size_image=224, encoding_dict=None, filenames=None, use_acc_proportionate_sampling=False): ''' For each class in filenames, take nb_rows random samples from the file :param path: :param nb_row_valid: :param nb_rows_test: :param nb_rows: :param size_image: :param encoding_dict: :param filenames: :return: ''' if filenames is None: filenames = os.listdir(path) if use_acc_proportionate_sampling: if os.path.isfile("saves_obj/dict_acc_per_class_valid.pk"): dict_acc_class = load_object( "saves_obj/dict_acc_per_class_valid.pk") else: print( "No per-class accuracy dictionary found; falling back to uniform sampling" ) use_acc_proportionate_sampling = False nb_lignes_skip = nb_row_valid + nb_rows_test list_dataset = [] dict_nb_row_used_per_class = {} for fn in filenames: n = dict_nb_lignes[fn] skip = list(range(1, nb_lignes_skip)) + sorted( random.sample(range(nb_lignes_skip, n), n - nb_rows - nb_lignes_skip)) if use_acc_proportionate_sampling: acc = dict_acc_class[fn[:-4]] new_rows = round((1.1 - acc) * nb_rows) else: new_rows = nb_rows dict_nb_row_used_per_class[fn] = new_rows data_set = DoodlesDataset(fn, path, nrows=new_rows, size=size_image, skiprows=skip, encoding_dict=encoding_dict, mode="train") list_dataset.append(data_set) doodles = ConcatDataset(list_dataset) print( "Number of training samples (total:{}):".format( sum(dict_nb_row_used_per_class.values())), dict_nb_row_used_per_class) return doodles
def get_loaders(train_paths, val_paths, raw_internal_path, label_internal_path, label_dtype, train_patch, train_stride, val_patch, val_stride, transformer, pixel_wise_weight=False, curriculum_learning=False, ignore_index=None): """ Returns dictionary containing the training and validation loaders (torch.utils.data.DataLoader) backed by the datasets.hdf5.HDF5Dataset :param train_paths: paths to the H5 files containing the training sets :param val_paths: paths to the H5 files containing the validation sets :param raw_internal_path: :param label_internal_path: :param label_dtype: target type of the label dataset :param train_patch: :param train_stride: :param val_patch: :param val_stride: :param transformer: :return: dict { 'train': <train_loader> 'val': <val_loader> } """ transformers = { 'LabelToBoundaryTransformer': LabelToBoundaryTransformer, 'RandomLabelToBoundaryTransformer': RandomLabelToBoundaryTransformer, 'AnisotropicRotationTransformer': AnisotropicRotationTransformer, 'IsotropicRotationTransformer': IsotropicRotationTransformer, 'StandardTransformer': StandardTransformer, 'BaseTransformer': BaseTransformer } assert transformer in transformers if curriculum_learning: slice_builder_cls = CurriculumLearningSliceBuilder else: slice_builder_cls = SliceBuilder train_datasets = [] for train_path in train_paths: # create H5 backed training and validation dataset with data augmentation train_dataset = HDF5Dataset(train_path, train_patch, train_stride, phase='train', label_dtype=label_dtype, raw_internal_path=raw_internal_path, label_internal_path=label_internal_path, transformer=transformers[transformer], weighted=pixel_wise_weight, ignore_index=ignore_index, slice_builder_cls=slice_builder_cls) train_datasets.append(train_dataset) val_datasets = [] for val_path in val_paths: val_dataset = HDF5Dataset(val_path, val_patch, val_stride, phase='val', label_dtype=label_dtype, raw_internal_path=raw_internal_path, label_internal_path=label_internal_path, transformer=transformers[transformer], weighted=pixel_wise_weight, ignore_index=ignore_index) val_datasets.append(val_dataset) # shuffle only if curriculum_learning scheme is not used return { 'train': DataLoader(ConcatDataset(train_datasets), batch_size=1, shuffle=not curriculum_learning), 'val': DataLoader(ConcatDataset(val_datasets), batch_size=1, shuffle=not curriculum_learning) }
#transforms.RandomAffine(180, translate=(10, 10)), #transforms.Normalize((0.1307,), (0.3081,)) ])) # data augmentation for the false-class images tf_combinations = get_transform_combination2() for tf in tf_combinations: tf1 = [] tf1.extend(tf) tf1.append(transforms.CenterCrop(IMG_SIZE)) tf1.append(transforms.ToTensor()) false_aug = ImageDataset(input_file_path, DATA_ROOT_DIR, 0, transform=transforms.Compose(tf1)) false_img_dataset = ConcatDataset([false_img_dataset, false_aug]) kfold = KFold(n_splits=KFOLD) true_dataset_fold = kfold.split(true_img_dataset) false_dataset_fold = kfold.split(false_img_dataset) accuracy = [] # model training and test prediction with k-fold cross-validation for fold_idx, ( (true_train_idx, true_test_idx), (false_train_idx, false_test_idx) ) in\ enumerate( zip(true_dataset_fold, false_dataset_fold) ): true_train_data = [true_img_dataset[i] for i in true_train_idx] true_test_data = [true_img_dataset[i] for i in true_test_idx] false_train_data = [false_img_dataset[i] for i in false_train_idx] false_test_data = [false_img_dataset[i] for i in false_test_idx]
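# Hedged alternative to the eager list comprehensions above: torch.utils.data.Subset
# keeps each KFold split as a lazy view over the ConcatDataset instead of materialising
# every decoded image up front. Dummy tensors stand in for the ImageDataset instances
# (hypothetical data), and the per-class pairing of folds from the code above is omitted
# to keep the sketch short.
import torch
from sklearn.model_selection import KFold
from torch.utils.data import TensorDataset, ConcatDataset, Subset, DataLoader

originals = TensorDataset(torch.randn(100, 1, 28, 28), torch.zeros(100, dtype=torch.long))
augmented = TensorDataset(torch.randn(100, 1, 28, 28), torch.zeros(100, dtype=torch.long))
dataset = ConcatDataset([originals, augmented])

for train_idx, test_idx in KFold(n_splits=5).split(range(len(dataset))):
    train_loader = DataLoader(Subset(dataset, train_idx), batch_size=32, shuffle=True)
    test_loader = DataLoader(Subset(dataset, test_idx), batch_size=32)
    # ... fit on train_loader and evaluate on test_loader for this fold ...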
def load_data(data_root, dataset, phase, batch_size, sampler_dic=None, num_workers=8, cifar_imb_ratio=None, test_open=False, shuffle=True): txt = './data/%s/%s_%s.txt' % (dataset, dataset, ( phase if phase not in ['train_plain', 'tail'] else 'train')) if dataset != 'iNaturalist18': key = 'default' else: key = 'iNaturalist18' rgb_mean, rgb_std = RGB_statistics[key]['mean'], RGB_statistics[key]['std'] if phase not in ['train', 'val']: transform = get_data_transform('test', rgb_mean, rgb_std, key) else: transform = get_data_transform(phase, rgb_mean, rgb_std, key) # print('Use data transformation:', transform) if dataset == 'CIFAR10_LT': print('====> CIFAR10 Imbalance Ratio: ', cifar_imb_ratio) set_ = IMBALANCECIFAR10(phase, imbalance_ratio=cifar_imb_ratio, root=data_root) elif dataset == 'CIFAR100_LT': print('====> CIFAR100 Imbalance Ratio: ', cifar_imb_ratio) set_ = IMBALANCECIFAR100(phase, imbalance_ratio=cifar_imb_ratio, root=data_root) else: print('Loading data from %s' % (txt)) set_ = LT_Dataset(data_root, txt, transform, phase) if phase == 'test' and test_open: open_txt = './data/%s/%s_open.txt' % (dataset, dataset) print('Testing with opensets from %s' % (open_txt)) open_set_ = LT_Dataset('./data/%s/%s_open' % (dataset, dataset), open_txt, transform) set_ = ConcatDataset([set_, open_set_]) if sampler_dic and (phase == 'train' or phase == 'train_drw'): print('Using sampler.') # print('Sample %s samples per-class.' % sampler_dic['num_samples_cls']) print('Sampler parameters: ', sampler_dic['params']) return DataLoader(dataset=set_, batch_size=batch_size, sampler=sampler_dic['sampler']( set_, **sampler_dic['params']), num_workers=num_workers) else: print('No sampler.') print('Shuffle is %s.' % (shuffle)) return DataLoader(dataset=set_, batch_size=batch_size, shuffle=shuffle, num_workers=num_workers)
def main(): parser = argparse.ArgumentParser() parser.add_argument('--device', type=str, default='gpu', help='For cpu: \'cpu\', for gpu: \'gpu\'') parser.add_argument('--chunk_size', type=int, default=36, help='chunk size(sequence length)') parser.add_argument('--step_size', type=int, default=1, help='sequence split step') parser.add_argument('--lr', type=float, default=5e-4, help='learning rate') parser.add_argument('--weight_decay', type=argtype.check_float, default='1e-2', help='weight_decay') parser.add_argument('--epoch', type=argtype.epoch, default='inf', help='the number of epoch for training') parser.add_argument('--batch_size', type=int, default=256, help='size of batches for training') parser.add_argument('--val_ratio', type=float, default=.3, help='validation set ratio') parser.add_argument('--model_name', type=str, default='main_model', help='model name to save') parser.add_argument('--transfer', type=argtype.boolean, default=False, help='whether fine tuning or not') parser.add_argument('--oversample_times', type=int, default=30, help='the times oversampling times for fine tuning') parser.add_argument('--patience', type=int, default=20, help='patience for early stopping') parser.add_argument('--c_loss', type=argtype.boolean, default=True, help='whether using custom loss or not') parser.add_argument('--predict', type=argtype.boolean, default=False, help='predict and save csv file or not') parser.add_argument('--filename', type=str, default='submission', help='csv file name to save predict result') parser.add_argument('--Y_list', type=argtype.str_to_list, default='Y12,Y15', help='target Y for pre-training') parser.add_argument('--window_size', type=int, default=1, help='window size for moving average') parser.add_argument('--attention', type=argtype.boolean, default=True, help='select model using attention mechanism') args = parser.parse_args() data_dir = './data' if args.device == 'gpu': args.device = 'cuda' device = torch.device(args.device) chunk_size = args.chunk_size step_size = args.step_size lr = args.lr weight_decay = args.weight_decay EPOCH = args.epoch batch_size = args.batch_size val_ratio = args.val_ratio model_name = args.model_name transfer_learning = args.transfer times = args.oversample_times patience = args.patience c_loss = args.c_loss pred = args.predict filename = args.filename Y_list = args.Y_list window_size = args.window_size attention = args.attention params = { 'chunk_size': chunk_size, 'step_size': step_size, 'learning_rate': lr, 'weight_decay': weight_decay, 'epoch size': EPOCH, 'batch_size': batch_size, 'valid_ratio': val_ratio, 'model_name': model_name, 'transfer_learning': transfer_learning, 'oversample_times': times, 'early_stopping_patience': patience, 'c_loss': c_loss, 'pred': pred, 'filename': filename, 'Y_list': Y_list, 'window_size': window_size, 'attention': attention } Y = '' for y in Y_list: Y += y model_name = f'{model_name}/{Y}' Dataframe = dataframe.Dataframe(data_dir=data_dir) input_size = len(Dataframe.feature_cols) if attention: model = regressor.Attention_Regressor(input_size).to(device) else: model = regressor.BiLSTM_Regressor().to(device) checkpoint = Checkpoint(model_name=model_name, transfer_learning=transfer_learning) early_stopping = Early_stopping(patience=patience) vis = Custom_Visdom(model_name, transfer_learning) vis.print_params(params) if transfer_learning: dataset_list = [] if attention: pre_df = Dataframe.get_pretrain_df()\ .iloc[-chunk_size+1:][Dataframe.feature_cols] df = Dataframe.get_y18_df() df = pd.concat([pre_df, 
df], axis=0) else: df = Dataframe.get_y18_df() train_dataset = datasets.CustomSequenceDataset(chunk_size=chunk_size, df=df, Y='Y18', step_size=step_size, noise=True, times=times) dataset_list.append(train_dataset) dataset = ConcatDataset(dataset_list) train_loader, valid_loader = datasets.split_dataset( dataset=dataset, batch_size=batch_size, val_ratio=val_ratio, shuffle=True) checkpoint.load_model(model) else: dataset_list = [] for y in Y_list: df = Dataframe.get_pretrain_df() df[y] = df[y].rolling(window=window_size, min_periods=1).mean() dataset = datasets.CustomSequenceDataset(chunk_size=chunk_size, df=df, Y=y, step_size=step_size, noise=False, times=1) dataset_list.append(dataset) dataset = ConcatDataset(dataset_list) train_loader, valid_loader = datasets.split_dataset( dataset=dataset, batch_size=batch_size, val_ratio=val_ratio, shuffle=True) optimizer = Adam(model.parameters(), lr=lr, weight_decay=float(weight_decay)) if c_loss: criterion = custom_loss.mse_AIFrenz_torch else: criterion = nn.MSELoss() training_time = time.time() epoch = 0 y_df = Dataframe.get_pretrain_df()[Y_list] y18_df = Dataframe.get_y18_df()[['Y18']] while epoch < EPOCH: print(f'\r Y: {Y} \ chunk size: {chunk_size} \ transfer: {transfer_learning}') epoch += 1 train_loss_per_epoch, train_loss_list_per_batch, batch_list = train( model=model, train_loader=train_loader, criterion=criterion, optimizer=optimizer, epoch=epoch, transfer_learning=transfer_learning, attention=attention, freeze_name='transfer_layer') valid_loss = valid(model=model, valid_loader=valid_loader, criterion=criterion, attention=attention) iter_time = time.time() - training_time print( f'\r Epoch: {epoch:3d}/{str(EPOCH):3s}\t', f'train time: {int(iter_time//60):2d}m {iter_time%60:5.2f}s\t' f'avg train loss: {train_loss_per_epoch:7.3f}\t' f'valid loss: {valid_loss:7.3f}') checkpoint.save_log(batch_list, epoch, train_loss_list_per_batch, train_loss_per_epoch, valid_loss) early_stop, is_best = early_stopping(valid_loss) checkpoint.save_checkpoint(model, optimizer, is_best) vis.print_training(EPOCH, epoch, training_time, train_loss_per_epoch, valid_loss, patience, early_stopping.counter) vis.loss_plot(checkpoint) print('-----' * 17) y_true, y_pred, y_idx = predict.trainset_predict( model=model, data_dir=data_dir, Y=Y_list[0], chunk_size=chunk_size, attention=attention, window_size=window_size) y18_true, y18_pred, y18_idx = predict.trainset_predict( model=model, data_dir=data_dir, Y='Y18', chunk_size=chunk_size, attention=attention, window_size=window_size) y_df['pred'] = y_pred y18_df['pred'] = y18_pred vis.predict_plot(y_df, 'pre') vis.predict_plot(y18_df, 'trans') vis.print_error() if early_stop: break if transfer_learning: checkpoint.load_model(model, transfer_learningd=True) else: checkpoint.load_model(model, transfer_learningd=False) y_true, y_pred, y_idx = predict.trainset_predict(model=model, data_dir=data_dir, Y=Y_list[0], chunk_size=chunk_size, attention=attention, window_size=window_size) y18_true, y18_pred, y18_idx = predict.trainset_predict( model=model, data_dir=data_dir, Y='Y18', chunk_size=chunk_size, attention=attention, window_size=window_size) y_df['pred'] = y_pred y18_df['pred'] = y18_pred vis.predict_plot(y_df, 'pre') vis.predict_plot(y18_df, 'trans') vis.print_error() if pred: predict.test_predict(model=model, chunk_size=chunk_size, filename=filename, attention=attention)
whole_corpus = datasets.TableFeatures(corpus, sherlock_feature_groups, topic_feature=topic, label_enc=label_enc, id_filter=None, max_col_count=MAX_COL_COUNT) if args.mode != 'eval': train = copy.copy(whole_corpus).set_filter(train_ids) train_list.append(train) test = copy.copy(whole_corpus).set_filter(test_ids) test_list.append(test) if args.mode != 'eval': training_data = ConcatDataset(train_list) testing_data = ConcatDataset(test_list) print('----------------------------------') end_loading = time.time() print("Loading done:", end_loading - start_loading) time_record['Load'] = end_loading - start_loading model = CRF(len(valid_types), batch_first=True).to(device) #################### # Training
def __init__(self, config): print('Batch size: ', config.batch_size) print('read background_dataset!' + '\n') background_dataset = BackgroundDataset( [config.PRW_img_path, config.CUHK_SYSU_path]) self.background_dataloader = DataLoader(dataset=background_dataset, batch_size=config.batch_size, shuffle=True, num_workers=config.worker_num, drop_last=True) print('read surreal_dataset dataset!' + '\n') # load the real uv-map textures surreal_dataset = RealTextureDataset(pkl_path=config.texture_pkl_path) self.surreal_dataloader = DataLoader(dataset=surreal_dataset, batch_size=config.batch_size, shuffle=True, num_workers=config.worker_num, drop_last=True) print('read reid_dataset dataset!' + '\n') print('read market_dataset dataset!' + '\n') dataset = Market1501Dataset() if config.triplet: print('4*4!') trainloader = DataLoader(ImageData(dataset.train), sampler=RandomIdentitySampler( dataset.train, config.num_instance), batch_size=config.batch_size, num_workers=config.worker_num, drop_last=True) queryloader = DataLoader(ImageData(dataset.query), sampler=RandomIdentitySampler( dataset.query, config.num_instance), batch_size=config.batch_size, num_workers=config.worker_num, drop_last=True) galleryloader = DataLoader(ImageData(dataset.gallery), sampler=RandomIdentitySampler( dataset.gallery, config.num_instance), batch_size=config.batch_size, num_workers=config.worker_num, drop_last=True) self.reid_dataloader = [trainloader, queryloader, galleryloader] ''' prw_dataset = PRWDataset(pkl_path = config.frames_mat_pkl_path,num_instance=4) market_dataset = Market1501Dataset(pkl_path = config.Market_all_pkl,num_instance=4) reid_dataset = ConcatDataset([market_dataset, prw_dataset]) #market_dataset = Market1501Dataset(pkl_path = '/unsullied/sharefs/zhongyunshan/isilon-home/datasets/Texture/market_1501_train.pkl',num_instance=4) market_dataset = Market1501Dataset(pkl_path = config.Market_all_pkl,num_instance=4) reid_dataset = market_dataset self.reid_dataloader = DataLoader(dataset=reid_dataset, batch_size=int(config.batch_size/config.num_instance), shuffle=True, num_workers=config.worker_num, drop_last=True) ''' else: print('16*1!') prw_dataset = PRWDataset(pkl_path=config.frames_mat_pkl_path, num_instance=1) market_dataset = Market1501Dataset(pkl_path=config.Market_all_pkl, num_instance=1) reid_dataset = ConcatDataset([market_dataset, prw_dataset]) self.reid_dataloader = DataLoader(dataset=reid_dataset, batch_size=config.batch_size, shuffle=True, num_workers=config.worker_num, drop_last=True) # read the mask of face and hand texture_mask = TextureMask(size=64) # read the masks at 64*64 resolution self.face_mask = texture_mask.get_mask('face') self.hand_mask = texture_mask.get_mask('hand') self.mask = self.face_mask + self.hand_mask self.gpu_available = torch.cuda.is_available() if self.gpu_available: print('Use GPU! 
GPU num: ', config.gpu_nums) gpu_ids = [i for i in range(config.gpu_nums)] # load the pretrained model if config.pretrained_model_path is None: print('No resume train model!') self.generator = UNet(input_channels=3, output_channels=3, gpu_ids=gpu_ids) else: print('resume train model!') print(config.epoch_now) self.generator = torch.load(config.pretrained_model_path) if config.reid_model == 'reid_loss_market1501': print('origin model!') from loss.reid_loss_market1501 import ReIDLoss config.num_classes = 1501 self.reid_loss = ReIDLoss(model_path=config.reid_weight_path, num_classes=config.num_classes, gpu_ids=gpu_ids, margin=config.margin) elif config.reid_model == 'PCB_intern_loss': print('PCB_intern_loss!') from loss.PCB_intern_loss import ReIDLoss self.reid_loss = ReIDLoss(model_path=config.reid_weight_path, num_classes=config.num_classes, gpu_ids=gpu_ids, margin=config.margin) elif config.reid_model == 'ImageNet_Resnet': print('ImageNet_Resnet!') print('layer: ', config.layer) from loss.ImageNet_Resnet import ReIDLoss self.reid_loss = ReIDLoss(gpu_ids=gpu_ids) elif config.reid_model == 'PCB_MiddleFeature': print('PCB_MiddleFeature!') print('layer: ', config.layer) from loss.PCB_MiddleFeature import ReIDLoss self.reid_loss = ReIDLoss(model_path=config.reid_weight_path, num_classes=config.num_classes, gpu_ids=gpu_ids, margin=config.margin, layer=config.layer) elif config.reid_model == 'NoPCB_Resnet': print('NoPCB_Resnet!') print('layer: ', config.layer) from loss.NoPCB_Resnet import ReIDLoss self.reid_loss = ReIDLoss(gpu_ids=gpu_ids) elif config.reid_model == 'NoPCB_Resnet_deepfashion': print('NoPCB_Resnet_deepfashion!') print('layer: ', config.layer) from loss.NoPCB_Resnet_deepfashion import ReIDLoss self.reid_loss = ReIDLoss(gpu_ids=gpu_ids) elif config.reid_model == 'PCB_softmax': print('PCB_softmax!') from loss.PCB_softmax_loss import ReIDLoss config.num_classes = 1501 self.reid_loss = ReIDLoss(model_path=config.reid_weight_path, num_classes=config.num_classes, gpu_ids=gpu_ids, margin=config.margin) elif config.reid_model == 'PCB_PerLoss': print('PCB_PerLoss!') from loss.PCB_PerLoss import ReIDLoss self.reid_loss = ReIDLoss(model_path=config.reid_weight_path, num_classes=config.num_classes, gpu_ids=gpu_ids) elif config.reid_model == 'PCB_AllCat': print('PCB_AllCat!') from loss.PCB_AllCat import ReIDLoss self.reid_loss = ReIDLoss(model_path=config.reid_weight_path, num_classes=config.num_classes, gpu_ids=gpu_ids, margin=config.margin) else: raise KeyError('{} not in keys!'.format(config.reid_model)) if self.gpu_available: self.generator = nn.DataParallel(self.generator) # multi-GPU self.generator = self.generator.cuda() self.reid_loss = self.reid_loss.cuda() self.mask = self.mask.cuda() self.texture2img = TextureToImage(action_npz=config.action_npz, batch_size=config.batch_size, use_gpu=self.gpu_available) # joint loss over the face and hand regions, mean squared error self.face_loss = nn.MSELoss() # Unet optimizer self.generator_optimizer = Adam(params=self.generator.parameters(), lr=config.learning_rate, weight_decay=config.weight_decay) configure( os.path.join( config.runs_log_path, config.log_name + str(datetime.datetime.now()).replace(' ', '_'))) self.model_save_dir = os.path.join( config.model_log_path, config.log_name + str(datetime.datetime.now()).replace(' ', '_')) if not os.path.exists(self.model_save_dir): os.mkdir(self.model_save_dir)
def _create_dataloaders(config, dataset_class, tf1, tf2, partitions, target_transform=None, shuffle=False): train_imgs_list = [] for train_partition in partitions: if "STL10" == config.dataset: train_imgs_curr = dataset_class(root=config.dataset_root, transform=tf1, split=train_partition, target_transform=target_transform) else: train_imgs_curr = dataset_class(download=True, root=config.dataset_root, transform=tf1, train=train_partition, target_transform=target_transform) if hasattr(config, "mix_train"): if config.mix_train and (train_partition == "train+unlabeled"): train_imgs_curr = reorder_train_deterministic(train_imgs_curr) train_imgs_list.append(train_imgs_curr) train_imgs = ConcatDataset(train_imgs_list) train_dataloader = torch.utils.data.DataLoader( train_imgs, batch_size=int(config.dataloader_batch_sz), shuffle=shuffle, num_workers=6, drop_last=False, persistent_workers=True) if not shuffle: assert (isinstance(train_dataloader.sampler, torch.utils.data.sampler.SequentialSampler)) dataloaders = [train_dataloader] for d_i in range(config.num_dataloaders): print("Creating auxiliary dataloader ind %d out of %d time %s" % (d_i, config.num_dataloaders, datetime.now())) sys.stdout.flush() train_tf_imgs_list = [] for train_partition in partitions: if "STL10" == config.dataset: train_imgs_tf_curr = dataset_class( download=True, root=config.dataset_root, transform=tf2, # random per call split=train_partition, target_transform=target_transform) else: train_imgs_tf_curr = dataset_class( download=True, root=config.dataset_root, transform=tf2, train=train_partition, target_transform=target_transform) if hasattr(config, "mix_train"): if config.mix_train and (train_partition == "train+unlabeled"): train_imgs_tf_curr = reorder_train_deterministic( train_imgs_tf_curr) train_tf_imgs_list.append(train_imgs_tf_curr) train_imgs_tf = ConcatDataset(train_tf_imgs_list) train_tf_dataloader = \ torch.utils.data.DataLoader(train_imgs_tf, batch_size=int(config.dataloader_batch_sz), shuffle=shuffle, num_workers=6, drop_last=False, persistent_workers=True) if not shuffle: assert (isinstance(train_tf_dataloader.sampler, torch.utils.data.sampler.SequentialSampler)) assert (len(train_dataloader) == len(train_tf_dataloader)) dataloaders.append(train_tf_dataloader) num_train_batches = len(dataloaders[0]) print("Length of datasets vector %d" % len(dataloaders)) print("Number of batches per epoch: %d" % num_train_batches) sys.stdout.flush() return dataloaders
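# Hedged sketch of why the length/order asserts above matter: when the base ConcatDataset
# and each transformed ConcatDataset are built from the same partitions and loaded without
# shuffling, index i refers to the same underlying sample in every loader, so the loaders
# can be consumed in lockstep with zip(). Dummy tensors stand in for the tf1/tf2 image
# datasets (hypothetical data).
import torch
from torch.utils.data import TensorDataset, ConcatDataset, DataLoader

base = torch.arange(10, dtype=torch.float32).unsqueeze(1)
part_a, part_b = TensorDataset(base[:6]), TensorDataset(base[6:])
view1 = ConcatDataset([part_a, part_b])  # stands in for the tf1 view
view2 = ConcatDataset([part_a, part_b])  # stands in for the tf2 view

loader1 = DataLoader(view1, batch_size=4, shuffle=False)
loader2 = DataLoader(view2, batch_size=4, shuffle=False)
assert len(loader1) == len(loader2)
for (x1,), (x2,) in zip(loader1, loader2):
    assert torch.equal(x1, x2)  # same samples, delivered in the same order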
def dataset_loader(dataset_name, data_dir, categories, raw_input_dims, split, text_dim, text_feat, max_text_words, max_expert_tokens, vocab, attr_vocab, use_val=False): dataset_classes = {"CE": CE} if len(categories) > 1 and split == 'train': dataset_list = [] for cat in categories: dataset = dataset_classes[dataset_name]( data_dir=data_dir, text_dim=text_dim, category=cat, raw_input_dims=raw_input_dims, split=split, text_feat=text_feat, max_text_words=max_text_words, max_expert_tokens=max_expert_tokens, vocab=vocab, attr_vocab=attr_vocab, transforms=transforms.Compose([ transforms.RandomCrop(224), transforms.RandomHorizontalFlip(), transforms.ToTensor(), transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)) ])) dataset_list.append(dataset) if use_val: for cat in categories: dataset = dataset_classes[dataset_name]( data_dir=data_dir, text_dim=text_dim, category=cat, raw_input_dims=raw_input_dims, split='val', text_feat=text_feat, max_text_words=max_text_words, max_expert_tokens=max_expert_tokens, vocab=vocab, attr_vocab=attr_vocab, transforms=transforms.Compose([ transforms.RandomCrop(224), transforms.RandomHorizontalFlip(), transforms.ToTensor(), transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)) ])) dataset_list.append(dataset) dataset = ConcatDataset(dataset_list) # elif len(categories) > 1 and (split in ['val', 'val_trg', 'test', 'test_trg']): elif split in ['val', 'val_trg', 'test', 'test_trg']: dataset_list = [] for cat in categories: dataset = dataset_classes[dataset_name]( data_dir=data_dir, text_dim=text_dim, category=cat, raw_input_dims=raw_input_dims, split=split, text_feat=text_feat, max_text_words=max_text_words, max_expert_tokens=max_expert_tokens, vocab=vocab, attr_vocab=attr_vocab, transforms=transforms.Compose([ transforms.CenterCrop(224), transforms.ToTensor(), transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)) ])) dataset_list.append(dataset) dataset = dataset_list else: dataset = dataset_classes[dataset_name]( data_dir=data_dir, text_dim=text_dim, category=categories[0], raw_input_dims=raw_input_dims, split=split, text_feat=text_feat, max_text_words=max_text_words, max_expert_tokens=max_expert_tokens, vocab=vocab, attr_vocab=attr_vocab, transforms=transforms.Compose([ transforms.RandomCrop(224), transforms.RandomHorizontalFlip(), transforms.ToTensor(), transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)) ])) return dataset
cancer_targets = np.ones((cancer_set.shape[0])).astype(np.int64) not_cancer_targets = np.zeros((not_cancer_set.shape[0])).astype(np.int64) not_cancer_dataset = TensorDataset( torch.from_numpy(not_cancer_set.swapaxes(1, 3).swapaxes(2, 3)).float(), torch.from_numpy(not_cancer_targets), torch.from_numpy(seg_set)) del not_cancer_set del seg_set cancer_dataset = TensorDataset( torch.from_numpy(cancer_set.swapaxes(1, 3).swapaxes(2, 3)).float(), torch.from_numpy(cancer_targets), torch.from_numpy(np.zeros((len(cancer_set), 299, 299), dtype=bool))) del cancer_set gc.collect() complete_dataset = ConcatDataset((not_cancer_dataset, cancer_dataset)) num_total = len(complete_dataset) num_train = int(0.8 * num_total) num_val = int(0.1 * num_total) num_test = num_total - num_train - num_val torch.manual_seed(0) train_dataset, test_dataset, val_dataset = torch.utils.data.random_split( complete_dataset, [num_train, num_test, num_val]) datasets = {'train': train_dataset, 'test': test_dataset, 'val': val_dataset} dataset_sizes = { 'train': len(train_dataset), 'test': len(test_dataset), 'val': len(val_dataset) } dataloaders = { x: torch.utils.data.DataLoader(datasets[x],
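# Hedged illustration of the pattern above: ConcatDataset only lines up if every member
# dataset yields items with the same structure, which is why the cancer set is padded
# with an all-zero dummy segmentation mask. Shapes are shrunk to keep the sketch cheap
# (hypothetical data).
import torch
from torch.utils.data import TensorDataset, ConcatDataset, random_split

neg = TensorDataset(torch.randn(8, 3, 32, 32),
                    torch.zeros(8, dtype=torch.long),
                    torch.zeros(8, 32, 32, dtype=torch.bool))
pos = TensorDataset(torch.randn(2, 3, 32, 32),
                    torch.ones(2, dtype=torch.long),
                    torch.zeros(2, 32, 32, dtype=torch.bool))  # dummy masks
full = ConcatDataset((neg, pos))

torch.manual_seed(0)  # fixed seed keeps the split reproducible, as in the code above
train, test, val = random_split(full, [8, 1, 1])
image, target, mask = train[0]  # every item is the same (image, target, mask) triple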
def build_dataset(cfg, transforms, split='train', num_tta=0): assert split in ['train', 'valid', 'test'] dataset_config = cfg['dataset'] num_class = dataset_config['num_class'] fold = dataset_config['fold'] batch_size = dataset_config['batch_size'] num_workers = dataset_config['num_workers'] use_upsampling = dataset_config['upsampling'] is_test = split == 'test' if split == 'test': df = pd.read_csv(dataset_map['test']) image_dir = dataset_map['test_images'] else: df = pd.read_csv(dataset_map['fold']) image_dir = dataset_map['train_images'] if dataset_config['use_original']: if split == 'train': df = df[df['fold'] != fold] if use_upsampling: df = upsampling(df) elif split == 'valid': df = df[df['fold'] == fold] if split == 'valid': if not dataset_config['valid_with_both']: if dataset_config['valid_with_large']: df = df[df['large']] elif dataset_config['valid_with_small']: df = df[~df['large']] sampler_df = [df] dataset = BlindDataset(image_dir=image_dir, df=df, transforms=transforms, num_class=num_class, is_test=is_test, num_tta=num_tta) if split == 'train' and dataset_config['use_diabetic_retinopathy']: diabetic_df = pd.read_csv(diabetic_retinopathy_map['train'], index_col='Unnamed: 0') del diabetic_df['Unnamed: 0.1'] if use_upsampling: diabetic_df = upsampling(diabetic_df) # up sampling for diabetic diabetic_dataset = BlindDataset( image_dir=diabetic_retinopathy_map['train_images'], df=diabetic_df, transforms=transforms, num_class=num_class, is_test=is_test) if not dataset_config['use_original']: dataset = diabetic_dataset sampler_df = [diabetic_df] else: sampler_df += [diabetic_df] dataset = ConcatDataset([dataset, diabetic_dataset]) if split == 'train' and \ (dataset_config['use_class_ratio'] or dataset_config['use_dataset_ratio']): sampler = get_sampler(dataset_config['use_class_ratio'], dataset_config['use_dataset_ratio'], dataset_config['class_ratio'], dataset_config['dataset_ratio'], sampler_df) else: sampler = None data_loader = DataLoader( dataset, shuffle=True if sampler is None else False, batch_size=batch_size, num_workers=num_workers, sampler=sampler, ) return data_loader
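# DataLoader rejects shuffle=True together with an explicit sampler, which is what the
# ternary above guards against. A hedged sketch, assuming a hypothetical dataset-ratio
# weighting (the real get_sampler is not shown here), of how per-source weights can be
# derived from a ConcatDataset's cumulative_sizes:
import torch
from torch.utils.data import ConcatDataset, WeightedRandomSampler

def dataset_ratio_sampler(concat: ConcatDataset, ratios):
    weights = torch.empty(len(concat))
    start = 0
    for end, ratio, member in zip(concat.cumulative_sizes, ratios, concat.datasets):
        # give each source a total mass proportional to its ratio,
        # spread evenly over that source's samples
        weights[start:end] = ratio / len(member)
        start = end
    # replacement=True lets the smaller source be revisited within one epoch
    return WeightedRandomSampler(weights, num_samples=len(concat), replacement=True)

# usage (sketch):
# sampler = dataset_ratio_sampler(dataset, dataset_config['dataset_ratio'])
# DataLoader(dataset, batch_size=batch_size, shuffle=False, sampler=sampler,
#            num_workers=num_workers)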
def main(): parser = argparse.ArgumentParser() # Required parameters parser.add_argument( "--data_dir", default=None, type=str, required=True, help= "The input data dir. Should contain the .tsv files (or other data files) for the task.", ) parser.add_argument( "--model_type", default=None, type=str, required=True, help="Model type selected in the list: " + ", ".join(MODEL_CLASSES.keys()), ) parser.add_argument( "--model_name_or_path", default=None, type=str, required=True, help="Path to pre-trained model or shortcut name selected in the list: " + ", ".join(ALL_MODELS), ) parser.add_argument( "--data_cache_name", default=None, type=str, help="The name of cached data", ) parser.add_argument( "--language", default=None, type=str, required=True, help= "Evaluation language. Also train language if `train_language` is set to None.", ) parser.add_argument( "--train_language", default=None, type=str, help="Train language if is different of the evaluation language.") parser.add_argument("--train_tasks", default=None, type=str, help="Training tasks in together finetuning.") parser.add_argument( "--task_name", default=None, type=str, required=True, help="The name of the task to train selected in the list: " + ", ".join(processors.keys()), ) parser.add_argument( "--output_dir", default=None, type=str, required=True, help= "The output directory where the model predictions and checkpoints will be written.", ) # Other parameters parser.add_argument( "--config_name", default="", type=str, help="Pretrained config name or path if not the same as model_name") parser.add_argument("--gpu_id", default="", type=str, help="GPU id") parser.add_argument( "--tokenizer_name", default="", type=str, help="Pretrained tokenizer name or path if not the same as model_name", ) parser.add_argument( "--cache_dir", default="", type=str, help= "Where do you want to store the pre-trained models downloaded from s3", ) parser.add_argument( "--task_ratio", default=1.0, type=float, help="ratio of tasks between 0-1", ) parser.add_argument( "--max_seq_length", default=128, type=int, help= "The maximum total input sequence length after tokenization. 
Sequences longer " "than this will be truncated, sequences shorter will be padded.", ) parser.add_argument("--do_train", action="store_true", help="Whether to run training.") parser.add_argument("--do_eval", action="store_true", help="Whether to run eval on the test set.") parser.add_argument( "--evaluate_during_training", action="store_true", help="Rul evaluation during training at each logging step.") parser.add_argument( "--do_lower_case", action="store_true", help="Set this flag if you are using an uncased model.") parser.add_argument("--per_gpu_train_batch_size", default=8, type=int, help="Batch size per GPU/CPU for training.") parser.add_argument("--per_gpu_eval_batch_size", default=8, type=int, help="Batch size per GPU/CPU for evaluation.") parser.add_argument( "--gradient_accumulation_steps", type=int, default=1, help= "Number of updates steps to accumulate before performing a backward/update pass.", ) parser.add_argument("--learning_rate", default=5e-5, type=float, help="The initial learning rate for Adam.") parser.add_argument("--weight_decay", default=0.0, type=float, help="Weight deay if we apply some.") parser.add_argument("--adam_epsilon", default=1e-8, type=float, help="Epsilon for Adam optimizer.") parser.add_argument("--max_grad_norm", default=1.0, type=float, help="Max gradient norm.") parser.add_argument("--num_train_epochs", default=3.0, type=float, help="Total number of training epochs to perform.") parser.add_argument( "--max_steps", default=-1, type=int, help= "If > 0: set total number of training steps to perform. Override num_train_epochs.", ) parser.add_argument( "--break_steps", default=-1, type=int, help= "If > 0: set total number of training steps to perform. Override num_train_epochs.", ) parser.add_argument("--warmup_steps", default=0, type=int, help="Linear warmup over warmup_steps.") parser.add_argument("--logging_steps", type=int, default=50, help="Log every X updates steps.") parser.add_argument("--logging_each_epoch", action="store_true", help="Whether to log after each epoch.") parser.add_argument("--logging_steps_in_sample", type=int, default=-1, help="log every X samples.") parser.add_argument("--save_steps", type=int, default=50, help="Save checkpoint every X updates steps.") parser.add_argument( "--eval_all_checkpoints", action="store_true", help= "Evaluate all checkpoints starting with the same prefix as model_name ending and ending with step number", ) parser.add_argument("--no_cuda", action="store_true", help="Avoid using CUDA when available") parser.add_argument("--overwrite_output_dir", action="store_true", help="Overwrite the content of the output directory") parser.add_argument( "--overwrite_cache", action="store_true", help="Overwrite the cached training and evaluation sets") parser.add_argument("--seed", type=int, default=42, help="random seed for initialization") parser.add_argument( "--fp16", action="store_true", help= "Whether to use 16-bit (mixed) precision (through NVIDIA apex) instead of 32-bit", ) parser.add_argument( "--fp16_opt_level", type=str, default="O1", help= "For fp16: Apex AMP optimization level selected in ['O0', 'O1', 'O2', and 'O3']." 
"See details at https://nvidia.github.io/apex/amp.html", ) parser.add_argument("--local_rank", type=int, default=-1, help="For distributed training: local_rank") parser.add_argument("--server_ip", type=str, default="", help="For distant debugging.") parser.add_argument("--server_port", type=str, default="", help="For distant debugging.") args = parser.parse_args() if (os.path.exists(args.output_dir) and os.listdir(args.output_dir) and args.do_train and not args.overwrite_output_dir): raise ValueError( "Output directory ({}) already exists and is not empty. Use --overwrite_output_dir to overcome." .format(args.output_dir)) # Setup distant debugging if needed if args.server_ip and args.server_port: # Distant debugging - see https://code.visualstudio.com/docs/python/debugging#_attach-to-a-local-script import ptvsd print("Waiting for debugger attach") ptvsd.enable_attach(address=(args.server_ip, args.server_port), redirect_output=True) ptvsd.wait_for_attach() # Setup CUDA, GPU & distributed training os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu_id if args.local_rank == -1 or args.no_cuda: device = torch.device("cuda" if torch.cuda.is_available() and not args.no_cuda else "cpu") args.n_gpu = torch.cuda.device_count() else: # Initializes the distributed backend which will take care of sychronizing nodes/GPUs torch.cuda.set_device(args.local_rank) device = torch.device("cuda", args.local_rank) torch.distributed.init_process_group(backend="nccl") args.n_gpu = 1 args.device = device # Setup logging logging.basicConfig( format="%(asctime)s - %(levelname)s - %(name)s - %(message)s", datefmt="%m/%d/%Y %H:%M:%S", level=logging.INFO if args.local_rank in [-1, 0] else logging.WARN, ) logger.warning( "Process rank: %s, device: %s, n_gpu: %s, distributed training: %s, 16-bits training: %s", args.local_rank, device, args.n_gpu, bool(args.local_rank != -1), args.fp16, ) # preprocess args if args.train_language is None or args.train_language == "all": args.train_language = args.language assert not (args.logging_steps != -1 and args.logging_steps_in_sample != -1 ), "these two parameters can't both be setted" if args.logging_steps == -1 and args.logging_steps_in_sample != -1: total_batch_size = args.n_gpu * args.per_gpu_train_batch_size * args.gradient_accumulation_steps args.logging_steps = args.logging_steps_in_sample // total_batch_size # Set seed set_seed(args) # Prepare XNLI task # args.task_name = "xnli" if args.task_name not in processors: raise ValueError("Task not found: %s" % (args.task_name)) num_labels = [] for task_name in args.train_tasks.split(","): processor = processors[task_name](language=TASK_LANGS[task_name], train_language=args.train_language) args.output_mode = output_modes[args.task_name] label_list = processor.get_labels() num_labels.append(len(label_list)) # Load pretrained model and tokenizer if args.local_rank not in [-1, 0]: torch.distributed.barrier( ) # Make sure only the first process in distributed training will download model & vocab args.model_type = args.model_type.lower() config_class, model_class, tokenizer_class = MODEL_CLASSES[args.model_type] config = config_class.from_pretrained( args.config_name if args.config_name else args.model_name_or_path, num_labels=num_labels, finetuning_task=args.task_name, cache_dir=args.cache_dir if args.cache_dir else None, ) tokenizer = tokenizer_class.from_pretrained( args.tokenizer_name if args.tokenizer_name else args.model_name_or_path, do_lower_case=args.do_lower_case, cache_dir=args.cache_dir if args.cache_dir else None, ) model = 
model_class.from_pretrained( args.model_name_or_path, from_tf=bool(".ckpt" in args.model_name_or_path), config=config, cache_dir=args.cache_dir if args.cache_dir else None, ) if args.local_rank == 0: torch.distributed.barrier( ) # Make sure only the first process in distributed training will download model & vocab model.to(args.device) logger.info("Training/evaluation parameters %s", args) # Training if args.do_train: task_dataset_list = [] train_tasks = args.train_tasks.split(",") for task_name in train_tasks: train_langs = args.train_language.split(',') dataset_list = [] for lang in train_langs: lg_train_dataset, guids = load_and_cache_examples( args, task_name, tokenizer, lang, split="train") dataset_list.append(lg_train_dataset) train_dataset = ConcatDataset(dataset_list) task_dataset_list.append(train_dataset) # train_dataset = load_and_cache_examples(args, args.task_name, tokenizer, args.train_language, split="train") global_step, tr_loss = train(args, task_dataset_list, model, tokenizer) logger.info(" global_step = %s, average loss = %s", global_step, tr_loss) # Saving best-practices: if you use defaults names for the model, you can reload it using from_pretrained() if args.do_train and (args.local_rank == -1 or torch.distributed.get_rank() == 0): # Create output directory if needed if not os.path.exists(args.output_dir) and args.local_rank in [-1, 0]: os.makedirs(args.output_dir) logger.info("Saving model checkpoint to %s", args.output_dir) # Save a trained model, configuration and tokenizer using `save_pretrained()`. # They can then be reloaded using `from_pretrained()` model_to_save = (model.module if hasattr(model, "module") else model ) # Take care of distributed/parallel training model_to_save.save_pretrained(args.output_dir) tokenizer.save_pretrained(args.output_dir) # Good practice: save your training arguments together with the trained model torch.save(args, os.path.join(args.output_dir, "training_args.bin")) # Load a trained model and vocabulary that you have fine-tuned model = model_class.from_pretrained(args.output_dir) tokenizer = tokenizer_class.from_pretrained(args.output_dir) model.to(args.device) # Evaluation results = {} return results
def test_concat_two_singletons(self): result = ConcatDataset([[0], [1]]) self.assertEqual(2, len(result)) self.assertEqual(0, result[0]) self.assertEqual(1, result[1])