def build_criterion(self, args):
    """Build the criterion for this task, enforcing the prefix-size
    setting that speech translation with a prepended target language
    ID token requires.

    Args:
        args (argparse.Namespace): parsed command-line arguments

    Raises:
        ValueError: when a target language tag is prepended but
            ``--ignore-prefix-size`` is not 1.
    """
    from fairseq import criterions

    prepends_lang_tag = self.data_cfg.prepend_tgt_lang_tag
    if prepends_lang_tag and args.ignore_prefix_size != 1:
        raise ValueError(
            'Please set "--ignore-prefix-size 1" since '
            "target language ID token is prepended as BOS."
        )
    return criterions.build_criterion(args, self)
def _gpu_train_step(self, test_args):
    """Run one training step on GPU and return the trainer together
    with the logging output of that step."""
    batch_iter, source_dict, target_dict = test_utils.prepare_inputs(test_args)
    built_model = models.build_model(test_args, source_dict, target_dict)
    built_criterion = criterions.build_criterion(
        test_args, source_dict, target_dict
    )
    step_trainer = Trainer(test_args, built_model, built_criterion)
    log_output = step_trainer.train_step(next(batch_iter))
    return step_trainer, log_output
def build_criterion(self, args: Namespace):
    """
    Build the :class:`~fairseq.criterions.FairseqCriterion` instance for
    this task.

    Args:
        args (argparse.Namespace): parsed command-line arguments

    Returns:
        a :class:`~fairseq.criterions.FairseqCriterion` instance
    """
    # Imported lazily to avoid a circular dependency at module load time.
    from fairseq import criterions as criterions_module

    return criterions_module.build_criterion(args, self)
def build_criterion(self, args):
    """
    Build the :class:`~fairseq.criterions.FairseqCriterion` instance for
    this task.

    Args:
        args (argparse.Namespace): parsed command-line arguments

    Returns:
        a :class:`~fairseq.criterions.FairseqCriterion` instance
    """
    from fairseq import criterions

    criterion = criterions.build_criterion(args, self)
    return criterion
def build_criterion(self, cfg: DictConfig):
    """
    Build the :class:`~fairseq.criterions.FairseqCriterion` instance for
    this task.

    Args:
        cfg (omegaconf.DictConfig): configuration object

    Returns:
        a :class:`~fairseq.criterions.FairseqCriterion` instance
    """
    from fairseq import criterions

    built = criterions.build_criterion(cfg, self)
    return built
def build_criterion(self, args):
    """Build the criterion, checking that the selected criterion name
    matches the target modality (discrete units vs. spectrogram) when
    multitask learning is enabled.

    Raises:
        ValueError: when multitask is on and the criterion name does not
            match ``target_is_code``.
    """
    from fairseq import criterions

    if self.multitask_tasks:
        target_is_code = self.args.target_is_code
        if target_is_code and args._name != "speech_to_unit":
            raise ValueError(
                "set --criterion speech_to_unit for speech-to-unit loss with multitask"
            )
        if not target_is_code and args._name != "speech_to_spectrogram":
            raise ValueError(
                "set --criterion speech_to_spectrogram for speech-to-spectrogram loss with multitask"
            )
    return criterions.build_criterion(args, self)
def build_criterion(self, args):
    """
    Build the :class:`~fairseq.criterions.FairseqCriterion` instance for
    this task.

    Args:
        args (argparse.Namespace): parsed command-line arguments

    Returns:
        a :class:`~fairseq.criterions.FairseqCriterion` instance
    """
    from fairseq import criterions

    built = criterions.build_criterion(args, self)
    # This task only supports sequence-level criterions.
    assert isinstance(
        built, criterions.fairseq_criterion.FairseqSequenceCriterion
    )
    return built
def build_criterion(self, args, criterion_type=None):
    """Build the criterion for this task.

    Args:
        args: parsed command-line arguments.
        criterion_type: optional explicit criterion class to use instead
            of the one selected from ``args``.
    """
    from fairseq import criterions

    result = criterions.build_criterion(args, self, criterion_type)
    return result
def build_criterion(self, args):
    """Delegate criterion construction to the module-level ``criterions``
    helper, passing this task as context."""
    criterion = criterions.build_criterion(args, self)
    return criterion
def __init__(self, opt, shared=None):
    """Initialize the fairseq-backed agent.

    Args:
        opt: ParlAI option dictionary.
        shared: if not None, a dict carrying ``model``, ``trainer``,
            ``generator``, ``dict`` and ``args`` from another instance of
            this agent; full initialization is then skipped and those
            objects are reused.
    """
    # In general use a basic TorchAgent wherever possible
    super().__init__(opt, shared)
    if not shared:
        # this is not a shared instance of this class, so do full initialization
        # check early if we're going to be loading the model from a checkpoint
        model_file_exists = (self.opt.get('model_file')
                             and os.path.isfile(self.opt['model_file']))
        # fairseq expects options to be in argparse format, instead of a dict
        # We also need to do some argument postprocessing and whatnot
        # We'll skip pretrained embeddings if we're going to override them with
        # a model checkpoint anyway
        self.args, self.opt = _fairseq_opt_wrapper(opt, model_file_exists)
        # seed the RNG
        torch.manual_seed(self.args.seed)
        # Just some identifying info
        self.id = "fairseq:{}".format(self.args.arch)
        # We need a placeholder task for fairseq
        self.task = _ParlaiTask(self.dict)
        # actually construct the model and generator
        self.model = self.build_model()
        # Construct the generator and scorer
        self.generator = SequenceGenerator(
            [self.model],
            tgt_dict=self.dict,
            beam_size=self.args.beam,
            stop_early=(not self.args.no_early_stop),
            normalize_scores=(not self.args.unnormalized),
            len_penalty=self.args.lenpen,
            unk_penalty=self.args.unkpen,
            sampling=self.args.sampling,
            sampling_topk=self.args.sampling_topk,
            sampling_temperature=self.args.sampling_temperature,
        )
        self.scorer = SequenceScorer([self.model], self.dict)
        # set up the grader and the trainer
        self.criterion = criterions.build_criterion(self.args, self.task)
        if getattr(self.args, 'fp16', None):
            self.trainer = fp16_trainer.FP16Trainer(
                self.args, self.task, self.model, self.criterion)
        else:
            # TODO: we might choose to add a --no-fp16 opt in the future to
            # explicitly disable fp16 instead
            # Hint only on GPUs reporting compute capability >= 7.
            if torch.cuda.get_device_capability(0)[0] >= 7:
                print("Heads up: using --fp16 could be a lot faster!")
            self.trainer = trainer.Trainer(
                self.args, self.task, self.model, self.criterion)
        # if the model already existed, let's preload it and the trainer
        if model_file_exists:
            print('Loading existing model params from ' + self.opt['model_file'])
            self.load(self.opt.get('model_file'))
        # move things to the GPU if possible
        if self.use_cuda:
            self.model = self.model.cuda()
            self.generator = self.generator.cuda()
    else:
        # Shared instance: reuse the heavy objects built by the parent agent.
        self.model = shared['model']
        self.trainer = shared['trainer']
        self.generator = shared['generator']
        self.dict = shared['dict']
        self.args = shared['args']
    # Start things off clean
    self.reset()
def build_criterion(self, args):
    """Instantiate the criterion selected by ``args`` for this task."""
    from fairseq import criterions

    built = criterions.build_criterion(args, self)
    return built
def setup_training(args):
    """Parse args, load dataset, and load model trainer.

    Args:
        args: parsed command-line arguments (device, data paths,
            checkpointing and distributed-training options).

    Returns:
        (extra_state, trainer, dataset) tuple: restored checkpoint state
        (if any), the built Trainer, and the loaded dataset.

    Raises:
        NotImplementedError: when CUDA is unavailable (CPU training is
            not supported).
    """
    if not torch.cuda.is_available():
        raise NotImplementedError("Training on CPU is not supported")
    torch.cuda.set_device(args.device_id)
    torch.manual_seed(args.seed)
    # Load dataset
    splits = [args.train_subset, args.valid_subset]
    validate_and_set_default_args(args)
    train_corpus = pytorch_translate_data.ParallelCorpusConfig(
        source=pytorch_translate_data.CorpusConfig(
            dialect=args.source_lang, data_file=args.train_source_binary_path),
        target=pytorch_translate_data.CorpusConfig(
            dialect=args.target_lang, data_file=args.train_target_binary_path),
        # Per-example weights are optional; older arg sets may not have them.
        weights_file=args.train_weights_path if hasattr(
            args, "train_weights_path") else None,
    )
    eval_corpus = pytorch_translate_data.ParallelCorpusConfig(
        source=pytorch_translate_data.CorpusConfig(
            dialect=args.source_lang, data_file=args.eval_source_binary_path),
        target=pytorch_translate_data.CorpusConfig(
            dialect=args.target_lang, data_file=args.eval_target_binary_path),
        weights_file=None,
    )
    if args.log_verbose:
        print("Starting to load binarized data files.", flush=True)
    use_char_source = args.arch == "char_source"
    dataset = pytorch_translate_data.load_binarized_dataset(
        train_corpus=train_corpus,
        eval_corpus=eval_corpus,
        train_split=args.train_subset,
        eval_split=args.valid_subset,
        args=args,
        use_char_source=use_char_source,
    )
    if args.log_verbose:
        print("Finished loading dataset", flush=True)
    if args.source_lang is None or args.target_lang is None:
        # record inferred languages in args, so that it's saved in checkpoints
        args.source_lang, args.target_lang = dataset.src, dataset.dst
    print(f"| [{dataset.src}] dictionary: {len(dataset.src_dict)} types")
    print(f"| [{dataset.dst}] dictionary: {len(dataset.dst_dict)} types")
    for split in splits:
        print(f"| {split} {len(dataset.splits[split])} examples")
    # Build model and criterion
    model = models.build_model(args, dataset.src_dict, dataset.dst_dict)
    print("building criterion")
    criterion = criterions.build_criterion(
        args, dataset.src_dict, dataset.dst_dict)
    print(f"| model {args.arch}, criterion {criterion.__class__.__name__}")
    print(f"| num. model params: \
{sum(p.numel() for p in model.parameters())}")
    # Load pretrained model weights if applicable
    if args.pretrained_weights_file:
        utils.load_model_state(
            args.pretrained_weights_file, model,
            cuda_device=torch.cuda.current_device())
    # Build trainer
    trainer = Trainer(args, model, criterion)
    print(f"| training on {args.distributed_world_size} GPUs")
    print(
        f"| max tokens per GPU = {args.max_tokens} and \
max sentences per GPU = {args.max_sentences}",
        flush=True,
    )
    os.makedirs(args.save_dir, exist_ok=True)
    checkpoint_path = os.path.join(args.save_dir, args.restore_file)
    # Prefer restoring from per-model files when no combined checkpoint exists.
    if not os.path.isfile(checkpoint_path) and args.multi_model_restore_files:
        print(
            f"| Restoring individual models from {args.multi_model_restore_files}"
        )
        extra_state = multi_model.import_individual_models(
            args.multi_model_restore_files, trainer)
    else:
        extra_state = load_existing_checkpoint(checkpoint_path, trainer)
    return extra_state, trainer, dataset
def setup_training(args):
    """Parse args, load dataset, and load model trainer.

    Args:
        args: parsed command-line arguments (device, data paths,
            checkpoint-restore and distributed-training options).

    Returns:
        (extra_state, trainer, dataset) tuple: restored checkpoint state
        (if any), the built Trainer, and the loaded dataset.

    Raises:
        NotImplementedError: when CUDA is unavailable (CPU training is
            not supported).
    """
    if not torch.cuda.is_available():
        raise NotImplementedError("Training on CPU is not supported")
    torch.cuda.set_device(args.device_id)
    torch.manual_seed(args.seed)
    # Load dataset
    splits = [args.train_subset, args.valid_subset]
    validate_and_set_default_args(args)
    train_corpus = pytorch_translate_data.ParallelCorpusConfig(
        source=pytorch_translate_data.CorpusConfig(
            dialect=args.source_lang, data_file=args.train_source_binary_path),
        target=pytorch_translate_data.CorpusConfig(
            dialect=args.target_lang, data_file=args.train_target_binary_path),
        # Per-example weights are optional; older arg sets may not have them.
        weights_file=args.train_weights_path if hasattr(
            args, "train_weights_path") else None,
    )
    eval_corpus = pytorch_translate_data.ParallelCorpusConfig(
        source=pytorch_translate_data.CorpusConfig(
            dialect=args.source_lang, data_file=args.eval_source_binary_path),
        target=pytorch_translate_data.CorpusConfig(
            dialect=args.target_lang, data_file=args.eval_target_binary_path),
        weights_file=None,
    )
    if args.log_verbose:
        print("Starting to load binarized data files.", flush=True)
    use_char_source = args.arch == "char_source"
    dataset = pytorch_translate_data.load_binarized_dataset(
        train_corpus=train_corpus,
        eval_corpus=eval_corpus,
        train_split=args.train_subset,
        eval_split=args.valid_subset,
        args=args,
        use_char_source=use_char_source,
    )
    if args.log_verbose:
        print("Finished loading dataset", flush=True)
    if args.source_lang is None or args.target_lang is None:
        # record inferred languages in args, so that it's saved in checkpoints
        args.source_lang, args.target_lang = dataset.src, dataset.dst
    print(f"| [{dataset.src}] dictionary: {len(dataset.src_dict)} types")
    print(f"| [{dataset.dst}] dictionary: {len(dataset.dst_dict)} types")
    for split in splits:
        print(f"| {split} {len(dataset.splits[split])} examples")
    # Build model and criterion
    model = models.build_model(args, dataset.src_dict, dataset.dst_dict)
    print("building criterion")
    criterion = criterions.build_criterion(
        args, dataset.src_dict, dataset.dst_dict)
    print(f"| model {args.arch}, criterion {criterion.__class__.__name__}")
    print(f"| num. model params: \
{sum(p.numel() for p in model.parameters())}")
    # Build trainer
    trainer = Trainer(args, model, criterion)
    print(f"| training on {args.distributed_world_size} GPUs")
    print(
        f"| max tokens per GPU = {args.max_tokens} and \
max sentences per GPU = {args.max_sentences}",
        flush=True,
    )
    os.makedirs(args.save_dir, exist_ok=True)
    # If --restore-file is already present under --save-dir, use that one
    # instead of the --restore-file that may be present under
    # --restore-checkpoint-dir. The idea is that --restore-checkpoint-dir
    # allows the user to specify restoring from a different run's
    # checkpoint (possibly with different training params), while not
    # polluting the previous run's checkpoint directory with new checkpoints.
    # However, if training gets interrupted and the user restarts training,
    # we want to resume from the checkpoints under --save-dir, instead of
    # restarting again from the old run's checkpoint under
    # --restore-checkpoint-dir.
    #
    # Note that if args.restore_file is an absolute path, os.path.join() will
    # ignore previous directory args and just use the absolute path as is.
    checkpoint_path = os.path.join(args.save_dir, args.restore_file)
    if os.path.exists(checkpoint_path):
        print(f"Using --save-dir={args.save_dir}, "
              f"--restore-file={args.restore_file}.")
    elif args.restore_checkpoint_dir:
        checkpoint_path = os.path.join(
            args.restore_checkpoint_dir, args.restore_file)
        print(f"Using --restore-checkpoint-dir={args.restore_checkpoint_dir}, "
              f"--restore-file={args.restore_file}.")
    # Prefer restoring from per-model files when no combined checkpoint exists.
    if not os.path.isfile(checkpoint_path) and args.multi_model_restore_files:
        print(
            f"| Restoring individual models from {args.multi_model_restore_files}"
        )
        extra_state = multi_model.import_individual_models(
            args.multi_model_restore_files, trainer)
    else:
        extra_state = load_existing_checkpoint(
            checkpoint_path=checkpoint_path,
            trainer=trainer,
            restore_state=args.restore_checkpoint_state,
        )
    return extra_state, trainer, dataset
def main(args):
    """Train a (possibly low-rank-compressed) translation model.

    Loads the dataset, builds model/criterion/trainer, runs the
    train/validate loop while logging factorization norms to
    TensorBoard, and writes a results summary on rank 0.

    Args:
        args: parsed command-line arguments.

    Raises:
        NotImplementedError: when CUDA is unavailable (CPU training is
            not supported).
    """
    print(args)
    if not torch.cuda.is_available():
        raise NotImplementedError('Training on CPU is not supported')
    torch.cuda.set_device(args.device_id)
    torch.manual_seed(args.seed)
    # Load dataset
    splits = ['train', 'valid']
    if data.has_binary_files(args.data, splits):
        dataset = data.load_dataset(
            args.data, splits, args.source_lang, args.target_lang)
    else:
        dataset = data.load_raw_text_dataset(
            args.data, splits, args.source_lang, args.target_lang)
    if args.source_lang is None or args.target_lang is None:
        # record inferred languages in args, so that it's saved in checkpoints
        args.source_lang, args.target_lang = dataset.src, dataset.dst
    print('| [{}] dictionary: {} types'.format(dataset.src, len(dataset.src_dict)))
    print('| [{}] dictionary: {} types'.format(dataset.dst, len(dataset.dst_dict)))
    for split in splits:
        print('| {} {} {} examples'.format(
            args.data, split, len(dataset.splits[split])))
    # Build model and criterion
    model = models.build_model(args, dataset.src_dict, dataset.dst_dict)
    # rank_scale in (0, 1) enables the low-rank-patched transformer.
    if 0.0 < args.rank_scale < 1.0:
        patch_transformer(args, model)
        if args.wd2fd:
            no_decay, skiplist = [
                'fc1', 'fc2', 'embed_tokens', 'embed_positions', 'out_embed'
            ], []
        else:
            no_decay, skiplist = [], [
                'fc1', 'fc2', 'embed_tokens', 'embed_positions', 'out_embed'
            ]
    else:
        no_decay, skiplist = [], []
        spectral_init(args, model)
    # NOTE(review): no_decay/skiplist computed above are overwritten below
    # before being used — looks like dead code; confirm intended behavior.
    criterion = criterions.build_criterion(
        args, dataset.src_dict, dataset.dst_dict)
    print('| model {}, criterion {}'.format(
        args.arch, criterion.__class__.__name__))
    print('| num. model params: {}'.format(
        sum(p.data.numel() for p in model.parameters())))
    # Build trainer
    no_decay, skiplist = [], []
    if args.wd2fd_quekey:
        no_decay.extend(['_query.weight', '_key.weight'])
    else:
        skiplist.append('quekey')
    if args.wd2fd_outval:
        no_decay.extend(['_value.weight', 'output_perform.weight'])
    else:
        skiplist.append('outval')
    trainer = Trainer(args, model, criterion,
                      skiplist=skiplist, no_decay=no_decay)
    print('| training on {} GPUs'.format(args.distributed_world_size))
    print('| max tokens per GPU = {} and max sentences per GPU = {}'.format(
        args.max_tokens,
        args.max_sentences,
    ))
    # Load the latest checkpoint if one is available
    os.makedirs(args.save_dir, exist_ok=True)
    checkpoint_path = os.path.join(args.save_dir, args.restore_file)
    extra_state = trainer.load_checkpoint(checkpoint_path)
    if extra_state is not None:
        epoch = extra_state['epoch']
        batch_offset = extra_state['batch_offset']
        print('| loaded checkpoint {} (epoch {})'.format(
            checkpoint_path, epoch))
        if batch_offset == 0:
            trainer.lr_step(epoch)
            epoch += 1
    else:
        epoch, batch_offset = 1, 0
    # Only rank 0 logs to the main save dir and persists args.json.
    if args.distributed_rank <= 0:
        writer = SummaryWriter(args.save_dir)
        with open(os.path.join(args.save_dir, 'args.json'), 'w') as f:
            json.dump(vars(args), f, indent=4)
    else:
        writer = SummaryWriter(
            os.path.join(args.save_dir, str(args.distributed_rank)))
    # Train until the learning rate gets too small
    max_epoch = args.max_epoch or math.inf
    max_update = args.max_update or math.inf
    lr = trainer.get_lr()
    train_meter = StopwatchMeter()
    train_meter.start()
    while lr > args.min_lr and epoch <= max_epoch:
        if args.distributed_rank <= 0:
            writer.add_scalar('hyper/lr', lr, epoch)
            # Log factorization norms for the query/key and value/output
            # factor pairs exposed via get_UVT().
            for form in ['QueKey', 'OutVal']:
                frobnorm, nucnorm, bound, nonorth = [], [], [], []
                for module in model.modules():
                    if hasattr(module, form.lower()):
                        U, VT = getattr(module, form.lower()).get_UVT()
                        for u, vt in zip(U, VT):
                            frobnorm.append(frobenius_norm(u, vt))
                            nucnorm.append(
                                torch.norm(torch.matmul(u, vt), 'nuc'))
                            bound.append(
                                (u.pow(2).sum() + vt.pow(2).sum()) / 2.)
                            nonorth.append(sum(non_orthogonality(u, vt)) / 2.)
                writer.add_scalar('FrobNorm/' + form,
                                  sum(frobnorm) / len(frobnorm), epoch)
                writer.add_scalar('NucNorm/' + form,
                                  sum(nucnorm) / len(nucnorm), epoch)
                writer.add_scalar('NucNorm/' + form + '-Bound',
                                  sum(bound) / len(bound), epoch)
                writer.add_scalar('NonOrth/' + form,
                                  sum(nonorth) / len(nonorth), epoch)
            # Same statistics for the remaining (non-attention) linear
            # modules, factored or dense.
            frobnorm, nucnorm, bound, nonorth = [], [], [], []
            for name, module in model.named_modules():
                if not any(
                        block in name
                        for block in ['embed', '_query', '_key',
                                      '_value', 'output_perform']):
                    if hasattr(module, 'frobgrad') and not hasattr(module, 'get_UVT'):
                        U, VT = module.U.data, module.VT.data
                        frobnorm.append(frobenius_norm(U, VT))
                        nucnorm.append(torch.norm(torch.matmul(U, VT), 'nuc'))
                        nonorth.append(sum(non_orthogonality(U, VT)) / 2.)
                        bound.append((U.pow(2).sum() + VT.pow(2).sum()) / 2.)
                    elif hasattr(module, 'weight'):
                        frobnorm.append(torch.norm(module.weight.data))
                        nucnorm.append(torch.norm(module.weight.data, 'nuc'))
            writer.add_scalar('FrobNorm/Linear',
                              sum(frobnorm) / len(frobnorm), epoch)
            writer.add_scalar('NucNorm/Linear',
                              sum(nucnorm) / len(nucnorm), epoch)
            # Bound/NonOrth only exist when factored modules were found.
            if nonorth:
                writer.add_scalar('NucNorm/Linear-Bound',
                                  sum(bound) / len(bound), epoch)
                writer.add_scalar('NonOrth/Linear',
                                  sum(nonorth) / len(nonorth), epoch)
        # train for one epoch
        train(args, trainer, dataset, epoch, batch_offset)
        # evaluate on validate set
        if epoch % args.validate_interval == 0:
            for k, subset in enumerate(args.valid_subset.split(',')):
                val_loss = validate(args, trainer, dataset, subset, epoch)
                if k == 0:
                    # only use first validation loss to update the learning schedule
                    lr = trainer.lr_step(epoch, val_loss)
                    # save checkpoint
                    if not args.no_save:
                        save_checkpoint(trainer, args, epoch, 0, val_loss)
            for k in ['loss', 'nll_loss']:
                writer.add_scalar('valid/' + k,
                                  trainer.meters['valid_' + k].avg, epoch)
                writer.add_scalar('train/' + k,
                                  trainer.meters['train_' + k].avg, epoch)
        else:
            lr = trainer.lr_step(epoch)
        epoch += 1
        batch_offset = 0
        if trainer.get_num_updates() >= max_update:
            break
    train_meter.stop()
    print('| done training in {:.1f} seconds'.format(train_meter.sum))
    writer.flush()
    # Report compression ratio: rebuild the full-rank model to count its
    # parameters when low-rank compression was used.
    newpar = sum(p.numel() for p in model.parameters())
    if 0.0 < args.rank_scale < 1.0:
        args.rank_scale = 1.0
        origpar = sum(p.numel() for p in models.build_model(
            args, dataset.src_dict, dataset.dst_dict).parameters())
    else:
        origpar = newpar
    if args.distributed_rank <= 0:
        with open(os.path.join(args.save_dir, 'results.json'), 'w') as f:
            json.dump(
                {
                    'final validation loss':
                        trainer.meters['valid_nll_loss'].avg,
                    'original parameter count': origpar,
                    'compressed parameter count': newpar,
                    'compression ratio': newpar / origpar
                },
                f,
                indent=4)
def build_criterion(self):
    """Set up the grader.

    Note: TorchAgent will call this without ready=True before
    ``self.args`` is ready.
    """
    grader = criterions.build_criterion(self.args, self.task)
    return grader
def setup_training(args):
    """Parse args, load dataset, and load model trainer.

    Args:
        args: parsed command-line arguments (device, binary data
            prefixes, checkpointing options).

    Returns:
        (extra_state, trainer, dataset) tuple: restored checkpoint state
        (if any), the built Trainer, and the loaded dataset.

    Raises:
        NotImplementedError: when CUDA is unavailable (CPU training is
            not supported).
    """
    if not torch.cuda.is_available():
        raise NotImplementedError("Training on CPU is not supported")
    torch.cuda.set_device(args.device_id)
    torch.manual_seed(args.seed)
    # Load dataset
    splits = [args.train_subset, args.valid_subset]
    validate_and_set_default_args(args)
    train_corpus = pytorch_translate_data.ParallelCorpusConfig(
        source=pytorch_translate_data.CorpusConfig(
            dialect=args.source_lang, data_file=args.train_source_binary_prefix),
        target=pytorch_translate_data.CorpusConfig(
            dialect=args.target_lang, data_file=args.train_target_binary_prefix),
    )
    eval_corpus = pytorch_translate_data.ParallelCorpusConfig(
        source=pytorch_translate_data.CorpusConfig(
            dialect=args.source_lang, data_file=args.eval_source_binary_prefix),
        target=pytorch_translate_data.CorpusConfig(
            dialect=args.target_lang, data_file=args.eval_target_binary_prefix),
    )
    if args.log_verbose:
        print("Starting to load binarized data files.", flush=True)
    dataset = pytorch_translate_data.load_binarized_dataset(
        train_corpus=train_corpus,
        eval_corpus=eval_corpus,
        train_split=args.train_subset,
        eval_split=args.valid_subset,
        args=args,
    )
    if args.log_verbose:
        print("Finished loading dataset", flush=True)
    if args.source_lang is None or args.target_lang is None:
        # record inferred languages in args, so that it's saved in checkpoints
        args.source_lang, args.target_lang = dataset.src, dataset.dst
    print(f"| [{dataset.src}] dictionary: {len(dataset.src_dict)} types")
    print(f"| [{dataset.dst}] dictionary: {len(dataset.dst_dict)} types")
    for split in splits:
        print(f"| {split} {len(dataset.splits[split])} examples")
    # Build model and criterion
    model = models.build_model(args, dataset.src_dict, dataset.dst_dict)
    criterion = criterions.build_criterion(
        args, dataset.src_dict, dataset.dst_dict)
    print(f"| model {args.arch}, criterion {criterion.__class__.__name__}")
    print(f"| num. model params: \
{sum(p.data.numel() for p in model.parameters())}")
    # Build trainer
    trainer = Trainer(args, model, criterion)
    print(f"| training on {args.distributed_world_size} GPUs")
    print(
        f"| max tokens per GPU = {args.max_tokens} and \
max sentences per GPU = {args.max_sentences}",
        flush=True,
    )
    extra_state = load_existing_checkpoint(
        args.save_dir, args.restore_file, trainer)
    return extra_state, trainer, dataset
def main(args):
    """Run single-process training: load data, build model/criterion/
    trainer, then loop over epochs with validation and checkpointing
    until the learning rate falls below ``--min-lr`` or ``--max-epoch``
    is reached.

    Args:
        args: parsed command-line arguments.

    Raises:
        NotImplementedError: when CUDA is unavailable (CPU training is
            not supported).
    """
    print(args)
    if not torch.cuda.is_available():
        raise NotImplementedError('Training on CPU is not supported')
    torch.cuda.set_device(args.device_id)
    torch.manual_seed(args.seed)
    # Load dataset
    splits = ['train', 'valid']
    if data.has_binary_files(args.data, splits):
        dataset = data.load_dataset(
            args.data, splits, args.source_lang, args.target_lang)
    else:
        dataset = data.load_raw_text_dataset(
            args.data, splits, args.source_lang, args.target_lang)
    if args.source_lang is None or args.target_lang is None:
        # record inferred languages in args, so that it's saved in checkpoints
        args.source_lang, args.target_lang = dataset.src, dataset.dst
    print('| [{}] dictionary: {} types'.format(dataset.src, len(dataset.src_dict)))
    print('| [{}] dictionary: {} types'.format(dataset.dst, len(dataset.dst_dict)))
    for split in splits:
        print('| {} {} {} examples'.format(
            args.data, split, len(dataset.splits[split])))
    # Build model and criterion
    model = models.build_model(args, dataset.src_dict, dataset.dst_dict)
    criterion = criterions.build_criterion(
        args, dataset.src_dict, dataset.dst_dict)
    print('| model {}, criterion {}'.format(
        args.arch, criterion.__class__.__name__))
    print('| num. model params: {}'.format(
        sum(p.data.numel() for p in model.parameters())))
    # Build trainer
    trainer = Trainer(args, model, criterion)
    print('| training on {} GPUs'.format(args.distributed_world_size))
    print('| max tokens per GPU = {} and max sentences per GPU = {}'.format(
        args.max_tokens,
        args.max_sentences,
    ))
    # Load the latest checkpoint if one is available
    os.makedirs(args.save_dir, exist_ok=True)
    checkpoint_path = os.path.join(args.save_dir, args.restore_file)
    extra_state = trainer.load_checkpoint(checkpoint_path)
    if extra_state is not None:
        epoch = extra_state['epoch']
        batch_offset = extra_state['batch_offset']
        print('| loaded checkpoint {} (epoch {})'.format(
            checkpoint_path, epoch))
        # batch_offset == 0 means the restored epoch finished cleanly,
        # so step the LR schedule and resume from the next epoch.
        if batch_offset == 0:
            trainer.lr_step(epoch)
            epoch += 1
    else:
        epoch, batch_offset = 1, 0
    # Train until the learning rate gets too small
    max_epoch = args.max_epoch or math.inf
    lr = trainer.get_lr()
    train_meter = StopwatchMeter()
    train_meter.start()
    while lr > args.min_lr and epoch <= max_epoch:
        # train for one epoch
        train(args, trainer, dataset, epoch, batch_offset)
        # evaluate on validate set
        for k, subset in enumerate(args.valid_subset.split(',')):
            val_loss = validate(args, trainer, dataset, subset, epoch)
            if k == 0:
                # only use first validation loss to update the learning schedule
                lr = trainer.lr_step(epoch, val_loss)
                # save checkpoint
                if not args.no_save:
                    save_checkpoint(trainer, args, epoch, 0, val_loss)
        epoch += 1
        batch_offset = 0
    train_meter.stop()
    print('| done training in {:.1f} seconds'.format(train_meter.sum))
def setup_training(args):
    """Parse args, load dataset, and load model trainer.

    Raw-text variant: corpora are read from text files and binarized
    on the fly.

    Args:
        args: parsed command-line arguments (device, text-file paths,
            checkpointing options).

    Returns:
        (extra_state, trainer, dataset) tuple: restored checkpoint state
        (if any), the built Trainer, and the loaded dataset.

    Raises:
        NotImplementedError: when CUDA is unavailable (CPU training is
            not supported).
    """
    if not torch.cuda.is_available():
        raise NotImplementedError('Training on CPU is not supported')
    torch.cuda.set_device(args.device_id)
    torch.manual_seed(args.seed)
    # Load dataset
    splits = [args.train_subset, args.valid_subset]
    # Default dialect labels when none are given on the command line.
    if args.source_lang is None:
        args.source_lang = 'src'
    if args.target_lang is None:
        args.target_lang = 'tgt'
    assert_corpora_files_specified(args)
    train_corpus = pytorch_translate_data.ParallelCorpusConfig(
        source=pytorch_translate_data.CorpusConfig(
            dialect=args.source_lang,
            data_file=args.train_source_text_file,
        ),
        target=pytorch_translate_data.CorpusConfig(
            dialect=args.target_lang,
            data_file=args.train_target_text_file,
        ),
    )
    eval_corpus = pytorch_translate_data.ParallelCorpusConfig(
        source=pytorch_translate_data.CorpusConfig(
            dialect=args.source_lang,
            data_file=args.eval_source_text_file,
        ),
        target=pytorch_translate_data.CorpusConfig(
            dialect=args.target_lang,
            data_file=args.eval_target_text_file,
        ),
    )
    if args.log_verbose:
        print('Starting to load raw text files.', flush=True)
    dataset = pytorch_translate_data.load_raw_text_dataset(
        train_corpus=train_corpus,
        eval_corpus=eval_corpus,
        train_split=args.train_subset,
        eval_split=args.valid_subset,
        args=args,
    )
    if args.log_verbose:
        print('Finished loading dataset', flush=True)
    # NOTE(review): both languages were defaulted above, so this branch
    # can never trigger here — confirm whether it is intentional.
    if args.source_lang is None or args.target_lang is None:
        # record inferred languages in args, so that it's saved in checkpoints
        args.source_lang, args.target_lang = dataset.src, dataset.dst
    print('| [{}] dictionary: {} types'.format(
        dataset.src,
        len(dataset.src_dict),
    ))
    print('| [{}] dictionary: {} types'.format(
        dataset.dst,
        len(dataset.dst_dict),
    ))
    for split in splits:
        print('| {} {} examples'.format(
            split,
            len(dataset.splits[split]),
        ))
    # Build model and criterion
    model = models.build_model(args, dataset.src_dict, dataset.dst_dict)
    criterion = criterions.build_criterion(
        args, dataset.src_dict, dataset.dst_dict)
    print('| model {}, criterion {}'.format(
        args.arch,
        criterion.__class__.__name__,
    ))
    print('| num. model params: {}'.format(
        sum(p.data.numel() for p in model.parameters())))
    # Build trainer
    trainer = Trainer(args, model, criterion)
    print('| training on {} GPUs'.format(args.distributed_world_size))
    print(
        '| max tokens per GPU = {} and max sentences per GPU = {}'.format(
            args.max_tokens,
            args.max_sentences,
        ),
        flush=True,
    )
    extra_state = load_existing_checkpoint(
        args.save_dir,
        args.restore_file,
        trainer,
    )
    return extra_state, trainer, dataset