def main(cl_arguments): """ Run REPL for a CoLA model """ # Arguments handling # cl_args = handle_arguments(cl_arguments) args = config.params_from_file(cl_args.config_file, cl_args.overrides) check_arg_name(args) assert args.target_tasks == "cola", "Currently only supporting CoLA. ({})".format( args.target_tasks) if args.cuda >= 0: try: if not torch.cuda.is_available(): raise EnvironmentError("CUDA is not available, or not detected" " by PyTorch.") log.info("Using GPU %d", args.cuda) torch.cuda.set_device(args.cuda) except Exception: log.warning("GPU access failed. You might be using a CPU-only" " installation of PyTorch. Falling back to CPU.") args.cuda = -1 if args.tokenizer == "auto": args.tokenizer = select_tokenizer(args) if args.pool_type == "auto": args.pool_type = select_pool_type(args) # Prepare data # _, target_tasks, vocab, word_embs = build_tasks(args) tasks = sorted(set(target_tasks), key=lambda x: x.name) # Build or load model # cuda_device = parse_cuda_list_arg(args.cuda) model = build_model(args, vocab, word_embs, tasks, cuda_device) log.info("Loading existing model from %s...", cl_args.model_file_path) load_model_state(model, cl_args.model_file_path, args.cuda, [], strict=False) # Inference Setup # model.eval() vocab = Vocabulary.from_files(os.path.join(args.exp_dir, "vocab")) indexers = build_indexers(args) task = take_one(tasks) model_preprocessing_interface = ModelPreprocessingInterface(args) # Run Inference # if cl_args.inference_mode == "repl": assert cl_args.input_path is None assert cl_args.output_path is None print("Running REPL for task: {}".format(task.name)) run_repl(model, model_preprocessing_interface, vocab, indexers, task, args) elif cl_args.inference_mode == "corpus": run_corpus_inference( model, model_preprocessing_interface, vocab, indexers, task, args, cl_args.input_path, cl_args.input_format, cl_args.output_path, cl_args.eval_output_path, ) else: raise KeyError(cl_args.inference_mode)
def main(cl_arguments): """ Train a model for multitask-training.""" cl_args = handle_arguments(cl_arguments) args = config.params_from_file(cl_args.config_file, cl_args.overrides) # Check for deprecated arg names check_arg_name(args) args, seed = initial_setup(args, cl_args) # Load tasks log.info("Loading tasks...") start_time = time.time() cuda_device = parse_cuda_list_arg(args.cuda) pretrain_tasks, target_tasks, vocab, word_embs = build_tasks(args) tasks = sorted(set(pretrain_tasks + target_tasks), key=lambda x: x.name) log.info("\tFinished loading tasks in %.3fs", time.time() - start_time) log.info("\t Tasks: {}".format([task.name for task in tasks])) # Build model log.info("Building model...") start_time = time.time() model = build_model(args, vocab, word_embs, tasks, cuda_device) log.info("Finished building model in %.3fs", time.time() - start_time) # Start Tensorboard if requested if cl_args.tensorboard: tb_logdir = os.path.join(args.run_dir, "tensorboard") _run_background_tensorboard(tb_logdir, cl_args.tensorboard_port) check_configurations(args, pretrain_tasks, target_tasks) if args.do_pretrain: # Train on pretrain tasks log.info("Training...") stop_metric = pretrain_tasks[0].val_metric if len(pretrain_tasks) == 1 else "macro_avg" should_decrease = ( pretrain_tasks[0].val_metric_decreases if len(pretrain_tasks) == 1 else False ) trainer, _, opt_params, schd_params = build_trainer( args, cuda_device, [], model, args.run_dir, should_decrease, phase="pretrain" ) to_train = [(n, p) for n, p in model.named_parameters() if p.requires_grad] _ = trainer.train( pretrain_tasks, stop_metric, args.batch_size, args.weighting_method, args.scaling_method, to_train, opt_params, schd_params, args.load_model, phase="pretrain", ) # For checkpointing logic if not args.do_target_task_training: strict = True else: strict = False if args.do_target_task_training: # Train on target tasks pre_target_train_path = setup_target_task_training(args, target_tasks, model, strict) target_tasks_to_train = copy.deepcopy(target_tasks) # Check for previous target train checkpoints task_to_restore, _, _ = check_for_previous_checkpoints( args.run_dir, target_tasks_to_train, "target_train", args.load_model ) if task_to_restore is not None: # If there is a task to restore from, target train only on target tasks # including and following that task. last_task_index = [task.name for task in target_tasks_to_train].index(task_to_restore) target_tasks_to_train = target_tasks_to_train[last_task_index:] for task in target_tasks_to_train: # Skip tasks that should not be trained on. if task.eval_only_task: continue params_to_train = load_model_for_target_train_run( args, pre_target_train_path, model, strict, task, cuda_device ) trainer, _, opt_params, schd_params = build_trainer( args, cuda_device, [task.name], model, args.run_dir, task.val_metric_decreases, phase="target_train", ) _ = trainer.train( tasks=[task], stop_metric=task.val_metric, batch_size=args.batch_size, weighting_method=args.weighting_method, scaling_method=args.scaling_method, train_params=params_to_train, optimizer_params=opt_params, scheduler_params=schd_params, load_model=(task.name == task_to_restore), phase="target_train", ) if args.do_full_eval: log.info("Evaluating...") splits_to_write = evaluate.parse_write_preds_arg(args.write_preds) # Evaluate on target_tasks. for task in target_tasks: # Find the task-specific best checkpoint to evaluate on. 
task_params = get_model_attribute(model, "_get_task_params", cuda_device) task_to_use = task_params(task.name).get("use_classifier", task.name) ckpt_path = get_best_checkpoint_path(args, "eval", task_to_use) assert ckpt_path is not None load_model_state(model, ckpt_path, cuda_device, skip_task_models=[], strict=strict) evaluate_and_write(args, model, [task], splits_to_write, cuda_device) if args.delete_checkpoints_when_done and not args.keep_all_checkpoints: log.info("Deleting all checkpoints.") delete_all_checkpoints(args.run_dir) log.info("Done!")
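
# A minimal sketch of driving the multitask pipeline above programmatically
# (assumptions: the --config_file/--overrides flags mirror handle_arguments,
# and the config path and override keys are illustrative, not taken from a
# real experiment):
def _example_run():  # hypothetical helper, not part of the original module
    main(
        [
            "--config_file", "jiant/config/demo.conf",
            "--overrides", "do_pretrain = 1, do_target_task_training = 1",
        ]
    )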
def infer_jiant(exp_dir, task, items, batch_size=4):
    # use cached tokenizer
    path = join(exp_dir, 'transformers_cache')
    with env(PYTORCH_TRANSFORMERS_CACHE=path):
        reload(transformers.file_utils)

    # use terra model for lidirus
    run_dir = join(
        exp_dir,
        TERRA if task == LIDIRUS else task
    )

    loggers = [
        LOGGER,
        pytorch_pretrained_bert.modeling.logger,
        transformers.file_utils.logger,
        transformers.configuration_utils.logger,
        transformers.modeling_utils.logger,
        transformers.tokenization_utils.logger,
        allennlp.nn.initializers.logger
    ]
    with no_loggers(loggers):
        path = join(run_dir, 'params.conf')
        args = params_from_file(path)
    cuda_device = parse_cuda_list_arg('auto')

    args.local_log_path = join(run_dir, 'log.log')
    args.exp_dir = args.project_dir = exp_dir
    args.run_dir = run_dir

    log('Build tasks')
    with no_loggers(loggers), TemporaryDirectory() as dir:
        args.exp_dir = args.data_dir = dir  # hide pkl, preproc
        dump_task(dir, task, items=[])  # mock empty train, val, test
        if task in (TERRA, LIDIRUS):
            dump_task(dir, LIDIRUS if task == TERRA else TERRA, items=[])
        _, tasks, vocab, word_embs = build_tasks(args, cuda_device)

    log('Build model, load transformers pretrain')
    with no_loggers(loggers):
        args.exp_dir = exp_dir  # use transformers cache
        model = build_model(args, vocab, word_embs, tasks, cuda_device)

    path = join(run_dir, 'model.th')
    log(f'Load state {path!r}')
    load_model_state(model, path, cuda_device)

    log(f'Build mock task, infer via eval, batch_size={batch_size}')
    with no_loggers(loggers), TemporaryDirectory() as dir:
        args.exp_dir = args.data_dir = dir
        dump_task(dir, task, items)

        if task in (TERRA, LIDIRUS):
            # choose one at inference
            args.pretrain_tasks = task
            args.target_tasks = task

        _, tasks, _, _ = build_tasks(args, cuda_device)
        _, preds = evaluate.evaluate(
            model, tasks,
            batch_size, cuda_device, 'test'
        )
        evaluate.write_preds(
            tasks, preds, dir,
            'test', args.write_strict_glue_format
        )

        return list(load_preds(dir, task))
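
# For reference, a minimal sketch of the `env` helper used above (assumption:
# the project defines it as a context manager that temporarily sets
# environment variables and restores them on exit; shown here only for
# illustration, the real module provides its own):
import os
from contextlib import contextmanager


@contextmanager
def env(**variables):
    original = dict(os.environ)
    os.environ.update(variables)
    try:
        yield
    finally:
        os.environ.clear()
        os.environ.update(original)


# Usage sketch for infer_jiant (assumptions: `exp` holds `transformers_cache/`
# and a per-task run dir with `params.conf` and `model.th`; the record follows
# the TERRA premise/hypothesis format and its values are illustrative):
if __name__ == '__main__':
    items = [
        {
            'premise': 'Кошка спала на диване.',
            'hypothesis': 'Кошка спала.',
            'idx': 0,
        }
    ]
    preds = infer_jiant('exp', TERRA, items, batch_size=4)
    print(preds)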