def main(cl_arguments):
    """Run inference (interactive REPL or whole corpus) with a CoLA model.

    The function parses the command line, loads the experiment
    configuration, builds the target task and the model, restores the
    weights stored at ``cl_args.model_file_path`` and finally dispatches on
    ``cl_args.inference_mode``.

    Parameters
    ----------
    cl_arguments : command-line arguments, forwarded to ``handle_arguments``.

    Raises
    ------
    AssertionError : if ``args.target_tasks`` is not ``"cola"``, or if an
        input/output path is given together with the ``"repl"`` mode.
    KeyError : if the inference mode is neither ``"repl"`` nor ``"corpus"``.
    """
    # Arguments handling #
    cl_args = handle_arguments(cl_arguments)
    args = config.params_from_file(cl_args.config_file, cl_args.overrides)
    check_arg_name(args)
    assert args.target_tasks == "cola", "Currently only supporting CoLA. ({})".format(
        args.target_tasks
    )

    # Device selection: any failure while claiming the GPU falls back to CPU #
    if args.cuda >= 0:
        try:
            if torch.cuda.is_available():
                log.info("Using GPU %d", args.cuda)
                torch.cuda.set_device(args.cuda)
            else:
                raise EnvironmentError("CUDA is not available, or not detected" " by PyTorch.")
        except Exception:
            log.warning(
                "GPU access failed. You might be using a CPU-only"
                " installation of PyTorch. Falling back to CPU."
            )
            args.cuda = -1

    # Prepare data #
    _, target_tasks, vocab, word_embs = build_tasks(args)
    tasks = list(set(target_tasks))
    tasks.sort(key=lambda t: t.name)

    # Build or load model #
    model = build_model(args, vocab, word_embs, tasks)
    checkpoint = cl_args.model_file_path
    log.info("Loading existing model from %s...", checkpoint)
    load_model_state(model, checkpoint, args.cuda, [], strict=False)

    # Inference Setup #
    model.eval()
    # The vocabulary is re-read from the experiment directory for inference.
    vocab_dir = os.path.join(args.exp_dir, "vocab")
    vocab = Vocabulary.from_files(vocab_dir)
    indexers = build_indexers(args)
    task = take_one(tasks)

    # Run Inference #
    mode = cl_args.inference_mode
    if mode == "corpus":
        run_corpus_inference(
            model,
            vocab,
            indexers,
            task,
            args,
            cl_args.input_path,
            cl_args.input_format,
            cl_args.output_path,
            cl_args.eval_output_path,
        )
    elif mode == "repl":
        # The REPL reads from the user, so file paths make no sense here.
        assert cl_args.input_path is None
        assert cl_args.output_path is None
        print("Running REPL for task: {}".format(task.name))
        run_repl(model, vocab, indexers, task, args)
    else:
        raise KeyError(mode)
def load_model_for_target_train_run(args, ckpt_path, model, strict, task, cuda_devices):
    """
    Pick the parameters to optimise while training on one target task.

    With the "finetune" transfer paradigm the model state is first reloaded
    from ``ckpt_path`` (skipping the task-specific module of ``task``) and
    every parameter that requires gradients is trained. With any other
    paradigm (i.e. "frozen") only the task-specific module is trained, plus
    any non-pretraining ELMo scalar mixes.

    Parameters
    -------------------
    args: config.Param object,
    ckpt_path: str: path to reload model from,
    model: MultiTaskModel object,
    strict: bool, forwarded to load_model_state,
    task: Task object,
    cuda_devices: device spec forwarded to load_model_state,
        get_model_attribute and uses_cuda; when it is a list it is also
        used as the DataParallel device_ids.

    Returns
    -------------------
    to_train: List of tuples of (name, weight) of trainable parameters

    """

    def _trainable(module):
        # (name, parameter) pairs of `module` that still require gradients.
        return [(name, param) for name, param in module.named_parameters() if param.requires_grad]

    if args.transfer_paradigm != "finetune":
        # args.transfer_paradigm == "frozen":
        # The list is empty if args.input_module != "elmo"; scalar_mix_0 is
        # always the pretraining scalar mix and is therefore excluded.
        elmo_scalars = [
            (name, param)
            for name, param in model.named_parameters()
            if "scalar_mix" in name and "scalar_mix_0" not in name
        ]
        # Fails when sep_embs_for_skip is 0 and elmo_scalars has nonzero
        # length.
        assert_for_log(
            not elmo_scalars or args.sep_embs_for_skip,
            "Error: ELMo scalars loaded and will be updated in do_target_task_training but "
            "they should not be updated! Check sep_embs_for_skip flag or make an issue.",
        )
        # Only train the task-specific module.
        pred_module = get_model_attribute(model, "%s_mdl" % task.name, cuda_devices)
        to_train = _trainable(pred_module)
        to_train += elmo_scalars
    else:
        load_model_state(
            model, ckpt_path, cuda_devices, skip_task_models=[task.name], strict=strict
        )
        # Train both the task specific models as well as sentence encoder.
        to_train = _trainable(model)

    # NOTE(review): `model` is rebound locally only; the DataParallel wrapper
    # built below is never returned to the caller - confirm this is intended.
    if uses_cuda(cuda_devices):
        model = model.cuda()
    if isinstance(cuda_devices, list):
        model = nn.DataParallel(model, device_ids=cuda_devices)
    return to_train
def main(cl_arguments):
    """Run the full multitask pipeline: pretraining, target training, evaluation.

    Which phases run is decided by the ``do_pretrain``,
    ``do_target_task_training`` and ``do_full_eval`` flags of the loaded
    configuration. The trainer implementation is selected through the
    optional ``train_type`` config entry.

    Parameters
    ----------
    cl_arguments : command-line arguments, forwarded to ``handle_arguments``.
    """
    cl_args = handle_arguments(cl_arguments)
    args = config.params_from_file(cl_args.config_file, cl_args.overrides)
    train_type = args.get('train_type', "SamplingMultiTaskTrainer")
    if train_type != "SamplingMultiTaskTrainer":
        # Make a non-default trainer choice stand out on the console.
        print("\n\n\n", train_type, "\n\n\n")

    # Check for deprecated arg names
    check_arg_name(args)
    args, seed = initial_setup(args, cl_args)

    # Load tasks
    log.info("Loading tasks...")
    tic = time.time()
    pretrain_tasks, target_tasks, vocab, word_embs = build_tasks(args)
    tasks = list(set(pretrain_tasks + target_tasks))
    tasks.sort(key=lambda t: t.name)
    log.info("\tFinished loading tasks in %.3fs", time.time() - tic)
    log.info("\t Tasks: {}".format([t.name for t in tasks]))

    # Build model
    log.info("Building model...")
    tic = time.time()
    model = build_model(args, vocab, word_embs, tasks)
    log.info("Finished building model in %.3fs", time.time() - tic)

    # Start Tensorboard if requested
    if cl_args.tensorboard:
        tb_logdir = os.path.join(args.run_dir, "tensorboard")
        _run_background_tensorboard(tb_logdir, cl_args.tensorboard_port)

    check_configurations(args, pretrain_tasks, target_tasks)

    if args.do_pretrain:
        # Train on pretrain tasks
        log.info("Training...")
        # With a single pretraining task its own validation metric drives
        # early stopping; otherwise the macro average is used.
        if len(pretrain_tasks) == 1:
            stop_metric = pretrain_tasks[0].val_metric
            should_decrease = pretrain_tasks[0].val_metric_decreases
        else:
            stop_metric = "macro_avg"
            should_decrease = False
        trainer, _, opt_params, schd_params = build_trainer(
            args,
            [],
            model,
            args.run_dir,
            should_decrease,
            phase="pretrain",
            train_type=train_type,
        )
        to_train = [
            (name, param) for name, param in model.named_parameters() if param.requires_grad
        ]
        _ = trainer.train(
            pretrain_tasks,
            stop_metric,
            args.batch_size,
            args.weighting_method,
            args.scaling_method,
            to_train,
            opt_params,
            schd_params,
            args.load_model,
            phase="pretrain",
        )

    # For checkpointing logic: strict loading only when no target training runs.
    strict = not args.do_target_task_training

    if args.do_target_task_training:
        # Train on target tasks
        pre_target_train_path = setup_target_task_training(args, target_tasks, model, strict)
        target_tasks_to_train = copy.deepcopy(target_tasks)
        # Check for previous target train checkpoints
        task_to_restore, _, _ = check_for_previous_checkpoints(
            args.run_dir, target_tasks_to_train, "target_train", args.load_model
        )
        if task_to_restore is not None:
            # If there is a task to restore from, target train only on target tasks
            # including and following that task.
            task_names = [t.name for t in target_tasks_to_train]
            last_task_index = task_names.index(task_to_restore)
            target_tasks_to_train = target_tasks_to_train[last_task_index:]

        for task in target_tasks_to_train:
            # Skip tasks that should not be trained on.
            if task.eval_only_task:
                continue

            params_to_train = load_model_for_target_train_run(
                args, pre_target_train_path, model, strict, task
            )
            trainer, _, opt_params, schd_params = build_trainer(
                args,
                [task.name],
                model,
                args.run_dir,
                task.val_metric_decreases,
                phase="target_train",
                train_type=train_type,
            )
            # Only the task that was interrupted resumes from its checkpoint.
            resume = task.name == task_to_restore
            _ = trainer.train(
                tasks=[task],
                stop_metric=task.val_metric,
                batch_size=args.batch_size,
                weighting_method=args.weighting_method,
                scaling_method=args.scaling_method,
                train_params=params_to_train,
                optimizer_params=opt_params,
                scheduler_params=schd_params,
                load_model=resume,
                phase="target_train",
            )

    if args.do_full_eval:
        log.info("Evaluating...")
        splits_to_write = evaluate.parse_write_preds_arg(args.write_preds)

        # Evaluate on target_tasks.
        for task in target_tasks:
            # Find the task-specific best checkpoint to evaluate on.
            task_params = model._get_task_params(task.name)
            task_to_use = task_params.get("use_classifier", task.name)
            ckpt_path = get_best_checkpoint_path(args, "eval", task_to_use)
            assert ckpt_path is not None
            load_model_state(model, ckpt_path, args.cuda, skip_task_models=[], strict=strict)
            evaluate_and_write(args, model, [task], splits_to_write)

    if args.delete_checkpoints_when_done and not args.keep_all_checkpoints:
        log.info("Deleting all checkpoints.")
        delete_all_checkpoints(args.run_dir)

    log.info("Done!")
def infer_jiant(exp_dir, task, items, batch_size=4):
    """Predict labels for ``items`` with a trained jiant run found in ``exp_dir``.

    The run configuration and weights are read from ``exp_dir/<task>``
    (the TERRA run is used when ``task`` is LIDIRUS). The items are dumped
    into a temporary data directory, evaluated as the 'test' split, and the
    written predictions are read back.

    Parameters
    ----------
    exp_dir : experiment directory holding the run directories and the
        ``transformers_cache`` folder.
    task : task name.
    items : records to run inference on, passed to ``dump_task``.
    batch_size : evaluation batch size (default 4).

    Returns
    -------
    list of whatever ``load_preds`` yields for the task.
    """
    # use cached tokenizer: file_utils is reloaded while the cache path
    # override is active
    cache_dir = join(exp_dir, 'transformers_cache')
    with env(PYTORCH_TRANSFORMERS_CACHE=cache_dir):
        reload(transformers.file_utils)

    # use terra model for lidirus
    if task == LIDIRUS:
        run_name = TERRA
    else:
        run_name = task
    run_dir = join(exp_dir, run_name)

    # Loggers silenced while the noisy build/load steps run.
    loggers = [
        LOGGER,
        pytorch_pretrained_bert.modeling.logger,
        transformers.file_utils.logger,
        transformers.configuration_utils.logger,
        transformers.modeling_utils.logger,
        transformers.tokenization_utils.logger,
        allennlp.nn.initializers.logger,
    ]

    with no_loggers(loggers):
        conf_path = join(run_dir, 'params.conf')
        args = params_from_file(conf_path)
        cuda_device = parse_cuda_list_arg('auto')

    args.local_log_path = join(run_dir, 'log.log')
    args.exp_dir = args.project_dir = exp_dir
    args.run_dir = run_dir

    log('Build tasks')
    with no_loggers(loggers), TemporaryDirectory() as scratch:
        # hide pkl, preproc
        args.exp_dir = args.data_dir = scratch
        # mock empty train, val, test
        dump_task(scratch, task, items=[])
        if task in (TERRA, LIDIRUS):
            # TERRA and LIDIRUS come as a pair: mock the sibling task too.
            if task == TERRA:
                sibling = LIDIRUS
            else:
                sibling = TERRA
            dump_task(scratch, sibling, items=[])
        _, tasks, vocab, word_embs = build_tasks(args, cuda_device)

    log('Build model, load transformers pretrain')
    with no_loggers(loggers):
        # use transformers cache
        args.exp_dir = exp_dir
        model = build_model(args, vocab, word_embs, tasks, cuda_device)

    weights_path = join(run_dir, 'model.th')
    log(f'Load state {weights_path!r}')
    load_model_state(model, weights_path, cuda_device)

    log(f'Build mock task, infer via eval, batch_size={batch_size}')
    with no_loggers(loggers), TemporaryDirectory() as scratch:
        args.exp_dir = args.data_dir = scratch
        dump_task(scratch, task, items)

        if task in (TERRA, LIDIRUS):
            # choose one at inference
            args.pretrain_tasks = task
            args.target_tasks = task

        _, tasks, _, _ = build_tasks(args, cuda_device)
        _, preds = evaluate.evaluate(model, tasks, batch_size, cuda_device, 'test')
        evaluate.write_preds(tasks, preds, scratch, 'test', args.write_strict_glue_format)

        # Read the predictions back before the temporary directory is removed.
        return list(load_preds(scratch, task))
def main(cl_arguments):
    """Run pretraining, target-task training and evaluation with run records.

    Besides the usual pipeline this variant initialises a pickle of run
    records (when pretraining and ``args.records_pickle_path`` is set),
    skips target tasks whose name contains 'adv' or 'discriminator' at
    evaluation time, and can additionally evaluate the last recorded
    checkpoint when ``args.evaluate_final`` is set.

    Parameters
    ----------
    cl_arguments : command-line arguments, forwarded to ``handle_arguments``.
    """
    cl_args = handle_arguments(cl_arguments)
    args = config.params_from_file(cl_args.config_file, cl_args.overrides)
    # Check for deprecated arg names
    check_arg_name(args)
    args, seed = initial_setup(args, cl_args)

    # XXX Dylan's code: report the projection sizes when they are configured.
    try:
        log.info(f'\nK syn is {args.k_syn}')
        log.info(f'\nK sem is {args.k_sem}\n')
    except Exception:
        log.info('No projection matrices.')
    # XXX

    # Load tasks
    log.info("Loading tasks...")
    tic = time.time()
    pretrain_tasks, target_tasks, vocab, word_embs = build_tasks(args)
    tasks = list(set(pretrain_tasks + target_tasks))
    tasks.sort(key=lambda t: t.name)
    log.info("\tFinished loading tasks in %.3fs", time.time() - tic)
    log.info("\t Tasks: {}".format([t.name for t in tasks]))

    # Start a fresh records file for runs that pretrain.
    training_flag = args.do_pretrain
    if training_flag and args.records_pickle_path:
        with open(args.records_pickle_path, 'wb') as f:
            records_dict = {
                'run_name': args.run_name,
                'last_checkpoint': '',
                'training': dict(),
                'best_val': dict(),
                'last_val': dict(),
            }
            pickle.dump(records_dict, f)

    # Build model
    log.info("Building model...")
    tic = time.time()
    model = build_model(args, vocab, word_embs, tasks)
    log.info("Finished building model in %.3fs", time.time() - tic)

    # Start Tensorboard if requested
    if cl_args.tensorboard:
        tb_logdir = os.path.join(args.run_dir, "tensorboard_" + str(args.run_name))
        _run_background_tensorboard(tb_logdir, cl_args.tensorboard_port)

    check_configurations(args, pretrain_tasks, target_tasks)

    if args.do_pretrain:
        # Train on pretrain tasks
        log.info("Training...")
        if len(pretrain_tasks) == 1:
            stop_metric = pretrain_tasks[0].val_metric
            should_decrease = pretrain_tasks[0].val_metric_decreases
        else:
            stop_metric = "macro_avg"
            should_decrease = False
        trainer, _, opt_params, schd_params = build_trainer(
            args, [], model, args.run_dir, should_decrease, phase="pretrain"
        )
        to_train = [
            (name, param) for name, param in model.named_parameters() if param.requires_grad
        ]
        _ = trainer.train(
            pretrain_tasks,
            stop_metric,
            args.batch_size,
            args.weighting_method,
            args.scaling_method,
            to_train,
            opt_params,
            schd_params,
            args.load_model,
            phase="pretrain",
            args=args,
        )

    # For checkpointing logic
    strict = not args.do_target_task_training

    if args.do_target_task_training:
        # Train on target tasks
        pre_target_train_path = setup_target_task_training(args, target_tasks, model, strict)
        target_tasks_to_train = copy.deepcopy(target_tasks)
        # Check for previous target train checkpoints
        task_to_restore, _, _ = check_for_previous_checkpoints(
            args.run_dir, target_tasks_to_train, "target_train", args.load_model
        )
        if task_to_restore is not None:
            # If there is a task to restore from, target train only on target tasks
            # including and following that task.
            task_names = [t.name for t in target_tasks_to_train]
            last_task_index = task_names.index(task_to_restore)
            target_tasks_to_train = target_tasks_to_train[last_task_index:]

        for task in target_tasks_to_train:
            # Skip tasks that should not be trained on.
            if task.eval_only_task:
                continue

            params_to_train = load_model_for_target_train_run(
                args, pre_target_train_path, model, strict, task
            )
            trainer, _, opt_params, schd_params = build_trainer(
                args,
                [task.name],
                model,
                args.run_dir,
                task.val_metric_decreases,
                phase="target_train",
            )
            resume = task.name == task_to_restore
            _ = trainer.train(
                tasks=[task],
                stop_metric=task.val_metric,
                batch_size=args.batch_size,
                weighting_method=args.weighting_method,
                scaling_method=args.scaling_method,
                train_params=params_to_train,
                optimizer_params=opt_params,
                scheduler_params=schd_params,
                load_model=resume,
                phase="target_train",
            )

    # Tasks named like adversaries / discriminators are not evaluated.
    tasks_for_eval = [
        t for t in target_tasks if 'adv' not in t.name and 'discriminator' not in t.name
    ]

    if args.do_full_eval:
        log.info("Evaluating...")
        splits_to_write = evaluate.parse_write_preds_arg(args.write_preds)

        # Evaluate on target_tasks.
        for task in tasks_for_eval:
            # Find the task-specific best checkpoint to evaluate on.
            task_params = model._get_task_params(task.name)
            task_to_use = task_params.get("use_classifier", task.name)
            ckpt_path = get_best_checkpoint_path(args, "eval", task_to_use)
            assert ckpt_path is not None
            load_model_state(model, ckpt_path, args.cuda, skip_task_models=[], strict=strict)

            if args.evaluate_final:
                records_dict = get_records_dict(args.records_pickle_path)
            else:
                records_dict = None
            # When a final evaluation follows, predictions are written here
            # only if the best checkpoint is also the last one.
            do_write = (not args.evaluate_final) or (
                records_dict is not None and ckpt_path == records_dict['last_checkpoint']
            )
            evaluate_and_write(
                args, model, [task], splits_to_write, mode='best_val', do_write=do_write
            )

        # NOTE(review): nesting reconstructed from a whitespace-collapsed
        # source; this step is assumed to run once after the loop above and
        # relies on `ckpt_path` from its final iteration - confirm.
        if args.evaluate_final:
            records_dict = get_records_dict(args.records_pickle_path)
            if ckpt_path != records_dict['last_checkpoint']:
                try:
                    load_model_state(
                        model,
                        records_dict['last_checkpoint'],
                        args.cuda,
                        skip_task_models=[],
                        strict=strict,
                    )
                    for task in tasks_for_eval:
                        evaluate_and_write(
                            args, model, [task], splits_to_write, mode='last_val', do_write=True
                        )
                except Exception:
                    log.info(
                        f"Did not record last_checkpoint path properly. Looks like: {records_dict['last_checkpoint']}"
                    )
            else:
                # Best and last checkpoint coincide: reuse the best results.
                records_dict['last_val'] = records_dict['best_val']
                write_records_dict(records_dict, args.records_pickle_path)

    log.info("Done!")
def main(cl_arguments):
    """Train a model for multitask-training and log results to a shared CSV.

    Runs, depending on the configuration flags, pretraining
    (``do_pretrain``), target-task training (``do_target_task_training``)
    and evaluation (``do_full_eval``). After evaluation the validation
    results of the run are prepended to ``tabular_results.csv`` inside
    ``SMALL_SHARED_SERVER_DIR``; the file is created when it does not
    exist yet.

    Parameters
    ----------
    cl_arguments : command-line arguments, forwarded to ``handle_arguments``.

    Raises
    ------
    AssertionError : if no best checkpoint is found for an evaluated task.
    """
    cl_args = handle_arguments(cl_arguments)
    args = config.params_from_file(cl_args.config_file, cl_args.overrides)
    # Check for deprecated arg names
    check_arg_name(args)
    args, seed = initial_setup(args, cl_args)

    # Store the run description, if any
    if FLAGS.description:
        with open(Path(args.run_dir, 'description.txt'), 'w') as f:
            f.write(FLAGS.description)

    # Load tasks
    log.info("Loading tasks...")
    start_time = time.time()
    # Device indices come from the command-line FLAGS rather than args.cuda.
    cuda_device = FLAGS.device_idxs
    pretrain_tasks, target_tasks, vocab, word_embs = build_tasks(args, cuda_device)
    tasks = sorted(set(pretrain_tasks + target_tasks), key=lambda x: x.name)
    log.info("\tFinished loading tasks in %.3fs", time.time() - start_time)
    log.info("\t Tasks: {}".format([task.name for task in tasks]))

    # Build model
    log.info("Building model...")
    start_time = time.time()
    model = build_model(args, vocab, word_embs, tasks, cuda_device)
    log.info("Finished building model in %.3fs", time.time() - start_time)

    # Start Tensorboard if requested
    if cl_args.tensorboard:
        tb_logdir = os.path.join(args.run_dir, "tensorboard")
        _run_background_tensorboard(tb_logdir, cl_args.tensorboard_port)

    check_configurations(args, pretrain_tasks, target_tasks)

    if args.do_pretrain:
        # Train on pretrain tasks
        log.info("Training...")
        # A single pretraining task is early-stopped on its own metric,
        # several tasks on their macro average.
        single_task = len(pretrain_tasks) == 1
        stop_metric = pretrain_tasks[0].val_metric if single_task else "macro_avg"
        should_decrease = pretrain_tasks[0].val_metric_decreases if single_task else False
        trainer, _, opt_params, schd_params = build_trainer(
            args, cuda_device, [], model, args.run_dir, should_decrease, phase="pretrain"
        )
        to_train = [(n, p) for n, p in model.named_parameters() if p.requires_grad]
        _ = trainer.train(
            pretrain_tasks,
            stop_metric,
            args.batch_size,
            args.weighting_method,
            args.scaling_method,
            to_train,
            opt_params,
            schd_params,
            args.load_model,
            phase="pretrain",
        )

    # For checkpointing logic: strict loading only when no target training runs.
    strict = not args.do_target_task_training

    if args.do_target_task_training:
        # Train on target tasks
        pre_target_train_path = setup_target_task_training(args, target_tasks, model, strict)
        target_tasks_to_train = copy.deepcopy(target_tasks)
        # Check for previous target train checkpoints
        task_to_restore, _, _ = check_for_previous_checkpoints(
            args.run_dir, target_tasks_to_train, "target_train", args.load_model
        )
        if task_to_restore is not None:
            # If there is a task to restore from, target train only on target tasks
            # including and following that task.
            last_task_index = [task.name for task in target_tasks_to_train].index(
                task_to_restore
            )
            target_tasks_to_train = target_tasks_to_train[last_task_index:]

        for task in target_tasks_to_train:
            # Skip tasks that should not be trained on.
            if task.eval_only_task:
                continue

            params_to_train = load_model_for_target_train_run(
                args, pre_target_train_path, model, strict, task, cuda_device
            )
            trainer, _, opt_params, schd_params = build_trainer(
                args,
                cuda_device,
                [task.name],
                model,
                args.run_dir,
                task.val_metric_decreases,
                phase="target_train",
            )
            _ = trainer.train(
                tasks=[task],
                stop_metric=task.val_metric,
                batch_size=args.batch_size,
                weighting_method=args.weighting_method,
                scaling_method=args.scaling_method,
                train_params=params_to_train,
                optimizer_params=opt_params,
                scheduler_params=schd_params,
                # Only the interrupted task resumes from its checkpoint.
                load_model=(task.name == task_to_restore),
                phase="target_train",
            )

    if args.do_full_eval:
        log.info("Evaluating...")
        splits_to_write = evaluate.parse_write_preds_arg(args.write_preds)
        results_dict = {'run_name': [args.run_name]}

        # Evaluate on target_tasks.
        for task in target_tasks:
            # Find the task-specific best checkpoint to evaluate on.
            task_params = get_model_attribute(model, "_get_task_params", cuda_device)
            task_to_use = task_params(task.name).get("use_classifier", task.name)
            ckpt_path = get_best_checkpoint_path(args, "eval", task_to_use)
            assert ckpt_path is not None
            load_model_state(model, ckpt_path, cuda_device, skip_task_models=[], strict=strict)
            current_tasks_val_results = evaluate_and_write(
                args, model, [task], splits_to_write, cuda_device
            )
            results_dict = {**results_dict, **current_tasks_val_results}

        # NOTE(review): placement after the loop is reconstructed from a
        # whitespace-collapsed source - confirm the results are meant to be
        # written once per run rather than once per task.
        tabular_results_csv = os.path.join(SMALL_SHARED_SERVER_DIR, "tabular_results.csv")
        new_results_df = pd.DataFrame.from_dict(results_dict)
        if os.path.exists(tabular_results_csv):
            existing_results_df = pd.read_csv(tabular_results_csv, index_col=False)
            # DataFrame.append was removed in pandas 2.0; concat is the
            # equivalent and keeps the new row on top.
            updated_results_df = pd.concat([new_results_df, existing_results_df], sort=False)
        else:
            # First run on this server: start the table instead of crashing
            # at the very end of a finished run.
            log.info("No existing results at %s; creating it.", tabular_results_csv)
            updated_results_df = new_results_df
        with open(tabular_results_csv, 'w') as f:
            log.info(f"Prepending results to {tabular_results_csv}.")
            updated_results_df.to_csv(f, header=True, index=False)

    if args.delete_checkpoints_when_done and not args.keep_all_checkpoints:
        log.info("Deleting all checkpoints.")
        delete_all_checkpoints(args.run_dir)

    log.info("Done!")