def main(args): # Initialize Emmental config = parse_args_to_config(args) emmental.init(log_dir=config["meta_config"]["log_path"], config=config) # Log configuration into files cmd_msg = " ".join(sys.argv) logger.info(f"COMMAND: {cmd_msg}") write_to_file(f"{emmental.Meta.log_path}/cmd.txt", cmd_msg) logger.info(f"Config: {emmental.Meta.config}") write_to_file(f"{emmental.Meta.log_path}/config.txt", emmental.Meta.config) # Create dataloaders dataloaders = get_dataloaders(args) # Assign transforms to dataloaders aug_dataloaders = [] if args.augment_policy: for idx in range(len(dataloaders)): if dataloaders[idx].split in args.train_split: dataloaders[idx].dataset.transform_cls = Augmentation( args=args) config["learner_config"]["task_scheduler_config"][ "task_scheduler"] = AugScheduler(augment_k=args.augment_k, enlarge=args.augment_enlarge) emmental.Meta.config["learner_config"]["task_scheduler_config"][ "task_scheduler"] = config["learner_config"]["task_scheduler_config"][ "task_scheduler"] # Create tasks model = EmmentalModel(name=f"{args.task}_task") model.add_task(create_task(args)) # Set cudnn benchmark cudnn.benchmark = True # Load the best model from the pretrained model if config["model_config"]["model_path"] is not None: model.load(config["model_config"]["model_path"]) if args.train: emmental_learner = EmmentalLearner() emmental_learner.learn(model, dataloaders + aug_dataloaders) # Remove all extra augmentation policy for idx in range(len(dataloaders)): dataloaders[idx].dataset.transform_cls = None scores = model.score(dataloaders) # Save metrics and models logger.info(f"Metrics: {scores}") scores["log_path"] = emmental.Meta.log_path write_to_json_file(f"{emmental.Meta.log_path}/metrics.txt", scores) model.save(f"{emmental.Meta.log_path}/last_model.pth")
def main(args): # Initialize Emmental config = parse_args_to_config(args) emmental.init(log_dir=config["meta_config"]["log_path"], config=config) # Log configuration into files cmd_msg = " ".join(sys.argv) logger.info(f"COMMAND: {cmd_msg}") write_to_file(f"{emmental.Meta.log_path}/cmd.txt", cmd_msg) logger.info(f"Config: {emmental.Meta.config}") write_to_file(f"{emmental.Meta.log_path}/config.txt", emmental.Meta.config) # Create dataloaders dataloaders = get_dataloaders(args) config["learner_config"]["task_scheduler_config"][ "task_scheduler"] = AugScheduler(augment_k=args.augment_k, enlarge=args.augment_enlarge) emmental.Meta.config["learner_config"]["task_scheduler_config"][ "task_scheduler"] = config["learner_config"]["task_scheduler_config"][ "task_scheduler"] # Specify parameter group for Adam BERT def grouped_parameters(model): no_decay = ["bias", "LayerNorm.weight"] return [ { "params": [ p for n, p in model.named_parameters() if not any(nd in n for nd in no_decay) ], "weight_decay": emmental.Meta.config["learner_config"]["optimizer_config"] ["l2"], }, { "params": [ p for n, p in model.named_parameters() if any(nd in n for nd in no_decay) ], "weight_decay": 0.0, }, ] emmental.Meta.config["learner_config"]["optimizer_config"][ "parameters"] = grouped_parameters # Create tasks model = EmmentalModel(name=f"{args.task}_task") model.add_task(create_task(args)) # Load the best model from the pretrained model if config["model_config"]["model_path"] is not None: model.load(config["model_config"]["model_path"]) if args.train: emmental_learner = EmmentalLearner() emmental_learner.learn(model, dataloaders) # Remove all extra augmentation policy for idx in range(len(dataloaders)): dataloaders[idx].dataset.transform_cls = None dataloaders[idx].dataset.k = 1 scores = model.score(dataloaders) # Save metrics and models logger.info(f"Metrics: {scores}") scores["log_path"] = emmental.Meta.log_path write_to_json_file(f"{emmental.Meta.log_path}/metrics.txt", scores) model.save(f"{emmental.Meta.log_path}/last_model.pth")
def test_model(caplog): """Unit test of model.""" caplog.set_level(logging.INFO) dirpath = "temp_test_model" Meta.reset() emmental.init(dirpath) def ce_loss(module_name, immediate_output_dict, Y, active): return F.cross_entropy(immediate_output_dict[module_name][0][active], (Y.view(-1))[active]) def output(module_name, immediate_output_dict): return F.softmax(immediate_output_dict[module_name][0], dim=1) task1 = EmmentalTask( name="task_1", module_pool=nn.ModuleDict({ "m1": nn.Linear(10, 10, bias=False), "m2": nn.Linear(10, 2, bias=False) }), task_flow=[ { "name": "m1", "module": "m1", "inputs": [("_input_", "data")] }, { "name": "m2", "module": "m2", "inputs": [("m1", 0)] }, ], loss_func=partial(ce_loss, "m2"), output_func=partial(output, "m2"), scorer=Scorer(metrics=["accuracy"]), ) new_task1 = EmmentalTask( name="task_1", module_pool=nn.ModuleDict({ "m1": nn.Linear(10, 5, bias=False), "m2": nn.Linear(5, 2, bias=False) }), task_flow=[ { "name": "m1", "module": "m1", "inputs": [("_input_", "data")] }, { "name": "m2", "module": "m2", "inputs": [("m1", 0)] }, ], loss_func=partial(ce_loss, "m2"), output_func=partial(output, "m2"), scorer=Scorer(metrics=["accuracy"]), ) task2 = EmmentalTask( name="task_2", module_pool=nn.ModuleDict({ "m1": nn.Linear(10, 5, bias=False), "m2": nn.Linear(5, 2, bias=False) }), task_flow=[ { "name": "m1", "module": "m1", "inputs": [("_input_", "data")] }, { "name": "m2", "module": "m2", "inputs": [("m1", 0)] }, ], loss_func=partial(ce_loss, "m2"), output_func=partial(output, "m2"), scorer=Scorer(metrics=["accuracy"]), ) config = {"model_config": {"dataparallel": False}} emmental.Meta.update_config(config) model = EmmentalModel(name="test", tasks=task1) assert repr(model) == "EmmentalModel(name=test)" assert model.name == "test" assert model.task_names == set(["task_1"]) assert model.module_pool["m1"].weight.data.size() == (10, 10) assert model.module_pool["m2"].weight.data.size() == (2, 10) model.update_task(new_task1) assert model.module_pool["m1"].weight.data.size() == (5, 10) assert model.module_pool["m2"].weight.data.size() == (2, 5) model.update_task(task2) assert model.task_names == set(["task_1"]) model.add_task(task2) assert model.task_names == set(["task_1", "task_2"]) model.remove_task("task_1") assert model.task_names == set(["task_2"]) model.remove_task("task_1") assert model.task_names == set(["task_2"]) model.save(f"{dirpath}/saved_model.pth") model.load(f"{dirpath}/saved_model.pth") # Test add_tasks model = EmmentalModel(name="test") model.add_tasks([task1, task2]) assert model.task_names == set(["task_1", "task_2"]) shutil.rmtree(dirpath)
if args.train: emmental_learner = EmmentalLearner() emmental_learner.learn(model, dataloaders) # Remove all extra augmentation policy for idx in range(len(dataloaders)): dataloaders[idx].dataset.transform_cls = None scores = model.score(dataloaders) # Save metrics and models logger.info(f"Metrics: {scores}") scores["log_path"] = emmental.Meta.log_path write_to_json_file(f"{emmental.Meta.log_path}/metrics.txt", scores) model.save(f"{emmental.Meta.log_path}/last_model.pth") for dataloader in dataloaders: if dataloader.split == "train": continue preds = model.predict(dataloader, return_preds=True) res = "" for uid, pred in zip(preds["uids"]["TACRED"], preds["preds"]["TACRED"]): res += f"{uid}\t{ID_TO_LABEL[pred]}\n" write_to_file( f"{emmental.Meta.log_path}/{dataloader.split}_predictions.txt", res.strip()) res = "" for uid, pred in zip(preds["uids"]["TACRED"],
def run_model(mode, config, run_config_path=None): """ Main run method for Emmental Bootleg models. Args: mode: run mode (train, eval, dump_preds, dump_embs) config: parsed model config run_config_path: original config path (for saving) Returns: """ # Set up distributed backend and save configuration files setup(config, run_config_path) # Load entity symbols log_rank_0_info(logger, f"Loading entity symbols...") entity_symbols = EntitySymbols.load_from_cache( load_dir=os.path.join(config.data_config.entity_dir, config.data_config.entity_map_dir), alias_cand_map_file=config.data_config.alias_cand_map, alias_idx_file=config.data_config.alias_idx_map, ) # Create tasks tasks = [NED_TASK] if config.data_config.type_prediction.use_type_pred is True: tasks.append(TYPE_PRED_TASK) # Create splits for data loaders data_splits = [TRAIN_SPLIT, DEV_SPLIT, TEST_SPLIT] # Slices are for eval so we only split on test/dev slice_splits = [DEV_SPLIT, TEST_SPLIT] # If doing eval, only run on test data if mode in ["eval", "dump_preds", "dump_embs"]: data_splits = [TEST_SPLIT] slice_splits = [TEST_SPLIT] # We only do dumping if weak labels is True if mode in ["dump_preds", "dump_embs"]: if config.data_config[ f"{TEST_SPLIT}_dataset"].use_weak_label is False: raise ValueError( f"When calling dump_preds or dump_embs, we require use_weak_label to be True." ) # Gets embeddings that need to be prepped during data prep or in the __get_item__ method batch_on_the_fly_kg_adj = get_dataloader_embeddings(config, entity_symbols) # Gets dataloaders dataloaders = get_dataloaders( config, tasks, data_splits, entity_symbols, batch_on_the_fly_kg_adj, ) slice_datasets = get_slicedatasets(config, slice_splits, entity_symbols) configure_optimizer(config) # Create models and add tasks if config.model_config.attn_class == "BERTNED": log_rank_0_info(logger, f"Starting NED-Base Model") assert (config.data_config.type_prediction.use_type_pred is False), f"NED-Base does not support type prediction" assert ( config.data_config.word_embedding.use_sent_proj is False ), f"NED-Base requires word_embeddings.use_sent_proj to be False" model = EmmentalModel(name="NED-Base") model.add_tasks( ned_task.create_task(config, entity_symbols, slice_datasets)) else: log_rank_0_info(logger, f"Starting Bootleg Model") model = EmmentalModel(name="Bootleg") # TODO: make this more general for other tasks -- iterate through list of tasks # and add task for each model.add_task( ned_task.create_task(config, entity_symbols, slice_datasets)) if TYPE_PRED_TASK in tasks: model.add_task( type_pred_task.create_task(config, entity_symbols, slice_datasets)) # Add the mention type embedding to the embedding payload type_pred_task.update_ned_task(model) # Print param counts if mode == "train": log_rank_0_debug(logger, "PARAMS WITH GRAD\n" + "=" * 30) total_params = count_parameters(model, requires_grad=True, logger=logger) log_rank_0_info(logger, f"===> Total Params With Grad: {total_params}") log_rank_0_debug(logger, "PARAMS WITHOUT GRAD\n" + "=" * 30) total_params = count_parameters(model, requires_grad=False, logger=logger) log_rank_0_info(logger, f"===> Total Params Without Grad: {total_params}") # Load the best model from the pretrained model if config["model_config"]["model_path"] is not None: model.load(config["model_config"]["model_path"]) # Barrier if config["learner_config"]["local_rank"] == 0: torch.distributed.barrier() # Train model if mode == "train": emmental_learner = EmmentalLearner() emmental_learner._set_optimizer(model) emmental_learner.learn(model, dataloaders) if config.learner_config.local_rank in [0, -1]: model.save(f"{emmental.Meta.log_path}/last_model.pth") # Multi-gpu DataParallel eval (NOT distributed) if mode in ["eval", "dump_embs", "dump_preds"]: # This happens inside EmmentalLearner for training if (config["learner_config"]["local_rank"] == -1 and config["model_config"]["dataparallel"]): model._to_dataparallel() # If just finished training a model or in eval mode, run eval if mode in ["train", "eval"]: scores = model.score(dataloaders) # Save metrics and models log_rank_0_info(logger, f"Saving metrics to {emmental.Meta.log_path}") log_rank_0_info(logger, f"Metrics: {scores}") scores["log_path"] = emmental.Meta.log_path if config.learner_config.local_rank in [0, -1]: write_to_file(f"{emmental.Meta.log_path}/{mode}_metrics.txt", scores) eval_utils.write_disambig_metrics_to_csv( f"{emmental.Meta.log_path}/{mode}_disambig_metrics.csv", scores) return scores # If you want detailed dumps, save model outputs assert mode in [ "dump_preds", "dump_embs", ], 'Mode must be "dump_preds" or "dump_embs"' dump_embs = False if mode != "dump_embs" else True assert ( len(dataloaders) == 1 ), f"We should only have length 1 dataloaders for dump_embs and dump_preds!" final_result_file, final_out_emb_file = None, None if config.learner_config.local_rank in [0, -1]: # Setup files/folders filename = os.path.basename(dataloaders[0].dataset.raw_filename) log_rank_0_debug( logger, f"Collecting sentence to mention map {os.path.join(config.data_config.data_dir, filename)}", ) sentidx2num_mentions, sent_idx2row = eval_utils.get_sent_idx2num_mens( os.path.join(config.data_config.data_dir, filename)) log_rank_0_debug(logger, f"Done collecting sentence to mention map") eval_folder = eval_utils.get_eval_folder(filename) subeval_folder = os.path.join(eval_folder, "batch_results") utils.ensure_dir(subeval_folder) # Will keep track of sentences dumped already. These will only be ones with mentions all_dumped_sentences = set() number_dumped_batches = 0 total_mentions_seen = 0 all_result_files = [] all_out_emb_files = [] # Iterating over batches of predictions for res_i, res_dict in enumerate( eval_utils.batched_pred_iter( model, dataloaders[0], config.run_config.eval_accumulation_steps, sentidx2num_mentions, )): ( result_file, out_emb_file, final_sent_idxs, mentions_seen, ) = eval_utils.disambig_dump_preds( res_i, total_mentions_seen, config, res_dict, sentidx2num_mentions, sent_idx2row, subeval_folder, entity_symbols, dump_embs, NED_TASK, ) all_dumped_sentences.update(final_sent_idxs) all_result_files.append(result_file) all_out_emb_files.append(out_emb_file) total_mentions_seen += mentions_seen number_dumped_batches += 1 # Dump the sentences that had no mentions and were not already dumped # Assert all remaining sentences have no mentions assert all( v == 0 for k, v in sentidx2num_mentions.items() if k not in all_dumped_sentences ), (f"Sentences with mentions were not dumped: " f"{[k for k, v in sentidx2num_mentions.items() if k not in all_dumped_sentences]}" ) empty_sentidx2row = { k: v for k, v in sent_idx2row.items() if k not in all_dumped_sentences } empty_resultfile = eval_utils.get_result_file(number_dumped_batches, subeval_folder) all_result_files.append(empty_resultfile) # Dump the outputs eval_utils.write_data_labels_single( sentidx2row=empty_sentidx2row, output_file=empty_resultfile, filt_emb_data=None, sental2embid={}, alias_cand_map=entity_symbols.get_alias2qids(), qid2eid=entity_symbols.get_qid2eid(), result_alias_offset=total_mentions_seen, train_in_cands=config.data_config.train_in_candidates, max_cands=entity_symbols.max_candidates, dump_embs=dump_embs, ) log_rank_0_info( logger, f"Finished dumping. Merging results across accumulation steps.") # Final result files for labels and embeddings final_result_file = os.path.join(eval_folder, config.run_config.result_label_file) # Copy labels output = open(final_result_file, "wb") for file in all_result_files: shutil.copyfileobj(open(file, "rb"), output) output.close() log_rank_0_info(logger, f"Bootleg labels saved at {final_result_file}") # Try to copy embeddings if dump_embs: final_out_emb_file = os.path.join( eval_folder, config.run_config.result_emb_file) log_rank_0_info( logger, f"Trying to merge numpy embedding arrays. " f"If your machine is limited in memory, this may cause OOM errors. " f"Is that happens, result files should be saved in {subeval_folder}.", ) all_arrays = [] for i, npfile in enumerate(all_out_emb_files): all_arrays.append(np.load(npfile)) np.save(final_out_emb_file, np.concatenate(all_arrays)) log_rank_0_info( logger, f"Bootleg embeddings saved at {final_out_emb_file}") # Cleanup try_rmtree(subeval_folder) return final_result_file, final_out_emb_file
def checkpoint( self, iteration: Union[float, int], model: EmmentalModel, optimizer: Optimizer, lr_scheduler: _LRScheduler, metric_dict: Dict[str, float], ) -> None: """Checkpointing the checkpoint. Args: iteration: The current iteration. model: The model to checkpoint. optimizer: The optimizer used during training process. lr_scheduler: Learning rate scheduler. metric_dict: The metric dict. """ # Check the checkpoint_runway condition is met if iteration < self.checkpoint_runway: return elif not self.checkpoint_condition_met and iteration >= self.checkpoint_runway: self.checkpoint_condition_met = True logger.info( "checkpoint_runway condition has been met. Start checkpoining." ) # Save model state model_path = f"{self.checkpoint_path}/checkpoint_{iteration}.model.pth" model.save(model_path, verbose=False) logger.info(f"Save checkpoint of {iteration} {self.checkpoint_unit} " f"at {model_path}.") # Save optimizer state optimizer_path = f"{self.checkpoint_path}/checkpoint_{iteration}.optimizer.pth" optimizer_dict = { "optimizer": optimizer.state_dict(), } torch.save(optimizer_dict, optimizer_path) # Save lr_scheduler state scheduler_path = f"{self.checkpoint_path}/checkpoint_{iteration}.scheduler.pth" scheduler_dict = { "lr_scheduler": lr_scheduler.state_dict() if lr_scheduler else None } torch.save(scheduler_dict, scheduler_path) if self.checkpoint_all is False: for path in self.checkpoint_paths: if os.path.exists(path): os.remove(path) self.checkpoint_paths.extend( [model_path, optimizer_path, scheduler_path]) if not set(self.checkpoint_all_metrics.keys()).isdisjoint( set(metric_dict.keys())): new_best_metrics = self.is_new_best(metric_dict) for metric in new_best_metrics: best_metric_model_path = ( f"{self.checkpoint_path}/best_model_" f"{metric.replace('/', '_')}.model.pth") copyfile( model_path, best_metric_model_path, ) logger.info( f"Save best model of metric {metric} to {best_metric_model_path}" ) best_metric_optimizer_path = ( f"{self.checkpoint_path}/best_model_" f"{metric.replace('/', '_')}.optimizer.pth") copyfile(optimizer_path, best_metric_optimizer_path) best_metric_scheduler_path = ( f"{self.checkpoint_path}/best_model_" f"{metric.replace('/', '_')}.scheduler.pth") copyfile(scheduler_path, best_metric_scheduler_path)