def main():
    reader = LinzenDatasetReader(append_null=False)
    vocab = Vocabulary.from_files("saved_models/vocabulary")

    stack = StackRNNAgreementPredictor(vocab, rnn_dim=100, rnn_cell_type=torch.nn.GRUCell)
    stack.load_state_dict(torch.load("saved_models/stack-linzen.th"))

    lstm = SimpleRNNAgreementPredictor(vocab, rnn_dim=18, rnn_type=torch.nn.GRU)
    lstm.load_state_dict(torch.load("saved_models/lstm-linzen.th"))

    iterator = BucketIterator(batch_size=32, sorting_keys=[("sentence", "num_tokens")])
    iterator.index_with(vocab)

    dataset = reader.read("StackNN/data/linzen/rnn_agr_simple/numpred.test")
    stack_metrics = evaluate(stack, dataset, iterator, -1, "")
    lstm_metrics = evaluate(lstm, dataset, iterator, -1, "")
    print(stack_metrics)
    print(lstm_metrics)

    for i in range(6):
        dataset = reader.read("StackNN/data/linzen/rnn_agr_simple/numpred.test." + str(i))
        stack_metrics = evaluate(stack, dataset, iterator, -1, "")
        lstm_metrics = evaluate(lstm, dataset, iterator, -1, "")
        print(stack_metrics)
        print(lstm_metrics)

def evaluate_all_tasks(task, evaluate_tasks, dev_data, vocabulary, model, args, save_weight, temps):
    devicea = -1
    if torch.cuda.is_available():
        devicea = 0

    majority = {
        'subjectivity': 0.5,
        'sst': 0.2534059946,
        'trec': 0.188,
        'cola': 0,
        'ag': 0.25,
        'sst_2c': 0.51
    }
    sota = {
        'subjectivity': 0.955,
        'sst': 0.547,
        'trec': 0.9807,
        'cola': 0.341,
        'ag': 0.955,
        'sst_2c': 0.968
    }

    overall_metric = {}
    standard_metric = {}
    for j in evaluate_tasks:
        model.set_task(j, tmp=temps[j])
        print("\nEvaluating ", j)
        sys.stdout.flush()
        iterator1 = BucketIterator(batch_size=args.bs, sorting_keys=[("tokens", "num_tokens")])
        iterator1.index_with(vocabulary[j])
        metric = evaluate(model=model,
                          instances=dev_data[j],
                          data_iterator=iterator1,
                          cuda_device=devicea,
                          batch_weight_key=None)
        # Take first 500 instances for evaluating activations.
        if not args.no_save_weight:
            iterator1 = BucketIterator(batch_size=500, sorting_keys=[("tokens", "num_tokens")])
            iterator1.index_with(vocabulary[j])
            evaluate(model=model,
                     instances=dev_data[j][:500],
                     data_iterator=iterator1,
                     cuda_device=devicea,
                     batch_weight_key=None)
            save_weight.add_activations(model, task, j)
        if j == 'cola':
            metric['metric'] = metric['average']
        else:
            metric['metric'] = metric['accuracy']
        smetric = (float(metric['metric']) - majority[j]) / (sota[j] - majority[j])
        overall_metric[j] = metric
        standard_metric[j] = smetric
    return overall_metric, standard_metric

def finish(self, metrics: Dict[str, Any]) -> None:
    # import wandb here to be sure that it was initialized
    # before this line was executed
    import wandb  # noqa

    if self.evaluation_data_loader is not None and self.evaluate_on_test:
        logger.info("The model will be evaluated using the best epoch weights.")
        test_metrics = training_util.evaluate(
            self.model,
            self.evaluation_data_loader,  # type: ignore
            cuda_device=self.trainer.cuda_device,  # type: ignore
            batch_weight_key=self.batch_weight_key,
        )
        for key, value in test_metrics.items():
            metrics["test_" + key] = value
    elif self.evaluation_data_loader is not None:
        logger.info(
            "To evaluate on the test set after training, pass the "
            "'evaluate_on_test' flag, or use the 'allennlp evaluate' command."
        )

    common_util.dump_metrics(
        os.path.join(self.serialization_dir, "metrics.json"),
        metrics,
        log=True,
    )
    # update the summary with all metrics
    wandb.run.summary.update(metrics)

def run_training_loop():
    dataset_reader = build_dataset_reader()

    # These are a subclass of pytorch Datasets, with some allennlp-specific
    # functionality added.
    train_data, dev_data, test_data = read_data(dataset_reader)

    vocab = build_vocab(train_data + dev_data)
    model = build_model(vocab)

    # This is the allennlp-specific functionality in the Dataset object;
    # we need to be able to convert strings in the data to integers, and this
    # is how we do it.
    train_data.index_with(vocab)
    dev_data.index_with(vocab)
    test_data.index_with(vocab)

    # These are again a subclass of pytorch DataLoaders, with an
    # allennlp-specific collate function, that runs our indexing and
    # batching code.
    train_loader, dev_loader, test_loader = build_data_loaders(train_data, dev_data, test_data)

    trainer = build_trainer(model, "", train_loader, dev_loader)
    print("Starting training")
    trainer.train()
    print("Finished training")

    results = evaluate(model, test_loader)
    print(results)

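# The snippet above assumes a build_data_loaders helper that is not shown. A minimal,
# hypothetical sketch of what it might look like is given here (AllenNLP 1.x-style
# DataLoader; the batch size and shuffle choices are assumptions, not taken from the
# snippet). The datasets must already be indexed with a vocabulary, as done above.
from allennlp.data import DataLoader


def build_data_loaders(train_data, dev_data, test_data):
    # Shuffle only the training split; keep dev/test order stable for evaluation.
    train_loader = DataLoader(train_data, batch_size=8, shuffle=True)
    dev_loader = DataLoader(dev_data, batch_size=8, shuffle=False)
    test_loader = DataLoader(test_data, batch_size=8, shuffle=False)
    return train_loader, dev_loader, test_loader
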
def evaluate_from_args(args: argparse.Namespace) -> Dict[str, Any]:
    # Disable some of the more verbose logging statements
    logging.getLogger("allennlp.common.params").disabled = True
    logging.getLogger("allennlp.nn.initializers").disabled = True
    logging.getLogger("allennlp.modules.token_embedders.embedding").setLevel(logging.INFO)

    # Load from archive
    archive = load_archive(
        args.archive_file,
        weights_file=args.weights_file,
        cuda_device=args.cuda_device,
        overrides=args.overrides,
    )
    config = archive.config
    prepare_environment(config)
    model = archive.model
    model.eval()

    # Load the evaluation data
    # Try to use the validation dataset reader if there is one - otherwise fall back
    # to the default dataset_reader used for both training and validation.
    validation_dataset_reader_params = config.pop("validation_dataset_reader", None)
    if validation_dataset_reader_params is not None:
        dataset_reader = DatasetReader.from_params(validation_dataset_reader_params)
    else:
        dataset_reader = DatasetReader.from_params(config.pop("dataset_reader"))
    evaluation_data_path = args.input_file
    logger.info("Reading evaluation data from %s", evaluation_data_path)
    instances = dataset_reader.read(evaluation_data_path)

    embedding_sources = (json.loads(args.embedding_sources_mapping)
                         if args.embedding_sources_mapping else {})
    if args.extend_vocab:
        logger.info("Vocabulary is being extended with test instances.")
        model.vocab.extend_from_instances(instances=instances)
        model.extend_embedder_vocab(embedding_sources)

    instances.index_with(model.vocab)
    data_loader_params = config.pop("validation_data_loader", None)
    if data_loader_params is None:
        data_loader_params = config.pop("data_loader")
    if args.batch_size:
        data_loader_params["batch_size"] = args.batch_size
    data_loader = DataLoader.from_params(dataset=instances, params=data_loader_params)

    metrics = evaluate(model, data_loader, args.cuda_device, args.batch_weight_key)

    logger.info("Finished evaluating.")
    dump_metrics(args.output_file, metrics, log=True)

    return metrics

def evaluate_from_args(args: argparse.Namespace) -> Dict[str, Any]:
    common_logging.FILE_FRIENDLY_LOGGING = args.file_friendly_logging

    # Disable some of the more verbose logging statements
    logging.getLogger("allennlp.common.params").disabled = True
    logging.getLogger("allennlp.nn.initializers").disabled = True
    logging.getLogger("allennlp.modules.token_embedders.embedding").setLevel(logging.INFO)

    # Load from archive
    archive = load_archive(
        args.archive_file,
        weights_file=args.weights_file,
        cuda_device=args.cuda_device,
        overrides=args.overrides,
    )
    config = archive.config
    prepare_environment(config)
    model = archive.model
    model.eval()

    # Load the evaluation data
    dataset_reader = archive.validation_dataset_reader
    evaluation_data_path = args.input_file
    logger.info("Reading evaluation data from %s", evaluation_data_path)

    data_loader_params = config.pop("validation_data_loader", None)
    if data_loader_params is None:
        data_loader_params = config.pop("data_loader")
    if args.batch_size:
        data_loader_params["batch_size"] = args.batch_size
    data_loader = DataLoader.from_params(params=data_loader_params,
                                         reader=dataset_reader,
                                         data_path=evaluation_data_path)

    embedding_sources = (json.loads(args.embedding_sources_mapping)
                         if args.embedding_sources_mapping else {})
    if args.extend_vocab:
        logger.info("Vocabulary is being extended with test instances.")
        model.vocab.extend_from_instances(instances=data_loader.iter_instances())
        model.extend_embedder_vocab(embedding_sources)

    data_loader.index_with(model.vocab)

    metrics = evaluate(
        model,
        data_loader,
        args.cuda_device,
        args.batch_weight_key,
        output_file=args.output_file,
        predictions_output_file=args.predictions_output_file,
    )

    logger.info("Finished evaluating.")

    return metrics

def train_model(parameters, name):
    token_indexer = {"tokens": ELMoTokenCharactersIndexer()} if parameters['use_elmo'] else None
    reader = SSJ500KReader(token_indexer) if parameters["dataset"] == "ssj" else SentiCorefReader(token_indexer)
    train_dataset = reader.read("train")
    validation_dataset = reader.read("test")
    vocab = Vocabulary.from_instances(train_dataset + validation_dataset)
    # vocab = Vocabulary() if parameters['use_elmo'] else Vocabulary.from_instances(train_dataset + validation_dataset)
    model = get_model(vocab, parameters)
    if torch.cuda.is_available():
        cuda_device = 0
        model = model.cuda(cuda_device)
    else:
        cuda_device = -1
    optimizer = optim.Adam(model.parameters(),
                           lr=parameters['lr'],
                           weight_decay=parameters['weight_decay'])
    iterator = BucketIterator(batch_size=parameters['batch_size'],
                              sorting_keys=[("sentence", "num_tokens")])
    iterator.index_with(vocab)
    trainer = Trainer(model=model,
                      optimizer=optimizer,
                      iterator=iterator,
                      train_dataset=train_dataset,
                      validation_dataset=validation_dataset,
                      patience=parameters['patience'],
                      num_epochs=parameters['num_epochs'],
                      cuda_device=cuda_device)
    trainer.train()
    metrics = evaluate(model, validation_dataset, iterator, cuda_device, None)
    save_model_and_vocab(model, vocab, metrics, parameters, fname=name)

def evaluate_from_args(args: argparse.Namespace) -> Dict[str, Any]:
    # Disable some of the more verbose logging statements
    logging.getLogger('allennlp.common.params').disabled = True
    logging.getLogger('allennlp.nn.initializers').disabled = True
    logging.getLogger('allennlp.modules.token_embedders.embedding').setLevel(logging.INFO)

    # Load from archive
    archive = load_archive(args.archive_file, args.cuda_device, args.overrides, args.weights_file)
    config = archive.config
    prepare_environment(config)
    model = archive.model
    model.eval()

    # Load the evaluation data
    # Try to use the validation dataset reader if there is one - otherwise fall back
    # to the default dataset_reader used for both training and validation.
    validation_dataset_reader_params = config.pop('validation_dataset_reader', None)
    if validation_dataset_reader_params is not None:
        dataset_reader = DatasetReader.from_params(validation_dataset_reader_params)
    else:
        dataset_reader = DatasetReader.from_params(config.pop('dataset_reader'))
    evaluation_data_path = args.input_file
    logger.info("Reading evaluation data from %s", evaluation_data_path)
    instances = dataset_reader.read(evaluation_data_path)

    embedding_sources: Dict[str, str] = (json.loads(args.embedding_sources_mapping)
                                         if args.embedding_sources_mapping else {})
    if args.extend_vocab:
        logger.info("Vocabulary is being extended with test instances.")
        model.vocab.extend_from_instances(Params({}), instances=instances)
        model.extend_embedder_vocab(embedding_sources)

    iterator_params = config.pop("validation_iterator", None)
    if iterator_params is None:
        iterator_params = config.pop("iterator")
    iterator = DataIterator.from_params(iterator_params)
    iterator.index_with(model.vocab)

    metrics = evaluate(model, instances, iterator, args.cuda_device, args.batch_weight_key)

    logger.info("Finished evaluating.")
    logger.info("Metrics:")
    for key, metric in metrics.items():
        logger.info("%s: %s", key, metric)

    output_file = args.output_file
    if output_file:
        with open(output_file, "w") as file:
            json.dump(metrics, file, indent=4)
    return metrics

def main(args):
    params = Params.from_file(args.config_path)
    stdout_handler = prepare_global_logging(args.output_dir, False)
    prepare_environment(params)

    reader = DatasetReader.from_params(params["dataset_reader"])
    train_dataset = reader.read(params.pop("train_data_path", None))
    valid_dataset = reader.read(params.pop("validation_data_path", None))
    test_data_path = params.pop("test_data_path", None)
    if test_data_path:
        test_dataset = reader.read(test_data_path)
        vocab = Vocabulary.from_instances(train_dataset + valid_dataset + test_dataset)
    else:
        test_dataset = None
        vocab = Vocabulary.from_instances(train_dataset + valid_dataset)

    model_params = params.pop("model", None)
    model = Model.from_params(model_params.duplicate(), vocab=vocab)
    vocab.save_to_files(os.path.join(args.output_dir, "vocabulary"))

    # copy config file
    with open(args.config_path, "r", encoding="utf-8") as f_in:
        with open(os.path.join(args.output_dir, "config.json"), "w", encoding="utf-8") as f_out:
            f_out.write(f_in.read())

    iterator = DataIterator.from_params(params.pop("iterator", None))
    iterator.index_with(vocab)

    trainer_params = params.pop("trainer", None)
    trainer = Trainer.from_params(model=model,
                                  serialization_dir=args.output_dir,
                                  iterator=iterator,
                                  train_data=train_dataset,
                                  validation_data=valid_dataset,
                                  params=trainer_params.duplicate())
    trainer.train()

    # evaluate on the test set
    if test_dataset:
        logging.info("Evaluating on the test set")
        import torch  # import here to ensure the reproducibility of the experiment
        model.load_state_dict(torch.load(os.path.join(args.output_dir, "best.th")))
        test_metrics = evaluate(model,
                                test_dataset,
                                iterator,
                                cuda_device=trainer_params.pop("cuda_device", 0),
                                batch_weight_key=None)
        logging.info(f"Metrics on the test set: {test_metrics}")
        with open(os.path.join(args.output_dir, "test_metrics.txt"), "w", encoding="utf-8") as f_out:
            f_out.write(f"Metrics on the test set: {test_metrics}")

    cleanup_global_logging(stdout_handler)

def evaluate(self):
    if not self.training:
        final_metrics = evaluate(self.model,
                                 self.test_dataset,
                                 self.iterator,
                                 self.cuda_device,
                                 batch_weight_key=None)
        return final_metrics
    else:
        logger.warning('Model is in training state!')

def evaluate_from_args(args: argparse.Namespace):
    # Disable some of the more verbose logging statements
    logging.getLogger('allennlp.common.params').disabled = True
    logging.getLogger('allennlp.nn.initializers').disabled = True
    logging.getLogger('allennlp.modules.token_embedders.embedding').setLevel(logging.INFO)

    # Load from archive
    archive = load_archive(args.archive_file, args.cuda_device, args.overrides, args.weights_file)
    config = archive.config
    prepare_environment(config)
    model = archive.model
    model.eval()

    # Load the evaluation data
    # Try to use the validation dataset reader if there is one - otherwise fall back
    # to the default dataset_reader used for both training and validation.
    validation_dataset_reader_params = config.pop('validation_dataset_reader', None)
    if validation_dataset_reader_params is not None:
        dataset_reader = DatasetReader.from_params(validation_dataset_reader_params)
    else:
        dataset_reader = DatasetReader.from_params(config.pop('dataset_reader'))
    evaluation_data_path = args.input_file
    logger.info("Reading evaluation data from %s", evaluation_data_path)
    instances = dataset_reader.read(evaluation_data_path)

    embedding_sources: Dict[str, str] = (json.loads(args.embedding_sources_mapping)
                                         if args.embedding_sources_mapping else {})
    if args.extend_vocab:
        logger.info("Vocabulary is being extended with test instances.")
        model.vocab.extend_from_instances(Params({}), instances=instances)
        model.extend_embedder_vocab(embedding_sources)

    iterator_params = config.pop("validation_iterator", None)
    if iterator_params is None:
        iterator_params = config.pop("iterator")
    iterator = DataIterator.from_params(iterator_params)
    iterator.index_with(model.vocab)

    csv_writer = csv.writer(args.output_file)
    keys = None
    for instance in instances:
        metrics = evaluate(model, [instance], iterator, args.cuda_device, args.batch_weight_key)
        if keys is None:
            keys = sorted(metrics.keys())
            csv_writer.writerow(['instance_id', *keys])
        instance_id = instance.fields['metadata']['id']
        values = [metrics[key] for key in keys]
        csv_writer.writerow([instance_id, *values])

def evaluate_get_dataset(model, task, vocab, dataset, num_samples, task_id):
    devicea = -1
    if torch.cuda.is_available():
        devicea = 0
    iterator1 = BucketIterator(batch_size=500, sorting_keys=[("tokens", "num_tokens")])
    iterator1.index_with(vocab)
    model.set_task(task)
    evaluate(model=model,
             instances=dataset[:num_samples],
             data_iterator=iterator1,
             cuda_device=devicea,
             batch_weight_key=None)
    train_act, _ = model.get_activations()
    if type(train_act) == list:
        # Hack for CNN; need to do better.
        train_act = train_act[-1]
    train_act = train_act.reshape(train_act.size(0), -1)
    train_act = train_act[:, :128]
    train_lab = torch.LongTensor(train_act.size(0)).fill_(task_id)
    return move_to_device(train_act, devicea), move_to_device(train_lab, devicea)

def __call__(self, trainer: GradientDescentTrainer, metrics: Dict[str, Any], epoch: int) -> None:
    if epoch < 0:
        return

    e_metrics = {}
    test_metrics = evaluate(model=trainer.model,
                            data_loader=self._test_data_loader,
                            cuda_device=trainer.cuda_device,
                            batch_weight_key="")
    for key, value in test_metrics.items():
        e_metrics["test_" + key] = value

    test_metrics = evaluate(model=trainer.model,
                            data_loader=self._fold_data_loader,
                            cuda_device=trainer.cuda_device,
                            batch_weight_key="")
    for key, value in test_metrics.items():
        e_metrics["fold_" + key] = value

    self._global_metrics["fold-{}".format(self._fold)]["epoch-{}".format(epoch)] = e_metrics

def run_testing(data_reader: DatasetReader, data_path: Path, model: Model) -> Model:
    print("Running over test set.")
    test_loader = build_data_loader(data_reader=data_reader,
                                    data_path=data_path,
                                    batch_size=8,
                                    shuffle=False)
    model.vocab.extend_from_instances(test_loader.iter_instances())
    test_loader.index_with(model.vocab)
    results = evaluate(model, test_loader, cuda_device=0)
    print(f"Test results: {results}.")
    # log.info(results)
    return model

def eval_model(model_path, data_path, device, batch=32):
    model, dataset_reader = load_model(model_path=model_path, device=device)
    test_data = dataset_reader.read(data_path)
    iterator = BucketIterator(sorting_keys=[('text', 'num_tokens')],
                              batch_size=batch,
                              padding_noise=0)
    iterator.index_with(model.vocab)
    model.eval()
    eval_result = evaluate(model=model,
                           instances=test_data,
                           data_iterator=iterator,
                           cuda_device=device,
                           batch_weight_key="")
    print(eval_result)

def finish(self, metrics: Dict[str, Any]) -> None:
    # import wandb here to be sure that it was initialized
    # before this line was executed
    import wandb  # noqa

    if self.evaluation_data_loader is not None and self.evaluate_on_test:
        logger.info("The model will be evaluated using the best epoch weights.")
        test_metrics = training_util.evaluate(
            self.model,
            self.evaluation_data_loader,  # type: ignore
            cuda_device=self.trainer.cuda_device,  # type: ignore
            batch_weight_key=self.batch_weight_key,
        )
        for key, value in test_metrics.items():
            metrics["test_" + key] = value
    elif self.evaluation_data_loader is not None:
        logger.info(
            "To evaluate on the test set after training, pass the "
            "'evaluate_on_test' flag, or use the 'allennlp evaluate' command."
        )

    common_util.dump_metrics(
        os.path.join(self.serialization_dir, "metrics.json"),
        metrics,
        log=True,
    )

    # update the summary with all metrics
    if wandb.run is None:
        logger.info("wandb run was closed. Resuming to update summary.")
        run = wandb.init(
            id=read_from_env("WANDB_RUN_ID"),
            project=read_from_env("WANDB_PROJECT"),
            entity=read_from_env("WANDB_ENTITY"),
            resume="must",
        )
    else:
        logger.info("There is an active wandb run. Using that to update summary.")
        run = wandb.run

    if run is not None:
        logger.info("Updating summary on wandb.")
        run.summary.update(metrics)

def evaluate_dataset(instances, name, model, iterator, device, archive_path):
    logger.info(f'Evaluating {name} set.')
    metrics = evaluate(model, instances, iterator, device, batch_weight_key='sample_size')

    logger.info("Metrics:")
    for key, metric in metrics.items():
        logger.info("%s: %s", key, metric)

    output_dir = os.path.dirname(archive_path)
    output_file = os.path.join(output_dir, f'{name}-metrics.json')
    if output_file:
        with open(output_file, "w") as file:
            json.dump(metrics, file, indent=4)
    return metrics

def main():
    params = Params()
    config = params.opts
    dsr = LivedoorCorpusReader(config=config)

    # Loading Datasets
    train, dev, test = dsr._read('train'), dsr._read('dev'), dsr._read('test')
    train_and_dev = train + dev
    vocab = build_vocab(train_and_dev)
    num_label = len(dsr.class2id)

    train_loader, dev_loader, test_loader = build_data_loaders(config, train, dev, test)
    train_loader.index_with(vocab)
    dev_loader.index_with(vocab)

    _, __, embedder = emb_returner(config=config)
    mention_encoder = Pooler_for_mention(config, embedder)
    model = TitleAndCaptionClassifier(config, mention_encoder, num_label, vocab)
    trainer = build_trainer(config, model, train_loader, dev_loader)
    trainer.train()

    # Evaluation
    model.eval()
    test_loader.index_with(model.vocab)
    eval_result = evaluate(model=model,
                           data_loader=test_loader,
                           cuda_device=0,
                           batch_weight_key="")
    print(eval_result)

    # Dump train and dev documents to article embeddings
    embedding_encoder = EmbeddingEncoder(model, dsr)
    emb_dumper = ArticleKB(model=model, dsr=dsr, config=config)
    mention_idx2emb = emb_dumper.mention_idx2emb

    # load kb
    article_kb_class = ArticleTitleIndexerWithFaiss(
        config=config,
        mention_idx2emb=mention_idx2emb,
        dsr=dsr,
        kbemb_dim=768
    )
    top_titles = article_kb_class.search_with_emb(
        emb=emb_dumper.predictor.predict('iPhoneとパソコン')['encoded_embeddings'])
    print(top_titles)

    return article_kb_class, emb_dumper

def finish(self, metrics: Dict[str, Any]):
    if self.evaluation_data_loader is not None and self.evaluate_on_test:
        logger.info("The model will be evaluated using the best epoch weights.")
        test_metrics = training_util.evaluate(
            self.model,
            self.evaluation_data_loader,
            cuda_device=self.trainer.cuda_device,
            batch_weight_key=self.batch_weight_key,
        )
        for key, value in test_metrics.items():
            metrics["test_" + key] = value
    elif self.evaluation_data_loader is not None:
        logger.info(
            "To evaluate on the test set after training, pass the "
            "'evaluate_on_test' flag, or use the 'allennlp evaluate' command."
        )

    common_util.dump_metrics(
        os.path.join(self.serialization_dir, "metrics.json"), metrics, log=True
    )

def benchmark_xlmr_mdl():
    from allennlp.data import DataLoader
    from allennlp.training.util import evaluate

    xlmr = load_xlmr_coref_model()

    instances = xlmr.dataset_reader.load_dataset(testset)
    data_loader = SimpleDataLoader(instances, 1)
    data_loader.index_with(xlmr.model.vocab)

    start = time.time()

    metrics = evaluate(xlmr.model, data_loader)

    print('**XLM-R model**')
    print_speed_performance(start, num_sentences, num_tokens)
    print('Precision : ', metrics['coref_precision'])
    print('Recall : ', metrics['coref_recall'])
    print('F1 : ', metrics['coref_f1'])
    print('Mention Recall : ', metrics['mention_recall'])

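# For reference: a minimal, self-contained sketch of the same modern evaluate() +
# SimpleDataLoader pattern used above (assumes a trained `model`, a `dataset_reader`,
# and an AllenNLP 2.x-style API). The function name, data path, and batch size are
# placeholders, not taken from any snippet in this collection.
from allennlp.data.data_loaders import SimpleDataLoader
from allennlp.training.util import evaluate


def evaluate_on_file(model, dataset_reader, data_path, cuda_device=-1):
    # Read the instances, wrap them in a data loader, and index with the model's vocab.
    instances = list(dataset_reader.read(data_path))
    data_loader = SimpleDataLoader(instances, batch_size=32)
    data_loader.index_with(model.vocab)
    # evaluate() returns the model's aggregated metrics over the data loader.
    return evaluate(model, data_loader, cuda_device=cuda_device)
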
def test_evaluation(self) -> Dict[str, Any]:
    """
    Evaluates the model against the test dataset (if defined)

    Returns
    -------
    Test metrics information
    """
    test_data = self._test
    if not test_data:
        return {}

    self.__LOGGER.info("The model will be evaluated using the best epoch weights.")
    return evaluate(
        self._pipeline._model,
        data_loader=DataLoader(test_data, batch_size=self._trainer_config.batch_size),
        cuda_device=self._trainer.cuda_device,
        batch_weight_key=self._batch_weight_key,
    )

def main(device, base_serialization_dir):
    storage = "sqlite:///" + os.path.join(base_serialization_dir, "optuna.db")
    study = load_study("optuna_allennlp", storage)
    best_trial = study.best_trial
    print(f"best_trial: {best_trial.number}")

    reader = TextClassificationJsonReader(
        token_indexers={"tokens": SingleIdTokenIndexer()},
        tokenizer=WhitespaceTokenizer(),
    )
    serialization_dir = os.path.join(base_serialization_dir, f"trial_{best_trial.number}")
    vocab = Vocabulary.from_files(os.path.join(serialization_dir, "vocabulary"))
    data = reader.read("https://s3-us-west-2.amazonaws.com/allennlp/datasets/imdb/test.jsonl")
    data.index_with(vocab)

    hyperparams = best_trial.params
    hyperparams.pop("lr")
    model = create_model(vocab=vocab, **hyperparams)
    model.load_state_dict(torch.load(os.path.join(serialization_dir, "best.th")))

    if device >= 0:
        model.to(device)

    data_loader = DataLoader(data, batch_size=64, collate_fn=allennlp_collate)
    print(evaluate(model, data_loader, cuda_device=device))

def train_model(params: Params,
                serialization_dir: str,
                file_friendly_logging: bool = False,
                recover: bool = False,
                force: bool = False,
                cache_directory: str = None,
                cache_prefix: str = None) -> Model:
    """
    Trains the model specified in the given :class:`Params` object, using the data and training
    parameters also specified in that object, and saves the results in ``serialization_dir``.

    Parameters
    ----------
    params : ``Params``
        A parameter object specifying an AllenNLP Experiment.
    serialization_dir : ``str``
        The directory in which to save results and logs.
    file_friendly_logging : ``bool``, optional (default=False)
        If ``True``, we add newlines to tqdm output, even on an interactive terminal, and we slow
        down tqdm's output to only once every 10 seconds.
    recover : ``bool``, optional (default=False)
        If ``True``, we will try to recover a training run from an existing serialization
        directory. This is only intended for use when something actually crashed during the middle
        of a run. For continuing training a model on new data, see the ``fine-tune`` command.
    force : ``bool``, optional (default=False)
        If ``True``, we will overwrite the serialization directory if it already exists.
    cache_directory : ``str``, optional
        For caching data pre-processing. See :func:`allennlp.training.util.datasets_from_params`.
    cache_prefix : ``str``, optional
        For caching data pre-processing. See :func:`allennlp.training.util.datasets_from_params`.

    Returns
    -------
    best_model : ``Model``
        The model with the best epoch weights.
    """
    create_serialization_dir(params, serialization_dir, recover, force)
    stdout_handler = prepare_global_logging(serialization_dir, file_friendly_logging)
    prepare_environment(params)

    cuda_device = params.params.get('trainer').get('cuda_device', -1)
    check_for_gpu(cuda_device)

    params.to_file(os.path.join(serialization_dir, CONFIG_NAME))

    evaluate_on_test = params.pop_bool("evaluate_on_test", False)

    trainer_type = params.get("trainer", {}).get("type", "default")

    if trainer_type == "default":
        # Special logic to instantiate backward-compatible trainer.
        pieces = TrainerPieces.from_params(params,  # pylint: disable=no-member
                                           serialization_dir,
                                           recover,
                                           cache_directory,
                                           cache_prefix)
        trainer = Trainer.from_params(
            model=pieces.model,
            serialization_dir=serialization_dir,
            iterator=pieces.iterator,
            train_data=pieces.train_dataset,
            validation_data=pieces.validation_dataset,
            params=pieces.params,
            validation_iterator=pieces.validation_iterator)

        evaluation_iterator = pieces.validation_iterator or pieces.iterator
        evaluation_dataset = pieces.test_dataset
    else:
        if evaluate_on_test:
            raise ValueError(
                "--evaluate-on-test only works with the default Trainer. "
                "If you're using the CallbackTrainer you can use a callback "
                "to evaluate at Events.TRAINING_END; otherwise you'll have "
                "to run allennlp evaluate separately.")

        trainer = TrainerBase.from_params(params, serialization_dir, recover,
                                          cache_directory, cache_prefix)
        evaluation_dataset = None

    params.assert_empty('base train command')

    try:
        metrics = trainer.train()
    except KeyboardInterrupt:
        # if we have completed an epoch, try to create a model archive.
        if os.path.exists(os.path.join(serialization_dir, _DEFAULT_WEIGHTS)):
            logging.info("Training interrupted by the user. Attempting to create "
                         "a model archive using the current best epoch weights.")
            archive_model(serialization_dir, files_to_archive=params.files_to_archive)
        raise

    # Evaluate
    if evaluation_dataset and evaluate_on_test:
        logger.info("The model will be evaluated using the best epoch weights.")
        test_metrics = evaluate(
            trainer.model,
            evaluation_dataset,
            evaluation_iterator,
            cuda_device=trainer._cuda_devices[0],  # pylint: disable=protected-access
            # TODO(brendanr): Pass in an arg following Joel's trainer refactor.
            batch_weight_key="")

        for key, value in test_metrics.items():
            metrics["test_" + key] = value
    elif evaluation_dataset:
        logger.info("To evaluate on the test set after training, pass the "
                    "'evaluate_on_test' flag, or use the 'allennlp evaluate' command.")

    cleanup_global_logging(stdout_handler)

    # Now tar up results
    archive_model(serialization_dir, files_to_archive=params.files_to_archive)
    dump_metrics(os.path.join(serialization_dir, "metrics.json"), metrics, log=True)

    # We count on the trainer to have the model with best weights
    return trainer.model

def main():
    logger.setLevel(logging.CRITICAL)

    # Hack: use a lambda as a cheap attribute container for the run arguments.
    args = lambda x: None
    args.batch_size = 1024
    args.run_name = "31"
    args.train_data = "/scratch/gobi1/johnchen/new_git_stuff/multimodal_fairness/data/decompensation/train/listfile.csv"
    args.dev_data = "/scratch/gobi1/johnchen/new_git_stuff/multimodal_fairness/data/decompensation/test/listfile.csv"

    import time
    start_time = time.time()

    # mr = MortalityReader()
    # instances = mr.read("/scratch/gobi1/johnchen/new_git_stuff/multimodal_fairness/data/in-hospital-mortality/train/listfile.csv")
    # for inst in instances[:10]:
    #     print(inst)
    print("we are running with the following info")
    print("Torch version {} Cuda version {} cuda available? {}".format(
        torch.__version__, torch.version.cuda, torch.cuda.is_available()))

    # We've copied the training loop from an earlier example, with updated model
    # code, above in the Setup section. We run the training loop to get a trained
    # model.
    dataset_reader = build_dataset_reader(limit_examples=2500)

    dataset_reader.get_label_stats(args.train_data)
    for key in sorted(dataset_reader.stats.keys()):
        print("{} {}".format(key, dataset_reader.stats[key]))
    dataset_reader.get_label_stats(args.dev_data)
    for key in sorted(dataset_reader.stats.keys()):
        print("{} {}".format(key, dataset_reader.stats[key]))

    # These are a subclass of pytorch Datasets, with some allennlp-specific
    # functionality added.
    train_data, dev_data = read_data(dataset_reader, args.train_data, args.dev_data)

    vocab = build_vocab(train_data + dev_data)

    # make sure to index the vocab before adding it
    train_data.index_with(vocab)
    dev_data.index_with(vocab)

    train_dataloader, dev_dataloader = build_data_loaders(train_data, dev_data)
    # del train_data
    # del dev_data

    # throw in all the regularizers to the regularizer applicators
    model = build_model(vocab, use_reg=False)
    model = run_training_loop_over_dataloaders(model, train_dataloader, dev_dataloader,
                                               args, use_gpu=True, batch_size=args.batch_size)

    logger.warning("We have finished training")
    results = evaluate(model, dev_dataloader, 0, None)

    print("we succ fulfilled it")
    with open(f"nice_srun_time_{args.run_name}.txt", "w") as file:
        file.write("it is done\n{}\nTook {}".format(results, time.time() - start_time))

def fine_tune_model(model: Model,
                    params: Params,
                    serialization_dir: str,
                    extend_vocab: bool = False,
                    file_friendly_logging: bool = False,
                    batch_weight_key: str = "") -> Model:
    """
    Fine tunes the given model, using a set of parameters that is largely identical to those used
    for :func:`~allennlp.commands.train.train_model`, except that the ``model`` section is
    ignored, if it is present (as we are already given a ``Model`` here).

    The main difference between the logic done here and the logic done in ``train_model`` is that
    here we do not worry about vocabulary construction or creating the model object. Everything
    else is the same.

    Parameters
    ----------
    archive : ``Archive``
        A saved model archive that is the result of running the ``train`` command.
    train_data_path : ``str``
        Path to the training data to use for fine-tuning.
    serialization_dir : ``str``
        The directory in which to save results and logs.
    validation_data_path : ``str``, optional
        Path to the validation data to use while fine-tuning.
    extend_vocab : ``bool``, optional (default=False)
        If ``True``, we use the new instances to extend your vocabulary.
    file_friendly_logging : ``bool``, optional (default=False)
        If ``True``, we add newlines to tqdm output, even on an interactive terminal, and we slow
        down tqdm's output to only once every 10 seconds.
    """
    prepare_environment(params)
    if os.path.exists(serialization_dir) and os.listdir(serialization_dir):
        raise ConfigurationError(f"Serialization directory ({serialization_dir}) "
                                 f"already exists and is not empty.")

    os.makedirs(serialization_dir, exist_ok=True)
    prepare_global_logging(serialization_dir, file_friendly_logging)

    serialization_params = deepcopy(params).as_dict(quiet=True)
    with open(os.path.join(serialization_dir, CONFIG_NAME), "w") as param_file:
        json.dump(serialization_params, param_file, indent=4)

    if params.pop('model', None):
        logger.warning("You passed parameters for the model in your configuration file, but we "
                       "are ignoring them, using instead the model parameters in the archive.")

    vocabulary_params = params.pop('vocabulary', {})
    if vocabulary_params.get('directory_path', None):
        logger.warning("You passed `directory_path` in parameters for the vocabulary in "
                       "your configuration file, but it will be ignored. ")

    all_datasets = datasets_from_params(params)
    vocab = model.vocab

    if extend_vocab:
        datasets_for_vocab_creation = set(params.pop("datasets_for_vocab_creation", all_datasets))

        for dataset in datasets_for_vocab_creation:
            if dataset not in all_datasets:
                raise ConfigurationError(f"invalid 'dataset_for_vocab_creation' {dataset}")

        logger.info("Extending model vocabulary using %s data.",
                    ", ".join(datasets_for_vocab_creation))
        vocab.extend_from_instances(vocabulary_params,
                                    (instance for key, dataset in all_datasets.items()
                                     for instance in dataset
                                     if key in datasets_for_vocab_creation))

    vocab.save_to_files(os.path.join(serialization_dir, "vocabulary"))

    iterator = DataIterator.from_params(params.pop("iterator"))
    iterator.index_with(model.vocab)
    validation_iterator_params = params.pop("validation_iterator", None)
    if validation_iterator_params:
        validation_iterator = DataIterator.from_params(validation_iterator_params)
        validation_iterator.index_with(vocab)
    else:
        validation_iterator = None

    train_data = all_datasets['train']
    validation_data = all_datasets.get('validation')
    test_data = all_datasets.get('test')

    trainer_params = params.pop("trainer")
    no_grad_regexes = trainer_params.pop("no_grad", ())
    for name, parameter in model.named_parameters():
        if any(re.search(regex, name) for regex in no_grad_regexes):
            parameter.requires_grad_(False)

    frozen_parameter_names, tunable_parameter_names = \
        get_frozen_and_tunable_parameter_names(model)
    logger.info("Following parameters are Frozen (without gradient):")
    for name in frozen_parameter_names:
        logger.info(name)
    logger.info("Following parameters are Tunable (with gradient):")
    for name in tunable_parameter_names:
        logger.info(name)

    trainer_type = trainer_params.pop("type", "default")
    if trainer_type == "default":
        trainer = Trainer.from_params(model=model,
                                      serialization_dir=serialization_dir,
                                      iterator=iterator,
                                      train_data=train_data,
                                      validation_data=validation_data,
                                      params=trainer_params,
                                      validation_iterator=validation_iterator)
    else:
        raise ConfigurationError("currently fine-tune only works with the default Trainer")

    evaluate_on_test = params.pop_bool("evaluate_on_test", False)

    params.assert_empty('base train command')
    try:
        metrics = trainer.train()
    except KeyboardInterrupt:
        # if we have completed an epoch, try to create a model archive.
        if os.path.exists(os.path.join(serialization_dir, _DEFAULT_WEIGHTS)):
            logging.info("Fine-tuning interrupted by the user. Attempting to create "
                         "a model archive using the current best epoch weights.")
            archive_model(serialization_dir, files_to_archive=params.files_to_archive)
        raise

    # Evaluate
    if test_data and evaluate_on_test:
        logger.info("The model will be evaluated using the best epoch weights.")
        test_metrics = evaluate(model, test_data, validation_iterator or iterator,
                                cuda_device=trainer._cuda_devices[0],  # pylint: disable=protected-access
                                batch_weight_key=batch_weight_key)

        for key, value in test_metrics.items():
            metrics["test_" + key] = value
    elif test_data:
        logger.info("To evaluate on the test set after training, pass the "
                    "'evaluate_on_test' flag, or use the 'allennlp evaluate' command.")

    # Now tar up results
    archive_model(serialization_dir, files_to_archive=params.files_to_archive)

    metrics_json = json.dumps(metrics, indent=2)
    with open(os.path.join(serialization_dir, "metrics.json"), "w") as metrics_file:
        metrics_file.write(metrics_json)
    logger.info("Metrics: %s", metrics_json)

    return model

def main():
    parser = argparse.ArgumentParser(description='Evidence Inference experiments')
    parser.add_argument('--cuda_device', type=int, default=0,
                        help='GPU number (default: 0)')
    parser.add_argument('--epochs', type=int, default=2,
                        help='upper epoch limit (default: 2)')
    parser.add_argument('--patience', type=int, default=1,
                        help='trainer patience (default: 1)')
    parser.add_argument('--batch_size', type=int, default=32,
                        help='batch size (default: 32)')
    parser.add_argument('--dropout', type=float, default=0.2,
                        help='dropout for the model (default: 0.2)')
    parser.add_argument('--model_name', type=str, default='baseline',
                        help='model name (default: baseline)')
    parser.add_argument('--tunable', action='store_true',
                        help='tune the underlying embedding model (default: False)')
    args = parser.parse_args()

    annotations = pd.read_csv('data/data/annotations_merged.csv')
    prompts = pd.read_csv('data/data/prompts_merged.csv')

    feature_dictionary = {}
    prompts_dictionary = {}

    for index, row in prompts.iterrows():
        prompts_dictionary[row['PromptID']] = [row['Outcome'], row['Intervention'], row['Comparator']]

    for index, row in annotations.iterrows():
        if row['PMCID'] not in feature_dictionary:
            feature_dictionary[row['PMCID']] = []
        feature_dictionary[row['PMCID']].append([row['Annotations'], row['Label']]
                                                + prompts_dictionary[row['PromptID']])

    train = []
    valid = []
    test = []

    with open('data/splits/train_article_ids.txt') as train_file:
        for line in train_file:
            train.append(int(line.strip()))

    with open('data/splits/validation_article_ids.txt') as valid_file:
        for line in valid_file:
            valid.append(int(line.strip()))

    with open('data/splits/test_article_ids.txt') as test_file:
        for line in test_file:
            test.append(int(line.strip()))

    bert_token_indexer = {'bert': PretrainedBertIndexer('scibert/vocab.txt', max_pieces=512)}

    reader = EIDatasetReader(bert_token_indexer, feature_dictionary)
    train_data = reader.read(train)
    valid_data = reader.read(valid)
    test_data = reader.read(test)

    vocab = Vocabulary.from_instances(train_data + valid_data + test_data)

    bert_token_embedding = PretrainedBertEmbedder(
        'scibert/weights.tar.gz', requires_grad=args.tunable
    )

    word_embeddings = BasicTextFieldEmbedder(
        {"bert": bert_token_embedding},
        {"bert": ['bert']},
        allow_unmatched_keys=True
    )

    model = Baseline(word_embeddings, vocab)

    cuda_device = args.cuda_device
    if torch.cuda.is_available():
        model = model.cuda(cuda_device)
    else:
        cuda_device = -1

    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

    iterator = BucketIterator(batch_size=args.batch_size,
                              sorting_keys=[('intervention', 'num_tokens')],
                              padding_noise=0.1)
    iterator.index_with(vocab)

    serialization_dir = 'model_checkpoints/' + args.model_name

    trainer = Trainer(model=model,
                      optimizer=optimizer,
                      iterator=iterator,
                      train_dataset=train_data,
                      validation_dataset=test_data,
                      patience=args.patience,
                      validation_metric='+accuracy',
                      num_epochs=args.epochs,
                      cuda_device=cuda_device,
                      serialization_dir=serialization_dir)

    result = trainer.train()
    for key in result:
        print(str(key) + ': ' + str(result[key]))

    test_metrics = evaluate(trainer.model, test_data, iterator,
                            cuda_device=cuda_device,
                            batch_weight_key="")

    print('Test Data statistics:')
    for key, value in test_metrics.items():
        print(str(key) + ': ' + str(value))

def _train_worker(
    process_rank: int,
    params: Params,
    serialization_dir: str,
    file_friendly_logging: bool = False,
    recover: bool = False,
    cache_directory: str = None,
    cache_prefix: str = None,
    include_package: List[str] = None,
    node_rank: int = 0,
    master_addr: str = "127.0.0.1",
    master_port: int = 29500,
    world_size: int = 1,
    distributed_device_ids: List[str] = None,
) -> Optional[Model]:
    """
    Helper to train the configured model/experiment. In distributed mode, this is spawned as a
    worker process. In a single GPU experiment, this returns the ``Model`` object and in
    distributed training, nothing is returned.

    # Parameters

    process_rank : ``int``
        The process index that is initialized using the GPU device id.
    params : ``Params``
        A parameter object specifying an AllenNLP Experiment.
    serialization_dir : ``str``
        The directory in which to save results and logs.
    file_friendly_logging : ``bool``, optional (default=False)
        If ``True``, we add newlines to tqdm output, even on an interactive terminal, and we slow
        down tqdm's output to only once every 10 seconds.
    recover : ``bool``, optional (default=False)
        If ``True``, we will try to recover a training run from an existing serialization
        directory. This is only intended for use when something actually crashed during the middle
        of a run. For continuing training a model on new data, see the ``fine-tune`` command.
    cache_directory : ``str``, optional
        For caching data pre-processing. See :func:`allennlp.training.util.datasets_from_params`.
    cache_prefix : ``str``, optional
        For caching data pre-processing. See :func:`allennlp.training.util.datasets_from_params`.
    include_package : ``List[str]``, optional
        In distributed mode, since this function would have been spawned as a separate process,
        the extra imports need to be done again. NOTE: This does not have any effect in single
        GPU training.
    node_rank : ``int``, optional
        Rank of the node.
    world_size : ``int``, optional
        The number of processes involved in distributed training.

    # Returns

    best_model : ``Model``
        The model with the best epoch weights.
    """
    prepare_global_logging(serialization_dir,
                           file_friendly_logging,
                           rank=process_rank,
                           world_size=world_size)
    prepare_environment(params)

    distributed = world_size > 1

    # not using `allennlp.common.util.is_master` as the process group is yet to be initialized
    master = process_rank == 0

    evaluate_on_test = params.pop_bool("evaluate_on_test", False)

    if distributed:
        # Since the worker is spawned and not forked, the extra imports
        # need to be done again.
        if include_package is not None:
            for package_name in include_package:
                import_submodules(package_name)

        num_procs_per_node = len(distributed_device_ids)
        # The Unique identifier of the worker process among all the processes in the
        # distributed training group is computed here. This is used while initializing
        # the process group using `init_process_group`
        global_rank = node_rank * num_procs_per_node + process_rank

        # In distributed training, the configured device is always going to be a list.
        # The corresponding gpu id for the particular worker is obtained by picking the id
        # from the device list with the rank as index
        gpu_id = distributed_device_ids[process_rank]  # type: ignore

        # Till now, "cuda_device" might not be set in the trainer params.
        # But a worker trainer needs to only know about its specific GPU id.
        params["trainer"]["cuda_device"] = gpu_id
        params["trainer"]["world_size"] = world_size
        params["trainer"]["distributed"] = True

        torch.cuda.set_device(gpu_id)
        dist.init_process_group(
            backend="nccl",
            init_method=f"tcp://{master_addr}:{master_port}",
            world_size=world_size,
            rank=global_rank,
        )
        logging.info(f"Process group of world size {world_size} initialized "
                     f"for distributed training in worker {global_rank}")

    trainer_type = params.get("trainer", {}).get("type", "default")

    if trainer_type == "default":
        # Special logic to instantiate backward-compatible trainer.
        pieces = TrainerPieces.from_params(params, serialization_dir, recover,
                                           cache_directory, cache_prefix)
        trainer = Trainer.from_params(
            model=pieces.model,
            serialization_dir=serialization_dir,
            iterator=pieces.iterator,
            train_data=pieces.train_dataset,
            validation_data=pieces.validation_dataset,
            params=pieces.params,
            validation_iterator=pieces.validation_iterator,
        )

        evaluation_iterator = pieces.validation_iterator or pieces.iterator
        evaluation_dataset = pieces.test_dataset
    else:
        if evaluate_on_test:
            raise ValueError(
                "--evaluate-on-test only works with the default Trainer. "
                "If you're using the CallbackTrainer you can use a callback "
                "to evaluate at Events.TRAINING_END; otherwise you'll have "
                "to run allennlp evaluate separately.")

        trainer = TrainerBase.from_params(params, serialization_dir, recover,
                                          cache_directory, cache_prefix)
        evaluation_dataset = None

    params.assert_empty("base train command")

    try:
        if distributed:
            # let the setup get ready for all the workers
            dist.barrier()

        metrics = trainer.train()
    except KeyboardInterrupt:
        # if we have completed an epoch, try to create a model archive.
        if master and os.path.exists(os.path.join(serialization_dir, _DEFAULT_WEIGHTS)):
            logging.info("Training interrupted by the user. Attempting to create "
                         "a model archive using the current best epoch weights.")
            archive_model(serialization_dir, files_to_archive=params.files_to_archive)
        raise

    if master:
        if evaluation_dataset and evaluate_on_test:
            logger.info("The model will be evaluated using the best epoch weights.")
            test_metrics = evaluate(
                trainer.model,
                evaluation_dataset,
                evaluation_iterator,
                cuda_device=trainer.cuda_device,
                # TODO(brendanr): Pass in an arg following Joel's trainer refactor.
                batch_weight_key="",
            )

            for key, value in test_metrics.items():
                metrics["test_" + key] = value
        elif evaluation_dataset:
            logger.info("To evaluate on the test set after training, pass the "
                        "'evaluate_on_test' flag, or use the 'allennlp evaluate' command.")

        dump_metrics(os.path.join(serialization_dir, "metrics.json"), metrics, log=True)

    if not distributed:
        return trainer.model

    return None  # to make mypy happy

                          len(reader.alltags))
    ser_dir_iter = serialization_dir + "/final"
    prepare_global_logging(ser_dir_iter, False)

    trainer = Trainer(model=model,
                      optimizer=optimizer,
                      iterator=iterator,
                      train_dataset=folds[0] + folds[1],
                      validation_dataset=validation_dataset,
                      patience=10,
                      num_epochs=45,
                      validation_metric="+f1-measure-overall",
                      cuda_device=cuda_device,
                      num_serialized_models_to_keep=3,
                      serialization_dir=ser_dir_iter)
    trainer.train()

    test_metrics = util.evaluate(trainer.model, test_dataset, iterator,
                                 cuda_device=trainer._cuda_devices[0],  # pylint: disable=protected-access
                                 batch_weight_key="")
    for key, value in test_metrics.items():
        metrics["test_" + key] = value

    dump_metrics(os.path.join(ser_dir_iter, "metrics.json"), metrics, log=True)

def evaluate_from_args(args: argparse.Namespace) -> Dict[str, Any]:
    # Disable some of the more verbose logging statements
    logging.getLogger("allennlp.common.params").disabled = True
    logging.getLogger("allennlp.nn.initializers").disabled = True
    logging.getLogger("transformers.modeling_utils").disabled = True
    logging.getLogger("transformers.tokenization_utils").disabled = True
    logging.getLogger("transformers.configuration_utils").disabled = True
    logging.basicConfig(level=logging.INFO)

    # Load from archive
    archive = load_archive(
        args.archive_file,
        weights_file=args.weights_file,
        cuda_device=args.cuda_device,
        overrides=args.overrides,
    )
    config = archive.config
    prepare_environment(config)
    model = archive.model
    model.eval()

    # Load the evaluation data
    # Try to use the validation dataset reader if there is one - otherwise fall back
    # to the default dataset_reader used for both training and validation.
    validation_dataset_reader_params = config.pop("validation_dataset_reader", None)
    if validation_dataset_reader_params is not None:
        dataset_reader = DatasetReader.from_params(validation_dataset_reader_params)
    else:
        dataset_reader = DatasetReader.from_params(config.pop("dataset_reader"))
    evaluation_data_path = args.input_file
    logger.info("Reading evaluation data from %s", evaluation_data_path)
    instances = dataset_reader.read(evaluation_data_path)

    embedding_sources = (
        json.loads(args.embedding_sources_mapping) if args.embedding_sources_mapping else {}
    )

    if args.extend_vocab:
        logger.info("Vocabulary is being extended with test instances.")
        model.vocab.extend_from_instances(instances=instances)
        model.extend_embedder_vocab(embedding_sources)

    instances.index_with(model.vocab)
    data_loader_params = config.pop("validation_data_loader", None)
    if data_loader_params is None:
        data_loader_params = config.pop("data_loader")
    if args.batch_size:
        data_loader_params["batch_size"] = args.batch_size
    data_loader = DataLoader.from_params(dataset=instances, params=data_loader_params)

    if "iter_norm" in dir(model.text_field_embedder._token_embedders['tokens']):
        iter_num = model.text_field_embedder._token_embedders['tokens'].iter_norm
    else:
        iter_num = None

    if iter_num:
        # Obtain evaluation info for iterative normalization:
        iter_mean_eval = []
        for iter_norm_i in range(iter_num):
            logging.info("This is the {} time during iterative normalization for evaluation".format(iter_norm_i))
            mean, embeddings = get_iter_norm_mean_eval(model, data_loader, iter_mean_eval, args.cuda_device)
            logger.info("The degree of isotropy of vectors is {} ".format(
                degree_anisotropy(embeddings.t(), args.cuda_device)))
            iter_mean_eval.append(mean)
        model.text_field_embedder._token_embedders['tokens'].iter_norm = None
        model.text_field_embedder._token_embedders['tokens']._matched_embedder.mean_emb_eval = iter_mean_eval
        model.text_field_embedder._token_embedders['tokens']._matched_embedder.is_train = False

    metrics = evaluate(model, data_loader, args.cuda_device, args.batch_weight_key)

    logger.info("Finished evaluating.")
    dump_metrics(args.output_file, metrics, log=True)

    return metrics

                  train_dataset=train_dataset,
                  validation_dataset=validation_dataset,
                  patience=PATIENCE,
                  num_epochs=EPOCH,
                  cuda_device=cuda_device)
trainer.train()

# Here's how to save the model.
with open("model.th", 'wb') as f:
    torch.save(model.state_dict(), f)
vocab.save_to_files("vocabulary")

# # And here's how to reload the model.
# vocab2 = Vocabulary.from_files("vocabulary")
# model2 = BiLSTMTagger(word_embeddings, lstm, vocab2)
# with open("model.th", 'rb') as f:
#     model2.load_state_dict(torch.load(f))
# if cuda_device > -1:
#     model2.cuda(cuda_device)

seq_iterator = BasicIterator(batch_size=32)
seq_iterator.index_with(vocab)

metrics = evaluate(model=model,
                   instances=test_dataset,
                   data_iterator=seq_iterator,
                   cuda_device=cuda_device,
                   batch_weight_key=None)
print("Test accuracy: ", metrics)