Example #1
def evaluate(model: Model,
             instances: Iterable[Instance],
             data_iterator: DataIterator,
             cuda_device: int) -> Dict[str, Any]:
    _warned_tqdm_ignores_underscores = False
    check_for_gpu(cuda_device)
    with torch.no_grad():
        model.eval()

        iterator = data_iterator(instances,
                                 num_epochs=1,
                                 shuffle=False)
        logger.info("Iterating over dataset")
        generator_tqdm = Tqdm.tqdm(iterator, total=data_iterator.get_num_batches(instances))
        for batch in generator_tqdm:
            batch = util.move_to_device(batch, cuda_device)
            model(**batch)
            metrics = model.get_metrics()
            if (not _warned_tqdm_ignores_underscores and
                        any(metric_name.startswith("_") for metric_name in metrics)):
                logger.warning("Metrics with names beginning with \"_\" will "
                               "not be logged to the tqdm progress bar.")
                _warned_tqdm_ignores_underscores = True
            description = ', '.join(["%s: %.2f" % (name, value) for name, value
                                     in metrics.items() if not name.startswith("_")]) + " ||"
            generator_tqdm.set_description(description, refresh=False)

        return model.get_metrics(reset=True)
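For context, the snippet below is a minimal sketch (not part of the original example) of how an evaluate() function like the one above is typically wired up against the AllenNLP 0.x API; the archive and data paths are placeholders.

from allennlp.common.util import prepare_environment
from allennlp.data.dataset_readers import DatasetReader
from allennlp.data.iterators import DataIterator
from allennlp.models.archival import load_archive

# Load a trained model archive (path is hypothetical).
archive = load_archive("model.tar.gz", cuda_device=-1)
config = archive.config
prepare_environment(config)
model = archive.model

# Read the evaluation instances with the reader defined in the config.
reader = DatasetReader.from_params(config.pop("dataset_reader"))
instances = reader.read("dev.tsv")  # hypothetical data path

# The iterator must be indexed with the model's vocabulary before iterating.
iterator = DataIterator.from_params(config.pop("iterator"))
iterator.index_with(model.vocab)

metrics = evaluate(model, instances, iterator, cuda_device=-1)
print(metrics)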
Example #2
def evaluate(model: Model,
             instances: Iterable[Instance],
             data_iterator: DataIterator,
             cuda_device: int) -> Dict[str, Any]:
    model.eval()

    iterator = data_iterator(instances, num_epochs=1, cuda_device=cuda_device, for_training=False)
    logger.info("Iterating over dataset")
    generator_tqdm = Tqdm.tqdm(iterator, total=data_iterator.get_num_batches(instances))
    for batch in generator_tqdm:
        model(**batch)
        metrics = model.get_metrics()
        description = ', '.join(["%s: %.2f" % (name, value) for name, value in metrics.items()]) + " ||"
        generator_tqdm.set_description(description, refresh=False)

    return model.get_metrics(reset=True)
Example #3
    def setUp(self):
        super().setUp()
        param_file = self.FIXTURES_ROOT / 'simple_tagger' / 'experiment_with_regularization.json'
        self.set_up_model(param_file,
                          self.FIXTURES_ROOT / 'data' / 'sequence_tagging.tsv')
        params = Params.from_file(param_file)
        self.reader = DatasetReader.from_params(params['dataset_reader'])
        self.iterator = DataIterator.from_params(params['iterator'])
        self.trainer = Trainer.from_params(
                self.model,
                self.TEST_DIR,
                self.iterator,
                self.dataset,
                None,
                params.get('trainer')
        )
Example #4
def evaluate_from_args(args: argparse.Namespace) -> Dict[str, Any]:
    # Disable some of the more verbose logging statements
    logging.getLogger('allennlp.common.params').disabled = True
    logging.getLogger('allennlp.nn.initializers').disabled = True
    logging.getLogger('allennlp.modules.token_embedders.embedding').setLevel(logging.INFO)

    # Load from archive
    archive = load_archive(args.archive_file, args.cuda_device, args.overrides, args.weights_file)
    config = archive.config
    prepare_environment(config)
    model = archive.model
    model.eval()

    # Load the evaluation data

    # Try to use the validation dataset reader if there is one - otherwise fall back
    # to the default dataset_reader used for both training and validation.
    validation_dataset_reader_params = config.pop('validation_dataset_reader', None)
    if validation_dataset_reader_params is not None:
        dataset_reader = DatasetReader.from_params(validation_dataset_reader_params)
    else:
        dataset_reader = DatasetReader.from_params(config.pop('dataset_reader'))
    evaluation_data_path = args.input_file
    logger.info("Reading evaluation data from %s", evaluation_data_path)
    instances = dataset_reader.read(evaluation_data_path)

    iterator_params = config.pop("validation_iterator", None)
    if iterator_params is None:
        iterator_params = config.pop("iterator")
    iterator = DataIterator.from_params(iterator_params)
    iterator.index_with(model.vocab)

    metrics = evaluate(model, instances, iterator, args.cuda_device, args.batch_weight_key)

    logger.info("Finished evaluating.")
    logger.info("Metrics:")
    for key, metric in metrics.items():
        logger.info("%s: %s", key, metric)

    output_file = args.output_file
    if output_file:
        with open(output_file, "w") as file:
            json.dump(metrics, file, indent=4)
    return metrics
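The same entry point can also be driven programmatically with an argparse.Namespace; a small sketch with placeholder values (none of the file names below come from the original example):

import argparse

args = argparse.Namespace(
    archive_file="model.tar.gz",   # hypothetical archive path
    input_file="test.jsonl",       # hypothetical evaluation data
    output_file="metrics.json",
    weights_file=None,
    cuda_device=-1,
    overrides="",
    batch_weight_key="",
)
metrics = evaluate_from_args(args)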
Example #5
def get_trainer_from_config(config: Params,
                            train_instances: List[Instance],
                            val_instances: List[Instance],
                            vocab: Optional[Vocabulary] = None,
                            device: Optional[int] = -1) -> Trainer:
    trainer_params = config.pop("trainer")
    trainer_params["cuda_device"] = device
    model_params = config.pop("model")
    vocab = vocab or Vocabulary.from_instances(train_instances)
    model = Model.from_params(model_params, vocab=vocab)
    iterator = DataIterator.from_params(config.pop("iterator"))
    iterator.index_with(vocab)
    trainer = Trainer.from_params(
        model=model,
        iterator=iterator,
        train_data=train_instances,
        validation_data=val_instances,
        serialization_dir=None,
        params=trainer_params)
    return trainer
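A sketch of one way get_trainer_from_config() might be called, assuming an AllenNLP 0.x environment; the config and data paths are hypothetical:

from allennlp.common import Params
from allennlp.data.dataset_readers import DatasetReader

config = Params.from_file("experiment.jsonnet")   # hypothetical config path
reader = DatasetReader.from_params(config.pop("dataset_reader"))
train_instances = list(reader.read("train.tsv"))  # hypothetical data paths
val_instances = list(reader.read("dev.tsv"))

trainer = get_trainer_from_config(config,
                                  train_instances,
                                  val_instances,
                                  vocab=None,
                                  device=-1)
trainer.train()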
Example #6
def evaluate(model: Model, instances: Iterable[Instance],
             data_iterator: DataIterator, cuda_device: int) -> Dict[str, Any]:
    model.eval()

    iterator = data_iterator(instances,
                             num_epochs=1,
                             cuda_device=cuda_device,
                             for_training=False)
    logger.info("Iterating over dataset")
    generator_tqdm = Tqdm.tqdm(iterator,
                               total=data_iterator.get_num_batches(instances))
    for batch in generator_tqdm:
        model(**batch)
        metrics = model.get_metrics()
        description = ', '.join(
            ["%s: %.2f" % (name, value)
             for name, value in metrics.items()]) + " ||"
        generator_tqdm.set_description(description, refresh=False)

    return model.get_metrics()
Example #7
def get_model_predictions(model: Model, instances: Iterable[Instance],
                          data_iterator: DataIterator,
                          cuda_device: int) -> (Dict[str, Any], List):

    model.eval()
    model_predictions = []

    iterator = data_iterator(instances,
                             num_epochs=1,
                             cuda_device=cuda_device,
                             for_training=False)
    logger.info("Iterating over dataset")
    generator_tqdm = Tqdm.tqdm(iterator,
                               total=data_iterator.get_num_batches(instances))
    for batch in generator_tqdm:
        result = model(**batch)
        predictions = model.decode(result)
        model_predictions.extend(predictions["tags"])

    return model.get_metrics(), model_predictions
Example #8
def get_model_from_file(archive_path,
                        model_path,
                        overrides=None,
                        eval_suffix='',
                        device=0):
    if archive_path.endswith('gz'):
        archive = load_archive(archive_path, device, overrides)
        config = archive.config
        prepare_environment(config)
        model = archive.model
        serialization_dir = os.path.dirname(archive_path)
    elif archive_path.endswith('yaml'):
        config = yaml_to_params(archive_path, overrides)
        prepare_environment(config)
        config_dir = os.path.dirname(archive_path)
        serialization_dir = os.path.join(config_dir, 'serialization')

    all_datasets = datasets_from_params(config)

    # We want to create the vocab from scratch since it might be of a
    # different type. Vocabulary.from_files will always create the base
    # Vocabulary instance.
    if os.path.exists(os.path.join(serialization_dir, "vocabulary")):
        vocab_path = os.path.join(serialization_dir, "vocabulary")
        vocab = Vocabulary.from_files(vocab_path)

    vocab = Vocabulary.from_params(config.pop('vocabulary'))
    model = Model.from_params(vocab=vocab, params=config.pop('model'))

    if model_path:
        best_model_state = torch.load(model_path)
        model.load_state_dict(best_model_state)

    # instances = all_datasets.get('test')
    iterator = DataIterator.from_params(config.pop("validation_iterator"))

    iterator.index_with(model.vocab)
    model.eval().to(device)
    model.evaluate_mode = True

    return model
Example #9
def evaluate(model: Model, dataset: Dataset, iterator: DataIterator,
             cuda_device: int) -> Dict[str, Any]:
    model.eval()

    generator = iterator(dataset, num_epochs=1)
    logger.info("Iterating over dataset")
    generator_tqdm = tqdm.tqdm(generator,
                               total=iterator.get_num_batches(dataset))
    for batch in generator_tqdm:
        tensor_batch = arrays_to_variables(batch,
                                           cuda_device,
                                           for_training=False)
        # stores TP/FN counts for get_metrics as a side-effect
        model.forward(**tensor_batch)
        metrics = model.get_metrics()
        description = ', '.join(
            ["%s: %.2f" % (name, value)
             for name, value in metrics.items()]) + " ||"
        generator_tqdm.set_description(description)

    return model.get_metrics(reset=True)
Example #10
    def from_params(  # type: ignore
        cls,
        params: Params,
        serialization_dir: str,
        recover: bool = False,
        cache_directory: str = None,
        cache_prefix: str = None,
    ) -> "MultiTaskTrainer":
        readers = {
            name: DatasetReader.from_params(reader_params)
            for name, reader_params in params.pop(
                "train_dataset_readers").items()
        }
        train_file_paths = params.pop("train_file_paths").as_dict()

        datasets = {
            name: reader.read(train_file_paths[name])
            for name, reader in readers.items()
        }

        instances = (instance for dataset in datasets.values()
                     for instance in dataset)
        vocab = Vocabulary.from_params(Params({}), instances=instances)
        model = Model.from_params(params.pop("model"), vocab=vocab)
        iterator = DataIterator.from_params(params.pop("iterator"))
        iterator.index_with(vocab)
        mingler = DatasetMingler.from_params(params.pop("mingler"))

        parameters = [[n, p] for n, p in model.named_parameters()
                      if p.requires_grad]
        optimizer = Optimizer.from_params(parameters, params.pop("optimizer"))

        num_epochs = params.pop_int("num_epochs", 10)

        _ = params.pop("trainer", Params({}))

        params.assert_empty(__name__)

        return MultiTaskTrainer(model, serialization_dir, iterator, mingler,
                                optimizer, datasets, num_epochs)
Example #11
def evaluating(**params):
    param_is_exist(["model_file", "input_file", "include_package"], params)
    for package_name in params["include_package"]:
        import_submodules(package_name)
    cuda_device = params["cuda_device"] if "cuda_device" in params else -1
    overrides = params["overrides"] if "overrides" in params else ""
    weights_file = params["weights_file"] if "weights_file" in params else ""
    archive = load_archive(params["model_file"], cuda_device, overrides, weights_file)
    config = archive.config
    prepare_environment(config)
    model = archive.model
    model.eval()

    # Load the evaluation data

    # Try to use the validation dataset reader if there is one - otherwise fall back
    # to the default dataset_reader used for both training and validation.
    validation_dataset_reader_params = config.pop('validation_dataset_reader', None)
    if validation_dataset_reader_params is not None:
        dataset_reader = DatasetReader.from_params(validation_dataset_reader_params)
    else:
        dataset_reader = DatasetReader.from_params(config.pop('dataset_reader'))
    evaluation_data_path = params["input_file"]
    logger.info("Reading evaluation data from %s", evaluation_data_path)
    instances = dataset_reader.read(evaluation_data_path)

    iterator_params = config.pop("validation_iterator", None)
    if iterator_params is None:
        iterator_params = config.pop("iterator")
    iterator = DataIterator.from_params(iterator_params)
    iterator.index_with(model.vocab)
    metrics = evaluate(model, instances, iterator, cuda_device, batch_weight_key="loss")
    logger.info("Finished evaluating.")
    logger.info("Metrics:")
    for key, metric in metrics.items():
        logger.info("%s: %s", key, metric)

    return metrics
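A hypothetical call to evaluating(); the file names and package name below are placeholders, not from the original example:

metrics = evaluating(
    model_file="model.tar.gz",
    input_file="test.jsonl",
    include_package=["my_project"],
    cuda_device=-1,
)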
Example #12
def evaluate(model: Model, dataset: Dataset, iterator: DataIterator,
             cuda_device: int) -> Dict[str, Any]:
    model.eval()

    generator = iterator(dataset, num_epochs=1)
    logger.info("Iterating over dataset")
    generator_tqdm = tqdm.tqdm(generator,
                               total=iterator.get_num_batches(dataset))
    for batch in generator_tqdm:
        tensor_batch = arrays_to_variables(batch,
                                           cuda_device,
                                           for_training=False)
        if 'metadata' in tensor_batch and 'metadata' not in signature(
                model.forward).parameters:
            del tensor_batch['metadata']
        model.forward(**tensor_batch)
        metrics = model.get_metrics()
        description = ', '.join(
            ["%s: %.2f" % (name, value)
             for name, value in metrics.items()]) + " ||"
        generator_tqdm.set_description(description)

    return model.get_metrics()
Example #13
def evaluate_from_args(args: argparse.Namespace) -> Dict[str, Any]:
    # Disable some of the more verbose logging statements
    logging.getLogger('allennlp.common.params').disabled = True
    logging.getLogger('allennlp.nn.initializers').disabled = True
    logging.getLogger('allennlp.modules.token_embedders.embedding').setLevel(logging.INFO)

    # Load from archive
    archive = load_archive(args.archive_file, args.cuda_device, args.overrides, args.weights_file)
    config = archive.config
    prepare_environment(config)
    model = archive.model
    model.eval()

    # Load the evaluation data

    # Try to use the validation dataset reader if there is one - otherwise fall back
    # to the default dataset_reader used for both training and validation.
    validation_dataset_reader_params = config.pop('validation_dataset_reader', None)
    if validation_dataset_reader_params is not None:
        dataset_reader = DatasetReader.from_params(validation_dataset_reader_params)
    else:
        dataset_reader = DatasetReader.from_params(config.pop('dataset_reader'))
    evaluation_data_path = args.evaluation_data_file
    logger.info("Reading evaluation data from %s", evaluation_data_path)
    instances = dataset_reader.read(evaluation_data_path)

    iterator = DataIterator.from_params(config.pop("iterator"))
    iterator.index_with(model.vocab)

    metrics = evaluate(model, instances, iterator, args.cuda_device)

    logger.info("Finished evaluating.")
    logger.info("Metrics:")
    for key, metric in metrics.items():
        logger.info("%s: %s", key, metric)

    return metrics
Example #14
def evaluate(model: Model,
             instances: Iterable[Instance],
             data_iterator: DataIterator,
             output_file: str = None,
             file_mode="w",
             id_to_meta: Dict[str, Any] = {},
             feat_id_to_feat_name: Dict[int, str] = {}) -> Dict[str, Any]:
    model.eval()

    iterator = data_iterator(instances, num_epochs=1, shuffle=False)
    logger.info("Iterating over dataset")
    generator_tqdm = Tqdm.tqdm(iterator,
                               total=data_iterator.get_num_batches(instances))
    with ExitStack() as stack:
        if output_file is None:
            file_handle = None
        else:
            file_handle = stack.enter_context(open(output_file, file_mode))

        for batch in generator_tqdm:
            model_output = model(**batch)
            metrics = model.get_metrics()
            if file_handle:
                id2label = model.vocab.get_index_to_token_vocabulary("labels")
                _persist_data(file_handle,
                              batch.get("metadata"),
                              model_output,
                              id2label=id2label,
                              id_to_meta=id_to_meta,
                              feat_id_to_feat_name=feat_id_to_feat_name)
            description = ', '.join([
                "%s: %.2f" % (name, value) for name, value in metrics.items()
            ]) + " ||"
            generator_tqdm.set_description(description)

    return model.get_metrics()
Example #15
def main():
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('--input-file', type=str, help='path to the file containing the evaluation data')
    parser.add_argument('--output-file', type=str, help='path to output file')
    parser.add_argument('--weights-file',
                        type=str,
                        help='a path that overrides which weights file to use')
    parser.add_argument('--cuda-device',
                        type=int,
                        default=-1,
                        help='id of GPU to use (if any)')
    parser.add_argument('--overrides',
                        type=str,
                        default="",
                        help='a JSON structure used to override the experiment configuration')
    parser.add_argument('--include-package', type=str, default='')
    parser.add_argument('--archive-file', type=str)
    args = parser.parse_args()
    
    if '/' in args.weights_file:
        label_file = args.weights_file[:args.weights_file.rfind('/') + 1]
    else:
        label_file = ''
    label_file += (args.input_file[args.input_file.rfind('/') + 1: args.input_file.rfind('.')] if '/' in args.input_file else
                   args.input_file[:args.input_file.rfind('.')])
    label_file += '_reallabel_guessedlabel.csv'
    print("Will write labels to " + label_file)
    print("Evaluating on " + args.input_file)
    print("Archive file being used is " + args.archive_file)
    print("Weights file being used is " + args.weights_file)
    print()

    logging.getLogger('allennlp.common.params').disabled = True
    logging.getLogger('allennlp.nn.initializers').disabled = True
    logging.getLogger('allennlp.modules.token_embedders.embedding').setLevel(logging.INFO)

    if args.include_package.strip() != '':
        import_submodules(args.include_package)
    import_submodules("attn_tests_lib")
    import_submodules("textcat")

    if args.overrides != '':
        with open(args.overrides, 'r') as f:
            args.overrides = " ".join([l.strip() for l in f.readlines()])
    archive = load_archive(args.archive_file, args.cuda_device, args.overrides, args.weights_file)
    config = archive.config
    prepare_environment(config)
    model = archive.model
    model.eval()

    if model._output_logit.get_output_dim() == 2:
        model.calculate_f1 = True
        model._f1 = F1Measure(1)

    validation_dataset_reader_params = config.pop('validation_dataset_reader', None)
    if validation_dataset_reader_params is not None:
        dataset_reader = DatasetReader.from_params(validation_dataset_reader_params)
    else:
        dataset_reader = DatasetReader.from_params(config.pop('dataset_reader'))
    evaluation_data_path = args.input_file
    logger.info("Reading evaluation data from %s", evaluation_data_path)
    instances = dataset_reader.read(evaluation_data_path)

    iterator_params = config.pop("validation_iterator", None)
    if iterator_params is None:
        iterator_params = config.pop("iterator")
    new_param_dict = {'type': 'basic'}
    if 'batch_size' in iterator_params.params:
        new_param_dict['batch_size'] = iterator_params.params['batch_size']
    if 'maximum_samples_per_batch' in iterator_params.params:
        new_param_dict['maximum_samples_per_batch'] = iterator_params.params['maximum_samples_per_batch']
    iterator_params.params = new_param_dict
    iterator = DataIterator.from_params(iterator_params)
    iterator.index_with(model.vocab)

    metrics = evaluate(model, instances, iterator, args.cuda_device, label_file)

    logger.info("Finished evaluating.")
    logger.info("Metrics:")
    for key, metric in metrics.items():
        logger.info("%s: %s", key, metric)

    print('\n' + json.dumps(metrics, indent=4))
    print("Successfully wrote labels to " + label_file)

    output_file = args.output_file
    if output_file:
        with open(output_file, "w") as file:
            json.dump(metrics, file, indent=4)
    return metrics
Example #16
def evaluate_from_args(args: argparse.Namespace) -> Dict[str, Any]:
    # Disable some of the more verbose logging statements
    logging.getLogger('allennlp.common.params').disabled = True
    logging.getLogger('allennlp.nn.initializers').disabled = True
    logging.getLogger('allennlp.modules.token_embedders.embedding').setLevel(
        logging.INFO)

    # Load from archive
    archive = load_archive(args.archive_file, args.cuda_device, args.overrides,
                           args.weights_file)
    config = archive.config
    prepare_environment(config)
    model = archive.model
    model.eval()

    # Load the evaluation data

    # Try to use the validation dataset reader if there is one - otherwise fall back
    # to the default dataset_reader used for both training and validation.
    validation_dataset_reader_params = config.pop('validation_dataset_reader',
                                                  None)
    if validation_dataset_reader_params is not None:
        dataset_reader = DatasetReader.from_params(
            validation_dataset_reader_params)
    else:
        dataset_reader = DatasetReader.from_params(
            config.pop('dataset_reader'))
    evaluation_data_path = args.input_file
    logger.info("Reading evaluation data from %s", evaluation_data_path)
    instances = dataset_reader.read(evaluation_data_path)

    embedding_sources: Dict[str, str] = (json.loads(args.embedding_sources_mapping)
                                         if args.embedding_sources_mapping else {})
    if args.extend_vocab:
        logger.info("Vocabulary is being extended with test instances.")
        model.vocab.extend_from_instances(Params({}), instances=instances)
        model.extend_embedder_vocab(embedding_sources)

    iterator_params = config.pop("validation_iterator", None)
    if iterator_params is None:
        iterator_params = config.pop("iterator")
    iterator = DataIterator.from_params(iterator_params)
    iterator.index_with(model.vocab)

    thrs = args.thresholds.replace("_", ",").split(",")

    for thr in thrs:
        model._temperature_threshold = float(thr)
        metrics = evaluate(model, instances, iterator, args.cuda_device,
                           args.batch_weight_key)

        logger.info("Finished evaluating.")
        logger.info("Metrics:")
        for key, metric in metrics.items():
            logger.info("%s: %s: %s", thr, key, metric)

        output_file = args.output_file
        if output_file:
            with open(output_file + "_" + thr, "w") as file:
                json.dump(metrics, file, indent=4)
    return metrics
Example #17
        "The loaded model seems not to be an am-parser (GraphDependencyParser)"
    )

# Load the evaluation data

# Try to use the validation dataset reader if there is one - otherwise fall back
# to the default dataset_reader used for both training and validation.
validation_dataset_reader_params = config.pop('validation_dataset_reader',
                                              None)
if validation_dataset_reader_params is not None:
    dataset_reader = DatasetReader.from_params(
        validation_dataset_reader_params)
else:
    dataset_reader = DatasetReader.from_params(config.pop('dataset_reader'))

instances = dataset_reader.read([[args.formalism, args.input_file]])

iterator_params = config.pop("validation_iterator", None)
if iterator_params is None:
    iterator_params = config.pop("iterator")
iterator = DataIterator.from_params(iterator_params)
iterator.index_with(model.vocab)

metrics = evaluate(model, instances, iterator, args.cuda_device,
                   args.batch_weight_key)

logger.info("Finished evaluating.")
logger.info("Metrics:")
for key, metric in metrics.items():
    logger.info("%s: %s", key, metric)
Example #18
def evaluate(model: Model, instances: Iterable[Instance],
             data_iterator: DataIterator, cuda_device: int,
             batch_weight_key: str) -> Dict[str, Any]:
    check_for_gpu(cuda_device)
    with torch.no_grad():
        model.eval()

        iterator = data_iterator(instances, num_epochs=1, shuffle=False)
        logger.info("Iterating over dataset")
        generator_tqdm = Tqdm.tqdm(
            iterator, total=data_iterator.get_num_batches(instances))

        # Number of batches in instances.
        batch_count = 0
        # Number of batches where the model produces a loss.
        loss_count = 0
        # Cumulative weighted loss
        total_loss = 0.0
        # Cumulative weight across all batches.
        total_weight = 0.0

        for batch in generator_tqdm:
            batch_count += 1
            batch = nn_util.move_to_device(batch, cuda_device)
            output_dict = model(**batch)
            loss = output_dict.get("loss")

            ############ Uncomment this block to save class_probabilities, logits, and losses for each batch #########
            # print(output_dict['class_probabilities'].shape)
            # import copy
            #
            # newoutput_dict = copy.deepcopy(output_dict)
            # newoutput_dict['class_probabilities'] = newoutput_dict['class_probabilities'].cpu().data.numpy()
            # newoutput_dict['logits'] = newoutput_dict['logits'].cpu().data.numpy()
            # newoutput_dict['loss'] = newoutput_dict['loss'].cpu().data.numpy()
            #
            # output_file = os.path.join(os.path.dirname(__file__), '..', "data", "test",
            #                            str(batch_count) + "_output.pkl")
            # import json
            # import pickle
            # if output_file:
            #     with open(output_file, "wb") as file:
            #         pickle.dump(newoutput_dict, file)
            #     file.close()
            # ###########################################################################################################

            metrics = model.get_metrics()

            if loss is not None:
                loss_count += 1
                if batch_weight_key:
                    weight = output_dict[batch_weight_key].item()
                else:
                    weight = 1.0

                total_weight += weight
                total_loss += loss.item() * weight
                # Report the average loss so far.
                metrics["loss"] = total_loss / total_weight

            if (not HasBeenWarned.tqdm_ignores_underscores and any(
                    metric_name.startswith("_") for metric_name in metrics)):
                logger.warning("Metrics with names beginning with \"_\" will "
                               "not be logged to the tqdm progress bar.")
                HasBeenWarned.tqdm_ignores_underscores = True
            description = ', '.join([
                "%s: %.4f" % (name, value)
                for name, value in metrics.items() if not name.startswith("_")
            ]) + " ||"
            generator_tqdm.set_description(description, refresh=False)

        final_metrics = model.get_metrics(reset=True)
        if loss_count > 0:
            # Sanity check
            if loss_count != batch_count:
                raise RuntimeError(
                    "The model you are trying to evaluate only sometimes " +
                    "produced a loss!")
            final_metrics["loss"] = total_loss / total_weight

        return final_metrics
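The loss bookkeeping above is a weighted average: each batch contributes loss * weight, and the reported "loss" is total_loss / total_weight. A tiny self-contained illustration with invented numbers:

# (loss, weight) per batch -- made-up values, for illustration only.
batches = [(0.50, 32.0), (0.75, 32.0), (0.25, 16.0)]

total_loss = 0.0
total_weight = 0.0
for loss, weight in batches:
    total_weight += weight
    total_loss += loss * weight

print(total_loss / total_weight)  # 44.0 / 80.0 -> 0.55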
Example #19
def main():
    """The main entry point

    This is the main entry point for training HAN SOLO models.

    Usage::

        ${PYTHONPATH} -m AttentionSegmentation/main
            --config_file ${CONFIG_FILE}

    """
    args = get_arguments()
    # Setup Experiment Directory
    config = read_from_config_file(args.config_file)
    if args.seed > 0:
        np.random.seed(args.seed)
        torch.manual_seed(args.seed)
        if config.get('trainer', None) is not None and \
           config.get('trainer', None).get('cuda_device', -1) > 0:
            torch.cuda.manual_seed(args.seed)
    serial_dir, config = setup_output_dir(config, args.loglevel)
    logger = logging.getLogger(__name__)

    # Load Training Data
    TRAIN_PATH = config.pop("train_data_path")
    logger.info("Loading Training Data from {0}".format(TRAIN_PATH))
    dataset_reader_params = config.pop("dataset_reader")
    reader_type = dataset_reader_params.pop("type", None)
    assert reader_type is not None and hasattr(Readers, reader_type),\
        f"Cannot find reader {reader_type}"
    reader = getattr(Readers, reader_type).from_params(dataset_reader_params)
    instances_train = reader.read(file_path=TRAIN_PATH)
    logger.info("Length of {0}: {1}".format(
        "Training Data", len(instances_train)))

    # Load Validation Data
    VAL_PATH = config.pop("validation_data_path")
    logger.info("Loading Validation Data from {0}".format(VAL_PATH))
    instances_val = reader.read(VAL_PATH)
    logger.info("Length of {0}: {1}".format(
        "Validation Data", len(instances_val)))

    # Load Test Data
    TEST_PATH = config.pop("test_data_path", None)
    instances_test = None
    if TEST_PATH is not None:
        logger.info("Loading Test Data from {0}".format(TEST_PATH))
        instances_test = reader.read(TEST_PATH)
        logger.info("Length of {0}: {1}".format(
            "Testing Data", len(instances_test)))

    # # Load Pretrained Existing Model
    # load_config = config.pop("load_from", None)

    # # Construct Vocabulary
    vocab_size = config.pop("max_vocab_size", -1)
    logger.info("Constructing Vocab of size: {0}".format(vocab_size))
    vocab_size = None if vocab_size == -1 else vocab_size
    vocab = Vocabulary.from_instances(instances_train,
                                      max_vocab_size=vocab_size)
    vocab_dir = os.path.join(serial_dir, "vocab")
    assert os.path.exists(vocab_dir), "Couldn't find the vocab directory"
    vocab.save_to_files(vocab_dir)

    # if load_config is not None:
    #     # modify the vocab from the source model vocab
    #     src_vocab_path = load_config.pop("vocab_path", None)
    #     if src_vocab_path is not None:
    #         vocab = construct_vocab(src_vocab_path, vocab_dir)
    #         # Delete the old vocab
    #         for file in os.listdir(vocab_dir):
    #             os.remove(os.path.join(vocab_dir, file))
    #         # save the new vocab
    #         vocab.save_to_files(vocab_dir)
    logger.info("Saving vocab to {0}".format(vocab_dir))
    logger.info("Vocab Construction Done")

    # # Construct the data iterators
    logger.info("Constructing Data Iterators")
    data_iterator = DataIterator.from_params(config.pop("iterator"))
    data_iterator.index_with(vocab)

    logger.info("Data Iterators Done")

    # Create the model
    logger.info("Constructing The model")
    model_params = config.pop("model")
    model_type = model_params.pop("type")
    assert model_type is not None and hasattr(Models, model_type),\
        f"Cannot find model {model_type}"
    model = getattr(Models, model_type).from_params(
        vocab=vocab,
        params=model_params,
        label_indexer=reader.get_label_indexer()
    )
    logger.info("Model Construction done")

    # visualize = config.pop("visualize", False)
    # visualizer = None
    # if visualize:
    #     visualizer = html_visualizer(vocab, reader)
    segmenter_params = config.pop("segmentation")
    segment_class = segmenter_params.pop("type")
    segmenter = getattr(SegmentationModels, segment_class).from_params(
        vocab=vocab,
        reader=reader,
        params=segmenter_params
    )

    logger.info("Segmenter Done")

    # if load_config is not None:
    #     # Load the weights, as specified by the load_config
    #     model_path = load_config.pop("model_path", None)
    #     layers = load_config.pop("layers", None)
    #     load_config.assert_empty("Load Config")
    #     assert model_path is not None,\
    #         "You need to specify model path to load from"
    #     model = load_model_from_existing(model_path, model, layers)
    #     logger.info("Pretrained weights loaded")

    # logger.info("Starting the training process")

    trainer = Trainer.from_params(
        model=model,
        base_dir=serial_dir,
        iterator=data_iterator,
        train_data=instances_train,
        validation_data=instances_val,
        segmenter=segmenter,
        params=config.pop("trainer")
    )
    trainer.train()
    logger.info("Training Done.")
    if instances_test is not None:
        logger.info("Computing final Test Accuracy")
        trainer.test(instances_test)
    logger.info("Done.")
Example #20
def evaluate_from_args(args: argparse.Namespace,
                       func_eval=None) -> Dict[str, Any]:
    # USAGE:
    # docqa/run.py
    # evaluate_custom
    # --archive_file
    # _trained_models/qanet_semantic_flat_concat_sdp_debug/model.tar.gz
    # --evaluation_data_file
    # /Users/mihaylov/research/document-parsing-pipeline/tests/fixtures/data/narrativeqa/third_party/wikipedia/summaries-all.csv.parsed.jsonl.srl.jsonl.with_q_spans.jsonl.with_exp.with_sdp.json.train.2
    # --output_file
    # predictions_dev.json
    # --batch_size=1
    # --item_ids
    # "00936497f5884881f1df23f4834f6739552cee8b##016[15:30];00936497f5884881f1df23f4834f6739552cee8b##005[17:45];0029bdbe75423337b551e42bb31f9a102785376f##023"
    # --output_attention
    # True

    if func_eval is None:
        func_eval = evaluate

    # Disable some of the more verbose logging statements
    logging.getLogger('allennlp.common.params').disabled = True
    logging.getLogger('allennlp.nn.initializers').disabled = True
    logging.getLogger('allennlp.modules.token_embedders.embedding').setLevel(
        logging.INFO)

    logging.info("Parameters:")
    for arg in vars(args):
        logging.info("{0}: {1}".format(arg, getattr(args, arg)))

    # Load from archive
    cuda_device = args.cuda_device
    output_attention = args.output_attention.lower() == "true"
    display_attention_matplot = args.display_attention_matplot.lower() == "true"

    # selected ids to validate
    item_ids = []
    item_ids_with_range = {}
    if args.item_ids:
        separator = ";"
        item_ids_str = args.item_ids
        item_ids = item_ids_str.split(separator)
        for item_id in item_ids:
            if "[" in item_id and "]" in item_id:
                tokens_range = [
                    int(x)
                    for x in item_id.split("[")[-1].replace("]", "").split(":")
                ]
                item_id_only = item_id.split("[")[0]
                item_ids_with_range[item_id_only] = {
                    "attention_range": tokens_range
                }
            else:
                item_ids_with_range[item_id] = None

        item_ids = set(item_ids_with_range.keys())

    logging.info("cuda_device:{0}".format(cuda_device))
    archive = load_archive(args.archive_file,
                           cuda_device=cuda_device,
                           overrides=args.overrides)
    config = archive.config
    prepare_environment(config)

    model = archive.model
    model.eval()

    if output_attention:
        if hasattr(model, "return_output_metadata"):
            model.return_output_metadata = output_attention
        else:
            raise Exception(
                "Model {0} does not support output of the attention weights!".
                format(model))

    # Load the evaluation data
    dataset_reader = DatasetReader.from_params(
        config.pop('validation_dataset_reader') if "validation_dataset_reader"
        in config else config.pop('dataset_reader'))

    feat_id_to_feat_name = {}
    if hasattr(dataset_reader, "_semantic_views_extractor"):
        feat_id_to_feat_name = dataset_reader._semantic_views_extractor.get_vocab_feats_id2name()

    evaluation_data_path = args.evaluation_data_file
    logger.info("Reading evaluation data from %s", evaluation_data_path)

    batch_size = args.batch_size
    start_id = args.start_id
    end_id = args.end_id

    dataset = dataset_reader.read(evaluation_data_path)
    file_mode = args.file_open_mode
    if start_id > 0 or end_id > 0:
        if not isinstance(dataset, list):
            raise ValueError(
                "dataset must be list when start_id and end_id are set")

        start_id = max(start_id, 0)
        if end_id <= 0:
            end_id = len(dataset)

        dataset = dataset[start_id:end_id]

    selected_dataset = []
    if len(item_ids) > 0:
        for item in dataset:
            item_id = item.fields["metadata"]["id"]
            if item_id in item_ids:
                selected_dataset.append(item)
            else:
                del item

        dataset = selected_dataset

    iterator_config = config.pop(
        'validation_iterator'
    ) if "validation_iterator" in config else config.pop('iterator')
    if batch_size > -1:
        if "base_iterator" in iterator_config:
            iterator_config["base_iterator"]["batch_size"] = batch_size
        else:
            iterator_config["batch_size"] = batch_size

    iterator = DataIterator.from_params(iterator_config)

    iterator.index_with(model.vocab)

    metrics = func_eval(model,
                        dataset,
                        iterator,
                        args.output_file,
                        file_mode=file_mode,
                        id_to_meta=item_ids_with_range,
                        feat_id_to_feat_name=feat_id_to_feat_name)

    if args.output_file:
        absolute_path = os.path.abspath(args.output_file)
        logging.info("Output saved to \n{}".format(absolute_path))
        with open(absolute_path + ".id2featname", mode="w") as fp:
            json.dump(feat_id_to_feat_name, fp)

    logger.info("Finished evaluating.")
    logger.info("Metrics:")
    for key, metric in metrics.items():
        logger.info("%s: %s", key, metric)

    return metrics
Example #21
def evaluate(model: Model, instances: Iterable[Instance],
             data_iterator: DataIterator, cuda_device: int,
             serialization_dir: str, eval_suffix: str,
             batch_weight_key: str) -> Dict[str, Any]:
    check_for_gpu(cuda_device)
    nlp = spacy.load("en_core_web_lg")
    assert not os.path.exists(
        os.path.join(serialization_dir, f'generations{eval_suffix}.jsonl'))

    # caching saves us extra 30 minutes
    if 'goodnews' in serialization_dir:
        cache_path = 'data/goodnews/evaluation_cache.pkl'
    elif 'nytimes' in serialization_dir:
        cache_path = 'data/nytimes/evaluation_cache.pkl'
    if os.path.exists(cache_path):
        with open(cache_path, 'rb') as f:
            cache = pickle.load(f)
    else:
        cache = {}

    with torch.no_grad():
        model.eval()

        iterator = data_iterator(instances, num_epochs=1, shuffle=False)
        logger.info("Iterating over dataset")
        generator_tqdm = Tqdm.tqdm(
            iterator, total=data_iterator.get_num_batches(instances))

        # Number of batches in instances.
        batch_count = 0
        # Number of batches where the model produces a loss.
        loss_count = 0
        # Cumulative weighted loss
        total_loss = 0.0
        # Cumulative weight across all batches.
        total_weight = 0.0

        for batch in generator_tqdm:
            batch_count += 1
            batch = nn_util.move_to_device(batch, cuda_device)
            output_dict = model(**batch)
            loss = output_dict.get("loss")

            write_to_json(output_dict, serialization_dir, nlp, eval_suffix,
                          cache)

            metrics = model.get_metrics()

            if loss is not None:
                loss_count += 1
                if batch_weight_key:
                    weight = output_dict[batch_weight_key].item()
                else:
                    weight = 1.0

                total_weight += weight
                total_loss += loss.item() * weight
                # Report the average loss so far.
                metrics["loss"] = total_loss / total_weight

            if (not HasBeenWarned.tqdm_ignores_underscores and any(
                    metric_name.startswith("_") for metric_name in metrics)):
                logger.warning("Metrics with names beginning with \"_\" will "
                               "not be logged to the tqdm progress bar.")
                HasBeenWarned.tqdm_ignores_underscores = True
            description = ', '.join([
                "%s: %.2f" % (name, value)
                for name, value in metrics.items() if not name.startswith("_")
            ]) + " ||"
            generator_tqdm.set_description(description, refresh=False)

        final_metrics = model.get_metrics(reset=True)
        if loss_count > 0:
            # Sanity check
            # if loss_count != batch_count:
            #     raise RuntimeError("The model you are trying to evaluate only sometimes " +
            #                        "produced a loss!")
            final_metrics["loss"] = total_loss / total_weight

    if not os.path.exists(cache_path):
        with open(cache_path, 'wb') as f:
            pickle.dump(cache, f)

    return final_metrics
Example #22
    dataset_reader = DatasetReader.from_params(config.pop('dataset_reader'))
evaluation_data_path = args.input_file

embedding_sources: Dict[str, str] = (json.loads(args.embedding_sources_mapping)
                                     if args.embedding_sources_mapping else {})
if args.extend_vocab:
    logger.info("Vocabulary is being extended with test instances.")
    logger.info("Reading evaluation data from %s", evaluation_data_path)
    instances = dataset_reader.read(evaluation_data_path)
    model.vocab.extend_from_instances(Params({}), instances=instances)
    model.extend_embedder_vocab(embedding_sources)

formalism = args.formalism
instances = dataset_reader.read([[formalism, args.input_file]])  # we need to give the formalism to amconll dataset_reader
model.train(False)
data_iterator = DataIterator.from_params(config.pop('iterator'))

with open(args.input_file) as f:
    conll_sentences = list(amconll_tools.parse_amconll(f))

predictions = dataset_reader.restore_order(forward_on_instances(model, instances, data_iterator))

i2edge_label = [model.vocab.get_token_from_index(i, namespace=formalism + "_head_tags") for i in
                range(model.vocab.get_vocab_size(formalism + "_head_tags"))]

i2supertag = [model.vocab.get_token_from_index(i, namespace=formalism+"_supertag_labels")
              for i in range(model.vocab.get_vocab_size(formalism+"_supertag_labels"))]

lexlabel2i = { model.vocab.get_token_from_index(i, namespace=formalism+"_lex_labels") : i
              for i in range(model.vocab.get_vocab_size(formalism+"_lex_labels"))}
Example #23
def evaluate_from_args(args: argparse.Namespace) -> Dict[str, Any]:
    # Disable some of the more verbose logging statements
    logging.getLogger('allennlp.common.params').disabled = True
    logging.getLogger('allennlp.nn.initializers').disabled = True
    logging.getLogger('allennlp.modules.token_embedders.embedding').setLevel(
        logging.INFO)

    # Load from archive
    archive = load_archive(args.archive_file, args.cuda_device, args.overrides)
    config = archive.config
    prepare_environment(config)
    model = archive.model
    model.eval()

    # Load the evaluation data
    dataset_reader_config = config.pop('dataset_reader')

    if "evaluator_type" in config:
        eval_type = config.get("evaluator_type")
    else:
        dataset_reader_type = dataset_reader_config.get("type")
        eval_type = dataset_reader_type
    dataset_reader = DatasetReader.from_params(dataset_reader_config)

    evaluation_data_paths_list = []
    evaluation_data_short_names = []
    output_files_list = args.output_file.split(";")

    if args.evaluation_data_file:
        evaluation_data_paths_list.append(args.evaluation_data_file)
        evaluation_data_short_names.append("input")
    else:
        if "validation_data_path" in config:
            evaluation_data_paths_list.append(config["validation_data_path"])
            evaluation_data_short_names.append("dev")

        if "test_data_path" in config:
            evaluation_data_paths_list.append(config["test_data_path"])
            evaluation_data_short_names.append("test")

    metrics_out = {}

    iterator = DataIterator.from_params(config.pop("iterator"))
    iterator.index_with(model.vocab)

    for i in range(len(evaluation_data_paths_list)):
        evaluation_data_path = evaluation_data_paths_list[i]
        evaluation_data_short_name = evaluation_data_path if len(evaluation_data_short_names) - 1 < i \
                                                          else evaluation_data_short_names[i]

        if len(output_files_list) == len(evaluation_data_paths_list):
            out_file = output_files_list[i]
        else:
            out_file = "{0}_{1}.txt".format(output_files_list[0],
                                            evaluation_data_short_name)

        logger.info("Reading evaluation data from %s", evaluation_data_path)
        dataset = dataset_reader.read(evaluation_data_path)

        metrics = evaluate(model, dataset, iterator, args.cuda_device,
                           out_file, eval_type)
        if out_file is not None:
            logging.info("Predictions exported to {0}".format(out_file))

        logger.info("Finished evaluating.")
        logger.info("Metrics:")
        for key, metric in metrics.items():
            logger.info("%s: %s", key, metric)

        if len(evaluation_data_paths_list) == 1:
            metrics_out = metrics
        else:
            metrics_out[evaluation_data_short_name] = metrics

    return metrics_out
Example #24
def evaluate(
        model: Model, instances: Iterable[Instance], task_name: str, data_iterator: DataIterator, cuda_device: int
) -> Dict[str, Any]:
    """
    Evaluate a model for a particular tasks (usually after training).
    
    Parameters
    ----------
    model : ``allennlp.models.model.Model``, required
        The model to evaluate
    instances : ``Iterable[Instance]``, required
        The (usually test) dataset on which to evalute the model.
    task_name : ``str``, required
        The name of the tasks on which evaluate the model.
    data_iterator : ``DataIterator``
        Iterator that go through the dataset.
    cuda_device : ``int``
        Cuda device to use.
        
    Returns
    -------
    metrics :  ``Dict[str, Any]``
        A dictionary containing the metrics on the evaluated dataset.
    """
    from train import TASKS_NAME

    check_for_gpu(cuda_device)
    with torch.no_grad():
        model.eval()

        iterator = data_iterator(instances, num_epochs=1, shuffle=False)
        logger.info("Iterating over dataset")
        generator_tqdm = tqdm.tqdm(iterator, total=data_iterator.get_num_batches(instances))

        eval_loss = 0
        nb_batches = 0
        for tensor_batch in generator_tqdm:
            nb_batches += 1

            # train_stages = ["stm", "sd", "valid"]
            # task_index = TASKS_NAME.index(task_name)
            # tensor_batch['task_index'] = torch.tensor(task_index)
            # tensor_batch["reverse"] = torch.tensor(False)
            # tensor_batch['for_training'] = torch.tensor(False)
            # train_stage = train_stages.index("stm")
            # tensor_batch['train_stage'] = torch.tensor(train_stage)
            # tensor_batch = move_to_device(tensor_batch, 0)
            # print(model)
            # print(tensor_batch.keys())
            # tensor_batch, task_name: str, epoch_trained=None, reverse=False, for_training=False
            eval_output_dict = model.forward(tensor_batch, task_name, for_training=False)
            loss = eval_output_dict["loss"]
            eval_loss += loss.item()
            metrics = model.get_metrics(task_name=task_name)
            metrics["stm_loss"] = float(eval_loss / nb_batches)

            description = training_util.description_from_metrics(metrics)
            generator_tqdm.set_description(description, refresh=False)

        metrics = model.get_metrics(task_name=task_name, reset=True)
        metrics["stm_loss"] = float(eval_loss / nb_batches)
        return metrics
Example #25
def evaluate_from_file(archive_path,
                       model_path,
                       overrides=None,
                       eval_suffix='',
                       device=0):
    if archive_path.endswith('gz'):
        archive = load_archive(archive_path, device, overrides)
        config = archive.config
        prepare_environment(config)
        model = archive.model
        serialization_dir = os.path.dirname(archive_path)
    elif archive_path.endswith('yaml'):
        config = yaml_to_params(archive_path, overrides)
        prepare_environment(config)
        config_dir = os.path.dirname(archive_path)
        serialization_dir = os.path.join(config_dir, 'serialization')

    all_datasets = datasets_from_params(config)

    # We want to create the vocab from scratch since it might be of a
    # different type. Vocabulary.from_files will always create the base
    # Vocabulary instance.
    # if os.path.exists(os.path.join(serialization_dir, "vocabulary")):
    #     vocab_path = os.path.join(serialization_dir, "vocabulary")
    #     vocab = Vocabulary.from_files(vocab_path)

    vocab = Vocabulary.from_params(config.pop('vocabulary'))
    model = Model.from_params(vocab=vocab, params=config.pop('model'))

    if model_path:
        best_model_state = torch.load(model_path)
        model.load_state_dict(best_model_state)

    instances = all_datasets.get('test')
    iterator = DataIterator.from_params(config.pop("validation_iterator"))

    iterator.index_with(model.vocab)
    model.eval().to(device)
    model.evaluate_mode = True

    metrics = evaluate(model,
                       instances,
                       iterator,
                       device,
                       serialization_dir,
                       eval_suffix,
                       batch_weight_key='')

    logger.info("Finished evaluating.")
    logger.info("Metrics:")
    for key, metric in metrics.items():
        logger.info("%s: %s", key, metric)
        print("%s: %s", key, metric)

    output_file = os.path.join(serialization_dir,
                               f"evaluate-metrics3{eval_suffix}.json")
    print(metrics)
    if output_file:
        try:
            with open(output_file, "w") as file:
                json.dump(metrics, file, indent=4)
        except FileNotFoundError as file_not_found:
            print(file_not_found)
            with open(f"evaluate-metrics3{eval_suffix}.json", "w") as file:
                json.dump(metrics, file, indent=4)
                print(f"Written eval metrics to {os.path.curdir}")

    return metrics
Example #26
def main():
    parser = arg_parser()

    args = parser.parse_args()

    temp_value = "1._1._1._1."
    optimizer = args.optimizer
    n_epochs = 10000

    if optimizer == 'adam':
        optimizer = Adam_optimizer(n_epochs)
    elif optimizer == 'lbfgs':
        optimizer = LBFGS_optimizer(n_epochs)
    else:
        print("Illegal optimizer {}, must be one of (adam, lbfgs)")
        return -2

    temp_value_list = [float(x) for x in temp_value.split("_")]
    n_layers = len(temp_value_list)

    model_file = args.model_file
    serialization_dir = "/".join(args.model_file.split("/")[:-1])
    evaluation_data_path = args.dev_file

    cuda_device = args.cuda_device

    # output file
    overrides = "{ iterator: {batch_size: 1}, model: {temperature_threshold: 1, scaling_temperature: '" + temp_value + "'}}"
    archive = load_archive(serialization_dir, cuda_device, overrides,
                           model_file)
    config = archive.config

    model = archive.model

    if cuda_device >= 0:
        model = model.cuda()
    model._scaling_temperatures = temp_value_list
    model.eval()
    vocab = model.vocab

    validation_dataset_reader_params = config.pop("validation_dataset_reader",
                                                  None)
    if validation_dataset_reader_params is not None:
        dataset_reader = DatasetReader.from_params(
            validation_dataset_reader_params)
    else:
        dataset_reader = DatasetReader.from_params(
            config.pop("dataset_reader"))

    instances = dataset_reader.read(evaluation_data_path)

    iterator_params = config.pop("iterator")

    data_iterator = DataIterator.from_params(iterator_params)

    data_iterator.index_with(vocab)

    iterator = data_iterator(instances, num_epochs=1, shuffle=False)

    generator_tqdm = Tqdm.tqdm(iterator,
                               total=data_iterator.get_num_batches(instances))

    temp = set_temperature(model, n_layers, optimizer, generator_tqdm,
                           cuda_device)

    print("\n\n######################################\n")
    print("#  Finished computing temperatures.  #\n")
    print("######################################\n")
    print("Values are: {}".format("_".join([str(x) for x in temp])))

    return 0
Example #27
def evaluate_from_args(args: argparse.Namespace) -> Dict[str, Any]:
    # Disable some of the more verbose logging statements
    logging.getLogger('allennlp.common.params').disabled = True
    logging.getLogger('allennlp.nn.initializers').disabled = True
    logging.getLogger('allennlp.modules.token_embedders.embedding').setLevel(
        logging.INFO)

    logging.info("Parameters:")
    for arg in vars(args):
        logging.info("{0}: {1}".format(arg, getattr(args, arg)))

    # Load from archive
    cuda_device = args.cuda_device

    logging.info("cuda_device:{0}".format(cuda_device))
    archive = load_archive(args.archive_file,
                           cuda_device=cuda_device,
                           overrides=args.overrides)
    config = archive.config
    prepare_environment(config)
    model = archive.model
    model.eval()

    # Load the evaluation data
    dataset_reader = DatasetReader.from_params(
        config.pop('validation_dataset_reader') if "validation_dataset_reader"
        in config else config.pop('dataset_reader'))
    evaluation_data_path = args.evaluation_data_file
    logger.info("Reading evaluation data from %s", evaluation_data_path)

    batch_size = args.batch_size
    start_id = args.start_id
    end_id = args.end_id

    dataset = dataset_reader.read(evaluation_data_path)
    file_mode = args.file_open_mode
    if start_id > 0 or end_id > 0:
        if not isinstance(dataset, list):
            raise ValueError(
                "dataset must be list when start_id and end_id are set")

        start_id = max(start_id, 0)
        if end_id <= 0:
            end_id = len(dataset)

        dataset = dataset[start_id:end_id]

    iterator_config = config.pop(
        'validation_iterator'
    ) if "validation_iterator" in config else config.pop('iterator')
    if batch_size > -1:
        if "base_iterator" in iterator_config:
            iterator_config["base_iterator"]["batch_size"] = batch_size
        else:
            iterator_config["batch_size"] = batch_size

    iterator = DataIterator.from_params(iterator_config)

    iterator.index_with(model.vocab)

    metrics = evaluate(model,
                       dataset,
                       iterator,
                       args.output_file,
                       file_mode=file_mode)
    if args.output_file:
        absolute_path = os.path.abspath(args.output_file)
        logging.info("Output saved to \n{}".format(absolute_path))
    logger.info("Finished evaluating.")
    logger.info("Metrics:")
    for key, metric in metrics.items():
        logger.info("%s: %s", key, metric)

    return metrics
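For reference, a hedged way to drive this function without a command line is to build the argparse.Namespace by hand. The attribute names below are taken from the function body; the file paths and values are placeholders, not part of the original code.

import argparse

args = argparse.Namespace(
    archive_file="model.tar.gz",          # placeholder path
    cuda_device=-1,
    overrides="",
    evaluation_data_file="dev.jsonl",     # placeholder path
    batch_size=-1,                        # -1 keeps the batch size from the archived config
    start_id=-1,                          # start_id/end_id <= 0 disables dataset slicing
    end_id=-1,
    file_open_mode="w",
    output_file="metrics.json",           # placeholder path
)
metrics = evaluate_from_args(args)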
Example No. 28
0
def evaluate(model: Model,
             instances: Iterable[Instance],
             data_iterator: DataIterator,
             cuda_device: int,
             batch_weight_key: str) -> Dict[str, Any]:
    _warned_tqdm_ignores_underscores = False
    check_for_gpu(cuda_device)
    with torch.no_grad():
        model.eval()

        iterator = data_iterator(instances,
                                 num_epochs=1,
                                 shuffle=False)
        logger.info("Iterating over dataset")
        generator_tqdm = Tqdm.tqdm(iterator, total=data_iterator.get_num_batches(instances))

        # Number of batches in instances.
        batch_count = 0
        # Number of batches where the model produces a loss.
        loss_count = 0
        # Cumulative weighted loss
        total_loss = 0.0
        # Cumulative weight across all batches.
        total_weight = 0.0

        for batch in generator_tqdm:
            batch_count += 1
            batch = util.move_to_device(batch, cuda_device)
            output_dict = model(**batch)
            loss = output_dict.get("loss")

            metrics = model.get_metrics()

            if loss is not None:
                loss_count += 1
                if batch_weight_key:
                    weight = output_dict[batch_weight_key].item()
                else:
                    weight = 1.0

                total_weight += weight
                total_loss += loss.item() * weight
                # Report the average loss so far.
                metrics["loss"] = total_loss / total_weight

            if (not _warned_tqdm_ignores_underscores and
                        any(metric_name.startswith("_") for metric_name in metrics)):
                logger.warning("Metrics with names beginning with \"_\" will "
                               "not be logged to the tqdm progress bar.")
                _warned_tqdm_ignores_underscores = True
            description = ', '.join(["%s: %.2f" % (name, value) for name, value
                                     in metrics.items() if not name.startswith("_")]) + " ||"
            generator_tqdm.set_description(description, refresh=False)

        final_metrics = model.get_metrics(reset=True)
        if loss_count > 0:
            # Sanity check
            if loss_count != batch_count:
                raise RuntimeError("The model you are trying to evaluate only sometimes " +
                                   "produced a loss!")
            final_metrics["loss"] = total_loss / total_weight

        return final_metrics
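A quick numeric check of the weighted averaging above (purely illustrative): with batch losses 0.50 and 0.30 and batch weights 10 and 30 taken from batch_weight_key, total_loss = 0.50 * 10 + 0.30 * 30 = 14.0 and total_weight = 40.0, so the reported "loss" is 14.0 / 40.0 = 0.35. With an empty batch_weight_key every weight is 1.0 and the result reduces to a plain mean over batches.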
def evaluate_from_args(args: argparse.Namespace) -> Dict[str, Any]:
    logging.getLogger('allennlp.common.params').disabled = True
    logging.getLogger('allennlp.nn.initializers').disabled = True
    logging.getLogger('allennlp.modules.token_embedders.embedding').setLevel(
        logging.INFO)
    print(args.version)

    # Load from archive
    if args.unziped_archive_directory != "default":
        if args.elmo:
            model, config = _load_elmo(args.unziped_archive_directory,
                                       args.archive_file,
                                       weights_file=None,
                                       cuda_device=args.cuda_device)
        else:
            model, config = _load(args.unziped_archive_directory,
                                  weights_file=None,
                                  cuda_device=args.cuda_device)
    else:
        archive = load_archive(args.archive_file, args.cuda_device,
                               args.overrides, args.weights_file)
        config = archive.config
        prepare_environment(config)
        model = archive.model

    model.eval()

    # Load the evaluation data for multilingual evaluation. The validation
    # dataset reader comes from the archived model, so the multilingual
    # resources need to be reloaded here.
    validation_dataset_reader_params = config.pop('validation_dataset_reader',
                                                  None)
    if validation_dataset_reader_params is not None:
        dataset_reader = DatasetReader.from_params(
            validation_dataset_reader_params)
    else:
        dataset_reader = SQuADReaderML.from_params(
            config.pop('dataset_reader'))

        dataset_reader.set_nmt_models_resources(
            args.trans_embedding_model, args.trans_encdec_model,
            args.trans_train_source, args.trans_train_target,
            args.use_question_tag, args.replace_UNK, args.version,
            args.online_trans, args.beam, args.soft)

        if args.language == "Fr":
            dataset_reader.set_squad_test_resources_fr()
        elif args.language == "Ja":
            dataset_reader.set_squad_test_resources_ja()

        dataset_reader.set_google_translate_mode(args.use_google_translate)
        dataset_reader.set_bing_translate_mode(args.use_bing_translate)
    evaluation_data_path = args.evaluation_data_file
    logger.info("Reading evaluation data from %s", evaluation_data_path)

    # TODO: Fix this file path argument, it is not used and misleading.
    instances = dataset_reader.read("inputdata_tmp/ja_question_v2.csv")
    iterator = DataIterator.from_params(config.pop("iterator"))
    iterator.index_with(model.vocab)

    if args.use_google_translate:
        metrics = evaluate_mlqa_google_translate(model, instances, iterator,
                                                 args.cuda_device,
                                                 args.language, args.version)

    elif args.use_bing_translate:
        metrics = evaluate_mlqa_bing_translate(model, instances, iterator,
                                               args.cuda_device, args.language,
                                               args.version, "bing",
                                               args.back_trans_bing)

    else:
        if args.back_trans_ours:
            metrics = evaluate_mlqa_back_trans_ours(
                model, instances, iterator, args.cuda_device, args.language,
                args.version, args.enja_emb, args.enja_encdec,
                args.enja_train_source, args.enja_train_target)
        else:
            metrics = evaluate_mlqa(model, instances, iterator,
                                    args.cuda_device, args.language,
                                    args.version, args.trans_embedding_model,
                                    args.beam, args.soft)

    logger.info("Finished evaluating.")
    logger.info("Metrics:")
    for key, metric in metrics.items():
        logger.info("%s: %s", key, metric)

    return metrics
Example No. 30
0
    # Load the evaluation data

    # Try to use the validation dataset reader if there is one - otherwise fall back
    # to the default dataset_reader used for both training and validation.
    validation_dataset_reader_params = config.pop('validation_dataset_reader', None)
    if validation_dataset_reader_params is not None:
        dataset_reader = DatasetReader.from_params(validation_dataset_reader_params)
    else:
        dataset_reader = DatasetReader.from_params(config.pop('dataset_reader'))
    evaluation_data_path = args.input_file
    logger.info("Reading evaluation data from %s", evaluation_data_path)
    instances = dataset_reader.read(evaluation_data_path)

    config['iterator']['type'] = 'basic'
    del config['iterator']['sorting_keys']
    data_iterator = DataIterator.from_params(config.pop("iterator"))
    data_iterator.index_with(model.vocab)

    cuda_device = args.cuda_device

    #### EVALUATION HERE

    model.eval()
    iterator = data_iterator(instances, num_epochs=1, shuffle=False, cuda_device=cuda_device, for_training=False)
    logger.info("Iterating over dataset")
    generator_tqdm = Tqdm.tqdm(iterator, total=data_iterator.get_num_batches(instances))

    label_probs = []
    for batch in generator_tqdm:
        output_dict = model(**batch)
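        # Note: this fragment is truncated before label_probs is ever filled. Assuming the
        # model's output dictionary exposes a "label_probs" tensor (a guess, not confirmed
        # here), the missing accumulation step inside the loop would look roughly like:
        # label_probs.extend(output_dict["label_probs"].detach().cpu().numpy().tolist())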
Example No. 31
0
    def from_params(params: Params, serialization_dir: str, recover: bool = False) -> 'MultiTaskTrainerPieces':
        all_datasets = multitask_datasets_from_params(params)
        datasets_for_vocab_creation = set(params.pop("datasets_for_vocab_creation", all_datasets))

        for dataset in datasets_for_vocab_creation:
            if dataset not in all_datasets:
                raise ConfigurationError(f"invalid 'dataset_for_vocab_creation' {dataset}")

        logger.info("From dataset instances, %s will be considered for vocabulary creation.",
                    ", ".join(datasets_for_vocab_creation))

        if recover and os.path.exists(os.path.join(serialization_dir, "vocabulary")):
            vocab = Vocabulary.from_files(os.path.join(serialization_dir, "vocabulary"))
            params.pop("vocabulary", {})
        else:
            vocab = Vocabulary.from_params(
                    params.pop("vocabulary", {}),
                    (instance for key, dataset in all_datasets.items()
                     for instance in dataset
                     if key in datasets_for_vocab_creation)
            )

        model = Model.from_params(vocab=vocab, params=params.pop('model'))

        # If vocab extension is ON for training, embedding extension should also be
        # done. If vocab and embeddings are already in sync, it would be a no-op.
        model.extend_embedder_vocab()

        # Initializing the model can have side effect of expanding the vocabulary
        vocab.save_to_files(os.path.join(serialization_dir, "vocabulary"))

        iterator = DataIterator.from_params(params.pop("iterator"))
        iterator.index_with(model.vocab)
        validation_iterator_params = params.pop("validation_iterator", None)
        if validation_iterator_params:
            validation_iterator = DataIterator.from_params(validation_iterator_params)
            validation_iterator.index_with(model.vocab)
        else:
            validation_iterator = None

        train_data = all_datasets['train']
        validation_data = all_datasets.get('validation')
        test_data = all_datasets.get('test')

        trainer_params = params.pop("trainer")
        no_grad_regexes = trainer_params.pop("no_grad", ())
        for name, parameter in model.named_parameters():
            if any(re.search(regex, name) for regex in no_grad_regexes):
                parameter.requires_grad_(False)

        frozen_parameter_names, tunable_parameter_names = \
                    get_frozen_and_tunable_parameter_names(model)
        logger.info("Following parameters are Frozen  (without gradient):")
        for name in frozen_parameter_names:
            logger.info(name)
        logger.info("Following parameters are Tunable (with gradient):")
        for name in tunable_parameter_names:
            logger.info(name)

        return MultiTaskTrainerPieces(model, iterator,
                                      train_data, validation_data, test_data,
                                      validation_iterator, trainer_params)
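MultiTaskTrainerPieces itself is not defined in this snippet. By analogy with AllenNLP's TrainerPieces it is presumably a plain named tuple; the sketch below infers the field names from the return statement above, so treat it as an assumption rather than the original class.

from typing import Iterable, NamedTuple, Optional

from allennlp.common import Params
from allennlp.data import Instance
from allennlp.data.iterators import DataIterator
from allennlp.models import Model


class MultiTaskTrainerPieces(NamedTuple):
    """Sketch only: field names inferred from the return statement above."""
    model: Model
    iterator: DataIterator
    train_dataset: Iterable[Instance]
    validation_dataset: Optional[Iterable[Instance]]
    test_dataset: Optional[Iterable[Instance]]
    validation_iterator: Optional[DataIterator]
    params: Params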
Example No. 32
0
def evaluate(model: Model,
             instances: Iterable[Instance],
             data_iterator: DataIterator,
             cuda_device: int,
             batch_weight_key: str) -> Dict[str, Any]:
    check_for_gpu(cuda_device)
    with torch.no_grad():
        model.eval()

        iterator = data_iterator(instances,
                                 num_epochs=1,
                                 shuffle=False)
        logger.info("Iterating over dataset")
        generator_tqdm = Tqdm.tqdm(iterator, total=data_iterator.get_num_batches(instances))

        # Number of batches in instances.
        batch_count = 0
        # Number of batches where the model produces a loss.
        loss_count = 0
        # Cumulative weighted loss
        total_loss = 0.0
        # Cumulative weight across all batches.
        total_weight = 0.0

        for batch in generator_tqdm:
            batch_count += 1
            batch = nn_util.move_to_device(batch, cuda_device)
            output_dict = model(**batch)
            loss = output_dict.get("loss")

            metrics = model.get_metrics()

            if loss is not None:
                loss_count += 1
                if batch_weight_key:
                    weight = output_dict[batch_weight_key].item()
                else:
                    weight = 1.0

                total_weight += weight
                total_loss += loss.item() * weight
                # Report the average loss so far.
                metrics["loss"] = total_loss / total_weight

            if (not HasBeenWarned.tqdm_ignores_underscores and
                        any(metric_name.startswith("_") for metric_name in metrics)):
                logger.warning("Metrics with names beginning with \"_\" will "
                               "not be logged to the tqdm progress bar.")
                HasBeenWarned.tqdm_ignores_underscores = True
            description = ', '.join(["%s: %.2f" % (name, value) for name, value
                                     in metrics.items() if not name.startswith("_")]) + " ||"
            generator_tqdm.set_description(description, refresh=False)

        final_metrics = model.get_metrics(reset=True)
        if loss_count > 0:
            # Sanity check
            if loss_count != batch_count:
                raise RuntimeError("The model you are trying to evaluate only sometimes " +
                                   "produced a loss!")
            final_metrics["loss"] = total_loss / total_weight

        return final_metrics
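The only substantive difference from the earlier variant is that the warn-once state lives on a HasBeenWarned class rather than a local variable, so the underscore warning fires at most once per process even across repeated evaluate() calls. Such a flag holder is essentially a single class attribute, roughly as sketched below (AllenNLP keeps a class like this in its training utilities; this is only an illustration):

class HasBeenWarned:
    # Class-level flag shared by every evaluate() call in the process.
    tqdm_ignores_underscores = False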
Example No. 33
0
def main():
    parse = argparse.ArgumentParser("")
    parse.add_argument("command", type=str, help="one of the following options train, evaluate, generalize")
    parse.add_argument("--datasets", type=str, help="", default=None)
    parse.add_argument("--model", type=str, help="", default=None)
    parse.add_argument("--serialization_dir", type=str, help="the directory storing the intermediate files and output", default=None)
    parse.add_argument("--cuda_device", type=str, help="Cuda device ID", default="-1")
    parse.add_argument("--split", type=str, help="dev / test", default="dev")
    parse.add_argument("--bert_type", type=str, help="Base / Large /", default="Base")
    parse.add_argument("--config", type=str, help="dev / test", default=None)
    parse.add_argument("--output_path", type=str, help="directory to which results JSONs of eval will written", default='results/eval/')
    parse.add_argument("--models_dir", type=str, help="directory containing the models used for eval , (please add '/' at the end)", default=None)
    parse.add_argument("--data_dir", type=str, help="directory containing the multiqa format datasets , (please add '/' at the end and make sure to have a headers directory with all headers under your specified path)", default='https://multiqa.s3.amazonaws.com/data/')
    parse.add_argument("--t_total", type=str, help="used for training, see BERT's learning rate schedule for details", default=None)
    parse.add_argument("--sample_size", type=str, help="used for sampling a subset of the training data", default=-1)
    parse.add_argument("--validation_sample_size", type=str, help="used for sampling a subset of the training data", default=-1)
    parse.add_argument("--batch_size", type=str, help="the batch size", default=8)
    parse.add_argument("--max_instances_in_memory", type=str, help="max number instances in memrory during training", default=5000)
    parse.add_argument("--num_epochs", type=str, help="", default=2)
    parse.add_argument("--lr", type=str, help="learning rate", default=0.00003)
    args = parse.parse_args()

    import_submodules("models")

    # TODO add best config for specific datasets as default, not one general default...
    if args.config is None:
        config = 'models/MultiQA_BERT' + args.bert_type + '.jsonnet'
    else:
        config = args.config
    config_params = Params(json.loads(_jsonnet.evaluate_file(config)))

    if args.command == 'train':
        # building the default dataset urls
        train_datasets = [args.data_dir + dataset + '_train.jsonl.gz' for dataset in args.datasets.split(',')]
        val_datasets = [args.data_dir + dataset + '_' + args.split + '.jsonl.gz' for dataset in args.datasets.split(',')]

        # calculating the t_total
        if args.t_total is None:
            logging.info('getting headers of the chosen dataset in order to compute learning rate schedule t_total')
            total_number_of_examples = 0
            for header_url in [args.data_dir + 'headers/' + dataset + '_train.json' for dataset in
                               args.datasets.split(',')]:
                with open(cached_path(header_url), 'r') as f:
                    header = json.load(f)
                    total_number_of_examples += header['number_of_qas']
            t_total = int(total_number_of_examples / float(config_params['iterator']['batch_size'])
                          * float(config_params['trainer']['num_epochs'])) \
                      / len(args.cuda_device.split(','))
        else:
            # t_total was supplied on the command line; without this branch it would be undefined below.
            t_total = int(args.t_total)
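        # Illustrative arithmetic with hypothetical numbers: 88,000 training examples,
        # batch_size 8, 2 epochs and a single GPU give
        # t_total = 88000 / 8 * 2 / 1 = 22000 optimizer steps for BERT's learning-rate schedule.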

        if args.serialization_dir is None:
            serialization_dir = 'models/' + args.datasets.replace(',', '_') + \
                f"_num_epochs_{args.num_epochs}_batch_size_{args.batch_size}_lr_{args.lr}"
        else:
            serialization_dir = args.serialization_dir

        print(" >>>>>>>> overriding the parameters <<<<<<<<<<< ") 
        overrides = {
            'train_data_path': ','.join(train_datasets),
            'validation_data_path': ','.join(val_datasets),
            'dataset_reader': {
                'sample_size': args.sample_size, 
            },
            'validation_dataset_reader': {
                'sample_size': args.validation_sample_size,
            },
            'iterator': {
                'batch_size': args.batch_size,
                'max_instances_in_memory': args.max_instances_in_memory,
             },
            'trainer': {
                'cuda_device': args.cuda_device,
                'num_epochs': args.num_epochs,
                'optimizer': {
                  't_total': t_total, 
                  'lr': args.lr,
                }
            }
        }

        overrides_str = str(overrides).replace('True', 'true').replace('False', 'false')
        train_model_from_file(config, serialization_dir, overrides_str, True, False, True, "", "")
    elif args.command == 'evaluate':
        print(" evaluate . . . ")
        if args.models_dir is None:
            model_path = 'https://multiqa.s3.amazonaws.com/models/BERT' + args.bert_type + '/' + args.model + '.tar.gz'
        else:
            model_path = args.models_dir + args.model + '.tar.gz'
        model_cached_path = cached_path(model_path)

        print(" loading models . . . .")
        overrides_str = ''
        # Load from archive
        archive = load_archive(model_cached_path, int(args.cuda_device), overrides_str, '')
        prepare_environment(config_params)
        model = archive.model
        model.eval()

        print(" loading data . . . .")

        # Load the evaluation data
        validation_dataset_reader_params = config_params.get('validation_dataset_reader', None)
        dataset_reader = DatasetReader.from_params(validation_dataset_reader_params)

        # print(" *  *  *  *  *  *  * ")
        # print(validation_dataset_reader_params)
        # print(dataset_reader)

        print(" looping over datasets . . . .")

        # running over all validation datasets specified
        val_dataset_names = args.datasets.split(',')
        val_datasets = [args.data_dir + dataset + '_' + args.split + '.jsonl.gz' for dataset in val_dataset_names]

        for val_dataset_path, val_dataset_name in zip(val_datasets, val_dataset_names):
            print(f" * * * val_dataset_name: {val_dataset_name}")
            # This is a bit strange but there is a lot of config "popping" going on implicitly in allennlp
            # so we need to have the full config reloaded every iteration...
            config_params = Params(json.loads(_jsonnet.evaluate_file(config)))

            print("Reading evaluation data from %s", val_dataset_path)
            logger.info("Reading evaluation data from %s", val_dataset_path)
            instances = dataset_reader.read(val_dataset_path)

            # loading iterator
            iterator_params = config_params.get("validation_iterator", None)
            iterator = DataIterator.from_params(iterator_params)
            iterator.index_with(model.vocab)

            metrics = evaluate(model, instances, iterator, int(args.cuda_device), '')

            logger.info("Finished evaluating " + val_dataset_name)
            print("Finished evaluating " + val_dataset_name)
            logger.info("Metrics:")
            for key, metric in metrics.items():
                logger.info("%s: %s", key, metric)

            if not os.path.exists(args.output_path):
                os.makedirs(args.output_path)
            output_path = args.output_path + args.model + '_BERT' + args.bert_type + '_eval-on_' \
                          + val_dataset_name + '_' + args.split + '.json'
            with open(output_path, "w") as file:
                json.dump(metrics, file, indent=4)
        return metrics

    elif args.command == 'generalize':
        logging.error('The command %s is not yet supported' % (args.command))
    else:
        logging.error('The command %s is not supported' % (args.command))