def evaluate(self,
             dataset: Dataset,
             write_out: bool = False,
             log_progress: int = 0,
             name: str = None) -> Dict[str, Any]:
    """Run the model on a given dataset and evaluate the outputs.

    Args:
        dataset: The dataset on which the model will be executed.
        write_out: Flag whether the outputs should be printed to a file
            defined in the dataset object.
        log_progress: Log progress every X seconds.
        name: The name of the evaluated dataset.

    Returns:
        Dictionary of evaluation names and their values, which includes
        the metrics applied to the respective series as well as the loss
        values from the run.
    """
    execution_results, output_data, f_dataset = self.run_model(
        dataset, write_out, log_progress)

    # Normalize (series, evaluator) pairs into (name, series, evaluator)
    # triples, reusing the series name as the metric name.
    evaluators = [(e[0], e[0], e[1]) if len(e) == 2 else e
                  for e in self.model.evaluation]

    with self.graph.as_default():
        eval_result = evaluation(
            evaluators, f_dataset, execution_results, output_data)

    if eval_result:
        print_final_evaluation(eval_result, name)

    return eval_result
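# A minimal, self-contained sketch (not part of the module above) of the
# evaluator normalization used in `evaluate`: 2-tuples of (series, evaluator)
# are expanded into 3-tuples of (name, series, evaluator) by reusing the
# series name as the metric name, while 3-tuples pass through unchanged.
# The evaluator objects below are hypothetical stand-ins.
def _normalize_evaluators_sketch():
    bleu = object()  # hypothetical evaluator object
    ter = object()   # hypothetical evaluator object
    evaluation_spec = [("target", bleu), ("target_ter", "target", ter)]
    triples = [(e[0], e[0], e[1]) if len(e) == 2 else e
               for e in evaluation_spec]
    # triples == [("target", "target", bleu), ("target_ter", "target", ter)]
    return triples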
def main() -> None:
    # pylint: disable=no-member,broad-except
    if len(sys.argv) != 3:
        print("Usage: run.py <run_ini_file> <test_datasets>")
        sys.exit(1)

    test_datasets = Configuration()
    test_datasets.add_argument('test_datasets')
    test_datasets.add_argument('variables')

    CONFIG.load_file(sys.argv[1])
    CONFIG.build_model()
    test_datasets.load_file(sys.argv[2])
    test_datasets.build_model()
    datasets_model = test_datasets.model

    initialize_for_running(CONFIG.model.output, CONFIG.model.tf_manager,
                           datasets_model.variables)

    print("")

    evaluators = [(e[0], e[0], e[1]) if len(e) == 2 else e
                  for e in CONFIG.model.evaluation]

    for dataset in datasets_model.test_datasets:
        execution_results, output_data = run_on_dataset(
            CONFIG.model.tf_manager, CONFIG.model.runners,
            dataset, CONFIG.model.postprocess, write_out=True)
        # TODO what if there is no ground truth
        eval_result = evaluation(evaluators, dataset, CONFIG.model.runners,
                                 execution_results, output_data)
        if eval_result:
            print_final_evaluation(dataset.name, eval_result)
def main() -> None:
    # pylint: disable=no-member,broad-except
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument("config", metavar="INI-FILE",
                        help="the configuration file of the experiment")
    parser.add_argument("datasets", metavar="INI-TEST-DATASETS",
                        help="the configuration of the test datasets")
    parser.add_argument("-g", "--grid", dest="grid", action="store_true",
                        help="look at the SGE variables for slicing the data")
    args = parser.parse_args()

    test_datasets = Configuration()
    test_datasets.add_argument('test_datasets')
    test_datasets.add_argument('variables')

    CONFIG.load_file(args.config)
    CONFIG.build_model()
    test_datasets.load_file(args.datasets)
    test_datasets.build_model()
    datasets_model = test_datasets.model

    initialize_for_running(CONFIG.model.output, CONFIG.model.tf_manager,
                           datasets_model.variables)

    print("")

    evaluators = [(e[0], e[0], e[1]) if len(e) == 2 else e
                  for e in CONFIG.model.evaluation]

    if args.grid and len(datasets_model.test_datasets) > 1:
        raise ValueError("Only one test dataset supported when using --grid")

    for dataset in datasets_model.test_datasets:
        if args.grid:
            if ("SGE_TASK_FIRST" not in os.environ
                    or "SGE_TASK_LAST" not in os.environ
                    or "SGE_TASK_STEPSIZE" not in os.environ
                    or "SGE_TASK_ID" not in os.environ):
                raise EnvironmentError(
                    "Some SGE environment variables are missing")

            # Slice out this grid task's window of the dataset, clamping
            # the last window so it does not run past the end.
            length = int(os.environ["SGE_TASK_STEPSIZE"])
            start = int(os.environ["SGE_TASK_ID"]) - 1
            end = int(os.environ["SGE_TASK_LAST"]) - 1

            if start + length > end:
                length = end - start + 1

            log("Running grid task {} starting at {} with step {}"
                .format(start // length, start, length))

            dataset = dataset.subset(start, length)

        if CONFIG.model.runners_batch_size is None:
            runners_batch_size = CONFIG.model.batch_size
        else:
            runners_batch_size = CONFIG.model.runners_batch_size

        execution_results, output_data = run_on_dataset(
            CONFIG.model.tf_manager, CONFIG.model.runners,
            dataset, CONFIG.model.postprocess, write_out=True,
            batch_size=runners_batch_size, log_progress=60)
        # TODO what if there is no ground truth
        eval_result = evaluation(evaluators, dataset, CONFIG.model.runners,
                                 execution_results, output_data)
        if eval_result:
            print_final_evaluation(dataset.name, eval_result)
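# A standalone sketch of the grid-slicing arithmetic above, with hypothetical
# values in place of the SGE environment variables: each array-job task reads
# a window of SGE_TASK_STEPSIZE examples starting at its zero-based
# SGE_TASK_ID, and the final window is shortened so it does not run past
# SGE_TASK_LAST.
def _grid_slice_sketch():
    sge_task_stepsize = 100  # assumed value of SGE_TASK_STEPSIZE
    sge_task_id = 901        # assumed value of SGE_TASK_ID (1-based)
    sge_task_last = 950      # assumed value of SGE_TASK_LAST (1-based)

    length = sge_task_stepsize
    start = sge_task_id - 1
    end = sge_task_last - 1
    if start + length > end:
        length = end - start + 1

    # Here start == 900 and length == 50: the last task gets a shorter slice.
    return start, length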