Example #1
    def run(self, args):
        prepare_global_logging(file_path=args.log_file, silent=args.silent)

        if args.random_seed is not None:
            # Seed both numpy's RNG and the stdlib's; the +1 offset keeps the
            # two streams from being identical
            np.random.seed(args.random_seed)
            random.seed(args.random_seed + 1)

        two_tailed = args.num_tails == 2
        # Convert a confidence percentage (e.g., 95) to a significance level (0.05)
        alpha = 1.0 - args.confidence / 100
        results = run_hypothesis_tests(
            args.metrics_jsonl_files,
            args.dependent_metric,
            args.metric_A,
            args.metric_B,
            args.summarizer_type,
            test_method=args.hypothesis_test,
            alpha=alpha,
            two_tailed=two_tailed,
            skip_summary_level=args.skip_summary_level,
            skip_system_level=args.skip_system_level,
            skip_global=args.skip_global)

        if args.output_file:
            dirname = os.path.dirname(args.output_file)
            if dirname:
                os.makedirs(dirname, exist_ok=True)
            with open(args.output_file, 'w') as out:
                out.write(json.dumps(results, indent=2))

        if not args.silent:
            logger.info(json.dumps(results, indent=2))
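To make the confidence arithmetic concrete, here is the same conversion traced with hypothetical argument values (a sketch, not part of the command):

    confidence = 95                   # hypothetical value of args.confidence
    num_tails = 2                     # hypothetical value of args.num_tails
    alpha = 1.0 - confidence / 100    # -> 0.05, i.e., a 5% significance level
    two_tailed = num_tails == 2       # -> True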
Example #2
    def run(self, args):
        prepare_global_logging(file_path=args.log_file, silent=args.silent)

        metric1, metric2 = args.metrics
        two_tailed = args.num_tails == 2
        alpha = 1.0 - args.confidence / 100
        # Extra options for the confidence-interval method arrive as a JSON
        # string on the command line
        ci_kwargs = json.loads(args.confidence_interval_kwargs)
        results = compute_correlation(
            args.metrics_jsonl_files,
            metric1,
            metric2,
            args.summarizer_type,
            skip_summary_level=args.skip_summary_level,
            skip_system_level=args.skip_system_level,
            skip_global=args.skip_global,
            system_level_output_plot=args.system_level_output_plot,
            global_output_plot=args.global_output_plot,
            ci_method=args.confidence_interval_method,
            alpha=alpha,
            two_tailed=two_tailed,
            ci_kwargs=ci_kwargs)

        if args.output_file:
            dirname = os.path.dirname(args.output_file)
            if dirname:
                os.makedirs(dirname, exist_ok=True)
            with open(args.output_file, 'w') as out:
                out.write(json.dumps(results, indent=2))

        if not args.silent:
            logger.info(json.dumps(results, indent=2))
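The `ci_kwargs` string lets extra options reach the confidence-interval routine without dedicated flags. A minimal sketch of the round trip; the `num_samples` key is a made-up illustration, not a documented option:

    import json

    raw = '{"num_samples": 1000}'   # hypothetical value of args.confidence_interval_kwargs
    ci_kwargs = json.loads(raw)     # -> {'num_samples': 1000}
    # compute_correlation presumably forwards this dict to the CI method as keyword arguments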
Example #3
    def run_score(self, args: argparse.Namespace) -> None:
        prepare_global_logging(file_path=args.log_file, silent=args.silent)

        dataset_reader = get_dataset_reader_from_argument(args.dataset_reader)
        metric = get_metric_from_arguments(self.metric_type, args)
        input_files = args.input_files

        instances = dataset_reader.read(*input_files)
        metrics_dicts = score_instances(instances, [metric])

        save_score_results(metrics_dicts, args.output_jsonl, args.silent)
Example #4
    def run_evaluate(self, args: argparse.Namespace) -> None:
        prepare_global_logging(file_path=args.log_file, silent=args.silent)

        dataset_reader = get_dataset_reader_from_argument(args.dataset_reader)
        metric = get_metric_from_arguments(self.metric_type, args)
        input_files = args.input_files

        instances = dataset_reader.read(*input_files)
        macro, micro_list = evaluate_instances(instances, [metric])

        save_evaluation_results(macro, micro_list, args.macro_output_json,
                                args.micro_output_jsonl, args.silent)
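The names suggest micro is the per-instance scores and macro their aggregate. Assuming macro simply averages the micro scores (an assumption about the semantics, not sacrerouge's actual code), the relationship would look like:

    # Hypothetical per-instance (micro) metrics and their macro average
    micro_list = [{'rouge-1': 0.41}, {'rouge-1': 0.37}, {'rouge-1': 0.45}]
    macro = {'rouge-1': sum(m['rouge-1'] for m in micro_list) / len(micro_list)}
    # macro -> {'rouge-1': 0.41} (up to floating-point rounding)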
Example #5
    def run(self, args):
        prepare_global_logging(file_path=args.log_file, silent=args.silent)

        results = run_all_partial_conjunction_pvalue_test(
            args.method, args.pvalue_json_files, args.names, alpha=args.alpha)

        if args.output_file:
            dirname = os.path.dirname(args.output_file)
            if dirname:
                os.makedirs(dirname, exist_ok=True)
            with open(args.output_file, 'w') as out:
                out.write(json.dumps(results, indent=2))

        if not args.silent:
            logger.info(json.dumps(results, indent=2))
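The output-writing idiom here (create the parent directory if needed, then dump pretty-printed JSON) repeats across these commands and could be factored into a small helper; a sketch, not part of sacrerouge:

    import json
    import os

    def write_json(path, obj):
        # os.path.dirname returns '' for a bare filename, so only create
        # directories when the path actually has a parent
        dirname = os.path.dirname(path)
        if dirname:
            os.makedirs(dirname, exist_ok=True)
        with open(path, 'w') as out:
            out.write(json.dumps(obj, indent=2))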
Example #6
    def run(self, args):
        prepare_global_logging(file_path=args.log_file, silent=args.silent)

        import_module_and_submodules('sacrerouge')
        include_packages = args.include_packages or []
        for package in include_packages:
            import_module_and_submodules(package)

        params = Params.from_file(args.config, args.overrides)
        dataset_reader = DatasetReader.from_params(params.pop('dataset_reader'))
        metrics = _load_metrics(params)

        input_files = params.pop('input_files')
        if isinstance(input_files, str):
            input_files = [input_files]

        instances = dataset_reader.read(*input_files)
        metrics_dicts = score_instances(instances, metrics, args.disable_peer_jackknifing)

        save_score_results(metrics_dicts, args.output_jsonl, args.silent)
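The `isinstance` check normalizes a config value that may be either a single path or a list of paths. In isolation, with a hypothetical value:

    input_files = 'summaries.jsonl'   # hypothetical config value; a list works too
    if isinstance(input_files, str):
        input_files = [input_files]
    # Always a list afterwards, so it can be splatted into dataset_reader.read(*input_files)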
Example #7
    def run(self, args):
        prepare_global_logging(file_path=args.log_file, silent=args.silent)

        with open(args.summary_level_correlations_A, 'r') as f:
            correlations_A = json.load(f)
        with open(args.summary_level_correlations_B, 'r') as f:
            correlations_B = json.load(f)

        results = run_wilcoxon_tests(correlations_A,
                                     correlations_B,
                                     alternative=args.alternative)

        if args.output_file:
            dirname = os.path.dirname(args.output_file)
            if dirname:
                os.makedirs(dirname, exist_ok=True)
            with open(args.output_file, 'w') as out:
                out.write(json.dumps(results, indent=2))

        if not args.silent:
            logger.info(json.dumps(results, indent=2))
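`run_wilcoxon_tests` presumably applies a paired Wilcoxon signed-rank test to the two lists of summary-level correlations. A minimal sketch of such a test with scipy, on made-up correlation values (the assumption being that the underlying test matches scipy's):

    from scipy.stats import wilcoxon

    correlations_A = [0.52, 0.48, 0.61, 0.55, 0.43]   # hypothetical correlations
    correlations_B = [0.50, 0.43, 0.57, 0.48, 0.44]
    # alternative='greater' asks whether metric A correlates better than metric B
    result = wilcoxon(correlations_A, correlations_B, alternative='greater')
    print(result.statistic, result.pvalue)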
Example #8
    def run(self, args):
        prepare_global_logging(file_path=args.log_file, silent=args.silent)

        import_module_and_submodules('sacrerouge')
        include_packages = args.include_packages or []
        for package in include_packages:
            import_module_and_submodules(package)

        params = Params.from_file(args.config, args.overrides)
        dataset_reader = DatasetReader.from_params(
            params.pop('dataset_reader'))
        metrics = load_metrics(params)

        input_files = params.pop('input_files')
        if isinstance(input_files, str):
            input_files = [input_files]

        instances = dataset_reader.read(*input_files)
        macro, micro_list = evaluate_instances(instances, metrics)

        save_evaluation_results(macro, micro_list, args.macro_output_json,
                                args.micro_output_jsonl, args.silent)
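The `params.pop(...)` calls imply a config file containing at least `dataset_reader` and `input_files` keys, plus whatever `load_metrics` consumes (presumably a `metrics` list). Written as the equivalent Python dict, with illustrative `type` values that are assumptions rather than known registered names:

    config = {
        'dataset_reader': {'type': 'reference-based'},   # hypothetical reader type
        'metrics': [{'type': 'rouge'}],                  # hypothetical metric entry
        'input_files': ['summaries.jsonl'],              # a bare string is also accepted
    }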
Example #9
    def run(self, args):
        prepare_global_logging(file_path=args.log_file, silent=args.silent)

        metric1, metric2 = args.metrics
        return_all_summary_level = args.summary_level_correlations_output is not None
        results = compute_correlation(
            args.metrics_jsonl_files,
            metric1,
            metric2,
            args.summarizer_type,
            return_all_summary_level=return_all_summary_level,
            skip_summary_level=args.skip_summary_level,
            skip_system_level=args.skip_system_level,
            skip_global=args.skip_global,
            system_level_output_plot=args.system_level_output_plot,
            global_output_plot=args.global_output_plot)

        # Split the per-summary correlations off of the main results tuple
        if return_all_summary_level:
            results, all_summary_level = results

        if args.output_file:
            dirname = os.path.dirname(args.output_file)
            if dirname:
                os.makedirs(dirname, exist_ok=True)
            with open(args.output_file, 'w') as out:
                out.write(json.dumps(results, indent=2))

        if not args.silent:
            logger.info(json.dumps(results, indent=2))

        # Save the individual summary-level correlations. `all_summary_level` is
        # only defined when `return_all_summary_level` is true, which holds exactly
        # when this output path was provided
        if return_all_summary_level:
            with open(args.summary_level_correlations_output, 'w') as out:
                out.write(json.dumps(all_summary_level, indent=2))
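The conditional unpacking works because the callee switches its return shape on the flag. In isolation, with a hypothetical stand-in for `compute_correlation`:

    def compute(return_extra):
        # Stand-in: returns either `results` alone or `(results, extra)`
        results = {'pearson': 0.6}
        return (results, [0.5, 0.7]) if return_extra else results

    return_extra = True
    results = compute(return_extra)
    if return_extra:
        results, all_summary_level = results   # mirrors the unpacking above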