def main():
  parser = argparse.ArgumentParser(
      description='Program to compare MT results',
  )
  parser.add_argument('--ref-file', type=str, dest='ref_file',
                      help='A path to a reference file over which the likelihoods are being computed/compared')
  parser.add_argument('--ll-files', type=str, nargs='+', dest='ll_files',
                      help='Paths to files containing log likelihoods for ref-file, as generated by each system')
  parser.add_argument('--compare-word-likelihoods', type=str, dest='compare_word_likelihoods',
                      nargs='*', default=['bucket_type=freq'],
                      help="""
                      Compare word log likelihoods by buckets. Can specify arguments in 'arg1=val1,arg2=val2,...' format.
                      See documentation for 'print_word_likelihood_report' to see which arguments are available.
                      """)
  parser.add_argument('--decimals', type=int, default=4,
                      help="Number of decimals to print for floating point numbers")
  args = parser.parse_args()

  # Set formatting
  formatting.fmt.set_decimals(args.decimals)

  ref = corpus_utils.load_tokens(args.ref_file)
  lls = [corpus_utils.load_nums(x) for x in args.ll_files]

  # Word likelihood analysis
  if args.compare_word_likelihoods:
    print_utils.print_header('Word Likelihood Analysis')
    for profile in args.compare_word_likelihoods:
      kargs = arg_utils.parse_profile(profile)
      print_word_likelihood_report(ref, lls, **kargs)
      print()
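# A minimal sketch of how the 'arg1=val1,arg2=val2,...' profile strings above
# are expanded into keyword arguments. It assumes arg_utils.parse_profile does
# a flat key=value split into a dict (the real helper may also validate or
# coerce values); the function name below is hypothetical.
def _parse_profile_sketch(profile):
  # e.g. 'bucket_type=freq,arg2=val2' -> {'bucket_type': 'freq', 'arg2': 'val2'}
  return dict(kv.split('=', 1) for kv in profile.split(','))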
def main():
  parser = argparse.ArgumentParser(
      description='Program to compare MT results',
  )
  parser.add_argument('ref_file', type=str,
                      help='A path to a correct reference file')
  parser.add_argument('out_files', type=str, nargs='+',
                      help='Paths to system outputs')
  parser.add_argument('--sys_names', type=str, nargs='+', default=None,
                      help='Names for each system, must be same number as output files')
  parser.add_argument('--src_file', type=str, default=None,
                      help='A path to the source file')
  parser.add_argument('--fig_size', type=str, default='6x4.5',
                      help='The size of figures, in "width x height" format.')
  parser.add_argument('--compare_scores', type=str, nargs='*',
                      default=['score_type=bleu', 'score_type=length'],
                      help="""
                      Compare scores. Can specify arguments in 'arg1=val1,arg2=val2,...' format.
                      See documentation for 'generate_score_report' to see which arguments are available.
                      """)
  parser.add_argument('--compare_word_accuracies', type=str, nargs='*',
                      default=['bucket_type=freq'],
                      help="""
                      Compare word accuracies by buckets. Can specify arguments in 'arg1=val1,arg2=val2,...' format.
                      See documentation for 'generate_word_accuracy_report' to see which arguments are available.
                      """)
  parser.add_argument('--compare_src_word_accuracies', type=str, nargs='*', default=None,
                      help="""
                      Source analysis. Can specify arguments in 'arg1=val1,arg2=val2,...' format.
                      See documentation for 'generate_src_word_accuracy_report' to see which arguments are available.
                      """)
  parser.add_argument('--compare_sentence_buckets', type=str, nargs='*',
                      default=['bucket_type=length,statistic_type=score,score_measure=bleu',
                               'bucket_type=lengthdiff',
                               'bucket_type=score,score_measure=sentbleu'],
                      help="""
                      Compare sentence counts by buckets. Can specify arguments in 'arg1=val1,arg2=val2,...' format.
                      See documentation for 'generate_sentence_bucketed_report' to see which arguments are available.
                      """)
  parser.add_argument('--compare_ngrams', type=str, nargs='*',
                      default=['compare_type=match'],
                      help="""
                      Compare ngrams. Can specify arguments in 'arg1=val1,arg2=val2,...' format.
                      See documentation for 'generate_ngram_report' to see which arguments are available.
                      """)
  parser.add_argument('--compare_sentence_examples', type=str, nargs='*',
                      default=['score_type=sentbleu'],
                      help="""
                      Compare sentences. Can specify arguments in 'arg1=val1,arg2=val2,...' format.
                      See documentation for 'generate_sentence_examples' to see which arguments are available.
                      """)
  parser.add_argument('--output_directory', type=str, default=None,
                      help="""
                      A path to a directory where a graphical report will be saved. Open index.html in the
                      directory to read the report.
                      """)
  parser.add_argument('--decimals', type=int, default=4,
                      help="Number of decimals to print for floating point numbers")
  args = parser.parse_args()

  # Set formatting
  formatting.fmt.set_decimals(args.decimals)

  ref = corpus_utils.load_tokens(args.ref_file)
  outs = [corpus_utils.load_tokens(x) for x in args.out_files]
  src = corpus_utils.load_tokens(args.src_file) if args.src_file else None

  reporters.sys_names = args.sys_names if args.sys_names else [f'sys{i+1}' for i in range(len(outs))]
  # e.g. '6x4.5' -> (6.0, 4.5)
  reporters.fig_size = tuple(float(x) for x in args.fig_size.split('x'))
  if len(reporters.sys_names) != len(outs):
    raise ValueError(f'len(reporters.sys_names) != len(outs) -- {len(reporters.sys_names)} != {len(outs)}')

  reports = []
  report_types = [
      (args.compare_scores, generate_score_report, 'Aggregate Scores', False),
      (args.compare_word_accuracies, generate_word_accuracy_report, 'Word Accuracies', False),
      (args.compare_src_word_accuracies, generate_src_word_accuracy_report, 'Source Word Accuracies', True),
      (args.compare_sentence_buckets, generate_sentence_bucketed_report, 'Sentence Buckets', False)]

  # These reports compare systems against each other, so they are only
  # generated when there is more than one system output
  if len(outs) > 1:
    report_types += [
        (args.compare_ngrams, generate_ngram_report, 'Characteristic N-grams', False),
        (args.compare_sentence_examples, generate_sentence_examples, 'Sentence Examples', True),
    ]

  for arg, func, name, use_src in report_types:
    if arg is not None:
      if use_src:
        reports.append(
            (name, [func(ref, outs, src, **arg_utils.parse_profile(x)) for x in arg]))
      else:
        reports.append(
            (name, [func(ref, outs, **arg_utils.parse_profile(x)) for x in arg]))

  # Write all reports into a single html file
  if args.output_directory is not None:
    reporters.generate_html_report(reports, args.output_directory)
def main():
  parser = argparse.ArgumentParser(
      description='Program to compare MT results',
  )
  parser.add_argument('ref_file', type=str,
                      help='A path to a correct reference file')
  parser.add_argument('out_files', type=str, nargs='+',
                      help='Paths to system outputs')
  parser.add_argument('--sys_names', type=str, nargs='+', default=None,
                      help='Names for each system, must be same number as output files')
  parser.add_argument('--src_file', type=str, default=None,
                      help='A path to the source file')
  parser.add_argument('--fig_size', type=str, default='6x4.5',
                      help='The size of figures, in "width x height" format.')
  parser.add_argument('--compare_scores', type=str, nargs='*',
                      default=['score_type=bleu', 'score_type=length'],
                      help="""
                      Compare scores. Can specify arguments in 'arg1=val1,arg2=val2,...' format.
                      See documentation for 'generate_score_report' to see which arguments are available.
                      """)
  parser.add_argument('--compare_word_accuracies', type=str, nargs='*',
                      default=['bucket_type=freq'],
                      help="""
                      Compare word accuracies by buckets. Can specify arguments in 'arg1=val1,arg2=val2,...' format.
                      See documentation for 'generate_word_accuracy_report' to see which arguments are available.
                      """)
  parser.add_argument('--compare_src_word_accuracies', type=str, nargs='*', default=None,
                      help="""
                      Source analysis. Can specify arguments in 'arg1=val1,arg2=val2,...' format.
                      See documentation for 'generate_src_word_accuracy_report' to see which arguments are available.
                      """)
  parser.add_argument('--compare_sentence_buckets', type=str, nargs='*',
                      default=['bucket_type=length,statistic_type=score,score_measure=bleu',
                               'bucket_type=lengthdiff',
                               'bucket_type=score,score_measure=sentbleu'],
                      help="""
                      Compare sentence counts by buckets. Can specify arguments in 'arg1=val1,arg2=val2,...' format.
                      See documentation for 'generate_sentence_bucketed_report' to see which arguments are available.
                      """)
  parser.add_argument('--compare_ngrams', type=str, nargs='*',
                      default=['compare_type=match'],
                      help="""
                      Compare ngrams. Can specify arguments in 'arg1=val1,arg2=val2,...' format.
                      See documentation for 'generate_ngram_report' to see which arguments are available.
                      """)
  parser.add_argument('--compare_sentence_examples', type=str, nargs='*',
                      default=['score_type=sentbleu'],
                      help="""
                      Compare sentences. Can specify arguments in 'arg1=val1,arg2=val2,...' format.
                      See documentation for 'generate_sentence_examples' to see which arguments are available.
                      """)
  parser.add_argument('--compare_repetitions', type=str, nargs='*', default=None,
                      help="""
                      Compare repetition statistics. Can specify arguments in 'arg1=val1,arg2=val2,...' format.
                      See documentation for 'generate_repetitions_report' to see which arguments are available.
                      """)
  parser.add_argument('--compare_repetition_examples', type=str, nargs='*', default=None,
                      help="""
                      Compare sentences that contain repetitions. Can specify arguments in 'arg1=val1,arg2=val2,...' format.
                      See documentation for 'generate_repetitions_examples' to see which arguments are available.
                      """)
  parser.add_argument('--output_directory', type=str, default=None,
                      help="""
                      A path to a directory where a graphical report will be saved. Open index.html in the
                      directory to read the report.
                      """)
  parser.add_argument('--report_title', type=str, default='compare-mt Analysis Report',
                      help='The name of the HTML report.')
  parser.add_argument('--decimals', type=int, default=4,
                      help="Number of decimals to print for floating point numbers")
  parser.add_argument('--scorer_scale', type=float, default=100, choices=[1, 100],
                      help="Set the scale of BLEU, METEOR, WER and chrF to 0-1 or 0-100 (default 0-100)")
  parser.add_argument('--lang_id', type=str, nargs='*', default=None,
                      help="""
                      Use language identification on output. Can specify arguments in 'arg1=val1,arg2=val2,...' format.
                      Arguments: model=[wtl,langid], min_length=int, print_lines=[True,False], print_line_numbers=[True,False].
                      min_length is the minimum length for segments to be analyzed with language identification
                      (the shorter the segment, the more unreliable the analysis); default=5.
                      """)
  args = parser.parse_args()

  # Set formatting
  formatting.fmt.set_decimals(args.decimals)

  # Set scale
  scorers.global_scorer_scale = args.scorer_scale

  ref = corpus_utils.load_tokens(args.ref_file)
  outs = [corpus_utils.load_tokens(x) for x in args.out_files]
  src = corpus_utils.load_tokens(args.src_file) if args.src_file else None

  reporters.sys_names = args.sys_names if args.sys_names else [f'sys{i+1}' for i in range(len(outs))]
  reporters.fig_size = tuple(float(x) for x in args.fig_size.split('x'))
  if len(reporters.sys_names) != len(outs):
    raise ValueError(f'len(sys_names) != len(outs) -- {len(reporters.sys_names)} != {len(outs)}')

  reports = []
  report_types = [
      (args.compare_scores, generate_score_report, 'Aggregate Scores', False),
      (args.compare_word_accuracies, generate_word_accuracy_report, 'Word Accuracies', False),
      (args.compare_src_word_accuracies, generate_src_word_accuracy_report, 'Source Word Accuracies', True),
      (args.compare_sentence_buckets, generate_sentence_bucketed_report, 'Sentence Buckets', False),
      (args.compare_repetitions, generate_repetitions_report, 'Repetition Statistics', True),
      (args.compare_repetition_examples, generate_repetitions_examples, 'Repetition Examples', True),
      (args.lang_id, generate_lang_id_report, 'Language Identification', False)]

  if len(outs) > 1:
    report_types += [
        (args.compare_ngrams, generate_ngram_report, 'Characteristic N-grams', False),
        (args.compare_sentence_examples, generate_sentence_examples, 'Sentence Examples', True),
    ]

  for arg, func, name, use_src in report_types:
    if arg is not None:
      if use_src:
        reports.append(
            (name, [func(ref, outs, src, **arg_utils.parse_profile(x)) for x in arg]))
      else:
        reports.append(
            (name, [func(ref, outs, **arg_utils.parse_profile(x)) for x in arg]))

  # Write all reports into a single html file
  if args.output_directory is not None:
    reporters.generate_html_report(reports, args.output_directory, args.report_title)
def main():
  parser = argparse.ArgumentParser(
      description='Program to compare MT results',
      epilog=f'For more details, see {source_code_url}')
  parser.add_argument('ref_file', type=str,
                      help='A path to a correct reference file')
  parser.add_argument('out_files', type=str, nargs='+',
                      help='Paths to system outputs')
  parser.add_argument(
      '--sys_names', type=str, nargs='+', default=None,
      help='Names for each system, must be same number as output files')
  parser.add_argument('--src_file', type=str, default=None,
                      help='A path to the source file')
  parser.add_argument(
      '--fig_size', type=str, default='6x4.5',
      help='The size of figures, in "width x height" format.')
  parser.add_argument('--compare_scores', type=str, nargs='*',
                      default=['score_type=bleu', 'score_type=length'],
                      help="""
                      Compare scores. Can specify arguments in 'arg1=val1,arg2=val2,...' format.
                      See documentation for 'generate_score_report' to see which arguments are available.
                      """)
  parser.add_argument('--compare_word_accuracies', type=str, nargs='*',
                      default=['bucket_type=freq'],
                      help="""
                      Compare word accuracies by buckets. Can specify arguments in 'arg1=val1,arg2=val2,...' format.
                      See documentation for 'generate_word_accuracy_report' to see which arguments are available.
                      """)
  parser.add_argument('--compare_src_word_accuracies', type=str, nargs='*', default=None,
                      help="""
                      Source analysis. Can specify arguments in 'arg1=val1,arg2=val2,...' format.
                      See documentation for 'generate_src_word_accuracy_report' to see which arguments are available.
                      """)
  parser.add_argument(
      '--compare_sentence_buckets', type=str, nargs='*',
      default=['bucket_type=length,statistic_type=score,score_measure=bleu',
               'bucket_type=lengthdiff',
               'bucket_type=score,score_measure=sentbleu'],
      help="""
      Compare sentence counts by buckets. Can specify arguments in 'arg1=val1,arg2=val2,...' format.
      See documentation for 'generate_sentence_bucketed_report' to see which arguments are available.
      """)
  parser.add_argument('--compare_ngrams', type=str, nargs='*',
                      default=['compare_type=match'],
                      help="""
                      Compare ngrams. Can specify arguments in 'arg1=val1,arg2=val2,...' format.
                      See documentation for 'generate_ngram_report' to see which arguments are available.
                      """)
  parser.add_argument('--compare_sentence_examples', type=str, nargs='*',
                      default=['score_type=sentbleu'],
                      help="""
                      Compare sentences. Can specify arguments in 'arg1=val1,arg2=val2,...' format.
                      See documentation for 'generate_sentence_examples' to see which arguments are available.
                      """)
  parser.add_argument('--output_directory', type=str, default=None,
                      help="""
                      A path to a directory where a graphical report will be saved. Open index.html in the
                      directory to read the report.
                      """)
  parser.add_argument('--report_title', type=str, default='compare-mt Analysis Report',
                      help='The name of the HTML report.')
  parser.add_argument(
      '--decimals', type=int, default=4,
      help="Number of decimals to print for floating point numbers")
  parser.add_argument('--seed', type=int, default=None,
                      help="Seed for random number generation")
  parser.add_argument(
      '--scorer_scale', type=float, default=100, choices=[1, 100],
      help="Set the scale of BLEU, METEOR, WER, chrF and COMET to 0-1 or 0-100 (default 0-100)")
  parser.add_argument(
      '--http', type=int, dest='bind_port',
      help='Launch an HTTP server at the specified port to view results. '
           'Disabled by default; specifying a port number enables it.')
  parser.add_argument('-v', '--version', action='version',
                      version=f'%(prog)s {__version__}')
  args = parser.parse_args()

  # Set formatting
  formatting.fmt.set_decimals(args.decimals)

  # Set random seed
  if args.seed is not None:
    npr.seed(args.seed)

  # Set scale
  scorers.global_scorer_scale = args.scorer_scale

  ref = corpus_utils.load_tokens(args.ref_file)
  outs = [corpus_utils.load_tokens(x) for x in args.out_files]
  src = corpus_utils.load_tokens(args.src_file) if args.src_file else None

  reporters.sys_names = args.sys_names if args.sys_names else [
      f'sys{i+1}' for i in range(len(outs))
  ]
  reporters.fig_size = tuple(float(x) for x in args.fig_size.split('x'))
  if len(reporters.sys_names) != len(outs):
    raise ValueError(
        f'len(sys_names) != len(outs) -- {len(reporters.sys_names)} != {len(outs)}')

  reports = []
  report_types = [
      (args.compare_scores, generate_score_report, 'Aggregate Scores', True),
      (args.compare_word_accuracies, generate_word_accuracy_report, 'Word Accuracies', False),
      (args.compare_src_word_accuracies, generate_src_word_accuracy_report, 'Source Word Accuracies', True),
      (args.compare_sentence_buckets, generate_sentence_bucketed_report, 'Sentence Buckets', True)
  ]
  if len(outs) > 1:
    report_types += [
        (args.compare_ngrams, generate_ngram_report, 'Characteristic N-grams', False),
        (args.compare_sentence_examples, generate_sentence_examples, 'Sentence Examples', True),
    ]

  for arg, func, name, use_src in report_types:
    if arg is not None:
      if use_src:
        reports.append((name, [
            func(ref, outs, src, **arg_utils.parse_profile(x)) for x in arg
        ]))
      else:
        reports.append((name, [
            func(ref, outs, **arg_utils.parse_profile(x)) for x in arg
        ]))

  # Write all reports into a single html file
  if args.output_directory is not None:
    reporters.generate_html_report(reports, args.output_directory,
                                   args.report_title)
  if args.bind_port:
    out_dir = args.output_directory
    if not out_dir:
      # mkdtemp() keeps the directory alive; tempfile.TemporaryDirectory().name
      # would be removed as soon as the wrapper object is garbage-collected
      out_dir = tempfile.mkdtemp()
      reporters.generate_html_report(reports, out_dir, args.report_title)
    reporters.launch_http_server(out_dir, bind_port=args.bind_port)
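# Example invocation of the version above (file names are placeholders;
# 'compare-mt' is the assumed console script): fix the random seed and serve
# the generated HTML report on port 8000:
#
#   compare-mt ref.txt sys1.txt sys2.txt --seed 1234 --http 8000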