Example #1
def main():
    parser = argparse.ArgumentParser(
        description='Program to compare MT results', )
    parser.add_argument(
        '--ref-file',
        type=str,
        dest='ref_file',
        help=
        'A path to a reference file over which the likelihoods are being computed/compared'
    )
    parser.add_argument(
        '--ll-files',
        type=str,
        nargs='+',
        dest='ll_files',
        help=
        'A path to file containing log likelihoods for ref-file generated by systems'
    )
    parser.add_argument('--compare-word-likelihoods',
                        type=str,
                        dest='compare_word_likelihoods',
                        nargs='*',
                        default=['bucket_type=freq'],
                        help="""
                    Compare word log likelihoods by buckets. Can specify arguments in 'arg1=val1,arg2=val2,...' format.
                    See documentation for 'print_word_likelihood_report' to see which arguments are available.
                    """)
    parser.add_argument(
        '--decimals',
        type=int,
        default=4,
        help="Number of decimals to print for floating point numbers")

    args = parser.parse_args()

    # Set formatting
    formatting.fmt.set_decimals(args.decimals)

    ref = corpus_utils.load_tokens(args.ref_file)
    lls = [corpus_utils.load_nums(x) for x in args.ll_files]

    # Word likelihood analysis
    if args.compare_word_likelihoods:
        print_utils.print_header('Word Likelihood Analysis')
        for profile in args.compare_word_likelihoods:
            kargs = arg_utils.parse_profile(profile)
            print_word_likelihood_report(ref, lls, **kargs)
            print()
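These snippets are command-line entry points from compare-mt and assume the package's internal modules (argparse, corpus_utils, formatting, arg_utils, print_utils) are already imported. Profile strings such as 'bucket_type=freq' are turned into keyword arguments by arg_utils.parse_profile; a minimal sketch of such a parser, assuming only the plain 'arg1=val1,arg2=val2' format described in the help text, might look like this:

# Minimal sketch of a profile parser for the 'arg1=val1,arg2=val2' format used
# in the help strings above; the real arg_utils.parse_profile may do more
# (type conversion, validation), so treat this as an illustration only.
def parse_profile(profile):
    kwargs = {}
    if profile:
        for pair in profile.split(','):
            key, _, value = pair.partition('=')
            kwargs[key.strip()] = value.strip()
    return kwargs

print(parse_profile('bucket_type=freq'))  # {'bucket_type': 'freq'}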
Example #2
def main():
  parser = argparse.ArgumentParser(
      description='Program to compare MT results',
  )
  parser.add_argument('ref_file', type=str,
                      help='A path to a correct reference file')
  parser.add_argument('out_files', type=str, nargs='+',
                      help='Paths to system outputs')
  parser.add_argument('--sys_names', type=str, nargs='+', default=None,
                      help='Names for each system, must be same number as output files')
  parser.add_argument('--src_file', type=str, default=None,
                      help='A path to the source file')
  parser.add_argument('--fig_size', type=str, default='6x4.5',
                      help='The size of figures, in "width x height" format.')
  parser.add_argument('--compare_scores', type=str, nargs='*',
                      default=['score_type=bleu', 'score_type=length'],
                      help="""
                      Compare scores. Can specify arguments in 'arg1=val1,arg2=val2,...' format.
                      See documentation for 'generate_score_report' to see which arguments are available.
                      """)
  parser.add_argument('--compare_word_accuracies', type=str, nargs='*',
                      default=['bucket_type=freq'],
                      help="""
                      Compare word accuracies by buckets. Can specify arguments in 'arg1=val1,arg2=val2,...' format.
                      See documentation for 'generate_word_accuracy_report' to see which arguments are available.
                      """)
  parser.add_argument('--compare_src_word_accuracies', type=str, nargs='*',
                      default=None,
                      help="""
                      Source analysis. Can specify arguments in 'arg1=val1,arg2=val2,...' format.
                      See documentation for 'generate_src_word_accuracy_report' to see which arguments are available.
                      """)
  parser.add_argument('--compare_sentence_buckets', type=str, nargs='*',
                      default=['bucket_type=length,statistic_type=score,score_measure=bleu',
                               'bucket_type=lengthdiff',
                               'bucket_type=score,score_measure=sentbleu'],
                      help="""
                      Compare sentence counts by buckets. Can specify arguments in 'arg1=val1,arg2=val2,...' format.
                      See documentation for 'generate_sentence_bucketed_report' to see which arguments are available.
                      """)
  parser.add_argument('--compare_ngrams', type=str, nargs='*',
                      default=['compare_type=match'],
                      help="""
                      Compare ngrams. Can specify arguments in 'arg1=val1,arg2=val2,...' format.
                      See documentation for 'generate_ngram_report' to see which arguments are available.
                      """)
  parser.add_argument('--compare_sentence_examples', type=str, nargs='*',
                      default=['score_type=sentbleu'],
                      help="""
                      Compare sentences. Can specify arguments in 'arg1=val1,arg2=val2,...' format.
                      See documentation for 'generate_sentence_examples' to see which arguments are available.
                      """)
  parser.add_argument('--output_directory', type=str, default=None,
                      help="""
                      A path to a directory where a graphical report will be saved. Open index.html in the directory
                      to read the report.
                      """)
  parser.add_argument('--decimals', type=int, default=4,
                      help="Number of decimals to print for floating point numbers")
  args = parser.parse_args()

  # Set formatting
  formatting.fmt.set_decimals(args.decimals)

  ref = corpus_utils.load_tokens(args.ref_file)
  outs = [corpus_utils.load_tokens(x) for x in args.out_files]

  src = corpus_utils.load_tokens(args.src_file) if args.src_file else None 
  reporters.sys_names = args.sys_names if args.sys_names else [f'sys{i+1}' for i in range(len(outs))]
  reporters.fig_size = tuple([float(x) for x in args.fig_size.split('x')])
  if len(reporters.sys_names) != len(outs):
    raise ValueError(f'len(reporters.sys_names) != len(outs) -- {len(reporters.sys_names)} != {len(outs)}')

  reports = []

  report_types = [
    (args.compare_scores, generate_score_report, 'Aggregate Scores', False),
    (args.compare_word_accuracies, generate_word_accuracy_report, 'Word Accuracies', False),
    (args.compare_src_word_accuracies, generate_src_word_accuracy_report, 'Source Word Accuracies', True),
    (args.compare_sentence_buckets, generate_sentence_bucketed_report, 'Sentence Buckets', False)]
  if len(outs) > 1:
    report_types += [
      (args.compare_ngrams, generate_ngram_report, 'Characteristic N-grams', False),
      (args.compare_sentence_examples, generate_sentence_examples, 'Sentence Examples', True),
    ]

  for arg, func, name, use_src in report_types:
    if arg is not None:
      if use_src:
        reports.append( (name, [func(ref, outs, src, **arg_utils.parse_profile(x)) for x in arg]) )
      else:
        reports.append( (name, [func(ref, outs, **arg_utils.parse_profile(x)) for x in arg]) )

  # Write all reports into a single html file
  if args.output_directory is not None:
    reporters.generate_html_report(reports, args.output_directory)
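Whether a given report section is produced at all is governed by the nargs='*' / default=None pattern: a flag whose default is None leaves its entry in report_types skipped by the 'if arg is not None' check, while a flag with a list default always runs. A small self-contained demonstration (file names are hypothetical):

import argparse

# Demonstrates the nargs='*' / default=None pattern used above to decide which
# report sections are generated; 'ref.tok', 'sys1.tok', 'sys2.tok' are made up.
parser = argparse.ArgumentParser()
parser.add_argument('ref_file', type=str)
parser.add_argument('out_files', type=str, nargs='+')
parser.add_argument('--compare_word_accuracies', type=str, nargs='*',
                    default=['bucket_type=freq'])
parser.add_argument('--compare_src_word_accuracies', type=str, nargs='*',
                    default=None)

args = parser.parse_args(['ref.tok', 'sys1.tok', 'sys2.tok'])
print(args.compare_word_accuracies)      # ['bucket_type=freq'] -> report is generated
print(args.compare_src_word_accuracies)  # None -> report is skipped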
Example #3
def main():
  parser = argparse.ArgumentParser(
      description='Program to compare MT results',
  )
  parser.add_argument('ref_file', type=str,
                      help='A path to a correct reference file')
  parser.add_argument('out_files', type=str, nargs='+',
                      help='Paths to system outputs')
  parser.add_argument('--sys_names', type=str, nargs='+', default=None,
                      help='Names for each system, must be same number as output files')
  parser.add_argument('--src_file', type=str, default=None,
                      help='A path to the source file')
  parser.add_argument('--fig_size', type=str, default='6x4.5',
                      help='The size of figures, in "width x height" format.')
  parser.add_argument('--compare_scores', type=str, nargs='*',
                      default=['score_type=bleu', 'score_type=length'],
                      help="""
                      Compare scores. Can specify arguments in 'arg1=val1,arg2=val2,...' format.
                      See documentation for 'generate_score_report' to see which arguments are available.
                      """)
  parser.add_argument('--compare_word_accuracies', type=str, nargs='*',
                      default=['bucket_type=freq'],
                      help="""
                      Compare word accuracies by buckets. Can specify arguments in 'arg1=val1,arg2=val2,...' format.
                      See documentation for 'generate_word_accuracy_report' to see which arguments are available.
                      """)
  parser.add_argument('--compare_src_word_accuracies', type=str, nargs='*',
                      default=None,
                      help="""
                      Source analysis. Can specify arguments in 'arg1=val1,arg2=val2,...' format.
                      See documentation for 'generate_src_word_accuracy_report' to see which arguments are available.
                      """)
  parser.add_argument('--compare_sentence_buckets', type=str, nargs='*',
                      default=['bucket_type=length,statistic_type=score,score_measure=bleu',
                               'bucket_type=lengthdiff',
                               'bucket_type=score,score_measure=sentbleu'],
                      help="""
                      Compare sentence counts by buckets. Can specify arguments in 'arg1=val1,arg2=val2,...' format.
                      See documentation for 'generate_sentence_bucketed_report' to see which arguments are available.
                      """)
  parser.add_argument('--compare_ngrams', type=str, nargs='*',
                      default=['compare_type=match'],
                      help="""
                      Compare ngrams. Can specify arguments in 'arg1=val1,arg2=val2,...' format.
                      See documentation for 'generate_ngram_report' to see which arguments are available.
                      """)
  parser.add_argument('--compare_sentence_examples', type=str, nargs='*',
                      default=['score_type=sentbleu'],
                      help="""
                      Compare sentences. Can specify arguments in 'arg1=val1,arg2=val2,...' format.
                      See documentation for 'generate_sentence_examples' to see which arguments are available.
                      """)
  parser.add_argument('--compare_repetitions', type=str, nargs='*',
                      default=None,
                      help="""
                        Compare repetition statistics. Can specify arguments in 'arg1=val1,arg2=val2,...' format.
                        See documentation for 'generate_repetitions_report' to see which arguments are available.
                        """)
  parser.add_argument('--compare_repetition_examples', type=str, nargs='*',
                      default=None,
                      help="""
                        Compare sentences that contain repetitions. Can specify arguments in 'arg1=val1,arg2=val2,...' format.
                        See documentation for 'generate_repetition_examples' to see which arguments are available.
                        """)

  parser.add_argument('--output_directory', type=str, default=None,
                      help="""
                      A path to a directory where a graphical report will be saved. Open index.html in the directory
                      to read the report.
                      """)
  parser.add_argument('--report_title', type=str, default='compare-mt Analysis Report',
                      help="""
                      The name of the HTML report.
                      """)
  parser.add_argument('--decimals', type=int, default=4,
                      help="Number of decimals to print for floating point numbers")
  parser.add_argument('--scorer_scale', type=float, default=100, choices=[1, 100],
                      help="Set the scale of BLEU, METEOR, WER and chrF to 0-1 or 0-100 (default 0-100)")
  parser.add_argument('--lang_id', type=str, nargs='*', default=None,
                      help="""
                      Use language identification on output. Can specify arguments in 'arg1=val1,arg2=val2,...' format. 
                      Arguments: model=[wtl,langid], min_length=int, print_lines=[True,False], print_line_numbers=[True,False]
                      Set minimum length for segments to be analyzed with language identification (the shorter the segment, the more unreliable the analysis), default=5.
                      """) 
  args = parser.parse_args()

  # Set formatting
  formatting.fmt.set_decimals(args.decimals)

  # Set scale
  scorers.global_scorer_scale = args.scorer_scale

  ref = corpus_utils.load_tokens(args.ref_file)
  outs = [corpus_utils.load_tokens(x) for x in args.out_files]

  src = corpus_utils.load_tokens(args.src_file) if args.src_file else None 
  reporters.sys_names = args.sys_names if args.sys_names else [f'sys{i+1}' for i in range(len(outs))]
  reporters.fig_size = tuple([float(x) for x in args.fig_size.split('x')])
  if len(reporters.sys_names) != len(outs):
    raise ValueError(f'len(sys_names) != len(outs) -- {len(reporters.sys_names)} != {len(outs)}')

  reports = []

  report_types = [
    (args.compare_scores, generate_score_report, 'Aggregate Scores', False),
    (args.compare_word_accuracies, generate_word_accuracy_report, 'Word Accuracies', False),
    (args.compare_src_word_accuracies, generate_src_word_accuracy_report, 'Source Word Accuracies', True),
    (args.compare_sentence_buckets, generate_sentence_bucketed_report, 'Sentence Buckets', False),
    (args.compare_repetitions, generate_repetitions_report, 'Repetition Statistics', True),
    (args.compare_repetition_examples, generate_repetitions_examples, 'Repetition Examples', True),
    (args.lang_id, generate_lang_id_report, 'Language Identification', False)]
  if len(outs) > 1:
    report_types += [
      (args.compare_ngrams, generate_ngram_report, 'Characteristic N-grams', False),
      (args.compare_sentence_examples, generate_sentence_examples, 'Sentence Examples', True),
    ]

  for arg, func, name, use_src in report_types:
    if arg is not None:
      if use_src:
        reports.append( (name, [func(ref, outs, src, **arg_utils.parse_profile(x)) for x in arg]) )
      else:
        reports.append( (name, [func(ref, outs, **arg_utils.parse_profile(x)) for x in arg]) )

  # Write all reports into a single html file
  if args.output_directory is not None:
    reporters.generate_html_report(reports, args.output_directory, args.report_title)
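Two small pieces of housekeeping above are easy to miss: --fig_size is parsed from a 'widthxheight' string into a float tuple, and system names default to sys1, sys2, ... when --sys_names is omitted. A stand-alone illustration with hypothetical output files:

# Stand-alone illustration of the figure-size and system-name handling above;
# 'sys1.tok' and 'sys2.tok' are hypothetical output files.
out_files = ['sys1.tok', 'sys2.tok']
sys_names = None  # i.e. --sys_names was not given

fig_size = tuple(float(x) for x in '6x4.5'.split('x'))
names = sys_names if sys_names else [f'sys{i+1}' for i in range(len(out_files))]

print(fig_size)  # (6.0, 4.5)
print(names)     # ['sys1', 'sys2']
if len(names) != len(out_files):
    raise ValueError(f'len(sys_names) != len(outs) -- {len(names)} != {len(out_files)}')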
Example #4
def main():
    parser = argparse.ArgumentParser(
        description='Program to compare MT results',
        epilog=f'For more details, see {source_code_url}')
    parser.add_argument('ref_file',
                        type=str,
                        help='A path to a correct reference file')
    parser.add_argument('out_files',
                        type=str,
                        nargs='+',
                        help='Paths to system outputs')
    parser.add_argument(
        '--sys_names',
        type=str,
        nargs='+',
        default=None,
        help='Names for each system, must be same number as output files')
    parser.add_argument('--src_file',
                        type=str,
                        default=None,
                        help='A path to the source file')
    parser.add_argument(
        '--fig_size',
        type=str,
        default='6x4.5',
        help='The size of figures, in "width x height" format.')
    parser.add_argument('--compare_scores',
                        type=str,
                        nargs='*',
                        default=['score_type=bleu', 'score_type=length'],
                        help="""
                      Compare scores. Can specify arguments in 'arg1=val1,arg2=val2,...' format.
                      See documentation for 'generate_score_report' to see which arguments are available.
                      """)
    parser.add_argument('--compare_word_accuracies',
                        type=str,
                        nargs='*',
                        default=['bucket_type=freq'],
                        help="""
                      Compare word accuracies by buckets. Can specify arguments in 'arg1=val1,arg2=val2,...' format.
                      See documentation for 'generate_word_accuracy_report' to see which arguments are available.
                      """)
    parser.add_argument('--compare_src_word_accuracies',
                        type=str,
                        nargs='*',
                        default=None,
                        help="""
                      Source analysis. Can specify arguments in 'arg1=val1,arg2=val2,...' format.
                      See documentation for 'generate_src_word_accuracy_report' to see which arguments are available.
                      """)
    parser.add_argument(
        '--compare_sentence_buckets',
        type=str,
        nargs='*',
        default=[
            'bucket_type=length,statistic_type=score,score_measure=bleu',
            'bucket_type=lengthdiff',
            'bucket_type=score,score_measure=sentbleu'
        ],
        help="""
                      Compare sentence counts by buckets. Can specify arguments in 'arg1=val1,arg2=val2,...' format.
                      See documentation for 'generate_sentence_bucketed_report' to see which arguments are available.
                      """)
    parser.add_argument('--compare_ngrams',
                        type=str,
                        nargs='*',
                        default=['compare_type=match'],
                        help="""
                      Compare ngrams. Can specify arguments in 'arg1=val1,arg2=val2,...' format.
                      See documentation for 'generate_ngram_report' to see which arguments are available.
                      """)
    parser.add_argument('--compare_sentence_examples',
                        type=str,
                        nargs='*',
                        default=['score_type=sentbleu'],
                        help="""
                      Compare sentences. Can specify arguments in 'arg1=val1,arg2=val2,...' format.
                      See documentation for 'generate_sentence_examples' to see which arguments are available.
                      """)
    parser.add_argument('--output_directory',
                        type=str,
                        default=None,
                        help="""
                      A path to a directory where a graphical report will be saved. Open index.html in the directory
                      to read the report.
                      """)
    parser.add_argument('--report_title',
                        type=str,
                        default='compare-mt Analysis Report',
                        help="""
                      The name of the HTML report.
                      """)
    parser.add_argument(
        '--decimals',
        type=int,
        default=4,
        help="Number of decimals to print for floating point numbers")
    parser.add_argument('--seed',
                        type=int,
                        default=None,
                        help="Seed for random number generation")
    parser.add_argument(
        '--scorer_scale',
        type=float,
        default=100,
        choices=[1, 100],
        help=
        "Set the scale of BLEU, METEOR, WER, chrF and COMET to 0-1 or 0-100 (default 0-100)"
    )
    parser.add_argument(
        '--http',
        type=int,
        dest='bind_port',
        help='Launch an HTTP server at the specified port to view results. '
        'Disabled by default; specifying a port number enables it.')
    parser.add_argument('-v',
                        '--version',
                        action='version',
                        version=f'%(prog)s {__version__}')
    args = parser.parse_args()

    # Set formatting
    formatting.fmt.set_decimals(args.decimals)

    # Set random seed
    if args.seed is not None:
        npr.seed(args.seed)

    # Set scale
    scorers.global_scorer_scale = args.scorer_scale

    ref = corpus_utils.load_tokens(args.ref_file)
    outs = [corpus_utils.load_tokens(x) for x in args.out_files]

    src = corpus_utils.load_tokens(args.src_file) if args.src_file else None
    reporters.sys_names = args.sys_names if args.sys_names else [
        f'sys{i+1}' for i in range(len(outs))
    ]
    reporters.fig_size = tuple([float(x) for x in args.fig_size.split('x')])
    if len(reporters.sys_names) != len(outs):
        raise ValueError(
            f'len(sys_names) != len(outs) -- {len(reporters.sys_names)} != {len(outs)}'
        )

    reports = []

    report_types = [
        (args.compare_scores, generate_score_report, 'Aggregate Scores', True),
        (args.compare_word_accuracies, generate_word_accuracy_report,
         'Word Accuracies', False),
        (args.compare_src_word_accuracies, generate_src_word_accuracy_report,
         'Source Word Accuracies', True),
        (args.compare_sentence_buckets, generate_sentence_bucketed_report,
         'Sentence Buckets', True)
    ]
    if len(outs) > 1:
        report_types += [
            (args.compare_ngrams, generate_ngram_report,
             'Characteristic N-grams', False),
            (args.compare_sentence_examples, generate_sentence_examples,
             'Sentence Examples', True),
        ]

    for arg, func, name, use_src in report_types:
        if arg is not None:
            if use_src:
                reports.append((name, [
                    func(ref, outs, src, **arg_utils.parse_profile(x))
                    for x in arg
                ]))
            else:
                reports.append((name, [
                    func(ref, outs, **arg_utils.parse_profile(x)) for x in arg
                ]))

    # Write all reports into a single html file
    if args.output_directory is not None:
        reporters.generate_html_report(reports, args.output_directory,
                                       args.report_title)

    if args.bind_port:
        out_dir = args.output_directory
        if not out_dir:
            # mkdtemp keeps the directory around for the server to read;
            # TemporaryDirectory().name would be removed as soon as the
            # TemporaryDirectory object is garbage-collected.
            out_dir = tempfile.mkdtemp()
            reporters.generate_html_report(reports, out_dir, args.report_title)
        reporters.launch_http_server(out_dir, bind_port=args.bind_port)
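The --http option hands the report directory to reporters.launch_http_server. As a rough sketch of the underlying idea (not compare-mt's actual implementation), serving a generated report directory with only the standard library could look like this; 'report/' and the port number are hypothetical:

import functools
import http.server
import socketserver

# Serve a directory of static report files over HTTP; open
# http://localhost:<port>/index.html in a browser to read the report.
def serve_report(directory, port=8000):
    handler = functools.partial(http.server.SimpleHTTPRequestHandler,
                                directory=directory)
    with socketserver.TCPServer(('', port), handler) as httpd:
        print(f'Serving {directory} at http://localhost:{port}/index.html')
        httpd.serve_forever()

# serve_report('report/', port=8000)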