예제 #1
0
파일: m2scorer.py 프로젝트: xqjin/GEC0Prep
def evaluateIt(system_file,gold_file,verbose=False):
	"""Score a system output file against a gold annotation file.

	The gold file is parsed with ``load_annotation`` into source sentences
	and gold edits. The system file is read line by line, each line decoded
	as UTF-8 and stripped. Both are handed to
	``levenshtein.batch_multi_pre_rec_f1``.

	Args:
		system_file: Path of the system hypotheses file, opened with
			``smart_open``.
		gold_file: Path of the gold annotation file, passed to
			``load_annotation``.
		verbose: Forwarded unchanged to the scorer.

	Returns:
		The ``(p, r, f1)`` triple returned by the scorer (precision, recall
		and F1).
	"""
	# Scorer settings are fixed here rather than read from the command line.
	max_unchanged_words = 2
	ignore_whitespace_casing = False
	very_verbose = False

	# load source sentences and gold edits
	source_sentences, gold_edits = load_annotation(gold_file)

	# load system hypotheses
	fin = smart_open(system_file, 'r')
	try:
		# NOTE(review): .decode() assumes the handle yields byte strings
		# (Python 2 semantics) -- confirm before running under Python 3.
		system_sentences = [line.decode("utf8").strip() for line in fin.readlines()]
	finally:
		# Release the handle even when reading or decoding fails.
		fin.close()

	p, r, f1 = levenshtein.batch_multi_pre_rec_f1(system_sentences, source_sentences, gold_edits, max_unchanged_words, ignore_whitespace_casing, verbose, very_verbose)

	return p,r,f1
def get_score(system_sentences,
              source_sentences,
              gold_edits,
              max_unchanged_words=2,
              beta=0.5,
              ignore_whitespace_casing=False,
              verbose=False,
              very_verbose=False,
              should_cache=True):
    """Compute ``(p, r, f1)`` for system sentences, optionally memoized.

    Scoring is delegated to ``levenshtein.batch_multi_pre_rec_f1``. When
    ``should_cache`` is true, the result is stored in (and looked up from)
    the module-level ``cache`` mapping, keyed by the first 20 characters of
    each of the first 30 system sentences.

    Args:
        system_sentences: Sequence of system output sentences.
        source_sentences: Source sentences, forwarded to the scorer.
        gold_edits: Gold edits, forwarded to the scorer.
        max_unchanged_words: Forwarded to the scorer.
        beta: Forwarded to the scorer.
        ignore_whitespace_casing: Forwarded to the scorer.
        verbose: Forwarded to the scorer; also prints the result here.
        very_verbose: Forwarded to the scorer.
        should_cache: Whether to consult and populate ``cache``.

    Returns:
        The ``(p, r, f1)`` triple, either freshly computed or from ``cache``.
    """
    cache_key = None
    if should_cache:
        # Slice instead of indexing range(30): inputs with fewer than 30
        # sentences used to raise IndexError. Keys for longer inputs are
        # unchanged.
        cache_key = tuple(sentence[:20] for sentence in system_sentences[:30])
        # NOTE(review): the key ignores source_sentences, gold_edits, beta and
        # the other options, so calls that differ only in those share a cache
        # entry -- confirm this is acceptable for all callers.
        if cache_key in cache:
            print("caching")
            return cache[cache_key]

    p, r, f1 = levenshtein.batch_multi_pre_rec_f1(system_sentences,
                                                  source_sentences, gold_edits,
                                                  max_unchanged_words, beta,
                                                  ignore_whitespace_casing,
                                                  verbose, very_verbose)
    if verbose:
        print(p, r, f1)
    if should_cache:
        cache[cache_key] = p, r, f1
    return p, r, f1
예제 #3
0
        )
        print_usage()
        sys.exit(-1)

# starting point: exactly two positional arguments are required
if len(args) != 2:
    print_usage()
    sys.exit(-1)

# first positional argument is the system output, second the gold annotation
system_file = args[0]
gold_file = args[1]

# load source sentences and gold edits
source_sentences, gold_edits = load_annotation(gold_file)

# load system hypotheses, one stripped line per entry
fin = smart_open(system_file, 'r')
try:
    system_sentences = [line.strip() for line in fin.readlines()]
finally:
    # release the handle even when reading fails
    fin.close()

p, r, f1 = levenshtein.batch_multi_pre_rec_f1(system_sentences,
                                              source_sentences, gold_edits,
                                              max_unchanged_words, beta,
                                              ignore_whitespace_casing,
                                              verbose, very_verbose)

# report the three scores; the F label carries the beta value that was used
print("Precision   : %.4f" % p)
print("Recall      : %.4f" % r)
print("F_%.1f       : %.4f" % (beta, f1))
예제 #4
0
    elif o == "--ignore_whitespace_casing":
        ignore_whitespace_casing = True
    else:
        print >> sys.stderr, "Unknown option :", o
        print_usage()
        sys.exit(-1)

# starting point: exactly two positional arguments are required
if len(args) != 2:
    print_usage()
    sys.exit(-1)

# first positional argument is the system output, second the gold annotation
system_file = args[0]
gold_file = args[1]

# load source sentences and gold edits
source_sentences, gold_edits = load_annotation(gold_file)

# load system hypotheses, decoding each line as UTF-8 and stripping it
fin = smart_open(system_file, "r")
try:
    # NOTE(review): .decode() assumes the handle yields byte strings
    # (Python 2 semantics) -- confirm before running under Python 3.
    system_sentences = [line.decode("utf8").strip() for line in fin.readlines()]
finally:
    # release the handle even when reading or decoding fails
    fin.close()

p, r, f1 = levenshtein.batch_multi_pre_rec_f1(
    system_sentences, source_sentences, gold_edits, max_unchanged_words, ignore_whitespace_casing, verbose, very_verbose
)

# Single-argument parenthesised print: same output under the Python 2 print
# statement, and also valid as a Python 3 function call.
print("Precision   : %.4f" % p)
print("Recall      : %.4f" % r)
print("F1          : %.4f" % f1)