コード例 #1
0
def get_result_for_sentences():
    """Print a '#'-separated hit-rate report for ``usertexts/sentences.txt``.

    The input file is resolved relative to the current working directory.
    Every non-blank line must hold three ``;``-separated fields: the wrong
    word, the incorrect sentence and the correct sentence.  The bigrams of
    both sentences are looked up in the default ``Corpus``; every bigram the
    corpus does not contain is reported as a found defect.  One row is
    printed per input line, followed by a final row with the totals.

    Raises:
        ValueError: if a non-blank line does not split into exactly three
            ``;``-separated fields.
    """
    current_directory = os.path.abspath(os.getcwd())
    usertext = os.path.join(current_directory, 'usertexts', 'sentences.txt')
    corpus = Corpus()
    print('#'.join(
        ['Hitrate',
         'Correct',
         'Not detected',
         'Incorrect',
         'Wrong word',
         'Incorrect sentence',
         'Correct sentence',
         'Correct defects',
         'Nr defects found',
         'Defects found',
         'Nr bigrams',
         'Bigrams']
    ))
    with open(usertext, 'r') as f:
        total_correct = 0
        total_not_detected = 0
        total_incorrect = 0
        for line in f:
            # Lines read from a file keep their trailing newline, so a blank
            # line is '\n' and never has length 0; strip before testing.
            if not line.strip():
                continue
            false_word, incorrect_sentence, correct_sentence = line.split(';')
            bigrams = (list(get_bigrams(incorrect_sentence.split()))
                       + list(get_bigrams(correct_sentence.split())))
            found_defects = [
                bigram for bigram in bigrams
                if not corpus.bigram_exists(bigram)
            ]
            correct_defects = get_correct_defects(bigrams, false_word)
            correct, not_detected, incorrect = get_hitrate_for_bigrams(
                correct_defects, found_defects)
            total_correct += correct
            total_incorrect += incorrect
            total_not_detected += not_detected
            # Truncate to three decimals and use a decimal comma in the output.
            hitrate = get_z(correct, not_detected, incorrect).quantize(
                Decimal('.001'), rounding=ROUND_DOWN)
            print('{0}#{1}#{2}#{3}#{4}#{5}#{6}#{7}#{8}#{9}#{10}#{11}'.format(
                str(hitrate).replace('.', ','),
                correct,
                not_detected,
                incorrect,
                false_word,
                incorrect_sentence.strip(),
                correct_sentence.strip(),
                correct_defects,
                len(found_defects),
                found_defects,
                len(bigrams),
                bigrams,
            ))
        total_hitrate = get_z(
            total_correct, total_not_detected, total_incorrect
        ).quantize(Decimal('.001'), rounding=ROUND_DOWN)
        print('{0}#{1}#{2}#{3}'.format(
            str(total_hitrate).replace('.', ','),
            total_correct,
            total_not_detected,
            total_incorrect,
        ))
コード例 #2
0
def get_defects(usertext_filename, corpuses=None):
    """Return a list of bigrams that don't exist in the corpuses.

    Args:
        usertext_filename: passed to ``generate_words`` to obtain the words
            of the user text; also echoed in the progress message.
        corpuses: forwarded unchanged to ``Corpus``; ``None`` by default.

    Returns:
        A list with every bigram of the user text for which
        ``corpus.bigram_exists`` is false, in the order they occur.
    """
    usertext_words = generate_words(usertext_filename)
    # Build the bigrams exactly once and reuse the list for both the count
    # and the lookup.  Calling get_bigrams twice on the same words is unsafe
    # if generate_words hands back a one-shot iterator (not visible from
    # here): the first pass to run would exhaust it and starve the other.
    bigrams = list(get_bigrams(usertext_words))
    print('Found {0} bigrams in {1}'.format(len(bigrams), usertext_filename))
    corpus = Corpus(corpuses)
    return [bigram for bigram in bigrams if not corpus.bigram_exists(bigram)]
コード例 #3
0
def get_defects_in_false_friends():
    """Print a LaTeX table of defects found in ``usertexts/false_friends.txt``.

    The input file is resolved relative to the current working directory.
    Every non-blank line must hold two ``;``-separated fields: the wrong
    word and the sentence containing it.  Each bigram of the sentence is
    looked up in the default ``Corpus``; bigrams the corpus does not contain
    are reported as found defects.  One table row is printed per input line,
    followed by a ``\\hline`` and a row with the overall hit rate.

    Raises:
        ValueError: if a non-blank line does not split into exactly two
            ``;``-separated fields.
    """
    current_directory = os.path.abspath(os.getcwd())
    usertext = os.path.join(current_directory, 'usertexts', 'false_friends.txt')
    corpus = Corpus()
    print(' & '.join(
        ['Hitrate',
         'Wrong word',
         'Sentence',
         'Nr defects found',
         'Defects found \\\\']
    ))
    print('\\hline')
    with open(usertext, 'r') as f:
        total_correct = 0
        total_not_detected = 0
        total_incorrect = 0
        for line in f:
            # Lines read from a file keep their trailing newline, so a blank
            # line is '\n' and never has length 0; strip before testing.
            if not line.strip():
                continue
            false_word, sentence = line.split(';')
            # Compute the bigrams once, as a list, so the same sequence is
            # used for the corpus lookup and for get_correct_defects (this
            # matches how get_result_for_sentences calls that helper).
            bigrams = list(get_bigrams(sentence.split()))
            found_defects = [
                bigram for bigram in bigrams
                if not corpus.bigram_exists(bigram)
            ]
            correct_defects = get_correct_defects(bigrams, false_word)
            correct, not_detected, incorrect = get_hitrate_for_bigrams(
                correct_defects, found_defects)
            total_correct += correct
            total_incorrect += incorrect
            total_not_detected += not_detected
            # Truncate to three decimals and use a decimal comma in the output.
            hitrate = get_z(correct, not_detected, incorrect).quantize(
                Decimal('.001'), rounding=ROUND_DOWN)
            print('{0} & {1} & {2} & {3} & {4} \\\\'.format(
                str(hitrate).replace('.', ','),
                false_word,
                sentence.strip(),
                len(found_defects),
                # Leave the cell empty rather than printing '[]'.
                found_defects if found_defects else '',
            ))
        print('\\hline')
        total_hitrate = get_z(
            total_correct, total_not_detected, total_incorrect
        ).quantize(Decimal('.001'), rounding=ROUND_DOWN)
        print('{0} & hitrate'.format(
            str(total_hitrate).replace('.', ','),
        ))
コード例 #4
0
def get_defects_in_correct_sentences():
    """Print a LaTeX table of defects found in ``usertexts/correct_sentences.txt``.

    The input file is resolved relative to the directory of this module.
    Each non-blank line is treated as one sentence.  Every bigram of the
    sentence is looked up in the default ``Corpus``; a bigram the corpus
    does not contain counts as incorrect, every other bigram as correct,
    and the not-detected count is always 0.  One table row is printed per
    sentence, followed by a ``\\hline`` and a row with the overall hit rate.
    """
    current_directory = os.path.dirname(__file__)
    usertext = os.path.join(
        current_directory, 'usertexts', 'correct_sentences.txt')
    corpus = Corpus()
    print(' & '.join(
        ['Hitrate',
         'Sentence',
         'Count',
         'Defects found \\\\']
    ))
    print('\\hline')
    with open(usertext, 'r') as f:
        total_correct = 0
        total_incorrect = 0
        # Nothing can be "not detected" here: the sentences are all correct.
        not_detected = 0
        total_not_detected = 0
        for line in f:
            # Skip blank lines, as the sibling report functions intend to;
            # they carry no sentence and would only yield an empty table row.
            if not line.strip():
                continue
            bigrams = list(get_bigrams(line.split()))
            defects = [
                bigram for bigram in bigrams
                if not corpus.bigram_exists(bigram)
            ]
            incorrect = len(defects)
            correct = len(bigrams) - incorrect
            total_incorrect += incorrect
            total_correct += correct
            # Truncate to three decimals and use a decimal comma in the output.
            hitrate = get_z(correct, not_detected, incorrect).quantize(
                Decimal('.001'), rounding=ROUND_DOWN)
            print('{0} & {1} & {2} & {3} \\\\'.format(
                str(hitrate).replace('.', ','),
                line.strip(),
                len(defects),
                # Leave the cell empty rather than printing '[]'.
                defects if defects else '',
            ))
        print('\\hline')
        total_hitrate = get_z(
            total_correct, total_not_detected, total_incorrect
        ).quantize(Decimal('.001'), rounding=ROUND_DOWN)
        print('{0} & hitrate'.format(
            str(total_hitrate).replace('.', ','),
        ))