Ejemplo n.º 1
0
import hmm_utils
import cmd_utils
import parsers


def round_to(n, precission):
    """Round ``n`` to the nearest multiple of ``precission``.

    Exact halves are rounded away from zero, e.g. ``round_to(5, 10)`` is
    ``10`` and ``round_to(-5, 10)`` is ``-10``.

    Args:
        n: The number to round.
        precission: The step to round to (spelling kept for callers that
            pass it by keyword).

    Returns:
        ``precission`` multiplied by an integer, so the result has the
        numeric type of ``precission``.

    Raises:
        ZeroDivisionError: If ``precission`` is zero.
    """
    # Shift by half a step in the direction of n so that int(), which
    # truncates toward zero, lands on the nearest multiple.
    correction = 0.5 if n >= 0 else -0.5
    # float() forces true division: under Python 2, int / int floors the
    # quotient, which made round_to(8, 5) return 5 instead of 10.
    return int(float(n) / precission + correction) * precission


# Transition counts, fetched from hmm_utils once when this module is loaded.
counts = hmm_utils.get_transition_counts()

# Command-line options, all read through cmd_utils when this module is loaded.
# The second argument (None) is presumably the value returned when the option
# is absent -- confirm against cmd_utils.
#
# Flags that note that input should be looked for in STDIN instead of
# in a test essay file
grade_directory = cmd_utils.cmd_arg('--dir', None)
final_score_stdin = cmd_utils.cmd_flag('--final-score', None)
parse_stdin = cmd_utils.cmd_flag('--parse', None)
score_stdin = cmd_utils.cmd_flag('--score', None)
pronoun_stdin = cmd_utils.cmd_flag('--pronoun', None)
topic_stdin = cmd_utils.cmd_flag('--topic', None)
syntactic_formation_stdin = cmd_utils.cmd_flag('--syn-formation', None)
agreement_stdin = cmd_utils.cmd_flag('--agree', None)
sentence_parse_stdin = cmd_utils.cmd_flag('--sen-token', None)
word_order_parse_stdin = cmd_utils.cmd_flag('--word-order', None)


# Values of the --count and --prob options (read with cmd_arg, like --dir
# above, rather than cmd_flag).
transition_count = cmd_utils.cmd_arg('--count', None)
transition_prob = cmd_utils.cmd_arg('--prob', None)

Ejemplo n.º 2
0
    (2, 1, 2, 1,),
    (1, 1, 1, 1, 1, 1, 1, 1, 1, 1,),  # 10
    (2, 2, 2, 1,),
    (1, 2, 1, 2,),
    (3, 3, 2, 2,),
    (2, 3,),
    (2, 2, 1,),  # 15
    (1, 1, 1, 3, 1),
    (2, 1,),
    (2, 2, 1,),
    (2, 3,),
    (2, 2, 2, 2),  # 20
)

# Transition counts, fetched from hmm_utils once when this module is loaded.
counts = hmm_utils.get_transition_counts()
# The --essay value is shifted down by one; with the 0 passed as second
# argument (presumably the default -- confirm in cmd_utils) this becomes -1.
essay_index = int(cmd_utils.cmd_arg('--essay', 0)) - 1
# The --line value is used as given; -1 is passed as the second argument.
line_index = int(cmd_utils.cmd_arg('--line', -1))
use_stdin = cmd_utils.cmd_flag('--stdin')


# How much to prefer long answers over shorter ones
weight = .0001

# Tag-name constants; the code that consumes them is outside this excerpt.
# NOTE(review): these look like Penn Treebank labels -- confirm against the
# tagger in use.
invalid_boundary_tags = ('IN', 'CC', 'SINV', 'RP', 'TO')
pers_pro_tags = ('PRP', 'PRP$')
start_pers_pro_weight = 1000


def _possible_sentences_in_line(line, min_sentence_len=3):
    # The simplest thing here is to defer to the paper.  If it looks like they've
    # added punctuation already, let's just use that
Ejemplo n.º 3
0

def grade_3a(text):
    """Return the 3a grade for ``text`` based on its sentence count.

    The text is split with ``sentence_tokenizer.parse`` and the grade is
    the number of sentences minus one, limited to the range 1..5 (six or
    more sentences give the top grade of 5; zero to two sentences give 1).
    """
    sentence_count = len(sentence_tokenizer.parse(text))
    # Clamp (count - 1) into [1, 5].
    return min(max(sentence_count - 1, 1), 5)

if __name__ == '__main__':
    import cmd_utils

    tests = cmd_utils.cmd_test()
    tests = [tests] if tests else ('1a', '1b', '1d', '2a', '2b', '3a')
    essay_index = int(cmd_utils.cmd_arg('--essay', 0)) - 1

    for test in tests:
        if essay_index >= 0:
            essay_text = "\n".join(essay_utils.essays[essay_index])
            received_grade = grade_text(essay_text, test)
            log("Expect %s score: %d" % (test, correct_essay_grade(essay_index, test)), 0)
            log("Received %s score: %d" % (test, received_grade), 0)
        else:
            print "Values for %s" % (test,)
            print "-------------"
            for i in range(0, len(essay_utils.essays)):
                essay_text = "\n".join(essay_utils.essays[i])
                received_grade = grade_text(essay_text, test)
                expected_grade = correct_essay_grade(i, test)
                diff = received_grade - expected_grade
Ejemplo n.º 4
0
    try:
        rs = mem_caches[cache_name][cache_key]
        log('Cache Hit: %s[%s]' % (cache_name, cache_key), 5)
        return rs
    except KeyError:
        return None


def cache_set(cache_name, cache_key, cache_value):
    """Store ``cache_value`` under ``cache_key`` in the named cache and persist it.

    If ``cache_name`` has no entry in ``mem_caches`` yet, ``cache_get`` is
    called first (presumably it creates/loads that entry as a side effect;
    its beginning is outside this excerpt -- confirm). The value is then
    written into the in-memory cache and the whole cache is saved with
    ``_write_cache``.
    """
    already_loaded = cache_name in mem_caches
    if not already_loaded:
        # Called only for its effect on mem_caches; the result is discarded.
        cache_get(cache_name, cache_key)

    named_cache = mem_caches[cache_name]
    named_cache[cache_key] = cache_value
    _write_cache(cache_name)


def _write_cache(cache_name):
    """Pickle the in-memory cache ``cache_name`` to ``cache/<cache_name>.data``.

    The path is relative to the current working directory. The file is
    opened in binary write mode, so any previous contents are replaced.

    Raises:
        KeyError: If ``cache_name`` has no entry in ``mem_caches``.
        IOError/OSError: If the file cannot be opened or written.
    """
    file_name = cache_name + '.data'
    # The with-block closes the file even when pickle.dump raises; the
    # earlier explicit close() was skipped on error and leaked the handle.
    with open(os.path.join('cache', file_name), 'wb') as f_write:
        pickle.dump(mem_caches[cache_name], f_write)


if __name__ == "__main__":
    # Script entry point: reads the --key and --name options and, only when
    # both are truthy, passes them to cache_del (defined outside this
    # excerpt; presumably removes that key from the named cache -- confirm).
    from cmd_utils import cmd_arg
    cache_key = cmd_arg('--key', None)
    cache_name = cmd_arg('--name', None)
    if cache_key and cache_name:
        cache_del(cache_name, cache_key)
Ejemplo n.º 5
0
    try:
        rs = mem_caches[cache_name][cache_key]
        log('Cache Hit: %s[%s]' % (cache_name, cache_key), 5)
        return rs
    except KeyError:
        return None


def cache_set(cache_name, cache_key, cache_value):
    """Store ``cache_value`` under ``cache_key`` in the named cache and persist it.

    If ``cache_name`` has no entry in ``mem_caches`` yet, ``cache_get`` is
    called first (presumably it creates/loads that entry as a side effect;
    its beginning is outside this excerpt -- confirm). The value is then
    written into the in-memory cache and the whole cache is saved with
    ``_write_cache``.
    """
    already_loaded = cache_name in mem_caches
    if not already_loaded:
        # Called only for its effect on mem_caches; the result is discarded.
        cache_get(cache_name, cache_key)

    named_cache = mem_caches[cache_name]
    named_cache[cache_key] = cache_value
    _write_cache(cache_name)


def _write_cache(cache_name):
    """Pickle the in-memory cache ``cache_name`` to ``cache/<cache_name>.data``.

    The path is relative to the current working directory. The file is
    opened in binary write mode, so any previous contents are replaced.

    Raises:
        KeyError: If ``cache_name`` has no entry in ``mem_caches``.
        IOError/OSError: If the file cannot be opened or written.
    """
    file_name = cache_name + '.data'
    # The with-block closes the file even when pickle.dump raises; the
    # earlier explicit close() was skipped on error and leaked the handle.
    with open(os.path.join('cache', file_name), 'wb') as f_write:
        pickle.dump(mem_caches[cache_name], f_write)


if __name__ == "__main__":
    # Script entry point: reads the --key and --name options and, only when
    # both are truthy, passes them to cache_del (defined outside this
    # excerpt; presumably removes that key from the named cache -- confirm).
    from cmd_utils import cmd_arg
    cache_key = cmd_arg('--key', None)
    cache_name = cmd_arg('--name', None)
    if cache_key and cache_name:
        cache_del(cache_name, cache_key)
Ejemplo n.º 6
0
import hmm_utils
import cmd_utils
import parsers


def round_to(n, precission):
    """Round ``n`` to the nearest multiple of ``precission``.

    Exact halves are rounded away from zero, e.g. ``round_to(5, 10)`` is
    ``10`` and ``round_to(-5, 10)`` is ``-10``.

    Args:
        n: The number to round.
        precission: The step to round to (spelling kept for callers that
            pass it by keyword).

    Returns:
        ``precission`` multiplied by an integer, so the result has the
        numeric type of ``precission``.

    Raises:
        ZeroDivisionError: If ``precission`` is zero.
    """
    # Shift by half a step in the direction of n so that int(), which
    # truncates toward zero, lands on the nearest multiple.
    correction = 0.5 if n >= 0 else -0.5
    # float() forces true division: under Python 2, int / int floors the
    # quotient, which made round_to(8, 5) return 5 instead of 10.
    return int(float(n) / precission + correction) * precission


# Transition counts, fetched from hmm_utils once when this module is loaded.
counts = hmm_utils.get_transition_counts()

# Command-line options, all read through cmd_utils when this module is loaded.
# The second argument (None) is presumably the value returned when the option
# is absent -- confirm against cmd_utils.
#
# Flags that note that input should be looked for in STDIN instead of
# in a test essay file
grade_directory = cmd_utils.cmd_arg('--dir', None)
final_score_stdin = cmd_utils.cmd_flag('--final-score', None)
parse_stdin = cmd_utils.cmd_flag('--parse', None)
score_stdin = cmd_utils.cmd_flag('--score', None)
pronoun_stdin = cmd_utils.cmd_flag('--pronoun', None)
topic_stdin = cmd_utils.cmd_flag('--topic', None)
syntactic_formation_stdin = cmd_utils.cmd_flag('--syn-formation', None)
agreement_stdin = cmd_utils.cmd_flag('--agree', None)
sentence_parse_stdin = cmd_utils.cmd_flag('--sen-token', None)
word_order_parse_stdin = cmd_utils.cmd_flag('--word-order', None)

# Values of the --count and --prob options (read with cmd_arg, like --dir
# above, rather than cmd_flag).
transition_count = cmd_utils.cmd_arg('--count', None)
transition_prob = cmd_utils.cmd_arg('--prob', None)

if grade_directory:
    import os
Ejemplo n.º 7
0
        1,
    ),
    (
        2,
        2,
        1,
    ),
    (
        2,
        3,
    ),
    (2, 2, 2, 2),  # 20
)

# Transition counts, fetched from hmm_utils once when this module is loaded.
counts = hmm_utils.get_transition_counts()
# The --essay value is shifted down by one; with the 0 passed as second
# argument (presumably the default -- confirm in cmd_utils) this becomes -1.
essay_index = int(cmd_utils.cmd_arg('--essay', 0)) - 1
# The --line value is used as given; -1 is passed as the second argument.
line_index = int(cmd_utils.cmd_arg('--line', -1))
use_stdin = cmd_utils.cmd_flag('--stdin')

# How much to prefer long answers over shorter ones
weight = .0001

# Tag-name constants; the code that consumes them is outside this excerpt.
# NOTE(review): these look like Penn Treebank labels -- confirm against the
# tagger in use.
invalid_boundary_tags = ('IN', 'CC', 'SINV', 'RP', 'TO')
pers_pro_tags = ('PRP', 'PRP$')
start_pers_pro_weight = 1000


def _possible_sentences_in_line(line, min_sentence_len=3):
    # The simplest thing here is to defer to the paper.  If it looks like they've
    # added punctuation already, let's just use that
    has_abbr = sum([
Ejemplo n.º 8
0
def grade_3a(text):
    """Return the 3a grade for ``text`` based on its sentence count.

    The text is split with ``sentence_tokenizer.parse`` and the grade is
    the number of sentences minus one, limited to the range 1..5 (six or
    more sentences give the top grade of 5; zero to two sentences give 1).
    """
    sentence_count = len(sentence_tokenizer.parse(text))
    # Clamp (count - 1) into [1, 5].
    return min(max(sentence_count - 1, 1), 5)


if __name__ == '__main__':
    import cmd_utils

    tests = cmd_utils.cmd_test()
    tests = [tests] if tests else ('1a', '1b', '1d', '2a', '2b', '3a')
    essay_index = int(cmd_utils.cmd_arg('--essay', 0)) - 1

    for test in tests:
        if essay_index >= 0:
            essay_text = "\n".join(essay_utils.essays[essay_index])
            received_grade = grade_text(essay_text, test)
            log(
                "Expect %s score: %d" %
                (test, correct_essay_grade(essay_index, test)), 0)
            log("Received %s score: %d" % (test, received_grade), 0)
        else:
            print "Values for %s" % (test, )
            print "-------------"
            for i in range(0, len(essay_utils.essays)):
                essay_text = "\n".join(essay_utils.essays[i])
                received_grade = grade_text(essay_text, test)