Example #1
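    # Count each place an ADJP directly follows an NP under an S node;
    # that ordering is treated as forbidden, so each occurrence is a problem.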
    problems += ['NP->ADJP in S'] * num_forbidden_orders(tree, ('S',), ('NP', 'ADJP'))

    # Finally, if there is an S with only one child, we call it a word order problem...
    problems += ['Single Child S'] * len(list(tree.subtrees(lambda x: x.node in tree_utils.semi_tree_roots and len(x) == 1)))

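    # If the root's first child is not a sentence-level node at all, flag a
    # missing S root (at most once per tree, via the _has_error marker).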
    if tree[0].node not in tree_utils.semi_tree_roots and not hasattr(tree[0], '_has_error'):
        tree[0]._has_error = True
        problems += ['No S Root']

    log("Found %d order issues" % (len(problems),), 1)
    log("Issues: %s", (problems,), 2)

    if use_cache:
        cache_set('word_order_issues', sentence, problems)

    return problems


if __name__ == "__main__":

    essay_index = cmd_essay_index()
    for essay in [essay_utils.essays[essay_index]]:
        issues_in_text = []
        for line in essay:
            issues_in_line = []
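            # A line may contain several sentences; collect issues per sentence.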
            for sentence in parse_sentences(line):
                sentence_issues = issues_in_sentence(sentence)
                issues_in_text += sentence_issues
                issues_in_line += sentence_issues
    print issues_in_text
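
Example #1 calls a num_forbidden_orders helper that is not shown here. The sketch below is only a guess at its behavior, inferred from how it is called above, and is written against the old NLTK 2.x Tree API (.node rather than .label()) that these examples use.

def num_forbidden_orders(tree, parent_labels, child_order):
    # Count subtrees whose label is in parent_labels and whose children's
    # labels contain child_order as a consecutive subsequence.
    count = 0
    for subtree in tree.subtrees(lambda t: t.node in parent_labels):
        labels = [child.node for child in subtree if hasattr(child, 'node')]
        for i in range(len(labels) - len(child_order) + 1):
            if tuple(labels[i:i + len(child_order)]) == tuple(child_order):
                count += 1
    return count
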
Example #2
        print tree
        if score_stdin:
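            # Pull the tag transition sequences out of the parse tree so
            # each can be scored against the n-gram counts.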
            sentence_transitions = tree_utils.transitions_in_tree(tree)
            sentence_probs = []
            for transition in sentence_transitions:
                print "Transitions: %s" % (transition)
                probs = hmm_utils.prob_of_all_transitions(transition, counts, gram_size=3)
                print "Probs: %s" % (probs)
                sentence_probs += probs
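            # The overall score for the parse is the product of its
            # individual transition probabilities.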
            total = 1
            for prob in sentence_probs:
                total *= prob
            print "Total: %f" % (total,)
elif sentence_parse_stdin:
    import sentence_tokenizer
    sentences = sentence_tokenizer.parse_sentences(cmd_utils.get_stdin(), use_cache=False)
    print sentences
elif word_order_parse_stdin:
    import sentence_tokenizer
    import word_order
    lines = cmd_utils.get_stdin_lines()
    issues_in_text = []
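    # Split each stdin line into sentences and report the word order
    # issues found in each one.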
    for line in lines:
        sentences = sentence_tokenizer.parse_sentences(line)
        for sentence in sentences:
            issues = word_order.issues_in_sentence(sentence, use_cache=False)
            print sentence
            print issues
            issues_in_text += issues
    print "Found %d issues" % (len(issues_in_text),)
    print "Issues: %s" % (issues_in_text,)