problems += ['NP->ADJP in S'] * num_forbidden_orders(tree, ('S',), ('NP', 'ADJP')) # Last, if there is an S w/ only one child, we call it a word order problem... problems += ['Single Child S'] * len(list(tree.subtrees(lambda x: x in tree_utils.semi_tree_roots and len(x) == 1))) if tree[0].node not in tree_utils.semi_tree_roots and not hasattr(tree[0], '_has_error'): tree[0]._has_error = True problems += ['No S Root'] log("Found %d order issues" % (len(problems),), 1) log("Issues: %s", (problems,), 2) if use_cache: cache_set('word_order_issues', sentence, problems) return problems if __name__ == "__main__": essay_index = cmd_essay_index() for essay in [essay_utils.essays[essay_index]]: issues_in_text = [] for line in essay: issues_in_line = [] for sentence in parse_sentences(line): issues_in_sentence = issues_in_sentence(sentence) issues_in_text += issues_in_sentence issues_in_line += issues_in_sentence print issues_in_text
# Command-line entry: dispatch on whichever stdin-processing mode flag was set.
# NOTE(review): this chunk starts mid-script; `tree`, `score_stdin`, `counts`,
# `cmd_utils` and the other mode flags are defined above this view.
print tree
if score_stdin:
    # Score the parse tree: take the product of the HMM probabilities of every
    # grammar-rule transition in the tree, using a 3-gram model over `counts`.
    sentence_transitions = tree_utils.transitions_in_tree(tree)
    sentence_probs = []
    for transition in sentence_transitions:
        print "Transitions: %s" % (transition)
        probs = hmm_utils.prob_of_all_transitions(transition, counts, gram_size=3)
        print "Probs: %s" % (probs)
        sentence_probs += probs
    # Multiply all transition probabilities into a single sentence score.
    total = 1
    for prob in sentence_probs:
        total *= prob
    print "Total: %f" % (total,)
elif sentence_parse_stdin:
    # Tokenize all of stdin into sentences and echo the result (cache bypassed).
    import sentence_tokenizer
    sentences = sentence_tokenizer.parse_sentences(cmd_utils.get_stdin(), use_cache=False)
    print sentences
elif word_order_parse_stdin:
    # Run the word-order checker over every sentence of every stdin line and
    # report the accumulated issues.
    import sentence_tokenizer
    import word_order
    lines = cmd_utils.get_stdin_lines()
    issues_in_text = []
    for line in lines:
        sentences = sentence_tokenizer.parse_sentences(line)
        for sentence in sentences:
            issues = word_order.issues_in_sentence(sentence, use_cache=False)
            print sentence
            print issues
            issues_in_text += issues
    print "Found %d issues" % (len(issues_in_text),)
    print "Issues: %s" % (issues_in_text,)
# NOTE(review): this chunk begins inside an `if score_stdin:` branch whose `if`
# line is outside this view, so its first statements are indented under it.
    # Score the parse tree: take the product of the HMM probabilities of every
    # grammar-rule transition in the tree, using a 3-gram model over `counts`.
    sentence_transitions = tree_utils.transitions_in_tree(tree)
    sentence_probs = []
    for transition in sentence_transitions:
        print "Transitions: %s" % (transition)
        probs = hmm_utils.prob_of_all_transitions(transition, counts, gram_size=3)
        print "Probs: %s" % (probs)
        sentence_probs += probs
    # Multiply all transition probabilities into a single sentence score.
    total = 1
    for prob in sentence_probs:
        total *= prob
    print "Total: %f" % (total, )
elif sentence_parse_stdin:
    # Tokenize all of stdin into sentences and echo the result (cache bypassed).
    import sentence_tokenizer
    sentences = sentence_tokenizer.parse_sentences(cmd_utils.get_stdin(), use_cache=False)
    print sentences
elif word_order_parse_stdin:
    # Run the word-order checker over every sentence of every stdin line and
    # report the accumulated issues.
    import sentence_tokenizer
    import word_order
    lines = cmd_utils.get_stdin_lines()
    issues_in_text = []
    for line in lines:
        sentences = sentence_tokenizer.parse_sentences(line)
        for sentence in sentences:
            issues = word_order.issues_in_sentence(sentence, use_cache=False)
            print sentence
            print issues
            issues_in_text += issues
    print "Found %d issues" % (len(issues_in_text), )
    print "Issues: %s" % (issues_in_text, )