def main():
    """Tally the items yielded by ``traverse`` over the training trees.

    Reads ``data/train_trees`` line by line, parses each line with
    ``tree_parser.get_tree`` and walks the subtree at index 1 of the parsed
    result.  For every item ``t`` yielded by ``traverse`` a counter stored at
    ``frequencies[t[0]][t[1:]]`` is incremented.  The resulting nested dict
    is printed to stdout; nothing is returned.

    NOTE(review): presumably ``t`` is a tuple of the form
    ``(lhs, rhs_1, ..., rhs_n)`` describing a grammar rule -- confirm
    against ``traverse``.  ``t[1:]`` must be hashable since it is used as a
    dict key.
    """
    frequencies = {}
    # Context manager so the file handle is released even if parsing fails.
    with open("data/train_trees") as f:
        for line in f:
            tree = tree_parser.get_tree(line)  # charity from Patrick de Kok
            for t in traverse(tree[1]):  # skip TOP
                head, rest = t[0], t[1:]
                # Create the inner table on first sight of ``head``, then
                # bump the count for this particular ``rest``.
                counts = frequencies.setdefault(head, {})
                counts[rest] = counts.get(rest, 0) + 1
    # Single-argument parenthesised print behaves the same on Python 2 and 3.
    print(frequencies)
def evaluate():
    """Compare trees produced by ``cyk.cyk`` with the reference test trees.

    For every tree yielded by ``cyk.cyk`` over ``data/test_sentence`` the
    next line of ``data/test_trees`` is parsed with
    ``tree_parser.get_tree``.  Both trees are walked with ``traverse`` in
    lockstep and are counted as matching when every pair of yielded items
    has the same length.  Each parsed tree is printed, followed by ``1``
    (match) or ``0`` (mismatch); the totals are printed at the end.
    Nothing is returned.

    NOTE(review): only the *lengths* of the traversed items are compared,
    not their contents, and items left over in the reference traversal
    after the parsed tree is exhausted are ignored -- confirm that this is
    the intended notion of a match.
    """
    correct = 0
    incorrect = 0
    # Unique marker returned by next() once the reference traversal runs dry.
    exhausted = object()

    # Context managers so both input files are closed on exit or on error.
    with open('data/test_sentence') as sentence_file:
        with open('data/test_trees') as tree_file:
            for cyk_tree in cyk.cyk(sentence_file):
                print(cyk_tree)
                real_tree = tree_parser.get_tree(tree_file.readline())
                real_nodes = traverse(real_tree)

                match = True
                for cyk_node in traverse(cyk_tree):
                    # A reference tree with fewer nodes than the parsed one
                    # is a mismatch, not an uncaught StopIteration.
                    real_node = next(real_nodes, exhausted)
                    if (real_node is exhausted
                            or len(cyk_node) != len(real_node)):
                        match = False
                        break

                if match:
                    correct += 1
                    print(1)
                else:
                    incorrect += 1
                    print(0)

    print('correct: ' + str(correct))
    print('incorrect: ' + str(incorrect))