def main():
    """Print a mapping built from the items of every training tree.

    Reads ``data/train_trees`` line by line, parses each line with
    ``tree_parser.get_tree`` and walks the result with ``traverse_parent``.
    For every yielded item ``t`` the mapping associates ``t[1:]`` with the
    list of distinct ``t[0]`` values seen for it, in first-seen order.
    The finished mapping is printed to stdout.

    NOTE(review): presumably ``t[0]`` is the parent label and ``t[1:]`` the
    rule below it -- confirm against ``traverse_parent``.
    """
    frequencies = {}
    # The context manager closes the training file even if parsing raises.
    with open("data/train_trees") as f:
        for line in f:
            tree = tree_parser.get_tree(line)  # charity from Patrick de Kok
            for t in traverse_parent("TOP", tree[1]):  # skip TOP
                seen = frequencies.setdefault(t[1:], [])
                # A list (not a set) is kept so the printed output retains
                # its original shape and ordering.
                if t[0] not in seen:
                    seen.append(t[0])
    # Single-argument print(...) is identical under the Python 2 statement.
    print(frequencies)
def main():
    """Print nested occurrence counts for the items of every training tree.

    Reads ``data/train_trees`` line by line, parses each line with
    ``tree_parser.get_tree`` and walks the result with ``traverse_parent``.
    For every yielded item ``t`` the count stored at
    ``frequencies[t[0]][t[1:]]`` is incremented, so the result is a dict of
    dicts of integer counts. The finished mapping is printed to stdout.

    NOTE(review): presumably ``t[0]`` is the parent label and ``t[1:]`` the
    rule below it -- confirm against ``traverse_parent``.
    """
    frequencies = {}
    # The context manager closes the training file even if parsing raises.
    with open('data/train_trees') as f:
        for line in f:
            tree = tree_parser.get_tree(line)  # charity from Patrick de Kok
            for t in traverse_parent('TOP', tree[1]):  # skip TOP
                # Plain dicts (rather than defaultdict/Counter) keep the
                # printed representation unchanged.
                counts = frequencies.setdefault(t[0], {})
                counts[t[1:]] = counts.get(t[1:], 0) + 1
    # Single-argument print(...) is identical under the Python 2 statement.
    print(frequencies)
def evaluate():
    """Compare CYK output with the reference trees and print the tallies.

    For each tree produced by ``cyk.cyk`` over ``data/test_sentence`` the
    next line of ``data/test_trees`` is parsed as the reference tree. Both
    trees are walked with ``traverse`` in lockstep; they match when they
    yield the same number of items and every pair of items has equal
    length. Each CYK tree is printed, followed by ``1`` (match) or ``0``
    (mismatch); the totals are printed at the end.
    """
    correct = 0
    incorrect = 0
    # Unique marker returned by next() once the reference walk is exhausted.
    exhausted = object()

    # Both files are closed on exit, including when parsing raises.
    with open('data/test_sentence') as sentenceFile, \
            open('data/test_trees') as treeFile:
        for cykTree in cyk.cyk(sentenceFile):
            print(cykTree)
            realTree = tree_parser.get_tree(treeFile.readline())
            # TODO use traverse here
            r_traverse = traverse(realTree)
            match = True
            for c in traverse(cykTree):
                # A reference tree that runs out early is a mismatch, not
                # an uncaught StopIteration.
                r = next(r_traverse, exhausted)
                if r is exhausted or len(c) != len(r):
                    match = False
                    break
            # Items left over in the reference tree mean the CYK tree was
            # only a prefix of it, which is not a match either.
            if match and next(r_traverse, exhausted) is not exhausted:
                match = False

            if match:
                correct += 1
                print(1)
            else:
                incorrect += 1
                print(0)

    print('correct: ' + str(correct))
    print('incorrect: ' + str(incorrect))