def get_trees(self):
    """Read every tree from the files listed in ``self.files``.

    Each path in ``self.files`` is opened in binary mode and handed to
    ``Trees.PennTreeReader``; all trees it yields are collected, in file
    order, into a single list.

    A progress line ("Tree N") is printed whenever the 1-based file
    index reaches a multiple of 100.

    Returns:
        A list of the trees yielded by the reader for all files (empty
        when ``self.files`` is empty).
    """
    trees = []
    for i, tree_file in enumerate(self.files):
        if (i + 1) % 100 == 0:
            # Single-argument parenthesised form prints identically under
            # the Python 2 print statement and the Python 3 function.
            print("Tree %d" % (i + 1))
        # ``with`` guarantees the handle is closed even if the reader
        # raises part-way through a malformed file.
        with open(tree_file, 'rb') as ff:
            trees.extend(Trees.PennTreeReader(ff))
    return trees
def output(partId, ch_aux):
    """Uses the student code to compute the output for test cases.

    Trains a ``PCFGParser`` on the MASC training trees and evaluates it
    with ``test_parser``.  While the parser runs, ``sys.stdout`` is
    pointed at ``os.devnull`` so that only the progress messages written
    explicitly to the original stdout are shown.

    Args:
        partId: 1 evaluates on the development set read from disk;
            2 evaluates on the trees parsed from ``ch_aux``.
        ch_aux: Text of the test trees in the format accepted by
            ``Trees.PennTreeReader``; only read when ``partId`` is 2.
            A ``None`` value triggers an error message but, as before,
            does not abort the run.

    Returns:
        The string ``"[partId, version, f1]"`` on success, or ``None``
        when ``partId`` is neither 1 nor 2.
    """
    print("== getting output for part: %d" % partId)
    if ch_aux is None:
        print("== Error receiving data from server. Please try again.")
    version = 1

    out = sys.stdout
    devnull = open(os.devnull, 'w')
    sys.stdout = devnull
    try:
        #parser = BaselineParser()
        parser = PCFGParser()
        base_path = "../data/parser/masc/"

        out.write("== Training parser...\n")
        train_trees = read_masc_trees("%strain" % base_path, 0, 38)
        parser.train(train_trees)
        out.write("== done training.\n")

        if partId == 1:
            out.write("== Reading in development set...\n")
            test_trees = read_masc_trees("%sdevtest" % base_path, 0, 11)
            out.write("== Testing on development set...\n")
            f1 = test_parser(parser, test_trees, out)
        elif partId == 2:
            out.write("== Reading in test set...\n")
            import StringIO
            test_string = StringIO.StringIO()
            test_string.write(ch_aux)
            test_string.seek(0)
            raw_test_trees = list(Trees.PennTreeReader(test_string))
            test_trees = [Trees.StandardTreeNormalizer.transform_tree(tree)
                          for tree in raw_test_trees]
            out.write("== Testing on test set...\n")
            f1 = test_parser(parser, test_trees, out)
        else:
            out.write("!!! Invalid part choice: %d\n" % partId)
            return None

        return "[%d, %d, %f]" % (partId, version, f1)
    finally:
        # Always restore the real stdout and release the devnull handle,
        # including when training or testing raises.
        sys.stdout = out
        devnull.close()
else: return 1 end = start for child in tree.children: child_span = self.add_constituents(child, aset, end) end += child_span label = tree.label if label not in self.labels_to_ignore: aset.add(LabeledConstituent(label, start, end)) return end - start if __name__ == '__main__': import StringIO gold_string = "(ROOT (S (NP (DT the) (NN can)) (VP (VBD fell))))" gold_io = StringIO.StringIO() gold_io.write(gold_string) gold_io.seek(0) gold_tree = Trees.PennTreeReader(gold_io).next() guess_string = "(ROOT (S (NP (DT the)) (VP (MB can) (VP (VBD fell)))))" guess_io = StringIO.StringIO() guess_io.write(guess_string) guess_io.seek(0) guess_tree = Trees.PennTreeReader(guess_io).next() evaluator = LabeledConstituentEval(["ROOT"], set()) evaluator.evaluate(guess_tree, gold_tree) evaluator.display(True)