def trainMapping():
    """Build the feature-name -> feature-id mapping from the GENIA training
    corpus and write it to the file "featurefile".

    Reads the training XML from a hard-coded path, builds the dictionary via
    buildDictionary() over the document iterator, and serializes it with
    writeMapping(). Returns None.
    """
    f = open("/usr/share/biotext/GeniaChallenge/xml/train.xml")
    f2 = open("featurefile", 'w')
    try:
        parser = gifxmlParser(f)
        iterator = parser.documentIterator()
        mapping = buildDictionary(iterator)
        writeMapping(mapping, f2)
    finally:
        # Original closed the handles only on the success path and leaked
        # them if buildDictionary()/writeMapping() raised; always close.
        f.close()
        f2.close()
def getTP_FP_FN(g_edges, p_edges):
    """Return (TP, FP, FN) counts comparing predicted edges against gold.

    g_edges -- set of gold-standard edges
    p_edges -- set of predicted edges
    """
    TP = len(g_edges.intersection(p_edges))  # predicted AND in gold
    FP = len(p_edges) - TP                   # predicted but not in gold
    FN = len(g_edges) - TP                   # in gold but not predicted
    return TP, FP, FN


if __name__ == "__main__":
    # Usage: CandidateGenerator.py <predictions-file> <gold-file>
    optparser = instantiateOptionParser()
    (options, args) = optparser.parse_args()
    if len(args) != 2:
        sys.stdout.write(optparser.get_usage())
        # print(...) with a single argument behaves identically under
        # Python 2's print statement and Python 3's print function.
        print("python CandidateGenerator.py -h for options\n")
        sys.exit(0)
    p_file = open(args[0])
    g_file = open(args[1])
    p_parser = parseGifxml.gifxmlParser(p_file)
    p_iterator = p_parser.documentIterator()
    g_parser = parseGifxml.gifxmlParser(g_file)
    g_iterator = g_parser.documentIterator()
    oracleStatistics(p_iterator, g_iterator, options.nbest)
    sys.exit(0)
    # NOTE(review): everything below is unreachable because of the
    # sys.exit(0) above -- presumably debug scaffolding left in place.
    # Kept verbatim; confirm intent before deleting. (The unused
    # `counter = 1` that preceded this loop in the original was removed.)
    for p_document, g_document in zip(p_iterator, g_iterator):
        for p_child, g_child in zip(p_document, g_document):
            if p_child.tag == "sentence":
                # Sentences are compared pairwise; ids must line up.
                assert p_child.attrib["id"] == g_child.attrib["id"]
                p_entities, p_pairs = getEntitiesAndPairs(p_child)
                g_entities, g_pairs = getEntitiesAndPairs(g_child)
                predictions = getSimplePredictions(p_entities, p_pairs)
                table, table_transpose, keys = toTable(predictions)
                best = nbest.decode(table_transpose, options.nbest)
# NOTE(review): this module-level chunk duplicates the body of trainMapping()
# and runs unconditionally at import time (it builds "featurefile", which the
# __main__ block below then re-reads). Possibly an intentional two-stage
# pipeline, possibly an accidental copy -- confirm before restructuring.
f = open("/usr/share/biotext/GeniaChallenge/xml/train.xml")
f2 = open("featurefile", 'w')
parser = gifxmlParser(f)
iterator = parser.documentIterator()
mapping = buildDictionary(iterator)
writeMapping(mapping, f2)
f.close()
f2.close()

if __name__ == "__main__":
    # Re-read the mapping just written above, then emit one sparse feature
    # vector per sentence to "train_inputs".
    f = open("/usr/share/biotext/GeniaChallenge/xml/train.xml")
    f2 = open("featurefile")
    mapping = readDictionaryMapping(f2)
    f2.close()
    f2 = open("train_inputs", 'w')
    parser = gifxmlParser(f)
    iterator = parser.documentIterator()
    for document in iterator:
        for child in document:
            if child.tag == "sentence":
                # Both the tokenization and the parse come from the
                # "split-Charniak-Lease" analyses of the sentence.
                tokenization, parse, entities = getTokenizationParseEntities(child, "split-Charniak-Lease", "split-Charniak-Lease")
                token_features = getTokenFeatures(tokenization, entities)
                dep_features = getDependencyFeatures(parse, token_features)
                features = getFinalFeatures(token_features, dep_features)
                # Keep only features present in the mapping, keyed by their
                # integer feature id.
                fvalues = {}
                for key in features.keys():
                    if key in mapping:
                        fvalues[int(mapping[key])] = features[key]
                # Python 2: dict.keys() returns a list, so in-place sort works.
                keys = fvalues.keys()
                keys.sort()
                # SVM-light-style "id:value" pairs in ascending id order.
                line = "".join("%d:%f " %(x, fvalues[x]) for x in keys)+"\n"
                # NOTE(review): `line` is computed but never written in this
                # view -- the f2.write(line) presumably follows on a line
                # outside this chunk; verify against the full file.
# NOTE(review): incomplete fragment -- the enclosing "def getTP_FP_FN(...)"
# header lies outside this chunk's view; the four indented lines below are
# that function's tail (a duplicate of the copy earlier in the file).
    TP = len(g_edges.intersection(p_edges))  # predicted AND in gold
    FP = len(p_edges) - TP                   # predicted but not in gold
    FN = len(g_edges) - TP                   # in gold but not predicted
    return TP, FP, FN

if __name__ == "__main__":
    # Usage: CandidateGenerator.py <predictions-file> <gold-file>
    optparser = instantiateOptionParser()
    (options, args) = optparser.parse_args()
    if len(args) != 2:
        sys.stdout.write(optparser.get_usage())
        print "python CandidateGenerator.py -h for options\n"
        sys.exit(0)
    p_file = open(args[0])
    g_file = open(args[1])
    p_parser = parseGifxml.gifxmlParser(p_file)
    p_iterator = p_parser.documentIterator()
    g_parser = parseGifxml.gifxmlParser(g_file)
    g_iterator = g_parser.documentIterator()
    counter = 1  # NOTE(review): never used below
    oracleStatistics(p_iterator, g_iterator, options.nbest)
    sys.exit(0)
    # NOTE(review): the loop below is unreachable because of sys.exit(0)
    # above -- presumably debug scaffolding; confirm before removing.
    for p_document, g_document in zip(p_iterator, g_iterator):
        for p_child, g_child in zip(p_document, g_document):
            if p_child.tag == "sentence":
                # Sentences are compared pairwise; ids must line up.
                assert p_child.attrib["id"] == g_child.attrib["id"]
                p_entities, p_pairs = getEntitiesAndPairs(p_child)
                g_entities, g_pairs = getEntitiesAndPairs(g_child)
                predictions = getSimplePredictions(p_entities, p_pairs)
                table, table_transpose, keys = toTable(predictions)
                # NOTE(review): `best` is computed but never used in this view.
                best = nbest.decode(table_transpose,options.nbest)