Example #1
0
def trainMapping():
    """Build a feature-name -> index mapping from the GENIA training
    corpus and serialize it to the file ``featurefile``.

    Reads the hard-coded training XML, iterates its documents via
    gifxmlParser, builds the dictionary with buildDictionary and writes
    it out with writeMapping.  Returns nothing.
    """
    # 'with' guarantees both files are closed even if parsing or
    # dictionary construction raises; the original only closed them on
    # the success path, leaking the handles on any exception.
    with open("/usr/share/biotext/GeniaChallenge/xml/train.xml") as f:
        with open("featurefile", 'w') as f2:
            parser = gifxmlParser(f)
            iterator = parser.documentIterator()
            mapping = buildDictionary(iterator)
            writeMapping(mapping, f2)
Example #2
0
def getTP_FP_FN(g_edges, p_edges):
    """Score a set of predicted edges against the gold standard.

    g_edges -- gold-standard edges (a set)
    p_edges -- predicted edges (a set)

    Returns the tuple (TP, FP, FN): true positives are edges present in
    both sets; the remaining predictions are false positives; the
    remaining gold edges are false negatives.
    """
    true_pos = len(g_edges.intersection(p_edges))
    false_pos = len(p_edges) - true_pos
    false_neg = len(g_edges) - true_pos
    return true_pos, false_pos, false_neg

if __name__=="__main__":
    optparser = instantiateOptionParser()
    (options, args) = optparser.parse_args()
    if len(args) != 2:
        sys.stdout.write(optparser.get_usage())
        print "python CandidateGenerator.py -h for options\n"
        sys.exit(0)
    p_file = open(args[0])
    g_file = open(args[1])
    p_parser = parseGifxml.gifxmlParser(p_file)
    p_iterator = p_parser.documentIterator()
    g_parser = parseGifxml.gifxmlParser(g_file)
    g_iterator = g_parser.documentIterator()
    counter = 1
    oracleStatistics(p_iterator, g_iterator, options.nbest)
    sys.exit(0)
    for p_document, g_document in zip(p_iterator, g_iterator):
        for p_child, g_child in zip(p_document, g_document):
            if p_child.tag == "sentence":
                assert p_child.attrib["id"]==g_child.attrib["id"]
                p_entities, p_pairs = getEntitiesAndPairs(p_child)
                g_entities, g_pairs = getEntitiesAndPairs(g_child)
                predictions = getSimplePredictions(p_entities, p_pairs)
                table, table_transpose, keys = toTable(predictions)
                best = nbest.decode(table_transpose,options.nbest)
Example #3
0
    f = open("/usr/share/biotext/GeniaChallenge/xml/train.xml")
    f2 = open("featurefile",'w')
    parser = gifxmlParser(f)
    iterator = parser.documentIterator()
    mapping = buildDictionary(iterator)
    writeMapping(mapping, f2)
    f.close()
    f2.close()

if __name__=="__main__":
    f = open("/usr/share/biotext/GeniaChallenge/xml/train.xml")
    f2 = open("featurefile")
    mapping = readDictionaryMapping(f2)
    f2.close()
    f2 = open("train_inputs",'w')
    parser = gifxmlParser(f)
    iterator = parser.documentIterator()
    for document in iterator:
        for child in document:
            if child.tag == "sentence":
                tokenization, parse, entities = getTokenizationParseEntities(child, "split-Charniak-Lease", "split-Charniak-Lease")
                token_features = getTokenFeatures(tokenization, entities)
                dep_features = getDependencyFeatures(parse, token_features)
                features = getFinalFeatures(token_features, dep_features)
                fvalues = {}
                for key in features.keys():
                    if key in mapping:
                        fvalues[int(mapping[key])] = features[key]
                keys = fvalues.keys()
                keys.sort()
                line = "".join("%d:%f " %(x, fvalues[x]) for x in keys)+"\n"
Example #4
0
    # Body of an edge-scoring routine; its `def` line is outside this
    # view (snippet truncated) — same logic as getTP_FP_FN above.
    # TP: edges in both gold and predicted sets.
    TP = len(g_edges.intersection(p_edges))
    # FP: predicted edges not in the gold set.
    FP = len(p_edges) - TP
    # FN: gold edges that were never predicted.
    FN = len(g_edges) - TP
    return TP, FP, FN


if __name__ == "__main__":
    optparser = instantiateOptionParser()
    (options, args) = optparser.parse_args()
    if len(args) != 2:
        sys.stdout.write(optparser.get_usage())
        print "python CandidateGenerator.py -h for options\n"
        sys.exit(0)
    p_file = open(args[0])
    g_file = open(args[1])
    p_parser = parseGifxml.gifxmlParser(p_file)
    p_iterator = p_parser.documentIterator()
    g_parser = parseGifxml.gifxmlParser(g_file)
    g_iterator = g_parser.documentIterator()
    counter = 1
    oracleStatistics(p_iterator, g_iterator, options.nbest)
    sys.exit(0)
    for p_document, g_document in zip(p_iterator, g_iterator):
        for p_child, g_child in zip(p_document, g_document):
            if p_child.tag == "sentence":
                assert p_child.attrib["id"] == g_child.attrib["id"]
                p_entities, p_pairs = getEntitiesAndPairs(p_child)
                g_entities, g_pairs = getEntitiesAndPairs(g_child)
                predictions = getSimplePredictions(p_entities, p_pairs)
                table, table_transpose, keys = toTable(predictions)
                best = nbest.decode(table_transpose, options.nbest)