Ejemplo n.º 1
0
def writeSamples(sentences, filePrefix, count, tigerFile):
    """Randomly sample sentences and write them out with their treebank nodes.

    Entries are drawn from ``sentences`` with ``random.choice`` (so the same
    entry can be picked more than once) until ``count`` of them have been
    resolved to a sentence ID by ``lookup``. Three files are produced:

    * ``filePrefix + ".meta"`` -- one resolved sentence ID per line
    * ``filePrefix + ".txt"``  -- the stripped ``[2]`` field of each sample
    * ``filePrefix + ".xml"``  -- the tree returned by
      ``tigerhelper.constructTigerXML`` for the sampled sentence nodes

    Args:
        sentences: Sequence of indexable records. Element ``[2]`` of a record
            is what gets looked up and written to the ``.txt`` file
            (presumably the sentence text -- confirm against the caller).
        filePrefix: Path prefix shared by the three output files.
        count: Number of successfully looked-up samples to collect.
        tigerFile: Treebank file handed to ``tigerhelper.readTreebankMap``
            and ``tigerhelper.TigerHelper``.

    Note:
        Failed lookups are reported and retried with a new random draw; if
        none of the entries can be looked up the loop does not terminate.
    """
    tigerSentences = tigerhelper.readTreebankMap(tigerFile, normalize)
    tigerXML = tigerhelper.TigerHelper(tigerFile)

    validSamples = 0
    sampledTiger = []
    # Context managers make sure both files are flushed and closed even when
    # a lookup, a treebank access or a write raises part-way through.
    with open(filePrefix + ".meta", "w") as fhM, \
            open(filePrefix + ".txt", "w") as fh:
        while validSamples < count:
            s = random.choice(sentences)
            # Single-argument parenthesised print behaves the same as the
            # print statement under Python 2 and also parses under Python 3.
            print(s)
            sentenceID = lookup(s[2], tigerSentences)
            if not sentenceID:
                print("Could not look up sentence %s" % (s,))
                continue
            validSamples += 1
            sampledTiger.append(tigerXML.getSentence(sentenceID))
            fhM.write(str(sentenceID))
            fhM.write("\n")
            fh.write(s[2].strip())
            fh.write("\n")

    newTiger = tigerhelper.constructTigerXML(sampledTiger)
    etree.ElementTree(newTiger).write(filePrefix + ".xml")
Ejemplo n.º 2
0
def main(tigerFile, sentenceList, out):
    """Extract the listed sentences from a treebank into a new XML file.

    Args:
        tigerFile: Treebank file handed to ``th.TigerHelper``.
        sentenceList: Comma-separated string of sentence IDs; each piece is
            passed unchanged to ``getSentence``.
        out: Destination passed to ``ElementTree.write``.

    Raises:
        AssertionError: If ``getSentence`` returns ``None`` for an ID.
    """
    helper = th.TigerHelper(tigerFile)
    requested_ids = sentenceList.split(",")

    nodes = []
    for sentence_id in requested_ids:
        node = helper.getSentence(sentence_id)
        # Every requested ID has to resolve to a node.
        assert node is not None
        nodes.append(node)

    print(nodes)
    etree.ElementTree(th.constructTigerXML(nodes)).write(out)
Ejemplo n.º 3
0
def write_tiger_set(someSet, outDir, name):
    """Serialise a collection of folds of sentence XML into one file.

    Args:
        someSet: Iterable of folds, each fold an iterable of XML strings
            that ``etree.fromstring`` can parse.
        outDir: Directory the output file is written to.
        name: File name of the output inside ``outDir``.
    """
    # Flatten the folds, parsing each serialised sentence into an element.
    elements = [etree.fromstring(sentence)
                for fold in someSet
                for sentence in fold]
    tree = th.constructTigerXML(elements)

    # tostring(..., encoding='UTF-8') yields an encoded byte string, so the
    # file is opened in binary mode; the context manager closes the handle
    # even if the write fails.
    payload = etree.tostring(tree, encoding='UTF-8')
    with open(os.path.join(outDir, name), 'wb') as fh:
        fh.write(payload)