def writeSamples(sentences, filePrefix, count, tigerFile):
    """Write ``count`` randomly sampled sentences and their treebank entries.

    Sentences are drawn one at a time with ``random.choice`` (so the same
    sentence can be drawn more than once) until ``count`` of them have been
    resolved to a sentence ID via ``lookup``.  Three files are written:

    * ``filePrefix + ".meta"`` -- one resolved sentence ID per line
    * ``filePrefix + ".txt"``  -- the stripped text ``s[2]`` of each sample,
      one per line, in the same order as the ``.meta`` file
    * ``filePrefix + ".xml"``  -- the document returned by
      ``tigerhelper.constructTigerXML`` for the sampled sentence nodes

    Args:
        sentences: non-empty sequence of indexable records; element ``[2]``
            of each record is the sentence text used for the lookup.
        filePrefix: path prefix shared by the three output files.
        count: number of successfully resolved samples to write.
        tigerFile: treebank file handed to ``tigerhelper``.

    NOTE: the loop only counts draws that could be resolved.  If ``count``
    is positive and no sentence in ``sentences`` can be looked up, it never
    terminates.
    """
    tigerSentences = tigerhelper.readTreebankMap(tigerFile, normalize)
    tigerXML = tigerhelper.TigerHelper(tigerFile)

    sampledTiger = []
    validSamples = 0

    # Context managers guarantee both files are closed even if a lookup or
    # a write raises part-way through the sampling loop.
    with open(filePrefix + ".meta", "w") as fhM:
        with open(filePrefix + ".txt", "w") as fh:
            while validSamples < count:
                s = random.choice(sentences)
                # Single-argument, parenthesised print behaves the same as
                # a Python 2 print statement and as the Python 3 function.
                print(s)
                sentenceID = lookup(s[2], tigerSentences)
                if not sentenceID:
                    print("Could not look up sentence %s" % (s,))
                    continue

                validSamples += 1
                sampledTiger.append(tigerXML.getSentence(sentenceID))
                fhM.write(str(sentenceID))
                fhM.write("\n")
                fh.write(s[2].strip())
                fh.write("\n")

    newTiger = tigerhelper.constructTigerXML(sampledTiger)
    etree.ElementTree(newTiger).write(filePrefix + ".xml")
def main(tigerFile, sentenceList, out):
    """Extract the listed sentences from a treebank into a new XML file.

    Args:
        tigerFile: treebank file handed to ``th.TigerHelper``.
        sentenceList: comma-separated sentence IDs; each piece is passed
            to ``getSentence`` exactly as it appears between the commas.
        out: destination passed to ``ElementTree.write``.

    Every ID must resolve to a sentence node; an unresolved ID trips the
    assertion below.
    """
    helper = th.TigerHelper(tigerFile)

    nodes = []
    for sentence_id in sentenceList.split(","):
        node = helper.getSentence(sentence_id)
        assert node is not None
        nodes.append(node)

    print(nodes)

    document = th.constructTigerXML(nodes)
    tree = etree.ElementTree(document)
    tree.write(out)
def write_tiger_set(someSet, outDir, name):
    """Combine every sentence of ``someSet`` into one XML file.

    Args:
        someSet: iterable of folds; each fold is an iterable of XML
            strings, each parsed with ``etree.fromstring``.
        outDir: directory the output file is written to.
        name: file name inside ``outDir``.

    The parsed elements are combined with ``th.constructTigerXML`` and the
    result is written UTF-8 encoded to ``os.path.join(outDir, name)``.
    """
    elements = []
    for fold in someSet:
        elements.extend(etree.fromstring(sentence) for sentence in fold)

    tree = th.constructTigerXML(elements)

    # Serialise before opening the target so a failure here does not leave
    # behind a truncated, empty output file.
    payload = etree.tostring(tree, encoding='UTF-8')

    # The payload is already UTF-8 encoded, so write it in binary mode; the
    # context manager closes the handle even if the write fails.
    with open(os.path.join(outDir, name), 'wb') as fh:
        fh.write(payload)