def getWords(inputPath='/home/aishwary/Desktop/ltrc/inference'):
    """Collect every word token from the SSF documents under *inputPath*.

    Walks the directory, skips non-annotation files (``err.txt``,
    ``*.comments``, ``*.bak`` and files whose name starts with ``task``),
    and gathers the ``node.type`` value of every leaf node in every chunk
    of every sentence tree.

    :param inputPath: directory to walk for SSF files.  The default keeps
        the original hard-coded path so existing callers are unaffected.
    :return: list of UTF-8 encoded word strings.
    """
    fileList = ssfAPI.folderWalk(inputPath)
    newFileList = []
    for fileName in fileList:
        xFileName = fileName.split('/')[-1]
        # Skip auxiliary/backup files that are not SSF annotation data;
        # this filter is repeated elsewhere in this module.
        if (xFileName == 'err.txt'
                or xFileName.split('.')[-1] in ['comments', 'bak']
                or xFileName[:4] == 'task'):
            continue
        newFileList.append(fileName)
    words = []
    for fileName in newFileList:
        d = ssfAPI.Document(fileName)
        for tree in d.nodeList:
            for chunkNode in tree.nodeList:
                for node in chunkNode.nodeList:
                    # node.type holds the word form; encoded to bytes for
                    # the byte-oriented consumers (Python-2-era code).
                    words.append(node.type.encode('utf-8'))
    return words
def readFilesAndComputeHeadOfChunks(inputPath, chunkTagPOSMapping, outputPath):
    """Compute chunk heads for one SSF file or a whole directory of them.

    If *inputPath* is a single file, its head-computed sentences are written
    directly to *outputPath*.  If it is a directory, every SSF file in it
    (skipping ``err.txt``, ``*.comments``, ``*.bak`` and ``task*`` files)
    is processed and written to ``outputPath/<basename>.head``.

    :param inputPath: SSF file or directory of SSF files.
    :param chunkTagPOSMapping: mapping passed through to
        ``computeHeadOfChunks`` — presumably chunk-tag → POS rules;
        semantics defined by that helper (not visible here).
    :param outputPath: output file (single-file mode) or output directory
        (directory mode).
    """
    if not os.path.isdir(inputPath):
        # Single-file mode: outputPath names the output file itself.
        d = ssfAPI.Document(inputPath)
        headComputedSentences = computeHeadOfChunks(d, chunkTagPOSMapping)
        writeListToFile(headComputedSentences, outputPath)
    else:
        fileList = ssfAPI.folderWalk(inputPath)
        newFileList = []
        for fileName in fileList:
            # os.path.basename replaces the hand-rolled split('/')[-1].
            xFileName = os.path.basename(fileName)
            # Skip auxiliary/backup files that are not SSF annotation data.
            if (xFileName == 'err.txt'
                    or xFileName.split('.')[-1] in ['comments', 'bak']
                    or xFileName[:4] == 'task'):
                continue
            newFileList.append(fileName)
        for fl in newFileList:
            d = ssfAPI.Document(fl)
            headComputedSentences = computeHeadOfChunks(d, chunkTagPOSMapping)
            # Hoist the output name; basename is equivalent to the original
            # fl[fl.rfind('/') + 1:] slice on POSIX-style paths.
            outName = os.path.basename(fl) + '.head'
            print(outName)
            writeListToFile(headComputedSentences,
                            os.path.join(outputPath, outName))
return fileList if __name__ == '__main__' : inputPath = sys.argv[1] fileList = folderWalk(inputPath) newFileList = [] for fileName in fileList : xFileName = fileName.split('/')[-1] if xFileName == 'err.txt' or xFileName.split('.')[-1] in ['comments','bak'] or xFileName[:4] == 'task' : continue else : newFileList.append(fileName) for fileName in newFileList : d = ssfAPI.Document(fileName) writeTo = './finalTest/' + fileName.split('/')[-1] f = open(writeTo,'w') #print f sentIndex = 0 for tree in d.nodeList : try : flag_pbrel = 0 tree.populateNodes(naming='strict') tree.populateEdges() for chunkNode in tree.nodeList : if chunkNode.parentPB != '0' : flag_pbrel=1 break if flag_pbrel == 1: string = ""