def countWords(self, which):
    """Count the words of one text and write a coverage and a counts report.

    The text is obtained from self.readText(which) and handed to
    counts.countWords; the statistics of the result are printed to stdout.
    Two files are written into self.target_:

      <name>-<which>.cov.gz     coverage report, opened via uopen with
                                self.encoding
      <name>-<which>.counts.gz  exported counts, opened via zopen

    Both output handles are closed before returning, also when writing
    fails, so that buffered (compressed) data is not left unwritten.
    """
    cnt = counts.countWords(self.readText(which))
    cnt.printStat(sys.stdout)

    stem = self.name + '-' + which

    fname = os.path.join(self.target_, stem + '.cov.gz')
    covOut = uopen(fname, self.encoding, 'w')
    try:
        cnt.reportCoverage(covOut)
    finally:
        # The path always ends in '.cov.gz', so it can never be the '-'
        # placeholder; closing the handle directly is therefore safe.
        # NOTE(review): assumes the object returned by uopen offers close()
        # -- confirm, or switch to the project's matching close helper.
        covOut.close()

    fname = os.path.join(self.target_, stem + '.counts.gz')
    countsOut = zopen(fname, 'w')
    try:
        cnt.exportText(countsOut)
    finally:
        zclose(countsOut)
def parse(self, trsFileName='-'): # add unknown speaker self.spkIdDict['unk'] = self.corpusParser.speaker({ 'id': 'unk', 'type': 'unknown', 'gender': 'unknown' }) self.trsFileName = trsFileName trsFd = zopen(self.trsFileName, 'r') self.parser.parse(trsFd) zclose(trsFd) print trsFileName, '-->'
def parseFile(self, fname):
    """Read fname completely and feed its content to self.parser.

    Content that starts with '<?xml' is fed unchanged. Anything else is
    treated as a fragment: it is preceded by an ISO-8859-1 XML declaration
    and wrapped into a <sprint> root element before being fed.

    self.reset() is called before feeding and self.parser.close() after
    the last chunk. The file handle is released even if reading fails.
    """
    fd = zopen(fname, 'r')
    try:
        data = fd.read()
    finally:
        zclose(fd)

    self.reset()
    if data.startswith('<?xml'):
        self.parser.feed(data)
    else:
        # headerless fragment: supply declaration and an enclosing root
        self.parser.feed('<?xml version="1.0" encoding="ISO-8859-1"?>')
        self.parser.feed('<sprint>')
        self.parser.feed(data)
        self.parser.feed('</sprint>')
    self.parser.close()
def main(options, args):
    """Load counts, build a mapper from the named mappers and store results.

    args[0]     is passed to loadCounts
    args[1:]    are names looked up in mappersByName; the resulting list is
                passed, together with the counts, to makeMapperIterated
    options.map     if set, the mapper is stored under this name
    options.counts  if set, the new counts are exported to this file

    Returns silently without doing anything when fewer than two arguments
    are given. A name missing from mappersByName raises KeyError.
    """
    if len(args) < 2:
        return

    counts = loadCounts(args[0])
    counts.printStat(sys.stdout)
    print

    mapper, newCounts = makeMapperIterated(
        counts, [mappersByName[what] for what in args[1:]])

    if options.map:
        mapper.store(options.map)

    if options.counts:
        out = zopen(options.counts, 'w')
        try:
            newCounts.exportText(out)
        finally:
            # Close through zclose rather than out.close(): the file name is
            # user supplied. NOTE(review): presumably zclose leaves the
            # standard streams open when the name is '-' -- confirm.
            zclose(out)
def parse(self, path):
    """Feed the file at path row by row to self.__sax_parser__.

    The encoding is taken from the first row containing '<?xml':
      - everything is assumed to be ascii until <?xml ...> is found
      - <?xml ...> is assumed not to be split over several lines
    If that row carries no encoding="..." attribute,
    self.__default_encoding__ is used and a warning is written to stderr.
    If no row contains '<?xml', an error is written to stderr and the
    process exits with status 1.

    self.__handler__.startFile(path, encoding) is called before and
    self.__handler__.endFile(path) after feeding; both hooks are optional
    (an AttributeError from them is ignored).

    Every row consumed while searching for the header is replayed to the
    parser, so the parser sees the file exactly as it is on disk. The file
    handle is released on every exit path.
    """
    reEncoding = re.compile(r'<\?xml[^>]* encoding="([^"]*)"')

    fd = zopen(path, 'r')
    try:
        # determine encoding; remember every consumed row for the replay
        history = []
        for row in fd:
            history.append(row)
            if row.find('<?xml') != -1:
                break
        else:
            # input exhausted (or empty) without seeing a header
            print >> sys.stderr, 'Error: no xml header <?xml ...> found; "' + path + '" is probably not a proper xml-file.'
            sys.exit(1)

        m = reEncoding.search(row)
        if m:
            encoding = m.group(1)
        else:
            encoding = self.__default_encoding__
            print >> sys.stderr, 'Warning: no encoding specified, use "' + encoding + '"'

        # parse file
        parser = self.__sax_parser__
        try:
            self.__handler__.startFile(path, encoding)
        except AttributeError:
            # hook is optional; note that this also hides an AttributeError
            # raised inside startFile itself
            pass

        for row in history:
            parser.feed(row)
        del history
        # continue with the rows not consumed by the header search
        for row in fd:
            parser.feed(row)

        try:
            self.__handler__.endFile(path)
        except AttributeError:
            # optional hook, see startFile above
            pass
    finally:
        zclose(fd)
def accept(self, filename, visitor):
    """Parse the content of filename while visitor is installed.

    For the duration of the call self.visitor is set to visitor and
    self.corpusDir to the directory part of filename. The complete file
    content is read and passed to self.parse.

    The file handle is closed after reading, and self.visitor is reset to
    None on every exit path, so a failed parse does not leave a stale
    visitor behind.
    """
    self.visitor = visitor
    try:
        self.corpusDir = os.path.dirname(filename)
        fd = zopen(filename)
        try:
            data = fd.read()
        finally:
            zclose(fd)
        self.parse(data)
    finally:
        self.visitor = None
def store(self, filename):
    """Write all entries of self.maps to filename.

    Each entry of every map in self.maps is written as one line made of
    the phrase, the separator ' -> ' and the replacement, joined by the
    blanks the print statement inserts between its arguments.

    The output handle is closed before returning, also when writing
    fails, so that buffered data is not left unwritten.
    """
    f = zopen(filename, 'w')
    try:
        for mm in self.maps:
            for phrase, replacement in mm.iteritems():
                print >> f, phrase, ' -> ', replacement
    finally:
        zclose(f)
def __init__(self, filename=None):
    """Start with an empty list of maps and optionally load from a file.

    When filename is given (truthy), it is opened via zopen and the
    resulting handle is passed to self.load.
    """
    self.maps = []
    if not filename:
        # nothing to load
        return
    source = zopen(filename)
    self.load(source)