def __preprocess_files(self, filesToBePreprocessed, extensionStr, preprocessModule, tempFileSeed=None, options=list(), verbose=False, parseErrorFiles=None):
    """Preprocess the given files one after another in this process.

    While one file is being decoded, parsed and written, the next file is
    already being read by a background FileReader.

    Parameters:
      filesToBePreprocessed -- iterable of source file names; copied into a list.
      extensionStr -- suffix appended to each source file name to form the
          name of the preprocessed file.
      preprocessModule -- object whose getpreprocessor() returns the parser.
      tempFileSeed -- accepted for signature compatibility; not used here.
      options -- iterable of (flag, value) pairs. '-c' is passed to the
          converter's setencoding(), '-r' to the preprocessor's setoptions(),
          and every '-n' value is collected and handed to
          to_filename_in_prepdir().
      verbose -- when true, the progress reporter is created with the number
          of files; otherwise it is created with 0.
      parseErrorFiles -- if not None, the name of every file whose parse
          raises ValueError is appended to it and processing continues; if
          None, such a failure is reported on stderr and re-raised.

    Raises:
      TypeError -- re-raised when the converter cannot decode a file.
      ValueError -- re-raised on a parse failure when parseErrorFiles is None.
      EnvironmentError -- re-raised when writing or renaming the output fails.
    """
    filesToBePreprocessed = list(filesToBePreprocessed)
    fileCount = len(filesToBePreprocessed)

    encodingName = None
    preprocessorOptions = None
    prepDirs = list()
    for k, v in options:
        if k == '-c':
            encodingName = v
        elif k == '-r':
            preprocessorOptions = v
        elif k == '-n':
            prepDirs.append(v)

    cnv = easytorq.ICUConverter()
    if encodingName:
        cnv.setencoding(encodingName)

    prep = preprocessModule.getpreprocessor()
    if preprocessorOptions:
        prep.setoptions(preprocessorOptions)

    if verbose:
        progressBar = utility.ProgressReporter(fileCount)
    else:
        progressBar = utility.ProgressReporter(0)

    # Start reading the first file before entering the loop.
    if fileCount > 0:
        prefetch = FileReader(filesToBePreprocessed[0])
        prefetch.start()
    else:
        prefetch = None

    for i in xrange(fileCount):
        # Kick off the read of the following file before working on this one.
        if i + 1 < fileCount:
            nextPrefetch = FileReader(filesToBePreprocessed[i + 1])
            nextPrefetch.start()
        else:
            nextPrefetch = None

        prefetch.join()
        preprocessedFname = prefetch.filename + extensionStr
        preprocessedFname = to_filename_in_prepdir(preprocessedFname, prepDirs)

        if prefetch.error is not None:
            print >> sys.stderr, "warning: not found file '%s'" % prefetch.filename
        else:
            try:
                strUtf8 = cnv.decode(prefetch.content)
            except TypeError:
                print >> sys.stderr, "error: invalid string (wrong character encoding?) in file '%s'" % prefetch.filename
                raise

            parseResult = None
            try:
                parseResult = prep.parse(strUtf8)
            except ValueError:
                if parseErrorFiles is not None:
                    parseErrorFiles.append(prefetch.filename)
                else:
                    print >> sys.stderr, "error: failure to parse file '%s'" % prefetch.filename
                    raise  # bare raise keeps the original traceback

            if parseResult:
                # Write to a temporary name first and rename afterwards, so the
                # final name never refers to a partially written file.
                preprocessedFnameTemp = preprocessedFname + "-temp"
                try:
                    f = fopen(preprocessedFnameTemp, "wb")
                except IOError:
                    # Computed outside the inner try so that it is always
                    # bound when the handler below inspects it.
                    d = os_path_split(preprocessedFname, self.__syscnv)[0]
                    try:
                        os.makedirs(d)
                    except OSError:
                        # Rarely, another process makes the directory while this
                        # process is trying to make it. In that case makedirs()
                        # fails because the directory already exists.
                        if not os.path.exists(d):
                            raise  # the directory does not exist and could not be made
                    f = fopen(preprocessedFnameTemp, "wb")
                try:
                    try:
                        f.write(parseResult)
                    finally:
                        # Close even when the write fails, so the handle does not
                        # leak and the temporary file can be removed below.
                        f.close()
                    rename_file(preprocessedFnameTemp, preprocessedFname)  # rarely causes an error, the reason is unknown
                except EnvironmentError:
                    print >> sys.stderr, "debug info: preorocessedFnameTemp=%s preprocessedFname=%s" % (
                        preprocessedFnameTemp, preprocessedFname)
                    remove_file_neglecting_error(preprocessedFnameTemp)
                    raise

        progressBar.proceed(i + 1)
        # Advance to the reader started at the top of this iteration; without
        # this, every iteration would join and process the first file again.
        prefetch = nextPrefetch

    # NOTE(review): finishing the reporter mirrors what the worker-based
    # variant of this method does; confirm done() is safe after proceed().
    progressBar.done()
def __preprocess_files_by_workers(self, maxWorkerThreads, filesToBePreprocessed, extensionStr, preprocessModule, tempFileSeed=None, options=list(), verbose=False, parseErrorFiles=None):
    """Preprocess the given files by running this script in subprocesses.

    The file list is split into chunks. Each chunk is written, one name per
    line, to a temporary list file, and a command line
    [sys.executable, __file__, <module name>, <options...>, '-i', <list file>]
    is built for it. The commands are run through
    threadingutil.multithreading_iter() with invoke_subprocess.

    Parameters:
      maxWorkerThreads -- passed to multithreading_iter(); must be >= 2.
      filesToBePreprocessed -- iterable of source file names; copied into a list.
      extensionStr -- accepted for signature compatibility; not used here.
      preprocessModule -- object whose getname() is put on each command line.
      tempFileSeed -- passed to make_temp_filename() for every temporary file.
      options -- iterable of (flag, value) pairs, forwarded to each command line.
      verbose -- when true, the progress reporter is created with the number
          of commands; otherwise it is created with 0.
      parseErrorFiles -- if not None, each command also gets a
          '--parseerrors=<temp file>' argument, and after all subprocesses
          have finished the lines of those files are appended to this list.

    Raises:
      RuntimeError -- when a subprocess result is non-zero.
      SystemExit -- (code 2) when a parse-error report file cannot be opened.
    """
    assert maxWorkerThreads >= 2

    filesToBePreprocessed = list(filesToBePreprocessed)
    fileCount = len(filesToBePreprocessed)

    # One 64th of the input per chunk, clamped to the range [200, 2000].
    chunkSize = min(max(200, fileCount // 64), 2000)

    commands = list()
    tempFiles = list()
    # Report files are tracked separately from parseErrorFiles: the latter is
    # the caller's result list and must not be iterated while it is extended.
    errorReportFiles = list()
    try:
        for fiStart in xrange(0, fileCount, chunkSize):
            fiEnd = min(fiStart + chunkSize, fileCount)

            cmd = [sys.executable, __file__, preprocessModule.getname()]
            for k, v in options:
                cmd.append(k)
                cmd.append(v)

            fn = make_temp_filename(tempFileSeed, self.__syscnv)
            tempFiles.append(fn)
            f = fopen(fn, "wb")
            try:
                for i in xrange(fiStart, fiEnd):
                    f.write(filesToBePreprocessed[i])
                    f.write('\n')
            finally:
                f.close()
            cmd.append('-i')
            cmd.append(fn)

            if parseErrorFiles is not None:
                en = make_temp_filename(tempFileSeed, self.__syscnv)
                errorReportFiles.append(en)
                tempFiles.append(en)
                cmd.append("--parseerrors=%s" % en)

            commands.append(cmd)

        if verbose:
            progressBar = utility.ProgressReporter(len(commands))
        else:
            progressBar = utility.ProgressReporter(0)

        # NOTE: per-command progress reporting was disabled in the original
        # code; only done() is called once everything has finished.
        for index, result in threadingutil.multithreading_iter(
                invoke_subprocess, commands, maxWorkerThreads):
            if result != 0:
                raise RuntimeError("error in invocation of subprocess")

        for en in errorReportFiles:
            f = fopen(en, "r")
            if not f:
                print >> sys.stderr, "error: can't open a temporary file '%s'" % en
                sys.exit(2)
            try:
                reportedLines = f.readlines()
            finally:
                f.close()
            # NOTE(review): assumes the subprocess writes one file name per
            # line to the report file -- confirm against the script entry point.
            for line in reportedLines:
                name = line.rstrip('\r\n')
                if name:
                    parseErrorFiles.append(name)
    finally:
        # Remove the temporary files on the failure paths as well.
        for fn in tempFiles:
            remove_file_neglecting_error(fn)

    progressBar.done()