def setupProject(project): USE_STDOUT = True time = ProcessTimeWatch( project, too_slow=3.0, too_slow_score=0.10, too_fast=0.100, too_fast_score=-0.80, ) orig_filename = project.application().getInputFilename("PDF document") if AUTO_MANGLE: mangle = AutoMangle(project, orig_filename) mangle.hard_max_op = 1000 else: mangle = MangleFile(project, orig_filename) mangle.config.max_op = 1000 options = {'timeout': 5.0} if not USE_STDOUT: options['stdout'] = 'null' process = PopplerProcess(project, ['pdftotext'], **options) WatchProcess(process, exitcode_score=-0.10) if USE_STDOUT: stdout = WatchStdout(process) def cleanupLine(line): match = re.match(r"Error(?: \([0-9]+\))?: (.*)", line) if match: line = match.group(1) return line stdout.cleanup_func = cleanupLine del stdout.words['unknown'] # stdout.show_not_matching = True # stdout.ignoreRegex(r"Unknown operator 'allocate'$") # stdout.ignoreRegex(r" operator is wrong type \(error\)$") # stdout.ignoreRegex(r'^No current point in lineto$') # stdout.ignoreRegex(r'^No current point in lineto') # stdout.ignoreRegex(r'^Unknown operator ') # stdout.ignoreRegex(r"^Couldn't open 'nameToUnicode' file ") # stdout.ignoreRegex(r"^Illegal character ") # stdout.ignoreRegex(r"^No font in show$") # stdout.ignoreRegex(r"^Element of show/space array must be number or string$") # stdout.ignoreRegex(r"^No current point in curveto$") # stdout.ignoreRegex(r"^Badly formatted number$") # stdout.ignoreRegex(r"^Dictionary key must be a name object$") # stdout.ignoreRegex(r"^End of file inside array$") # stdout.ignoreRegex(r"^Too few \([0-9]+\) args to .* operator$") # stdout.ignoreRegex(r"Too many args in content stream") stdout.max_nb_line = (100, 0.20)
def setupProject(project): USE_STDOUT = True time = ProcessTimeWatch(project, too_slow=3.0, too_slow_score=0.10, too_fast=0.100, too_fast_score=-0.80, ) orig_filename = project.application().getInputFilename("PDF document") if AUTO_MANGLE: mangle = AutoMangle(project, orig_filename) mangle.hard_max_op = 1000 else: mangle = MangleFile(project, orig_filename) mangle.config.max_op = 1000 options = {'timeout': 5.0} if not USE_STDOUT: options['stdout'] = 'null' process = PopplerProcess(project, ['pdftotext'], **options) WatchProcess(process, exitcode_score=-0.10) if USE_STDOUT: stdout = WatchStdout(process) def cleanupLine(line): match = re.match(r"Error(?: \([0-9]+\))?: (.*)", line) if match: line = match.group(1) return line stdout.cleanup_func = cleanupLine del stdout.words['unknown'] # stdout.show_not_matching = True # stdout.ignoreRegex(r"Unknown operator 'allocate'$") # stdout.ignoreRegex(r" operator is wrong type \(error\)$") # stdout.ignoreRegex(r'^No current point in lineto$') # stdout.ignoreRegex(r'^No current point in lineto') # stdout.ignoreRegex(r'^Unknown operator ') # stdout.ignoreRegex(r"^Couldn't open 'nameToUnicode' file ") # stdout.ignoreRegex(r"^Illegal character ") # stdout.ignoreRegex(r"^No font in show$") # stdout.ignoreRegex(r"^Element of show/space array must be number or string$") # stdout.ignoreRegex(r"^No current point in curveto$") # stdout.ignoreRegex(r"^Badly formatted number$") # stdout.ignoreRegex(r"^Dictionary key must be a name object$") # stdout.ignoreRegex(r"^End of file inside array$") # stdout.ignoreRegex(r"^Too few \([0-9]+\) args to .* operator$") # stdout.ignoreRegex(r"Too many args in content stream") stdout.max_nb_line = (100, 0.20)