def test_rotating_log(self):
    """Test rotating file logging through the shared-logger proxy.

    Writes one message per level through the proxy while holding the
    mutex, then checks that exactly the messages at or above the
    logger's effective level reached the log file.
    """
    # Truncate any log left over from a previous run so the final read
    # sees only this test's output.
    open("/tmp/lg.log", "w").close()
    args = {}
    args["file_name"] = "/tmp/lg.log"
    args["rotating"] = True
    args["maxBytes"] = 20000
    args["backupCount"] = 10
    #args["level"]= logging.INFO
    (my_log, logging_mutex) = make_shared_logger_and_proxy(
        setup_std_shared_logger, "my_logger", args)
    with logging_mutex:
        my_log.debug('This is a debug message')
        my_log.info('This is an info message')
        my_log.warning('This is a warning message')
        my_log.error('This is an error message')
        my_log.critical('This is a critical error message')
        my_log.log(logging.ERROR, 'This is a debug message')
    with open("/tmp/lg.log") as ii:
        # assertEqual (not assertTrue(a == b)) so a failure shows the
        # actual-vs-expected diff instead of just "False is not true".
        self.assertEqual(ii.read(),
                         "This is a warning message\n"
                         "This is an error message\n"
                         "This is a critical error message\n"
                         "This is a debug message\n")
def test_rotating_log(self):
    """ test rotating via proxy """
    # Start from an empty log file.
    open("/tmp/lg.log", "w").close()
    shared_args = {
        "file_name": "/tmp/lg.log",
        "rotating": True,
        "maxBytes": 20000,
        "backupCount": 10,
        "level": logging.INFO,
    }
    proxy, mutex = make_shared_logger_and_proxy(
        setup_std_shared_logger, "my_logger", shared_args)
    # Emit one message per level; the INFO threshold should drop only
    # the debug call made through the level-aware helpers.
    with mutex:
        proxy.debug('This is a debug message')
        proxy.info('This is an info message')
        proxy.warning('This is a warning message')
        proxy.error('This is an error message')
        proxy.critical('This is a critical error message')
        proxy.log(logging.ERROR, 'This is a debug message')
    expected_lines = [
        "This is an info message\n",
        "This is a warning message\n",
        "This is an error message\n",
        "This is a critical error message\n",
        "This is a debug message\n",
    ]
    with open("/tmp/lg.log") as log_stream:
        self.assertEqual(log_stream.readlines(), expected_lines)
def ruffus_logger(options=None, module_name='pipeline'):
    """Create a shared logger plus the mutex guarding it."""
    opts = DefaultLog() if options is None else options
    logger = logging.getLogger(module_name)
    _setup_std_logging(logger, opts.log_file, opts.verbose)

    # Factory handed to ruffus; it simply returns the logger that was
    # already configured above.
    def get_logger(logger_name, args):
        return logger

    proxy, mutex = make_shared_logger_and_proxy(get_logger, module_name, {})
    proxy.log_file = opts.log_file
    return proxy, mutex
def configure(config, args):
    """ Setup runtime from config module/dict and command line args

    Parameters
    ----------
    config: dict or Namespace
        Hold configurations used to initialize ApusConfig object
    args: list
        list of arguments to be passed to Ruffus.cmdline module

    Returns
    -------
    apusconf: ApusConfig
        Hold configurations of the Apus
    option: Namespace
        Hold parsed command line arguments
    """
    # A dict becomes keyword arguments; anything else is passed through
    # as the `config` attribute/namespace.
    if isinstance(config, dict):
        apusconf = ApusConfig(**config)
    else:
        apusconf = ApusConfig(config=config)
    parser = cmdline.get_argparse(description="""
+- Astronomy Pipeline Using ruffuS, specifically tweaked for PostCalib -+
""", version=ruffus.__version__, prog='postcalib run ... -a ')
    parser.add_argument(
            '-r', '--redo-all', action='store_true',
            help='force redo all tasks')
    parser.add_argument(
            '-l', '--list-tasks', action='store_true',
            help='list the task names and exit')
    # Default log/history files live in the configured log directory.
    parser.set_defaults(
            verbose=['0', ],
            log_file=os.path.join(apusconf.logdir, apusconf.log_file),
            history_file=os.path.join(apusconf.logdir, apusconf.history_file))
    option = parser.parse_args(args)
    # handle logger: a proxy + mutex pair that is safe to share across
    # the multiprocess pipeline.
    logger, logger_mutex = make_shared_logger_and_proxy(
            logger_factory, apusconf.jobkey, [option.log_file, option.verbose])
    apusconf.logger = logger
    apusconf.logger_mutex = logger_mutex
    return apusconf, option
def ruffus_logger(options=None, module_name='pipeline'):
    'creates a shared logger and mutex'
    if options is None:
        options = DefaultLog()
    shared_logger = logging.getLogger(module_name)
    _setup_std_logging(shared_logger, options.log_file, options.verbose)

    def get_logger(logger_name, args):
        # ruffus invokes this factory; the logger is fully configured
        # before the proxy is created, so just hand it back.
        return shared_logger

    logger_proxy, logging_mutex = make_shared_logger_and_proxy(
        get_logger, module_name, {})
    logger_proxy.log_file = options.log_file
    return logger_proxy, logging_mutex
def get_logger(self, logger_name, log_file):
    '''
    Returns a shared logger and proxy
    '''
    # the log file should be this format:
    # '/<project_path>/<pipeline_name>_<run_id>.log'
    logger_args = {
        "file_name": log_file,
        "level": logging.DEBUG,
        "rotating": True,
        "maxBytes": 10000000,
        "backupCount": 10,
        "formatter": "[%(asctime)s] [%(name)s] [%(levelname)s]:\t%(message)s",
    }
    logger_proxy, logger_mutex = make_shared_logger_and_proxy(
        setup_std_shared_logger, logger_name, logger_args)
    return [logger_proxy, logger_mutex]
def get_logger(self, logger_name, log_file):
    '''
    Returns a shared logger and proxy
    '''
    # Expected log file layout:
    # '/<project_path>/<pipeline_name>_<run_id>.log'
    record_format = "[%(asctime)s] [%(name)s] [%(levelname)s]:\t%(message)s"
    factory_args = dict(
        file_name=log_file,
        level=logging.DEBUG,
        rotating=True,
        maxBytes=10000000,
        backupCount=10,
        formatter=record_format,
    )
    proxy, mutex = make_shared_logger_and_proxy(
        setup_std_shared_logger, logger_name, factory_args)
    return [proxy, mutex]
def run_pipeline():
    """Parse options, build and run the ruffus OCR pipeline.

    Returns an ExitCode member describing the outcome.
    """
    options = parser.parse_args()
    options.verbose_abbreviated_path = 1
    if not check_closed_streams(options):
        return ExitCode.bad_args
    # Shared (cross-process) logger proxy for the pipeline workers.
    logger_args = {'verbose': options.verbose, 'quiet': options.quiet}
    _log, _log_mutex = proxy_logger.make_shared_logger_and_proxy(
        logging_factory, __name__, logger_args)
    _log.debug('ocrmypdf ' + VERSION)
    _log.debug('tesseract ' + tesseract.version())
    check_options(options, _log)
    # Any changes to options will not take effect for options that are already
    # bound to function parameters in the pipeline. (For example
    # options.input_file, options.pdf_renderer are already bound.)
    if not options.jobs:
        options.jobs = available_cpu_count()
    try:
        # All intermediate files live in a fresh temp dir; removed at exit.
        work_folder = mkdtemp(prefix="com.github.ocrmypdf.")
        options.history_file = os.path.join(
            work_folder, 'ruffus_history.sqlite')
        start_input_file = os.path.join(
            work_folder, 'origin')
        if options.input_file == '-':
            # stdin: spool it to a file so the pipeline has a real path
            _log.info('reading file from standard input')
            with open(start_input_file, 'wb') as stream_buffer:
                from shutil import copyfileobj
                copyfileobj(sys.stdin.buffer, stream_buffer)
        else:
            try:
                re_symlink(options.input_file, start_input_file, _log)
            except FileNotFoundError:
                _log.error("File not found - " + options.input_file)
                return ExitCode.input_file
        if options.output_file == '-':
            if sys.stdout.isatty():
                _log.error(textwrap.dedent("""\
                    Output was set to stdout '-' but it looks like stdout
                    is connected to a terminal.
                    Please redirect stdout to a file."""))
                return ExitCode.bad_args
        elif not is_file_writable(options.output_file):
            _log.error(textwrap.dedent("""\
                Output file location is not writable."""))
            return ExitCode.file_access_error
        # JobContext is shared between worker processes via a manager.
        manager = JobContextManager()
        manager.register('JobContext', JobContext)
        manager.start()
        context = manager.JobContext()
        context.set_options(options)
        context.set_work_folder(work_folder)
        build_pipeline(options, work_folder, _log, context)
        atexit.register(cleanup_working_files, work_folder, options)
        cmdline.run(options)
    except ruffus_exceptions.RethrownJobError as e:
        if options.verbose:
            _log.debug(str(e))  # stringify exception so logger doesn't have to
        # Ruffus flattens exception to 5 element tuples. Because of a bug
        # in <= 2.6.3 it may present either the single:
        #   (task, job, exc, value, stack)
        # or something like:
        #   [[(task, job, exc, value, stack)]]
        #
        # Generally cross-process exception marshalling doesn't work well
        # and ruffus doesn't support because BaseException has its own
        # implementation of __reduce__ that attempts to reconstruct the
        # exception based on e.__init__(e.args).
        #
        # Attempting to log the exception directly marshalls it to the logger
        # which is probably in another process, so it's better to log only
        # data from the exception at this point.
        exitcode = traverse_ruffus_exception(e.args, options, _log)
        if exitcode is None:
            _log.error("Unexpected ruffus exception: " + str(e))
            _log.error(repr(e))
            return ExitCode.other_error
        else:
            return exitcode
    except ExitCodeException as e:
        return e.exit_code
    except Exception as e:
        _log.error(e)
        return ExitCode.other_error
    # Post-run validation of the produced file (skipped for stdout).
    if options.flowchart:
        _log.info("Flowchart saved to {}".format(options.flowchart))
    elif options.output_file != '-':
        if options.output_type == 'pdfa':
            pdfa_info = file_claims_pdfa(options.output_file)
            if pdfa_info['pass']:
                msg = 'Output file is a {} (as expected)'
                _log.info(msg.format(pdfa_info['conformance']))
            else:
                msg = 'Output file is okay but is not PDF/A (seems to be {})'
                _log.warning(msg.format(pdfa_info['conformance']))
                return ExitCode.invalid_output_pdf
        if not qpdf.check(options.output_file, _log):
            _log.warning('Output file: The generated PDF is INVALID')
            return ExitCode.invalid_output_pdf
    else:
        _log.info("Output sent to stdout")
    pdfinfo = context.get_pdfinfo()
    if options.verbose:
        from pprint import pformat
        _log.debug(pformat(pdfinfo))
    # Report per-page rotations as e.g. "3e 7s" (page number + compass).
    direction = {0: 'n', 90: 'e', 180: 's', 270: 'w'}
    orientations = []
    for n, page in enumerate(pdfinfo):
        angle = pdfinfo[n].rotation or 0
        if angle != 0:
            orientations.append('{0}{1}'.format(
                n + 1, direction.get(angle, '')))
    if orientations:
        _log.info('Page orientations detected: ' + ' '.join(orientations))
    return ExitCode.ok
root_logger = logging.getLogger(logger_name) root_logger.setLevel(logging.DEBUG) handler = logging.StreamHandler(sys.stderr) formatter_ = logging.Formatter("%(levelname)7s - %(message)s") handler.setFormatter(formatter_) if verbose: handler.setLevel(logging.DEBUG) else: handler.setLevel(logging.INFO) root_logger.addHandler(handler) return root_logger _logger, _logger_mutex = proxy_logger.make_shared_logger_and_proxy( logging_factory, __name__, [None, options.verbose]) class WrappedLogger: def __init__(self, my_logger, my_mutex): self.logger = my_logger self.mutex = my_mutex def log(self, *args, **kwargs): with self.mutex: self.logger.log(*args, **kwargs) def debug(self, *args, **kwargs): with self.mutex: self.logger.debug(*args, **kwargs)
log_file_name, verbose = listargs root_logger = logging.getLogger(logger_name) root_logger.setLevel(logging.DEBUG) handler = logging.StreamHandler(sys.stderr) formatter_ = logging.Formatter("%(levelname)7s - %(message)s") handler.setFormatter(formatter_) if verbose: handler.setLevel(logging.DEBUG) else: handler.setLevel(logging.INFO) root_logger.addHandler(handler) return root_logger _logger, _logger_mutex = proxy_logger.make_shared_logger_and_proxy( logging_factory, __name__, [None, options.verbose]) class WrappedLogger: def __init__(self, my_logger, my_mutex): self.logger = my_logger self.mutex = my_mutex def log(self, *args, **kwargs): with self.mutex: self.logger.log(*args, **kwargs) def debug(self, *args, **kwargs): with self.mutex: self.logger.debug(*args, **kwargs)
from ruffus import pipeline_run, pipeline_printout, Pipeline, parallel, proxy_logger


def logging_factory(logger_name, listargs):
    """Build the real stderr logger; invoked by the shared-logger proxy."""
    root_logger = logging.getLogger(logger_name)
    root_logger.setLevel(logging.DEBUG)
    handler = logging.StreamHandler(sys.stderr)
    formatter_ = logging.Formatter("%(levelname)7s - %(message)s")
    handler.setFormatter(formatter_)
    handler.setLevel(logging.INFO)
    root_logger.addHandler(handler)
    return root_logger


# Shared logger + mutex usable from all pipeline worker processes.
log, log_mutex = proxy_logger.make_shared_logger_and_proxy(
    logging_factory, __name__, [])


#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
#   Tasks
#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
@parallel([['A', 1], ['B', 3], ['C', 3], ['D', 4], ['E', 4], ['F', 4]])
def parallel_task(name, param1):
    """Deliberately failing task used to exercise exception handling."""
    sys.stderr.write(" Parallel task %s: \n" % name)
    #raise task.JobSignalledBreak("Oops! I did it again!")
    with log_mutex:
        log.info(" Raising exception")
    raise Exception("new")
def run_pipeline():
    """Parse options, build and run the ruffus OCR pipeline.

    Returns an ExitCode member describing the outcome.
    """
    options = parser.parse_args()
    options.verbose_abbreviated_path = 1
    # BUGFIX: removed leftover debug statement
    #   print("Inside of options is: " + options)
    # It raised TypeError (str + argparse.Namespace cannot be concatenated)
    # and wrote debug text to stdout, which this program may be using for
    # the output PDF (options.output_file == '-').
    if not check_closed_streams(options):
        return ExitCode.bad_args
    # Shared (cross-process) logger proxy for the pipeline workers.
    logger_args = {'verbose': options.verbose, 'quiet': options.quiet}
    _log, _log_mutex = proxy_logger.make_shared_logger_and_proxy(
        logging_factory, __name__, logger_args)
    preamble(_log)
    check_options(options, _log)
    # Complain about qpdf version < 7.0.0
    # Suppress the warning if in the test suite, since there are no PPAs
    # for qpdf 7.0.0 for Ubuntu trusty (i.e. Travis)
    if qpdf.version() < '7.0.0' and not os.environ.get('PYTEST_CURRENT_TEST'):
        complain(
            "You are using qpdf version {0} which has known issues including "
            "security vulnerabilities with certain malformed PDFs. Consider "
            "upgrading to version 7.0.0 or newer.".format(qpdf.version()))
    # Any changes to options will not take effect for options that are already
    # bound to function parameters in the pipeline. (For example
    # options.input_file, options.pdf_renderer are already bound.)
    if not options.jobs:
        options.jobs = available_cpu_count()
    # Performance is improved by setting Tesseract to single threaded. In tests
    # this gives better throughput than letting a smaller number of Tesseract
    # jobs run multithreaded. Same story for pngquant. Tess <4 ignores this
    # variable, but harmless to set if ignored.
    os.environ.setdefault('OMP_THREAD_LIMIT', '1')
    check_environ(options, _log)
    if os.environ.get('PYTEST_CURRENT_TEST'):
        os.environ['_OCRMYPDF_TEST_INFILE'] = options.input_file
    try:
        # All intermediate files live in a fresh temp dir; removed at exit.
        work_folder = mkdtemp(prefix="com.github.ocrmypdf.")
        options.history_file = os.path.join(
            work_folder, 'ruffus_history.sqlite')
        start_input_file = os.path.join(
            work_folder, 'origin')
        check_input_file(options, _log, start_input_file)
        check_requested_output_file(options, _log)
        # JobContext is shared between worker processes via a manager.
        manager = JobContextManager()
        manager.register('JobContext', JobContext)  # pylint: disable=no-member
        manager.start()
        context = manager.JobContext()  # pylint: disable=no-member
        context.set_options(options)
        context.set_work_folder(work_folder)
        build_pipeline(options, work_folder, _log, context)
        atexit.register(cleanup_working_files, work_folder, options)
        cmdline.run(options)
    except ruffus_exceptions.RethrownJobError as e:
        if options.verbose:
            _log.debug(str(e))  # stringify exception so logger doesn't have to
        exceptions = e.job_exceptions
        exitcode = traverse_ruffus_exception(exceptions, options, _log)
        if exitcode is None:
            _log.error("Unexpected ruffus exception: " + str(e))
            _log.error(repr(e))
            return ExitCode.other_error
        return exitcode
    except ExitCodeException as e:
        return e.exit_code
    except Exception as e:
        _log.error(str(e))
        return ExitCode.other_error
    # Post-run validation of the produced file (skipped for stdout/devnull).
    if options.flowchart:
        _log.info("Flowchart saved to {}".format(options.flowchart))
        return ExitCode.ok
    elif options.output_file == '-':
        _log.info("Output sent to stdout")
    elif os.path.samefile(options.output_file, os.devnull):
        pass  # Say nothing when sending to dev null
    else:
        if options.output_type.startswith('pdfa'):
            pdfa_info = file_claims_pdfa(options.output_file)
            if pdfa_info['pass']:
                msg = 'Output file is a {} (as expected)'
                _log.info(msg.format(pdfa_info['conformance']))
            else:
                msg = 'Output file is okay but is not PDF/A (seems to be {})'
                _log.warning(msg.format(pdfa_info['conformance']))
                return ExitCode.pdfa_conversion_failed
        if not qpdf.check(options.output_file, _log):
            _log.warning('Output file: The generated PDF is INVALID')
            return ExitCode.invalid_output_pdf
        report_output_file_size(options, _log, start_input_file,
                                options.output_file)
    pdfinfo = context.get_pdfinfo()
    if options.verbose:
        from pprint import pformat
        _log.debug(pformat(pdfinfo))
    log_page_orientations(pdfinfo, _log)
    return ExitCode.ok
def run_pipeline():
    """Parse options, build and run the ruffus OCR pipeline.

    Returns an ExitCode member describing the outcome.
    """
    options = parser.parse_args()
    options.verbose_abbreviated_path = 1
    if not check_closed_streams(options):
        return ExitCode.bad_args
    # Shared (cross-process) logger proxy for the pipeline workers.
    logger_args = {'verbose': options.verbose, 'quiet': options.quiet}
    _log, _log_mutex = proxy_logger.make_shared_logger_and_proxy(
        logging_factory, __name__, logger_args)
    _log.debug('ocrmypdf ' + VERSION)
    _log.debug('tesseract ' + tesseract.version())
    _log.debug('qpdf ' + qpdf.version())
    check_options(options, _log)
    # Configure Pillow's decompression-bomb guard; 0 means "no limit".
    PIL.Image.MAX_IMAGE_PIXELS = int(options.max_image_mpixels * 1000000)
    if PIL.Image.MAX_IMAGE_PIXELS == 0:
        PIL.Image.MAX_IMAGE_PIXELS = None
    # Complain about qpdf version < 7.0.0
    # Suppress the warning if in the test suite, since there are no PPAs
    # for qpdf 7.0.0 for Ubuntu trusty (i.e. Travis)
    if qpdf.version() < '7.0.0' and not os.environ.get('PYTEST_CURRENT_TEST'):
        complain(
            "You are using qpdf version {0} which has known issues including "
            "security vulnerabilities with certain malformed PDFs. Consider "
            "upgrading to version 7.0.0 or newer.".format(qpdf.version()))
    # Any changes to options will not take effect for options that are already
    # bound to function parameters in the pipeline. (For example
    # options.input_file, options.pdf_renderer are already bound.)
    if not options.jobs:
        options.jobs = available_cpu_count()
    try:
        # All intermediate files live in a fresh temp dir; removed at exit.
        work_folder = mkdtemp(prefix="com.github.ocrmypdf.")
        options.history_file = os.path.join(work_folder,
                                            'ruffus_history.sqlite')
        start_input_file = os.path.join(work_folder, 'origin')
        if options.input_file == '-':
            # stdin: spool it to a file so the pipeline has a real path
            _log.info('reading file from standard input')
            with open(start_input_file, 'wb') as stream_buffer:
                from shutil import copyfileobj
                copyfileobj(sys.stdin.buffer, stream_buffer)
        else:
            try:
                re_symlink(options.input_file, start_input_file, _log)
            except FileNotFoundError:
                _log.error("File not found - " + options.input_file)
                return ExitCode.input_file
        if options.output_file == '-':
            if sys.stdout.isatty():
                _log.error(
                    textwrap.dedent("""\
                    Output was set to stdout '-' but it looks like stdout
                    is connected to a terminal.
                    Please redirect stdout to a file."""))
                return ExitCode.bad_args
        elif not is_file_writable(options.output_file):
            _log.error("Output file location (" + options.output_file + ") " +
                       "is not a writable file.")
            return ExitCode.file_access_error
        # JobContext is shared between worker processes via a manager.
        manager = JobContextManager()
        manager.register('JobContext', JobContext)  # pylint: disable=no-member
        manager.start()
        context = manager.JobContext()  # pylint: disable=no-member
        context.set_options(options)
        context.set_work_folder(work_folder)
        build_pipeline(options, work_folder, _log, context)
        atexit.register(cleanup_working_files, work_folder, options)
        cmdline.run(options)
    except ruffus_exceptions.RethrownJobError as e:
        if options.verbose:
            _log.debug(str(e))  # stringify exception so logger doesn't have to
        # Ruffus flattens exception to 5 element tuples. Because of a bug
        # in <= 2.6.3 it may present either the single:
        #   (task, job, exc, value, stack)
        # or something like:
        #   [[(task, job, exc, value, stack)]]
        #
        # Generally cross-process exception marshalling doesn't work well
        # and ruffus doesn't support because BaseException has its own
        # implementation of __reduce__ that attempts to reconstruct the
        # exception based on e.__init__(e.args).
        #
        # Attempting to log the exception directly marshalls it to the logger
        # which is probably in another process, so it's better to log only
        # data from the exception at this point.
        exitcode = traverse_ruffus_exception(e.args, options, _log)
        if exitcode is None:
            _log.error("Unexpected ruffus exception: " + str(e))
            _log.error(repr(e))
            return ExitCode.other_error
        return exitcode
    except ExitCodeException as e:
        return e.exit_code
    except Exception as e:
        _log.error(e)
        return ExitCode.other_error
    # Post-run validation of the produced file (skipped for stdout/devnull).
    if options.flowchart:
        _log.info("Flowchart saved to {}".format(options.flowchart))
    elif options.output_file == '-':
        _log.info("Output sent to stdout")
    elif os.path.samefile(options.output_file, os.devnull):
        pass  # Say nothing when sending to dev null
    else:
        if options.output_type.startswith('pdfa'):
            pdfa_info = file_claims_pdfa(options.output_file)
            if pdfa_info['pass']:
                msg = 'Output file is a {} (as expected)'
                _log.info(msg.format(pdfa_info['conformance']))
            else:
                msg = 'Output file is okay but is not PDF/A (seems to be {})'
                _log.warning(msg.format(pdfa_info['conformance']))
                return ExitCode.invalid_output_pdf
        if not qpdf.check(options.output_file, _log):
            _log.warning('Output file: The generated PDF is INVALID')
            return ExitCode.invalid_output_pdf
    pdfinfo = context.get_pdfinfo()
    if options.verbose:
        from pprint import pformat
        _log.debug(pformat(pdfinfo))
    log_page_orientations(pdfinfo, _log)
    return ExitCode.ok
def run_pipeline():
    """Parse options, build and run the ruffus OCR pipeline.

    Returns an ExitCode member describing the outcome.
    """
    options = parser.parse_args()
    options.verbose_abbreviated_path = 1
    if not check_closed_streams(options):
        return ExitCode.bad_args
    # Shared (cross-process) logger proxy for the pipeline workers.
    logger_args = {'verbose': options.verbose, 'quiet': options.quiet}
    _log, _log_mutex = proxy_logger.make_shared_logger_and_proxy(
        logging_factory, __name__, logger_args)
    _log.debug('ocrmypdf ' + VERSION)
    _log.debug('tesseract ' + tesseract.version())
    _log.debug('qpdf ' + qpdf.version())
    check_options(options, _log)
    # Configure Pillow's decompression-bomb guard; 0 means "no limit".
    PIL.Image.MAX_IMAGE_PIXELS = int(options.max_image_mpixels * 1000000)
    if PIL.Image.MAX_IMAGE_PIXELS == 0:
        PIL.Image.MAX_IMAGE_PIXELS = None
    # Complain about qpdf version < 7.0.0
    # Suppress the warning if in the test suite, since there are no PPAs
    # for qpdf 7.0.0 for Ubuntu trusty (i.e. Travis)
    if qpdf.version() < '7.0.0' and not os.environ.get('PYTEST_CURRENT_TEST'):
        complain(
            "You are using qpdf version {0} which has known issues including "
            "security vulnerabilities with certain malformed PDFs. Consider "
            "upgrading to version 7.0.0 or newer.".format(qpdf.version()))
    # Any changes to options will not take effect for options that are already
    # bound to function parameters in the pipeline. (For example
    # options.input_file, options.pdf_renderer are already bound.)
    if not options.jobs:
        options.jobs = available_cpu_count()
    try:
        # All intermediate files live in a fresh temp dir; removed at exit.
        work_folder = mkdtemp(prefix="com.github.ocrmypdf.")
        options.history_file = os.path.join(
            work_folder, 'ruffus_history.sqlite')
        start_input_file = os.path.join(
            work_folder, 'origin')
        if options.input_file == '-':
            # stdin: spool it to a file so the pipeline has a real path
            _log.info('reading file from standard input')
            with open(start_input_file, 'wb') as stream_buffer:
                from shutil import copyfileobj
                copyfileobj(sys.stdin.buffer, stream_buffer)
        else:
            try:
                re_symlink(options.input_file, start_input_file, _log)
            except FileNotFoundError:
                _log.error("File not found - " + options.input_file)
                return ExitCode.input_file
        if options.output_file == '-':
            if sys.stdout.isatty():
                _log.error(textwrap.dedent("""\
                    Output was set to stdout '-' but it looks like stdout
                    is connected to a terminal.
                    Please redirect stdout to a file."""))
                return ExitCode.bad_args
        elif not is_file_writable(options.output_file):
            _log.error(
                "Output file location (" + options.output_file + ") " +
                "is not a writable file.")
            return ExitCode.file_access_error
        # JobContext is shared between worker processes via a manager.
        manager = JobContextManager()
        manager.register('JobContext', JobContext)  # pylint: disable=no-member
        manager.start()
        context = manager.JobContext()  # pylint: disable=no-member
        context.set_options(options)
        context.set_work_folder(work_folder)
        build_pipeline(options, work_folder, _log, context)
        atexit.register(cleanup_working_files, work_folder, options)
        cmdline.run(options)
    except ruffus_exceptions.RethrownJobError as e:
        if options.verbose:
            _log.debug(str(e))  # stringify exception so logger doesn't have to
        # Ruffus flattens exception to 5 element tuples. Because of a bug
        # in <= 2.6.3 it may present either the single:
        #   (task, job, exc, value, stack)
        # or something like:
        #   [[(task, job, exc, value, stack)]]
        #
        # Generally cross-process exception marshalling doesn't work well
        # and ruffus doesn't support because BaseException has its own
        # implementation of __reduce__ that attempts to reconstruct the
        # exception based on e.__init__(e.args).
        #
        # Attempting to log the exception directly marshalls it to the logger
        # which is probably in another process, so it's better to log only
        # data from the exception at this point.
        exitcode = traverse_ruffus_exception(e.args, options, _log)
        if exitcode is None:
            _log.error("Unexpected ruffus exception: " + str(e))
            _log.error(repr(e))
            return ExitCode.other_error
        return exitcode
    except ExitCodeException as e:
        return e.exit_code
    except Exception as e:
        _log.error(e)
        return ExitCode.other_error
    # Post-run validation of the produced file (skipped for stdout/devnull).
    if options.flowchart:
        _log.info("Flowchart saved to {}".format(options.flowchart))
    elif options.output_file == '-':
        _log.info("Output sent to stdout")
    elif os.path.samefile(options.output_file, os.devnull):
        pass  # Say nothing when sending to dev null
    else:
        if options.output_type.startswith('pdfa'):
            pdfa_info = file_claims_pdfa(options.output_file)
            if pdfa_info['pass']:
                msg = 'Output file is a {} (as expected)'
                _log.info(msg.format(pdfa_info['conformance']))
            else:
                msg = 'Output file is okay but is not PDF/A (seems to be {})'
                _log.warning(msg.format(pdfa_info['conformance']))
                return ExitCode.invalid_output_pdf
        if not qpdf.check(options.output_file, _log):
            _log.warning('Output file: The generated PDF is INVALID')
            return ExitCode.invalid_output_pdf
    pdfinfo = context.get_pdfinfo()
    if options.verbose:
        from pprint import pformat
        _log.debug(pformat(pdfinfo))
    log_page_orientations(pdfinfo, _log)
    return ExitCode.ok
def run_pipeline(args=None):
    """Parse options (from *args* or sys.argv), build and run the pipeline.

    Returns an ExitCode member describing the outcome.
    """
    options = parser.parse_args(args=args)
    options.verbose_abbreviated_path = 1
    # Environment override to force thread-based (not process) workers.
    if os.environ.get('_OCRMYPDF_THREADS'):
        options.use_threads = True
    if not check_closed_streams(options):
        return ExitCode.bad_args
    # Shared (cross-process) logger proxy for the pipeline workers.
    logger_args = {'verbose': options.verbose, 'quiet': options.quiet}
    _log, _log_mutex = proxy_logger.make_shared_logger_and_proxy(
        logging_factory, __name__, logger_args
    )
    preamble(_log)
    check_options(options, _log)
    check_dependency_versions(options, _log)
    # Any changes to options will not take effect for options that are already
    # bound to function parameters in the pipeline. (For example
    # options.input_file, options.pdf_renderer are already bound.)
    if not options.jobs:
        options.jobs = available_cpu_count()
    # Performance is improved by setting Tesseract to single threaded. In tests
    # this gives better throughput than letting a smaller number of Tesseract
    # jobs run multithreaded. Same story for pngquant. Tess <4 ignores this
    # variable, but harmless to set if ignored.
    os.environ.setdefault('OMP_THREAD_LIMIT', '1')
    check_environ(options, _log)
    if os.environ.get('PYTEST_CURRENT_TEST'):
        os.environ['_OCRMYPDF_TEST_INFILE'] = options.input_file
    try:
        # All intermediate files live in a fresh temp dir; removed at exit.
        work_folder = mkdtemp(prefix="com.github.ocrmypdf.")
        options.history_file = os.path.join(work_folder,
                                            'ruffus_history.sqlite')
        start_input_file = os.path.join(work_folder, 'origin')
        check_input_file(options, _log, start_input_file)
        check_requested_output_file(options, _log)
        # JobContext is shared between worker processes via a manager.
        manager = JobContextManager()
        manager.register('JobContext', JobContext)  # pylint: disable=no-member
        manager.start()
        context = manager.JobContext()  # pylint: disable=no-member
        context.set_options(options)
        context.set_work_folder(work_folder)
        build_pipeline(options, work_folder, _log, context)
        atexit.register(cleanup_working_files, work_folder, options)
        # Lower our scheduling priority so interactive work stays responsive.
        if hasattr(os, 'nice'):
            os.nice(5)
        cmdline.run(options)
    except ruffus_exceptions.RethrownJobError as e:
        if options.verbose:
            _log.debug(str(e))  # stringify exception so logger doesn't have to
        exceptions = e.job_exceptions
        exitcode = traverse_ruffus_exception(exceptions, options, _log)
        if exitcode is None:
            _log.error("Unexpected ruffus exception: " + str(e))
            _log.error(repr(e))
            return ExitCode.other_error
        return exitcode
    except ExitCodeException as e:
        return e.exit_code
    except Exception as e:
        _log.error(str(e))
        return ExitCode.other_error
    # Post-run validation of the produced file (skipped for stdout/devnull).
    if options.flowchart:
        _log.info(f"Flowchart saved to {options.flowchart}")
        return ExitCode.ok
    elif options.output_file == '-':
        _log.info("Output sent to stdout")
    elif os.path.samefile(options.output_file, os.devnull):
        pass  # Say nothing when sending to dev null
    else:
        if options.output_type.startswith('pdfa'):
            pdfa_info = file_claims_pdfa(options.output_file)
            if pdfa_info['pass']:
                msg = f"Output file is a {pdfa_info['conformance']} (as expected)"
                _log.info(msg)
            else:
                msg = f"Output file is okay but is not PDF/A (seems to be {pdfa_info['conformance']})"
                _log.warning(msg)
                return ExitCode.pdfa_conversion_failed
        if not qpdf.check(options.output_file, _log):
            _log.warning('Output file: The generated PDF is INVALID')
            return ExitCode.invalid_output_pdf
        report_output_file_size(options, _log, start_input_file,
                                options.output_file)
    pdfinfo = context.get_pdfinfo()
    if options.verbose:
        from pprint import pformat
        _log.debug(pformat(pdfinfo))
    log_page_orientations(pdfinfo, _log)
    return ExitCode.ok
The only way around this is to only make calls to multiprocessing (i.e. make_shared_logger_and_proxy(...)) after the import phase of module loading. This python bug will be triggered if your make_shared_logger_and_proxy() call is at global scope in a module (i.e. not __main__) and only for python version 3.2 888888888888888888888888888888888888888888888888888888888888888888888888888 """) sys.exit() (logger_proxy, logging_mutex) = make_shared_logger_and_proxy(setup_std_shared_logger, "my_logger", args) # # task1 # @originate(input_file_names, logger_proxy, logging_mutex) def task1(outfile, logger_proxy, logging_mutex): write_input_output_filenames_to_output(None, outfile, logger_proxy, logging_mutex) # # task2 # @transform(task1, suffix(".1"), ".2", logger_proxy, logging_mutex)
The only way around this is to only make calls to multiprocessing (i.e. make_shared_logger_and_proxy(...)) after the import phase of module loading. This python bug will be triggered if your make_shared_logger_and_proxy() call is at global scope in a module (i.e. not __main__) and only for python version 3.2 888888888888888888888888888888888888888888888888888888888888888888888888888 """) sys.exit() (logger_proxy, logging_mutex) = make_shared_logger_and_proxy(setup_std_shared_logger, "my_logger", args) # # task1 # @originate(input_file_names, logger_proxy, logging_mutex) def task1(outfile, logger_proxy, logging_mutex): write_input_output_filenames_to_output( None, outfile, logger_proxy, logging_mutex) # # task2 # @transform(task1, suffix(".1"), ".2", logger_proxy, logging_mutex)
import ruffus from ruffus import pipeline_run, pipeline_printout, Pipeline, parallel, proxy_logger def logging_factory(logger_name, listargs): root_logger = logging.getLogger(logger_name) root_logger.setLevel(logging.DEBUG) handler = logging.StreamHandler(sys.stderr) formatter_ = logging.Formatter("%(levelname)7s - %(message)s") handler.setFormatter(formatter_) handler.setLevel(logging.INFO) root_logger.addHandler(handler) return root_logger log, log_mutex = proxy_logger.make_shared_logger_and_proxy( logging_factory, __name__, []) #88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888 # Tasks #88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888 @parallel([['A', 1], ['B',3], ['C',3], ['D',4], ['E',4], ['F',4]]) def parallel_task(name, param1): sys.stderr.write(" Parallel task %s: \n" % name) #raise task.JobSignalledBreak("Oops! I did it again!") with log_mutex: log.info(" Raising exception")