Python run Beispiele, ruffus.cmdline.run Python Beispiele

Beispiel #1

0

Datei anzeigen

Datei: main.py Projekt: clarionprogrammer/OCRmyPDF

def run_pipeline():
    """Run the ruffus pipeline and translate the outcome into an ExitCode.

    Returns:
        The value recovered from a job's ``SystemExit`` when ruffus rethrows
        a job error, ``ExitCode.other_error`` for any other rethrown job
        error, ``ExitCode.invalid_output_pdfa`` when ``validate_pdfa``
        rejects the output file, and ``ExitCode.ok`` otherwise.
    """
    # Both an unset job count and an explicit 1 are replaced by the CPU count.
    use_all_cpus = (not options.jobs) or options.jobs == 1
    if use_all_cpus:
        options.jobs = available_cpu_count()

    try:
        cmdline.run(options)
    except ruffus_exceptions.RethrownJobError as job_error:
        if options.verbose:
            print(job_error)

        # Each entry of the rethrown error is a 5-tuple describing one failed
        # job; only the exception name and its string value are needed here.
        for _task, _job, exc_name, exc_value, _stack in job_error.args:
            if exc_name != 'builtins.SystemExit':
                continue
            # NOTE(review): eval() is applied to text taken from the ruffus
            # exception. The namespace is limited to ExitCode and exc_value,
            # but this is still evaluation of a string; confirm that the
            # string can only ever originate from this program's own jobs.
            return eval(
                exc_value,
                {'ExitCode': ExitCode}, {'exc_value': exc_value})
        return ExitCode.other_error

    pdfa_is_valid = validate_pdfa(options.output_file, _log)
    if pdfa_is_valid:
        return ExitCode.ok

    _log.warning('Output file: The generated PDF/A file is INVALID')
    return ExitCode.invalid_output_pdfa

Beispiel #2

0

Datei anzeigen

def bootstrap(config=None, option=None):
    """Entry point: parse the command line, build the pipeline and run it.

    Args:
        config: Configuration object; defaults to the ``__main__`` module.
        option: Command-line arguments; defaults to ``sys.argv[1:]``.

    When ``list_tasks`` is set the task names are printed and the process
    exits with status 0 without running anything.
    """
    print("+- Apus powered by Ruffus ver {0} -+".format(ruffus.__version__))
    config = sys.modules['__main__'] if config is None else config
    option = sys.argv[1:] if option is None else option
    config, option = configure(config, option)

    if option.list_tasks:
        task_names = config.get_task_names()
        if task_names:
            for task_name in task_names:
                print("{0}".format(task_name))
        else:
            print("no tasks found")
        sys.exit(0)

    # set up astromatic config
    config.am = am.AmConfig(**config.env_overrides)
    build_pipeline(config)

    # redo-all forces every task known to ruffus
    if option.redo_all:
        option.forced_tasks.extend(ruffus.pipeline_get_task_names())
    for forced_task in option.forced_tasks:
        config.logger.info(
            "forced redo: {0}".format(utils.alert(forced_task)))

    cmdline.run(option, checksum_level=1)

Beispiel #3

0

Datei anzeigen

Datei: main.py Projekt: bjpop/lynch_gatk

def main():
    '''Initialise the pipeline, then run it.

    Parses the command line, sets up logging and a DRMAA session, loads and
    validates the configuration, builds the pipeline and hands control to
    ``cmdline.run``. If the DRMAA session cannot be set up, an error is
    printed and the process exits with ``error_codes.DRMAA_ERROR``.
    '''
    # Parse command line arguments
    options = parse_command_line()
    # Initialise the logger
    logger = Logger(__name__, options.log_file, options.verbose)
    # Log the command line used to run the pipeline
    logger.info(' '.join(sys.argv))
    drmaa_session = None
    try:
        # Set up the DRMAA session for running cluster jobs
        import drmaa
        drmaa_session = drmaa.Session()
        drmaa_session.initialize()
    except Exception as e:
        print("{progname} error using DRMAA library".format(progname=program_name), file=sys.stdout)
        # Format the exception itself: Python 3 exceptions have no `.message`
        # attribute, and `file=` belongs to print(), not to str.format().
        print("Error message: {msg}".format(msg=e), file=sys.stdout)
        sys.exit(error_codes.DRMAA_ERROR)
    try:
        # Parse the configuration file, and initialise global state
        config = Config(options.config)
        config.validate()
        state = State(options=options, config=config, logger=logger,
                      drmaa_session=drmaa_session)
        # Build the pipeline workflow
        pipeline = make_pipeline(state)
        # Run (or print) the pipeline
        cmdline.run(options)
    finally:
        # Shut down the DRMAA session even when the pipeline raises
        if drmaa_session is not None:
            drmaa_session.exit()

Beispiel #4

0

Datei anzeigen

Datei: main.py Projekt: mawekuwe/OCRmyPDF

def run_pipeline():
    """Execute the ruffus pipeline and report the result as an ExitCode.

    A rethrown job error is searched for a ``SystemExit`` whose value yields
    the exit code; if none is found ``ExitCode.other_error`` is returned.
    After a successful run the output file is checked with ``validate_pdfa``.
    """
    if options.jobs == 1 or not options.jobs:
        # No usable job count was given (unset or 1): use every CPU.
        options.jobs = available_cpu_count()

    try:
        cmdline.run(options)
    except ruffus_exceptions.RethrownJobError as rethrown:
        if options.verbose:
            print(rethrown)

        # Walk the per-job 5-tuples carried by the ruffus exception looking
        # for the SystemExit that holds the intended return code.
        for job_failure in rethrown.args:
            _task_name, _job_name, exc_name, exc_value, _exc_stack = job_failure
            if exc_name == 'builtins.SystemExit':
                # NOTE(review): this evaluates a string taken from the
                # exception. Only ExitCode and exc_value are exposed to it,
                # but confirm the text can never come from untrusted input.
                exit_code = eval(
                    exc_value,
                    {'ExitCode': ExitCode}, {'exc_value': exc_value})
                return exit_code
        return ExitCode.other_error

    if validate_pdfa(options.output_file, _log):
        return ExitCode.ok

    _log.warning('Output file: The generated PDF/A file is INVALID')
    return ExitCode.invalid_output_pdfa

Beispiel #5

0

Datei anzeigen

Datei: main.py Projekt: khalidm/hiplexpipe_somatic

def main():
    '''Initialise the pipeline, then run it.

    Sets up logging and a DRMAA session, loads and validates the
    configuration, builds the pipeline and runs it through ``cmdline.run``.
    A failure to set up DRMAA is reported on stdout and terminates the
    process with ``error_codes.DRMAA_ERROR``.
    '''
    # Parse command line arguments
    options = parse_command_line()
    # Initialise the logger
    logger = Logger(__name__, options.log_file, options.verbose)
    # Log the command line used to run the pipeline
    logger.info(' '.join(sys.argv))
    drmaa_session = None
    try:
        # Set up the DRMAA session for running cluster jobs
        import drmaa
        drmaa_session = drmaa.Session()
        drmaa_session.initialize()
    except Exception as e:
        print("{progname} error using DRMAA library".format(
            progname=program_name),
              file=sys.stdout)
        # `file=` is an argument of print(), not of str.format(), and
        # Python 3 exceptions carry no `.message`; format the exception.
        print("Error message: {msg}".format(msg=e), file=sys.stdout)
        sys.exit(error_codes.DRMAA_ERROR)
    try:
        # Parse the configuration file, and initialise global state
        config = Config(options.config)
        config.validate()
        state = State(options=options,
                      config=config,
                      logger=logger,
                      drmaa_session=drmaa_session)
        # Build the pipeline workflow
        pipeline = make_pipeline(state)
        # Run (or print) the pipeline
        cmdline.run(options)
    finally:
        # Always release the DRMAA session, including on pipeline failure
        if drmaa_session is not None:
            drmaa_session.exit()

Beispiel #6

0

Datei anzeigen

Datei: main.py Projekt: concepz/OCRmyPDF

def run_pipeline():
    """Run the ruffus pipeline and map its outcome to an ExitCode.

    Returns:
        The exit code that ``traverse_ruffus_exception`` recovers from a
        rethrown job error; ``ExitCode.other_error`` when it recovers nothing
        or when any other exception is raised;
        ``ExitCode.invalid_output_pdfa`` when ``validate_pdfa`` rejects the
        output file; ``ExitCode.ok`` otherwise.
    """
    if not options.jobs:
        options.jobs = available_cpu_count()
    try:
        options.history_file = os.path.join(
            work_folder, 'ruffus_history.sqlite')
        cmdline.run(options)
    except ruffus_exceptions.RethrownJobError as e:
        if options.verbose:
            _log.debug(str(e))  # stringify exception so logger doesn't have to

        # Ruffus flattens exception to 5 element tuples. Because of a bug
        # in <= 2.6.3 it may present either the single:
        #   (task, job, exc, value, stack)
        # or something like:
        #   [[(task, job, exc, value, stack)]]
        #
        # Generally cross-process exception marshalling doesn't work well
        # and ruffus doesn't support because BaseException has its own
        # implementation of __reduce__ that attempts to reconstruct the
        # exception based on e.__init__(e.args).
        #
        # Attempting to log the exception directly marshalls it to the logger
        # which is probably in another process, so it's better to log only
        # data from the exception at this point.

        exitcode = traverse_ruffus_exception(e.args)
        if exitcode is None:
            _log.error("Unexpected ruffus exception: " + str(e))
            _log.error(repr(e))
            return ExitCode.other_error
        return exitcode
    except Exception as e:
        # Same rule as above: pass the logger text, not the exception object.
        _log.error(str(e))
        return ExitCode.other_error

    if not validate_pdfa(options.output_file, _log):
        _log.warning('Output file: The generated PDF/A file is INVALID')
        return ExitCode.invalid_output_pdfa

    with _pdfinfo_lock:
        _log.debug(_pdfinfo)
        # Compass letter reported for each rotation angle
        direction = {0: 'n', 90: 'e',
                     180: 's', 270: 'w'}
        orientations = []
        # Pages are reported 1-based; use the item yielded by the iteration
        # instead of indexing the container a second time.
        for page_number, page in enumerate(_pdfinfo, start=1):
            angle = page.get('rotated', 0)
            if angle != 0:
                orientations.append('{0}{1}'.format(
                    page_number,
                    direction.get(angle, '')))
        if orientations:
            _log.info('Page orientations detected: ' + ' '.join(orientations))

    return ExitCode.ok

Beispiel #7

0

Datei anzeigen

Datei: trenchrun.py Projekt: sivy/kuiil

def main():
    """Parse the command line and run the Trench Run pipeline.

    With target tasks given, the parsed arguments are handed to
    ``cmdline.run``; otherwise ``pipeline_run`` is called with
    ``publish_data``.
    """
    arg_parser = cmdline.get_argparse(description="Trench Run pipeline")
    args = arg_parser.parse_args()

    if not args.target_tasks:
        pipeline_run(publish_data)
        return

    cmdline.run(args)

Beispiel #8

0

Datei anzeigen

def run_pipeline():
    """Run the ruffus pipeline and return the matching ExitCode.

    Returns:
        The code found by ``traverse_ruffus_exception`` for a rethrown job
        error, ``ExitCode.other_error`` when nothing is found or another
        exception occurs, ``ExitCode.invalid_output_pdfa`` when
        ``validate_pdfa`` fails, and ``ExitCode.ok`` on success.
    """
    if not options.jobs:
        options.jobs = available_cpu_count()
    try:
        options.history_file = os.path.join(work_folder,
                                            'ruffus_history.sqlite')
        cmdline.run(options)
    except ruffus_exceptions.RethrownJobError as e:
        if options.verbose:
            _log.debug(str(e))  # stringify exception so logger doesn't have to

        # Ruffus flattens exception to 5 element tuples. Because of a bug
        # in <= 2.6.3 it may present either the single:
        #   (task, job, exc, value, stack)
        # or something like:
        #   [[(task, job, exc, value, stack)]]
        #
        # Generally cross-process exception marshalling doesn't work well
        # and ruffus doesn't support because BaseException has its own
        # implementation of __reduce__ that attempts to reconstruct the
        # exception based on e.__init__(e.args).
        #
        # Attempting to log the exception directly marshalls it to the logger
        # which is probably in another process, so it's better to log only
        # data from the exception at this point.

        exitcode = traverse_ruffus_exception(e.args)
        if exitcode is None:
            _log.error("Unexpected ruffus exception: " + str(e))
            _log.error(repr(e))
            return ExitCode.other_error
        return exitcode
    except Exception as e:
        # Stringify here too, so the exception object is never marshalled
        # to the logger.
        _log.error(str(e))
        return ExitCode.other_error

    if not validate_pdfa(options.output_file, _log):
        _log.warning('Output file: The generated PDF/A file is INVALID')
        return ExitCode.invalid_output_pdfa

    with _pdfinfo_lock:
        _log.debug(_pdfinfo)
        # Compass letter reported for each rotation angle
        direction = {0: 'n', 90: 'e', 180: 's', 270: 'w'}
        orientations = []
        # Report 1-based page numbers and read the rotation from the page
        # yielded by the loop rather than re-indexing the container.
        for page_number, page in enumerate(_pdfinfo, start=1):
            angle = page.get('rotated', 0)
            if angle != 0:
                orientations.append('{0}{1}'.format(page_number,
                                                    direction.get(angle, '')))
        if orientations:
            _log.info('Page orientations detected: ' + ' '.join(orientations))

    return ExitCode.ok

Beispiel #9

0

Datei anzeigen

Datei: main.py Projekt: claresloggett/rnapipe

def main(program_name, program_version, make_pipeline):
    '''Initialise the pipeline, then run it.

    Args:
        program_name: Name used in the DRMAA error message.
        program_version: Passed to ``parse_command_line``.
        make_pipeline: Callable invoked with the global ``State`` to build
            the pipeline workflow.

    A failure to set up DRMAA is reported on stdout and terminates the
    process with ``error_codes.DRMAA_ERROR``.
    '''
    # Parse command line arguments
    options = parse_command_line(program_version)
    # Initialise the logger
    # logger = Logger(__name__, options.log_file, options.verbose)
    if options.log_file:
        logging.basicConfig(filename=options.log_file,
                            level=LOGGING_LEVEL,
                            filemode="a",
                            format="%(asctime)s %(levelname)s - %(message)s",
                            datefmt="%m-%d-%Y %H:%M:%S")
    logger = logging.getLogger(__name__)
    # Log the command line used to run the pipeline
    logger.info("*** rnapipe ***")
    logger.info(' '.join(sys.argv))
    drmaa_session = None
    try:
        # Set up the DRMAA session for running cluster jobs
        import drmaa
        drmaa_session = drmaa.Session()
        drmaa_session.initialize()
    except Exception as e:
        print("{progname} error using DRMAA library".format(
            progname=program_name),
              file=sys.stdout)
        # `file=` must go to print(), and Python 3 exceptions have no
        # `.message` attribute, so format the exception object itself.
        print("Error message: {msg}".format(msg=e), file=sys.stdout)
        sys.exit(error_codes.DRMAA_ERROR)
    try:
        # Parse the configuration file, and initialise global state
        config = Config(options.config)
        config.validate()
        state = State(options=options,
                      config=config,
                      logger=logger,
                      drmaa_session=drmaa_session)
        # Build the pipeline workflow
        pipeline = make_pipeline(state)
        # Run (or print) the pipeline
        cmdline.run(options)
    finally:
        # Shut down the DRMAA session whether or not the pipeline succeeded
        if drmaa_session is not None:
            drmaa_session.exit()

Beispiel #10

0

Datei anzeigen

Datei: main.py Projekt: bjpop/twin_ion_pipeline

def main():
    '''Initialise the pipeline, then run it.

    Sets up logging and a DRMAA session, loads and validates the
    configuration, builds the pipeline and runs it through ``cmdline.run``.
    The DRMAA session is shut down even if a later step raises.
    '''
    # Parse command line arguments
    options = parse_command_line()
    # Initialise the logger
    logger = Logger(__name__, options.log_file, options.verbose)
    # Log the command line used to run the pipeline
    logger.info(' '.join(sys.argv))
    # Set up the DRMAA session for running cluster jobs
    drmaa_session = drmaa.Session()
    drmaa_session.initialize()
    try:
        # Parse the configuration file, and initialise global state
        config = Config(options.config)
        config.validate()
        state = State(options=options, config=config, logger=logger,
                      drmaa_session=drmaa_session)
        # Build the pipeline workflow
        pipeline = make_pipeline(state)
        # Run (or print) the pipeline
        cmdline.run(options)
    finally:
        # Shut down the DRMAA session, including when a step above fails
        drmaa_session.exit()

Beispiel #11

0

Datei anzeigen

Datei: main.py Projekt: balu-/OCRmyPDF

def run_pipeline():
    """Run the ruffus pipeline and translate the outcome into an ExitCode.

    Returns:
        The ``ExitCode`` member named in a job's ``SystemExit`` text (or
        ``ExitCode.other_error`` if the name cannot be extracted or is
        unknown); ``ExitCode.input_file`` for a missing input file or for a
        ``TypeError`` raised by the ``split_pages`` task;
        ``ExitCode.other_error`` for any other rethrown job error;
        ``ExitCode.invalid_output_pdfa`` when ``validate_pdfa`` fails;
        ``ExitCode.ok`` otherwise.
    """
    if not options.jobs:
        options.jobs = available_cpu_count()
    try:
        options.history_file = os.path.join(work_folder, 'ruffus_history.sqlite')
        cmdline.run(options)
    except ruffus_exceptions.RethrownJobError as e:
        if options.verbose:
            print(e)

        # Yuck. Hunt through the ruffus exception to find out what the
        # return code is supposed to be.
        for exc in e.args:
            task_name, job_name, exc_name, exc_value, exc_stack = exc
            if exc_name == 'builtins.SystemExit':
                match = re.search(r"\.(.+?)\)", exc_value)
                if match is None:
                    # The text did not contain an exit code name.
                    return ExitCode.other_error
                # Fall back to the enum member, not to the bare string
                # 'other_error', so callers always receive an ExitCode.
                return getattr(ExitCode, match.group(1), ExitCode.other_error)
            elif exc_name == 'ruffus.ruffus_exceptions.MissingInputFileError':
                print(cleanup_ruffus_error_message(exc_value))
                return ExitCode.input_file
            elif exc_name == 'builtins.TypeError':
                # Even though repair_pdf will fail, ruffus will still try
                # to call split_pages with no input files, likely due to a bug
                if task_name == 'split_pages':
                    print("Input file '{0}' is not a valid PDF".format(
                        options.input_file))
                    return ExitCode.input_file

        return ExitCode.other_error

    if not validate_pdfa(options.output_file, _log):
        _log.warning('Output file: The generated PDF/A file is INVALID')
        return ExitCode.invalid_output_pdfa

    return ExitCode.ok

Beispiel #12

0

Datei anzeigen

Datei: main.py Projekt: khalidm/crpipe

def main():
    '''Initialise the pipeline, then run it.

    Sets up logging and a DRMAA session, loads and validates the
    configuration, builds the pipeline and runs it through ``cmdline.run``.
    The DRMAA session is released even when one of those steps raises.
    '''
    # Parse command line arguments
    options = parse_command_line()
    # Initialise the logger
    logger = Logger(__name__, options.log_file, options.verbose)
    # Log the command line used to run the pipeline
    logger.info(' '.join(sys.argv))
    # Set up the DRMAA session for running cluster jobs
    drmaa_session = drmaa.Session()
    drmaa_session.initialize()
    try:
        # Parse the configuration file, and initialise global state
        config = Config(options.config)
        config.validate()
        state = State(options=options,
                      config=config,
                      logger=logger,
                      drmaa_session=drmaa_session)
        # Build the pipeline workflow
        pipeline = make_pipeline(state)
        # Run (or print) the pipeline
        cmdline.run(options)
    finally:
        # Shut down the DRMAA session on success and on failure alike
        drmaa_session.exit()

Beispiel #13

0

Datei anzeigen

def run_pipeline():
    """Run the ruffus pipeline and translate the outcome into an ExitCode.

    Returns:
        The ``ExitCode`` member named in a job's ``SystemExit`` text (or
        ``ExitCode.other_error`` if no known name can be extracted);
        ``ExitCode.input_file`` for a missing input file or for a
        ``TypeError`` raised by the ``split_pages`` task;
        ``ExitCode.other_error`` for any other rethrown job error;
        ``ExitCode.invalid_output_pdfa`` when ``validate_pdfa`` fails;
        ``ExitCode.ok`` otherwise.
    """
    # Both an unset job count and an explicit 1 are replaced by the CPU count.
    if not options.jobs or options.jobs == 1:
        options.jobs = available_cpu_count()
    try:
        cmdline.run(options)
    except ruffus_exceptions.RethrownJobError as e:
        if options.verbose:
            print(e)

        # Yuck. Hunt through the ruffus exception to find out what the
        # return code is supposed to be.
        for exc in e.args:
            task_name, job_name, exc_name, exc_value, exc_stack = exc
            if exc_name == 'builtins.SystemExit':
                match = re.search(r"\.(.+?)\)", exc_value)
                if match is None:
                    # No exit code name could be found in the text.
                    return ExitCode.other_error
                # The fallback must be an ExitCode member; the plain string
                # 'other_error' would leak a str to callers.
                return getattr(ExitCode, match.group(1), ExitCode.other_error)
            elif exc_name == 'ruffus.ruffus_exceptions.MissingInputFileError':
                print(cleanup_ruffus_error_message(exc_value))
                return ExitCode.input_file
            elif exc_name == 'builtins.TypeError':
                # Even though repair_pdf will fail, ruffus will still try
                # to call split_pages with no input files, likely due to a bug
                if task_name == 'split_pages':
                    print("Input file '{0}' is not a valid PDF".format(
                        options.input_file))
                    return ExitCode.input_file

        return ExitCode.other_error

    if not validate_pdfa(options.output_file, _log):
        _log.warning('Output file: The generated PDF/A file is INVALID')
        return ExitCode.invalid_output_pdfa

    return ExitCode.ok

Beispiel #14

0

Datei anzeigen

def run_pipeline():
    """Run the ruffus pipeline and translate the outcome into an ExitCode.

    Returns:
        For a rethrown job error: the ``ExitCode`` member named in a job's
        ``SystemExit`` text, ``ExitCode.input_file`` for a missing input file
        or a ``TypeError`` from ``split_pages``,
        ``ExitCode.child_process_error`` for a failed subprocess, and
        ``ExitCode.other_error`` otherwise. ``ExitCode.other_error`` for any
        other exception, ``ExitCode.invalid_output_pdfa`` when
        ``validate_pdfa`` fails, and ``ExitCode.ok`` on success.
    """
    if not options.jobs:
        options.jobs = available_cpu_count()
    try:
        options.history_file = os.path.join(work_folder,
                                            'ruffus_history.sqlite')
        cmdline.run(options)
    except ruffus_exceptions.RethrownJobError as e:
        # Hand the logger text rather than the exception object.
        error_text = str(e)
        if options.verbose:
            _log.debug(error_text)

        # Yuck. Hunt through the ruffus exception to find out what the
        # return code is supposed to be.
        # Ruffus flattens the exception to a string, throwing away all kinds
        # of helpful details
        # task_name, job_name - ruffus status
        # exc_name - class name of exception
        # exc_value - irritating string that makes impossible to recover
        #   exception object
        # exc_stack - string that contains traceback of exception
        for exc in e.args:
            task_name, job_name, exc_name, exc_value, exc_stack = exc
            if exc_name == 'builtins.SystemExit':
                match = re.search(r"\.(.+?)\)", exc_value)
                if match is None:
                    # No exit code name could be found in the text.
                    return ExitCode.other_error
                # Fall back to the enum member, not the string 'other_error'.
                return getattr(ExitCode, match.group(1), ExitCode.other_error)
            elif exc_name == 'ruffus.ruffus_exceptions.MissingInputFileError':
                _log.error(cleanup_ruffus_error_message(exc_value))
                return ExitCode.input_file
            elif exc_name == 'builtins.TypeError':
                # Even though repair_pdf will fail, ruffus will still try
                # to call split_pages with no input files, likely due to a bug
                if task_name == 'split_pages':
                    _log.error("Input file '{0}' is not a valid PDF".format(
                        options.input_file))
                    return ExitCode.input_file
            elif exc_name == 'subprocess.CalledProcessError':
                # It's up to the subprocess handler to report something useful
                msg = "Error occurred while running this command:"
                _log.error(msg + '\n' + exc_value)
                return ExitCode.child_process_error
            elif not options.verbose:
                # In verbose mode the error was already logged above.
                _log.error(error_text)

        return ExitCode.other_error
    except Exception as e:
        _log.error(str(e))
        return ExitCode.other_error

    if not validate_pdfa(options.output_file, _log):
        _log.warning('Output file: The generated PDF/A file is INVALID')
        return ExitCode.invalid_output_pdfa

    with _pdfinfo_lock:
        _log.debug(_pdfinfo)
        # Compass letter reported for each rotation angle
        direction = {0: 'n', 90: 'e', 180: 's', 270: 'w'}
        orientations = []
        # Report 1-based page numbers, using the page yielded by the loop
        # instead of indexing the container again.
        for page_number, page in enumerate(_pdfinfo, start=1):
            angle = page.get('rotated', 0)
            if angle != 0:
                orientations.append('{0}{1}'.format(page_number,
                                                    direction.get(angle, '')))
        if orientations:
            _log.info('Page orientations detected: ' + ' '.join(orientations))

    return ExitCode.ok

Beispiel #15

0

Datei anzeigen

Datei: tap2.py Projekt: gmarcais/pavfinder

# Parse the command line, then derive and validate the run parameters.
args = parser.parse_args()
params = get_params(args, args.params)
check_params(args, params)

# Make sure the logs directory exists under the output directory.
logs_dir = args.outdir + '/logs'
if not os.path.exists(logs_dir):
    os.makedirs(logs_dir)

# One timestamped log file per invocation.
# NOTE(review): the timestamp contains ':' characters, which some file
# systems do not accept in file names -- confirm the target platforms.
log_file = '%s/log.%s.txt' % (logs_dir, datetime.datetime.now().strftime("%Y-%m-%d_%H:%M:%S"))
logger, logging_mutex = cmdline.setup_logging(__name__,
                                              log_file,
                                              args.verbose)
print 'log_file:', log_file

cmdline.run(args)

# Build the list of read pairs from either explicit FASTQ arguments or a
# list file; it stays empty when neither is given.
read_pairs = []
if args.fq:
    read_pairs = format_read_pairs(fqs=args.fq)
elif args.fq_list:
    read_pairs = format_read_pairs(list_file=args.fq_list)

# Output locations; each tool's directory name embeds the version string
# returned by get_version for that tool.
history_file = '%s/.ruffus_history.sqlite' % args.outdir
bbt_outdir = '%s/bbt_%s' % (args.outdir, get_version('bbt'))
assembly_outdir = '%s/rnabloom_%s' % (args.outdir, get_version('rnabloom'))
pv_outdir = '%s/pv_%s' % (args.outdir, get_version('pv'))
bbt_prefix = bbt_outdir + '/' + args.sample

# for determining how many procs/threads to give to each analysis
num_analysis = 2

Beispiel #16

0

Datei anzeigen

Datei: main.py Projekt: lucasguillermo/OCRmyPDF

def run_pipeline():
    """Run the pipeline via ``cmdline.run``.

    The value of ``available_cpu_count()`` is passed as the ``multiprocess``
    argument.
    """
    cpu_count = available_cpu_count()
    cmdline.run(options, multiprocess=cpu_count)

Beispiel #17

0

Datei anzeigen

Datei: ruffus_ver18.py Projekt: chelauk/stem_cell_lab

    except error_drmaa_job as err:
      raise Exception("\n".join(map(str,
                      ["Failed to run:",
                        cmd,
                        err,
                        stdout_res,
                        stderr_res])))

    with logger_mutex:
      logger.debug("kallisto worked")




# Script entry point: run the pipeline, close the DRMAA session, then write
# a flowchart image and print the pipeline.
if __name__ == '__main__':
  # The job count from the command line is passed as `multithread`.
  cmdline.run (options, multithread = options.jobs)
  # NOTE(review): the session is not closed if cmdline.run raises -- confirm
  # whether that matters for the cluster setup in use.
  drmaa_session.exit()
  # Draw the flowchart for the listed target tasks into a JPEG file.
  pipeline_printout_graph ("bulk_rna-seq.jpg", "jpg", [trim_fastq,hisat2,star,kallisto,cufflinks,qorts],
                          no_key_legend=True,
                          ignore_upstream_of_target=True,
                          pipeline_name="bulk RNA-seq",
                          user_colour_scheme = {
                                                "colour_scheme_index" :2,
                                                "Bulk RNA-seq"      :{"fontcolor" : '"#FF3232"' },
                                                "Task to run"       :{"linecolor" : '"#0044A0"' },
                                                "Final target"      :{"fillcolor" : '"#EFA03B"',
                                                                       "fontcolor" : "black",
                                                                       "dashed"    : 0           }
                                               })
  pipeline_printout()

Beispiel #18

0

Datei anzeigen

Datei: __main__.py Projekt: jbarlow83/OCRmyPDF

def run_pipeline(args=None):
    """Parse arguments, build and run the ruffus pipeline, return an ExitCode.

    Args:
        args: Argument list handed to ``parser.parse_args``.

    Returns:
        ``ExitCode.ok`` on success, otherwise the ``ExitCode`` describing the
        failure: bad arguments, a failed job, an ``ExitCodeException``, an
        unexpected exception, or an output file that fails the PDF/A or qpdf
        checks.
    """
    options = parser.parse_args(args=args)
    options.verbose_abbreviated_path = 1
    # The environment variable switches the run to threads.
    if os.environ.get('_OCRMYPDF_THREADS'):
        options.use_threads = True

    if not check_closed_streams(options):
        return ExitCode.bad_args

    logger_args = {'verbose': options.verbose, 'quiet': options.quiet}

    _log, _log_mutex = proxy_logger.make_shared_logger_and_proxy(
        logging_factory, __name__, logger_args
    )
    preamble(_log)
    check_options(options, _log)
    check_dependency_versions(options, _log)

    # Any changes to options will not take effect for options that are already
    # bound to function parameters in the pipeline. (For example
    # options.input_file, options.pdf_renderer are already bound.)
    if not options.jobs:
        options.jobs = available_cpu_count()

    # Performance is improved by setting Tesseract to single threaded. In tests
    # this gives better throughput than letting a smaller number of Tesseract
    # jobs run multithreaded. Same story for pngquant. Tess <4 ignores this
    # variable, but harmless to set if ignored.
    os.environ.setdefault('OMP_THREAD_LIMIT', '1')

    check_environ(options, _log)
    # Under pytest, publish the input file name through the environment.
    if os.environ.get('PYTEST_CURRENT_TEST'):
        os.environ['_OCRMYPDF_TEST_INFILE'] = options.input_file

    try:
        # All intermediate files, including the ruffus history database, live
        # in a fresh temporary work folder.
        work_folder = mkdtemp(prefix="com.github.ocrmypdf.")
        options.history_file = os.path.join(work_folder, 'ruffus_history.sqlite')
        start_input_file = os.path.join(work_folder, 'origin')

        check_input_file(options, _log, start_input_file)
        check_requested_output_file(options, _log)

        # Start a manager and obtain a JobContext from it.
        manager = JobContextManager()
        manager.register('JobContext', JobContext)  # pylint: disable=no-member
        manager.start()

        context = manager.JobContext()  # pylint: disable=no-member
        context.set_options(options)
        context.set_work_folder(work_folder)

        build_pipeline(options, work_folder, _log, context)
        # Cleanup is registered only once the pipeline has been built.
        atexit.register(cleanup_working_files, work_folder, options)
        # os.nice is not available on every platform.
        if hasattr(os, 'nice'):
            os.nice(5)
        cmdline.run(options)
    except ruffus_exceptions.RethrownJobError as e:
        if options.verbose:
            _log.debug(str(e))  # stringify exception so logger doesn't have to
        exceptions = e.job_exceptions
        exitcode = traverse_ruffus_exception(exceptions, options, _log)
        if exitcode is None:
            _log.error("Unexpected ruffus exception: " + str(e))
            _log.error(repr(e))
            return ExitCode.other_error
        return exitcode
    except ExitCodeException as e:
        # These exceptions carry the exit code to report.
        return e.exit_code
    except Exception as e:
        _log.error(str(e))
        return ExitCode.other_error

    if options.flowchart:
        # Flowchart mode returns without inspecting an output file.
        _log.info(f"Flowchart saved to {options.flowchart}")
        return ExitCode.ok
    elif options.output_file == '-':
        _log.info("Output sent to stdout")
    elif os.path.samefile(options.output_file, os.devnull):
        pass  # Say nothing when sending to dev null
    else:
        # A real output file: check the PDF/A claim when one was requested,
        # then check the file with qpdf.
        if options.output_type.startswith('pdfa'):
            pdfa_info = file_claims_pdfa(options.output_file)
            if pdfa_info['pass']:
                msg = f"Output file is a {pdfa_info['conformance']} (as expected)"
                _log.info(msg)
            else:
                msg = f"Output file is okay but is not PDF/A (seems to be {pdfa_info['conformance']})"
                _log.warning(msg)
                return ExitCode.pdfa_conversion_failed
        if not qpdf.check(options.output_file, _log):
            _log.warning('Output file: The generated PDF is INVALID')
            return ExitCode.invalid_output_pdf

        report_output_file_size(options, _log, start_input_file, options.output_file)

    pdfinfo = context.get_pdfinfo()
    if options.verbose:
        from pprint import pformat

        _log.debug(pformat(pdfinfo))

    log_page_orientations(pdfinfo, _log)

    return ExitCode.ok

Beispiel #19

0

Datei anzeigen

Datei: __main__.py Projekt: zgsxwsdxg/OCRmyPDF

def run_pipeline():
    """Parse arguments, build and run the ruffus pipeline, return an ExitCode.

    Returns:
        ``ExitCode.ok`` on success, otherwise the ``ExitCode`` describing the
        failure: bad arguments, a missing input file, an unwritable output
        location, a failed job, an ``ExitCodeException``, an unexpected
        exception, or an output file that fails the PDF/A or qpdf checks.
    """
    options = parser.parse_args()
    options.verbose_abbreviated_path = 1

    if not check_closed_streams(options):
        return ExitCode.bad_args

    logger_args = {'verbose': options.verbose, 'quiet': options.quiet}

    _log, _log_mutex = proxy_logger.make_shared_logger_and_proxy(
        logging_factory, __name__, logger_args)
    _log.debug('ocrmypdf ' + VERSION)
    _log.debug('tesseract ' + tesseract.version())

    check_options(options, _log)

    # Any changes to options will not take effect for options that are already
    # bound to function parameters in the pipeline. (For example
    # options.input_file, options.pdf_renderer are already bound.)
    if not options.jobs:
        options.jobs = available_cpu_count()
    try:
        # All intermediate files, including the ruffus history database, live
        # in a fresh temporary work folder.
        work_folder = mkdtemp(prefix="com.github.ocrmypdf.")
        options.history_file = os.path.join(
            work_folder, 'ruffus_history.sqlite')
        start_input_file = os.path.join(
            work_folder, 'origin')

        if options.input_file == '-':
            # stdin
            _log.info('reading file from standard input')
            with open(start_input_file, 'wb') as stream_buffer:
                from shutil import copyfileobj
                copyfileobj(sys.stdin.buffer, stream_buffer)
        else:
            try:
                re_symlink(options.input_file, start_input_file, _log)
            except FileNotFoundError:
                _log.error("File not found - " + options.input_file)
                return ExitCode.input_file

        if options.output_file == '-':
            if sys.stdout.isatty():
                _log.error(textwrap.dedent("""\
                    Output was set to stdout '-' but it looks like stdout
                    is connected to a terminal.  Please redirect stdout to a
                    file."""))
                return ExitCode.bad_args
        elif not is_file_writable(options.output_file):
            _log.error(textwrap.dedent("""\
                Output file location is not writable."""))
            return ExitCode.file_access_error

        # Start a manager and obtain a JobContext from it.
        manager = JobContextManager()
        manager.register('JobContext', JobContext)
        manager.start()

        context = manager.JobContext()
        context.set_options(options)
        context.set_work_folder(work_folder)

        build_pipeline(options, work_folder, _log, context)
        atexit.register(cleanup_working_files, work_folder, options)
        cmdline.run(options)
    except ruffus_exceptions.RethrownJobError as e:
        if options.verbose:
            _log.debug(str(e))  # stringify exception so logger doesn't have to

        # Ruffus flattens exception to 5 element tuples. Because of a bug
        # in <= 2.6.3 it may present either the single:
        #   (task, job, exc, value, stack)
        # or something like:
        #   [[(task, job, exc, value, stack)]]
        #
        # Generally cross-process exception marshalling doesn't work well
        # and ruffus doesn't support because BaseException has its own
        # implementation of __reduce__ that attempts to reconstruct the
        # exception based on e.__init__(e.args).
        #
        # Attempting to log the exception directly marshalls it to the logger
        # which is probably in another process, so it's better to log only
        # data from the exception at this point.

        exitcode = traverse_ruffus_exception(e.args, options, _log)
        if exitcode is None:
            _log.error("Unexpected ruffus exception: " + str(e))
            _log.error(repr(e))
            return ExitCode.other_error
        return exitcode
    except ExitCodeException as e:
        # These exceptions carry the exit code to report.
        return e.exit_code
    except Exception as e:
        # As explained above, log text only; never hand the exception object
        # itself to the shared logger.
        _log.error(str(e))
        return ExitCode.other_error

    if options.flowchart:
        _log.info("Flowchart saved to {}".format(options.flowchart))
    elif options.output_file != '-':
        if options.output_type == 'pdfa':
            pdfa_info = file_claims_pdfa(options.output_file)
            if pdfa_info['pass']:
                msg = 'Output file is a {} (as expected)'
                _log.info(msg.format(pdfa_info['conformance']))
            else:
                msg = 'Output file is okay but is not PDF/A (seems to be {})'
                _log.warning(msg.format(pdfa_info['conformance']))

                return ExitCode.invalid_output_pdf
        if not qpdf.check(options.output_file, _log):
            _log.warning('Output file: The generated PDF is INVALID')
            return ExitCode.invalid_output_pdf
    else:
        _log.info("Output sent to stdout")

    pdfinfo = context.get_pdfinfo()
    if options.verbose:
        from pprint import pformat
        _log.debug(pformat(pdfinfo))
    # Compass letter reported for each rotation angle
    direction = {0: 'n', 90: 'e',
                 180: 's', 270: 'w'}
    orientations = []
    # Report 1-based page numbers, reading the rotation from the page yielded
    # by the loop instead of indexing pdfinfo a second time.
    for page_number, page in enumerate(pdfinfo, start=1):
        angle = page.rotation or 0
        if angle != 0:
            orientations.append('{0}{1}'.format(
                page_number,
                direction.get(angle, '')))
    if orientations:
        _log.info('Page orientations detected: ' + ' '.join(orientations))

    return ExitCode.ok

Beispiel #20

0

Datei anzeigen

              (outfile, infiles[0], infiles[1]))

    out.close()


@follows(exampleCombinations, examplePermutations, exampleProduct)
def advancedRuffus():
    '''
    Dummy target that groups a subsection of the pipeline.

    The function has no body of its own; it only exists so that the tasks
    named in its @follows decorator can be requested as a unit.
    Running the pipeline as make advancedRuffus will update, if needed,
    exampleCombinations, examplePermutations and exampleProduct,
    plus any prior steps they depend upon - these are exampleOriginate,
    exampleTransform and exampleSubdivide.
    exampleMerge, exampleSplit and exampleCollate will not be run.
    '''


@follows(basicRuffus, advancedRuffus)
def full():
    '''
    Dummy target that depends on every branch of the pipeline.

    All cgat pipelines should end with a full() function which updates,
    if needed, all branches of the pipeline.
    The @follows statement should ensure that all functions are covered,
    either directly or as prerequisites; here it lists basicRuffus and
    advancedRuffus.
    '''


# This call is essential: it hands the options to ruffus, which then runs
# the pipeline.
cmdline.run(options)

Beispiel #21

0

Datei anzeigen

Datei: tpch.py Projekt: alexmemory/PrDBSens

            dfm = dfo.join(dft)  # Merge in preparation for comparison
            assert len(dfo) == len(dft) == len(dfm)
            results.append({
                'numerator':
                info['numerator'],
                'denominator':
                info['denominator'],
                'pearson':
                dfm.corr(method='pearson').loc['orig']['xform'],
                'spearman':
                dfm.corr(method='spearman').loc['origRnk']['xformRnk'],
                'kendall':
                dfm.corr(method='kendall').loc['origRnk']['xformRnk']
            })
            lg.info("xform::powall_cmp path::%s ::done" % xfpath)

        newk = 'comparison'
        ous[newk] = pd.DataFrame(results).set_index(
            ['numerator', 'denominator'])
        ous.get_storer(newk).attrs.info = ins.get_storer('orig').attrs.info

    finally:
        ins.close()
        ous.close()


# Run the pipeline, passing ruffus' CHECKSUM_HISTORY_TIMESTAMPS constant as
# the checksum level and `lg` as the logger.
cmdline.run(options,
            checksum_level=rf.ruffus_utility.CHECKSUM_HISTORY_TIMESTAMPS,
            logger=lg)

Beispiel #22

0

Datei anzeigen

    pipe.transform(
        name="convert_csv_files_to_tsv",
        task_func=csv_to_tsv,
        input=output_from("create_three_new_files"),
        filter=suffix(".csv"),
        output=".tsv",
    )

    pipe.transform(
        name="calculate_md5",
        task_func=md5,
        input=output_from("convert_csv_files_to_tsv"),
        filter=suffix(".tsv"),
        output=".md5sum",
    )

    return pipe


if __name__ == "__main__":
    # Standard ruffus command-line parser; the "jobs" argument is left out
    # on purpose via ignored_args.
    parser = cmdline.get_argparse(description="CNV Calling",
                                  ignored_args=["jobs"])

    options = parser.parse_args()
    # Keep the ruffus job history database inside the working directory.
    options.history_file = os.path.join(WORK_DIR, ".ruffus_history.sqlite")

    pipeline = build_pipeline()

    # The ruffus keyword is `multithread`; the previous spelling
    # (`multithead`) did not match any run option, so the request for three
    # worker threads presumably never took effect.
    cmdline.run(options, multithread=3)

Beispiel #23

0

Datei anzeigen

Datei: sipp.py Projekt: pythseq/genomic_purity

# parser.add_argument('--pipeline', "-p",
# 					type=str,
# 					choices = ['sim_pure', 'sim_contam','real_pure','test_pipe'],
#                     help="Defining which pipeline to run")

parser.add_argument(
    '--config_file',
    "-cf",
    type=str,
    #metavar="config_file",
    help="yaml file with pipeline parameters")

options = parser.parse_args()

## standard python logger which can be synchronised across concurrent Ruffus tasks
## define logging output with --log_file  log_file_name
logger, logger_mutex = cmdline.setup_logging(__name__, options.log_file,
                                             options.verbose)

# Load the configuration and run the pipeline only when none of the
# just_print / flowchart / touch_files_only options was given.
if not (options.just_print or options.flowchart or options.touch_files_only):

    # open() replaces the Python-2-only file() builtin, and the with-block
    # closes the handle as soon as the YAML has been parsed.
    with open(options.config_file, 'r') as config_file:
        # NOTE(review): yaml.load without an explicit Loader can build
        # arbitrary Python objects and is rejected by recent PyYAML
        # releases; consider yaml.safe_load if the config is plain data.
        config = yaml.load(config_file)

    pipeline1a = make_sipp(org_list=config['org_list'], config=config)
    cmdline.run(options, logger=logger)
    sys.exit()

Beispiel #24

0

Datei anzeigen

Datei: main.py Projekt: silasxue/OCRmyPDF

def run_pipeline():
    """Run the ruffus pipeline and map its outcome to an ExitCode.

    Sets ``options.jobs`` to ``available_cpu_count()`` when it is unset,
    points the ruffus history file into ``work_folder`` and calls
    ``cmdline.run``.  A ``RethrownJobError`` is inspected entry by entry to
    pick a matching exit code; any other exception is logged and reported as
    ``ExitCode.other_error``.  After a successful run the output file is
    checked with ``validate_pdfa`` and the detected page rotations are
    logged.

    Returns:
        A member of ``ExitCode``.
    """
    if not options.jobs:
        options.jobs = available_cpu_count()
    try:
        options.history_file = os.path.join(work_folder, 'ruffus_history.sqlite')
        cmdline.run(options)
    except ruffus_exceptions.RethrownJobError as e:
        if options.verbose:
            _log.debug(e)

        # Yuck. Hunt through the ruffus exception to find out what the
        # return code is supposed to be.
        # Ruffus flattens the exception to a string, throwing away all kinds
        # of helpful details
        # task_name, job_name - ruffus status
        # exc_name - class name of exception
        # exc_value - irritating string that makes impossible to recover
        #   exception object
        # exc_stack - string that contains traceback of exception
        for exc in e.args:
            task_name, job_name, exc_name, exc_value, exc_stack = exc
            if exc_name == 'builtins.SystemExit':
                match = re.search(r"\.(.+?)\)", exc_value)
                if match is None:
                    # The message did not carry a recognisable exit code
                    # name; report a generic failure instead of crashing
                    # inside the exception handler.
                    return ExitCode.other_error
                # Fall back to the ExitCode member itself, not to the
                # string 'other_error', so callers always get an ExitCode.
                return getattr(ExitCode, match.group(1), ExitCode.other_error)
            elif exc_name == 'ruffus.ruffus_exceptions.MissingInputFileError':
                _log.error(cleanup_ruffus_error_message(exc_value))
                return ExitCode.input_file
            elif exc_name == 'builtins.TypeError':
                # Even though repair_pdf will fail, ruffus will still try
                # to call split_pages with no input files, likely due to a bug
                if task_name == 'split_pages':
                    _log.error("Input file '{0}' is not a valid PDF".format(
                        options.input_file))
                    return ExitCode.input_file
            elif exc_name == 'subprocess.CalledProcessError':
                # It's up to the subprocess handler to report something useful
                msg = "Error occurred while running this command:"
                _log.error(msg + '\n' + exc_value)
                return ExitCode.child_process_error
            elif not options.verbose:
                # In verbose mode the exception was already logged above.
                _log.error(e)

        return ExitCode.other_error
    except Exception as e:
        _log.error(e)
        return ExitCode.other_error

    if not validate_pdfa(options.output_file, _log):
        _log.warning('Output file: The generated PDF/A file is INVALID')
        return ExitCode.invalid_output_pdfa

    with _pdfinfo_lock:
        _log.debug(_pdfinfo)
        # Maps a rotation angle to the compass letter used in the log line.
        direction = {0: 'n', 90: 'e',
                     180: 's', 270: 'w'}
        orientations = []
        for n, page in enumerate(_pdfinfo):
            angle = page.get('rotated', 0)
            if angle != 0:
                # Pages are reported 1-based.
                orientations.append('{0}{1}'.format(
                    n + 1,
                    direction.get(angle, '')))
        if orientations:
            _log.info('Page orientations detected: ' + ' '.join(orientations))

    return ExitCode.ok

Beispiel #25

0

Datei anzeigen

Datei: gatk_exome_pipeline.py Projekt: iris42/gatk_exome_pipeline


@follows(merge_hf_vcf)
@transform((filtrate_low_qual, select_snp_variants, select_indel_variants, hardfilter_indel_variants,
            hardfilter_snp_variants),
           suffix(vcf_ext), vcf_ext+zeroed_ext)
def remove_intermediate_vcfs(in_vcf, out):
    """Clean up an intermediate VCF once merge_hf_vcf is done.

    Passes ``in_vcf`` to ``zero_file``, deletes the companion ``.idx`` file
    and leaves an empty file at ``out`` as the task's output.
    """
    zero_file(in_vcf)
    index_path = in_vcf + '.idx'
    os.remove(index_path)
    # Opening for writing creates (or truncates) the output file.
    with open(out, 'w'):
        pass

@follows(merge_hf_vcf)
@transform(realign_indel, suffix(realignedbam_ext), realignedbam_ext+zeroed_ext)
def remove_realigned_bam(in_fn, out_fn):
    """Clean up a realigned BAM once merge_hf_vcf is done.

    Passes ``in_fn`` to ``zero_file``, deletes the file whose name is
    ``in_fn`` with its last character replaced by ``i`` (presumably the
    ``.bai`` index - confirm), and leaves an empty file at ``out_fn``.
    """
    zero_file(in_fn)
    index_path = in_fn[:-1] + 'i'
    os.remove(index_path)
    # Opening for writing creates (or truncates) the output file.
    with open(out_fn, 'w'):
        pass

@follows(merge_hf_vcf)
@transform(get_recal_group, suffix(recal_ext), recal_ext+zeroed_ext)
def remove_read_group_file(in_fn, out_fn):
    """Clean up a recalibration group file once merge_hf_vcf is done.

    Passes the first element of ``in_fn`` to ``zero_file`` and leaves an
    empty file at ``out_fn`` as the task's output.
    """
    first_input = in_fn[0]
    zero_file(first_input)
    # Opening for writing creates (or truncates) the output file.
    with open(out_fn, 'w'):
        pass

# Store the ruffus job history in a hidden SQLite file (relative path).
options.history_file = '.gatk_exome_pipeline.ruffus_history.sqlite'

# NOTE(review): ruffus' pipeline_run keyword is spelled `touch_files_only`;
# `touch_file_only` below looks like a typo that is probably ignored -
# confirm which behaviour is intended before renaming it.
cmdline.run(options, gnu_make_maximal_rebuild_mode=True, checksum_level=1, touch_file_only=True)

Beispiel #26

0

Datei anzeigen

def run_pipeline():
    """Parse the command line, run the OCR pipeline and return an ExitCode.

    The function parses the arguments, sets up the shared logger, validates
    options and environment, creates a temporary work folder, builds the
    ruffus pipeline and runs it with ``cmdline.run``.  Failures are
    translated into ``ExitCode`` values; after a successful run the output
    file is checked (PDF/A claim and ``qpdf.check``) and page orientations
    are logged.

    Returns:
        A member of ``ExitCode``.
    """
    options = parser.parse_args()
    options.verbose_abbreviated_path = 1
    # str() is required: concatenating the Namespace directly raises
    # TypeError before anything else can run.
    print("Inside of options is: " + str(options))

    if not check_closed_streams(options):
        return ExitCode.bad_args

    logger_args = {'verbose': options.verbose, 'quiet': options.quiet}

    _log, _log_mutex = proxy_logger.make_shared_logger_and_proxy(
        logging_factory, __name__, logger_args)
    preamble(_log)
    check_options(options, _log)

    # Complain about qpdf version < 7.0.0
    # Suppress the warning if in the test suite, since there are no PPAs
    # for qpdf 7.0.0 for Ubuntu trusty (i.e. Travis)
    # Compare numerically: as plain strings '10.0.0' sorts before '7.0.0'
    # and would trigger a bogus warning.
    import re
    qpdf_version_info = tuple(
        int(part) for part in re.findall(r'\d+', qpdf.version())[:3])
    if qpdf_version_info < (7, 0, 0) and \
            not os.environ.get('PYTEST_CURRENT_TEST'):
        complain(
            "You are using qpdf version {0} which has known issues including "
            "security vulnerabilities with certain malformed PDFs. Consider "
            "upgrading to version 7.0.0 or newer.".format(qpdf.version()))

    # Any changes to options will not take effect for options that are already
    # bound to function parameters in the pipeline. (For example
    # options.input_file, options.pdf_renderer are already bound.)
    if not options.jobs:
        options.jobs = available_cpu_count()

    # Performance is improved by setting Tesseract to single threaded. In tests
    # this gives better throughput than letting a smaller number of Tesseract
    # jobs run multithreaded. Same story for pngquant. Tess <4 ignores this
    # variable, but harmless to set if ignored.
    os.environ.setdefault('OMP_THREAD_LIMIT', '1')

    check_environ(options, _log)
    if os.environ.get('PYTEST_CURRENT_TEST'):
        os.environ['_OCRMYPDF_TEST_INFILE'] = options.input_file

    try:
        work_folder = mkdtemp(prefix="com.github.ocrmypdf.")
        options.history_file = os.path.join(
            work_folder, 'ruffus_history.sqlite')
        start_input_file = os.path.join(
            work_folder, 'origin')

        check_input_file(options, _log, start_input_file)
        check_requested_output_file(options, _log)

        manager = JobContextManager()
        manager.register('JobContext', JobContext)  # pylint: disable=no-member
        manager.start()

        context = manager.JobContext()  # pylint: disable=no-member
        context.set_options(options)
        context.set_work_folder(work_folder)

        build_pipeline(options, work_folder, _log, context)
        atexit.register(cleanup_working_files, work_folder, options)
        cmdline.run(options)
    except ruffus_exceptions.RethrownJobError as e:
        if options.verbose:
            _log.debug(str(e))  # stringify exception so logger doesn't have to
        exceptions = e.job_exceptions
        exitcode = traverse_ruffus_exception(exceptions, options, _log)
        if exitcode is None:
            _log.error("Unexpected ruffus exception: " + str(e))
            _log.error(repr(e))
            return ExitCode.other_error
        return exitcode
    except ExitCodeException as e:
        return e.exit_code
    except Exception as e:
        _log.error(str(e))
        return ExitCode.other_error

    if options.flowchart:
        _log.info("Flowchart saved to {}".format(options.flowchart))
        return ExitCode.ok
    elif options.output_file == '-':
        _log.info("Output sent to stdout")
    elif os.path.samefile(options.output_file, os.devnull):
        pass  # Say nothing when sending to dev null
    else:
        if options.output_type.startswith('pdfa'):
            pdfa_info = file_claims_pdfa(options.output_file)
            if pdfa_info['pass']:
                msg = 'Output file is a {} (as expected)'
                _log.info(msg.format(pdfa_info['conformance']))
            else:
                msg = 'Output file is okay but is not PDF/A (seems to be {})'
                _log.warning(msg.format(pdfa_info['conformance']))
                return ExitCode.pdfa_conversion_failed
        if not qpdf.check(options.output_file, _log):
            _log.warning('Output file: The generated PDF is INVALID')
            return ExitCode.invalid_output_pdf

        report_output_file_size(options, _log, start_input_file,
                                options.output_file)

    pdfinfo = context.get_pdfinfo()
    if options.verbose:
        from pprint import pformat
        _log.debug(pformat(pdfinfo))

    log_page_orientations(pdfinfo, _log)

    return ExitCode.ok

Beispiel #27

0

Datei anzeigen

Datei: __main__.py Projekt: stweil/OCRmyPDF

def run_pipeline():
    """Parse the command line, run the OCR pipeline and return an ExitCode.

    The function parses the arguments, sets up the shared logger, validates
    the options, stages the input file in a temporary work folder, checks
    the requested output location, builds the ruffus pipeline and runs it
    with ``cmdline.run``.  Failures are translated into ``ExitCode`` values;
    after a successful run the output file is checked (PDF/A claim and
    ``qpdf.check``) and page orientations are logged.

    Returns:
        A member of ``ExitCode``.
    """
    options = parser.parse_args()
    options.verbose_abbreviated_path = 1

    if not check_closed_streams(options):
        return ExitCode.bad_args

    logger_args = {'verbose': options.verbose, 'quiet': options.quiet}

    _log, _log_mutex = proxy_logger.make_shared_logger_and_proxy(
        logging_factory, __name__, logger_args)
    _log.debug('ocrmypdf ' + VERSION)
    _log.debug('tesseract ' + tesseract.version())
    _log.debug('qpdf ' + qpdf.version())

    check_options(options, _log)

    # A limit of 0 megapixels switches PIL's image size check off (None).
    PIL.Image.MAX_IMAGE_PIXELS = int(options.max_image_mpixels * 1000000)
    if PIL.Image.MAX_IMAGE_PIXELS == 0:
        PIL.Image.MAX_IMAGE_PIXELS = None

    # Complain about qpdf version < 7.0.0
    # Suppress the warning if in the test suite, since there are no PPAs
    # for qpdf 7.0.0 for Ubuntu trusty (i.e. Travis)
    # Compare numerically: as plain strings '10.0.0' sorts before '7.0.0'
    # and would trigger a bogus warning.
    import re
    qpdf_version_info = tuple(
        int(part) for part in re.findall(r'\d+', qpdf.version())[:3])
    if qpdf_version_info < (7, 0, 0) and \
            not os.environ.get('PYTEST_CURRENT_TEST'):
        complain(
            "You are using qpdf version {0} which has known issues including "
            "security vulnerabilities with certain malformed PDFs. Consider "
            "upgrading to version 7.0.0 or newer.".format(qpdf.version()))

    # Any changes to options will not take effect for options that are already
    # bound to function parameters in the pipeline. (For example
    # options.input_file, options.pdf_renderer are already bound.)
    if not options.jobs:
        options.jobs = available_cpu_count()
    try:
        work_folder = mkdtemp(prefix="com.github.ocrmypdf.")
        options.history_file = os.path.join(
            work_folder, 'ruffus_history.sqlite')
        start_input_file = os.path.join(
            work_folder, 'origin')

        if options.input_file == '-':
            # stdin
            _log.info('reading file from standard input')
            with open(start_input_file, 'wb') as stream_buffer:
                from shutil import copyfileobj
                copyfileobj(sys.stdin.buffer, stream_buffer)
        else:
            try:
                re_symlink(options.input_file, start_input_file, _log)
            except FileNotFoundError:
                _log.error("File not found - " + options.input_file)
                return ExitCode.input_file

        if options.output_file == '-':
            if sys.stdout.isatty():
                _log.error(textwrap.dedent("""\
                    Output was set to stdout '-' but it looks like stdout
                    is connected to a terminal.  Please redirect stdout to a
                    file."""))
                return ExitCode.bad_args
        elif not is_file_writable(options.output_file):
            _log.error(
                "Output file location (" + options.output_file + ") " +
                "is not a writable file.")
            return ExitCode.file_access_error

        manager = JobContextManager()
        manager.register('JobContext', JobContext)  # pylint: disable=no-member
        manager.start()

        context = manager.JobContext()  # pylint: disable=no-member
        context.set_options(options)
        context.set_work_folder(work_folder)

        build_pipeline(options, work_folder, _log, context)
        atexit.register(cleanup_working_files, work_folder, options)
        cmdline.run(options)
    except ruffus_exceptions.RethrownJobError as e:
        if options.verbose:
            _log.debug(str(e))  # stringify exception so logger doesn't have to

        # Ruffus flattens exception to 5 element tuples. Because of a bug
        # in <= 2.6.3 it may present either the single:
        #   (task, job, exc, value, stack)
        # or something like:
        #   [[(task, job, exc, value, stack)]]
        #
        # Generally cross-process exception marshalling doesn't work well
        # and ruffus doesn't support because BaseException has its own
        # implementation of __reduce__ that attempts to reconstruct the
        # exception based on e.__init__(e.args).
        #
        # Attempting to log the exception directly marshalls it to the logger
        # which is probably in another process, so it's better to log only
        # data from the exception at this point.

        exitcode = traverse_ruffus_exception(e.args, options, _log)
        if exitcode is None:
            _log.error("Unexpected ruffus exception: " + str(e))
            _log.error(repr(e))
            return ExitCode.other_error
        return exitcode
    except ExitCodeException as e:
        return e.exit_code
    except Exception as e:
        # Log the text only, for the marshalling reason described above.
        _log.error(str(e))
        return ExitCode.other_error

    if options.flowchart:
        _log.info("Flowchart saved to {}".format(options.flowchart))
    elif options.output_file == '-':
        _log.info("Output sent to stdout")
    elif os.path.samefile(options.output_file, os.devnull):
        pass  # Say nothing when sending to dev null
    else:
        if options.output_type.startswith('pdfa'):
            pdfa_info = file_claims_pdfa(options.output_file)
            if pdfa_info['pass']:
                msg = 'Output file is a {} (as expected)'
                _log.info(msg.format(pdfa_info['conformance']))
            else:
                msg = 'Output file is okay but is not PDF/A (seems to be {})'
                _log.warning(msg.format(pdfa_info['conformance']))
                return ExitCode.invalid_output_pdf
        if not qpdf.check(options.output_file, _log):
            _log.warning('Output file: The generated PDF is INVALID')
            return ExitCode.invalid_output_pdf

    pdfinfo = context.get_pdfinfo()
    if options.verbose:
        from pprint import pformat
        _log.debug(pformat(pdfinfo))

    log_page_orientations(pdfinfo, _log)

    return ExitCode.ok

Beispiel #28

0

Datei anzeigen

Datei: sipp.py Projekt: nate-d-olson/genomic_purity

# parser.add_argument('--pipeline', "-p", 
# 					type=str, 
# 					choices = ['sim_pure', 'sim_contam','real_pure','test_pipe'],
#                     help="Defining which pipeline to run")

parser.add_argument('--config_file', "-cf",
                    type=str,
                    #metavar="config_file",
                    help="yaml file with pipeline parameters")

options = parser.parse_args()



## standard python logger which can be synchronised across concurrent Ruffus tasks
## define logging output with --log_file  log_file_name
logger, logger_mutex = cmdline.setup_logging (__name__, options.log_file, options.verbose)


# Load the configuration and run the pipeline only when none of the
# just_print / flowchart / touch_files_only options was given.
if not (options.just_print or options.flowchart or options.touch_files_only):

    # open() replaces the Python-2-only file() builtin, and the with-block
    # closes the handle as soon as the YAML has been parsed.
    with open(options.config_file, 'r') as config_file:
        # NOTE(review): yaml.load without an explicit Loader can build
        # arbitrary Python objects and is rejected by recent PyYAML
        # releases; consider yaml.safe_load if the config is plain data.
        config = yaml.load(config_file)

    pipeline1a = make_sipp(org_list = config['org_list'], config = config)
    cmdline.run (options, logger = logger)
    sys.exit()

Beispiel #29

0

Datei anzeigen

Datei: hpv_pipeline.py Projekt: mbiokyle29/pipelines

                continue
            genome = line.rstrip().split("\t")[2]
            scores[genome] += 1

    sorted_scores = sorted(scores.items(), reverse=True,
                           key=operator.itemgetter(1))
    file_root = sam_file.replace(".sorted.sam", "")
    
    fastq_file = file_root + ".fastq"
    bam_file = file_root + ".bam"
    sbam_file = file_root + ".sorted.bam"

    lines = wcl(fastq_file)
    num_lines = int(lines.split()[0])
    num_reads = num_lines / 4

    with open(data_file, "w+") as fh:
        fh.write("### DATA REPORT FOR {} ###\n".format())
        fh.write("fastq lines: \n{}\n".format(lines))
        fh.write("reads: {}\n".format(str(num_reads)))
        fh.write("\n### Genome Hit Data ###")
        for genome,score in sorted_scores:
            fh.write("{}\t{}\n".format(genome, str(score)))

    os.unlink(fastq_file)
    os.unlink(bam_file)
    os.unlink(sbam_file)

# run the pipeline
cmdline.run(options)

Beispiel #30

0

Datei anzeigen

def run_pipeline():
    """Stage the input file, run the ruffus pipeline and return an ExitCode.

    Uses the module-level ``options``, ``work_folder`` and ``_log``.  The
    input is copied from stdin or symlinked into the work folder, then
    ``cmdline.run`` executes the pipeline.  Failures are translated into
    ``ExitCode`` values; after a successful run the output file is checked
    (PDF/A claim and ``qpdf.check``) and detected page rotations are logged.

    Returns:
        A member of ``ExitCode``.
    """
    # Any changes to options will not take effect for options that are already
    # bound to function parameters in the pipeline. (For example
    # options.input_file, options.pdf_renderer are already bound.)
    global options
    if not options.jobs:
        options.jobs = available_cpu_count()
    try:
        options.history_file = os.path.join(work_folder,
                                            'ruffus_history.sqlite')
        start_input_file = os.path.join(work_folder, 'origin')

        if options.input_file == '-':
            # stdin
            _log.info('reading file from standard input')
            with open(start_input_file, 'wb') as stream_buffer:
                from shutil import copyfileobj
                copyfileobj(sys.stdin.buffer, stream_buffer)
        else:
            try:
                re_symlink(options.input_file, start_input_file, _log)
            except FileNotFoundError:
                _log.error("File not found - " + options.input_file)
                return ExitCode.input_file

        cmdline.run(options)
    except ruffus_exceptions.RethrownJobError as e:
        if options.verbose:
            _log.debug(str(e))  # stringify exception so logger doesn't have to

        # Ruffus flattens exception to 5 element tuples. Because of a bug
        # in <= 2.6.3 it may present either the single:
        #   (task, job, exc, value, stack)
        # or something like:
        #   [[(task, job, exc, value, stack)]]
        #
        # Generally cross-process exception marshalling doesn't work well
        # and ruffus doesn't support because BaseException has its own
        # implementation of __reduce__ that attempts to reconstruct the
        # exception based on e.__init__(e.args).
        #
        # Attempting to log the exception directly marshalls it to the logger
        # which is probably in another process, so it's better to log only
        # data from the exception at this point.

        exitcode = traverse_ruffus_exception(e.args)
        if exitcode is None:
            _log.error("Unexpected ruffus exception: " + str(e))
            _log.error(repr(e))
            return ExitCode.other_error
        else:
            return exitcode
    except Exception as e:
        # Log the text only, for the marshalling reason described above.
        _log.error(str(e))
        return ExitCode.other_error

    if options.output_type == 'pdfa':
        pdfa_info = file_claims_pdfa(options.output_file)
        if pdfa_info['pass']:
            msg = 'Output file is a {} (as expected)'
            _log.info(msg.format(pdfa_info['conformance']))
        else:
            msg = 'Output file was generated but is not PDF/A (seems to be {})'
            _log.warning(msg.format(pdfa_info['conformance']))

            return ExitCode.invalid_output_pdf

    if not qpdf.check(options.output_file, _log):
        _log.warning('Output file: The generated PDF is INVALID')
        return ExitCode.invalid_output_pdf

    with _pdfinfo_lock:
        _log.debug(_pdfinfo)
        # Maps a rotation angle to the compass letter used in the log line.
        direction = {0: 'n', 90: 'e', 180: 's', 270: 'w'}
        orientations = []
        for n, page in enumerate(_pdfinfo):
            angle = page.get('rotated', 0)
            if angle != 0:
                # Pages are reported 1-based.
                orientations.append('{0}{1}'.format(n + 1,
                                                    direction.get(angle, '')))
        if orientations:
            _log.info('Page orientations detected: ' + ' '.join(orientations))

    return ExitCode.ok

Beispiel #31

0

Datei anzeigen

Datei: __main__.py Projekt: stweil/OCRmyPDF

def run_pipeline():
    """Parse the command line, run the OCR pipeline and return an ExitCode.

    The function parses the arguments, sets up the shared logger, validates
    the options, stages the input file in a temporary work folder, checks
    the requested output location, builds the ruffus pipeline and runs it
    with ``cmdline.run``.  Failures are translated into ``ExitCode`` values;
    after a successful run the output file is checked (PDF/A claim and
    ``qpdf.check``) and page orientations are logged.

    Returns:
        A member of ``ExitCode``.
    """
    options = parser.parse_args()
    options.verbose_abbreviated_path = 1

    if not check_closed_streams(options):
        return ExitCode.bad_args

    logger_args = {'verbose': options.verbose, 'quiet': options.quiet}

    _log, _log_mutex = proxy_logger.make_shared_logger_and_proxy(
        logging_factory, __name__, logger_args)
    _log.debug('ocrmypdf ' + VERSION)
    _log.debug('tesseract ' + tesseract.version())
    _log.debug('qpdf ' + qpdf.version())

    check_options(options, _log)

    # A limit of 0 megapixels switches PIL's image size check off (None).
    PIL.Image.MAX_IMAGE_PIXELS = int(options.max_image_mpixels * 1000000)
    if PIL.Image.MAX_IMAGE_PIXELS == 0:
        PIL.Image.MAX_IMAGE_PIXELS = None

    # Complain about qpdf version < 7.0.0
    # Suppress the warning if in the test suite, since there are no PPAs
    # for qpdf 7.0.0 for Ubuntu trusty (i.e. Travis)
    # Compare numerically: as plain strings '10.0.0' sorts before '7.0.0'
    # and would trigger a bogus warning.
    import re
    qpdf_version_info = tuple(
        int(part) for part in re.findall(r'\d+', qpdf.version())[:3])
    if qpdf_version_info < (7, 0, 0) and \
            not os.environ.get('PYTEST_CURRENT_TEST'):
        complain(
            "You are using qpdf version {0} which has known issues including "
            "security vulnerabilities with certain malformed PDFs. Consider "
            "upgrading to version 7.0.0 or newer.".format(qpdf.version()))

    # Any changes to options will not take effect for options that are already
    # bound to function parameters in the pipeline. (For example
    # options.input_file, options.pdf_renderer are already bound.)
    if not options.jobs:
        options.jobs = available_cpu_count()
    try:
        work_folder = mkdtemp(prefix="com.github.ocrmypdf.")
        options.history_file = os.path.join(work_folder,
                                            'ruffus_history.sqlite')
        start_input_file = os.path.join(work_folder, 'origin')

        if options.input_file == '-':
            # stdin
            _log.info('reading file from standard input')
            with open(start_input_file, 'wb') as stream_buffer:
                from shutil import copyfileobj
                copyfileobj(sys.stdin.buffer, stream_buffer)
        else:
            try:
                re_symlink(options.input_file, start_input_file, _log)
            except FileNotFoundError:
                _log.error("File not found - " + options.input_file)
                return ExitCode.input_file

        if options.output_file == '-':
            if sys.stdout.isatty():
                _log.error(
                    textwrap.dedent("""\
                    Output was set to stdout '-' but it looks like stdout
                    is connected to a terminal.  Please redirect stdout to a
                    file."""))
                return ExitCode.bad_args
        elif not is_file_writable(options.output_file):
            _log.error("Output file location (" + options.output_file + ") " +
                       "is not a writable file.")
            return ExitCode.file_access_error

        manager = JobContextManager()
        manager.register('JobContext', JobContext)  # pylint: disable=no-member
        manager.start()

        context = manager.JobContext()  # pylint: disable=no-member
        context.set_options(options)
        context.set_work_folder(work_folder)

        build_pipeline(options, work_folder, _log, context)
        atexit.register(cleanup_working_files, work_folder, options)
        cmdline.run(options)
    except ruffus_exceptions.RethrownJobError as e:
        if options.verbose:
            _log.debug(str(e))  # stringify exception so logger doesn't have to

        # Ruffus flattens exception to 5 element tuples. Because of a bug
        # in <= 2.6.3 it may present either the single:
        #   (task, job, exc, value, stack)
        # or something like:
        #   [[(task, job, exc, value, stack)]]
        #
        # Generally cross-process exception marshalling doesn't work well
        # and ruffus doesn't support because BaseException has its own
        # implementation of __reduce__ that attempts to reconstruct the
        # exception based on e.__init__(e.args).
        #
        # Attempting to log the exception directly marshalls it to the logger
        # which is probably in another process, so it's better to log only
        # data from the exception at this point.

        exitcode = traverse_ruffus_exception(e.args, options, _log)
        if exitcode is None:
            _log.error("Unexpected ruffus exception: " + str(e))
            _log.error(repr(e))
            return ExitCode.other_error
        return exitcode
    except ExitCodeException as e:
        return e.exit_code
    except Exception as e:
        # Log the text only, for the marshalling reason described above.
        _log.error(str(e))
        return ExitCode.other_error

    if options.flowchart:
        _log.info("Flowchart saved to {}".format(options.flowchart))
    elif options.output_file == '-':
        _log.info("Output sent to stdout")
    elif os.path.samefile(options.output_file, os.devnull):
        pass  # Say nothing when sending to dev null
    else:
        if options.output_type.startswith('pdfa'):
            pdfa_info = file_claims_pdfa(options.output_file)
            if pdfa_info['pass']:
                msg = 'Output file is a {} (as expected)'
                _log.info(msg.format(pdfa_info['conformance']))
            else:
                msg = 'Output file is okay but is not PDF/A (seems to be {})'
                _log.warning(msg.format(pdfa_info['conformance']))
                return ExitCode.invalid_output_pdf
        if not qpdf.check(options.output_file, _log):
            _log.warning('Output file: The generated PDF is INVALID')
            return ExitCode.invalid_output_pdf

    pdfinfo = context.get_pdfinfo()
    if options.verbose:
        from pprint import pformat
        _log.debug(pformat(pdfinfo))

    log_page_orientations(pdfinfo, _log)

    return ExitCode.ok

Beispiel #32

0

Datei anzeigen

def f_alogFamily(inputFiles, outputFiles):
    """Mark this step as done by calling touch() on outputFiles.

    inputFiles is accepted but not used by the body.
    """
    touch(outputFiles)

#---------------------------------------------------------------
# homeobox figure
#
@merge(homeobox_R, "ruffus/figure.f_hb")
def f_hb(inputFiles, outputFiles):
    """Mark the homeobox figure step as done by calling touch() on outputFiles.

    inputFiles is accepted but not used by the body.
    """
    touch(outputFiles)

#---------------------------------------------------------------
# data s1
#
@merge([calculateTpm_R, downloadGenomes_sh], "ruffus/datas1")
def datas1_R(inputFiles, outputFiles):
    """Submit the datas1 R script as a job and flag the step as done.

    Calls submit_job() for 'src/R/datas1.R' with one task and one CPU per
    task, prints the job id it returns, then calls touch() on outputFiles.
    inputFiles is accepted but not used by the body.
    """
    job_name = 'datas1_R'
    # Arguments: script path, ntasks, cpus_per_task, job name.
    jobId = submit_job('src/R/datas1.R', '1', '1', job_name)
    # update ruffus flag
    print("[", print_now(), ": Job " + job_name + " run with JobID " + jobId + " ]")
    touch(outputFiles)

# options for visualising
pipeline_printout()
pipeline_printout_graph("ruffus/flowchart." + slurm_jobid + ".pdf", "pdf")

# run the pipeline
# NOTE(review): this call is not commented out, so the pipeline does run
# here, although the comment used to say "disabled for now".
cmdline.run(options, multithread = 8)

Beispiel #33

0

Datei anzeigen

Datei: gatk_exome_pipeline.py Projekt: iris42/gatk_exome_pipeline

            hardfilter_indel_variants, hardfilter_snp_variants),
           suffix(vcf_ext), vcf_ext + zeroed_ext)
def remove_intermediate_vcfs(in_vcf, out):
    zero_file(in_vcf)
    os.remove(in_vcf + '.idx')
    open(out, 'w').close()


@follows(merge_hf_vcf)
@transform(realign_indel, suffix(realignedbam_ext),
           realignedbam_ext + zeroed_ext)
def remove_realigned_bam(in_fn, out_fn):
    """Clean up a realigned BAM once merge_hf_vcf is done.

    Passes ``in_fn`` to ``zero_file``, deletes the file whose name is
    ``in_fn`` with its last character replaced by ``i`` (presumably the
    ``.bai`` index - confirm), and leaves an empty file at ``out_fn``.
    """
    zero_file(in_fn)
    index_path = in_fn[:-1] + 'i'
    os.remove(index_path)
    # Opening for writing creates (or truncates) the output file.
    with open(out_fn, 'w'):
        pass


@follows(merge_hf_vcf)
@transform(get_recal_group, suffix(recal_ext), recal_ext + zeroed_ext)
def remove_read_group_file(in_fn, out_fn):
    """Clean up a recalibration group file once merge_hf_vcf is done.

    Passes the first element of ``in_fn`` to ``zero_file`` and leaves an
    empty file at ``out_fn`` as the task's output.
    """
    first_input = in_fn[0]
    zero_file(first_input)
    # Opening for writing creates (or truncates) the output file.
    with open(out_fn, 'w'):
        pass


# Store the ruffus job history in a hidden SQLite file (relative path).
options.history_file = '.gatk_exome_pipeline.ruffus_history.sqlite'

# NOTE(review): ruffus' pipeline_run keyword is spelled `touch_files_only`;
# `touch_file_only` below looks like a typo that is probably ignored -
# confirm which behaviour is intended before renaming it.
cmdline.run(options,
            gnu_make_maximal_rebuild_mode=True,
            checksum_level=1,
            touch_file_only=True)