"""
    # Load packages
    try:
        from ngi_visualizations.preseq_complexity_curves import plot_complexity_curves
    except ImportError, e:
        print("###CF Error: ngi_visualizations Python Package not installed.\n", file=sys.stderr)
        raise ImportError(e)

    # Are we running as a summary module?
    if "summary_module" in cf["parameters"]:
        timestart = datetime.datetime.now()

        # scrape the last file names from each run file
        preseq_files = []
        for cf in cf["runfns"]:
            files, config = Helpers.parse_cf(cf, False, True)
            preseq_files.extend(files)

        # Make one single plot with all files
        plot_complexity_curves.plot_complexity_curves(preseq_files, output_name="{}_preseq".format(cf["pipeline_name"]))
        duration = str(datetime.datetime.now() - timestart)
        print(
            "\n###CFCMD Ran summary ngi_visualizations.plot_complexity_curves.plot_complexity_curves()\n\n",
            file=sys.stderr,
        )
        print("###CF Preseq summary plot successfully exited, took {}..\n".format(duration), file=sys.stderr)

    # Make one plot per file
    else:
        for fn in cf["files"]:
            timestart = datetime.datetime.now()
def mod_time_estimate(cf):
    """Return a run-time estimate for this module as a timestamp.

    Reads ``cf["num_starting_merged_aligned_files"]``, treats anything
    below one as a single file, and hands ``count * 6 * 60`` to
    ``Helpers.minutes_to_timestamp``, whose return value is passed back
    unchanged.
    """
    # NOTE(review): going by the helper's name the product is in minutes,
    # i.e. 360 minutes per file - confirm against Helpers.
    file_count = cf["num_starting_merged_aligned_files"]
    if file_count < 1:
        # Never estimate for fewer than one file.
        file_count = 1
    estimate = file_count * 6 * 60
    return Helpers.minutes_to_timestamp(estimate)
Beispiel #3
0
def do_main_module_function(parameters, required_cores=False, required_mem=False, required_modules=False, runfn=None, print_help=False):
    """
    -----------------------
    Example Python Module
    -----------------------
    Takes results from preseq and plots nice colourful complexity curves
    using the plot_complexity_curves() function in the ngi_visualizations
    Python package. This package is available here:
    https://github.com/ewels/ngi_visualizations
    """
    # NOTE: the docstring above doubles as the --help text (it is printed
    # verbatim when print_help is set), so it is kept as-is and the
    # developer-facing documentation lives in this comment instead.
    # NOTE(review): the help text talks about preseq plotting although the
    # code below is a generic example - looks like a copy-paste leftover,
    # confirm before rewording it.
    #
    # Arguments:
    #   parameters        passed unchanged to Helpers.load_runfile_params()
    #   required_cores    if true: print the core count ('1') and exit 0
    #   required_mem      if true: print the memory request ('3G') and exit 0
    #   required_modules  if true: print the environment modules to load
    #                     ('example_module,samtools') and exit 0
    #   runfn             accepted but not used anywhere in this function
    #   print_help        if true: print this function's docstring and exit 0
    #
    # Raises:
    #   SystemExit   (code 0) after answering any of the flags above
    #   IOError      if the run file cannot be appended to, or if the
    #                external command succeeded but its output is missing
    #   SystemError  if the external command returns a non-zero exit code

    #
    # JOB INITIALISATION
    # These flags are used on the head node to allocate resources
    #

    # --cores specifies how many cores are recommended
    # Return how many cores we need. This can be more or less than
    # the number recommended.
    if required_cores:
        print('1', file=sys.stdout)
        sys.exit(0)

    # --mem specifies how much memory is recommended
    # Return how much we need. This can be more or less than the amount recommended.
    if required_mem:
        print('3G', file=sys.stdout)
        sys.exit(0)

    # --modules. Return comma separated names of any
    # environment modules which should be loaded.
    if required_modules:
        print('example_module,samtools', file=sys.stdout)
        sys.exit(0)

    # --help. Print help. This prints the docstring of *this* function
    # (the original referenced a function that is not defined here).
    if print_help:
        print(do_main_module_function.__doc__, file=sys.stdout)
        sys.exit(0)

    #
    # MODULE CODE
    # If we get this far, the module is being run in the pipeline
    #

    # Start the clock
    timestart = datetime.datetime.now()

    # Parse the incoming parameters and the run file
    # This function returns a number of configuration variables as well
    # as the incoming filenames as a dictionary with the following keys:
    #  files
    #  runfile
    #  job_id
    #  prev_job_id
    #  cores
    #  mem
    #  parameters
    #  config
    # NOTE(review): p['references']['fasta'] is also read further down but
    # is not in the list above - confirm the helper provides it.
    p = Helpers.load_runfile_params(parameters)

    #
    # RUNNING PYTHON FUNCTIONS
    # This block of code serves as an example of how to execute imported Python code
    #

    # Run the imported python function with the list of filenames
    output_files = imported_functions.my_def(p['files'])
    print("\n###CFCMD Ran my_def() from my_favourite_python_package.imported_functions\n\n", file=sys.stderr)

    # How long did it take?
    duration = str(datetime.datetime.now() - timestart)
    print("###CF Example python command successfully exited, took {}..\n".format(duration), file=sys.stderr)

    # Write the output filenames to the run file so that subsequent
    # modules can use them. Outputs that do not exist on disk are skipped.
    for output_fn in output_files:
        if os.path.isfile(output_fn):
            _append_to_runfile(p['runfile'], p['job_id'], output_fn)

    #
    # RUNNING EXTERNAL SYSTEM COMMANDS
    # This block of code is an example of how to call external programs
    # Obviously, this will over-write the output_files variable from above
    #

    # Print version information about the program to be executed if we can.
    # This is informational only, so a missing or failing command must not
    # abort the module.
    print("---------- < module > version information ----------\n", file=sys.stderr)
    try:
        # universal_newlines=True returns text rather than bytes, so the
        # log shows the version string and not its b'...' repr.
        version_info = subprocess.check_output(shlex.split('MY_COMMAND --version'), universal_newlines=True)
    except (OSError, subprocess.CalledProcessError) as err:
        print("Could not determine version: {}".format(err), file=sys.stderr)
    else:
        print(version_info, file=sys.stderr)
    print("\n------- End of < module > version information ------\n", file=sys.stderr)

    # NOTE - missing
    # This code should really take the list of input files and split them into
    # paired end and single end files, but I haven't written the helper functions
    # in Python to do this yet. Raise an issue on GitHub if you need this.

    # Loop through the files
    for fn in p['files']:

        # What's our output filename?
        output_fn = "{}_processed.output".format(fn)

        # Put the command together
        cmd = "my_command -c {} -m {} -g {} -i {} -o {}".format(p['cores'], p['mem'], p['references']['fasta'], fn, output_fn)
        print("\n###CFCMD {}\n\n".format(cmd), file=sys.stderr)

        # Run the system command; a non-zero code means something went wrong
        if subprocess.call(shlex.split(cmd)) != 0:
            print("###CF Error: Example module failed for input file {}\n".format(fn), file=sys.stderr)
            raise SystemError("Example module failed for input file {}".format(fn))

        # How long did it take? (measured from the start of the module)
        duration = str(datetime.datetime.now() - timestart)

        # Print a success message to the log file which will be e-mailed out
        print("###CF Example module system call was successful,  {}..\n".format(duration), file=sys.stderr)

        # Oops - can't find the output file!
        if not os.path.isfile(output_fn):
            print("###CF Error: Example module output file {} not found\n".format(output_fn), file=sys.stderr)
            raise IOError("Example module output file {} not found".format(output_fn))

        # Record the output so that subsequent modules can use it
        _append_to_runfile(p['runfile'], p['job_id'], output_fn)


def _append_to_runfile(runfile_path, job_id, output_fn):
    """Append one ``job_id<TAB>output_fn`` record to the run file.

    Raises IOError (re-raised unchanged after logging) if the run file
    cannot be opened or written.
    """
    try:
        with open(runfile_path, 'a') as runfile:
            # NOTE(review): the explicit "\n" plus print()'s own newline
            # leaves a blank line after every record; kept as in the
            # original - confirm the run file parser expects this.
            print("{}\t{}\n".format(job_id, output_fn), file=runfile)
    except IOError:
        # Report the *path*: the file handle may never have been bound
        # if open() itself failed.
        print("###CF Error: Can't write to {}\n".format(runfile_path), file=sys.stderr)
        raise