""" # Load packages try: from ngi_visualizations.preseq_complexity_curves import plot_complexity_curves except ImportError, e: print("###CF Error: ngi_visualizations Python Package not installed.\n", file=sys.stderr) raise ImportError(e) # Are we running as a summary module? if "summary_module" in cf["parameters"]: timestart = datetime.datetime.now() # scrape the last file names from each run file preseq_files = [] for cf in cf["runfns"]: files, config = Helpers.parse_cf(cf, False, True) preseq_files.extend(files) # Make one single plot with all files plot_complexity_curves.plot_complexity_curves(preseq_files, output_name="{}_preseq".format(cf["pipeline_name"])) duration = str(datetime.datetime.now() - timestart) print( "\n###CFCMD Ran summary ngi_visualizations.plot_complexity_curves.plot_complexity_curves()\n\n", file=sys.stderr, ) print("###CF Preseq summary plot successfully exited, took {}..\n".format(duration), file=sys.stderr) # Make one plot per file else: for fn in cf["files"]: timestart = datetime.datetime.now()
def mod_time_estimate(cf):
    """Return a time estimate for this module as a timestamp.

    Reads ``cf["num_starting_merged_aligned_files"]``, treats any value
    below one as one, and allows 6 * 60 minutes per file. The total is
    converted with ``Helpers.minutes_to_timestamp``.
    """
    file_count = cf["num_starting_merged_aligned_files"]
    # Always budget for at least one file.
    if file_count < 1:
        file_count = 1
    total_minutes = file_count * 6 * 60
    return Helpers.minutes_to_timestamp(total_minutes)
def _append_to_runfile(p, output_fn):
    """Append a "<job_id><TAB><output_fn>" record to the run file.

    Subsequent modules read the run file to find this job's output.

    p         -- dict providing the 'runfile' path and the 'job_id'
    output_fn -- path of the output file to record

    Raises IOError (after logging to stderr) if the run file can't be
    opened or written.
    """
    try:
        with open(p['runfile'], 'a') as runfile:
            # print() appends its own newline after the explicit "\n";
            # kept as in the original so the run file format is unchanged.
            print("{}\t{}\n".format(p['job_id'], output_fn), file=runfile)
    except IOError:
        # Report the *path*: the file handle name is unbound if open() failed.
        print("###CF Error: Can't write to {}\n".format(p['runfile']), file=sys.stderr)
        # Re-raise the original exception to keep its errno and traceback.
        raise


def do_main_module_function(parameters, required_cores=False, required_mem=False, required_modules=False, runfn=None, print_help=False):
    """
    -----------------------
    Example Python Module
    -----------------------
    Takes results from preseq and plots nice colourful complexity
    curves using the plot_complexity_curves() function in the
    ngi_visualizations Python package. This package is available here:
    https://github.com/ewels/ngi_visualizations
    """
    # NOTE: the docstring above is printed verbatim by --help, so its text
    # is user-facing output and is deliberately left unchanged.
    #
    # Arguments:
    #   parameters       -- passed straight to Helpers.load_runfile_params()
    #   required_cores   -- if true, print the core requirement and exit(0)
    #   required_mem     -- if true, print the memory requirement and exit(0)
    #   required_modules -- if true, print the environment modules and exit(0)
    #   runfn            -- accepted for interface compatibility; not used here
    #   print_help       -- if true, print this function's docstring and exit(0)
    #
    # Raises:
    #   IOError     -- run file can't be written, or an expected output
    #                  file is missing after the system command
    #   SystemError -- the system command returned a non-zero exit code

    #
    # JOB INITIALISATION
    # These flags are used on the head node to allocate resources
    #

    # --cores specifies how many cores are recommended
    # Return how many cores we need. This can be more or less than
    # the number recommended.
    if required_cores:
        print('1', file=sys.stdout)
        sys.exit(0)

    # --mem specifies how much memory is recommended
    # Return how much we need. This can be more or less than the amount recommended.
    if required_mem:
        print('3G', file=sys.stdout)
        sys.exit(0)

    # --modules. Return comma separated names of any
    # environment modules which should be loaded.
    if required_modules:
        print('example_module,samtools', file=sys.stdout)
        sys.exit(0)

    # --help. Print help.
    # This code prints the function docstring above
    if print_help:
        print(do_main_module_function.__doc__, file=sys.stdout)
        sys.exit(0)

    #
    # MODULE CODE
    # If we get this far, the module is being run in the pipeline
    #

    # Start the clock
    timestart = datetime.datetime.now()

    # Parse the incoming parameters and the run file
    # This function returns a number of configuration variables as well
    # as the incoming filenames as a dictionary with the following keys:
    #   files
    #   runfile
    #   job_id
    #   prev_job_id
    #   cores
    #   mem
    #   parameters
    #   config
    p = Helpers.load_runfile_params(parameters)

    #
    # RUNNING PYTHON FUNCTIONS
    # This block of code serves as an example of how to execute imported Python code
    #

    # Run the imported python function with the list of filenames
    output_files = imported_functions.my_def(p['files'])
    print("\n###CFCMD Ran my_def() from my_favourite_python_package.imported_functions\n\n", file=sys.stderr)

    # How long did it take?
    duration = str(datetime.datetime.now() - timestart)
    print("###CF Example python command successfully exited, took {}..\n".format(duration), file=sys.stderr)

    # Write the output filenames to the run file
    for output_fn in output_files:
        # Only record outputs that actually exist on disk
        if os.path.isfile(output_fn):
            _append_to_runfile(p, output_fn)

    #
    # RUNNING EXTERNAL SYSTEM COMMANDS
    # This block of code is an example of how to call external programs
    # Obviously, this will over-write the output_files variable from above
    #

    # Print version information about the program to be executed if we can.
    # This is best-effort: a missing or failing binary must not abort the job.
    print("---------- < module > version information ----------\n", file=sys.stderr)
    try:
        version_info = subprocess.check_output(shlex.split('MY_COMMAND --version'), universal_newlines=True)
    except (OSError, subprocess.CalledProcessError) as e:
        print("Could not retrieve version information: {}\n".format(e), file=sys.stderr)
    else:
        print(version_info, file=sys.stderr)
    print("\n------- End of < module > version information ------\n", file=sys.stderr)

    # NOTE - missing
    # This code should really take the list of input files and split them into
    # paired end and single end files, but I haven't written the helper functions
    # in Python to do this yet. Raise an issue on GitHub if you need this.

    # Loop through the files
    for fn in p['files']:

        # What's our output filename?
        output_fn = "{}_processed.output".format(fn)

        # Put the command together
        cmd = "my_command -c {} -m {} -g {} -i {} -o {}".format(p['cores'], p['mem'], p['references']['fasta'], fn, output_fn)
        print("\n###CFCMD {}\n\n".format(cmd), file=sys.stderr)

        # Run the system command; a non-zero code means something went wrong
        if subprocess.call(shlex.split(cmd)) != 0:
            print("###CF Error: Example module failed for input file {}\n".format(fn), file=sys.stderr)
            raise SystemError("Example module failed for input file {}".format(fn))

        # How long did it take?
        duration = str(datetime.datetime.now() - timestart)

        # Print a success message to the log file which will be e-mailed out
        print("###CF Example module system call was successful, {}..\n".format(duration), file=sys.stderr)

        # Oops - can't find the output file!
        if not os.path.isfile(output_fn):
            print("###CF Error: Example module output file {} not found\n".format(output_fn), file=sys.stderr)
            raise IOError("Example module output file {} not found".format(output_fn))

        # Record the output so that subsequent modules can use it
        _append_to_runfile(p, output_fn)