def test_job_should_fail_if_too_little_memory_required(self):
    """A job that touches more memory than requested should raise OSError."""
    outfile = os.path.join(self.work_dir, "out")
    if P.get_parameters()['os'] != 'Linux':
        # memory limits are only enforced on Linux - nothing to test elsewhere
        return
    # the job allocates self.test_memory_size bytes as int8 ...
    statement = (
        "python -c 'import numpy; "
        "a = numpy.array(numpy.arange(0, {memory}), numpy.int8); "
        "out = open(\"{outfile}\", \"w\"); "
        "out.write(str(len(a)) + \"\\n\"); "
        "out.close()'".format(memory=self.test_memory_size,
                              outfile=outfile))
    # ... but only ~5% of that is requested, so the job must be killed
    requested_memory = "{}G".format(0.5 * self.test_memory_size / 10**9)
    self.assertRaises(
        OSError,
        P.run,
        statement,
        to_cluster=self.to_cluster,
        job_memory=requested_memory)
def main(argv=None):
    # Entry point: parse the command line, load configuration and run the
    # example workflow (create samples -> per-sample mean -> combined table).
    if argv is None:
        argv = sys.argv

    options, args = P.parse_commandline(argv, config_file="template.yml")

    global PARAMS
    if options.config_file:
        # load pipeline parameters, falling back to these defaults when a
        # value is absent from the configuration file
        PARAMS = P.get_parameters(
            options.config_file,
            defaults={
                "min_value": 0.0,
                "num_samples": 1000,
                "mu": 0.0,
                "sigma": 1.0
            })
    else:
        # no configuration file - delegate to P.main and exit with its status
        sys.exit(P.main(options, args))

    pipeline = ruffus.Pipeline("template_pipeline")

    # create ten numbered sample files ...
    task_create_files = pipeline.originate(
        task_func=create_files,
        output=["sample_{:02}.txt".format(x) for x in range(10)])

    # ... compute one .mean file per sample ...
    task_compute_mean = pipeline.transform(
        task_func=compute_mean,
        input=task_create_files,
        filter=ruffus.suffix(".txt"),
        output=".mean")

    # ... and merge all means into a single table
    task_combine_means = pipeline.merge(
        task_func=combine_means,
        input=task_compute_mean,
        output="means.txt")

    # primary targets
    pipeline.merge(
        task_func=P.EmptyRunner("all"),
        input=task_combine_means,
        output="all")

    E.debug("starting workflow")
    return P.run_workflow(options, args)
def test_job_should_fail_if_too_little_memory_required_in_second_statement(
        self):
    """A multi-statement job whose second statement over-allocates memory
    should raise OSError.

    Only runs on Linux, where memory limits are enforced; on other
    platforms the test is a no-op.
    """
    # BUG FIX: removed the unused local ``infile`` and the stray
    # ``infile=infile`` format argument - the format string has no
    # ``{infile}`` placeholder, so both were dead copy-paste leftovers.
    outfile = os.path.join(self.work_dir, "out")
    if P.get_parameters()['os'] == 'Linux':
        # the first statement (hostname) succeeds; the numpy allocation in
        # the second statement exceeds the requested job_memory (~5% of
        # what the job actually touches)
        self.assertRaises(
            OSError,
            P.run,
            "hostname > {outfile}; "
            "python -c 'import numpy; "
            "a = numpy.array(numpy.arange(0, {memory}), numpy.int8); "
            "out = open(\"{outfile}\", \"w\"); "
            "out.write(str(len(a)) + \"\\n\"); "
            "out.close()'".format(memory=self.test_memory_size,
                                  outfile=outfile),
            to_cluster=self.to_cluster,
            job_memory="{}G".format(0.5 * self.test_memory_size / 10**9))
    else:
        pass
Communicates with the obolibrary (obo foundry) API for hierarchical
ontology annotations. Can be used to download and parse any OWL
formatted ontology available on this site.
"""
from ruffus import *
from CGATCore import Pipeline as P
import os
import sys
import CGATPipelines.PipelineGeneInfo as PipelineGeneInfo
import CGATCore.IOTools as IOTools
import pandas as pd

# load pipeline configuration from the standard search locations
PARAMS = P.get_parameters([
    "%s/pipeline.yml" % os.path.splitext(__file__)[0],
    "../pipeline.yml",
    "pipeline.yml"
])

# pick a pathway to use to check pathway annotation has run
example_pw = PARAMS['my_gene_info_pathway'].split(",")[0]
if example_pw == "all":
    example_pw = 'kegg'

# pick an example homologene taxon for the equivalent check.
# NOTE(review): str.split(",") never returns an empty list, so the
# len() == 0 test below can never be true; only the 'all' value reaches
# the default branch.
example_homolo = str(PARAMS['my_gene_info_homologene']).split(",")
if len(example_homolo) == 0 or example_homolo[0] == 'all':
    # default taxon id is an int here but a str in the other branch -
    # presumably downstream code tolerates both; TODO confirm
    example_homolo = 10090
else:
    example_homolo = example_homolo[0]

# get the list of annotations to be downloaded from my gene info
mgiannotations = PARAMS['my_gene_info_annotations']
def setUp(self):
    # run the shared fixture setup first, then (re)load the pipeline
    # parameters so every test starts from a known configuration
    BaseTest.setUp(self)
    P.get_parameters()
import CGATCore.Experiment as E
import CGATCore.IOTools as IOTools
import CGATPipelines.PipelineMotifs as PipelineMotifs
import CGATPipelines.PipelineTracks as PipelineTracks
from CGATPipelines.Report import run_report

###################################################
###################################################
###################################################
# Pipeline configuration
###################################################
from CGATCore import Pipeline as P

# read configuration from the standard search locations;
# annotations_dir defaults to "" when not configured
P.get_parameters([
    "%s/pipeline.yml" % os.path.splitext(__file__)[0],
    "../pipeline.yml",
    "pipeline.yml"
],
    defaults={'annotations_dir': ""})

PARAMS = P.PARAMS

# parameters of the associated annotations pipeline (genesets section)
PARAMS_ANNOTATIONS = P.peek_parameters(PARAMS["annotations_dir"],
                                       "genesets")

###################################################################
###################################################################
###################################################################
# Helper functions mapping tracks to conditions, etc
###################################################################

# load all tracks - exclude input/control tracks
Sample = PipelineTracks.Sample
'pipeline_docs', 'themes') logopath = os.path.join(themedir, "cgat_logo.png") ################################################################ # Import pipeline configuration from pipeline.ini in the current # directory and the common one. # PATH were code for pipelines is stored pipelinesdir = os.path.dirname(CGATPipelines.__file__) # The default configuration file - 'inifile' is read by # sphinx-report. inifile = os.path.join(os.path.dirname(CGATPipelines.__file__), 'configuration', 'pipeline.yml') PARAMS = P.get_parameters([inifile, "pipeline.yml"]) # Definition now part of CGATReport # def setup(app): # app.add_config_value('PARAMS', {}, True) ################################################################ ################################################################ ################################################################ # The pipeline assumes that sphinxreport is called within the # working directory. If the report is in a separate build directory, # change the paths below. # # directory with export directory from pipeline # This should be a directory in the build directory - you can # link from here to a directory outside the build tree, though.
def connect():
    '''connect to database.

    Use this method to connect to additional databases.

    Returns a database connection.
    '''
    dbh = sqlite3.connect(PARAMS["database_name"])
    return dbh


#########################################################################
# read configuration from the standard search locations; paired_end
# defaults to False.  only_import is set when this file is imported as a
# module rather than executed as a script.
P.get_parameters([
    "%s/pipeline.yml" % os.path.splitext(__file__)[0],
    "../pipeline.yml",
    "pipeline.yml"
],
    defaults={'paired_end': False},
    only_import=__name__ != "__main__")

PARAMS = P.PARAMS

# share the configuration with the helper modules
PipelineMapping.PARAMS = PARAMS
PipelineMappingQC.PARAMS = PARAMS
PipelineExome.PARAMS = PARAMS

#########################################################################
#########################################################################
# Load manual annotations
#########################################################################
===================================================================================== ''' import shutil import os import sqlite3 import CGATCore.IOTools as IOTools import CGAT.IndexedGenome as IndexedGenome import CGAT.Bed as Bed from CGATCore import Pipeline as P ############################################################ ############################################################ ############################################################ # Pipeline configuration P.get_parameters(["%s.yml" % __file__[:-len(".py")], "pipeline.yml"]) PARAMS = P.PARAMS ############################################################ ############################################################ ############################################################ def exportIntervalsAsBed(database, query, outfile): '''export intervals from SQlite database as bed files. ''' dbhandle = sqlite3.connect(database) cc = dbhandle.cursor() cc.execute(query) outs = IOTools.open_file(outfile, "w")
from CGATCore import Pipeline as P
import CGATCore.IOTools as IOTools
import CGAT.Bed as Bed
import cgatpipelines.tasks.peakcalling as PipelinePeakcalling
import PipelineDeNovoMotifs_python3 as PipelineMotifs
import cgatpipelines.tasks.tracks as PipelineTracks

###################################################
###################################################
###################################################
# Pipeline configuration
###################################################

# read configuration from the standard search locations; paired_end
# defaults to False when not configured
P.get_parameters(
    ["%s/pipeline.yml" % os.path.splitext(__file__)[0],
     "../pipeline.yml",
     "pipeline.yml"],
    defaults={
        'paired_end': False})

PARAMS = P.PARAMS

# share the configuration with the helper modules
PipelinePeakcalling.PARAMS = PARAMS
PipelineMotifs.PARAMS = PARAMS

###################################################################
###################################################################
###################################################################
# Helper functions mapping tracks to conditions, etc
###################################################################
def main(argv=None):
    # Farm a command over chunks of stdin: split the input into pieces,
    # run the command once per piece (possibly on the cluster), then
    # collect and re-assemble the per-chunk outputs.
    parser = get_option_parser()

    (options, args) = E.start(parser, add_cluster_options=True)

    if len(args) == 0:
        raise ValueError(
            "command line argument missing - see usage information")

    # each entry becomes a (pattern, column) pair controlling renumbering
    options.renumber_column = [x.split(":") for x in options.renumber_column]

    # remaining arguments form the command; re-quote everything after the
    # executable so embedded whitespace survives the shell
    cmd = args[0]
    if len(args) > 1:
        cmd += " '" + "' '".join(args[1:]) + "'"

    if options.dry_run:
        # dry run: execute the command once, unchunked, and exit
        cmd = re.sub("%DIR%", "", cmd)
        retcode = subprocess.call(cmd,
                                  shell=True,
                                  stdin=sys.stdin,
                                  stdout=sys.stdout,
                                  cwd=os.getcwd(),
                                  close_fds=True)
        E.stop()
        sys.exit(0)

    failed_requests = []
    started_requests = []
    niterations = 0

    P.get_parameters()
    P.start_session()

    if not options.collect:
        # fresh run: split stdin into chunk files in a temporary directory
        tmpdir = os.path.abspath(tempfile.mkdtemp(dir=options.tmpdir))
        E.info(" working in directory %s" % tmpdir)

        # choose the chunking strategy; ``args`` is rebound here to the
        # strategy-specific argument tuple passed to the iterator
        if options.split_at_lines:
            chunk_iterator = chunk_iterator_lines
            args = (options.split_at_lines, )
        elif options.split_at_column:
            chunk_iterator = chunk_iterator_column
            args = (options.split_at_column - 1,
                    options.max_files)
        elif options.split_at_regex:
            chunk_iterator = chunk_iterator_regex_split
            args = (re.compile(options.split_at_regex),
                    0,
                    options.chunksize,
                    options.max_lines)
        elif options.group_by_regex:
            chunk_iterator = chunk_iterator_regex_group
            args = (re.compile(options.group_by_regex),
                    0,
                    options.chunksize)
        else:
            raise ValueError("please specify a way to chunk input data")

        # one entry per chunk: (chunk filename, cmd, options, None, subdirs)
        data = [(x, cmd, options, None, options.subdirs)
                for x in chunk_iterator(options.stdin,
                                        args,
                                        prefix=tmpdir,
                                        use_header=options.input_header)]

        statements = [build_command(x) for x in data]
        started_requests = [(x[0], x[0] + ".out") for x in data]

        if len(data) == 0:
            E.warn("no data received")
            E.stop()
            sys.exit(0)

        P.run(statements)
    else:
        # collect mode: re-use chunk outputs from a previous run
        tmpdir = options.collect
        started_requests = [(x[:-4], x) for x in glob.glob(tmpdir + "/*.out")]

        E.info("collecting %i files from %s" % (len(started_requests),
                                                tmpdir))

    if failed_requests:
        for fn, cmd in failed_requests:
            E.error("failed request: filename= %s, cmd= %s" % (fn, cmd))
    else:
        E.info("building result from %i parts" % len(started_requests))

        # mapper renames/renumbers identifiers in the collected output
        if options.renumber:
            mapper = MapperLocal(pattern=options.renumber)
        else:
            mapper = MapperEmpty()

        # deal with stdout
        name = None
        index = None
        for pattern, column in options.renumber_column:
            if re.search(pattern, "stdout"):
                try:
                    index = int(column) - 1
                except ValueError:
                    # non-numeric column specifier - treat it as a name
                    name = column
                break

        if options.binary:
            ResultBuilderBinary()(started_requests, options.stdout, options)
        else:
            regex = None
            if options.output_regex_header:
                regex = re.compile(options.output_regex_header)
            ResultBuilder(mapper=mapper,
                          field_index=index,
                          field_name=name,
                          header_regex=regex
                          )(started_requests, options.stdout, options)

        # deal with logfiles : combine them into a single file
        rr = re.search("'--log=(\S+)'", cmd) or re.search("'--L\s+(\S+)'", cmd)
        if rr:
            E.info("logging output goes to %s" % rr.groups()[0])
            logfile = IOTools.open_file(rr.groups()[0], "a")
            ResultBuilderLog()([(x[0], "%s.log" % x[0])
                                for x in started_requests], logfile, options)
            logfile.close()

        # deal with other files
        if options.subdirs:
            files = glob.glob("%s/*.dir/*" % tmpdir)
            # remove directory
            filenames = set([os.path.basename(x) for x in files])
            xx = len(".out")
            for filename in filenames:
                _, filetype = os.path.splitext(filename)

                name = None
                index = None
                for pattern, column in options.renumber_column:
                    if re.search(pattern, filename):
                        try:
                            index = int(column) - 1
                        except ValueError:
                            name = column
                        break

                # pick an output builder based on file type
                if options.binary:
                    builder = ResultBuilderBinary(mapper=mapper)
                elif filetype in (".fa", ".fasta"):
                    builder = ResultBuilderFasta(mapper=mapper)
                elif filetype in (".mali", ):
                    builder = ResultBuilderFasta(mapper=MapperEmpty())
                elif filetype in (".png"):
                    # NOTE(review): (".png") is a plain string, not a
                    # 1-tuple, so ``in`` here is a substring test - any
                    # filetype that is a substring of ".png" matches.
                    # Looks unintended; confirm before changing.
                    builder = ResultBuilderCopies(mapper=mapper)
                else:
                    builder = ResultBuilder(mapper=mapper,
                                            field_index=index,
                                            field_name=name)

                E.debug("chose the following builder for %s: %s: %s" %
                        (filename, filetype, str(builder)))

                E.info("collecting results for %s" % filename)

                input_filenames = []
                for fi, fn in started_requests:
                    # map chunk output name to the per-chunk subdirectory
                    fn = fn[:-xx] + ".dir/" + filename
                    if os.path.exists(fn):
                        input_filenames.append((fi, fn))

                E.info("output of %i files goes to %s" %
                       (len(filenames), filename))

                outfile = IOTools.open_file(options.output_pattern % filename,
                                            "w")
                builder(input_filenames, outfile, options)
                outfile.close()

    # clean up the working directory unless debugging, resuming or collecting
    if not options.debug and (not options.resume or not options.collect):
        if len(failed_requests) == 0:
            E.info("removing directory %s" % tmpdir)
            shutil.rmtree(tmpdir)
        else:
            E.info("directory %s not removed due to %i failed jobs" %
                   (tmpdir, len(failed_requests)))

    E.info("job control: nstarted=%i, nfinished=%i, nerrors=%i, nrepeats=%i" %
           (len(started_requests),
            len(started_requests) - len(failed_requests),
            len(failed_requests),
            niterations))

    E.stop()
def main(argv=None):
    """Dynamically import a module and call one of its functions.

    The function named by ``--function`` in the module given by
    ``--module`` is invoked with the input filenames, output filenames
    and/or the extra parameters supplied on the command line.

    Raises
    ------
    ValueError
        If module/function options are missing, or if no usable
        combination of infiles/outfiles/params was supplied.
    AttributeError
        If the named function does not exist in the module.
    """
    # Parse the options
    parser = E.OptionParser(
        version="%prog version: $Id: cgat_script_template.py 2871 2010-03-03 10:20:44Z andreas $",
        usage=globals()["__doc__"])

    parser.add_option("-p", "--params", "--args", dest="params", type="string",
                      help="comma separated list of additional parameter strings")

    parser.add_option("-m", "--module", dest="module", type="string",
                      help="the full path to the module file", default=None)

    parser.add_option("-i", "--input", dest="input_filenames", type="string",
                      action="append", help="input filename")

    parser.add_option("-o", "--output-section", dest="output_filenames",
                      type="string", action="append", help="output filename")

    parser.add_option("-f", "--function", dest="function", type="string",
                      help="the module function", default=None)

    parser.set_defaults(input_filenames=[], output_filenames=[], params=None)

    (options, args) = E.start(parser)

    # Check a module and function have been specified
    if not options.module or not options.function:
        raise ValueError("Both a function and Module must be specified")

    # initialize defaults
    P.get_parameters()

    # If a full path was given, add this path to the system path
    location = os.path.dirname(options.module)
    if location != "":
        sys.path.append(location)

    # Establish the module name, accommodating cases where the
    # .py extension has been included in the module name
    module_name = os.path.basename(options.module)
    if module_name.endswith(".py"):
        module_base_name = module_name[:-3]
    else:
        module_base_name = module_name

    # Import the specified module and map the specified function
    E.info("importing module '%s' " % module_base_name)
    E.debug("sys.path is: %s" % sys.path)

    module = importlib.import_module(module_base_name)
    try:
        function = getattr(module, options.function)
    except AttributeError as msg:
        # BUG FIX: exception objects have no ``.message`` attribute on
        # Python 3, so the original ``msg.message + ...`` itself raised a
        # confusing secondary AttributeError.  Build the report from the
        # exception text instead and chain the original for context.
        raise AttributeError(
            "%s: unknown function, available functions are: %s" %
            (msg,
             ",".join([x for x in dir(module)
                       if not x.startswith("_")]))) from msg

    # "None" (the literal string) on the command line means "no files"
    if options.input_filenames and not options.input_filenames == ["None"]:
        infiles = options.input_filenames
    else:
        infiles = False

    if options.output_filenames and not options.output_filenames == ["None"]:
        outfiles = options.output_filenames
    else:
        outfiles = False

    # Parse the parameters into an array
    if options.params:
        params = [param.strip() for param in options.params.split(",")]
    else:
        params = False

    # deal with single file case: pass a bare filename, not a 1-list
    if infiles and len(infiles) == 1:
        infiles = infiles[0]
    if outfiles and len(outfiles) == 1:
        outfiles = outfiles[0]

    # Make the function call
    if infiles and outfiles and params:
        function(infiles, outfiles, params)
    elif infiles and outfiles and not params:
        function(infiles, outfiles)
    elif params:
        function(params)
    else:
        raise ValueError(
            "Expecting infile+outfile+params or infile+outfile or params")

    E.stop()