def main(argv=None):
    """script main.

    parses command line options in sys.argv, unless *argv* is given.

    Executes the remaining command line arguments as a single shell
    command and exits with that command's return code (0 when no
    command is given).
    """
    if argv is None:
        argv = sys.argv

    parser = E.OptionParser(version="%prog version: $Id$",
                            usage=globals()["__doc__"])

    # stop parsing options at the first argument
    parser.disable_interspersed_args()

    # fix: pass *argv* through so the function argument is honoured -
    # previously the argv fallback above was dead code because E.Start
    # always parsed sys.argv (every other main in this file passes
    # argv=argv).
    (options, args) = E.Start(parser, argv=argv, add_pipe_options=True)

    if len(args) > 0:
        # quote every argument after the command so each reaches the
        # shell as a single token
        cmd = args[0]
        if len(args) > 1:
            cmd += " '" + "' '".join(args[1:]) + "'"

        # no stdout/stderr=PIPE: the child inherits this process'
        # streams, so communicate() returns (None, None) and merely
        # waits for the child to finish
        s = subprocess.Popen(cmd,
                             shell=True,
                             cwd=os.getcwd(),
                             close_fds=True)
        (out, err) = s.communicate()
        returncode = s.returncode
    else:
        returncode = 0

    E.Stop()
    sys.exit(returncode)
def main(argv=None):
    """script main.

    parses command line options in sys.argv, unless *argv* is given.

    Writes a timestamped listing of every file below the current
    working directory to a file named ``CWD_<timestamp>``.
    """
    if argv is None:
        argv = sys.argv

    # set up the command line parser
    parser = E.OptionParser(version="%prog version: $Id$",
                            usage=globals()["__doc__"])

    # add common options (-h/--help, ...) and parse the command line
    (options, args) = E.Start(parser, argv=argv)

    # snapshot of the directory tree below the current working directory
    dir2files = {root: filelist for root, _, filelist in os.walk(".")}

    timestamp = datetime.datetime.fromtimestamp(
        time.time()).strftime('%Y-%m-%d_%H:%M:%S')
    filename = "CWD_%s" % timestamp

    E.info("outputting directory state to %s" % filename)

    with iotools.openFile(filename, "w") as outf:
        outf.write("##contents of cwd on %s\n\n" % timestamp)
        for dirname, filelist in dir2files.items():
            for fn in filelist:
                outf.write(os.path.join(dirname, fn) + "\n")

    # write footer and output benchmark information.
    E.Stop()
def main(argv=None):
    """script main.

    parses command line options in sys.argv, unless *argv* is given.

    For every command line argument, creates a test directory next to
    this script containing a ``tests.yaml`` stub. Directories that
    already exist are skipped with a warning.
    """
    if not argv:
        argv = sys.argv

    # setup command line parser
    parser = E.OptionParser(
        version=
        "%prog version: $Id: script_template.py 2871 2010-03-03 10:20:44Z andreas $",
        usage=globals()["__doc__"])

    parser.add_option("-m", "--method", dest="method", type="choice",
                      choices=("script", "module"),
                      help="type of tests to create [%default].")

    parser.set_defaults(method="script")

    # add common options (-h/--help, ...) and parse command line
    (options, args) = E.Start(parser, argv=argv)

    if len(args) == 0:
        raise ValueError(
            "setup_test.py requires one or more command line arguments")

    # test directories are created next to this script
    targetdir = os.path.dirname(__file__)

    counter = E.Counter()

    for arg in args:
        counter.input += 1
        # only the basename matters - the directory part of the argument
        # is ignored (was an unused local from os.path.split before)
        basename = os.path.basename(arg)
        dirname = os.path.join(targetdir, basename)

        if os.path.exists(dirname):
            E.warn("%s already exists - skipping" % basename)
            counter.skipped += 1
            continue

        os.mkdir(dirname)

        with open(os.path.join(dirname, "tests.yaml"), "w") as outf:
            outf.write(YAML_TEMPLATE)

        counter.created += 1

    E.info("%s" % str(counter))

    # write footer and output benchmark information.
    E.Stop()
def main(argv=None):
    """script main.

    parses command line options in sys.argv, unless *argv* is given.

    Checks each file given on the command line for completeness (via
    its ``.log`` companion or the file itself) and deletes incomplete
    files unless --dry-run is given.
    """
    if argv is None:
        argv = sys.argv

    # set up command line parser
    parser = E.OptionParser(version="%prog version: $Id$",
                            usage=globals()["__doc__"])

    parser.add_option("-n", "--dry-run", dest="dry_run",
                      action="store_true",
                      help="dry run, do not delete any files [%default]")

    parser.set_defaults(dry_run=False)

    # add common options (-h/--help, ...) and parse command line
    (options, args) = E.Start(parser, argv=argv)

    counter = E.Counter()

    for fn in args:
        counter.checked += 1

        logfile = fn + ".log"
        # a file counts as complete if its logfile exists and is
        # complete ...
        is_complete = (os.path.exists(logfile) and
                       iotools.isComplete(logfile))
        # ... or, failing that, if the file itself is complete
        if not is_complete:
            is_complete = iotools.isComplete(fn)

        if is_complete:
            counter.complete += 1
            continue

        counter.incomplete += 1
        E.info('deleting %s' % fn)
        if options.dry_run:
            continue
        os.unlink(fn)
        counter.deleted += 1

    E.info(counter)

    # write footer and output benchmark information.
    E.Stop()
def main(argv=None):
    """script main.

    Builds a single shell command from the command line arguments
    (quoting every argument after the first) and runs it in the
    current working directory, inheriting stdin/stdout.
    """
    parser = farm.getOptionParser()

    (options, args) = E.Start(parser, add_cluster_options=True)

    # robustness fix: a missing command previously raised a bare
    # IndexError on args[0]; report it like the sibling script does
    if len(args) == 0:
        raise ValueError(
            "command line argument missing - see usage information")

    cmd = args[0]
    if len(args) > 1:
        cmd += " '" + "' '".join(args[1:]) + "'"

    # strip the %DIR% placeholder (presumably substituted only on
    # cluster runs - confirm against farm.py)
    cmd = re.sub("%DIR%", "", cmd)

    # return code intentionally ignored, matching previous behaviour
    subprocess.call(cmd,
                    shell=True,
                    stdin=sys.stdin,
                    stdout=sys.stdout,
                    cwd=os.getcwd(),
                    close_fds=True)

    E.Stop()
def main(argv=None):
    """script main.

    parses command line options in sys.argv, unless *argv* is given.

    Lists cluster jobs via ``qstat -xml`` and deletes (``qdel``) every
    job whose name matches the supplied pattern.
    """
    if not argv:
        argv = sys.argv

    # setup command line parser
    parser = E.OptionParser(version="%prog version: $Id$",
                            usage=globals()["__doc__"])

    parser.add_option("-p", "--pattern-identifier", dest="pattern",
                      type="string",
                      help="jobs matching `pattern` in their job "
                      "description will be killed [default=%default].")

    parser.add_option("-n", "--dry-run", dest="dry_run",
                      action="store_true",
                      help="do dry run, do not kill [default=%default].")

    parser.set_defaults(
        pattern=None,
        dry_run=False,
    )

    # add common options (-h/--help, ...) and parse command line
    (options, args) = E.Start(parser, argv=argv)

    # parse the XML job listing from qstat
    output = StringIO.StringIO(
        subprocess.Popen(["qstat", "-xml"],
                         stdout=subprocess.PIPE).communicate()[0])

    tree = xml.etree.ElementTree.ElementTree(file=output)

    ntested = 0
    to_kill = set()

    if options.pattern:
        pattern = re.compile(options.pattern)
    else:
        pattern = None

    for x in tree.getiterator("job_list"):
        ntested += 1
        # renamed from `id` to avoid shadowing the builtin
        job_id = x.find("JB_job_number").text
        name = x.find("JB_name").text
        if pattern and pattern.search(name):
            to_kill.add(job_id)

    nkilled = len(to_kill)

    # bug fix: only invoke qdel when there is something to kill -
    # previously ``qdel ""`` was executed even when no job matched
    # (and always when no pattern was given).
    if to_kill and not options.dry_run:
        p = subprocess.Popen(["qdel", ",".join(to_kill)],
                             stdout=subprocess.PIPE)
        stdout, stderr = p.communicate()

    E.info("ntested=%i, nkilled=%i" % (ntested, nkilled))

    # write footer and output benchmark information.
    E.Stop()
def main(argv=None):
    """script main.

    parses command line options in sys.argv, unless *argv* is given.
    """
    # Re-submit cluster jobs whose result file is missing, older than the
    # job script, or incomplete: scan qsub scripts, decide per job whether
    # to (re)run it, optionally delete stale outputs, then submit via qsub.
    if argv is None:
        argv = sys.argv

    parser = E.OptionParser(version="%prog version: $Id$",
                            usage=globals()["__doc__"])

    parser.add_option(
        "-g", "--glob", dest="glob_pattern", type="string",
        help="glob pattern to use for collecting cluster jobs descriptions "
        "[%default]")

    parser.add_option(
        "-i", "--input-pattern", dest="input_pattern", type="string",
        help="regular expression to extract job id from filename [%default].")

    parser.add_option(
        "-o", "--output-filename-pattern", dest="output_pattern",
        type="string",
        help="string to convert a job id to a filename [%default].")

    parser.set_defaults(
        glob_pattern="job*.qsub",
        input_pattern="(\S+).qsub",
        output_pattern="%s.stdout",
        remove_old=True,
        force=False,
        check_completeness="python",
    )

    (options, args) = E.Start(parser, add_pipe_options=True)

    # explicit filenames on the command line take precedence over the glob
    if args:
        filenames = args
    elif options.glob_pattern:
        filenames = glob.glob(options.glob_pattern)

    ninput, nrun, nskipped, nerrors = 0, 0, 0, 0
    ndeleted = 0

    # choose the completeness checker.
    # NOTE(review): only the "python" method binds isComplete here; any
    # other value of --check-completeness would raise NameError when
    # isComplete is called below - confirm other methods are handled
    # elsewhere.
    if options.check_completeness == "python":
        isComplete = checkPythonRuns

    ##############################################################
    ##############################################################
    ##############################################################
    # decide what to do
    ##############################################################
    jobs = []
    files_to_delete = []

    for filename in filenames:
        ninput += 1
        try:
            # the first regex group is the invariant job id
            job_name = re.search(options.input_pattern, filename).groups()[0]
        except AttributeError:
            # re.search returned None - the filename did not match
            options.stderr.write(
                "# could not extract invariant job name from %s\n" % filename)
            nerrors += 1
            continue

        result_filename = options.output_pattern % job_name

        # `do` records whether the job needs (re)submission; `status`
        # records why, for logging
        do = False
        status = "up-to-date"

        if options.force:
            status = "force"
            do = True

        if not do:
            if os.path.exists(result_filename):
                # job script newer than its result -> rerun
                if isNewer(filename, result_filename):
                    status = "newer"
                    do = True
                    if options.remove_old:
                        files_to_delete.append(result_filename)
                # result exists but is incomplete -> rerun
                if not do and not isComplete(result_filename):
                    status = "incomplete"
                    do = True
                    if options.remove_old:
                        files_to_delete.append(result_filename)
            else:
                status = "missing"
                do = True

        E.info("%s->%s (%s)\n" % (filename, result_filename, status))

        if not do:
            nskipped += 1
            continue

        jobs.append(filename)

    ##############################################################
    ##############################################################
    ##############################################################
    # delete old files
    ##############################################################
    for filename in files_to_delete:
        if os.path.exists(filename):
            os.remove(filename)
            ndeleted += 1

    ##############################################################
    ##############################################################
    ##############################################################
    # start jobs
    ##############################################################
    for filename in jobs:
        cmd = "qsub %s" % filename
        try:
            retcode = subprocess.call(cmd, shell=True)
            if retcode != 0:
                if options.loglevel >= 1:
                    options.stdlog.write("# ERROR: failed to execute %s\n" %
                                         cmd)
                nerrors += 1
                continue
        except OSError as e:
            # NOTE(review): after an OSError there is no continue, so
            # nrun is still incremented below - confirm this is intended
            if options.loglevel >= 1:
                options.stdlog.write(
                    "# ERROR: failed to execute %s with msg %s\n" % (cmd, e))
        nrun += 1

    E.info("ninput=%i, nrun=%i, nskipped=%i, ndeleted=%i, nerrors=%i" %
           (ninput, nrun, nskipped, ndeleted, nerrors))

    E.Stop()
def main(argv=None):
    """script main.

    parses command line options in sys.argv, unless *argv* is given.

    Dispatches to plotIDR() or runIDR() depending on --action.
    """
    if not argv:
        argv = sys.argv

    # setup command line parser
    parser = E.OptionParser(
        version=
        "%prog version: $Id: cgat_script_template.py 2871 2010-03-03 10:20:44Z andreas $",
        usage=globals()["__doc__"])

    parser.add_option("-o", "--output-prefix", dest="output_prefix",
                      type="string",
                      help="output filename prefix [default=%default].")

    parser.add_option(
        "-c", "--chromosome-table", dest="filename_chromosome_table",
        type="string",
        help=
        "filename with tab separated list of chromosome names [default=%default]."
    )

    parser.add_option("--action", dest="action", type="choice",
                      choices=("plot", "run"),
                      help="action to perform [default=%default]")

    parser.add_option(
        "-s", "--signal-value", dest="signal_value", type="string",
        help=
        "use either p.value or sig.value as ranking measure [default=%default]"
    )

    # NOTE(review): the help text describes a value between 0 and 1, but
    # the option type is "int" - confirm whether type="float" was intended.
    parser.add_option(
        "-r", "--overlap-ratio", dest="overlap_ratio", type="int",
        help=
        "a value between 0 and 1 that controls how much two peaks have to overlap to be called as the same [default=%default]"
    )

    parser.set_defaults(
        action="plot",
        output_prefix="output",
        half_width=None,
        overlap_ratio=0,
        is_broadpeak=False,
        signal_value="signal.value",
        filename_chromosome_table="genome_table.txt",
    )

    # add common options (-h/--help, ...) and parse command line
    (options, args) = E.Start(parser, argv=argv)

    if options.action == "plot":
        # remaining arguments are the input files to plot
        plotIDR(options.output_prefix + ".pdf", args)
    elif options.action == "run":
        if len(args) != 2:
            raise ValueError("require exactly two replicates")
        runIDR(options, args[0], args[1])

    # write footer and output benchmark information.
    E.Stop()
def main(argv=sys.argv):
    # Profile a pipeline logfile: tally, per task and per job, how often
    # each object started/completed and how long it ran, then print a
    # tab-separated summary per section.
    # NOTE(review): the default ``argv=sys.argv`` binds at import time;
    # the other mains in this file use ``argv=None`` plus a fallback.
    parser = E.OptionParser(version="%prog version: $Id$",
                            usage=globals()["__doc__"])

    parser.add_option("-l", "--logfile", dest="logfile", type="string",
                      help="name of logfile [default=%default]")

    parser.add_option("-t", "--time", dest="time", type="choice",
                      choices=("seconds", "milliseconds"),
                      help="time to show [default=%default]")

    parser.add_option(
        "--no-reset", dest="reset", action="store_false",
        help="do not reset counters when a new pipeline run started "
        "The default is to reset so that only the counts from the latest "
        "pipeline execution are show "
        "[default=%default]")

    parser.add_option(
        "-f", "--filter-method", dest="filter", type="choice",
        choices=("unfinished", "running", "completed", "all"),
        help="apply filter to output [default=%default]")

    parser.add_option(
        "-i", "--ignore-errors", dest="ignore_errors", action="store_true",
        help="ignore errors [default=%default]")

    parser.set_defaults(sections=[],
                        logfile="pipeline.log",
                        filter="all",
                        reset=True,
                        time="seconds")

    (options, args) = E.Start(parser, argv)

    # lines of interest start with a numeric timestamp
    rx = re.compile("^[0-9]+")

    if options.sections:
        profile_sections = options.sections
    else:
        profile_sections = ("task", "job")

    # section -> object name -> Counter accumulating calls and durations
    counts = {}
    for section in profile_sections:
        counts[section] = collections.defaultdict(Counter)

    rootpath = os.path.abspath(".")  # NOTE(review): appears unused

    infile = iotools.openFile(options.logfile)

    for line in infile:
        if not rx.match(line):
            continue
        data = line[:-1].split()
        if len(data) < 5:
            continue
        # NOTE: `time` shadows the time module within this function
        date, time, level, source = data[:4]

        # a new pipeline run started: optionally discard earlier counts
        if re.search("output generated by", line):
            if options.reset:
                E.info("resetting counts at line=%s" % line[:-1])
                for section in profile_sections:
                    counts[section] = collections.defaultdict(Counter)
            continue

        # only task.* log sources are profiled
        if not re.match("task\.", source):
            continue

        dt = datetime.datetime.strptime(
            " ".join((date, time)), "%Y-%m-%d %H:%M:%S,%f")

        # messages are matched with all whitespace removed, hence the
        # compact regex patterns below ("Taskentersqueue", "Missingfile")
        msg = "".join(data[4:])

        started_task, completed_task, started_job, completed_job = \
            (None, None, None, None)

        if re.search("task.log_at_level.\d+Task=(\S+)", msg):
            # NOTE(review): checked_task is assigned but never used
            checked_task = re.search(
                "task.log_at_level.\d+Task=(\S+)", msg).groups()[0]
        elif re.search("Job=\[(\S+)->(\S+)\]Missingfile[s]*\[(\S+)\]", msg):
            started_infiles, started_job, missing = re.search(
                "Job=\[(\S+)->(\S+)\]Missingfile[s]*\[(\S+)\]", msg).groups()
        elif re.search("Job=\[(\S+)->(\S+)\]Missingfile[s]*", msg):
            started_infiles, started_job = re.search(
                "Job=\[(\S+)->(\S+)\]Missingfile[s]*", msg).groups()
        elif re.search("Job=\[(\S+)->(\S+)\]\s*\.\.\.", msg):
            # multi-line log messages
            started_infiles, started_job = re.search(
                "Job=\[(\S+)->(\S+)\]\s*\.\.\.", msg).groups()
        elif re.search("Taskentersqueue=(\S+)", msg):
            started_task = re.search("Taskentersqueue=(\S+)",
                                     msg).groups()[0]
        elif re.search("Job=\[(\S+)->(\S+)\]completed", msg):
            completed_infiles, completed_job = re.search(
                "Job=\[(\S+)->(\S+)\]completed", msg).groups()
        elif re.search("CompletedTask=(\S+)", msg):
            completed_task = re.search("CompletedTask=(\S+)",
                                       msg).groups()[0]
        elif re.search("UptodateTask=(\S+)", msg):
            completed_task = re.search("UptodateTask=(\S+)",
                                       msg).groups()[0]
        else:
            continue

        # record the event; True = started, False = completed
        try:
            if started_task:
                counts["task"][started_task].add(True, dt, started_task)
            elif completed_task:
                counts["task"][completed_task].add(False, dt, completed_task)
            elif started_job:
                counts["job"][started_job].add(True, dt, started_job)
            elif completed_job:
                counts["job"][completed_job].add(False, dt, completed_job)
            else:
                raise ValueError("unknown action")
        except ValueError as msg:
            if not options.ignore_errors:
                raise ValueError(str(msg) + "\nat line %s" % line)

    # duration formatter.
    # NOTE(review): timedelta.seconds ignores the days component, so
    # durations beyond 24h would be truncated - confirm this is acceptable.
    if options.time == "milliseconds":
        f = lambda d: d.seconds + d.microseconds / 1000
    elif options.time == "seconds":
        f = lambda d: d.seconds + d.microseconds / 1000000

    for section in profile_sections:
        options.stdout.write("\t".join(
            ("section", "object", "ncalls", "duration", "percall",
             "running")) + "\n")

        running = []
        for objct, c in counts[section].items():
            # apply filters
            if options.filter in ("unfinished",
                                  "running") and c.running == 0:
                continue

            d = f(c.duration)
            if c.calls > 0:
                percall = "%6.3f" % (d / float(c.calls))
            else:
                percall = "na"

            options.stdout.write("\t".join(
                (list(map(str, (section, objct,
                                c.calls,
                                d,
                                percall,
                                c.running,
                                ))))) + "\n")

            # objects whose start counter never returned to zero are
            # still running
            running.extend([x for x, y in c._started.items() if y != 0])

        options.stdout.write("#//\n\n")

        if running:
            options.stdout.write("# running %ss\n" % section)
            options.stdout.write("\n".join(map(str, running)) + "\n")
            options.stdout.write("#//\n\n")

    E.Stop()
def main(argv=None):
    """script main.

    parses command line options in sys.argv, unless *argv* is given.

    Summarizes logfiles either per file ("file" mode) or per cluster
    node ("node" mode), writing tab-separated totals to stdout.
    """
    if argv is None:
        argv = sys.argv

    parser = E.OptionParser(version="%prog version: $Id$",
                            usage=globals()["__doc__"])

    parser.add_option(
        "-g", "--glob", dest="glob_pattern", type="string",
        help="glob pattern to use for collecting files [%default].")

    parser.add_option(
        "-f", "--file-pattern", dest="file_pattern", type="string",
        help="only check files matching this pattern [%default].")

    parser.add_option("-m", "--mode", dest="mode", type="choice",
                      choices=("file", "node"),
                      help="analysis mode [%default].")

    parser.add_option(
        "-r", "--recursive", action="store_true",
        help="recursively look for logfiles from current directory "
        "[%default].")

    parser.set_defaults(
        truncate_sites_list=0,
        glob_pattern="*.log",
        mode="file",
        recursive=False,
    )

    (options, args) = E.Start(parser)

    # explicit filenames take precedence over the glob pattern
    if args:
        filenames = args
    elif options.glob_pattern:
        filenames = glob.glob(options.glob_pattern)

    if len(filenames) == 0:
        raise ValueError("no files to analyse")

    if options.mode == "file":
        # one summary row per logfile plus a grand total
        totals = Logfile.LogFileData()

        options.stdout.write("file\t%s\n" % totals.getHeader())

        for filename in filenames:
            if filename == "-":
                infile = sys.stdin
            elif filename[-3:] == ".gz":
                infile = gzip.open(filename, "r")
            else:
                infile = open(filename, "r")

            subtotals = Logfile.LogFileData()
            for line in infile:
                subtotals.add(line)

            # NOTE(review): this also closes sys.stdin when filename
            # is "-" - confirm intended
            infile.close()

            options.stdout.write("%s\t%s\n" % (filename, str(subtotals)))
            totals += subtotals

        options.stdout.write("%s\t%s\n" % ("total", str(totals)))

    elif options.mode == "node":
        # group log chunks by the node they ran on; a chunk starts at
        # every "job started" marker line
        chunks_per_node = {}

        rx_node = re.compile("# job started at .* \d+ on (\S+)")

        for filename in filenames:
            if filename == "-":
                infile = sys.stdin
            elif filename[-3:] == ".gz":
                infile = gzip.open(filename, "r")
            else:
                infile = open(filename, "r")

            # data before the first marker goes into a throwaway chunk
            data = Logfile.LogFileDataLines()

            for line in infile:
                if rx_node.match(line):
                    node_id = rx_node.match(line).groups()[0]
                    # start a fresh chunk and register it under its node
                    data = Logfile.LogFileDataLines()
                    if node_id not in chunks_per_node:
                        chunks_per_node[node_id] = []
                    chunks_per_node[node_id].append(data)
                    continue
                data.add(line)

        # NOTE(review): header is taken from the last chunk read
        options.stdout.write("node\t%s\n" % data.getHeader())
        total = Logfile.LogFileDataLines()

        for node, data in sorted(chunks_per_node.items()):
            subtotal = Logfile.LogFileDataLines()
            for d in data:
                # options.stdout.write( "%s\t%s\n" % (node, str(d) ) )
                subtotal += d

            options.stdout.write("%s\t%s\n" % (node, str(subtotal)))
            total += subtotal

        options.stdout.write("%s\t%s\n" % ("total", str(total)))

    E.Stop()
def main(argv=None):
    """script main.

    parses command line options in sys.argv, unless *argv* is given.

    Collects all ``index.html`` report files below --path and writes a
    single overview ``index.html`` (one table row per project report)
    into the destination directory.
    """
    if not argv:
        argv = sys.argv

    # setup command line parser
    parser = E.OptionParser(version="%prog version: $Id",
                            usage=globals()["__doc__"])

    parser.add_option("-p", "--path", dest="path", type="string",
                      help="path to scan for files [%default]")

    # bug fix: dest was "destination" while the code reads options.dest,
    # so -d/--destination was silently ignored; also fixed the "%defaul"
    # typo in the help string.
    parser.add_option("-d", "--destination", dest="dest", type="string",
                      help="path to deposit files into [%default]")

    parser.set_defaults(path='/ifs/projects/sftp',
                        url='http://www.cgat.org/downloads/',
                        dest='/ifs/projects/overview')

    # add common options (-h/--help, ...) and parse command line
    (options, args) = E.Start(parser, argv=argv)

    statement = "find %s -name 'index.html'" % options.path

    process = subprocess.Popen(statement,
                               shell=True,
                               stdin=subprocess.PIPE,
                               stdout=subprocess.PIPE,
                               stderr=subprocess.PIPE)

    stdout, stderr = process.communicate()

    files = stdout.split('\n')
    files.sort()

    outfile = iotools.openFile(os.path.join(options.dest, "index.html"), "w")

    # static page header
    outfile.write('''
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
<title>cgat project reports</title>
<link rel="stylesheet" href="cgat.css" type="text/css" />
<link rel="stylesheet" href="pygments.css" type="text/css" />
<link rel="shortcut icon" href="http://cgatwiki.anat.ox.ac.uk/favicon.ico">
<script type="text/javascript" src="sorttable.js"></script>
</head>
<body>
<div class="related">
<h3>Navigation</h3>
<ul>
<li><a href="index.html">cgat Projects Overview</a> &#187;</li>
</ul>
</div>
<div class="document">
<div class="documentwrapper">
<div class="bodywrapper">
<div class="body">
<div class="section" id="cgat-pipelines">
<H1>cgat exported project pages</H1>
<p>
This page is for internal use only. Do not distribute outside of cgat and
do not make this page available on the world wide web.
</p>
<table class="sortable">\n''')

    outfile.write(
        '''<tr><th>Project</th><th>Report</th><th>Title</th></tr>\n''')

    for f in files:
        if f == '':
            continue

        # robustness fix: skip (with a warning) paths without a projNNN
        # component instead of crashing with AttributeError
        match = re.search('(proj\d+)', f)
        if match is None:
            E.warn("could not extract project from %s - skipping" % f)
            continue
        proj = match.groups()[0]

        relpath = re.sub('.*proj\d+/', '', f)
        report = re.sub('^[^/]*/', '', os.path.dirname(relpath))

        # pull the report title out of the page, if present
        lines = iotools.openFile(f).readlines()
        titles = [x for x in lines if "<title>" in x]
        if titles:
            title = re.search("<title>(.*)</title>", titles[0]).groups()[0]
        else:
            title = "NA"

        if title.endswith("documentation"):
            title = title[:-len("documentation")]

        url = os.path.join(options.url, relpath)

        outfile.write(
            '<tr><td>%(proj)s</td><td><a HREF="%(url)s">%(report)s</td><td>%(title)s</td></tr>\n' % locals())

    # static page footer
    outfile.write('''
</table>
</div>
</div>
</div>
</div>
</div>
<div class="sphinxsidebar">
<div class="sphinxsidebarwrapper">
<p class="logo"><a href="contents.html">
<img class="logo" src="cgat_logo.png" alt="Logo"/>
</a></p>
</body>
</html>\n''')
    outfile.close()

    E.info('created output file %s' % outfile.name)

    # write footer and output benchmark information.
    E.Stop()
def main(argv=None):
    # Farm-style runner: split stdin into chunks, run `cmd` on every chunk
    # (via multiprocessing, threads or DRMAA), then merge the per-chunk
    # outputs, logs and side-car files back into single results.
    parser = getOptionParser()

    (options, args) = E.Start(parser, add_cluster_options=True)

    if len(args) == 0:
        raise ValueError(
            "command line argument missing - see usage information")

    # each entry becomes [pattern, column] for output renumbering
    options.renumber_column = [x.split(":") for x in options.renumber_column]

    cmd = args[0]
    if len(args) > 1:
        cmd += " '" + "' '".join(args[1:]) + "'"

    # dry run: execute the command once, unchunked, and exit
    if options.dry_run:
        cmd = re.sub("%DIR%", "", cmd)
        retcode = subprocess.call(cmd,
                                  shell=True,
                                  stdin=sys.stdin,
                                  stdout=sys.stdout,
                                  cwd=os.getcwd(),
                                  close_fds=True)
        E.Stop()
        sys.exit(0)

    failed_requests = []
    started_requests = []
    niterations = 0

    if not options.collect:
        # fresh run: chunk stdin into a temporary working directory
        tmpdir = os.path.abspath(tempfile.mkdtemp(dir=options.tmpdir))
        E.info(" working in directory %s" % tmpdir)

        # pick the chunking strategy; NOTE: `args` is rebound here from
        # the leftover command line arguments to the iterator arguments
        if options.split_at_lines:
            chunk_iterator = chunk_iterator_lines
            args = (options.split_at_lines, )
        elif options.split_at_column:
            chunk_iterator = chunk_iterator_column
            args = (options.split_at_column - 1, options.max_files)
        elif options.split_at_regex:
            chunk_iterator = chunk_iterator_regex_split
            args = (re.compile(options.split_at_regex), 0,
                    options.chunksize, options.max_lines)
        elif options.group_by_regex:
            chunk_iterator = chunk_iterator_regex_group
            args = (re.compile(options.group_by_regex), 0, options.chunksize)
        else:
            raise ValueError("please specify a way to chunk input data")

        # one work item per chunk file
        data = [(x, cmd, options, None, options.subdirs)
                for x in chunk_iterator(options.stdin,
                                        args,
                                        prefix=tmpdir,
                                        use_header=options.input_header)]

        # (chunk filename, expected output filename) pairs
        started_requests = [(x[0], x[0] + ".out") for x in data]

        if len(data) == 0:
            E.warn("no data received")
            E.Stop()
            sys.exit(0)

        if options.method == "multiprocessing":
            pool = Pool(options.cluster_num_jobs)
            results = pool.map(runCommand, data, chunksize=1)
        elif options.method == "drmaa":
            results = []
            runDRMAA(data, environment=options.environment)
        elif options.method == "threads":
            pool = ThreadPool(options.cluster_num_jobs)
            results = pool.map(runCommand, data, chunksize=1)

        # collect failures; NOTE(review): for the "drmaa" method results
        # stays empty, so failures are not detected here - confirm
        niterations = 0
        for retcode, filename, cmd, logfile, iterations in results:
            niterations += iterations
            if not hasFinished(retcode, filename, options.output_tag,
                               logfile):
                failed_requests.append((filename, cmd))

    else:
        # resume mode: pick up chunk outputs from a previous run
        tmpdir = options.collect
        started_requests = [(x[:-4], x)
                            for x in glob.glob(tmpdir + "/*.out")]
        E.info("collecting %i files from %s" % (len(started_requests),
                                                tmpdir))

    if failed_requests:
        for fn, cmd in failed_requests:
            E.error("failed request: filename= %s, cmd= %s" % (fn, cmd))
    else:
        E.info("building result from %i parts" % len(started_requests))

        if options.renumber:
            mapper = MapperLocal(pattern=options.renumber)
        else:
            mapper = MapperEmpty()

        # deal with stdout
        name = None
        index = None
        for pattern, column in options.renumber_column:
            if re.search(pattern, "stdout"):
                try:
                    index = int(column) - 1
                except ValueError:
                    name = column
                break

        if options.binary:
            ResultBuilderBinary()(started_requests, options.stdout, options)
        else:
            regex = None
            if options.output_regex_header:
                regex = re.compile(options.output_regex_header)
            ResultBuilder(mapper=mapper,
                          field_index=index,
                          field_name=name,
                          header_regex=regex)(started_requests,
                                              options.stdout, options)

        # deal with logfiles : combine them into a single file
        rr = re.search("'--log=(\S+)'", cmd) or \
            re.search("'--L\s+(\S+)'", cmd)
        if rr:
            E.info("logging output goes to %s" % rr.groups()[0])
            logfile = iotools.openFile(rr.groups()[0], "a")
            ResultBuilderLog()([(x[0], "%s.log" % x[0])
                                for x in started_requests],
                               logfile, options)
            logfile.close()

        # deal with other files
        if options.subdirs:
            files = glob.glob("%s/*.dir/*" % tmpdir)
            # remove directory
            filenames = set([os.path.basename(x) for x in files])
            xx = len(".out")

            for filename in filenames:
                _, filetype = os.path.splitext(filename)

                name = None
                index = None
                for pattern, column in options.renumber_column:
                    if re.search(pattern, filename):
                        try:
                            index = int(column) - 1
                        except ValueError:
                            name = column
                        break

                # choose a merge strategy by file extension.
                # NOTE(review): (".psl") and (".png") are parenthesized
                # strings, not tuples, so `filetype in (".psl")` is a
                # substring test rather than membership - confirm whether
                # (".psl", ) was intended.
                if options.binary:
                    builder = ResultBuilderBinary(mapper=mapper)
                elif filetype in (".fa", ".fasta"):
                    builder = ResultBuilderFasta(mapper=mapper)
                elif filetype in (".mali", ):
                    builder = ResultBuilderFasta(mapper=MapperEmpty())
                elif filetype in (".psl"):
                    builder = ResultBuilderPSL(mapper=mapper)
                elif filetype in (".gtf", ".gff"):
                    builder = ResultBuilderGFF(mapper=mapper,
                                               field_index=index,
                                               field_name=name)
                elif filetype in (".png"):
                    builder = ResultBuilderCopies(mapper=mapper)
                else:
                    builder = ResultBuilder(mapper=mapper,
                                            field_index=index,
                                            field_name=name)

                E.debug("chose the following builder for %s: %s: %s" %
                        (filename, filetype, str(builder)))

                E.info("collecting results for %s" % filename)

                input_filenames = []
                for fi, fn in started_requests:
                    fn = fn[:-xx] + ".dir/" + filename
                    if os.path.exists(fn):
                        input_filenames.append((fi, fn))

                E.info("output of %i files goes to %s" %
                       (len(filenames), filename))

                outfile = iotools.openFile(options.output_pattern %
                                           filename, "w")
                builder(input_filenames, outfile, options)
                outfile.close()

    # clean up the working directory unless debugging/resuming
    if not options.debug and (not options.resume or not options.collect):
        if len(failed_requests) == 0:
            E.info("removing directory %s" % tmpdir)
            shutil.rmtree(tmpdir)
        else:
            E.info("directory %s not removed due to %i failed jobs" %
                   (tmpdir, len(failed_requests)))

    E.info("job control: nstarted=%i, nfinished=%i, nerrors=%i, nrepeats=%i" %
           (len(started_requests),
            len(started_requests) - len(failed_requests),
            len(failed_requests),
            niterations))

    E.Stop()
def main(argv=None):
    """script main.

    parses command line options in sys.argv, unless *argv* is given.

    "Zaps" the files given on the command line: truncates them to zero
    bytes (or breaks symbolic links) while recording their original
    stat metadata to stdout and restoring mtime/atime/mode.
    """
    if argv is None:
        argv = sys.argv

    # setup command line parser
    parser = E.OptionParser(version="%prog version: $Id$",
                            usage=globals()["__doc__"])

    parser.add_option("-n", "--dry-run", dest="dry_run",
                      action="store_true",
                      help="do dry run, do not kill [default=%default].")

    parser.add_option("-l", "--ignore-links", dest="ignore_links",
                      action="store_true",
                      help="do not zap symbolic links [default=%default].")

    parser.set_defaults(
        dry_run=False,
        ignore_links=False,
    )

    # add common options (-h/--help, ...) and parse command line
    (options, args) = E.Start(parser, argv=argv)

    outfile = options.stdout

    # stat fields recorded for every zapped file
    fields = ('st_atime', 'st_blksize', 'st_blocks',
              'st_ctime', 'st_dev', 'st_gid', 'st_ino',
              'st_mode', 'st_mtime', 'st_nlink',
              'st_rdev', 'st_size', 'st_uid')

    outfile.write("filename\tlinkdest\t%s\n" % "\t".join(fields))

    # remove any duplicates and sort
    args = sorted(set(args))

    for fn in args:
        # stat follows times to links
        original = os.stat(fn)

        if os.path.islink(fn):
            if not options.ignore_links:
                # replace the link with an empty regular file
                linkdest = os.readlink(fn)
                E.info('breaking link from %s to %s' % (fn, linkdest))
                if not options.dry_run:
                    os.unlink(fn)
                    f = open(fn, "w")
                    f.close()
            # NOTE(review): with --ignore-links, linkdest is not set in
            # this branch; the write below would then reuse the previous
            # iteration's value or raise NameError on the first file -
            # confirm intended handling of skipped links.
        else:
            # regular file: truncate in place
            E.info('truncating file %s' % fn)
            linkdest = ""
            if not options.dry_run:
                f = open(fn, "w")
                f.truncate()
                f.close()

        outfile.write("%s\t%s\t%s\n" % (
            fn,
            linkdest,
            "\t".join([str(getattr(original, x)) for x in fields])))

        if not options.dry_run:
            # Set original times
            os.utime(fn, (original.st_atime, original.st_mtime))
            os.chmod(fn, original.st_mode)

    # write footer and output benchmark information.
    E.Stop()
def main(argv=None):
    """script main.

    parses command line options in sys.argv, unless *argv* is given.

    Mirrors the files/directories given on the command line into a
    collection directory below the scratch directory on every node.
    """
    if not argv:
        argv = sys.argv

    # setup command line parser
    parser = E.OptionParser(version="%prog version: $Id$",
                            usage=globals()["__doc__"])

    parser.add_option(
        "-s", "--scratch-dir", dest="scratchdir", type="string",
        help="the scratch directory on the nodes [default=%default].")

    parser.add_option(
        "-c", "--collection", dest="collection", type="string",
        help="files will be put into collection. This is a directory that "
        "will be created just below the scratch directory [default=%default].")

    parser.set_defaults(
        scratchdir="/scratch",
        collection="",
        nodes=[],
    )

    # add common options (-h/--help, ...) and parse command line
    (options, args) = E.Start(parser, argv=argv)

    if len(args) == 0:
        raise ValueError("please specify a collection of files/directories "
                         "that should be mirrored.")

    targetdir = os.path.join(options.scratchdir, options.collection)
    nodes = getNodes(options.nodes)

    E.info("copying to %s on nodes %s" % (targetdir, ",".join(nodes)))

    n_in, n_out, n_skip = 0, 0, 0

    # all sources are handed to rsync in a single invocation
    filenames = " ".join(args)

    for node in nodes:
        E.info("copying to node %s" % node)
        n_in += 1
        # create the target directory (errors ignored if it exists),
        # then push the files across
        statement = '''
    ssh %(node)s mkdir %(targetdir)s >& /dev/null;
    rsync --progress -az %(filenames)s %(node)s:%(targetdir)s
    ''' % locals()
        E.run(statement)
        n_out += 1

    E.info("ninput=%i, noutput=%i, nskipped=%i" % (n_in, n_out, n_skip))

    E.Stop()
def main(argv=None):
    # Download (optionally) and filter a GTF file: remove/keep named
    # contigs via a zcat shell pipeline and/or apply a chain of filters.
    if argv is None:
        argv = sys.argv

    parser = E.OptionParser(version="%prog version: $Id$",
                            usage=globals()["__doc__"])

    parser.add_option("-g", "--gtf", dest="gtf", type="string",
                      help="path to input gtf")

    parser.add_option("-p", "--gtfpath", dest="gtfpath", type="string",
                      help="path to online gtf")

    parser.add_option("-c", "--remove-contigs", dest="remove_contigs",
                      type="string",
                      help="contigs to remove, delimited by |")

    parser.add_option("-k", "--keep-contigs", dest="keep_contigs",
                      type="string",
                      help="""all contigs to keep, delimited by |. Contigs specified in --remove-contigs will still be removed""")

    parser.add_option("-o", "--outfile", dest="outfile", type="string",
                      help="path to processed output gtf")

    parser.add_option("-f", "--filter", dest="filters", type="string",
                      action="append",
                      help="""List of filters to apply to your GTF""")

    parser.set_defaults(
        remove_contigs=None,
        keep_contigs=None,
    )

    (options, args) = E.Start(parser)

    # pick the input: a local GTF takes precedence over a download
    if options.gtf:
        gtf = options.gtf
    elif options.gtfpath:
        getGTF(options.gtfpath)
        # the downloaded file ends up under its basename in the cwd
        gtf = options.gtfpath.split("/")[-1]
    else:
        raise ValueError("Please provide a GTF or the path to an online GTF")

    if not options.outfile:
        raise ValueError("Please provide an output file name")

    # d counts how many processing steps were requested; if still 0 at
    # the end, no filtering work was asked for and we raise below
    d = 0

    if options.remove_contigs or options.keep_contigs:
        d += 1
        # build the shell pipeline: zcat | (contig filters) | gzip
        statement = 'zcat %s |' % gtf
        if options.remove_contigs:
            statement += removeNamedContigs(options.remove_contigs)
        if options.keep_contigs:
            statement += keepOnlyNamedContigs(options.keep_contigs)
        if options.outfile.endswith(".gz"):
            outfile = options.outfile
        else:
            outfile = options.outfile + ".gz"
        statement += "gzip > %s " % outfile
        # NOTE(review): os.system with an interpolated filename is prone
        # to shell injection if the path is untrusted.
        os.system(statement)

    # starting point for the filter chain.
    # NOTE(review): this is the *original* gtf - when contig filtering
    # ran above, its output (`outfile`) appears to be ignored by the
    # filter chain below; confirm whether T1 should point at `outfile`.
    T1 = gtf

    if options.filters:
        d += 1
        for filterstring in options.filters:
            # each filter reads T1 and writes a fresh temporary file T2,
            # which becomes the input of the next filter
            T2 = P.getTempFilename(".")
            T2 = T2 + ".gtf"
            filterGTF(T1, filterstring, T2)
            T1 = T2
        # the last temporary file becomes the final output
        shutil.move(T2, options.outfile)

    if d == 0:
        raise ValueError("No filters provided")
def main(argv=None):
    """Import a python module and apply one of its functions.

    The function named by ``--function`` in the module given by
    ``--module`` is called with the input filenames, output filenames
    and/or parameters supplied on the command line.

    Parses command line options in sys.argv, unless *argv* is given.

    Raises
    ------
    ValueError
        if module/function are missing or no usable argument
        combination was supplied.
    AttributeError
        if the named function does not exist in the module.
    """
    # Parse the options
    parser = E.OptionParser(
        version="%prog version: $Id: cgat_script_template.py 2871 2010-03-03 10:20:44Z andreas $",
        usage=globals()["__doc__"])

    parser.add_option("-p", "--params", dest="params", type="string",
                      help="comma separated list of addtional parameter strings")

    parser.add_option("-m", "--module", dest="module", type="string",
                      help="the full path to the module file", default=None)

    parser.add_option("-i", "--input", dest="input_filenames", type="string",
                      action="append", help="input filename")

    parser.add_option("-o", "--output-section", dest="output_filenames",
                      type="string", action="append", help="output filename")

    parser.add_option("-f", "--function", dest="function", type="string",
                      help="the module function", default=None)

    parser.set_defaults(
        input_filenames=[],
        output_filenames=[],
        params=None
    )

    # forward argv for consistency with the other scripts in this file;
    # E.Start falls back to sys.argv when argv is None
    (options, args) = E.Start(parser, argv=argv)

    # Check a module and function have been specified
    if not options.module or not options.function:
        raise ValueError("Both a function and Module must be specified")

    # If a full path was given, add this path to the system path
    location = os.path.dirname(options.module)
    if location != "":
        sys.path.append(location)

    # Establish the module name, accomodating cases where the
    # .py extension has been included in the module name
    module_name = os.path.basename(options.module)
    if module_name.endswith(".py"):
        module_base_name = module_name[:-3]
    else:
        module_base_name = module_name

    # Import the specified module and map the specified function
    E.info("importing module '%s' " % module_base_name)
    E.debug("sys.path is: %s" % sys.path)
    module = importlib.import_module(module_base_name)

    try:
        function = getattr(module, options.function)
    except AttributeError as msg:
        # BUG FIX: exceptions have no ``.message`` attribute in python 3,
        # so the original ``msg.message`` raised a second AttributeError
        # here.  Use str(msg) to keep the original error text.
        raise AttributeError(
            str(msg) +
            "unknown function, available functions are: %s" %
            ",".join([x for x in dir(module) if not x.startswith("_")]))

    # normalize "no input/output" to False; the literal string "None" is
    # how upstream pipelines indicate an absent filename list
    if options.input_filenames and not options.input_filenames == ["None"]:
        infiles = options.input_filenames
    else:
        infiles = False

    if options.output_filenames and not options.output_filenames == ["None"]:
        outfiles = options.output_filenames
    else:
        outfiles = False

    # Parse the parameters into an array
    if options.params:
        params = [param.strip() for param in options.params.split(",")]
    else:
        params = False

    # deal with single file case
    if infiles and len(infiles) == 1:
        infiles = infiles[0]
    if outfiles and len(outfiles) == 1:
        outfiles = outfiles[0]

    # Make the function call with whichever argument combination is present
    if infiles and outfiles and params:
        function(infiles, outfiles, params)
    elif infiles and outfiles and not params:
        function(infiles, outfiles)
    elif params:
        function(params)
    else:
        raise ValueError(
            "Expecting infile+outfile+params or infile+outfile or params")

    # write footer and output benchmark information.
    E.Stop()
def main(argv=None):
    """Run queries against the cBioPortal web service.

    Query commands (getCancerStudies, getProfileData, ...) are given as
    positional arguments; each result table is written to
    ``--output_file`` or stdout.
    """
    parser = E.OptionParser(
        version="%prog version: $Id$", usage=globals()["__doc__"])

    parser.add_option("-o", "--output_file", type="string", default=None,
                      help="[Optional] Filename to output results to. [default=STDOUT]")

    parser.add_option("-u", "--url", type="string",
                      default="http://www.cbioportal.org/public-portal/webservice.do",
                      help="[Optional] Url to the cBioPortal webservice [default=%default]")

    cqueryopts = optparse.OptionGroup(
        parser, "Common parameters", "Common arguments to the query")

    cqueryopts.add_option("-s", "--study_id", dest="study_id", type="string",
                          default=None,
                          help="[Required/OPtional] cBioPortal ID for study [default=%default].\n This or study_name required for: getGeneticProfiles, getCaseLists, getProteinArrayInfo, getLink,getOncoprintHTML, getPercentAltered, getTotalAltered")

    cqueryopts.add_option("-n", "--study_name", dest="study_name",
                          type="string", default=None,
                          help="[Required/Optional] cBioPortal Name for study [defualt=%default].\n See above for which commands require this.")

    cqueryopts.add_option("-c", "--case_set_id", dest="case_set_id",
                          type="string", default=None,
                          help="[Required for some] cBioPortal case_set_id specifying the case list to use.\nRequired for getProfileData, getMutationData, getClincalData, getProteinArrayData, getPercentAltered, getTotalAltered. Default is case_set_id for case list 'All Tumours' ")

    cqueryopts.add_option("-g", "--gene_list", dest="gene_list", type="string",
                          default=None,
                          help="[Required for some] Comma seperated list of HUGO gene symbols or Entrez gene IDs.\nRequired for getProfileData, getMutationData, getLink, getOncoprintHTML")

    cqueryopts.add_option("-f", "--gene_list_file", dest="gene_list_file",
                          type="string", default=None,
                          help="[Optional] Filename to read in gene_list from")

    cqueryopts.add_option("-p", "--profile_id", dest="profile_id",
                          type="string",
                          help="[Optional] Comma seperated list of cBioPortal genetic_profile_ids. If none are specified then the list of profiles for the study where display in analysis is True is used.")

    squeryopts = optparse.OptionGroup(
        parser, "Query specific parameters",
        "Arguments specific to a particular query")

    squeryopts.add_option("--protein_array_type", dest="protein_array_type",
                          type="string", default="protein_level",
                          help="[Optional] Either protein_level or phosphorylation [default=%default]")

    squeryopts.add_option("--protein_array_id", dest="protein_array_id",
                          type="string",
                          help="[Required for some] comma seperated list of one or more protein array IDs")

    squeryopts.add_option("--array_info", dest="protein_array_info",
                          type="int", default=0,
                          help="[Optional] If 1, antibody infomation will also be exported in a getProteinArrayData query [default=%default]")

    squeryopts.add_option("--output-report", dest="report", type="string",
                          default="full",
                          help="[Optional] Report type to display for getLink. Either full or oncoprint_html [default=%default] ")

    squeryopts.add_option("--threshold", dest="threshold", type="int",
                          default=2,
                          help="[Optional] Threshold for deciding if an alteration is significant for continuous metrics [default=%default]")

    parser.add_option_group(cqueryopts)
    parser.add_option_group(squeryopts)

    (options, args) = E.Start(
        parser, add_pipe_options=False, add_output_options=False, argv=argv)

    portal = CBioPortal(url=options.url,
                        study=options.study_id,
                        study_name=options.study_name,
                        case_list_id=options.case_set_id)

    results = []

    # FIX: close the gene-list file instead of leaking the handle
    if options.gene_list_file:
        with iotools.open_file(options.gene_list_file) as infile:
            gene_list = [x.strip() for x in infile]
    elif options.gene_list:
        gene_list = options.gene_list.split(",")
    # NOTE(review): if neither option is given, gene_list stays unbound and
    # the gene-based queries below fail with NameError — confirm whether an
    # explicit early error is wanted here.

    if options.profile_id:
        profile_id = options.profile_id.split(",")
    else:
        profile_id = None

    # dispatch each requested query command in a fixed order
    if "getCancerStudies" in args:
        results.append(portal.getCancerStudies())

    if "getGeneticProfiles" in args:
        results.append(portal.getGeneticProfiles())

    if "getCaseLists" in args:
        results.append(portal.getCaseLists())

    if "getProfileData" in args:
        results.append(
            portal.getProfileData(gene_list=gene_list,
                                  genetic_profile_id=profile_id))

    if "getMutationData" in args:
        results.append(
            portal.getMutationData(gene_list=gene_list,
                                   genetic_profile_id=profile_id))

    if "getClinicalData" in args:
        results.append(portal.getClinicalData())

    if "getProteinArrayInfo" in args:
        results.append(portal.getProteinArrayInfo(
            gene_list=gene_list,
            protein_array_type=options.protein_array_type))

    if "getProteinArrayData" in args:
        # BUG FIX: the option's dest is ``protein_array_info``;
        # ``options.array_info`` does not exist and raised AttributeError.
        results.append(portal.getProteinArrayData(
            protein_array_id=options.protein_array_id,
            array_info=options.protein_array_info))

    if "getPercentAltered" in args:
        results.append(portal.getPercentAltered(
            gene_list=gene_list, genetic_profile_id=profile_id,
            threshold=options.threshold))

    if "getLink" in args:
        results.append(
            portal.getLink(gene_list=gene_list, report=options.report))

    if "getOncoprintHTML" in args:
        results.append(portal.getOncoprintHTML(gene_list=gene_list))

    if len(results) == 0:
        sys.stderr.write("No recognised query commands provided")
        sys.exit()

    if options.output_file:
        outf = iotools.open_file(options.output_file, "w")
    else:
        outf = sys.stdout

    for result in results:
        try:
            outf.write(tableToString(result))
        except Exception:
            # FIX: narrowed from a bare ``except:`` — fall back to writing
            # the raw result when it cannot be rendered as a table, but do
            # not swallow KeyboardInterrupt/SystemExit.
            outf.write(result)

    # close the output file, but never stdout
    if outf is not sys.stdout:
        outf.close()

    E.Stop()
''' TEMPLATE_PIPELINEMODULE = ''' .. automodule:: %(prefix)s :members: :show-inheritance: ''' import glob import os import cgatcore.experiment as E if __name__ == "__main__": E.Start() dirs = (("../scripts/*.py", TEMPLATE_SCRIPT, 'scripts'), ("../cgat/*.py", TEMPLATE_MODULE, 'modules'), ("../cgatPipelines/pipeline*.py", TEMPLATE_PIPELINE, 'pipelines'), ("../cgatPipelines/[A-Z]*.py", TEMPLATE_PIPELINEMODULE, 'pipelinemodules')) ncreated, nskipped = 0, 0 for glob_expression, template, dest in dirs: if not os.path.exists(dest): os.mkdir(dest) files = glob.glob(os.path.abspath(glob_expression))