def main(argv=None):
    """Script entry point: reformat all gene summaries found below a directory.

    Parses command line options from ``sys.argv`` unless *argv* is given.
    Every file matching ``<directory>/*/*genes*summary*`` is handed to
    ``reformat`` after a single tab-separated header line has been written
    to stdout.

    Raises:
        ValueError: if ``--directory`` was not supplied.
    """
    if argv is None:
        argv = sys.argv

    # setup command line parser
    parser = E.OptionParser(version="%prog version: $Id$",
                            usage=globals()["__doc__"])

    parser.add_option(
        "-d", "--directory", dest="directory", type="string",
        help="supply directory where the input summaries are located")

    # add common options (-h/--help, ...) and parse command line
    (options, args) = E.start(parser, argv=argv)

    # Without this check os.path.join(None, ...) fails with an opaque
    # TypeError.
    if options.directory is None:
        raise ValueError("please supply an input directory with --directory")

    # glob returns matches in arbitrary order; sort for reproducible output
    infiles = sorted(
        glob.glob(os.path.join(options.directory, "*/*genes*summary*")))

    sys.stdout.write("category\tnreads\tpreads\tsample\n")
    for infile in infiles:
        reformat(infile)

    # write footer and output benchmark information.
    E.stop()
def main(argv=None):
    """Script entry point: purge a run from the database.

    Registers the ``--run-id``, ``--database-url`` and ``--dry-run``
    options, parses *argv* and delegates to ``purge_run_id``.
    """
    parser = E.OptionParser(version="%prog version: $Id$",
                            usage=globals()["__doc__"])

    # (flags, keyword settings) for every script-specific option
    option_specs = (
        (("-r", "--run-id"),
         dict(dest="run_id", type="int",
              help="numerical identifier of a run [%default]")),
        (("-d", "--database-url"),
         dict(dest="database_url", type="string",
              help="database url [%default]")),
        (("-n", "--dry-run"),
         dict(dest="dry_run", action="store_true",
              help="only show statements to be executed [%default]")),
    )
    for flags, settings in option_specs:
        parser.add_option(*flags, **settings)

    parser.set_defaults(
        run_id=None,
        database_url="sqlite:///./csvdb",
        dry_run=False,
    )

    (options, args) = E.start(parser, argv=argv, add_output_options=True)

    purge_run_id(options.run_id,
                 options.database_url,
                 dry_run=options.dry_run)

    E.stop()
def main(argv):
    """Script entry point: compare character counts of two files.

    The first positional argument is the input file, the second the
    reference. Both are read completely and stripped of surrounding
    whitespace; a ``key / input / reference`` table of per-character
    counts is then written to ``options.stdout``.
    """
    parser = E.OptionParser(version="%prog version: $Id$",
                            usage=globals()["__doc__"])
    parser.add_option("-o", "--option", dest="option", type="string")

    (options, args) = E.start(parser, argv)

    def _slurp(path):
        # whole file content without leading/trailing whitespace
        with IOTools.open_file(path) as handle:
            return "".join(handle.readlines()).strip()

    observed = Counter(_slurp(args[0]))
    expected = Counter(_slurp(args[1]))

    options.stdout.write("key\tinput\treference\n")
    # a Counter returns 0 for symbols it has not seen
    for symbol in sorted(set(observed) | set(expected)):
        row = (symbol, str(observed[symbol]), str(expected[symbol]))
        options.stdout.write("\t".join(row) + "\n")

    E.stop()
def main(argv=None):
    """Script entry point: run a command through the shell and exit with
    its return code.

    Parses command line options from ``sys.argv`` unless *argv* is given.
    Option parsing stops at the first positional argument; that argument
    is the command and all following arguments are passed on to it.
    If no command is given the script exits with status 0.
    """
    # local import: only this function needs it
    import shlex

    if argv is None:
        argv = sys.argv

    parser = E.OptionParser(version="%prog version: $Id$",
                            usage=globals()["__doc__"])

    # stop parsing options at the first argument
    parser.disable_interspersed_args()

    # forward argv so that an explicitly supplied argument list is honoured
    (options, args) = E.Start(parser, argv=argv, add_pipe_options=True)

    if len(args) > 0:
        # The first argument is deliberately left unquoted so the shell
        # interprets it; the remaining arguments are quoted with
        # shlex.quote, which (unlike wrapping them in '...') also copes
        # with arguments that themselves contain single quotes.
        cmd = " ".join([args[0]] + [shlex.quote(arg) for arg in args[1:]])

        process = subprocess.Popen(cmd,
                                   shell=True,
                                   cwd=os.getcwd(),
                                   close_fds=True)
        # no pipes are attached, so this merely waits for completion
        process.communicate()
        returncode = process.returncode
    else:
        returncode = 0

    E.Stop()
    sys.exit(returncode)
def main(argv=None):
    """Script entry point: count positions shared between VCF files.

    Each positional argument is read with
    ``read_vcf_positions_into_dataframe`` and tagged with a column named
    after the file (by ``--regex-filename`` or by stripping ``.vcf.gz``
    from the basename). The frames are outer-merged, missing values set
    to 0, and the number of rows per combination of file columns is
    written as a tab-separated table to ``options.stdout``.

    Raises:
        ValueError: if fewer than two filenames are given or the regular
            expression does not match a filename.
    """
    parser = E.OptionParser(version="%prog version: $Id$",
                            usage=globals()["__doc__"])

    parser.add_option(
        "--regex-filename", dest="regex_filename", type="string",
        help="extract column name from filename via regular expression "
        "[%default]")

    parser.add_option(
        "--filter", dest="filters", type="choice", action="append",
        choices=("PASS", "SNP"),
        help="apply filters to VCFs when reading "
        "[%default]")

    parser.set_defaults(
        regex_filename=None,
        filters=[],
    )

    (options, args) = E.start(parser, argv=argv, add_output_options=True)

    if len(args) < 2:
        raise ValueError("requiring at least 2 input filenames")

    frames = []
    for path in args:
        if not options.regex_filename:
            label = iotools.snip(os.path.basename(path), ".vcf.gz")
        else:
            try:
                # re.search returns None on a mismatch -> AttributeError
                label = re.search(options.regex_filename, path).groups()[0]
            except AttributeError:
                raise ValueError(
                    "regular expression '{}' does not match {}".format(
                        options.regex_filename, path))

        E.debug("reading data from {}".format(path))
        frame = read_vcf_positions_into_dataframe(path,
                                                  filters=options.filters)
        # presence indicator for this file
        frame[label] = 1
        frames.append(frame)

    combined = frames[0]
    for frame in frames[1:]:
        combined = combined.merge(frame, how="outer")
    # positions absent from a file become 0 in that file's column
    combined = combined.fillna(0)

    indicators = combined.drop(["chrom", "pos"], axis=1)
    counts = indicators.groupby(by=list(indicators.columns)).size()
    counts = counts.reset_index()
    counts.columns = list(counts.columns[:-1]) + ["counts"]

    counts.to_csv(options.stdout, sep="\t", index=False)

    E.stop()
def main(argv=None):
    """Script entry point: record the contents of the working directory.

    Parses command line options from ``sys.argv`` unless *argv* is given,
    walks ``.`` and writes the path of every file found to a new file
    named ``CWD_<timestamp>``.
    """
    if argv is None:
        argv = sys.argv

    # setup command line parser
    parser = E.OptionParser(version="%prog version: $Id$",
                            usage=globals()["__doc__"])

    # add common options (-h/--help, ...) and parse command line
    (options, args) = E.Start(parser, argv=argv)

    # collect the listing before the output file is created
    listing = {}
    for root, _subdirs, names in os.walk("."):
        listing[root] = names

    stamp = datetime.datetime.fromtimestamp(
        time.time()).strftime('%Y-%m-%d_%H:%M:%S')
    filename = "CWD_%s" % stamp
    E.info("outputting directory state to %s" % filename)

    with iotools.openFile(filename, "w") as outf:
        outf.write("##contents of cwd on %s\n\n" % stamp)
        for directory, names in listing.items():
            outf.writelines(
                os.path.join(directory, name) + "\n" for name in names)

    # write footer and output benchmark information.
    E.Stop()
def main(argv=None):
    """Script entry point: emit a VCF of randomly sampled reference positions.

    For every record of the input FASTA file a number of distinct
    positions whose base is not ``N`` is drawn at random and written as a
    homozygous-reference (``0/0``) record to ``options.stdout``.

    ``--sample-size`` below 1 is interpreted as a fraction of the contig
    length, otherwise as an absolute number of positions. If a contig has
    fewer non-``N`` bases than requested, the request is reduced to what
    is available (previously this looped forever or raised).
    """
    parser = E.OptionParser(version="%prog version: $Id$",
                            usage=globals()["__doc__"])

    parser.add_option(
        "-s", "--sample-size", dest="sample_size", type="float",
        help="sample size. If less than 1, take a proportion of the "
        "chromosome size. If 1 or greater, take a fixed number of "
        "variants [%default]")

    parser.set_defaults(
        input_filename_fasta=None,
        sample_size=0.001,
        sample_name="NA12878"
    )

    (options, args) = E.start(parser, argv=argv, add_output_options=True)

    if len(args) > 0:
        options.input_filename_fasta = args[0]

    if options.input_filename_fasta == "-":
        options.input_filename_fasta = options.stdin

    outf = options.stdout
    outf.write("##fileformat=VCFv4.1\n")
    outf.write(
        "##FORMAT=<ID=GT,Number=1,Type=String,Description=\"Genotype\">\n")
    outf.write(
        "#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\t{}\n".format(
            options.sample_name))

    with pysam.FastxFile(options.input_filename_fasta) as inf:
        for record in inf:
            contig = record.name
            sequence = record.sequence

            if options.sample_size < 1.0:
                nsamples = int(float(len(sequence)) * options.sample_size)
            else:
                nsamples = int(options.sample_size)

            # Only non-N positions can ever be accepted; asking for more
            # would never terminate (or make random.sample raise).
            navailable = len(sequence) - sequence.count("N")
            if nsamples > navailable:
                E.warn(
                    "contig {}: requested {} positions but only {} are "
                    "not N - reducing sample size".format(
                        contig, nsamples, navailable))
                nsamples = navailable

            E.info("generating {} sampled variants for contig {}".format(
                nsamples, contig))

            # a range is a sequence, so there is no need to materialise
            # a list of all positions on every round
            population = range(len(sequence))
            sampled_positions = set()
            # rejection sampling: redraw until enough distinct non-N
            # positions have been collected
            while len(sampled_positions) < nsamples:
                raw_positions = random.sample(
                    population, nsamples - len(sampled_positions))
                filtered_positions = [
                    x for x in raw_positions if sequence[x] != "N"]
                sampled_positions.update(filtered_positions)
                E.debug(
                    "sample update: total={}, raw={}, filtered={}".format(
                        len(sampled_positions),
                        len(raw_positions),
                        len(filtered_positions)))

            for position in sorted(sampled_positions):
                base = sequence[position]
                # VCF coordinates are 1-based; REF and ALT are identical
                outf.write(
                    "{}\t{}\t.\t{}\t{}\t.\t.\t.\tGT\t0/0\n".format(
                        contig, position + 1, base, base))

    E.stop()
def main(argv=None):
    """Script entry point: force a rebuild of the ``.pyx`` extensions.

    Parses command line options from ``sys.argv`` unless *argv* is given.
    For every ``*.pyx`` file next to this script, the derived ``.c`` and
    ``.pyxbldc`` files are removed and the companion script (same name
    without the leading underscore, ``.py`` suffix) is run with
    ``--help``.
    """
    if not argv:
        argv = sys.argv

    # setup command line parser
    parser = E.OptionParser(
        version="%prog version: $Id: cgat_script_template.py 2871 2010-03-03 10:20:44Z andreas $",
        usage=globals()["__doc__"])

    parser.add_option("-i", "--test-option", dest="test_option",
                      type="string",
                      help="test option [default=%default].")

    parser.set_defaults(test_option="test")

    # add common options (-h/--help, ...) and parse command line
    (options, args) = E.start(parser, argv=argv)

    pattern = os.path.join(os.path.dirname(__file__), "*.pyx")

    ninput = nskipped = noutput = 0

    for pyx_file in glob.glob(pattern):
        E.info("rebuilding %s" % pyx_file)
        ninput += 1
        stem, _suffix = os.path.splitext(pyx_file)

        # remove build products; it is fine if they do not exist
        for extension in (".c", ".pyxbldc"):
            try:
                os.remove(stem + extension)
            except OSError:
                pass

        folder, module_name = os.path.split(stem)
        assert module_name.startswith("_")

        scriptname = os.path.join(folder, module_name[1:]) + ".py"
        if os.path.exists(scriptname):
            E.info("compiling %s" % scriptname)
            os.system("%s %s --help > /dev/null" %
                      (sys.executable, scriptname))
            noutput += 1
        else:
            E.warn("script %s does not exist - skipped" % scriptname)
            nskipped += 1

    E.info("ninput=%i, noutput=%i, nskipped=%i" %
           (ninput, noutput, nskipped))

    # write footer and output benchmark information.
    E.stop()
def main(argv=None):
    """Script entry point: create test directories for scripts.

    Parses command line options from ``sys.argv`` unless *argv* is given.
    For each positional argument a directory named after its basename is
    created next to this script and seeded with a ``tests.yaml``
    containing ``YAML_TEMPLATE``. Existing directories are skipped.

    Raises:
        ValueError: if no positional argument is given.
    """
    if not argv:
        argv = sys.argv

    # setup command line parser
    parser = E.OptionParser(
        version="%prog version: $Id: script_template.py 2871 2010-03-03 10:20:44Z andreas $",
        usage=globals()["__doc__"])

    parser.add_option("-m", "--method", dest="method", type="choice",
                      choices=("script", "module"),
                      help="type of tests to create [%default].")

    parser.set_defaults(method="script")

    # add common options (-h/--help, ...) and parse command line
    (options, args) = E.Start(parser, argv=argv)

    if len(args) == 0:
        raise ValueError(
            "setup_test.py requires one or more command line arguments")

    targetdir = os.path.dirname(__file__)

    counter = E.Counter()

    for script in args:
        counter.input += 1
        test_name = os.path.split(script)[1]

        test_dir = os.path.join(targetdir, test_name)

        if not os.path.exists(test_dir):
            os.mkdir(test_dir)

            yaml_path = os.path.join(test_dir, "tests.yaml")
            with open(yaml_path, "w") as outf:
                outf.write(YAML_TEMPLATE)

            counter.created += 1
        else:
            E.warn("%s already exists - skipping" % test_name)
            counter.skipped += 1

    E.info("%s" % str(counter))

    # write footer and output benchmark information.
    E.Stop()
def main(argv=None):
    """Script entry point: filter tRNA FASTA records and append a CCA tail.

    Parses command line options from ``sys.argv`` unless *argv* is given.
    Record titles (with parentheses removed) must match the tRNA naming
    pattern below; non-matching records and records flagged ``pseudo``
    are dropped. Remaining sequences are written to ``options.stdout``
    with ``cca`` appended, under a title built from the matched name and
    strand. If two records produce the same title, the last sequence is
    kept.
    """
    if not argv:
        argv = sys.argv

    # setup command line parser
    parser = E.OptionParser(
        version="%prog version: $Id$",
        usage=globals()["__doc__"])

    (options, args) = E.start(parser, argv=argv)

    E.info(options.stdin)

    # This is a temp fix because bedtools getfasta --name seems to have
    # changed the way it names the fasta titles.
    # Raw string: the pattern contains regex escapes (\d, \S) that are
    # invalid *string* escapes and trigger warnings otherwise.
    title_pattern = re.compile(
        r"(chr\d+.tRNA\d+-\S+-(pseudo)?)::\S+([+|-])")

    # name (title) -> sequence, in order of first appearance
    sequences = collections.OrderedDict()

    # NOTE(review): the input is re-opened by the *name* of the stdin
    # handle, which presumably only works when a file was given via
    # --stdin - confirm.
    with IOTools.open_file(options.stdin.name) as infile:
        for cur_record in FastaIterator.FastaIterator(infile):
            title = cur_record.title.replace("(", "").replace(")", "")
            m = title_pattern.match(title)

            if m is None:
                continue

            # remove any pseudo sequences
            if m.group(2) == "pseudo":
                continue

            key = str(m.group(1) + m.group(3))
            sequences[key] = cur_record.sequence

    for key, value in sequences.items():
        # Add CCA tail
        options.stdout.write((">%s\n%scca\n") % (key, value))

    E.stop()
def main(argv):
    """Script entry point: print the content of a file reversed.

    The first positional argument names the file; its complete content is
    printed character-reversed.
    """
    parser = E.OptionParser(version="%prog version: $Id$",
                            usage=globals()["__doc__"])

    (options, args) = E.start(parser, argv)

    # context manager so that the handle is closed deterministically
    with open(args[0]) as inf:
        data = inf.read()

    print(data[::-1])

    # every E.start() is expected to be paired with E.stop()
    E.stop()
def main(argv=None):
    """Script entry point: concatenate FASTA files record by record.

    Parses command line options from ``sys.argv`` unless *argv* is given.
    The n-th records of all input files are joined (spaces removed) into
    one sequence that is written under the title of the first file's
    record.

    Raises:
        ValueError: if fewer than two files are given or the files do not
            contain the same number of records.
    """
    if argv is None:
        argv = sys.argv

    parser = E.OptionParser(
        version="%prog version: $Id: fastas2fasta.py 2782 2009-09-10 11:40:29Z andreas $",
        usage=globals()["__doc__"])

    # forward argv so that an explicitly supplied argument list is honoured
    (options, args) = E.start(parser, argv=argv)

    if len(args) < 2:
        raise ValueError(
            "please supply at least two filenames to concatenate.")

    iterators = [FastaIterator.FastaIterator(iotools.open_file(a, "r"))
                 for a in args]

    ninput, noutput, nerrors = 0, 0, 0

    while True:

        sequences = []
        ids = []

        for iterator in iterators:
            try:
                cur_record = next(iterator)
            except StopIteration:
                # Keep polling the other files: stopping here would hide
                # the case where the *first* file is the shortest.
                continue

            sequences.append(cur_record.sequence.replace(" ", ""))
            ids.append(cur_record.title)

        # all files exhausted at the same time
        if not sequences:
            break

        ninput += 1

        if len(sequences) != len(iterators):
            raise ValueError("unequal number of sequences in files")

        noutput += 1

        options.stdout.write(">%s\n%s\n" % (ids[0], "".join(sequences)))

    E.info("ninput=%i, noutput=%i, nerrors=%i" % (ninput, noutput, nerrors))

    E.stop()
def main(argv=None):
    """Script entry point: interleave two paired FASTQ files as FASTA.

    Parses command line options from ``sys.argv`` unless *argv* is given.
    The two files may be given with ``-a``/``-b`` or as exactly two
    positional arguments. Reads are processed in lock-step and written as
    FASTA records (read 1 followed by read 2) to ``options.stdout``.

    Raises:
        PairedReadError: if one file runs out of reads before the other.
        AssertionError: if identifiers do not end in ``/1`` and ``/2``.
    """
    if not argv:
        argv = sys.argv

    # setup command line parser
    parser = E.OptionParser(
        version="%prog version: $Id$",
        usage=globals()["__doc__"])

    parser.add_option(
        "-a", "--first-fastq-file", dest="fastq1", type="string",
        help="supply read1 fastq file")
    parser.add_option(
        "-b", "--second-fastq-file", dest="fastq2", type="string",
        help="supply read2 fastq file")

    # add common options (-h/--help, ...) and parse command line
    (options, args) = E.start(parser, argv=argv)

    if args and len(args) == 2:
        options.fastq1, options.fastq2 = args

    f1_count = 0
    # context managers make sure both inputs are closed, also on error
    with iotools.open_file(options.fastq1) as fastq1, \
            iotools.open_file(options.fastq2) as fastq2:

        E.info("iterating over fastq files")
        for f1, f2 in zip_longest(Fastq.iterate(fastq1),
                                  Fastq.iterate(fastq2)):
            # zip_longest pads the shorter input with None
            if f1 is None or f2 is None:
                raise PairedReadError(
                    "unpaired reads detected. Are files sorted? are "
                    "files of equal length?")

            assert f1.identifier.endswith("/1") and \
                f2.identifier.endswith("/2"), \
                "Reads in file 1 must end with /1 and reads in file 2 with /2"

            options.stdout.write(
                ">%s\n%s\n>%s\n%s\n" %
                (f1.identifier, f1.seq, f2.identifier, f2.seq))
            f1_count += 1

    E.info("output: %i pairs" % f1_count)

    # write footer and output benchmark information.
    E.stop()
def main(argv=None):
    """Script entry point: convert gene set lines into one row per gene.

    Parses command line options from ``sys.argv`` unless *argv* is given.
    Each tab-separated input line from ``options.stdin`` supplies a set
    name (column 1, also used as description), an evidence field
    (column 2) and the member genes (remaining columns). For every gene a
    row ``ontology, gene, name, description, evidence`` is written to
    ``options.stdout``. With ``--filter`` only sets listed in
    ``--filter-list`` (one name per line) are kept.
    """
    if argv is None:
        argv = sys.argv

    # setup command line parser
    parser = E.OptionParser(version="%prog version: $Id$",
                            usage=globals()["__doc__"])

    parser.add_option("-o", "--ontology", dest="ontology", type="string",
                      help="ontology label")
    parser.add_option("-f", "--filter", dest="filter", action="store_true",
                      help="filter out genesets")
    parser.add_option("-l", "--filter-list", dest="filter_list",
                      type="string",
                      help="list of pathways to keep")

    parser.set_defaults(ontology=None,
                        filter=False,
                        filter_list=None)

    # add common options (-h/--help, ...) and parse command line
    (options, args) = E.start(parser, argv=argv)

    filter_set = set()
    if options.filter:
        assert options.filter_list, "must specify a list of pathways to keep"
        with open(options.filter_list) as filter_file:
            # strip only the newline: slicing off the last character
            # would truncate a final line that lacks one
            filter_set = {line.rstrip("\n") for line in filter_file}

    for line in options.stdin:
        data = line.rstrip("\n").split("\t")
        # the set name doubles as its description
        name, description, evidence = data[0], data[0], data[1]
        if options.filter and name not in filter_set:
            continue
        for gene in data[2:]:
            options.stdout.write("\t".join(
                [options.ontology, gene, name, description, evidence]) + "\n")

    # write footer and output benchmark information.
    E.stop()
def main(argv=None):
    """Script entry point: summarise a table read from stdin.

    The table is read with pandas using ``--delimiter``. Without
    ``--method`` a summary produced by ``compute_table_summary`` is
    written to ``options.stdout``. ``column-describe`` and
    ``row-describe`` write the stacked output of ``DataFrame.describe``
    (of the table or its transpose) to an output file opened with
    ``E.open_output_file``.
    """
    parser = E.OptionParser(version="%prog version: $Id$",
                            usage=globals()["__doc__"])

    parser.add_option("-d", "--delimiter", dest="delimiter", type="string",
                      help="delimiter to separate columns [%default]")

    parser.add_option("-m", "--method", dest="methods", type="choice",
                      action="append",
                      choices=["row-describe", "column-describe"],
                      help="additional methods to apply [%default]")

    parser.set_defaults(
        delimiter="\t",
        methods=[],
    )

    (options, args) = E.start(parser, argv=argv, add_output_options=True)

    if not options.methods:
        options.methods = ["summary"]

    # sep must be given by keyword: passing it positionally is deprecated
    # and no longer accepted by pandas 2
    table = pandas.read_csv(options.stdin, sep=options.delimiter)

    options.stdout.write("metric\tcount\tpercent\tinfo\n")

    for method in options.methods:
        label = re.sub("-", "_", method)
        if method == "summary":
            for category, count, denominator, info in compute_table_summary(
                    table):
                options.stdout.write("\t".join(
                    map(str, (category,
                              count,
                              iotools.pretty_percent(
                                  count, denominator, na=""),
                              info))) + "\n")
        elif method in ("column-describe", "row-describe"):
            if method == "column-describe":
                df = table.describe().T.stack()
            else:
                df = table.T.describe().stack()
            with E.open_output_file(label) as outf:
                outf.write("label\tcategory\tvalue\n")
                df.to_csv(outf, sep="\t")

    E.stop()
def main(argv=None):
    """Script entry point: extend BED12 intervals by 50 bases on each side.

    Parses command line options from ``sys.argv`` unless *argv* is given.
    Start (column 2) and end (column 3) are moved outwards by 50, the
    thick start/end columns are set to the same values, and the block
    size/start columns are adjusted for one- and two-block records.
    Records whose name (column 4) contains ``pseudo`` are not written.
    """
    if not argv:
        argv = sys.argv

    # setup command line parser
    parser = E.OptionParser(version="%prog version: $Id$",
                            usage=globals()["__doc__"])

    (options, args) = E.start(parser, argv=argv)

    if len(args) == 0:
        args.append("-")

    E.info(options.stdin)

    infile = IOTools.open_file(options.stdin.name)

    flank = 50

    for line in infile.readlines():
        column = line.split()

        start = int(column[1])
        end = int(column[2])
        new_start = str(start - flank)
        new_end = str(end + flank)

        new_columns = [
            column[0], new_start, new_end, column[3], column[4], column[5],
            new_start, new_end, column[8], column[9],
        ]

        if "pseudo" in column[3]:
            continue

        if int(column[9]) == 2:
            # two blocks: both grow by one flank, the second block's
            # start shifts accordingly
            first, second = column[10].split(",")
            block = end - start - int(second) + flank
            new_columns.append(
                "%i,%i" % (int(first) + flank, int(second) + flank))
            new_columns.append("0,%i" % block)
        else:
            # single block: grows by both flanks
            new_columns.append(str(int(column[10]) + 2 * flank))
            new_columns.append(column[11])

        options.stdout.write('\t'.join(new_columns) + '\n')

    E.stop()
def main(argv):
    """Script entry point: print a file with every ``o`` replaced by ``a``.

    The first positional argument names the file to read.
    """
    parser = E.OptionParser(version="%prog version: $Id$",
                            usage=globals()["__doc__"])
    parser.add_option("-o", "--option", dest="option", type="string")

    (options, args) = E.start(parser, argv)

    # context manager so that the handle is closed deterministically
    with open(args[0]) as inf:
        data = inf.read()

    print(re.sub("o", "a", data))

    E.stop()
def main(argv=sys.argv):
    """Script entry point: write name and length of every read in a FASTQ file.

    The input file is taken from ``--input-fastq-file`` or from a single
    positional argument. Only ``--method=length`` is implemented.

    Raises:
        ValueError: if no input file is given.
        NotImplementedError: if the requested methods are anything other
            than exactly ``length``.
    """
    parser = E.OptionParser(version="%prog version: $Id$",
                            usage=globals()["__doc__"])

    parser.add_option(
        "-i", "--input-fastq-file", dest="input_fastq_file", type="string",
        help="input fastq file. "
        "[%default]")

    parser.add_option(
        "-m", "--method", dest="methods", action="append", type="choice",
        choices=("length", ),
        help="methods to apply [%default]")

    parser.set_defaults(
        methods=[],
        input_fastq_file=None,
    )

    (options, args) = E.start(parser, argv)

    # a single positional argument overrides the option
    if len(args) == 1:
        options.input_fastq_file = args[0]

    if options.input_fastq_file is None:
        raise ValueError("missing input fastq file")

    counter = E.Counter()

    # note: complete rewrite with Counters, currently only length
    if not options.methods == ["length"]:
        raise NotImplementedError()

    with pysam.FastqFile(options.input_fastq_file) as inf:
        for read in inf:
            counter.input += 1
            fields = [str(read.name), str(len(read.sequence))]
            options.stdout.write("\t".join(fields) + "\n")
            counter.output += 1

    E.info(counter)
    E.stop()
def main(argv=None):
    """Script entry point: delete files that are not complete.

    Parses command line options from ``sys.argv`` unless *argv* is given.
    A file is kept if its ``.log`` companion exists and is complete, or
    if the file itself is complete according to ``iotools.isComplete``.
    Everything else is deleted unless ``--dry-run`` is set.
    """
    if argv is None:
        argv = sys.argv

    # setup command line parser
    parser = E.OptionParser(version="%prog version: $Id$",
                            usage=globals()["__doc__"])

    parser.add_option("-n", "--dry-run", dest="dry_run", action="store_true",
                      help="dry run, do not delete any files [%default]")

    parser.set_defaults(dry_run=False)

    # add common options (-h/--help, ...) and parse command line
    (options, args) = E.Start(parser, argv=argv)

    c = E.Counter()
    for candidate in args:
        c.checked += 1

        logfile = candidate + ".log"
        log_complete = os.path.exists(logfile) and \
            iotools.isComplete(logfile)
        # the file itself is only inspected when its log does not vouch
        # for it
        if log_complete or iotools.isComplete(candidate):
            c.complete += 1
            continue

        c.incomplete += 1
        E.info('deleting %s' % candidate)
        if not options.dry_run:
            os.unlink(candidate)
            c.deleted += 1

    E.info(c)

    # write footer and output benchmark information.
    E.Stop()
def main(argv=None):
    """Script entry point: drop pseudo tRNAs from a FASTA file and add CCA.

    Parses command line options from ``sys.argv`` unless *argv* is given.
    Records whose title contains ``pseudo`` are removed; all other
    sequences are written to ``options.stdout`` with ``cca`` appended.
    If a title occurs more than once, the last sequence is kept.
    """
    if not argv:
        argv = sys.argv

    # setup command line parser
    parser = E.OptionParser(version="%prog version: $Id$",
                            usage=globals()["__doc__"])

    (options, args) = E.start(parser, argv=argv)

    E.info(options.stdin)

    # name (title) -> sequence, in order of first appearance
    sequences = collections.OrderedDict()

    # NOTE(review): the input is re-opened by the *name* of the stdin
    # handle, which presumably only works when a file was given via
    # --stdin - confirm.
    with IOTools.open_file(options.stdin.name) as infile:
        for cur_record in FastaIterator.FastaIterator(infile):
            key = cur_record.title
            # remove any pseudo sequences
            if "pseudo" in key:
                continue
            sequences[key] = cur_record.sequence

    for key, value in sequences.items():
        # Add CCA tail
        options.stdout.write((">%s\n%scca\n") % (key, value))

    E.stop()
def main(argv=None):
    """Script entry point: remove repeated header lines from a table.

    Parses command line options from ``sys.argv`` unless *argv* is given.
    Input comes from ``options.stdin`` (no arguments, or a single ``-``)
    or from the files named on the command line. The first line that
    does not start with ``#`` is taken as the header; later lines
    identical to it are dropped, everything else is copied to
    ``options.stdout``.
    """
    if not argv:
        argv = sys.argv

    # setup command line parser
    parser = E.OptionParser(
        version="%prog version: $Id: cgat_script_template.py 2781 2009-09-10 11:33:14Z andreas $",
        usage=globals()["__doc__"])

    parser.set_defaults()

    # add common options (-h/--help, ...) and parse command line
    (options, args) = E.start(parser, argv=argv)

    use_stdin = not args or args == ["-"]
    infile = options.stdin if use_stdin else fileinput.FileInput(args)

    ninput = nskipped = noutput = 0

    header = None
    for line in infile:
        ninput += 1
        # comment lines are always passed through
        if not line.startswith("#"):
            if not header:
                header = line
            elif line == header:
                nskipped += 1
                continue

        options.stdout.write(line)
        noutput += 1

    E.info("ninput=%i, noutput=%i, nskipped=%i" %
           (ninput, noutput, nskipped))

    # write footer and output benchmark information.
    E.stop()
def main(argv=None):
    """Script entry point: split a FASTQ file by a list of read names.

    Reads whose identifier (up to the first whitespace) is listed in the
    ``--to-drop-single`` file are written to ``--fastq-drop1``, all other
    reads to ``--fastq-out1``. The number and percentage of dropped reads
    is logged.
    """
    if argv is None:
        argv = sys.argv

    parser = E.OptionParser(version="%prog version: $Id$",
                            usage=globals()["__doc__"])

    parser.add_option("--fastq1", dest="fastq1")
    parser.add_option("--to-drop-single", dest='to_remove_singletons')
    parser.add_option("--fastq-out1", dest="fq_out1")
    parser.add_option("--fastq-drop1", dest="fq_dropped1")

    # forward argv so that an explicitly supplied argument list is honoured
    (options, args) = E.start(parser, argv=argv)

    with IOTools.open_file(options.to_remove_singletons) as inf:
        reads_to_remove = {line.strip() for line in inf}

    reads = 0
    dropped_reads = 0
    record_format = "@%s\n%s\n+\n%s\n"

    # context managers make sure all files are closed, also on error
    with IOTools.open_file(options.fq_out1, 'w') as fastq_out, \
            IOTools.open_file(options.fq_dropped1, 'w') as fastq_host, \
            IOTools.open_file(options.fastq1) as fastq_in:
        # the input file name lives on the options object; the bare name
        # `fastq1` used previously was never defined
        for read in Fastq.iterate(fastq_in):
            reads += 1
            record = record_format % (read.identifier, read.seq, read.quals)
            if read.identifier.split()[0] in reads_to_remove:
                fastq_host.write(record)
                dropped_reads += 1
            else:
                fastq_out.write(record)

    try:
        percent_dropped = dropped_reads / float(reads) * 100
    except ZeroDivisionError:
        percent_dropped = 0.0

    E.info('Dropped %i of %i reads (%f percent)'
           % (dropped_reads, reads, percent_dropped))

    # every E.start() is expected to be paired with E.stop()
    E.stop()
def main(argv=sys.argv):
    """Script entry point: convert FASTQ reads into PacBio-style FASTA.

    The input file is taken from ``--input-fastq`` or a single positional
    argument (``-`` selects ``options.stdin``). Each read is written to
    ``options.stdout`` with a header of the form
    ``>test/<n>/1_<len+1> RQ=0.<mean quality>`` followed by the sequence
    wrapped at ``line_width`` characters.
    """
    parser = E.OptionParser(version="%prog version: $Id$",
                            usage=globals()["__doc__"])

    parser.add_option(
        "-i", "--input-fastq", dest="input_fastq_file", type="string",
        help="input fastq file")

    parser.add_option(
        "-m", "--method", dest="method", type="choice",
        choices=["ont2pacbio"],
        help="methods to apply [%default]")

    parser.set_defaults(
        input_fastq_file=None,
        line_width=80,
        method=None,
    )

    (options, args) = E.start(parser, argv, add_output_options=True)

    if len(args) == 1:
        options.input_fastq_file = args[0]

    if options.input_fastq_file == "-":
        options.input_fastq_file = options.stdin

    outf = options.stdout
    width = options.line_width

    reads = pysam.FastqFile(options.input_fastq_file)
    for well_no, record in enumerate(reads, start=1):
        quals = record.get_quality_array()
        seq = record.sequence
        # mean base quality, rounded down
        qv = int(math.floor(sum(quals) / len(quals)))
        outf.write(">{}/{}/{}_{} RQ=0.{}\n".format(
            "test", well_no, 1, len(seq) + 1, qv))
        offset = 0
        while offset < len(seq):
            outf.write(seq[offset:offset + width] + "\n")
            offset += width

    E.stop()
def main(argv=None):
    """Script entry point: shuffle the lines of stdin.

    Parses command line options from ``sys.argv`` unless *argv* is given.
    The first ``--keep-header`` lines are copied unchanged; all remaining
    lines are written to ``options.stdout`` in random order.
    """
    if argv is None:
        argv = sys.argv

    # setup command line parser
    parser = E.OptionParser(version="%prog version: $Id$",
                            usage=globals()["__doc__"])

    parser.add_option("-k", "--keep-header", dest="keep_header", type="int",
                      help="randomize, but keep header in place [%default]")

    parser.set_defaults(keep_header=0)

    # add common options (-h/--help, ...) and parse command line
    (options, args) = E.start(parser, argv=argv)

    source = options.stdin
    sink = options.stdout
    c = E.Counter()

    # pass the header through untouched
    remaining_header = options.keep_header
    while remaining_header > 0:
        c.header += 1
        sink.write(source.readline())
        remaining_header -= 1

    body = source.readlines()
    c.lines_input = len(body)
    random.shuffle(body)
    sink.writelines(body)
    c.lines_output = len(body)

    E.info(c)

    # write footer and output benchmark information.
    E.stop()
def main(argv=None):
    """Script entry point: write spliced FASTA records for an input file.

    Parses command line options from ``sys.argv`` unless *argv* is given.
    The last positional argument is passed to ``makeSplicedFasta`` and
    every record it yields is written to ``options.stdout``.

    Raises:
        ValueError: if no input file is given.
    """
    if argv is None:
        argv = sys.argv

    # setup command line parser
    parser = E.OptionParser(version="%prog version: $Id$",
                            usage=globals()["__doc__"])

    # add common options (-h/--help, ...) and parse command line
    (options, args) = E.start(parser, argv=argv)

    # Use the parsed positional arguments: the last element of the raw
    # argv may be an option value (or the script name when nothing is
    # given) rather than the input file.
    if not args:
        raise ValueError("please supply an input file")
    infile = args[-1]

    for record in makeSplicedFasta(infile):
        options.stdout.write(record)

    # write footer and output benchmark information.
    E.stop()
def main(argv=None):
    """Script entry point: tabulate the versions reported by all task runners.

    For every runner registered in the tool, metric, split and collate
    maps, the runner is instantiated and asked for its version. A
    ``section / task / version / comments`` table is written to
    ``options.stdout``; runners whose version lookup fails are listed
    with the comment ``unavailable``.
    """
    parser = E.OptionParser(version="%prog version: $Id$",
                            usage=globals()["__doc__"])

    (options, args) = E.start(parser, argv=argv, add_output_options=True)

    total_counter = E.Counter()
    table = []

    sections = [("tool", map_tool_to_runner),
                ("metric", map_metric_to_runner),
                ("split", map_split_to_runner),
                ("collate", map_collate_to_runner)]

    for section, map_task2runner in sections:
        E.debug("processing section: {}".format(section))
        counter = E.Counter()

        for task, taskf in sorted(map_task2runner.items()):
            counter.ntasks += 1
            comments = []
            try:
                version = taskf().get_version()
                counter.version_ok += 1
            except Exception:
                # deliberately broad: any failure just marks the task
                # as unavailable in the report
                version = ""
                comments.append("unavailable")
                counter.version_fail += 1

            table.append((section, task, version, "; ".join(comments)))

        E.info("{}: {}".format(section, counter))
        total_counter += counter

    options.stdout.write("section\ttask\tversion\tcomments\n")
    for row in table:
        options.stdout.write("\t".join(map(str, row)) + "\n")

    # report the accumulated totals, not the counter of the last section
    E.info("{}: {}".format("total", total_counter))
    E.stop()
def main(argv=None):
    """Script entry point: output selected columns of a CSV table.

    Parses command line options from ``sys.argv`` unless *argv* is given.
    The comma-separated field list given with ``-s``/``--sort-order``
    determines which columns are written and in which order; fields of
    the input that are not listed are ignored.

    Raises:
        ValueError: if no field list is given.
    """
    if argv is None:
        argv = sys.argv

    parser = E.OptionParser(version="%prog version: $Id$")

    # The long option used to read "--method=sort --sort-order", which
    # can never match on the command line; only -s was usable.
    parser.add_option("-s", "--sort-order", dest="sort", type="string",
                      help="fields to take (in sorted order).")

    # forward argv so that an explicitly supplied argument list is honoured
    (options, args) = E.start(parser, argv=argv, add_csv_options=True)

    if not options.sort:
        # fail with a clear message instead of a TypeError when the
        # header is joined below
        raise ValueError("please supply the fields to output with -s")
    fields = options.sort.split(",")

    # NOTE(review): E.stdin / E.stdout are used here while sibling
    # scripts use options.stdin / options.stdout - confirm they exist.
    reader = csv.DictReader(E.stdin, dialect=options.csv_dialect)

    writer = csv.DictWriter(E.stdout,
                            fields,
                            dialect=options.csv_dialect,
                            lineterminator=options.csv_lineterminator,
                            extrasaction='ignore')

    E.stdout.write("\t".join(fields) + "\n")

    for row in reader:
        row = iotools.convertDictionary(row)
        writer.writerow(row)

    E.stop()
def main(argv=None):
    """Script entry point: extract or clean summary tables.

    Parses command line options from ``sys.argv`` unless *argv* is given.
    Depending on ``--task`` the resulting data frame comes from
    ``getTableFromDb``, ``getModelCoverage`` or ``cleanStatsTable`` (the
    latter reads the last positional argument). The frame is written
    tab-separated to ``options.stdout`` with index label ``track``.

    Raises:
        ValueError: if no task is given, or ``clean_table`` is requested
            without an input file.
    """
    if argv is None:
        argv = sys.argv

    # setup command line parser
    parser = E.OptionParser(version="%prog version: $Id$",
                            usage=globals()["__doc__"])

    parser.add_option("--task", dest="task", type="choice",
                      choices=["extract_table", "get_coverage",
                               "clean_table"],
                      help="task to perform")

    parser.add_option("-t", "--table-name", dest="table", type="string",
                      help="table in SQLite DB to extract")

    # add common options (-h/--help, ...) and parse command line
    (options, args) = E.start(parser, argv=argv, add_database_options=True)

    if options.task == "extract_table":
        out_df = getTableFromDb(options.database_url, options.table)

    elif options.task == "get_coverage":
        # raw string: \S is a regex escape, not a string escape
        out_df = getModelCoverage(options.database_url,
                                  table_regex=r"(\S+)_transcript_counts")

    elif options.task == "clean_table":
        # use the parsed positional arguments rather than raw argv, whose
        # last element may be an option value
        if not args:
            raise ValueError("task clean_table requires an input file")
        infile = args[-1]
        out_df = cleanStatsTable(infile)

    else:
        # without this, out_df would be unbound below
        raise ValueError("please specify a task with --task")

    out_df.to_csv(options.stdout,
                  sep="\t", index_label="track")

    # write footer and output benchmark information.
    E.stop()
def main(argv=None):
    """Script entry point of a template script.

    Parses command line options from ``sys.argv`` unless *argv* is given;
    apart from option parsing and the start/stop bookkeeping nothing is
    done.
    """
    argv = sys.argv if argv is None else argv

    # setup command line parser
    parser = E.OptionParser(
        version="%prog version: $Id$",
        usage=globals()["__doc__"],
    )

    parser.add_option(
        "-t", "--test",
        dest="test",
        type="string",
        help="supply help",
    )

    # add common options (-h/--help, ...) and parse command line
    (options, args) = E.start(parser, argv=argv)

    # write footer and output benchmark information.
    E.stop()
def main(argv=None):
    """Script entry point: import a module and call one of its functions.

    The module given with ``--module`` (a path or a module name, with or
    without ``.py``) is imported and ``--function`` is looked up in it.
    The function is called with ``(infiles, outfiles, params)``,
    ``(infiles, outfiles)`` or ``(params)`` depending on which of
    ``--input``, ``--output-section`` and ``--params`` are supplied. A
    single input or output file is passed as a string, several as a list.

    Raises:
        ValueError: if module or function are missing, or the supplied
            combination of inputs, outputs and parameters is unsupported.
        AttributeError: if the module does not define the function.
    """
    # Parse the options
    parser = E.OptionParser(
        version="%prog version: $Id: cgat_script_template.py 2871 2010-03-03 10:20:44Z andreas $",
        usage=globals()["__doc__"])

    parser.add_option(
        "-p", "--params", "--args", dest="params", type="string",
        help="comma separated list of additional parameter strings")

    parser.add_option("-m", "--module", dest="module", type="string",
                      help="the full path to the module file", default=None)

    parser.add_option("-i", "--input", dest="input_filenames",
                      type="string", action="append",
                      help="input filename")

    parser.add_option("-o", "--output-section", dest="output_filenames",
                      type="string", action="append",
                      help="output filename")

    parser.add_option("-f", "--function", dest="function", type="string",
                      help="the module function", default=None)

    parser.set_defaults(
        input_filenames=[],
        output_filenames=[],
        params=None
    )

    # forward argv so that an explicitly supplied argument list is honoured
    (options, args) = E.start(parser, argv=argv)

    # Check a module and function have been specified
    if not options.module or not options.function:
        raise ValueError("Both a function and Module must be specified")

    # initialize defaults
    P.get_parameters()

    # If a full path was given, add this path to the system path
    location = os.path.dirname(options.module)
    if location != "":
        sys.path.append(location)

    # Establish the module name, accommodating cases where the
    # .py extension has been included in the module name
    module_name = os.path.basename(options.module)
    if module_name.endswith(".py"):
        module_base_name = module_name[:-3]
    else:
        module_base_name = module_name

    # Import the specified module and map the specified function
    E.info("importing module '%s' " % module_base_name)
    E.debug("sys.path is: %s" % sys.path)

    module = importlib.import_module(module_base_name)

    try:
        function = getattr(module, options.function)
    except AttributeError as msg:
        # Python 3 exceptions have no .message attribute; using it made
        # this handler fail itself and hide the list of functions.
        raise AttributeError(
            "%s: unknown function, available functions are: %s" %
            (msg,
             ",".join([x for x in dir(module)
                       if not x.startswith("_")]))) from msg

    # the literal string "None" is accepted as "not given"
    if options.input_filenames and options.input_filenames != ["None"]:
        infiles = options.input_filenames
    else:
        infiles = False

    if options.output_filenames and options.output_filenames != ["None"]:
        outfiles = options.output_filenames
    else:
        outfiles = False

    # Parse the parameters into an array
    if options.params:
        params = [param.strip() for param in options.params.split(",")]
    else:
        params = False

    # deal with single file case
    if infiles and len(infiles) == 1:
        infiles = infiles[0]
    if outfiles and len(outfiles) == 1:
        outfiles = outfiles[0]

    # Make the function call
    if infiles and outfiles and params:
        function(infiles, outfiles, params)
    elif infiles and outfiles and not params:
        function(infiles, outfiles)
    elif params:
        function(params)
    else:
        raise ValueError(
            "Expecting infile+outfile+params or infile+outfile or params")

    E.stop()