def main(argv=None):
    """script main.

    parses command line options in sys.argv, unless *argv* is given.

    Exactly two positional arguments are expected. The first names the
    query file, or ``-`` to read the query intervals from ``args.stdin``.
    The second names the file that is indexed with ``Bed.readAndIndex``.
    For every query interval, each overlapping indexed entry is written
    to ``args.stdout`` as one tab-separated line.

    With ``--output-section=name`` a ``name1``/``name2`` header is
    written and only the first entry of ``fields`` is output for each
    interval; otherwise the ``str()`` of both entries is output.

    Raises:
        ValueError: if the number of positional arguments is not two.
    """
    if not argv:
        argv = sys.argv

    # setup command line parser
    parser = E.ArgumentParser(description=__doc__)

    parser.add_argument("--version", action='version', version="1.0")

    parser.add_argument(
        "-o", "--output-section", dest="output", type=str,
        choices=("full", "name"),
        help="output either ``full`` overlapping entries, only the ``name``s.")

    parser.set_defaults(
        output="full",
    )

    # add common options (-h/--help, ...) and parse command line
    (args, unknown) = E.start(parser, argv=argv, unknowns=True)

    if len(unknown) != 2:
        raise ValueError("two arguments required")

    # Only handles opened here are tracked for closing; args.stdin is
    # owned by the caller/framework and must be left open.
    opened = []
    try:
        if unknown[0] == "-":
            infile1 = args.stdin
        else:
            infile1 = iotools.open_file(unknown[0], "r")
            opened.append(infile1)

        infile2 = iotools.open_file(unknown[1], "r")
        opened.append(infile2)

        idx = Bed.readAndIndex(infile2, with_values=True)

        output = args.output
        outfile = args.stdout

        if output == "name":
            outfile.write("name1\tname2\n")

            def outf(x):
                return x.fields[0]
        else:
            outf = str

        for bed in Bed.iterator(infile1):
            try:
                overlaps = idx[bed.contig].find(bed.start, bed.end)
            except (KeyError, IndexError):
                # ignore missing contig and zero length intervals
                continue

            for o in overlaps:
                # o[2] is the value stored with the indexed interval
                outfile.write("\t".join((outf(bed), outf(o[2]))) + "\n")
    finally:
        for handle in opened:
            handle.close()

    E.stop()
def __init__(self, filename, *args, **kwargs):
    """Load the intervals in *filename* and set up per-track headers.

    The file is indexed per track with ``Bed.readAndIndex``. For every
    track in the index, two column names are appended to
    ``self.headers``: ``<track>_nover`` and ``<track>_bases``.

    Arguments
    ---------
    filename : string
        Name of the interval file to index. Must not be None.
    *args, **kwargs
        Passed through unchanged to ``Counter.__init__``.
    """
    assert filename is not None,\
        "please supply filename for CounterOverlap"

    Counter.__init__(self, *args, **kwargs)

    self.filename = filename

    E.info("reading intervals from %s" % self.filename)

    # Close the handle once indexing is done instead of leaving it to
    # the garbage collector.
    # NOTE(review): assumes readAndIndex consumes the handle fully
    # before returning - confirm.
    with iotools.open_file(self.filename, "r") as infile:
        self.index = Bed.readAndIndex(infile, per_track=True)

    E.info("read intervals for %s tracks" % len(self.index))

    self.tracks = list(self.index.keys())
    self.headers = []
    for track in self.tracks:
        self.headers.extend(["%s_nover" % track, "%s_bases" % track])
def buildIndex(self, filename):
    """Return the index built by ``Bed.readAndIndex`` from *filename*.

    The file is opened for reading and closed again once the index has
    been built.
    """
    # NOTE(review): assumes readAndIndex consumes the handle fully
    # before returning - confirm.
    with iotools.open_file(filename, "r") as infile:
        return Bed.readAndIndex(infile)
def __init__(self, filename):
    """Build per-track interval indices from *filename*.

    The result of ``Bed.readAndIndex(..., per_track=True)`` is stored
    in ``self.mIndices``. The input file is closed after indexing.
    """
    # NOTE(review): assumes readAndIndex consumes the handle fully
    # before returning - confirm.
    with iotools.open_file(filename, "r") as infile:
        self.mIndices = Bed.readAndIndex(infile, per_track=True)
def annotateCpGIslands(infiles, outfile):
    '''annotate transcript by absence/presence of CpG islands

    *infiles* is a pair ``(cpgfile, tssfile)``. The intervals in
    *cpgfile* are indexed and each entry in *tssfile* is searched for
    overlapping CpG intervals within a strand-aware window:

    * ``+`` strand: ``tss.start - cpg_search_upstream`` to
      ``tss.start + cpg_search_downstream``
    * otherwise: ``tss.end - cpg_search_downstream`` to
      ``tss.end + cpg_search_upstream``

    Both extensions are read from ``PARAMS``.

    One line per match is written to *outfile* with the columns
    ``transcript_id``, ``strand``, ``start``, ``end``,
    ``relative_start`` and ``relative_end``. The relative coordinates
    are measured from ``tss.start`` on the ``+`` strand and from
    ``tss.end`` otherwise. A table of counters is written to
    ``outfile + ".summary"`` and the counters are logged.
    '''
    cpgfile, tssfile = infiles

    # Close the CpG file as soon as it has been indexed.
    # NOTE(review): assumes readAndIndex consumes the handle fully
    # before returning - confirm.
    with iotools.openFile(cpgfile) as cpg_handle:
        cpg = Bed.readAndIndex(cpg_handle)

    extension_upstream = PARAMS["cpg_search_upstream"]
    extension_downstream = PARAMS["cpg_search_downstream"]

    c = E.Counter()

    # Context managers ensure that both handles are released even if
    # processing fails part-way through.
    with iotools.openFile(outfile, "w") as outf:
        outf.write(
            "transcript_id\tstrand\tstart\tend\trelative_start\trelative_end\n")

        with iotools.openFile(tssfile) as tss_handle:
            for tss in Bed.iterator(tss_handle):
                c.tss_total += 1

                if tss.strand == "+":
                    start = tss.start - extension_upstream
                    end = tss.start + extension_downstream
                else:
                    start = tss.end - extension_downstream
                    end = tss.end + extension_upstream

                try:
                    matches = list(cpg[tss.contig].find(start, end))
                except KeyError:
                    # contig is absent from the CpG index
                    c.promotor_without_matches += 1
                    continue

                if not matches:
                    c.promotor_without_matches += 1
                    continue

                c.promotor_output += 1
                for match in matches:
                    c.matches_total += 1
                    # third element of the match is not needed here
                    genome_start, genome_end, _ = match
                    match_length = genome_end - genome_start

                    # get relative location of match
                    if tss.strand == "+":
                        relative_start = genome_start - tss.start
                    else:
                        relative_start = tss.end - genome_end

                    relative_end = relative_start + match_length

                    outf.write("\t".join(
                        map(str, (tss.name, tss.strand,
                                  genome_start, genome_end,
                                  relative_start, relative_end))) + "\n")
                    c.matches_output += 1

    with iotools.openFile(outfile + ".summary", "w") as outf:
        outf.write("category\tcounts\n")
        outf.write(c.asTable() + "\n")

    E.info(c)
def main(argv=None):
    """script main.

    parses command line options in sys.argv, unless *argv* is given.

    Exactly two positional arguments are expected. The first names the
    query file, or ``-`` to read the query intervals from
    ``options.stdin``. The second names the file that is indexed with
    ``Bed.readAndIndex``. For every query interval, each overlapping
    indexed entry is written to ``options.stdout`` as one tab-separated
    line.

    With ``--output-section=name`` a ``name1``/``name2`` header is
    written and only the first entry of ``fields`` is output for each
    interval; otherwise the ``str()`` of both entries is output.

    Raises:
        ValueError: if the number of positional arguments is not two.
    """
    if not argv:
        argv = sys.argv

    # setup command line parser
    parser = E.OptionParser(
        version="%prog version: $Id: bed2graph.py 2861 2010-02-23 17:36:32Z andreas $",
        usage=globals()["__doc__"])

    parser.add_option(
        "-o", "--output-section", dest="output", type="choice",
        choices=("full", "name"),
        help="output either ``full`` overlapping entries, only the ``name``s. [default=%default]."
    )

    parser.set_defaults(output="full", )

    # add common options (-h/--help, ...) and parse command line
    (options, args) = E.start(parser, argv=argv)

    if len(args) != 2:
        raise ValueError("two arguments required")

    # Only handles opened here are tracked for closing; options.stdin is
    # owned by the caller/framework and must be left open.
    opened = []
    try:
        if args[0] == "-":
            infile1 = options.stdin
        else:
            infile1 = iotools.open_file(args[0], "r")
            opened.append(infile1)

        infile2 = iotools.open_file(args[1], "r")
        opened.append(infile2)

        idx = Bed.readAndIndex(infile2, with_values=True)

        output = options.output
        outfile = options.stdout

        if output == "name":
            outfile.write("name1\tname2\n")

            def outf(x):
                return x.fields[0]
        else:
            outf = str

        for bed in Bed.iterator(infile1):
            try:
                overlaps = idx[bed.contig].find(bed.start, bed.end)
            except (KeyError, IndexError):
                # ignore missing contig and zero length intervals
                continue

            for o in overlaps:
                # o[2] is the value stored with the indexed interval
                outfile.write("\t".join((outf(bed), outf(o[2]))) + "\n")
    finally:
        for handle in opened:
            handle.close()

    E.stop()