def bed(args):
    """
    %prog bed fastafile kmer.dump.txt

    Map kmers on FASTA.

    Every exact occurrence of a dumped kmer (or of its reverse complement) is
    printed to stdout as a tab-separated line: seqid, start, end, kmer, where
    start is the 0-based offset of the window and end is start + K.
    """
    from jcvi.formats.fasta import rc, parse_fasta

    p = OptionParser(bed.__doc__)
    opts, args = p.parse_args(args)

    if len(args) != 2:
        sys.exit(not p.print_help())

    fastafile, dumpfile = args
    KMERS = set()
    K = 0
    with open(dumpfile) as fp:
        for row in fp:
            atoms = row.split()
            if not atoms:
                # Blank lines carry no kmer; skip rather than IndexError
                continue
            kmer = atoms[0]
            # Index both strands so a single lookup per window suffices
            KMERS.add(kmer)
            KMERS.add(rc(kmer))
            # Same as the original: K is taken from the last kmer read
            K = len(kmer)

    logging.debug("Imported {} {}-mers".format(len(KMERS), K))
    if not KMERS:
        # Nothing to look up (empty dump); K would be meaningless
        return

    for name, seq in parse_fasta(fastafile):
        name = name.split()[0]
        # +1 so the final window, starting at len(seq) - K, is also tested;
        # range() is empty when the sequence is shorter than K
        for i in range(len(seq) - K + 1):
            if i % 5000000 == 0:
                # Periodic progress report on stderr, keeps stdout clean
                print("{}:{}".format(name, i), file=sys.stderr)
            kmer = seq[i:i + K]
            if kmer in KMERS:
                print("\t".join(str(x) for x in (name, i, i + K, kmer)))
def bed(args):
    """
    %prog bed fastafile kmer.dump.txt

    Map kmers on FASTA.

    Exact matches of the dumped kmers and of their reverse complements are
    written to stdout, one per line, as tab-separated seqid, start, end, kmer
    (start is the 0-based window offset, end is start + K).
    """
    from jcvi.formats.fasta import rc, parse_fasta

    p = OptionParser(bed.__doc__)
    opts, args = p.parse_args(args)

    if len(args) != 2:
        sys.exit(not p.print_help())

    fastafile, dumpfile = args

    KMERS = set()
    K = None
    with open(dumpfile) as fp:
        for row in fp:
            fields = row.split()
            if not fields:
                # Tolerate blank lines in the dump
                continue
            kmer = fields[0]
            KMERS.update((kmer, rc(kmer)))
            # K follows the last kmer seen, as before
            K = len(kmer)

    if K is None:
        # Empty dump: previously this crashed with NameError on K
        logging.debug("Imported {} {}-mers".format(len(KMERS), 0))
        return

    logging.debug("Imported {} {}-mers".format(len(KMERS), K))

    # Plain write() and single-argument print() behave the same on
    # Python 2 and Python 3, unlike the `print >> stream` statement
    report = sys.stderr.write
    for name, seq in parse_fasta(fastafile):
        name = name.split()[0]
        # The last valid window starts at len(seq) - K, hence the +1
        nwindows = len(seq) - K + 1
        for i in range(nwindows):
            if i % 5000000 == 0:
                report("{}:{}\n".format(name, i))
            kmer = seq[i: i + K]
            if kmer in KMERS:
                print("\t".join(str(x) for x in (name, i, i + K, kmer)))
def mid(args):
    """
    %prog mid mappingfile

    Produce a MID configuration file from primer mapping. The primer mapping can
    be two or three columns, the third column is the optional 3`-primer.
    """
    from jcvi.formats.fasta import rc

    p = OptionParser(mid.__doc__)
    opts, args = p.parse_args(args)

    if len(args) != 1:
        sys.exit(not p.print_help())

    mappingfile, = args
    # Blank lines are dropped so they do not fail the column-count check
    with open(mappingfile) as fp:
        data = [row.split() for row in fp if row.strip()]

    templatefile = "/usr/local/seq454-64_v2.6/config/MIDConfig.parse"
    midfile = op.basename(templatefile)

    # Read the template completely before the output is opened for writing,
    # so a missing template does not leave a truncated output file behind
    with open(templatefile) as template:
        lines = template.readlines()

    # The custom MID block is spliced in after this many template lines
    insertline = 45
    first, second = lines[:insertline], lines[insertline:]

    with open(midfile, "w") as fw:
        fw.writelines(first)

        # The inserted block
        fw.write("MYMIDs\n{\n")
        for atoms in data:
            natoms = len(atoms)
            assert natoms in (2, 3), \
                "Expected 2 or 3 columns, got {0}: {1}".format(natoms, atoms)
            name, p5seq = atoms[:2]
            line = ' mid = "{0}", "{1}", 1'.format(name, p5seq)
            if natoms == 3:
                # Since most often I find p3 primers by parsing newbler
                # progress file, the string I grepped needs to be
                # reverse-complemented. Only touch the third column when it
                # exists, otherwise two-column rows raise IndexError.
                p3seq = rc(atoms[2])
                line += ', "{0}"'.format(p3seq)
            line += ';'
            fw.write(line + "\n")
        fw.write("}\n")

        fw.writelines(second)

    logging.debug("Barcodes written to `{0}`.".format(midfile))
def rc(self):
    """Flip this record to the opposite strand, in place.

    ``self.seq`` is replaced by the result of the module-level ``rc`` helper
    (presumably reverse-complement; the helper is defined outside this
    method) and ``self.qual`` is reversed so it stays aligned with the
    sequence. Returns None.
    """
    # Inside the method body, `rc` resolves to the module-level name, not
    # to this method.
    flipped = rc(self.seq)
    self.seq = flipped
    backwards = slice(None, None, -1)
    self.qual = self.qual[backwards]