Beispiel #1
0
def bed(args):
    """
    %prog bed fastafile kmer.dump.txt

    Map kmers on FASTA.
    """
    # NOTE: the docstring above is handed to OptionParser as the usage text,
    # so implementation notes are kept in comments instead of the docstring.
    #
    # args:   command-line tokens; exactly two positionals are required
    #         (the FASTA file and the kmer dump file).
    # output: one tab-separated line per hit on stdout
    #         (sequence name, start, end, kmer); progress goes to stderr.
    # exits:  via sys.exit() on a wrong argument count or an empty dump file.
    from jcvi.formats.fasta import rc, parse_fasta

    p = OptionParser(bed.__doc__)
    opts, args = p.parse_args(args)

    if len(args) != 2:
        sys.exit(not p.print_help())

    fastafile, dumpfile = args

    # Store each kmer together with its rc() counterpart (reverse complement)
    # so a single set lookup per window covers both orientations.
    KMERS = set()
    kmer = None
    with open(dumpfile) as fp:
        for row in fp:
            atoms = row.split()
            if not atoms:
                # Blank line: nothing to import, and atoms[0] would fail.
                continue
            kmer = atoms[0]
            KMERS.add(kmer)
            KMERS.add(rc(kmer))

    if kmer is None:
        # Without at least one kmer the window size K is undefined.
        logging.error("No kmers found in `{}`".format(dumpfile))
        sys.exit(1)

    # The window size is taken from the last kmer read; the dump is assumed
    # to contain kmers of one uniform length.
    K = len(kmer)
    logging.debug("Imported {} {}-mers".format(len(KMERS), K))

    for name, seq in parse_fasta(fastafile):
        name = name.split()[0]
        # "+ 1" so the final window, starting at len(seq) - K, is examined
        # too; sequences shorter than K yield an empty range.
        for i in range(len(seq) - K + 1):
            if i % 5000000 == 0:
                print("{}:{}".format(name, i), file=sys.stderr)
            kmer = seq[i:i + K]
            if kmer in KMERS:
                print("\t".join(str(x) for x in (name, i, i + K, kmer)))
Beispiel #2
0
def bed(args):
    """
    %prog bed fastafile kmer.dump.txt

    Map kmers on FASTA.
    """
    # NOTE: the docstring above is handed to OptionParser as the usage text,
    # so implementation notes are kept in comments instead of the docstring.
    #
    # args:   command-line tokens; exactly two positionals are required
    #         (the FASTA file and the kmer dump file).
    # output: one tab-separated line per hit on stdout
    #         (sequence name, start, end, kmer); progress goes to stderr.
    # exits:  via sys.exit() on a wrong argument count or an empty dump file.
    from jcvi.formats.fasta import rc, parse_fasta

    p = OptionParser(bed.__doc__)
    opts, args = p.parse_args(args)

    if len(args) != 2:
        sys.exit(not p.print_help())

    fastafile, dumpfile = args

    # Store each kmer together with its rc() counterpart (reverse complement)
    # so a single set lookup per window covers both orientations.
    KMERS = set()
    kmer = None
    with open(dumpfile) as fp:
        for row in fp:
            atoms = row.split()
            if not atoms:
                # Blank line: nothing to import, and atoms[0] would fail.
                continue
            kmer = atoms[0]
            KMERS.add(kmer)
            KMERS.add(rc(kmer))

    if kmer is None:
        # Without at least one kmer the window size K is undefined.
        logging.error("No kmers found in `{}`".format(dumpfile))
        sys.exit(1)

    # The window size is taken from the last kmer read; the dump is assumed
    # to contain kmers of one uniform length.
    K = len(kmer)
    logging.debug("Imported {} {}-mers".format(len(KMERS), K))

    # Explicit stream writes emit the same bytes as the print statement while
    # remaining valid syntax on both Python 2 and Python 3.
    progress = sys.stderr.write
    emit = sys.stdout.write

    for name, seq in parse_fasta(fastafile):
        name = name.split()[0]
        # "+ 1" so the final window, starting at len(seq) - K, is examined
        # too; sequences shorter than K yield an empty range.
        for i in range(len(seq) - K + 1):
            if i % 5000000 == 0:
                progress("{}:{}\n".format(name, i))
            kmer = seq[i: i + K]
            if kmer in KMERS:
                emit("\t".join(str(x) for x in (name, i, i + K, kmer)) + "\n")
Beispiel #3
0
def mid(args):
    """
    %prog mid mappingfile

    Produce a MID configuration file from primer mapping. The primer mapping can
    be two or three columns, the third column is the optional 3`-primer.
    """
    # NOTE: the docstring above is handed to OptionParser as the usage text,
    # so implementation notes are kept in comments instead of the docstring.
    #
    # args:   command-line tokens; exactly one positional (the mapping file).
    # output: a copy of the template config, written to the current directory
    #         under the template's basename, with a "MYMIDs" block spliced in
    #         after the first `insertline` template lines.
    from jcvi.formats.fasta import rc

    p = OptionParser(mid.__doc__)
    opts, args = p.parse_args(args)

    if len(args) != 1:
        sys.exit(not p.print_help())

    mappingfile, = args

    with open(mappingfile) as fp:
        # Blank lines carry no mapping and would otherwise trip the column
        # count check below.
        data = [row.split() for row in fp if row.strip()]

    templatefile = "/usr/local/seq454-64_v2.6/config/MIDConfig.parse"
    midfile = op.basename(templatefile)

    # Read the template before opening the output for writing, so a missing
    # template does not leave an empty output file behind.
    with open(templatefile) as fp:
        lines = fp.readlines()

    insertline = 45
    first, second = lines[:insertline], lines[insertline:]

    with open(midfile, "w") as fw:
        fw.writelines(first)

        # The inserted block
        fw.write("MYMIDs\n{\n")
        for atoms in data:
            natoms = len(atoms)
            assert natoms in (2, 3), \
                "Expected 2 or 3 columns, got {0}: {1}".format(natoms, atoms)
            name, p5seq = atoms[:2]
            line = '        mid = "{0}", "{1}", 1'.format(name, p5seq)

            if natoms == 3:
                # Since most often I find p3 primers by parsing newbler
                # progress file, the string I grepped needs to be
                # reverse-complemented. Only touch atoms[2] when the third
                # column exists; two-column rows have no 3'-primer.
                p3seq = rc(atoms[2])
                line += ', "{0}"'.format(p3seq)
            line += ';'
            fw.write(line + "\n")

        fw.write("}\n")

        fw.writelines(second)

    logging.debug("Barcodes written to `{0}`.".format(midfile))
Beispiel #4
0
 def rc(self):
     """Update this record in place: ``seq`` becomes ``rc(seq)`` and ``qual`` is reversed.

     The bare ``rc`` called below is looked up outside this method (presumably
     a module-level helper), not on ``self``. Returns ``None``.
     """
     converted = rc(self.seq)
     self.seq = converted
     # Reverse the quality values with a full-range, step -1 slice.
     self.qual = self.qual[::-1]
Beispiel #5
0
 def rc(self):
     """Flip the record in place.

     ``self.seq`` is passed through the externally defined ``rc`` callable
     (not this method) and ``self.qual`` is replaced by its reversed copy.
     Nothing is returned.
     """
     original_seq = self.seq
     self.seq = rc(original_seq)
     original_qual = self.qual
     self.qual = original_qual[::-1]