Python must_open Examples, jcvi.formats.fasta.must_open Python Examples

Example #1

0

Show file

def FastqPairedIterator(read1, read2):
    if read1 == read2:
        p1fp = p2fp = must_open(read1)
    else:
        p1fp = must_open(read1)
        p2fp = must_open(read2)

    return p1fp, p2fp

Example #2

0

Show file

File: fastq.py Project: Nicholas-NVS/jcvi

def FastqPairedIterator(read1, read2):
    if read1 == read2:
        p1fp = p2fp = must_open(read1)
    else:
        p1fp = must_open(read1)
        p2fp = must_open(read2)

    return p1fp, p2fp

Example #3

0

Show file

def splitread(args):
    """
    %prog splitread fastqfile

    Split fastqfile into two read fastqfiles, cut in the middle.
    """
    p = OptionParser(splitread.__doc__)
    p.add_option(
        "-n",
        dest="n",
        default=76,
        type="int",
        help="Split at N-th base position",
    )
    p.add_option(
        "--rc",
        default=False,
        action="store_true",
        help="Reverse complement second read",
    )
    opts, args = p.parse_args(args)

    if len(args) != 1:
        sys.exit(not p.print_help())

    (pairsfastq, ) = args

    base = op.basename(pairsfastq).split(".")[0]
    fq1 = base + ".1.fastq"
    fq2 = base + ".2.fastq"
    fw1 = must_open(fq1, "w")
    fw2 = must_open(fq2, "w")

    fp = must_open(pairsfastq)
    n = opts.n
    minsize = n * 8 / 5

    for name, seq, qual in FastqGeneralIterator(fp):
        if len(seq) < minsize:
            logging.error("Skipping read {0}, length={1}".format(
                name, len(seq)))
            continue

        name = "@" + name
        rec1 = FastqLite(name, seq[:n], qual[:n])
        rec2 = FastqLite(name, seq[n:], qual[n:])
        if opts.rc:
            rec2.rc()

        print(rec1, file=fw1)
        print(rec2, file=fw2)

    logging.debug("Reads split into `{0},{1}`".format(fq1, fq2))
    fw1.close()
    fw2.close()

Example #4

0

Show file

def pairinplace(args):
    """
    %prog pairinplace bulk.fastq

    Pair up the records in bulk.fastq by comparing the names for adjancent
    records. If they match, print to bulk.pairs.fastq, else print to
    bulk.frags.fastq.
    """
    from jcvi.utils.iter import pairwise

    p = OptionParser(pairinplace.__doc__)
    p.add_option("-r", dest="rclip", default=1, type="int",
            help="pair ID is derived from rstrip N chars [default: %default]")
    opts, args = p.parse_args(args)

    if len(args) != 1:
        sys.exit(not p.print_help())

    fastqfile, = args
    base = op.basename(fastqfile).split(".")[0]

    frags = base + ".frags.fastq"
    pairs = base + ".pairs.fastq"
    if fastqfile.endswith(".gz"):
        frags += ".gz"
        pairs += ".gz"

    fragsfw = must_open(frags, "w")
    pairsfw = must_open(pairs, "w")

    N = opts.rclip
    strip_name = lambda x: x[:-N] if N else str

    fh_iter = iter_fastq(fastqfile, key=strip_name)
    skipflag = False  # controls the iterator skip
    for a, b in pairwise(fh_iter):
        if b is None:  # hit the eof
            break

        if skipflag:
            skipflag = False
            continue

        if a.id == b.id:
            print >> pairsfw, a
            print >> pairsfw, b
            skipflag = True
        else:
            print >> fragsfw, a

    # don't forget the last one, when b is None
    if not skipflag:
        print >> fragsfw, a

    logging.debug("Reads paired into `%s` and `%s`" % (pairs, frags))

Example #5

0

Show file

File: fastq.py Project: Nicholas-NVS/jcvi

def suffix(args):
    """
    %prog suffix fastqfile CAG

    Filter reads based on suffix.
    """
    from jcvi.utils.cbook import percentage

    p = OptionParser(suffix.__doc__)
    p.set_outfile()
    opts, args = p.parse_args(args)

    if len(args) != 2:
        sys.exit(not p.print_help())

    fastqfile, sf = args
    fw = must_open(opts.outfile, "w")
    nreads = nselected = 0
    for rec in iter_fastq(fastqfile):
        nreads += 1
        if rec is None:
            break
        if rec.seq.endswith(sf):
            print >> fw, rec
            nselected += 1
    logging.debug("Selected reads with suffix {0}: {1}".format(sf, percentage(nselected, nreads)))

Example #6

0

Show file

File: fastq.py Project: tanghaibao/jcvi

def uniq(args):
    """
    %prog uniq fastqfile

    Retain only first instance of duplicate reads. Duplicate is defined as
    having the same read name.
    """
    p = OptionParser(uniq.__doc__)
    p.set_outfile()
    opts, args = p.parse_args(args)

    if len(args) != 1:
        sys.exit(not p.print_help())

    fastqfile, = args
    fw = must_open(opts.outfile, "w")
    nduplicates = nreads = 0
    seen = set()
    for rec in iter_fastq(fastqfile):
        nreads += 1
        if rec is None:
            break
        name = rec.name
        if name in seen:
            nduplicates += 1
            continue
        seen.add(name)
        print(rec, file=fw)
    logging.debug("Removed duplicate reads: {}".\
                  format(percentage(nduplicates, nreads)))

Example #7

0

Show file

def uniq(args):
    """
    %prog uniq fastqfile

    Retain only first instance of duplicate reads. Duplicate is defined as
    having the same read name.
    """
    p = OptionParser(uniq.__doc__)
    p.set_outfile()
    opts, args = p.parse_args(args)

    if len(args) != 1:
        sys.exit(not p.print_help())

    fastqfile, = args
    fw = must_open(opts.outfile, "w")
    nduplicates = nreads = 0
    seen = set()
    for rec in iter_fastq(fastqfile):
        nreads += 1
        if rec is None:
            break
        name = rec.name
        if name in seen:
            nduplicates += 1
            continue
        seen.add(name)
        print >> fw, rec
    logging.debug("Removed duplicate reads: {}".\
                  format(percentage(nduplicates, nreads)))

Example #8

0

Show file

def suffix(args):
    """
    %prog suffix fastqfile CAG

    Filter reads based on suffix.
    """
    p = OptionParser(suffix.__doc__)
    p.set_outfile()
    opts, args = p.parse_args(args)

    if len(args) != 2:
        sys.exit(not p.print_help())

    fastqfile, sf = args
    fw = must_open(opts.outfile, "w")
    nreads = nselected = 0
    for rec in iter_fastq(fastqfile):
        nreads += 1
        if rec is None:
            break
        if rec.seq.endswith(sf):
            print >> fw, rec
            nselected += 1
    logging.debug("Selected reads with suffix {0}: {1}".\
                  format(sf, percentage(nselected, nreads)))

Example #9

0

Show file

File: fastq.py Project: yangjl/jcvi

def catread(args):
    """
    %prog catread fastqfile1 fastqfile2

    Concatenate paired end reads into one. Useful for example to do single-end
    mapping and perform filtering on the whole read pair level.
    """
    p = OptionParser(catread.__doc__)
    opts, args = p.parse_args(args)

    if len(args) != 2:
        sys.exit(not p.print_help())

    r1, r2 = args
    p1fp, p2fp = FastqPairedIterator(r1, r2)
    outfile = pairspf((r1, r2)) + ".cat.fastq"
    fw = must_open(outfile, "w")
    while True:
        a = list(islice(p1fp, 4))
        if not a:
            break
        atitle, aseq, _, aqual = a
        btitle, bseq, _, bqual = list(islice(p2fp, 4))
        print >> fw, "\n".join((atitle.strip(), aseq.strip() + bseq.strip(), \
                                "+", aqual.strip() + bqual.strip()))

Example #10

0

Show file

def catread(args):
    """
    %prog catread fastqfile1 fastqfile2

    Concatenate paired end reads into one. Useful for example to do single-end
    mapping and perform filtering on the whole read pair level.
    """
    p = OptionParser(catread.__doc__)
    opts, args = p.parse_args(args)

    if len(args) != 2:
        sys.exit(not p.print_help())

    r1, r2 = args
    p1fp, p2fp = FastqPairedIterator(r1, r2)
    outfile = pairspf((r1, r2)) + ".cat.fastq"
    fw = must_open(outfile, "w")
    while True:
        a = list(islice(p1fp, 4))
        if not a:
            break
        atitle, aseq, _, aqual = a
        btitle, bseq, _, bqual = list(islice(p2fp, 4))
        print >> fw, "\n".join((atitle.strip(), aseq.strip() + bseq.strip(), \
                                "+", aqual.strip() + bqual.strip()))

Example #11

0

Show file

def splitread(args):
    """
    %prog splitread fastqfile

    Split fastqfile into two read fastqfiles, cut in the middle.
    """
    p = OptionParser(splitread.__doc__)
    p.add_option("-n",
                 dest="n",
                 default=76,
                 type="int",
                 help="Split at N-th base position [default: %default]")
    p.add_option("--rc",
                 default=False,
                 action="store_true",
                 help="Reverse complement second read [default: %default]")
    opts, args = p.parse_args(args)

    if len(args) != 1:
        sys.exit(not p.print_help())

    pairsfastq, = args

    base = op.basename(pairsfastq).split(".")[0]
    fq1 = base + ".1.fastq"
    fq2 = base + ".2.fastq"
    fw1 = must_open(fq1, "w")
    fw2 = must_open(fq2, "w")

    fp = must_open(pairsfastq)
    n = opts.n

    for name, seq, qual in FastqGeneralIterator(fp):

        name = "@" + name
        rec1 = FastqLite(name, seq[:n], qual[:n])
        rec2 = FastqLite(name, seq[n:], qual[n:])
        if opts.rc:
            rec2.rc()

        print >> fw1, rec1
        print >> fw2, rec2

    logging.debug("Reads split into `{0},{1}`".format(fq1, fq2))
    fw1.close()
    fw2.close()

Example #12

0

Show file

File: fastq.py Project: tanghaibao/jcvi

def splitread(args):
    """
    %prog splitread fastqfile

    Split fastqfile into two read fastqfiles, cut in the middle.
    """
    p = OptionParser(splitread.__doc__)
    p.add_option("-n", dest="n", default=76, type="int",
            help="Split at N-th base position [default: %default]")
    p.add_option("--rc", default=False, action="store_true",
            help="Reverse complement second read [default: %default]")
    opts, args = p.parse_args(args)

    if len(args) != 1:
        sys.exit(not p.print_help())

    pairsfastq, = args

    base = op.basename(pairsfastq).split(".")[0]
    fq1 = base + ".1.fastq"
    fq2 = base + ".2.fastq"
    fw1 = must_open(fq1, "w")
    fw2 = must_open(fq2, "w")

    fp = must_open(pairsfastq)
    n = opts.n
    minsize = n * 8 / 5

    for name, seq, qual in FastqGeneralIterator(fp):
        if len(seq) < minsize:
            logging.error("Skipping read {0}, length={1}".format(name, len(seq)))
            continue

        name = "@" + name
        rec1 = FastqLite(name, seq[:n], qual[:n])
        rec2 = FastqLite(name, seq[n:], qual[n:])
        if opts.rc:
            rec2.rc()

        print(rec1, file=fw1)
        print(rec2, file=fw2)

    logging.debug("Reads split into `{0},{1}`".format(fq1, fq2))
    fw1.close()
    fw2.close()

Example #13

0

Show file

def shuffle(args):
    """
    %prog shuffle p1.fastq p2.fastq

    Shuffle pairs into interleaved format.
    """
    p = OptionParser(shuffle.__doc__)
    p.set_tag()
    opts, args = p.parse_args(args)

    if len(args) != 2:
        sys.exit(not p.print_help())

    p1, p2 = args
    pairsfastq = pairspf((p1, p2)) + ".fastq"
    tag = opts.tag

    p1fp = must_open(p1)
    p2fp = must_open(p2)
    pairsfw = must_open(pairsfastq, "w")
    nreads = 0
    while True:
        a = list(islice(p1fp, 4))
        if not a:
            break

        b = list(islice(p2fp, 4))
        if tag:
            name = a[0].rstrip()
            a[0] = name + "/1\n"
            b[0] = name + "/2\n"

        pairsfw.writelines(a)
        pairsfw.writelines(b)
        nreads += 2

    pairsfw.close()
    extra = nreads * 2 if tag else 0
    checkShuffleSizes(p1, p2, pairsfastq, extra=extra)

    logging.debug(
        "File `{0}` verified after writing {1} reads.".format(pairsfastq, nreads)
    )
    return pairsfastq

Example #14

0

Show file

File: fastq.py Project: yangjl/jcvi

def shuffle(args):
    """
    %prog shuffle p1.fastq p2.fastq

    Shuffle pairs into interleaved format.
    """
    p = OptionParser(shuffle.__doc__)
    p.set_tag()
    opts, args = p.parse_args(args)

    if len(args) != 2:
        sys.exit(not p.print_help())

    p1, p2 = args
    pairsfastq = pairspf((p1, p2)) + ".fastq"
    tag = opts.tag

    p1fp = must_open(p1)
    p2fp = must_open(p2)
    pairsfw = must_open(pairsfastq, "w")
    nreads = 0
    while True:
        a = list(islice(p1fp, 4))
        if not a:
            break

        b = list(islice(p2fp, 4))
        if tag:
            name = a[0].rstrip()
            a[0] = name + "/1\n"
            b[0] = name + "/2\n"

        pairsfw.writelines(a)
        pairsfw.writelines(b)
        nreads += 2

    pairsfw.close()
    extra = nreads * 2 if tag else 0
    checkShuffleSizes(p1, p2, pairsfastq, extra=extra)

    logging.debug("File `{0}` verified after writing {1} reads.".\
                     format(pairsfastq, nreads))
    return pairsfastq

Example #15

0

Show file

def shuffle(args):
    """
    %prog shuffle p1.fastq p2.fastq pairs.fastq

    Shuffle pairs into interleaved format, using `shuffleSequences_fastq.pl`.
    """
    from itertools import izip

    p = OptionParser(shuffle.__doc__)
    p.add_option("--tag", dest="tag", default=False, action="store_true",
            help="add tag (/1, /2) to the read name")
    opts, args = p.parse_args(args)

    if len(args) != 3:
        sys.exit(not p.print_help())

    p1, p2, pairsfastq = args
    tag = opts.tag

    p1fp = must_open(p1)
    p2fp = must_open(p2)
    pairsfw = must_open(pairsfastq, "w")
    nreads = 0
    while True:
        a = list(islice(p1fp, 4))
        if not a:
            break

        b = list(islice(p2fp, 4))
        if tag:
            name = a[0].rstrip()
            a[0] = name + "/1\n"
            b[0] = name + "/2\n"

        pairsfw.writelines(a)
        pairsfw.writelines(b)
        nreads += 2

    pairsfw.close()
    extra = nreads * 2 if tag else 0
    checkShuffleSizes(p1, p2, pairsfastq, extra=extra)

    logging.debug("File sizes verified after writing {0} reads.".format(nreads))

Example #16

0

Show file

def iter_fastq(filename, offset=0, key=None):
    if isinstance(filename, str):
        logging.debug("Read file `{0}`".format(filename))
        fh = must_open(filename)
    else:
        fh = filename

    while True:
        rec = FastqRecord(fh, offset=offset, key=key)
        if not rec.name:
            break
        yield rec
    yield None  # sentinel

Example #17

0

Show file

File: fastq.py Project: Nicholas-NVS/jcvi

def iter_fastq(filename, offset=0, key=None):
    if isinstance(filename, str):
        logging.debug("Read file `{0}`".format(filename))
        fh = must_open(filename)
    else:
        fh = filename

    while True:
        rec = FastqRecord(fh, offset=offset, key=key)
        if not rec.name:
            break
        yield rec
    yield None  # sentinel

Example #18

0

Show file

File: fastq.py Project: bennyyu/jcvi

def split_barcode(t):

    barcode, excludebarcode, outdir, inputfile = t
    trim = len(barcode.seq)

    fp = must_open(inputfile)
    outfastq = op.join(outdir, barcode.id + ".fastq")
    fw = open(outfastq, "w")
    for title, seq, qual in FastqGeneralIterator(fp):
        if seq[:trim] != barcode.seq:
            continue
        hasexclude = any(seq.startswith(x.seq) for x in excludebarcode)
        if hasexclude:
            continue
        print >> fw, "@{0}\n{1}\n+\n{2}".format(title, seq[trim:], qual[trim:])

    fw.close()

Example #19

0

Show file

def split_barcode(t):

    barcode, excludebarcode, site, outdir, inputfile = t
    trim = len(barcode.seq)

    fp = must_open(inputfile)
    outfastq = op.join(outdir, barcode.id + ".fastq")
    fw = open(outfastq, "w")
    for title, seq, qual in FastqGeneralIterator(fp):
        if seq[:trim] != barcode.seq:
            continue
        hasexclude = any(seq.startswith(x.seq) for x in excludebarcode)
        if hasexclude:
            continue
        seq = seq[trim:]
        hassite = any(seq.startswith(x) for x in site)
        if not hassite:
            continue
        print >> fw, "@{0}\n{1}\n+\n{2}".format(title, seq, qual[trim:])

    fw.close()

Example #20

0

Show file

def pairinplace(args):
    """
    %prog pairinplace bulk.fastq

    Pair up the records in bulk.fastq by comparing the names for adjancent
    records. If they match, print to bulk.pairs.fastq, else print to
    bulk.frags.fastq.
    """
    from jcvi.utils.iter import pairwise

    p = OptionParser(pairinplace.__doc__)
    p.set_rclip()
    p.set_tag()
    p.add_option("--base",
                help="Base name for the output files [default: %default]")
    opts, args = p.parse_args(args)

    if len(args) != 1:
        sys.exit(not p.print_help())

    fastqfile, = args
    base = opts.base or op.basename(fastqfile).split(".")[0]

    frags = base + ".frags.fastq"
    pairs = base + ".pairs.fastq"
    if fastqfile.endswith(".gz"):
        frags += ".gz"
        pairs += ".gz"

    fragsfw = must_open(frags, "w")
    pairsfw = must_open(pairs, "w")

    N = opts.rclip
    tag = opts.tag
    strip_name = (lambda x: x[:-N]) if N else None

    fh_iter = iter_fastq(fastqfile, key=strip_name)
    skipflag = False  # controls the iterator skip
    for a, b in pairwise(fh_iter):
        if b is None:  # hit the eof
            break

        if skipflag:
            skipflag = False
            continue

        if a.name == b.name:
            if tag:
                a.name += "/1"
                b.name += "/2"
            print(a, file=pairsfw)
            print(b, file=pairsfw)
            skipflag = True
        else:
            print(a, file=fragsfw)

    # don't forget the last one, when b is None
    if not skipflag:
        print(a, file=fragsfw)

    logging.debug("Reads paired into `%s` and `%s`" % (pairs, frags))
    return pairs