Exemple #1
0
#!/usr/bin/env python

from __future__ import print_function
import sys
from parsers import parse_fa
import collections
import numpy as np


def print_seq(contig_header, seq, linelen=60):
    """Print a sequence record with the sequence wrapped at a fixed width.

    The header is printed verbatim on its own line (no ">" is prepended),
    followed by ``seq`` split into lines of at most ``linelen`` characters.

    Args:
        contig_header: Header line, printed exactly as given.
        seq: Sequence to print; an empty sequence prints only the header.
        linelen: Maximum number of characters per output line. Must be
            positive.

    Raises:
        ValueError: If ``linelen`` is not positive. A zero or negative
            width would otherwise never advance through a non-empty
            sequence.
    """
    if linelen <= 0:
        raise ValueError("linelen must be positive, got {}".format(linelen))

    print(contig_header)
    for start in range(0, len(seq), linelen):
        print(seq[start:start + linelen])


if __name__ == "__main__":
    # Print every record of the input file whose sequence contains exactly
    # 121 uppercase A/C/G/T characters (the "unmasked" total below).
    if len(sys.argv) != 2:
        print("usage: {} file.fa".format(sys.argv[0]), file=sys.stderr)
        # sys.exit() rather than exit(): the latter is a helper injected by
        # the site module for interactive use and is not always available.
        sys.exit(1)

    for label, seq in parse_fa(sys.argv[1]):
        base_counts = collections.Counter(seq)
        # Builtin sum(): calling np.sum() on a generator is deprecated in
        # NumPy and only works through a fallback to the builtin anyway.
        unmasked = sum(base_counts[base] for base in "ACGT")
        if unmasked == 121:
            print_seq(label, seq)
Exemple #2
0
from __future__ import print_function
import sys
import matplotlib

matplotlib.use('Agg')  # don't try to use $DISPLAY
import matplotlib.pyplot as plt
from matplotlib.backends.backend_pdf import PdfPages
import parsers

if __name__ == "__main__":
    # Collect the integer DIST=<n> values found in the record labels of the
    # input file and set up a histogram of them.
    if len(sys.argv) != 2:
        print("usage: {} in.{{fa,fq}}".format(sys.argv[0]), file=sys.stderr)
        # Without this exit the script fell through and failed with an
        # IndexError on sys.argv[1] right after printing the usage text.
        sys.exit(1)

    dist = []

    for seq in parsers.parse_fa(sys.argv[1]):
        label = seq[0]
        # The second space-separated token of the label holds
        # ";"-separated key=value pairs.
        extra = label.split(" ")[1]
        for ef in extra.split(";"):
            k, v = ef.split("=")
            if k == "DIST":
                dist.append(int(v))

    if not dist:
        # max() on an empty list raises ValueError; report the real cause.
        print("error: no DIST= fields found in {}".format(sys.argv[1]),
              file=sys.stderr)
        sys.exit(1)

    pdf = PdfPages("plot_dist_hist.pdf")
    fig_w, fig_h = plt.figaspect(9.0 / 16.0)
    fig1 = plt.figure(figsize=(fig_w, fig_h))
    ax1 = fig1.add_subplot(111)

    # One bin per integer distance from 0 up to the largest observed value.
    ax1.hist(dist, bins=range(0, max(dist) + 1))
    ax1.set_xlabel("Edit Distance")
    ax1.set_ylabel("Frequency")
Exemple #3
0
                    s.append("C")
                else:
                    s.append("T")
                continue

    return "".join(s)


if __name__ == "__main__":
    # Exactly one command-line argument is required: the input file.
    if len(sys.argv) != 2:
        print("usage: {} file.fastq[.gz]".format(sys.argv[0]), file=sys.stderr)
        exit(1)

    filename = sys.argv[1]

    # Records are dispatched on their length: three fields are handled as
    # fastq (label, sequence, qualities), two fields as fasta (label,
    # sequence).
    for lineno, line in enumerate(parsers.parse_fa(filename), 1):

        if len(line) == 3:
            # fastq, output single sequence
            label, seq, qual = line
            bs_seq = bstreat(seq)
            # The quality string is written back out unchanged.
            print("{}\n{}\n+\n{}".format(label, bs_seq, qual))

        elif len(line) == 2:
            # fasta, output two sequences
            label, seq = line
            # Split the label into the sequence name (first
            # whitespace-separated field) and an optional comment made of
            # the remaining fields, re-joined with single spaces.
            lfields = label.split()
            sname = lfields[0]
            if len(lfields) > 1:
                comment = " " + " ".join(lfields[1:])
            else:
Exemple #4
0
#!/usr/bin/env python

from __future__ import print_function
import sys
import parsers
import collections

if __name__ == "__main__":
    # Tally how often each quality character occurs across all records of
    # the input file, then print one line per character in code-point order.
    if len(sys.argv) != 2:
        # Diagnostics go to stderr so that stdout carries only the table.
        print(len(sys.argv), sys.argv, file=sys.stderr)
        print("usage: {} in.fq".format(sys.argv[0]), file=sys.stderr)
        # sys.exit() rather than exit(): the latter is a helper injected by
        # the site module for interactive use and is not always available.
        sys.exit(1)

    quals = collections.Counter()

    # Only the third field of each record (the quality string) is counted.
    for _label, _seq, qual in parsers.parse_fa(sys.argv[1]):
        quals.update(qual)

    # Output columns: quoted character, its ordinal, number of occurrences.
    for char in sorted(quals, key=ord):
        print("``{}''".format(char), ord(char), quals[char])
                        help="reads longer than this are ignored")
    parser.add_argument("in_fa", help="in.{fa,fq}")
    parser.add_argument("out_pdf",
                        default="plot_fraglen_hist.pdf",
                        help="out.pdf")
    return parser.parse_args()


if __name__ == "__main__":
    args = parse_args()

    # lengths[n] counts the reads of length n. Reads of exactly max_len are
    # kept by the filter below, so the array needs max_len + 1 slots;
    # with only max_len slots such a read raised an IndexError.
    lengths = np.zeros(args.max_len + 1)
    # Integer sentinels: every accepted length is within [0, max_len].
    lmin = args.max_len
    lmax = 0

    for line in parsers.parse_fa(args.in_fa):
        length = len(line[1])
        if length > args.max_len:
            continue

        lengths[length] += 1

        # Two independent checks: with if/elif the first accepted read only
        # updated lmin, leaving lmax at 0 whenever that read was also the
        # longest one (e.g. a single-read input).
        if length < lmin:
            lmin = length
        if length > lmax:
            lmax = length

    pdf = PdfPages(args.out_pdf)
    #fig_w, fig_h = plt.figaspect(9.0/16.0)
    #fig_w, fig_h = plt.figaspect(3.0/4.0)
    # Square figure. NOTE(review): 72.27 looks like TeX points per inch, so
    # this is presumably a document width converted to inches - confirm.
    fig_w = fig_h = 307.28987 / 72.27
Exemple #6
0
                yield last
            else:
                exclude = False

        last = [chrom, pos, ref, alt, dp]

    if not exclude:
        yield last


if __name__ == "__main__":
    if len(sys.argv) != 3:
        print("usage: {} in.fa in.vcf".format(sys.argv[0]), file=sys.stderr)
        # sys.exit() rather than exit(): the latter is a helper injected by
        # the site module for interactive use and is not always available.
        sys.exit(1)

    fp = parse_fa(sys.argv[1])

    # Maps (chrom, midpoint position) -> sequence for every record that
    # passes the windowmasker() filter.
    snpdict = {}

    for label, seq in fp:
        # Labels are parsed as "<1 char>chrom:start-end"; the first
        # character of the label is dropped before splitting.
        chrom, extent = label[1:].split(":")
        start, end = map(int, extent.split("-"))
        # Floor division keeps the midpoint an integer. True division
        # yields a float under Python 3, turning the dict keys into floats
        # (and a fractional .5 for odd-length extents).
        pos = start + (end - start) // 2
        if windowmasker(seq) > 0:
            continue
        snpdict[(chrom, pos)] = seq

    plen = 60

    print("chrom",
          "pos",
Exemple #7
0
#
# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
# OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.

import sys
import random
from parsers import parse_fa

# Read every sequence from standard input, discarding the labels, then
# shuffle the sequences in place.
sequences = []

for line in parse_fa("/dev/stdin"):
    # Explicit check instead of assert: assertions are stripped when Python
    # runs with -O, which would let malformed records through unreported.
    if len(line) != 2:
        raise ValueError(
            "expected a (label, sequence) record, got {} fields".format(
                len(line)))
    _, seq = line
    sequences.append(seq)

random.shuffle(sequences)


def print_fa(label, seq, linelen=60):
    """Print one FASTA record with the sequence wrapped at a fixed width.

    Writes ">" followed by ``label`` as the header line, then ``seq`` split
    into lines of at most ``linelen`` characters.

    Args:
        label: Record label; ">" is prepended when printing.
        seq: Sequence to print; an empty sequence prints only the header.
        linelen: Maximum number of characters per output line. Must be
            positive.

    Raises:
        ValueError: If ``linelen`` is not positive. A zero or negative
            width would otherwise never consume a non-empty sequence.
    """
    if linelen <= 0:
        raise ValueError("linelen must be positive, got {}".format(linelen))

    print(">{}".format(label))
    # Step through by offset instead of repeatedly re-slicing the remainder:
    # "seq = seq[linelen:]" copies the whole tail on every line, which is
    # quadratic for long sequences.
    for start in range(0, len(seq), linelen):
        print(seq[start:start + linelen])


for n, seq in enumerate(sequences, 1):