Esempio n. 1
0
# Downsample a sequence library to a target coverage of a genome.
import sys

from nucio import typeify, fileIterator
from seqio import iteratorFromExtension, recordToString, seqlen



# The script takes exactly four positional arguments after its own name.
if len(sys.argv) != 5:
    sys.exit("Usage: downsample.py genome_size desired_cov input.{fa,fq} output.{fa,fq}\n")

# typeify presumably converts each raw argument with the type at the same
# position (int, float, str, str) -- confirm against nucio.typeify.
arg_types = [int, float, str, str]
raw_args = sys.argv[1:1 + len(arg_types)]
genome_size, target_cov, infn, outfn = typeify(raw_args, arg_types)

# Base budget: records are written until the running total exceeds this.
max_bases = genome_size * target_cov
total_bases = 0

with open(outfn, "w") as out_handle:
    records = fileIterator(infn, iteratorFromExtension(infn))
    for rec in records:
        rec_len = seqlen(rec)
        if "N" not in rec.seq:
            # The budget is tested before writing, so the record that pushes
            # the total past max_bases is still emitted; the next one stops.
            if total_bases > max_bases:
                break
            out_handle.write(recordToString(rec))
            out_handle.write("\n")
            total_bases += rec_len
        # Records whose sequence contains "N" are skipped and not counted.
    
Esempio n. 2
0
# Deplex sequence files: every record is appended to an output file named
# after the part of the record name that precedes the delimiter.
import sys
from itertools import chain

from seqio import iteratorFromExtension, recordToString, FastqRecord
from nucio import fileIterator
from args import parseArgs, getHelpStr, CLArgument

description = (
    "Usage: deplex_pb.py [options] file1.{fa,fq} [file2.{fa,fq} ..]\n\n"
    "Deplexes a file based on some delimiter")

# One row per command-line option; "/" is presumably the default delimiter
# (field meaning is defined by CLArgument in the args module -- confirm).
argument_list = [["delim", "delim", str, "/", "Delimiter to split the input"]]

# Built as a real list so it can be handed to both getHelpStr and parseArgs.
arguments = [CLArgument._make(spec) for spec in argument_list]

if len(sys.argv) < 2:
    sys.exit(getHelpStr(description, arguments) + "\n")

(p_arg_map, args_remaining) = parseArgs(sys.argv[1:], arguments)

# Look up the record iterator for every input up front, before any output
# file is created.
its = [iteratorFromExtension(fn) for fn in args_remaining]

# fileIterator is only called for an input once the previous one is exhausted.
file_its = (fileIterator(fn, it) for fn, it in zip(args_remaining, its))

# Output handles keyed by name prefix.
fh_h = {}

try:
    for entry in chain.from_iterable(file_its):
        h = entry.name.split(p_arg_map["delim"])[0]
        if h not in fh_h:
            # The extension is fixed by the first record seen for a prefix.
            ext = ".fastq" if isinstance(entry, FastqRecord) else ".fasta"
            fh_h[h] = open(h + ext, "w")
        fh_h[h].write(recordToString(entry))
        fh_h[h].write("\n")
finally:
    # Close every output file, even if reading or writing failed part-way.
    for fh in fh_h.values():
        fh.close()
Esempio n. 3
0
#!/usr/bin/env python

import sys

from seqio import iteratorFromExtension, recordToString
from nucio import fileIterator
from misc import reverse_complement

# Write the reverse complement of every record in the input file to stdout.
# Exactly one argument is expected: the input FASTA/FASTQ path.
if len(sys.argv) != 2:
    sys.exit("reverseComplement.py in.{fa,fq}")

f = sys.argv[1]

for record in fileIterator(f, iteratorFromExtension(f)):
    # _replace returns a copy of the record with only seq swapped
    # (records are presumably namedtuples -- confirm in seqio).
    flipped = record._replace(seq=reverse_complement(record.seq))
    # A single-argument parenthesised print works on both Python 2 and 3;
    # the bare print statement is a SyntaxError on Python 3.
    print(recordToString(flipped))
Esempio n. 4
0
#!/usr/bin/env python

import sys

from seqio import iteratorFromExtension, recordToString
from nucio import fileIterator 
from misc import reverse_complement

# Write the reverse complement of every record in the input file to stdout.
# Exactly one argument is expected: the input FASTA/FASTQ path.
if len(sys.argv) != 2:
    sys.exit("reverseComplement.py in.{fa,fq}")

f = sys.argv[1]

for record in fileIterator(f, iteratorFromExtension(f)):
    # _replace returns a copy of the record with only seq swapped
    # (records are presumably namedtuples -- confirm in seqio).
    flipped = record._replace(seq=reverse_complement(record.seq))
    # A single-argument parenthesised print works on both Python 2 and 3;
    # the bare print statement is a SyntaxError on Python 3.
    print(recordToString(flipped))
Esempio n. 5
0
# Deplex sequence files: every record is appended to an output file named
# after the part of the record name that precedes the delimiter.
import sys
from itertools import chain

from seqio import iteratorFromExtension, recordToString, FastqRecord
from nucio import fileIterator
from args import parseArgs, getHelpStr, CLArgument

description = (
    "Usage: deplex_pb.py [options] file1.{fa,fq} [file2.{fa,fq} ..]\n\n" "Deplexes a file based on some delimiter"
)

# One row per command-line option; "/" is presumably the default delimiter
# (field meaning is defined by CLArgument in the args module -- confirm).
argument_list = [["delim", "delim", str, "/", "Delimiter to split the input"]]

# Built as a real list so it can be handed to both getHelpStr and parseArgs.
arguments = [CLArgument._make(spec) for spec in argument_list]

if len(sys.argv) < 2:
    sys.exit(getHelpStr(description, arguments) + "\n")

(p_arg_map, args_remaining) = parseArgs(sys.argv[1:], arguments)

# Look up the record iterator for every input up front, before any output
# file is created.
its = [iteratorFromExtension(fn) for fn in args_remaining]

# fileIterator is only called for an input once the previous one is exhausted.
file_its = (fileIterator(fn, it) for fn, it in zip(args_remaining, its))

# Output handles keyed by name prefix.
fh_h = {}

try:
    for entry in chain.from_iterable(file_its):
        h = entry.name.split(p_arg_map["delim"])[0]
        if h not in fh_h:
            # The extension is fixed by the first record seen for a prefix.
            ext = ".fastq" if isinstance(entry, FastqRecord) else ".fasta"
            fh_h[h] = open(h + ext, "w")
        fh_h[h].write(recordToString(entry))
        fh_h[h].write("\n")
finally:
    # Close every output file, even if reading or writing failed part-way.
    for fh in fh_h.values():
        fh.close()