Example #1
0
#!/usr/bin/env python
# Remove sequences shorter than a given length (second command-line argument).

from sys import argv, exit
from FB_functions import read_fasta

# Both the FASTA file and the length threshold are required.
if len(argv) < 3:
    print("usage: script.py seqfile.fasta 10")
    print("removes sequences shorter than 10 nucleotides/amino acids")
    exit()

# Parse the threshold once, up front: a non-numeric value now fails before
# any file is read instead of being re-parsed for every record.
min_length = int(argv[2])

sequences = read_fasta(argv[1])

# Each record is indexed as [0] (printed first; presumably the FASTA header
# line) and [1] (the sequence whose length is tested).
for sequence in sequences:
    # Only sequences strictly shorter than the threshold are removed, as the
    # usage text promises; a sequence of exactly min_length is kept.
    if len(sequence[1]) >= min_length:
        print(sequence[0] + "\n" + sequence[1])
Example #2
0
                  help="Set a fixed fraglength to create fragments of [default=%default]", 
                  default=False)
parser.add_option("-o", "--outputfile", dest="outputfile", type="string", default="fragments.pfa",
                  help="Filename for output file")

(options, args) = parser.parse_args()

# The FASTA file to fragment must be given as the first positional argument.
if len(args)<1:
    # print_help() writes the help text itself and returns None; wrapping it
    # in a print statement emitted a stray "None" line after the help.
    parser.print_help()
    print("ERROR: Need fasta file to work on!")
    exit()



# Read QNR-sequences from fasta and store sequences in list of tuples
# (the loop following this section unpacks each entry as (seqid, sequence)).
seqlist = read_fasta(args[0])

##---------------------------------------------------------------------------##
##                       DATA FOR FRAGMENT CREATION                          ##
##---------------------------------------------------------------------------##

# The number of different fragment lengths to be created:
# NOTE(review): the constant is named as a maximum fragment length while the
# comment above describes a count of lengths -- confirm which one is intended.
# NOTE(review): the option definition that ends just above the output-file
# option uses default=False; if that is the fraglength option, this value is
# False when the option is not supplied -- confirm downstream handling.
MAXIMUM_FRAGMENT_LENGTH = options.fraglength  # max value is ~210 (213) for Qnr

# Number of fragments per fragment length:
# (taken directly from the parsed --replicates style option; no validation here)
NUMBER_OF_FRAGMENTS_PER_LENGTH = options.replicates

# Determine the minimum sequence length
# (sets upper limit for fragment starting point)
# NOTE(review): 5000 is only a starting value; presumably the loop that follows
# lowers it to the shortest sequence length. If every sequence is longer than
# 5000 the starting value would remain in effect -- confirm that is acceptable.
SEQUENCE_MINLENGTH = 5000 # sufficiently large number
for seqid,sequence in seqlist:
#!/usr/bin/env python
# Classify cluster members according to
# fasta file of reference sequences.

from sys import argv, exit
from FB_functions import read_fasta
import re


# At least one cluster FASTA file has to be named on the command line.
if not argv[1:]:
    print("usage: script.py cluster.fasta...")
    exit()

# Reference sequences that every cluster member is compared against.
refseqs = read_fasta("/home/boulund/qnr-search_project/qnrsequences/pmqr.pfa")


# Pre-compiled pattern for the Qnr tag of a reference sequence:
# the literal "Qnr", one word character, then any number of digits.
qnrname_regex = re.compile(r'(Qnr\w\d*)')

for file in argv[1:]:
    #print "\n"+file+"\n"
    outfile = open(file+".classified","w")
    cluster = read_fasta(file)
    for seq in cluster:
        success = False
        for refseq in refseqs:
            qnrname = re.search(qnrname_regex,refseq[0])
            if qnrname is not None:
                if seq[1] in refseq[1]:
                    success = True
                    outfile.write(">-"+qnrname.group(0)+"-"+seq[0][1:]+"\n")