Beispiel #1
0
    else:
        print "invalid character found in secondary structure of " + secondary_structure.name 
            
            
print "checked", i, "entries"

prot_trn = prot[0::2]  # even-numbered indices
prot_tst = prot[1::2]  # odd-numbered indices
sstr_trn = sstr[0::2]  # even-numbered indices
sstr_tst = sstr[1::2]  # odd-numbered indices

print len(prot_trn[0])

W = 5
print prot_trn[0]
print slidewin(prot_trn[0], W)
print len(slidewin(prot_trn[0], W))
nb = NaiveBayes([Protein_Alphabet for _ in range(W)], DSSP3_Alphabet)

# iterate over the sequence of proteins in the training set
# for i in range(len(prot_trn)):  
print "-------------------------------------------------------"
for i in range(1):  
    subseqs = slidewin(prot_trn[i], W)  # construct sub-seqs
    subtarg = sstr_trn[i][W / 2:-W / 2 + 1]  # secondary structure elem.  remove the overhang
    print subtarg
    print sstr_trn[i]
    # print subseqs, subtarg
    
    for j in range(len(subseqs)):
        nb.observe(subseqs[j], subtarg[j])  # let NB count
Beispiel #2
0
sstr = readFastaFile('sstr3.fa', DSSP3_Alphabet)

prot_trn = prot[0::2]  # even-numbered indices
prot_tst = prot[1::2]  # odd-numbered indices
sstr_trn = sstr[0::2]  # even-numbered indices
sstr_tst = sstr[1::2]  # odd-numbered indices

print "W, Q3, C, E, H"

for W in range(3, 50):
    nb = NaiveBayes([Protein_Alphabet for _ in range(W)], DSSP3_Alphabet)

    # iterate over the sequence of proteins in the training set
    # for i in range(len(prot_trn)):  
    for i in range(len(prot_trn)):  
        subseqs = slidewin(prot_trn[i], W)  # construct sub-seqs
        subtarg = sstr_trn[i][W / 2:-W / 2 + 1]  # secondary structure elem.  remove the overhang
        for j in range(len(subseqs)):
            nb.observe(subseqs[j], subtarg[j])  # let NB count
            # print "observing", subseqs[j], subtarg[j]
        
    # create an array of zeroes of length of the secondary structure alphabet
    cm = numpy.zeros((len(DSSP3_Alphabet), len(DSSP3_Alphabet)))

    # iterate over the proteins in the test set
    for i in range(len(prot_tst)):
        subseqs = slidewin(prot_tst[i], W)
        subtarg = sstr_tst[i][W / 2:-W / 2 + 1]
        for j in range(len(subseqs)):
            out = nb[subseqs[j]]
            c_targ = DSSP3_Alphabet.index(subtarg[j])  
Beispiel #3
0
"""
Created on 20/08/2014

@author: jacekrad
"""
from sequence import *
from prob import *
from __builtin__ import exit
from sys import stderr
from spred import slidewin

prot = readFastaFile("prot2.fa", Protein_Alphabet)
sstr = readFastaFile("sstr3.fa", DSSP3_Alphabet)

prot_trn = prot[0::2]  # even-numbered indices
prot_tst = prot[1::2]  # odd-numbered indices
sstr_trn = sstr[0::2]  # even-numbered indices
sstr_tst = sstr[1::2]  # odd-numbered indices

W = 5
print "Sequence length  :", len(prot_trn[0])
print "Number of windows:", len(slidewin(prot_trn[0], W))