def build_exact(footprint):
    """Build a PSSM from a footprint string.

    Every character of ``footprint`` is lower-cased and looked up in the
    module-level ``exact_codes`` table.  The looked-up entries are returned
    as a list, one per character, in footprint order.  A character missing
    from ``exact_codes`` makes the lookup error propagate to the caller.
    """
    return [exact_codes[base] for base in footprint.lower()]
def distance(self, rhs, method='MSE'):
    """Calculate the distance between this PPR and another one.

    rhs    -- the other PPR; its ``pssm`` attribute is compared with ours.
    method -- name of the metric, handed straight to ``PSSM.distance``:
                  'MSE': Mean Squared Error
                  'KL':  KL divergence
              ``None`` is accepted and treated as 'MSE'.

    Returns whatever ``PSSM.distance`` returns for the two PSSMs.
    """
    # Identity test on purpose: None is a singleton, and ``==`` would defer
    # to any custom __eq__ defined by the argument.
    if method is None:
        method = 'MSE'
    return PSSM.distance(self.pssm, rhs.pssm, method)
def distance(self, rhs, method="MSE"):
    """Calculate the distance between this PPR and another one.

    rhs    -- the other PPR; its ``pssm`` attribute is compared with ours.
    method -- name of the metric, handed straight to ``PSSM.distance``:
                  'MSE': Mean Squared Error
                  'KL':  KL divergence
              ``None`` is accepted and treated as 'MSE'.

    Returns whatever ``PSSM.distance`` returns for the two PSSMs.
    """
    # Identity test on purpose: None is a singleton, and ``==`` would defer
    # to any custom __eq__ defined by the argument.
    if method is None:
        method = "MSE"
    return PSSM.distance(self.pssm, rhs.pssm, method)
def nwprofile(S, gap, seq, residues, sc):
    '''Align a sequence to a profile globally and return the final score.

    S, residues and sc are forwarded unchanged to the ``frequency`` and
    ``PSSM`` helpers; ``gap`` is the score added for every gap step.

    A dynamic-programming matrix F with len(seq)+1 rows and one column per
    profile position (plus one) is filled; each cell takes the best of a
    gap in either direction or a diagonal step scored by the PSSM.  The
    value of the bottom-right cell is returned.  When the sequence or the
    profile is empty that cell is still 0.0, so 0.0 is returned.
    '''
    f = frequency(S, residues)
    M = PSSM(seq, sc, residues, S)
    n_rows = len(seq) + 1
    n_cols = len(f) + 1
    F = [[0.0] * n_cols for _ in range(n_rows)]
    # NOTE(review): row 0 and column 0 stay at 0.0 instead of accumulating
    # gap penalties, so leading gaps cost nothing -- confirm this is intended
    # for a "global" alignment.
    for i in range(1, n_rows):
        for j in range(1, n_cols):
            F[i][j] = max(F[i-1][j] + gap,
                          F[i][j-1] + gap,
                          F[i-1][j-1] + M[i-1][j-1])
    # Index from the end rather than reusing the loop variables: they are
    # undefined when either loop never runs.  The former trailing
    # ``return F`` was unreachable and has been dropped.
    return F[-1][-1]
def load_pprs():
    """Load the known PPR records and attach binding sites and PSSMs.

    Reads ``Known_PPRs.gb`` (GenBank) and ``known_sites.json`` from
    ``utils.TestDir``.  For every JSON entry whose name matches a record's
    ``name`` case-insensitively, the record gets a ``footprints`` list with
    every 'u' replaced by 't'.  Each record then receives ``barkan`` and
    ``yagi`` PSSMs from ``PSSM.build`` and an ``exact`` list holding one
    ``build_exact`` result per footprint.

    Returns the list of records.
    """
    pprs = list(SeqIO.parse(os.path.join(utils.TestDir, "Known_PPRs.gb"),
                            "gb", alphabet=Alphabet.generic_dna))

    # apply known binding sites
    # ``with`` closes the handle even if the JSON fails to parse.
    with open(os.path.join(utils.TestDir, "known_sites.json")) as f:
        sites = json.load(f)
    # items() instead of iteritems(): works on Python 2 and Python 3 alike.
    for name, location in sites.items():
        wanted = name.lower()
        for p in pprs:
            if p.name.lower() == wanted:
                p.footprints = [site.replace('u', 't') for site in location]

    # build PSSMs
    # NOTE(review): ``footprints`` is only assigned above for records with a
    # matching site entry; any other record fails here with AttributeError
    # unless it already carries that attribute -- confirm every record in
    # the GenBank file has a known site.
    for p in pprs:
        p.barkan = PSSM.build(p, 'barkan')
        p.yagi = PSSM.build(p, 'yagi')
        p.exact = [build_exact(footprint) for footprint in p.footprints]
    return pprs
def load_pprs():
    """Load the known PPR records and attach binding sites and PSSMs.

    Reads ``Known_PPRs.gb`` (GenBank) and ``known_sites.json`` from
    ``utils.TestDir``.  For every JSON entry whose name matches a record's
    ``name`` case-insensitively, the record gets a ``footprints`` list with
    every 'u' replaced by 't'.  Each record then receives ``barkan`` and
    ``yagi`` PSSMs from ``PSSM.build`` and an ``exact`` list holding one
    ``build_exact`` result per footprint.

    Returns the list of records.
    """
    pprs = list(
        SeqIO.parse(os.path.join(utils.TestDir, "Known_PPRs.gb"),
                    "gb",
                    alphabet=Alphabet.generic_dna))

    # apply known binding sites
    # ``with`` closes the handle even if the JSON fails to parse.
    with open(os.path.join(utils.TestDir, "known_sites.json")) as f:
        sites = json.load(f)
    # items() instead of iteritems(): works on Python 2 and Python 3 alike.
    for name, location in sites.items():
        wanted = name.lower()
        for p in pprs:
            if p.name.lower() == wanted:
                p.footprints = [site.replace('u', 't') for site in location]

    # build PSSMs
    # NOTE(review): ``footprints`` is only assigned above for records with a
    # matching site entry; any other record fails here with AttributeError
    # unless it already carries that attribute -- confirm every record in
    # the GenBank file has a known site.
    for p in pprs:
        p.barkan = PSSM.build(p, 'barkan')
        p.yagi = PSSM.build(p, 'yagi')
        p.exact = [build_exact(footprint) for footprint in p.footprints]
    return pprs
def PSSM_single_test(length=1000):
    """Plant the target in random sequence and report how the search ranks it.

    Generates ``length - len(target)`` random 'ACGT' characters, inserts
    ``target.seq`` at a random offset, and runs ``PSSM.search`` for ``ppr``
    on the result with ``gaps=2``.

    Returns the index of the first hit whose ``pos`` equals the insertion
    offset plus one, or -1 when no hit has that position.
    """
    filler_len = length - len(target)
    background = Seq(''.join(random.choice('ACGT') for _ in range(filler_len)))
    pos = random.randrange(0, len(background))
    planted = background[0:pos] + str(target.seq) + background[pos:]
    hits = PSSM.search(ppr, planted, gaps=2)
    # Hit positions are compared against the insertion offset shifted by one.
    expected = pos + 1
    for rank, hit in enumerate(hits):
        if hit.pos == expected:
            return rank
    return -1
def PSSM_single_test(length=1000):
    """Plant the target in random sequence and report how the search ranks it.

    Generates ``length - len(target)`` random 'ACGT' characters, inserts
    ``target.seq`` at a random offset, and runs ``PSSM.search`` for ``ppr``
    on the result with ``gaps=2``.

    Returns the index of the first hit whose ``pos`` equals the insertion
    offset plus one, or -1 when no hit has that position.
    """
    n_random = length - len(target)
    letters = ''.join(random.choice('ACGT') for _ in range(n_random))
    seq = Seq(letters)
    pos = random.randrange(0, len(seq))
    head = seq[0:pos]
    tail = seq[pos:]
    seq = head + str(target.seq) + tail
    aln = PSSM.search(ppr, seq, gaps=2)
    # First matching index wins; -1 signals that the planted site was missed.
    return next((i for i, a in enumerate(aln) if a.pos == pos + 1), -1)
def smprofile(S, gap, seq, residues, sc):
    '''Align a sequence to a profile locally and print the results.

    S, residues and sc are forwarded unchanged to the ``frequency`` and
    ``PSSM`` helpers; ``gap`` is the score added for every gap step.

    Fills a matrix F with len(seq)+1 rows and one column per profile
    position (plus one), clamping every cell at 0.0, then prints F followed
    by the largest cell value.  Nothing is returned.
    '''
    profile = frequency(S, residues)
    M = PSSM(seq, sc, residues, S)
    n_rows = len(seq) + 1
    n_cols = len(profile) + 1
    F = [[0.0] * n_cols for _ in range(n_rows)]
    for r in range(1, n_rows):
        above = F[r - 1]
        current = F[r]
        for c in range(1, n_cols):
            current[c] = max(above[c] + gap,
                             current[c - 1] + gap,
                             above[c - 1] + M[r - 1][c - 1],
                             0.0)
    # Starts from the integer 0, so an all-zero matrix prints "0".
    maxscore = 0
    for row in F[1:]:
        for cell in row[1:]:
            if cell > maxscore:
                maxscore = cell
    # Parenthesised single-argument form prints the same under Python 2.
    print(F)
    print(maxscore)
def get_domains(ppr, plastid, percentile=10.0, gaps=1, type='PPR'):
    """Get a list of putative binding domains in the plastid.

    ppr, plastid -- passed to ``PSSM.search`` as the query and the subject.
    percentile   -- width of the accepted score band, as a percentage of
                    the spread between the first and last hit's odds.
    gaps         -- forwarded to ``PSSM.search``.
    type         -- value stored in the ``type`` attribute of every hit.

    Returns the leading hits up to, but not including, the first one whose
    ``qualifiers['odds']`` falls below the threshold; all hits when none
    does.  An empty search result is returned unchanged.
    """
    feats = PSSM.search(ppr, plastid, gaps=gaps, show_stats=False)
    if not feats:
        return feats

    # Presumably the hits arrive sorted by descending odds, since the first
    # and last entries are taken as the extremes -- verify in PSSM.search.
    top = feats[0].qualifiers['odds']
    bottom = feats[-1].qualifiers['odds']
    threshold = top - (percentile / 100.0) * (top - bottom)

    for f in feats:
        f.type = type

    for i, f in enumerate(feats):
        if f.qualifiers['odds'] < threshold:
            # feats[:i] keeps every hit before the first failing one.  The
            # earlier ``feats[0:i-1]`` also dropped the last passing hit.
            return feats[:i]
    return feats
def __init__(self, seq_record):
    """Store the sequence record and build its PSSM.

    seq_record -- kept on the instance as ``seq_record``; the result of
                  ``PSSM.build`` for it is kept as ``pssm``.
    """
    self.seq_record = seq_record
    # Built from the stored attribute, after it has been assigned.
    self.pssm = PSSM.build(self.seq_record)