def __init__(self, prot_sequence, monoisotopic=False):
    """Store the protein sequence and reset the cached composition data.

    :param prot_sequence: protein sequence as a string; it is converted to
        upper case and wrapped in a ``Seq`` with the ``IUPAC.protein``
        alphabet.
    :param monoisotopic: flag stored unchanged on the instance
        (defaults to False).
    """
    # Always normalise the case. The previous ``islower()`` guard only
    # upper-cased sequences that were entirely lower case, so mixed-case
    # input (e.g. "acdE") was stored with lower-case residues in it.
    self.sequence = Seq(prot_sequence.upper(), IUPAC.protein)
    # Composition values start out unset.
    self.amino_acids_content = None
    self.amino_acids_percent = None
    self.length = len(self.sequence)
    self.monoisotopic = monoisotopic
def posterior(seq, emission_mat, transition_mat, k_counter, seeds, rec_num, counter):
    """
    Pick the highest-scoring state for every inner position of seq and
    record it on a Seq object, which is then reported and saved.

    :param seq: sequence; its first and last positions are skipped
    :param emission_mat: passed through to forward() and backward()
    :param transition_mat: passed through to forward() and backward()
    :param k_counter: num of states, passed through to forward() and backward()
    :param seeds: indexable whose first three entries are the motif seeds;
                  only their lengths are used here
    :param rec_num: passed to the Seq constructor
    :param counter: passed to print_statistics()
    :return: None - results are emitted through print_statistics() and
             save_to_file() on the Seq object
    """
    seq_len = len(seq)
    fwd = forward(seq, emission_mat, transition_mat, k_counter)
    bwd = backward(seq, emission_mat, transition_mat, k_counter)
    # The two tables are combined by element-wise addition
    # (presumably they hold log-probabilities - confirm against forward/backward).
    scores = fwd + bwd

    record = Seq(seq_len - 2, rec_num)

    # Inclusive (first_state, last_state) range of each of the three motifs;
    # the ranges are laid out back to back starting at FIRST_MOTIF_STATE.
    motif_ranges = []
    range_start = FIRST_MOTIF_STATE
    for motif_idx in range(3):
        range_end = range_start + len(seeds[motif_idx]) - 1
        motif_ranges.append((motif_idx, range_start, range_end))
        range_start = range_end + 1

    # decide states
    for pos in range(1, seq_len - 1):
        state = int(np.argmax(scores[:, pos]))
        base = seq[pos]
        for motif_idx, first_state, last_state in motif_ranges:
            if first_state <= state <= last_state:
                # store the base with its offset inside the motif
                record.add_motif_base(motif_idx, (base, state - first_state), pos - 1)
                break
        else:
            # not a motif state
            if state == 2:
                record.add_telo_background(base, pos - 1)
            elif state == 1:
                record.add_pre_telo((base, state))
            else:
                record.add_normal_dna_base((base, state))

    record.print_statistics(doc=None, counter=counter)
    record.save_to_file()
    return None
def ungap(seq):
    """Return a new sequence holding seq's letters with every gap character removed."""
    # TODO - Fix this? It currently assumes the outmost AlphabetEncoder
    # is for the gap. Consider HasStopCodon(Gapped(Protein())) as a test case.
    gap_symbol = seq.gap_char
    kept = "".join(symbol for symbol in seq.data if symbol != gap_symbol)
    # The result uses the alphabet wrapped by seq's own alphabet.
    return Seq.Seq(kept, seq.alphabet.alphabet)
def reduce_sequence(seq, reduction_table, new_alphabet=None):
    """Return seq rewritten in a reduced alphabet.

    Every letter of seq is looked up in reduction_table and replaced by the
    mapped value. Some "standard" tables are in Alphabet.Reduced.

    seq: a Seq.Seq type sequence
    reduction_table: a dictionary whose keys are the "from" alphabet, and
        values are the "to" alphabet
    new_alphabet: alphabet object attached to the result. When None, a copy
        of Alphabet.single_letter_alphabet is used, with its ``letters`` and
        ``size`` filled in from the keys of reduction_table.

    Returns a new Seq.Seq. Raises KeyError if seq contains a letter that is
    not a key of reduction_table.
    """
    if new_alphabet is None:
        import copy

        # Work on a private copy: the previous code overwrote ``letters`` and
        # ``size`` on the shared module-level alphabet object itself, which
        # changed it for every other sequence using it.
        new_alphabet = copy.copy(Alphabet.single_letter_alphabet)
        # NOTE(review): letters are taken from the table's keys (the "from"
        # alphabet), as before; the values may have been intended - confirm.
        new_alphabet.letters = "".join(reduction_table)
        new_alphabet.size = len(new_alphabet.letters)
    # Build the reduced string once instead of growing a Seq letter by letter.
    reduced = "".join(reduction_table[letter] for letter in seq)
    return Seq.Seq(reduced, new_alphabet)
#!/usr/bin/env python
#-*- coding:utf-8 -*-
# Module-level shared state: every name below is a global initialised to an
# empty or zero value.
from PyQt4.QtGui import *
from PyQt4.QtCore import *
from Seq import *

# Resolution-related values, all starting at 0.0.
PDPI = DPI = DPMM = 0.0

# Lookup tables, empty at start-up.
symbols, icons, shortcuts = {}, {}, {}
defaultTextStyles = []

# Document name and data locations.
docName = QString()
dataPath = QString()
mscoreGlobalShare = QString("share\\")

# Score placeholders, set to 0 here.
mscore = gscore = 0

# Sequencer object built with no arguments.
seq = Seq()

recentScores = QStringList()
revision = QString()

# Further empty registries.
instrumentGroups = []
articulation = []
actions = {}
import os
import subprocess


class RfamSearch():
    """Thin wrapper that runs the ``cmscan`` command on a single sequence."""

    def __init__(self):
        pass

    def cmscan(self, seq):
        """Run ``cmscan`` on seq and return the text it wrote.

        seq: object whose ``seq`` attribute is the sequence string; it is
            written as a one-record FASTA file to /tmp/ss.fa.

        The command is run from /home/magnus/work/rfamdb and its standard
        output is redirected to /tmp/cmscan.txt. The content of that file is
        stored in ``self.output`` and returned.
        """
        print(seq)

        # make tmp file
        with open('/tmp/ss.fa', 'w') as fasta:
            fasta.write('>test\n')
            fasta.write(seq.seq)

        cmd = 'cmscan -E 1 Rfam.cm /tmp/ss.fa > /tmp/cmscan.txt'
        # The command is a constant string, so shell=True takes no outside
        # input. call() blocks until cmscan exits; the previous Popen() was
        # never waited on, so the output file could be read before it was
        # written. cwd= runs the child in the database directory without
        # changing (and having to restore) this process's own working dir.
        # The exit status is ignored, as before.
        subprocess.call(cmd, shell=True, cwd='/home/magnus/work/rfamdb')

        with open('/tmp/cmscan.txt') as result:
            self.output = result.read()
        return self.output


#main
if __name__ == '__main__':
    import Seq

    seq = Seq.Seq("GGCGCGGCACCGUCCGCGGAACAAACGG")
    rs = RfamSearch()
    rs.cmscan(seq)