Exemple #1
0
def main(fn):
    """Write CAI rows for the first 101 FASTA records found in *fn*.

    For every record a file named ``<record id>.cai`` is created. It holds
    the row of the leading frame-0 ORF (when the record has one) followed by
    one row per entry of the record's ``frameshift_sequences``.
    """
    # load the genetic code together with its CAI table
    code = GeneticCode("euplotid_genetic_code.txt")
    code.read_CAI_table("euplotid_CAI_table.txt")

    for count, record in enumerate(SeqIO.parse(fn, "fasta")):
        if count > 100:
            break
        full = Sequence(str(record.seq))
        full.set_genetic_code(code)
        full.build_tree()

        # The main ORF runs up to the first stop in frame 0; entries whose
        # position is not positive are terminal frame markers, e.g. (0, -1).
        main_orf = ""
        for frame, stop in full.unique_stop_sequence:
            if frame == 0 and stop > 0:
                main_orf = full.sequence[:stop]
                break

        full.estimate_frameshift_CAI()
        with open(record.id + ".cai", 'w') as out:
            if main_orf != "":
                orf = Sequence(main_orf)
                orf.set_genetic_code(code)
                orf.estimate_CAI()
                print >> out, orf.repr_as_row()
            for key in full.frameshift_sequences:
                print >> out, full.frameshift_sequences[key].repr_as_row()
	def parseSeq(self,fileSeq):
		"""Parse a FASTA-like file into a list of Sequence objects.

		A header line starts with ">" and is split on "|": field 0 (without
		the ">") is the name, field 1 the reference and field 2 the number.
		The lines that follow, each without its trailing newline, are
		concatenated into the sequence text.

		A record that has no sequence text is skipped unless it is the last
		record of the file. Lines that appear before the first header are
		ignored, and a file without any header yields an empty list.

		Raises IndexError when a header has fewer than three "|" fields.
		"""
		#=================================================
		#==== RESTORE BEFORE SUBMISSION ===
		#inputFile = input("fullPath of Sequence file : ")
		inputFile = fileSeq
		#=================================================
		initSeq = ">"
		listSequences = []
		chunks = []    # sequence lines of the record being read
		header = None  # (name, ref, number) of the record being read
		# "with" closes the file even when a malformed header raises
		with open(inputFile,"r") as f:
			for line in f:
				if line.startswith(initSeq):
					# flush the previous record before starting a new one
					seq = "".join(chunks)
					if header is not None and len(seq) > 0:
						listSequences.append(Sequence(seq,*header))
					splittedLine = line.split("|")
					# NOTE(review): with exactly three fields the number keeps
					# the header's trailing newline - confirm callers expect that
					header = (splittedLine[0][1:],splittedLine[1],splittedLine[2])
					chunks = []
				elif header is not None:
					chunks.append(line[:-1] if line.endswith("\n") else line)
		# add the last record
		if header is not None:
			listSequences.append(Sequence("".join(chunks),*header))

		return listSequences
Exemple #3
0
def main(gc):
    """Build a CAI table with genetic code *gc*, write it to disk and exit.

    The table is built from the H_sapiens_Ens75 CDS FASTA file and written
    to ``CAI_tables/homo_CAI_table.txt``.

    Raises:
        SystemExit: always, with status 0, once the table has been written.
    """
    G = GeneticCode(gc)
    G.build_CAI_table(
        "/home/paul/bioinf/Resources/H_sapiens/H_sapiens_Ens75_CDS.fasta")
    G.write_CAI_table("CAI_tables/homo_CAI_table.txt")

    # The demo code that used to follow this call could never run and has
    # been removed; the function has always ended here.
    sys.exit(0)
Exemple #4
0
    def getD2StarWeight(self,seqA,seqB,k,r,flag,sequences,kmersetdic,weight,kmer_pro):
        """Return the reciprocal of the weighted D2Star sum of seqA and seqB.

        The per-key values come from ``getD2SCount``. When ``flag == False``
        it is called with the k-mer dictionary of the pair itself, otherwise
        with ``kmersetdic`` and ``kmer_pro``. Each product of the two values
        is divided by ``sqrt(lenA*p*lenB*p)`` (``p = kmer_pro[key]``) and
        scaled by ``weight[key]``. ``sequences`` is not read here.
        """
        pair=[seqA,seqB]
        helper=Sequence.Sequence()
        # k-mer set and dictionary of the pair; only the dictionary is used
        kmerSet,pairDic=helper.getSeqKerSet(pair,k)
        # trailing arguments of getD2SCount depend on the flag
        if flag==False:
            extra=(pairDic,)
        else:
            extra=(kmersetdic,kmer_pro)
        countsA=helper.getD2SCount(seqA,pair,k,r,flag,*extra)
        countsB=helper.getD2SCount(seqB,pair,k,r,flag,*extra)
        # accumulate the D2Star sum
        total=0.0
        lenA=len(seqA)
        lenB=len(seqB)
        for key in countsA:
            p=kmer_pro[key]
            total+=(countsA[key]*countsB[key])/math.sqrt(lenA*p*lenB*p)*weight[key]
        # np.spacing(1) keeps the division defined when the sum is zero
        return 1/(total+np.spacing(1))
Exemple #5
0
    def fetch_shot(self, m_shot_code):
        """Return a Shot for *m_shot_code*, filled from the TinyDB 'Shot' table when a record exists.

        The shot and sequence names are the 'shot' and 'sequence' groups of
        ``g_shot_regexp``. When no database record matches, the Shot keeps
        the values passed to its constructor below.

        Raises:
            ValueError: m_shot_code does not match ``g_shot_regexp``.
        """
        # use the sequence matching regular expression here instead of hard coding m_shot_code[0:5]
        matchobject = DBAccessGlobals.DBAccessGlobals.g_shot_regexp.search(m_shot_code)
        # make sure this file matches the shot pattern
        if not matchobject:
            raise ValueError("Shot name provided %s does not match regular expression!"%m_shot_code)
        groups = matchobject.groupdict()
        shot = groups['shot']
        seq = groups['sequence']

        local_seq = Sequence.Sequence(seq, DBAccessGlobals.DBAccessGlobals.get_path_for_sequence(seq), -1)
        dbseq = self.fetch_sequence(seq)
        local_seq.g_dbid = dbseq.g_dbid

        shot_ret = Shot.Shot(shot, DBAccessGlobals.DBAccessGlobals.get_path_for_shot(shot), -1, local_seq, None, 1001, 1009, 1092, 1100, 84)

        shot_table = self.g_tinydb.table('Shot')
        shot_query = tinydb.Query()
        dbshot = shot_table.get(shot_query.code == m_shot_code)
        if dbshot:
            shot_ret.g_dbid = dbshot.doc_id
            # A TinyDB document is a dict subclass whose only extra attribute
            # is doc_id, so stored fields are read by key, not by attribute.
            shot_ret.g_task_template = dbshot['task_template']
            shot_ret.g_head_in = dbshot['sg_head_in']
            shot_ret.g_cut_in = dbshot['sg_cut_in']
            shot_ret.g_cut_out = dbshot['sg_cut_out']
            shot_ret.g_tail_out = dbshot['sg_tail_out']
            shot_ret.g_cut_duration = dbshot['sg_cut_duration']

        return shot_ret
Exemple #6
0
 def pcc(self,seqA,seqB,k):
     """Return ``abs(1/pcc)`` for the k-mer frequency rows of seqA and seqB.

     ``pcc`` is the sample covariance of the two rows of ``freq`` divided by
     the product of their sample standard deviations (``ddof=1``).
     """
     pair=[seqA,seqB]
     helper=Sequence.Sequence()
     # k-mer set and dictionary of the pair
     kmerSet,dic=helper.getSeqKerSet(pair,k)
     lisFea,freq=helper.getSeqfreq(pair,k,dic)
     width=freq.shape[1]
     # means of both rows
     totalA=0.0
     totalB=0.0
     for col in range(width):
         totalA+=freq[0,col]
         totalB+=freq[1,col]
     meanA=totalA/width
     meanB=totalB/width
     # sample covariance
     cov=0.0
     for col in range(width):
         cov+=(freq[0,col]-meanA)*(freq[1,col]-meanB)
     cov/=width-1
     # sample standard deviations
     stA=np.std(freq[0,:],ddof=1)
     stB=np.std(freq[1,:],ddof=1)
     # correlation coefficient, returned as the absolute reciprocal
     coefficient=cov/(stA*stB)
     return abs(1/coefficient)
Exemple #7
0
def main(fn):
    """Estimate frameshift likelihoods for the FASTA records in *fn* and report a summary.

    Records are processed until more than 1000 have been counted. A record
    whose tree has more than 1000 leaves is reported on stderr and skipped
    without being counted. A record "passes" when its most likely frameshift
    has a path of length 2 or 3; its likelihood profile is then plotted.
    The final line on stderr gives passed / counted and the percentage.
    """
    TM = TransitionMatrix()
    TM.read("euplotid_transition_matrix.pic")
    # NOTE(review): the plot call below is not given this handle, so nothing
    # visible here writes into the PDF - confirm that is intended.
    pdf = PdfPages("likelihood_profiles_test.pdf")
    b = 0  # count the ones that pass
    c = 0  # count all
    try:
        for seq_record in SeqIO.parse(fn, "fasta"):
            if c > 1000:
                break
            sequence = str(seq_record.seq)
            seq_name = seq_record.id
            s = Sequence(sequence=sequence, name=seq_name)
            s.truncate(effect_truncation=True, verbose=False)
            no_of_leaves = s.count_leaves()
            if no_of_leaves > 1000:
                print >> sys.stderr, "Complex tree with %s leaves...omitting." % no_of_leaves
                continue
            s.set_transition_matrix(TM)
            s.build_tree()
            s.get_frameshift_signals()
            s.estimate_likelihood()
            s.estimate_frameshift_likelihood()
            s.get_most_likely_frameshift()
            if s.most_likely_frameshift is not None:
                if 1 < len(s.most_likely_frameshift.path) < 4:
                    #s.plot_differential_graded_likelihood( outfile=pdf, show_path_str=True )
                    s.plot_differential_graded_likelihood()
                    b += 1
            c += 1
    finally:
        # close the PDF even when a record fails part-way through
        pdf.close()

    # Float arithmetic: "b / c * 100" truncates to 0 under integer division,
    # and c is 0 when no record was counted.
    percent = 100.0 * b / c if c else 0.0
    print >> sys.stderr, "Processed %d (of %d) sequences [%.2f%%]." % (
        b, c, percent)
Exemple #8
0
def main( fn, seq_name ):
	"""Plot the likelihood profile of the record named *seq_name* in FASTA file *fn*.

	The search stops at the first matching record that is processed. A
	matching record whose tree has more than 1000 leaves is reported on
	stderr and skipped. When nothing was processed, a "not found" message is
	written to stderr.
	"""
	TM = TransitionMatrix()
	TM.read( "transition_matrices/euplotid_transition_matrix.pic" )
	located = False
	for record in SeqIO.parse( fn, "fasta" ):
		# only the requested record is of interest
		if record.id != seq_name:
			continue
		target = Sequence( sequence=str( record.seq ), name=seq_name )
		target.truncate( effect_truncation=True, verbose=False )
		leaf_count = target.count_leaves()
		if leaf_count > 1000:
			print >> sys.stderr, "Complex tree with %s leaves...omitting." % leaf_count
			continue
		target.set_transition_matrix( TM )
		target.build_tree()
		target.get_frameshift_signals()
		target.estimate_likelihood()
		target.estimate_frameshift_likelihood()
		target.get_most_likely_frameshift()
		target.get_indexes()
		target.repr_frameshift_sites( include_nulls=False )
		target.plot_differential_graded_likelihood( show_name=False, show_starts=False, show_ML=False )
		located = True
		break

	if not located:
		print >> sys.stderr, "Sequence %s was not found." % seq_name
Exemple #9
0
    def trans_SingleSeq_Matrix(self, sequence, sequences, r):
        """Return, for every (r+1)-mer key, its count in *sequence* divided by the count of its r-long prefix.

        The key sets are taken from *sequences*; the counts from *sequence*
        alone. Keys whose prefix count is zero map to 0. When ``r <= 0`` a
        message is printed and None is returned.
        """
        if r <= 0:
            print("r<=0,无转移矩阵,请重新输入")
            return
        single = [sequence]
        helper = Sequence.Sequence()
        # counts of the r-long prefixes
        kmerset, prefix_index = helper.getSeqKerSet(sequences, r)
        prefix_counts, count0 = helper.getSeqCount(single, r, prefix_index)
        prefix_totals = prefix_counts[0]
        # drop the last r-mer of the sequence from the prefix counts
        tail = sequence[-r:]
        prefix_totals[tail] -= 1
        # counts of the (r+1)-mers
        kmerset1, full_index = helper.getSeqKerSet(sequences, r + 1)
        full_counts, count2 = helper.getSeqCount(single, r + 1, full_index)
        full_totals = full_counts[0]

        resultdic = dict.copy(full_totals)
        for key in full_totals:
            denominator = prefix_totals[key[0:-1]]
            resultdic[key] = 0 if denominator == 0 else full_totals[key] / denominator
        return resultdic
Exemple #10
0
 def get_Mul_kmer_Pro(self, sequences, k, r):
     """Return a probability for every k-mer key of *sequences*, passed through ``Sq.addfloat``.

     ``r >= k`` is treated as ``r = 0``. With ``r == 0`` a k-mer's value is
     the product of the initial probabilities of its characters. With
     ``r > 0`` the first r characters use the initial probabilities and each
     following step multiplies in the transition value of the (r+1)-long
     window. With ``r < 0`` a message is printed and the copied k-mer
     dictionary is passed on unchanged.
     """
     if r >= k:
         r = 0
     Sq = Sequence.Sequence()
     kmers, kmer_index = Sq.getSeqKerSet(sequences, k)
     resultdic = dict.copy(kmer_index)
     # initial probabilities
     initProdic = self.init_MUl_pro(sequences)
     if r < 0:
         print("r的值设定有误,不能小于0")
     elif r == 0:
         for kmer in kmer_index:
             pro = 1
             for symbol in kmer:
                 pro = initProdic[symbol] * pro
             resultdic[kmer] = pro
     else:
         # state transitions
         transdic = self.trans_MulSeq_Matrix(sequences, r)
         for kmer in kmer_index:
             # initial probability of the first r characters
             pro = 1
             for i in range(r):
                 pro = initProdic[kmer[i]] * pro
             # multiply in one transition per (r+1)-long window
             for start in range(len(kmer) - r):
                 pro = pro * transdic[kmer[start:start + r + 1]]
             resultdic[kmer] = pro
     return Sq.addfloat(resultdic)
Exemple #11
0
def main(fasta_file):
    """Write the frameshift sequences of the first 11 usable records of *fasta_file*.

    A record whose id (up to the first space) is listed in ``bad_sequence``
    is skipped and does not count towards the limit. For every other record
    a file ``<id>.fa`` is written with, per frameshift sequence, a header
    line built from its path and signals followed by the shifted sequence.
    """
    bad_sequence = [
        "comp1705_c0_seq1", "comp1716_c0_seq1", "comp1809_c0_seq1",
        "comp2102_c0_seq1", "comp2215_c0_seq1", "comp2215_c0_seq2",
        "comp2216_c0_seq1", "comp2216_c0_seq2"
    ]
    c = 0  # number of records written so far
    for seq_record in SeqIO.parse(fasta_file, "fasta"):
        if c > 10: break
        seq_id = seq_record.id.split(" ")[0]
        if seq_id in bad_sequence:
            continue
        s = Sequence(str(seq_record.seq))
        s.build_tree()
        with open(seq_id + ".fa", 'w') as f:
            for k in s.frameshift_sequences:
                F = s.frameshift_sequences[k]  # a FrameshiftSequence object
                path = "|".join("%s:%s" % step for step in F.path)
                print >> f, ">%s" % path + ";" + ",".join(F.signals)
                print >> f, F.frameshifted_sequence
        # was "c += 0", which left the record limit above without any effect
        c += 1
def allignSequences(S, T):
    """Align S and T with a dynamic-programming table.

    Both strings get a leading '-' so that row and column 0 stand for the
    empty prefix. Border cells cost -1 per position; inner cells are filled
    by ``getMax`` / ``putMax``.

    Returns a tuple (score of the last cell, alignment object of the last
    cell, ``getMeasure`` of that alignment).
    """
    S = '-' + S
    T = '-' + T
    n, m = len(S), len(T)

    # score table V and, per cell, the alignment built so far
    V = [[0.0] * m for _ in range(n)]
    Seq = [[Sequence() for _ in range(m)] for _ in range(n)]

    # first column: a prefix of S against gaps only
    for row in range(n):
        V[row][0] = -row
        Seq[row][0].s = S[1:row + 1]
        Seq[row][0].t = '-' * row

    # first row: gaps only against a prefix of T
    for col in range(m):
        V[0][col] = -col
        Seq[0][col].s = '-' * col
        Seq[0][col].t = T[1:col + 1]

    # fill the inner cells row by row
    for row in range(1, n):
        for col in range(1, m):
            choice = getMax(row, col, S, T, Seq, V)
            putMax(choice, row, col, S, T, Seq, V)

    final = Seq[-1][-1]
    return (V[-1][-1], final, getMeasure(final))
Exemple #13
0
def test_MassUpdate():
    """Updating the table with both states of every imported step gives 100 entries."""
    table = States.clsStatesTable()
    seq = Sequence.clsSequence()
    seq.Import("csv/test/testSeq.csv")
    for step in seq.Steps:
        for state in (step.state0, step.state1):
            table.Update(state)
    assert table.len == 100
def main():
    s = 'AUGGAAGAAGAAGAAGAAGAAGAAGAAGAAGAAUAGGAAGAAGAAGAAGAAGAAGAAGAAGAAGAAGAAUAG'
    s = Sequence.Sequence(no_of_shifts=3, min_length=50, max_length=100)
    s.generate_frameshift_sequence()
    print s.info()
    print s
    print

    print get_stop_pos2(str(s))
Exemple #15
0
def test_MassUpdate3():
    """A single list update with rewards leaves 99 entries in the table."""
    table = States.clsStatesTable()
    seq = Sequence.clsSequence()
    seq.Import("csv/test/testSeq.csv")
    states = [step.state0 for step in seq.Steps]
    rewards = [-1.5 for _ in seq.Steps]
    table.Update(states, reward=rewards)
    # NOTE(review): this assigns the reward rather than asserting it -
    # confirm whether a comparison was intended.
    for idx in range(table.len):
        table[idx].reward = -1.5
    assert table.len == 99
    def track(self, image):
        """Search for the template in a window around the stored position and return a Rectangle.

        The window is ``self.window`` wide and high, centred on
        ``self.position`` and clipped to the image. When the clipped window
        is smaller than the template, a Rectangle derived from the stored
        position is returned and the position is left unchanged. Otherwise
        the best ``TM_CCOEFF_NORMED`` match becomes the new position.
        """
        half_window = float(self.window) / 2
        pos_x, pos_y = self.position[0], self.position[1]

        left = max(round(pos_x - half_window), 0)
        top = max(round(pos_y - half_window), 0)

        right = min(round(pos_x + half_window), image.shape[1] - 1)
        bottom = min(round(pos_y + half_window), image.shape[0] - 1)

        if (right - left < self.template.shape[1]
                or bottom - top < self.template.shape[0]):
            # NOTE(review): the regular return below uses the match's
            # top-left corner, while this one adds half the size to the
            # stored position - confirm that is intended.
            return Sequence.Rectangle(pos_x + self.size[0] / 2, pos_y + self.size[1] / 2, self.size[0], self.size[1])

        region = image[int(top):int(bottom), int(left):int(right)]

        scores = cv2.matchTemplate(region, self.template, cv2.TM_CCOEFF_NORMED)
        _, _, _, best = cv2.minMaxLoc(scores)

        match_x = left + best[0]
        match_y = top + best[1]
        self.position = (match_x + float(self.size[0]) / 2, match_y + float(self.size[1]) / 2)

        return Sequence.Rectangle(match_x, match_y, self.size[0], self.size[1])
Exemple #17
0
def test_MassUpdate2():
    """A single list update with Q values leaves 99 entries in the table."""
    table = States.clsStatesTable()
    seq = Sequence.clsSequence()
    seq.Import("csv/test/testSeq.csv")
    states = [step.state0 for step in seq.Steps]
    q_values = [[-1, -2, -3] for _ in seq.Steps]
    table.Update(states, Q=q_values)
    # NOTE(review): this assigns Q rather than asserting it - confirm
    # whether a comparison was intended.
    for idx in range(table.len):
        table[idx].Q = [-1, -2, -3]
    assert table.len == 99
Exemple #18
0
def test_Reset():
    """After Reset the table is empty and index 0 compares equal to None."""
    table = States.clsStatesTable()
    seq = Sequence.clsSequence()
    seq.Import("csv/test/testSeq.csv")
    for step in seq.Steps:
        for state in (step.state0, step.state1):
            table.Update(state)
    table.Reset()
    assert table[0] == None
    assert table.len == 0
Exemple #19
0
    def fetch_sequence(self, m_seq_code):
        """Return a Sequence for *m_seq_code*, carrying the TinyDB document id when a record exists."""
        path = DBAccessGlobals.DBAccessGlobals.get_path_for_sequence(m_seq_code)
        result = Sequence.Sequence(m_seq_code, path, -1)
        # look the code up in the 'Sequence' table
        table = self.g_tinydb.table('Sequence')
        query = tinydb.Query()
        record = table.get(query.code == m_seq_code)
        if record:
            result.g_dbid = record.doc_id
        return result
Exemple #20
0
 def getMulD2StarWeight(self,seqA,seqB,kstart,kend,r,flag,sequences,weight,kmer_pro):
     """Return the reciprocal of the weighted sum of products of the two sequences' ``getD2StarMulCount`` values.

     For every key of seqA's result, the values of seqA and seqB are
     multiplied and scaled by ``weight[key]``. The previously built list of
     the two sequences was never read and is no longer created.
     """
     Sq=Sequence.Sequence()
     lisFeaA=Sq.getD2StarMulCount(seqA,sequences,kstart,kend,r,flag,kmer_pro)
     lisFeaB=Sq.getD2StarMulCount(seqB,sequences,kstart,kend,r,flag,kmer_pro)
     su=0.0
     for key in lisFeaA:
         su+=(lisFeaA[key]*lisFeaB[key])*weight[key]
     # np.spacing(1) keeps the division defined when the sum is zero
     return 1/(su+np.spacing(1))
Exemple #21
0
 def manhattan(self,seqA,seqB,k):
     """Return the sum of absolute differences between the two sequences' ``getSeqfreq`` values over all k-mers."""
     pair=[seqA,seqB]
     helper=Sequence.Sequence()
     # k-mer set and dictionary of the pair
     kmerSet,dic=helper.getSeqKerSet(pair,k)
     lisFea,freq=helper.getSeqfreq(pair,k,dic)
     total=0.0
     for kmer in kmerSet:
         total+=abs(lisFea[0][kmer]-lisFea[1][kmer])
     return total
Exemple #22
0
 def EuD(self,seqA,seqB,k):
     """Return the square root of the summed squared differences between the two sequences' ``getSeqfreq`` values."""
     pair=[seqA,seqB]
     helper=Sequence.Sequence()
     # k-mer set and dictionary of the pair
     kmerSet,dic=helper.getSeqKerSet(pair,k)
     lisFea,freq=helper.getSeqfreq(pair,k,dic)
     squared=0.0
     for kmer in kmerSet:
         squared+=(lisFea[0][kmer]-lisFea[1][kmer])**2
     return math.sqrt(squared)
Exemple #23
0
 def getD2(self,seqA,seqB,k):
     """Return the reciprocal of the D2 sum: the products of the two rows of ``count``, column by column."""
     pair=[seqA,seqB]
     helper=Sequence.Sequence()
     # k-mer set and dictionary of the pair
     kmerSet,dic=helper.getSeqKerSet(pair,k)
     lisFea,count=helper.getSeqCount(pair,k,dic)
     # accumulate the D2 sum
     total=0.0
     for col in range(count.shape[1]):
         total+=count[0,col]*count[1,col]
     # np.spacing(1) keeps the division defined when the sum is zero
     return 1/(total+np.spacing(1))
Exemple #24
0
    def getMulD2Weight(self,seqA,seqB,kstart,kend,sequences,weight):
        """Return the reciprocal of the weighted D2 sum over the keys of the ``getMulCount`` result."""
        pair=[seqA,seqB]
        helper=Sequence.Sequence()
        # one mapping per sequence of the pair
        lisFea=helper.getMulCount(pair,kstart,kend,sequences)
        # accumulate the weighted D2 sum
        total=0.0
        for kmer in lisFea[0]:
            total+=lisFea[0][kmer]*lisFea[1][kmer]*weight[kmer]
        # np.spacing(1) keeps the division defined when the sum is zero
        return 1/(total+np.spacing(1))
Exemple #25
0
 def getD2Weight(self,seqA,seqB,k,sequences,weight):
     """Return the reciprocal of the weighted D2 sum of seqA and seqB, using the k-mer dictionary of *sequences*."""
     pair=[seqA,seqB]
     helper=Sequence.Sequence()
     # the k-mer dictionary comes from all sequences, the counts from the pair
     kmerSet,dic=helper.getSeqKerSet(sequences,k)
     lisFea,count=helper.getSeqCount(pair,k,dic)
     # accumulate the weighted D2 sum
     total=0.0
     for kmer in lisFea[0]:
         total+=lisFea[0][kmer]*lisFea[1][kmer]*weight[kmer]
     # np.spacing(1) keeps the division defined when the sum is zero
     return 1/(total+np.spacing(1))
Exemple #26
0
 def chebyshev(self,seqA,seqB,k):
     """Return the largest absolute column-wise difference between the two rows of ``freq``.

     When ``freq`` has no columns the start value ``-sys.maxsize-1`` is
     returned.
     """
     pair=[seqA,seqB]
     helper=Sequence.Sequence()
     # k-mer set and dictionary of the pair
     kmerSet,dic=helper.getSeqKerSet(pair,k)
     lisFea,freq=helper.getSeqfreq(pair,k,dic)
     # Chebyshev distance: keep the running maximum
     largest=-sys.maxsize-1
     for col in range(freq.shape[1]):
         gap=abs(freq[0,col]-freq[1,col])
         largest=max(largest,gap)
     return largest
Exemple #27
0
def test_Key():
    """Looking a state up by its feature list returns an entry with those features."""
    table = States.clsStatesTable()
    seq = Sequence.clsSequence()
    seq.Import("csv/test/testSeq.csv")
    for step in seq.Steps:
        for state in (step.state0, step.state1):
            table.Update(state)
    # walk the 10 x 10 grid; only cell (9, 9) carries the flag 1
    for cell in range(100):
        i, j = divmod(cell, 10)
        flag = 1 if cell == 99 else 0
        stat = [float(i), float(j), flag]
        assert table[stat].features == stat
Exemple #28
0
    def getD2_single(self,seqA,seqB,kstart,kend):
        """Return the reciprocal of the D2 sum of seqA and seqB over the keys of the ``getMulCount`` result.

        ``getMulCount`` is called with the pair as both the sequences to
        count and the reference set. Its result is read as one mapping per
        sequence, the same way ``getMulD2Weight`` reads it.
        """
        seqLis=[seqA,seqB]
        Sq =Sequence.Sequence()
        countLis=Sq.getMulCount(seqLis,kstart,kend,seqLis)
        # accumulate the D2 sum
        su=0.0
        # The former "range(dict.keys(...))" raised TypeError on every call,
        # and "countLis[0,key]" indexed a list with a tuple; iterate the keys
        # and index each mapping instead.
        for key in countLis[0]:
            su=su+countLis[0][key]*countLis[1][key]
        # np.spacing(1) keeps the division defined when the sum is zero
        return 1/(su+np.spacing(1))
Exemple #29
0
 def KLD(self,seqA,seqB,k):
     """Return the mean of the two directed base-2 divergences between the rows of ``freq``.

     NOTE(review): a zero entry in either row makes the ratio or the
     logarithm undefined - presumably ``getSeqfreq`` never returns zeros;
     verify.
     """
     pair=[seqA,seqB]
     helper=Sequence.Sequence()
     # k-mer set and dictionary of the pair
     kmerSet,dic=helper.getSeqKerSet(pair,k)
     lisFea,freq=helper.getSeqfreq(pair,k,dic)
     forward=0.0
     backward=0.0
     for col in range(freq.shape[1]):
         p=freq[0,col]
         q=freq[1,col]
         forward+=p*math.log2(p/q)
         backward+=q*math.log2(q/p)
     return (forward+backward)/2
Exemple #30
0
def test_Sort():
    """After Sort the entries follow the 10 x 10 grid in row-major order."""
    table = States.clsStatesTable()
    seq = Sequence.clsSequence()
    seq.Import("csv/test/testSeq.csv")
    for step in seq.Steps:
        for state in (step.state0, step.state1):
            table.Update(state)
    table.Sort()
    # entry c is cell (c // 10, c % 10); only the last cell has the flag 1
    for c in range(100):
        i, j = divmod(c, 10)
        flag = 1 if c == 99 else 0
        assert table[c].features == [float(i), float(j), flag]
Exemple #31
0
#!/usr/bin/env python

"""
translate.py <filename>

Translates a DNA sequence to a protein sequence
"""

import sys

import Fasta
import Sequence


if len(sys.argv)!=2 or '-h' in sys.argv or '--help' in sys.argv:
    sys.exit(__doc__)

w = 60

iFilename = sys.argv[1]
faFile = Fasta.load_mfa_iter(iFilename)
for header,seq in faFile:
    protein = Sequence.translate(seq)

    print '>%s' % header
    for i in xrange(0, len(protein), w):
        print protein[i:i+w]
Exemple #32
0
#!/usr/bin/env python

"""
reverse_comp.py <filename>

Prints the reverse complement of a DNA string (in Fasta format).
"""

import sys

import Fasta
import Sequence


if len(sys.argv) != 2 or "-h" in sys.argv or "--help" in sys.argv:
    sys.exit(__doc__)

iFilename = sys.argv[1]
header, seq = Fasta.load(iFilename)

seq = Sequence.reverse_complement(seq.upper())

print ">%s" % header
for i in xrange(0, len(seq), 80):
    print seq[i : i + 80]