Exemple #1
0
    def find_new_clone(self, new_seq_buil, old_seq_buil):
        """Report whether every sequence of the new builder already exists in the old one.

        Both builders are parsed with ``MegaAlignment.name2seq``. Each entry
        of the new builder, except the one named ``'#hg19'``, is looked up in
        the old name->sequence mapping via ``MegaAlignment.find_redundant``.

        Args:
            new_seq_buil: Sequence builder holding the new clone set.
            old_seq_buil: Sequence builder holding the previous clone set.

        Returns:
            ``'y'`` when every checked new sequence has at least one redundant
            match in the old set, ``'n'`` as soon as one of them has none.
        """
        aligner = MegaAlignment()
        _, fresh_by_name = aligner.name2seq(new_seq_buil)
        _, known_by_name = aligner.name2seq(old_seq_buil)

        all_known = 'y'
        for clone_name, clone_seq in fresh_by_name.items():
            # '#hg19' is never compared (presumably the reference sequence).
            if clone_name == '#hg19':
                continue
            matches = aligner.find_redundant(clone_seq, known_by_name)
            if matches == []:
                all_known = 'n'
        return all_known
    def get_decomposed_seq(self):
        """Build the clone set after attempting to decompose each tumor genotype.

        SNV clusters are generated per tumor with ``SNPClusterGenerator_cnv1``.
        For a tumor with cluster information,
        ``self.get_candidate_decomposed_clones`` is asked for decomposed
        clones; when it reports a non-empty tumor sequence, its hit sequences
        are merged into the result and the tumor is recorded as decomposed.
        In every other case the tumor's original hit sequences (from
        ``self.extract_hitseq``) are merged instead.

        Returns:
            A ``(sequences, message)`` pair:

            * ``(self.clone_seq, 'no decomposed clone was made')`` when no
              tumor was decomposed;
            * ``(self.clone_seq, '... failed decomposition')`` when a
              decomposed tumor genotype is still redundant with one of the
              merged hit sequences;
            * ``(merged name->sequence dict, 'decomposed' + str(tumor list))``
              otherwise.
        """
        aligner = MegaAlignment()
        _, tumor_to_seq = aligner.name2seq(self.tumor_seqs)

        print('make SNV clusters')
        cluster_maker = SNPClusterGenerator_cnv1(
            self.ini_seq_builder,
            self.v_obs,
            self.Tu2CloFre,
            self._CNV_file,
            self.freq_cutoff,
        )
        # Per the original note: {tumor: [[seq_builder, {tumor: {clone frequency}}]]}
        clusters_by_tumor = cluster_maker.cluster_cnv()
        print('Decompose incorrect sample genotype clones')

        merged_hits = {}
        decomposed_genotypes = []
        decomposed_tumors = []

        for tumor in clusters_by_tumor:
            cluster_info = clusters_by_tumor[tumor]

            was_decomposed = False
            if cluster_info != []:
                hit_builder, tumor_genotype = self.get_candidate_decomposed_clones(
                    tumor, cluster_info, tumor_to_seq['#' + tumor])
                was_decomposed = tumor_genotype != ''

            if not was_decomposed:
                # No clusters, or decomposition rejected: keep the original hits.
                original_hits = self.extract_hitseq(
                    self.ini_seq_builder,
                    self.Tu2CloFre['T-' + tumor],
                    self.freq_cutoff,
                )
                merged_hits.update(original_hits)
                continue

            _, decomposed_hits = aligner.name2seq(hit_builder)
            merged_hits.update(decomposed_hits)
            decomposed_genotypes.append(tumor_genotype)
            decomposed_tumors.append(tumor)

        if decomposed_tumors == []:
            return self.clone_seq, 'no decomposed clone was made'

        # A decomposed tumor genotype must not reappear among the merged hits.
        for tumor_genotype in decomposed_genotypes:
            if aligner.find_redundant(tumor_genotype, merged_hits) != []:
                return self.clone_seq, 'tumor genotype that was decomposed was hit in different tumor: failed decomposition'

        return merged_hits, 'decomposed' + str(decomposed_tumors)
    def get_candidate_decomposed_clones(self, target_tumor, CluInf_tu,Tuseq):
        """Try to replace a tumor genotype by cluster-derived candidate clones.

        Candidate genotypes are built by adding the mutations of each cluster
        combination (from ``self.combinations``) to each root sequence of
        ``CluInf_tu[0]``. The candidates, together with ``self.clone_seq``,
        are then tested by a clone-frequency regression restricted to this
        tumor.

        NOTE(review): a one-argument method with this same name appears later
        in this listing; if both live in the same class the later definition
        replaces this one -- confirm which is intended.

        Args:
            target_tumor: Tumor name; key into ``self.v_obs`` and
                ``self._CNV_file`` and prefix of the new candidate names.
            CluInf_tu: Cluster information for the tumor; only element 0 (a
                sequence builder parsed with ``MegaAlignment.name2seq``) is
                read.
            Tuseq: Genotype sequence of the tumor itself.

        Returns:
            ``(hit_seq_builder, Tuseq)`` when a cluster combination is
            accepted, otherwise ``(CluInf_tu[0], '')``.
        """
        aligner = MegaAlignment()

        name_order, seq_by_name = aligner.name2seq(CluInf_tu[0])
        seq_length = len(seq_by_name[name_order[0]])

        # Every cluster other than '#Clu0' may be combined on top of a root.
        extra_clusters = [
            name for name in name_order
            if name != '#Clu0' and name.find('Clu') != -1
        ]
        combos, _ = self.combinations([], extra_clusters, 0, {})
        print(target_tumor,'make cluster comb',extra_clusters,combos,name_order)

        if combos == {}:
            return CluInf_tu[0],''

        candidates = {}
        for root in name_order:
            # Roots are '#Clu0' and every sequence that is not a cluster.
            if root != '#Clu0' and root.find('Clu') != -1:
                continue

            root_seq = seq_by_name[root]
            if root == '#Clu0':
                # The first cluster is itself a candidate clone.
                candidates['#'+target_tumor+'Clu0'] = root_seq
            root_muts = aligner.GetMutPos(root_seq)

            for combo_id in combos:
                combo_muts = []
                combo_label = ''
                for member in combos[combo_id]:
                    combo_muts += aligner.GetMutPos(seq_by_name[member])
                    combo_label += member.replace('#','')

                # A combination sharing any mutation with the root is skipped.
                if any(root_muts.count(pos) != 0 for pos in combo_muts):
                    continue

                merged_seq = aligner.ModSeq(
                    'A'*seq_length, combo_muts + root_muts, 'T', seq_length)
                # Keep only genotypes not already present among known clones.
                if aligner.find_redundant(merged_seq, self.clone_seq) == []:
                    candidates['#'+target_tumor+root.replace('#','')+combo_label] = merged_seq

        if candidates == {}:
            return CluInf_tu[0],''

        def regress(seq_builder):
            # Run the regression for this tumor only; return the hit builder
            # and the verdict of self.findcombohit on it.
            computer = CloneFrequencyComputer_cnv1(
                seq_builder,
                {target_tumor:self.v_obs[target_tumor]},
                {target_tumor:self._CNV_file[target_tumor]},
                self.freq_cutoff)
            computer.regress_cnv()
            hits = computer.hitclone_seq_builder
            return hits, self.findcombohit(hits)

        candidates.update(self.clone_seq)
        candidate_names = list(candidates.keys())
        combined_builder = aligner.UpMeg(candidates, candidate_names)

        hit_builder, verdict = regress(combined_builder)
        if verdict != 'y':
            return CluInf_tu[0],''

        print('test the quality of clustercombo, by removing tumor seq (if any)')
        _, hit_by_name = aligner.name2seq(hit_builder)
        if aligner.find_redundant(Tuseq, hit_by_name) == []:
            print('tumor genotype did not hit, so clustercombo is good')
            return hit_builder, Tuseq

        print('tumor genotype was hit, so test if clustercombo still hit without tumor genotype: testing if clustercombo genotypes fit well')
        tumor_like_names = aligner.find_redundant(Tuseq, candidates)
        reduced_builder = []
        for name in candidates:
            if tumor_like_names.count(name) == 0:
                reduced_builder += [name, candidates[name]]

        retry_builder, verdict = regress(reduced_builder)
        if verdict == 'y':
            return retry_builder, Tuseq
        return CluInf_tu[0],''
    def get_candidate_decomposed_clones(self, target_tumor):
        """Decompose a tumor genotype using its hit clusters and clones.

        Reads ``self.ClusterInfo`` as a three-element sequence:
        ``[0]`` maps a cluster name to a ``'<center>-<sign>'`` string,
        ``[1]`` maps a hit name to its frequency, and ``[2]`` is a sequence
        builder parsed with ``MegaAlignment.name2seq``. Hits above 0.02 are
        split into tumor clusters and ordinary hit clones; cluster
        combinations are stacked on every hit clone (and on the root
        cluster) to form candidate genotypes, which are then fitted to the
        observed frequencies of CNV-``'normal'`` positions with NNLS.

        NOTE(review): a three-argument method with this same name appears
        earlier in this listing; if both live in the same class this
        definition replaces the earlier one -- confirm which is intended.

        Args:
            target_tumor: Tumor name (without the ``'T-'`` / ``'#'`` prefixes
                that are added for the various lookups).

        Returns:
            ``(out2, NewClone2Freq)`` when at least one non-``REP`` cluster
            candidate is hit with a genotype different from the tumor's:
            ``out2`` is a MEGA-style list of header lines followed by
            name/sequence pairs, ``NewClone2Freq`` maps clone names to
            frequencies. Otherwise ``([], {})``.
        """
        Align = MegaAlignment()
        CluInf_tu = self.ClusterInfo  #[target_tumor]
        NameOrder, Name2Seq = Align.name2seq(CluInf_tu[2])
        HitCloCluLs = CluInf_tu[1]  #['T-'+target_tumor]
        # Looked up but not used further in this method.
        TuIdentical_seq = self.identical_seq['T-' + target_tumor]
        LenSeq = len(Name2Seq[NameOrder[0]])
        # Rebound below as a 'y'/'n' flag; the sequence itself is not used.
        TuSeq = self.T2Seq['#' + target_tumor]
        Clu2center = CluInf_tu[0]
        SigCluLs = []
        HitCloLs = []
        HitCloSeq_dic = {}
        RootClu = ''
        LarCen = 0.0
        for Hit in HitCloCluLs:
            if HitCloCluLs[Hit] > 0.02:
                # A name '<tumor>Clu...' without 'REP' is a cluster of this tumor.
                if Hit[:len(target_tumor +
                            'Clu')] == target_tumor + 'Clu' and Hit.find(
                                'REP') == -1:
                    SigCluLs.append(Hit)
                    CluName = 'Clu' + Hit.split('Clu')[-1]
                    Center = float(Clu2center[CluName].split('-')[0])

                    for CluN in Clu2center:
                        Center2 = float(Clu2center[CluN].split('-')[0])
                        Sign = Clu2center[CluN].split('-')[1]
                        # Clusters sharing this hit's center are added as well.
                        if Center == Center2 and CluName != CluN:
                            SigCluLs.append(target_tumor + CluN)
                        # Track the largest center; a cluster with that center
                        # and the same center as the current hit becomes root.
                        if LarCen < Center2:  # and Sign=='Pos':  #Pos for middle cut, Neg for K-means
                            LarCen = Center2
                            if Center == Center2: RootClu = target_tumor + CluN
                        elif LarCen <= Center2 and Sign == 'Pos':  #Pos for middle cut, Neg for K-means
                            LarCen = Center2
                            if Center == Center2: RootClu = target_tumor + CluN

                else:
                    HitCloLs.append(Hit)
                    HitCloSeq_dic['#' + Hit] = Name2Seq['#' + Hit]

        # The root cluster acts as a base genotype, not as a combinable cluster.
        if RootClu != '':
            SigCluLs.remove(RootClu)
            HitCloLs.append(RootClu)
        if SigCluLs != []:
            CluCombLs, IDend = self.combinations([], SigCluLs, 0, {})
        else:
            CluCombLs = {}
        if RootClu != '' or CluCombLs != {}:
            # Parenthesised so the line is valid on Python 3 as well as Python 2.
            print('make cluster comb')
            CloCan2Seq = {}
            Got_Candidate = 'n'
            for Root in HitCloLs:
                RootSeq = Name2Seq['#' + Root]
                LenSeq = len(RootSeq)
                RootMut = Align.GetMutPos(RootSeq)
                CloCan2Seq['#' + Root] = RootSeq
                Got_Candidate = 'y'
                if CluCombLs != {}:
                    for ID in CluCombLs:
                        CluLs = CluCombLs[ID]
                        CluN = ''
                        MutPosLs = []
                        for Clu in CluLs:
                            Seq = Name2Seq['#' + Clu]
                            CluMut = Align.GetMutPos(Seq)
                            MutPosLs += CluMut
                            CluN += Clu.replace(target_tumor + 'Clu', 'Clu')

                        MutPosLs = list(set(MutPosLs))
                        # Skip combinations sharing a mutation with the root.
                        Go = 'y'
                        for Mut in MutPosLs:
                            if RootMut.count(Mut) != 0: Go = 'n'

                        if Go == 'y':
                            AllMutPosLs = MutPosLs + RootMut
                            Seq = Align.ModSeq('A' * LenSeq, AllMutPosLs, 'T',
                                               LenSeq)
                            Redun_ls = Align.find_redundant(Seq, HitCloSeq_dic)

                            # Keep only genotypes not already among the hit clones.
                            if Redun_ls == []:
                                CloCan2Seq['#' + target_tumor + Root.replace(
                                    target_tumor + 'Clu', 'Clu') + CluN] = Seq
                                Got_Candidate = 'y'

            if Got_Candidate == 'y':
                # Materialise the key view: the helpers below receive it as a
                # clone ordering, and the sibling three-argument variant of
                # this method also passes list(keys()) to UpMeg.
                Can_list = list(CloCan2Seq.keys())

                new_seq = Align.UpMeg(CloCan2Seq, Can_list)
                alt_frequency = []
                CNVls = self.CNV_info[target_tumor]
                Len = len(CNVls)
                c = 0
                TuMatPosi = []
                tumor_genotype = ''
                # Only positions whose CNV state is 'normal' contribute to the
                # observed frequencies and to the tumor genotype string.
                while c < Len:
                    if CNVls[c] == 'normal':
                        alt_frequency.append(self.v_obs[target_tumor][c])
                        if self.v_obs[target_tumor][c] > 0:
                            TuMatPosi.append(c)
                            tumor_genotype += 'T'
                        else:
                            tumor_genotype += 'A'
                    c += 1

                clone_frequency = CloneFrequencyComputer_cnv1({}, {}, {},
                                                              self.freq_cutoff,
                                                              {})

                MutWildAlleleCount_noCNV = clone_frequency.make_mut_wild_allele_count_noCNV(
                    {}, Can_list,
                    CloCan2Seq)  #PreAbsCNV, clone_order, SNV_seq, Tu2CloFre
                Cmatrix_noCNV, Cmatrix_noCNV_dic = clone_frequency.make_Min(
                    Can_list, CloCan2Seq, MutWildAlleleCount_noCNV)
                Clone2Freq = clone_frequency.do_nnls0(Cmatrix_noCNV, Can_list,
                                                      alt_frequency)

                out2 = ['#MEGA', '!Title SNVs;', '!Format datatype=dna;', ' ']
                AllMut = []
                NewClone2Freq = {}
                CluHit = 'n'
                for Clo0 in Clone2Freq:
                    NewClone2Freq[Clo0] = Clone2Freq[Clo0]
                    if Clone2Freq[Clo0] > 0.02:

                        SeqMutPos = Align.GetMutPos(CloCan2Seq['#' + Clo0])
                        # TuSeq stays 'y' only if the clone carries every
                        # mutated tumor position.
                        TuSeq = 'y'
                        for Mut in SeqMutPos:
                            if TuMatPosi.count(Mut) != 0: AllMut.append(Mut)
                        for Mut in TuMatPosi:
                            if SeqMutPos.count(Mut) == 0: TuSeq = 'n'
                        # Iden becomes the name of the last original sequence
                        # matching the clone over the first Len positions.
                        Iden = 'n'
                        for OriClo in self.OriAnc2Seq0:
                            c = 0
                            Dif = 'n'
                            while c < Len:
                                if self.OriAnc2Seq0[OriClo][c] != CloCan2Seq[
                                        '#' + Clo0][c]:
                                    Dif = 'y'
                                c += 1
                            if Dif == 'n': Iden = OriClo
                        if Iden != 'n':
                            # Report the clone under its original name.
                            out2 += [Iden, self.OriAnc2Seq0[Iden]]
                            NewClone2Freq[Iden[1:]] = Clone2Freq[Clo0]
                            NewClone2Freq[Clo0] = 0
                        elif TuSeq == 'n':

                            out2 += [
                                '#' + Clo0.replace(target_tumor + target_tumor,
                                                   target_tumor),
                                CloCan2Seq['#' + Clo0]
                            ]
                            if Clo0.find('Clu') != -1 and Clo0.find(
                                    'REP') == -1:
                                CluHit = 'y'
                        else:
                            # The clone is reported under the tumor's name.
                            out2 += [
                                '#' + target_tumor, CloCan2Seq['#' + Clo0]
                            ]
                            NewClone2Freq[target_tumor] = Clone2Freq[Clo0]
                            NewClone2Freq[Clo0] = 0
                AllMut = list(set(AllMut))
                # Hit clones do not cover every tumor mutation: add the tumor
                # genotype itself.
                if len(AllMut) < len(TuMatPosi):
                    out2 += ['#' + target_tumor, tumor_genotype]
                if CluHit == 'y':

                    return out2, NewClone2Freq

        return [], {}