Esempio n. 1
0
 def clone_to_tumor_phylogeny(self, OriginalNwk, Tu2CloFre, CloSeqLs):
     """Prune the clone tree down to the clones retained for each tumor.

     For every tumor in ``Tu2CloFre`` the clones with a frequency above
     zero are compared pairwise.  A clone is retained for the tumor when,
     against every other clone present in that tumor, at least 5% of the
     sites are 'T' in the clone and 'A' in the other one.  In addition,
     the present clone with the most positions reported by
     ``GetMutPos`` is always retained for the tumor.

     Args:
         OriginalNwk: tree handed unchanged to ``self.PruneTree``.
         Tu2CloFre: mapping tumor -> {clone name: frequency}.
         CloSeqLs: sequence list parsed with ``MegaAlignment.name2seq``;
             the parsed names are looked up as ``'#' + clone name``.

     Returns:
         ``(Pruned_Root, Keep2TuLs)``: the result of ``self.RootTree`` on
         the pruned tree, and a mapping retained clone -> list of tumors
         it was retained for ('hg19' is always present with an empty
         list).
     """
     KeepLs = ['hg19']
     Keep2TuLs = {'hg19': []}  # keys always mirror KeepLs
     Align = MegaAlignment()
     _, CloSeq = Align.name2seq(CloSeqLs)
     print(Tu2CloFre)
     for Tu in Tu2CloFre:
         CloFre = Tu2CloFre[Tu]
         CloLs = [Clo for Clo in CloFre if CloFre[Clo] > 0]
         LarClo = ''
         LarMut = 0
         for Clo0 in CloLs:
             Seq0 = CloSeq['#' + Clo0]
             MutC = len(Align.GetMutPos(Seq0))
             if MutC > LarMut:
                 LarMut = MutC
                 LarClo = Clo0
             Keep = True
             for Clo1 in CloLs:
                 if Clo0 == Clo1:
                     continue
                 Seq1 = CloSeq['#' + Clo1]
                 Len = len(Seq1)
                 UniMutNum = sum(
                     1 for c in range(Len)
                     if Seq0[c] == 'T' and Seq1[c] == 'A')
                 # An empty sequence has no unique sites at all, so it is
                 # treated like a proportion of zero instead of dividing
                 # by zero.
                 if Len == 0 or 1.0 * UniMutNum / Len < 0.05:
                     Keep = False
                     break
             if Keep:
                 if Clo0 not in Keep2TuLs:
                     KeepLs.append(Clo0)
                     Keep2TuLs[Clo0] = []
                 Keep2TuLs[Clo0].append(Tu)
         # LarClo stays '' when the tumor has no present clone with at
         # least one mutation; an empty name must not be registered as a
         # clone to keep.
         if LarClo:
             if LarClo not in Keep2TuLs:
                 KeepLs.append(LarClo)
                 Keep2TuLs[LarClo] = []
             # The largest clone may already have been recorded for this
             # tumor by the pairwise test above; do not list it twice.
             if Tu not in Keep2TuLs[LarClo]:
                 Keep2TuLs[LarClo].append(Tu)
     RmLs = [Clo[1:] for Clo in CloSeq if Clo[1:] not in Keep2TuLs]
     # Single-argument print() produces the same text on Python 2 and 3.
     print('remove ancestral clones ' + str(RmLs))
     print('tumor ls for each clone ' + str(Keep2TuLs))
     Pruned = self.PruneTree(OriginalNwk, KeepLs)
     Pruned_Root = self.RootTree(Pruned)
     return Pruned_Root, Keep2TuLs
    def get_candidate_decomposed_clones(self, target_tumor, CluInf_tu, Tuseq):
        """Build candidate clones from cluster combinations and test them.

        The sequences in ``CluInf_tu[0]`` are split into "root" entries
        ('#Clu0' and every name without 'Clu') and cluster entries (every
        other name containing 'Clu').  Each combination of clusters
        returned by ``self.combinations`` is merged with each root, as long
        as none of its mutation positions is already present in the root
        and the merged sequence is not reported as redundant against
        ``self.clone_seq``.  The candidates, together with
        ``self.clone_seq``, are then fitted with
        ``CloneFrequencyComputer_cnv1``.

        Returns:
            ``(hitclone_seq_builder, Tuseq)`` of the accepted fit when
            ``self.findcombohit`` answers 'y' (for the first fit, or for a
            second fit that leaves out the candidates redundant with
            ``Tuseq``); otherwise ``(CluInf_tu[0], '')``.
        """
        aligner = MegaAlignment()
        names, seq_by_name = aligner.name2seq(CluInf_tu[0])
        seq_len = len(seq_by_name[names[0]])

        # Every cluster except the root cluster '#Clu0' may be combined.
        cluster_names = [nm for nm in names if nm != '#Clu0' and 'Clu' in nm]
        combos, _ = self.combinations([], cluster_names, 0, {})
        print(target_tumor,'make cluster comb',cluster_names,combos,names)
        if combos == {}:
            return CluInf_tu[0], ''

        candidates = {}
        for root in names:
            # Roots are '#Clu0' and any entry that is not a cluster.
            if root != '#Clu0' and 'Clu' in root:
                continue
            root_seq = seq_by_name[root]
            if root == '#Clu0':
                # The root cluster is itself a candidate clone.
                candidates['#' + target_tumor + 'Clu0'] = root_seq
            root_muts = aligner.GetMutPos(root_seq)
            root_label = root.replace('#', '')
            for combo_id in combos:
                combo_muts = []
                label_parts = []
                for clu in combos[combo_id]:
                    combo_muts += aligner.GetMutPos(seq_by_name[clu])
                    label_parts.append(clu.replace('#', ''))
                # Skip combinations sharing a mutation with the root.
                if any(root_muts.count(mut) != 0 for mut in combo_muts):
                    continue
                merged = aligner.ModSeq('A' * seq_len, combo_muts + root_muts,
                                        'T', seq_len)
                if aligner.find_redundant(merged, self.clone_seq) == []:
                    key = '#' + target_tumor + root_label + ''.join(label_parts)
                    candidates[key] = merged

        if not candidates:
            return CluInf_tu[0], ''

        candidates.update(self.clone_seq)
        new_seq = aligner.UpMeg(candidates, list(candidates.keys()))
        first_fit = CloneFrequencyComputer_cnv1(
            new_seq,
            {target_tumor: self.v_obs[target_tumor]},
            {target_tumor: self._CNV_file[target_tumor]},
            self.freq_cutoff)
        first_fit.regress_cnv()
        if self.findcombohit(first_fit.hitclone_seq_builder) != 'y':
            return CluInf_tu[0], ''

        print('test the quality of clustercombo, by removing tumor seq (if any)')
        _, hit_seqs = aligner.name2seq(first_fit.hitclone_seq_builder)
        if aligner.find_redundant(Tuseq, hit_seqs) == []:
            print('tumor genotype did not hit, so clustercombo is good')
            return first_fit.hitclone_seq_builder, Tuseq

        print('tumor genotype was hit, so test if clustercombo still hit without tumor genotype: testing if clustercombo genotypes fit well')
        tumor_like = aligner.find_redundant(Tuseq, candidates)
        remaining = []
        for seqname in candidates:
            if tumor_like.count(seqname) == 0:
                remaining += [seqname, candidates[seqname]]

        second_fit = CloneFrequencyComputer_cnv1(
            remaining,
            {target_tumor: self.v_obs[target_tumor]},
            {target_tumor: self._CNV_file[target_tumor]},
            self.freq_cutoff)
        second_fit.regress_cnv()
        if self.findcombohit(second_fit.hitclone_seq_builder) == 'y':
            return second_fit.hitclone_seq_builder, Tuseq
        return CluInf_tu[0], ''
Esempio n. 3
0
    def remove_insignificant_clones(self, v_obs, CloFre_clone,
                                    clone_seq_builder, Tu2CNV, Cut):
        """Drop clones whose SNV frequencies do not differ from another clone.

        Per tumor, the clones with a frequency above zero are ordered by
        decreasing number of positions reported by ``GetMutPos``.  For every
        ordered pair (earlier clone, later clone) the observed frequencies at
        sites marked 'normal' in the tumor's CNV list are split into sites
        that are 'T' in both clones ("shared") and sites that are 'T' in the
        earlier and 'A' in the later clone ("unique").  The two groups are
        compared with Welch's t-test (``equal_var=False``).  When the
        p-value exceeds ``Cut`` - or when either group has fewer than 3
        sites or fewer than 1% of the sequence length, in which case the
        p-value is taken to be 1 - one clone of the pair is marked
        insignificant: the earlier one if its name contains 'Clu' and the
        later one's does not, otherwise the later one.

        Args:
            v_obs: mapping tumor -> per-site observed frequencies.
            CloFre_clone: mapping ``'T-' + tumor`` -> {clone: frequency}.
            clone_seq_builder: sequence list parsed with
                ``MegaAlignment.name2seq``.
            Tu2CNV: mapping tumor -> per-site CNV labels.
            Cut: p-value threshold above which a pair is insignificant.

        Returns:
            ``(new_seq_builder, new_clone_freq)``: the output of
            ``Align.UpMeg`` for the surviving clone sequences, and a mapping
            ``'T-' + tumor`` -> {surviving clone: frequency}.
        """
        Align = MegaAlignment()
        _, clone_seq_dic = Align.name2seq(clone_seq_builder)
        new_clone_freq = {}
        new_clone_seq_dic = {}
        for tumor in v_obs:
            CNV = Tu2CNV[tumor]
            Clo2Fre = CloFre_clone['T-' + tumor]
            ObsFre = v_obs[tumor]

            # Group the present clones by mutation count, then list them
            # from the most to the least mutated.
            MutNum2Clo = {}
            for Clo in Clo2Fre:
                if Clo2Fre[Clo] > 0:
                    MutNum = len(Align.GetMutPos(clone_seq_dic['#' + Clo]))
                    MutNum2Clo.setdefault(MutNum, []).append(Clo)
            clone_order = []
            for MutNum in sorted(MutNum2Clo, reverse=True):
                clone_order += MutNum2Clo[MutNum]

            CloNum = len(clone_order)
            Insig = set()
            for C1 in range(CloNum - 1):
                Name1 = clone_order[C1]
                Clo1 = clone_seq_dic['#' + Name1]
                num_sites = len(Clo1)
                Min_num = 0.01 * num_sites
                for C2 in range(C1 + 1, CloNum):
                    Name2 = clone_order[C2]
                    Clo2 = clone_seq_dic['#' + Name2]

                    Share = []
                    Unique = []
                    for c in range(num_sites):
                        if CNV[c] != 'normal' or Clo1[c] != 'T':
                            continue
                        if Clo2[c] == 'T':
                            Share.append(ObsFre[c])
                        elif Clo2[c] == 'A':
                            Unique.append(ObsFre[c])

                    # Too few sites in either group: no test is run and the
                    # pair counts as indistinguishable.
                    if min(len(Share), len(Unique)) < max(3, Min_num):
                        P = 1
                    else:
                        # The last element of the result is the p-value.
                        P = scipy.stats.ttest_ind(Share,
                                                  Unique,
                                                  equal_var=False)[-1]
                    if P > Cut:
                        if 'Clu' in Name1 and 'Clu' not in Name2:
                            Insig.add(Name1)
                        else:
                            Insig.add(Name2)

            InsigLs = list(Insig)
            if InsigLs:
                # Single-argument print() gives the same text on Python 2
                # and Python 3.
                print('insignificant clones ' + str(tumor) + ' ' +
                      str(InsigLs))
            new_clone_fre_in = {}
            for Clo in Clo2Fre:
                if Clo2Fre[Clo] > 0 and Clo not in Insig:
                    new_clone_fre_in[Clo] = Clo2Fre[Clo]
                    new_clone_seq_dic['#' + Clo] = clone_seq_dic['#' + Clo]
            new_clone_freq['T-' + tumor] = new_clone_fre_in
        new_seq_builder = Align.UpMeg(new_clone_seq_dic, [])

        return new_seq_builder, new_clone_freq
    def get_candidate_decomposed_clones(self, target_tumor):
        """Try to decompose a tumor into clones built from SNV clusters.

        Reads ``self.ClusterInfo`` as a three-item sequence:
        ``[0]`` maps cluster name -> ``'<center>-<sign>'`` string,
        ``[1]`` maps hit name -> value (only hits above 0.02 are used) and
        ``[2]`` is a sequence list parsed with ``MegaAlignment.name2seq``.

        Hits named ``target_tumor + 'Clu...'`` (without 'REP') are treated as
        clusters, all other hits as existing clones.  One cluster may be
        promoted to a root clone based on its center value; the remaining
        clusters are combined via ``self.combinations`` and each combination
        is added on top of each root clone to form candidate genotypes.
        Candidate frequencies are then estimated with ``do_nnls0`` on the
        observed frequencies at sites whose CNV label is 'normal'.

        Returns:
            ``(out2, NewClone2Freq)`` - a MEGA-style list (header lines
            followed by name/sequence pairs) and a clone -> frequency
            mapping - when at least one cluster-derived candidate above
            0.02 was emitted under its own name; otherwise ``([], {})``.
        """
        Align = MegaAlignment()
        CluInf_tu = self.ClusterInfo
        _, Name2Seq = Align.name2seq(CluInf_tu[2])
        HitCloCluLs = CluInf_tu[1]
        Clu2center = CluInf_tu[0]
        CluPrefix = target_tumor + 'Clu'

        SigCluLs = []
        HitCloLs = []
        HitCloSeq_dic = {}
        RootClu = ''
        LarCen = 0.0
        for Hit in HitCloCluLs:
            if HitCloCluLs[Hit] > 0.02:
                if Hit.startswith(CluPrefix) and 'REP' not in Hit:
                    SigCluLs.append(Hit)
                    CluName = 'Clu' + Hit.split('Clu')[-1]
                    Center = float(Clu2center[CluName].split('-')[0])

                    for OtherClu in Clu2center:
                        Parts = Clu2center[OtherClu].split('-')
                        Center2 = float(Parts[0])
                        Sign = Parts[1]
                        # Clusters sharing this hit's center are treated
                        # as significant too.
                        if Center == Center2 and CluName != OtherClu:
                            SigCluLs.append(target_tumor + OtherClu)
                        # Track the largest center seen so far; a cluster
                        # becomes the root only when the current hit sits
                        # on that center.
                        if LarCen < Center2:
                            LarCen = Center2
                            if Center == Center2:
                                RootClu = target_tumor + OtherClu
                        elif LarCen <= Center2 and Sign == 'Pos':  # Pos for middle cut, Neg for K-means
                            LarCen = Center2
                            if Center == Center2:
                                RootClu = target_tumor + OtherClu
                else:
                    HitCloLs.append(Hit)
                    HitCloSeq_dic['#' + Hit] = Name2Seq['#' + Hit]

        if RootClu != '':
            # NOTE(review): SigCluLs can hold the same cluster more than
            # once (two hits with equal centers each append the other);
            # remove() drops only the first occurrence - confirm intended.
            SigCluLs.remove(RootClu)
            HitCloLs.append(RootClu)

        if SigCluLs:
            CluCombLs, IDend = self.combinations([], SigCluLs, 0, {})
        else:
            CluCombLs = {}

        if RootClu == '' and CluCombLs == {}:
            return [], {}

        print('make cluster comb')
        CloCan2Seq = {}
        for Root in HitCloLs:
            RootSeq = Name2Seq['#' + Root]
            LenSeq = len(RootSeq)
            RootMut = Align.GetMutPos(RootSeq)
            CloCan2Seq['#' + Root] = RootSeq
            RootLabel = Root.replace(CluPrefix, 'Clu')
            for ID in CluCombLs:
                ComboLabel = ''
                MutPosLs = []
                for Clu in CluCombLs[ID]:
                    MutPosLs += Align.GetMutPos(Name2Seq['#' + Clu])
                    ComboLabel += Clu.replace(CluPrefix, 'Clu')
                MutPosLs = list(set(MutPosLs))

                # A combination is only stacked on a root that shares none
                # of its mutation positions.
                if any(Mut in RootMut for Mut in MutPosLs):
                    continue
                Seq = Align.ModSeq('A' * LenSeq, MutPosLs + RootMut, 'T',
                                   LenSeq)
                if Align.find_redundant(Seq, HitCloSeq_dic) == []:
                    CloCan2Seq['#' + target_tumor + RootLabel +
                               ComboLabel] = Seq

        # Every root contributes one entry, so an empty dict means there was
        # no root clone to build candidates on.
        if not CloCan2Seq:
            return [], {}

        # A real list (not a Python 3 dict view) is handed on as the clone
        # order.
        Can_list = list(CloCan2Seq.keys())

        # NOTE(review): the result is unused here; the call is kept in case
        # UpMeg has side effects - confirm and remove.
        new_seq = Align.UpMeg(CloCan2Seq, Can_list)

        CNVls = self.CNV_info[target_tumor]
        Len = len(CNVls)
        ObsFre = self.v_obs[target_tumor]
        alt_frequency = []
        TuMatPosi = []
        GenotypeChars = []
        for c in range(Len):
            if CNVls[c] == 'normal':
                alt_frequency.append(ObsFre[c])
                if ObsFre[c] > 0:
                    TuMatPosi.append(c)
                    GenotypeChars.append('T')
                else:
                    GenotypeChars.append('A')
        # NOTE(review): sites that are not 'normal' add no character, so
        # this string is shorter than the sequences when such sites exist -
        # confirm intended.
        tumor_genotype = ''.join(GenotypeChars)

        clone_frequency = CloneFrequencyComputer_cnv1({}, {}, {},
                                                      self.freq_cutoff,
                                                      {})
        MutWildAlleleCount_noCNV = clone_frequency.make_mut_wild_allele_count_noCNV(
            {}, Can_list,
            CloCan2Seq)  #PreAbsCNV, clone_order, SNV_seq, Tu2CloFre
        Cmatrix_noCNV, Cmatrix_noCNV_dic = clone_frequency.make_Min(
            Can_list, CloCan2Seq, MutWildAlleleCount_noCNV)
        Clone2Freq = clone_frequency.do_nnls0(Cmatrix_noCNV, Can_list,
                                              alt_frequency)

        out2 = ['#MEGA', '!Title SNVs;', '!Format datatype=dna;', ' ']
        AllMut = set()
        NewClone2Freq = {}
        CluHit = False
        for Clo0 in Clone2Freq:
            NewClone2Freq[Clo0] = Clone2Freq[Clo0]
            if Clone2Freq[Clo0] > 0.02:
                CanSeq = CloCan2Seq['#' + Clo0]
                SeqMutPos = Align.GetMutPos(CanSeq)
                AllMut.update(Mut for Mut in SeqMutPos if Mut in TuMatPosi)
                # True when the candidate carries every mutation observed
                # in the tumor at 'normal' sites.
                CoversTumor = all(Mut in SeqMutPos for Mut in TuMatPosi)

                # Look for an original ancestor with the same sequence over
                # the first Len sites; the last match wins.
                Iden = None
                for OriClo in self.OriAnc2Seq0:
                    OriSeq = self.OriAnc2Seq0[OriClo]
                    if all(OriSeq[c] == CanSeq[c] for c in range(Len)):
                        Iden = OriClo

                if Iden is not None:
                    # Report the candidate under the ancestor's name.
                    out2 += [Iden, self.OriAnc2Seq0[Iden]]
                    NewClone2Freq[Iden[1:]] = Clone2Freq[Clo0]
                    NewClone2Freq[Clo0] = 0
                elif not CoversTumor:
                    out2 += [
                        '#' + Clo0.replace(target_tumor + target_tumor,
                                           target_tumor),
                        CanSeq
                    ]
                    if 'Clu' in Clo0 and 'REP' not in Clo0:
                        CluHit = True
                else:
                    # The candidate holds all tumor mutations: report it
                    # under the tumor's own name.
                    out2 += ['#' + target_tumor, CanSeq]
                    NewClone2Freq[target_tumor] = Clone2Freq[Clo0]
                    NewClone2Freq[Clo0] = 0

        # Some tumor mutations are explained by no selected candidate: add
        # the tumor genotype itself.
        if len(AllMut) < len(TuMatPosi):
            out2 += ['#' + target_tumor, tumor_genotype]
        if CluHit:
            return out2, NewClone2Freq

        return [], {}