def get_candidate_decomposed_clones(self, target_tumor, CluInf_tu,Tuseq):
    """Combine mutation clusters into candidate clones and test whether they fit a tumor.

    Every sequence in ``CluInf_tu[0]`` whose name contains ``'Clu'`` (except
    ``'#Clu0'``) is treated as a cluster.  Each combination of clusters
    produced by ``self.combinations`` is stacked on every "root" sequence
    (``'#Clu0'`` or any name without ``'Clu'``) whose mutations do not overlap
    the combination.  Candidates that are not redundant with ``self.clone_seq``
    are regressed together with the existing clones against the observed
    data of ``target_tumor``.

    Args:
        target_tumor: Key of the tumor in ``self.v_obs`` / ``self._CNV_file``.
        CluInf_tu: Indexable whose first element is a sequence list accepted
            by ``MegaAlignment.name2seq``; it is also the fallback result.
        Tuseq: Tumor genotype sequence, compared against the hit clones.

    Returns:
        ``(hitclone_seq_builder, Tuseq)`` when ``self.findcombohit`` reports
        ``'y'`` for the regression result (re-checked without sequences
        redundant with ``Tuseq`` if the tumor genotype itself was hit);
        otherwise ``(CluInf_tu[0], '')``.
    """
    aligner = MegaAlignment()
    name_order, name_to_seq = aligner.name2seq(CluInf_tu[0])
    seq_len = len(name_to_seq[name_order[0]])

    # Clusters that may be added on top of a root; '#Clu0' is itself a root.
    cluster_names = [
        name for name in name_order
        if name != '#Clu0' and name.find('Clu') != -1
    ]
    cluster_combos, _id_end = self.combinations([], cluster_names, 0, {})
    print(target_tumor,'make cluster comb',cluster_names,cluster_combos,name_order)
    if cluster_combos == {}:
        return CluInf_tu[0], ''

    def _regress(seq_builder):
        # Fresh single-tumor dicts are built for every run, as in the two
        # separate constructor calls this helper replaces.
        computer = CloneFrequencyComputer_cnv1(
            seq_builder,
            {target_tumor: self.v_obs[target_tumor]},
            {target_tumor: self._CNV_file[target_tumor]},
            self.freq_cutoff,
        )
        computer.regress_cnv()
        return computer

    candidates = {}
    for root in name_order:
        # Only '#Clu0' and non-cluster sequences can serve as a root.
        if root != '#Clu0' and root.find('Clu') != -1:
            continue
        root_seq = name_to_seq[root]
        if root == '#Clu0':
            # The first cluster is a candidate clone on its own.
            candidates['#' + target_tumor + 'Clu0'] = root_seq
        root_muts = aligner.GetMutPos(root_seq)

        for combo_id in cluster_combos:
            combo_muts = []
            combo_label = ''
            for cluster in cluster_combos[combo_id]:
                combo_muts += aligner.GetMutPos(name_to_seq[cluster])
                combo_label += cluster.replace('#', '')

            # Skip combinations sharing any mutated position with the root.
            if any(root_muts.count(pos) != 0 for pos in combo_muts):
                continue

            candidate_seq = aligner.ModSeq(
                'A' * seq_len, combo_muts + root_muts, 'T', seq_len)
            # Keep only genotypes not already present among the other clones.
            if aligner.find_redundant(candidate_seq, self.clone_seq) == []:
                candidate_name = (
                    '#' + target_tumor + root.replace('#', '') + combo_label)
                candidates[candidate_name] = candidate_seq

    if candidates == {}:
        return CluInf_tu[0], ''

    candidates.update(self.clone_seq)
    candidate_names = list(candidates.keys())
    combo_fit = _regress(aligner.UpMeg(candidates, candidate_names))
    if self.findcombohit(combo_fit.hitclone_seq_builder) != 'y':
        return CluInf_tu[0], ''

    print('test the quality of clustercombo, by removing tumor seq (if any)')
    _hit_order, hit_name_to_seq = aligner.name2seq(
        combo_fit.hitclone_seq_builder)
    if aligner.find_redundant(Tuseq, hit_name_to_seq) == []:
        print('tumor genotype did not hit, so clustercombo is good')
        return combo_fit.hitclone_seq_builder, Tuseq

    print('tumor genotype was hit, so test if clustercombo still hit without tumor genotype: testing if clustercombo genotypes fit well')
    tumor_like_names = aligner.find_redundant(Tuseq, candidates)
    reduced_builder = []
    for seq_name in candidates:
        if tumor_like_names.count(seq_name) == 0:
            reduced_builder += [seq_name, candidates[seq_name]]

    refit = _regress(reduced_builder)
    if self.findcombohit(refit.hitclone_seq_builder) == 'y':
        return refit.hitclone_seq_builder, Tuseq
    return CluInf_tu[0], ''
def BranchDecClone(self, seq_list, clone_frequency, Tu2CNV):
    """Create tumor-specific "Cut" copies of clones at positions flagged 'ToWild'.

    A ``MegaMP`` run on ``seq_list`` yields per-position information; positions
    whose first entry is ``'ToWild'`` are "bad" and carry a list of clone
    names (``Posi_Inf[1][0]``).  For every tumor and every clone with a
    positive frequency in it, a bad position is scheduled for change when

    * the clone is listed for that position,
    * the tumor's CNV state there is ``'normal'`` or ``'Bad-normal'``, and
    * another clone with positive frequency in the same tumor, not listed
      for the position, has ``'T'`` at it.

    Scheduled positions are passed to ``Align.ModSeq(..., 'A', self.Len)`` and
    the result is stored as ``<clone>Cut<tumor>`` with the clone's frequency.

    Args:
        seq_list: Sequence list accepted by ``MegaAlignment.name2seq``.
        clone_frequency: Mapping tumor key -> {clone name -> frequency}.  The
            first two characters of each tumor key are stripped before it is
            used as a key of ``Tu2CNV`` / ``self.tumor2seq`` (presumably a
            ``'T-'`` prefix; confirm against the caller).
        Tu2CNV: Mapping tumor name -> per-position CNV state.

    Returns:
        ``(outSeqMaj, NewT2C2F)`` from ``Align.CombSimClo`` when bad positions
        exist; otherwise the inputs ``(seq_list, clone_frequency)`` unchanged.
        The inputs are also returned unchanged when the MegaMP run fails;
        previously that path crashed with ``UnboundLocalError`` because
        ``Good_posi_info`` was never assigned.

    NOTE(review): the nesting below was reconstructed from whitespace-collapsed
    source.  In particular ``if ChangeOptions == 'n'`` is placed at the
    per-clone loop level (where the flag is initialised), which means clones
    with non-positive frequency also receive the placeholder value 1 - confirm
    against the original layout.
    """
    Align = MegaAlignment()
    TumorSampleExtract = tsp_information(self.tsp_list)
    CloFreAna = CloneFrequencyAnalizer()
    _clo_order, Clo2Seq = Align.name2seq(seq_list)
    # Alignment dump kept from the original implementation.
    Align.save_mega_alignment_to_file('Test.meg', seq_list)

    tree_builder = MegaMP()
    tree_builder.mao_file = self.mao_file
    alignment_id = 'branchdec_mega_alignment'  # formerly `id`, shadowing the builtin
    status = tree_builder.do_mega_mp(seq_list, alignment_id)
    # Exact `== True` comparison kept: the return type of do_mega_mp is not
    # visible here, so plain truthiness could change which values pass.
    if not (status == True):  # noqa: E712
        print('failed to run megaMP')
        # Without a parsimony result there is nothing to correct.
        return seq_list, clone_frequency

    # True executes the code that removes redundant seqs (True is the default).
    (_seqs_with_ancestor, _tree, _node_map, _mask_seq,
     Good_posi_info) = tree_builder.alignment_least_back_parallel_muts(True)

    BadPosiLs = []  # positions with multiple mutations
    BadPosi2ChnageCloLs = {}  # bad position -> clone names listed for it
    for posi in Good_posi_info:
        Posi_Inf = Good_posi_info[posi]
        if Posi_Inf != ['Good'] and Posi_Inf[0] == 'ToWild':
            BadPosiLs.append(posi)
            BadPosi2ChnageCloLs[posi] = Posi_Inf[1][0]
    # Single-argument print gives the same text under Python 2 and Python 3.
    print('bad positions %s' % (BadPosiLs,))
    if BadPosiLs == []:
        return seq_list, clone_frequency

    NewT2C2F = {}
    for TuKey in clone_frequency:
        NewC2F = {}
        # Result unused; call retained in case the helper has side effects.
        # NOTE(review): confirm it is pure and drop it.
        TumorSampleExtract.make_single_tsp_list(TuKey)
        CloFreDic = clone_frequency[TuKey]
        Tu = TuKey[2:]
        CNV = Tu2CNV[Tu]
        # Value unused; lookup retained so an unknown tumor still raises
        # KeyError here as before.
        self.tumor2seq['#' + Tu]

        for Clo in CloFreDic:  # original hit clones for the tumor
            ChangeOptions = 'n'
            if CloFreDic[Clo] > 0:
                CSeq0 = Clo2Seq['#' + Clo]
                ChangePosi = []  # positions to reset in this clone
                for Bad in BadPosi2ChnageCloLs:
                    ListedClones = BadPosi2ChnageCloLs[Bad]
                    if (ListedClones.count('#' + Clo) != 0
                            and CNV[Bad] in ('normal', 'Bad-normal')):
                        Change = 'n'
                        # Look for another present clone that is mutant at
                        # this position without being listed for it.
                        for Oth in CloFreDic:
                            if Oth != Clo and CloFreDic[Oth] > 0:
                                Soth = Clo2Seq['#' + Oth]
                                if (Soth[Bad] == 'T'
                                        and ListedClones.count('#' + Oth) == 0):
                                    Change = 'y'
                        if Change == 'y':
                            ChangePosi.append(Bad)
                print('change positions %s %s' % (Tu, ChangePosi))
                if ChangePosi != []:
                    CutName = Clo + 'Cut' + Tu
                    Clo2Seq['#' + CutName] = Align.ModSeq(
                        CSeq0, ChangePosi, 'A', self.Len)
                    NewC2F[CutName] = CloFreDic[Clo]
                    ChangeOptions = 'y'
            if ChangeOptions == 'n':
                # Clone kept as is; 1 is a placeholder frequency.
                NewC2F[Clo] = 1
        NewT2C2F[Tu] = NewC2F

    hitseq_align, hitclone_frequency = CloFreAna.ListHitCloAndSeq(
        NewT2C2F, Clo2Seq)
    outSeqMaj, _outSeqAmb, NewT2C2F = Align.CombSimClo(
        hitseq_align, hitclone_frequency, 0.0)
    return outSeqMaj, NewT2C2F
def get_candidate_decomposed_clones(self, target_tumor):
    """Build cluster-combination candidate clones for a tumor and score them.

    Entries of ``self.ClusterInfo[1]`` with a value above 0.02 are split into
    cluster hits (name starts with ``<target_tumor>Clu`` and has no ``'REP'``)
    and other hit clones.  One cluster may be promoted to "root" based on the
    cluster centers in ``self.ClusterInfo[0]``.  Each combination of the
    remaining clusters (``self.combinations``) is added to each root/hit-clone
    sequence when their mutation positions do not overlap, and the candidate
    frequencies are estimated with ``CloneFrequencyComputer_cnv1.do_nnls0``.

    Args:
        target_tumor: Tumor name; used as a key (with ``'T-'`` / ``'#'``
            prefixes) into ``self.identical_seq``, ``self.T2Seq``,
            ``self.CNV_info`` and ``self.v_obs``.

    Returns:
        ``(out2, NewClone2Freq)`` - a list starting with MEGA header lines
        followed by name/sequence entries, and a name -> frequency dict - when
        at least one candidate with frequency > 0.02 whose name contains
        ``'Clu'`` (and not ``'REP'``) is neither identical to a sequence in
        ``self.OriAnc2Seq0`` nor covers all observed tumor mutations.
        Otherwise ``([], {})``.

    NOTE(review): this block was recovered from whitespace-collapsed source;
    the nesting shown is a reconstruction and should be confirmed against the
    original file (see the notes at the ambiguous spots below).
    """
    Align = MegaAlignment()
    CluInf_tu = self.ClusterInfo  # not indexed by tumor: #[target_tumor]
    NameOrder, Name2Seq = Align.name2seq(CluInf_tu[2])
    HitCloCluLs = CluInf_tu[1]  # hit name -> value compared with 0.02 below
    TuIdentical_seq = self.identical_seq['T-' + target_tumor]  # not used below
    LenSeq = len(Name2Seq[NameOrder[0]])
    TuSeq = self.T2Seq['#' + target_tumor]  # rebound as a 'y'/'n' flag further down
    Clu2center = CluInf_tu[0]  # cluster name -> '<center>-<sign>' string
    SigCluLs = []  # cluster hits that may be combined
    HitCloLs = []  # non-cluster hits (plus the root cluster, if any)
    HitCloSeq_dic = {}
    RootClu = ''
    LarCen = 0.0  # largest cluster center seen so far
    for Hit in HitCloCluLs:
        if HitCloCluLs[Hit] > 0.02:
            if Hit[:len(target_tumor + 'Clu')] == target_tumor + 'Clu' and Hit.find(
                    'REP') == -1:
                SigCluLs.append(Hit)
                CluName = 'Clu' + Hit.split('Clu')[-1]
                Center = float(Clu2center[CluName].split('-')[0])
                for CluN in Clu2center:
                    Center2 = float(Clu2center[CluN].split('-')[0])
                    Sign = Clu2center[CluN].split('-')[1]
                    # Other clusters sharing this hit's center are added too.
                    if Center == Center2 and CluName != CluN:
                        SigCluLs.append(target_tumor + CluN)
                    # Track the largest center; a cluster becomes root only
                    # when its center equals the current hit's center.
                    if LarCen < Center2:  # and Sign=='Pos': #Pos for middle cut, Neg for K-means
                        LarCen = Center2
                        if Center == Center2:
                            RootClu = target_tumor + CluN
                    elif LarCen <= Center2 and Sign == 'Pos':  #Pos for middle cut, Neg for K-means
                        LarCen = Center2
                        if Center == Center2:
                            RootClu = target_tumor + CluN
            # NOTE(review): this else is assumed to pair with the cluster-name
            # test above (hit is a regular clone, not a cluster) - confirm.
            else:
                HitCloLs.append(Hit)
                HitCloSeq_dic['#' + Hit] = Name2Seq['#' + Hit]
    if RootClu != '':
        # The root cluster acts as a base sequence, not as an add-on cluster.
        # list.remove drops only the first occurrence.
        SigCluLs.remove(RootClu)
        HitCloLs.append(RootClu)
    if SigCluLs != []:
        CluCombLs, IDend = self.combinations([], SigCluLs, 0, {})
    else:
        CluCombLs = {}
    if RootClu != '' or CluCombLs != {}:
        print 'make cluster comb'
        CloCan2Seq = {}  # '#name' -> candidate sequence
        Got_Candidate = 'n'
        for Root in HitCloLs:
            RootSeq = Name2Seq['#' + Root]
            LenSeq = len(RootSeq)
            RootMut = Align.GetMutPos(RootSeq)
            # Every root is itself a candidate.
            CloCan2Seq['#' + Root] = RootSeq
            Got_Candidate = 'y'
            if CluCombLs != {}:
                for ID in CluCombLs:
                    CluLs = CluCombLs[ID]
                    CluN = ''
                    MutPosLs = []
                    for Clu in CluLs:
                        Seq = Name2Seq['#' + Clu]
                        CluMut = Align.GetMutPos(Seq)
                        MutPosLs += CluMut
                        CluN += Clu.replace(target_tumor + 'Clu', 'Clu')
                    MutPosLs = list(set(MutPosLs))
                    # Reject combinations sharing a mutated position with the root.
                    Go = 'y'
                    for Mut in MutPosLs:
                        if RootMut.count(Mut) != 0:
                            Go = 'n'
                    if Go == 'y':
                        AllMutPosLs = MutPosLs + RootMut
                        Seq = Align.ModSeq('A' * LenSeq, AllMutPosLs, 'T',
                                           LenSeq)
                        # Keep only genotypes not redundant with the hit clones.
                        Redun_ls = Align.find_redundant(Seq, HitCloSeq_dic)
                        if Redun_ls == []:
                            CloCan2Seq['#' + target_tumor + Root.replace(
                                target_tumor + 'Clu', 'Clu') + CluN] = Seq
                            Got_Candidate = 'y'
        if Got_Candidate == 'y':
            Can_list = CloCan2Seq.keys()
            new_seq = Align.UpMeg(CloCan2Seq, Can_list)  # not used below
            alt_frequency = []
            CNVls = self.CNV_info[target_tumor]
            Len = len(CNVls)
            c = 0
            TuMatPosi = []  # positions with an observed value > 0
            tumor_genotype = ''
            # NOTE(review): the inner if/else is assumed to be nested under
            # the 'normal' test.  With that nesting, non-'normal' positions
            # add nothing to alt_frequency or tumor_genotype, so both are
            # shorter than Len whenever such positions exist - confirm
            # against the original indentation.
            while c < Len:
                if CNVls[c] == 'normal':
                    alt_frequency.append(self.v_obs[target_tumor][c])
                    if self.v_obs[target_tumor][c] > 0:
                        TuMatPosi.append(c)
                        tumor_genotype += 'T'
                    else:
                        tumor_genotype += 'A'
                c += 1
            clone_frequency = CloneFrequencyComputer_cnv1({}, {}, {},
                                                          self.freq_cutoff, {})
            MutWildAlleleCount_noCNV = clone_frequency.make_mut_wild_allele_count_noCNV(
                {}, Can_list,
                CloCan2Seq)  #PreAbsCNV, clone_order, SNV_seq, Tu2CloFre
            Cmatrix_noCNV, Cmatrix_noCNV_dic = clone_frequency.make_Min(
                Can_list, CloCan2Seq, MutWildAlleleCount_noCNV)
            Clone2Freq = clone_frequency.do_nnls0(Cmatrix_noCNV, Can_list,
                                                  alt_frequency)
            out2 = ['#MEGA', '!Title SNVs;', '!Format datatype=dna;', ' ']
            AllMut = []  # tumor mutation positions covered by some hit candidate
            NewClone2Freq = {}
            CluHit = 'n'
            for Clo0 in Clone2Freq:
                NewClone2Freq[Clo0] = Clone2Freq[Clo0]
                if Clone2Freq[Clo0] > 0.02:
                    SeqMutPos = Align.GetMutPos(CloCan2Seq['#' + Clo0])
                    # TuSeq stays 'y' only if the candidate carries every
                    # observed tumor mutation.
                    TuSeq = 'y'
                    for Mut in SeqMutPos:
                        if TuMatPosi.count(Mut) != 0:
                            AllMut.append(Mut)
                    for Mut in TuMatPosi:
                        if SeqMutPos.count(Mut) == 0:
                            TuSeq = 'n'
                    # Iden names an original sequence identical to the
                    # candidate over the first Len positions (the last match
                    # in iteration order wins), or stays 'n'.
                    Iden = 'n'
                    for OriClo in self.OriAnc2Seq0:
                        c = 0
                        Dif = 'n'
                        while c < Len:
                            if self.OriAnc2Seq0[OriClo][c] != CloCan2Seq[
                                    '#' + Clo0][c]:
                                Dif = 'y'
                            c += 1
                        if Dif == 'n':
                            Iden = OriClo
                    if Iden != 'n':
                        # Report under the original name and move the frequency.
                        out2 += [Iden, self.OriAnc2Seq0[Iden]]
                        NewClone2Freq[Iden[1:]] = Clone2Freq[Clo0]
                        NewClone2Freq[Clo0] = 0
                    elif TuSeq == 'n':
                        out2 += [
                            '#' + Clo0.replace(target_tumor + target_tumor,
                                               target_tumor),
                            CloCan2Seq['#' + Clo0]
                        ]
                        # Only a cluster-derived candidate counts as a hit.
                        if Clo0.find('Clu') != -1 and Clo0.find(
                                'REP') == -1:
                            CluHit = 'y'
                    else:
                        # Candidate covers all tumor mutations: report it
                        # under the tumor's own name.
                        out2 += [
                            '#' + target_tumor, CloCan2Seq['#' + Clo0]
                        ]
                        NewClone2Freq[target_tumor] = Clone2Freq[Clo0]
                        NewClone2Freq[Clo0] = 0
            AllMut = list(set(AllMut))
            # If the hit candidates leave tumor mutations unexplained, append
            # the tumor genotype itself.
            if len(AllMut) < len(TuMatPosi):
                out2 += ['#' + target_tumor, tumor_genotype]
            if CluHit == 'y':
                return out2, NewClone2Freq
    return [], {}