def find_new_clone(self, new_seq_buil, old_seq_buil):
    """Report whether every sequence in one builder already exists in another.

    Both builders are parsed with ``MegaAlignment.name2seq``. Each sequence
    of ``new_seq_buil`` (except the entry named ``'#hg19'``) is looked up in
    the sequences of ``old_seq_buil`` with ``MegaAlignment.find_redundant``.

    Args:
        new_seq_buil: sequence builder holding the sequences to check.
        old_seq_buil: sequence builder holding the reference sequences.

    Returns:
        ``'y'`` if ``find_redundant`` returned a non-empty result for every
        checked sequence, ``'n'`` if it returned ``[]`` for at least one.
    """
    aligner = MegaAlignment()
    _, candidate_seqs = aligner.name2seq(new_seq_buil)
    _, reference_seqs = aligner.name2seq(old_seq_buil)

    every_seq_known = True
    for clone_name in candidate_seqs:
        if clone_name == '#hg19':
            continue
        # Every clone is queried, even after the first unmatched one.
        matches = aligner.find_redundant(candidate_seqs[clone_name],
                                         reference_seqs)
        if matches == []:
            every_seq_known = False

    return 'y' if every_seq_known else 'n'
def get_decomposed_seq(self):
    """Attempt to decompose each tumor genotype into cluster-derived clones.

    SNV clusters are produced by ``SNPClusterGenerator_cnv1.cluster_cnv``.
    For each tumor with a non-empty cluster entry,
    ``get_candidate_decomposed_clones`` is called; when it returns a
    non-empty tumor sequence the returned hit sequences are collected and
    the tumor is recorded as decomposed. In every other case the tumor's
    hit sequences come from ``extract_hitseq`` on the initial builder.

    Returns:
        A ``(sequences, status)`` pair:

        * ``(self.clone_seq, 'no decomposed clone was made')`` when no tumor
          was decomposed;
        * ``(self.clone_seq, 'tumor genotype that was decomposed was hit in
          different tumor: failed decomposition')`` when ``find_redundant``
          finds any decomposed tumor genotype among the collected hits;
        * otherwise the collected ``{name: sequence}`` dict and
          ``'decomposed'`` followed by ``str()`` of the decomposed tumors.
    """
    aligner = MegaAlignment()
    _, tumor_to_seq = aligner.name2seq(self.tumor_seqs)

    print('make SNV clusters')
    cluster_maker = SNPClusterGenerator_cnv1(
        self.ini_seq_builder,
        self.v_obs,
        self.Tu2CloFre,
        self._CNV_file,
        self.freq_cutoff,
    )
    # Original author's note on the shape of this result:
    # Tu2Cluster={tumor:[[seq_builder,{tumor:{clone frequency}}]]}
    tumor_clusters = cluster_maker.cluster_cnv()

    print('Decompose incorrect sample genotype clones')
    collected_hits = {}
    replaced_tumor_seqs = []
    decomposed_tumors = []
    for tumor in tumor_clusters:
        cluster_info = tumor_clusters[tumor]
        if cluster_info != []:
            hit_builder, replaced_seq = self.get_candidate_decomposed_clones(
                tumor, cluster_info, tumor_to_seq['#' + tumor])
            if replaced_seq != '':
                _, decomposed_hits = aligner.name2seq(hit_builder)
                collected_hits.update(decomposed_hits)
                replaced_tumor_seqs.append(replaced_seq)
                decomposed_tumors.append(tumor)
                continue
        # No clusters for this tumor, or the decomposition was rejected:
        # fall back to the hit sequences of the initial builder.
        collected_hits.update(
            self.extract_hitseq(self.ini_seq_builder,
                                self.Tu2CloFre['T-' + tumor],
                                self.freq_cutoff))

    if decomposed_tumors == []:
        return self.clone_seq, 'no decomposed clone was made'

    for replaced_seq in replaced_tumor_seqs:
        if aligner.find_redundant(replaced_seq, collected_hits) != []:
            return self.clone_seq, 'tumor genotype that was decomposed was hit in different tumor: failed decomposition'
    return collected_hits, 'decomposed' + str(decomposed_tumors)
def get_candidate_decomposed_clones(self, target_tumor, CluInf_tu, Tuseq):
    """Build cluster-combination candidates for one tumor and test them.

    Candidate genotypes are made by adding the mutation positions of every
    cluster combination (from ``self.combinations``) to each root sequence.
    A root is ``'#Clu0'`` or any sequence whose name does not contain
    ``'Clu'``. Candidates that overlap the root's mutations, or that
    ``find_redundant`` already finds in ``self.clone_seq``, are dropped.
    The surviving candidates plus ``self.clone_seq`` are fitted with
    ``CloneFrequencyComputer_cnv1.regress_cnv`` and judged by
    ``self.findcombohit``. If the tumor's own genotype is among the hits,
    the fit is repeated without the sequences matching it.

    NOTE(review): this source arrived with its indentation collapsed; the
    nesting here was reconstructed from syntax and should be confirmed.

    Args:
        target_tumor: key into ``self.v_obs`` and ``self._CNV_file``; also
            used as a prefix for generated candidate names.
        CluInf_tu: indexable whose element 0 is the cluster sequence
            builder (parsed here and returned unchanged on failure).
        Tuseq: the tumor's own sequence.

    Returns:
        ``(hitclone_seq_builder, Tuseq)`` when a cluster combination is
        accepted, otherwise ``(CluInf_tu[0], '')``.
    """
    aligner = MegaAlignment()
    name_order, name_to_seq = aligner.name2seq(CluInf_tu[0])
    seq_len = len(name_to_seq[name_order[0]])

    # Clusters other than '#Clu0' are the ones that get combined.
    combinable = [name for name in name_order
                  if name != '#Clu0' and name.find('Clu') != -1]
    combos, _ = self.combinations([], combinable, 0, {})
    print(target_tumor, 'make cluster comb', combinable, combos, name_order)

    if combos == {}:
        return CluInf_tu[0], ''

    candidates = {}
    for root in name_order:
        # Only '#Clu0' and non-cluster sequences act as roots.
        if root != '#Clu0' and root.find('Clu') != -1:
            continue
        root_seq = name_to_seq[root]
        if root == '#Clu0':
            candidates['#' + target_tumor + 'Clu0'] = root_seq
        root_muts = aligner.GetMutPos(root_seq)

        for combo_id in combos:
            label = ''
            combo_muts = []
            for member in combos[combo_id]:
                combo_muts += aligner.GetMutPos(name_to_seq[member])
                label += member.replace('#', '')

            # A combination sharing a mutation with the root is skipped.
            shares_root_mut = False
            for mut in combo_muts:
                if root_muts.count(mut) != 0:
                    shares_root_mut = True
            if shares_root_mut:
                continue

            merged_seq = aligner.ModSeq('A' * seq_len, combo_muts + root_muts,
                                        'T', seq_len)
            # Keep only genotypes not already among the known clones.
            if aligner.find_redundant(merged_seq, self.clone_seq) == []:
                candidates['#' + target_tumor + root.replace('#', '') + label] = merged_seq

    if candidates == {}:
        return CluInf_tu[0], ''

    def fit(seq_builder):
        # Fresh observation/CNV dicts are built for every fit.
        computer = CloneFrequencyComputer_cnv1(
            seq_builder,
            {target_tumor: self.v_obs[target_tumor]},
            {target_tumor: self._CNV_file[target_tumor]},
            self.freq_cutoff,
        )
        computer.regress_cnv()
        return computer

    candidates.update(self.clone_seq)
    candidate_names = list(candidates.keys())
    first_fit = fit(aligner.UpMeg(candidates, candidate_names))
    if self.findcombohit(first_fit.hitclone_seq_builder) != 'y':
        return CluInf_tu[0], ''

    print('test the quality of clustercombo, by removing tumor seq (if any)')
    _, hit_seqs = aligner.name2seq(first_fit.hitclone_seq_builder)
    if aligner.find_redundant(Tuseq, hit_seqs) == []:
        print('tumor genotype did not hit, so clustercombo is good')
        return first_fit.hitclone_seq_builder, Tuseq

    print('tumor genotype was hit, so test if clustercombo still hit without tumor genotype: testing if clustercombo genotypes fit well')
    tumor_like_names = aligner.find_redundant(Tuseq, candidates)
    reduced_builder = []
    for seq_name in candidates:
        if tumor_like_names.count(seq_name) == 0:
            reduced_builder += [seq_name, candidates[seq_name]]

    second_fit = fit(reduced_builder)
    if self.findcombohit(second_fit.hitclone_seq_builder) == 'y':
        return second_fit.hitclone_seq_builder, Tuseq
    return CluInf_tu[0], ''
def get_candidate_decomposed_clones(self, target_tumor):
    """Build cluster-combination candidates for one tumor and fit them.

    Reads ``self.ClusterInfo`` (element 0: cluster name -> ``'center-sign'``
    string, element 1: name -> frequency, element 2: sequence builder),
    ``self.CNV_info``, ``self.v_obs``, ``self.OriAnc2Seq0`` and
    ``self.freq_cutoff``.

    Steps:
      1. Split hits with frequency > 0.02 into this tumor's clusters
         (name starts with ``target_tumor + 'Clu'`` and has no ``'REP'``)
         and other hit clones; pick a root cluster from the cluster centers.
      2. Combine the remaining clusters (``self.combinations``) with every
         root/hit clone into candidate genotypes.
      3. Estimate candidate frequencies with ``do_nnls0`` and assemble a
         MEGA-style list of the candidates with frequency > 0.02.

    Changes from the previous revision: the Python 2 ``print`` statement is
    now a ``print(...)`` call, and the candidate names are materialised
    with ``list(...)`` so they are a real list on Python 3 as well.

    NOTE(review): this source arrived with its indentation collapsed; the
    nesting here was reconstructed from syntax and must be confirmed
    against the original file (see the genotype loop below in particular).
    NOTE(review): a three-argument method with this same name appears
    earlier in this source; if both live in one class, this definition
    replaces it -- confirm which one callers expect.

    Args:
        target_tumor: tumor name used as key/prefix throughout.

    Returns:
        ``(out2, NewClone2Freq)`` -- the MEGA-style list and a
        ``{clone: frequency}`` dict -- when at least one cluster-derived
        candidate was kept; ``([], {})`` otherwise.
    """
    Align = MegaAlignment()
    CluInf_tu = self.ClusterInfo
    NameOrder, Name2Seq = Align.name2seq(CluInf_tu[2])
    HitCloCluLs = CluInf_tu[1]
    # The next three values are not read before being overwritten or at
    # all; the lookups are kept so missing keys still fail here.
    TuIdentical_seq = self.identical_seq['T-' + target_tumor]
    LenSeq = len(Name2Seq[NameOrder[0]])
    TuSeq = self.T2Seq['#' + target_tumor]
    Clu2center = CluInf_tu[0]
    cluster_prefix = target_tumor + 'Clu'

    SigCluLs = []
    HitCloLs = []
    HitCloSeq_dic = {}
    RootClu = ''
    LarCen = 0.0
    for Hit in HitCloCluLs:
        if HitCloCluLs[Hit] > 0.02:
            if Hit.startswith(cluster_prefix) and Hit.find('REP') == -1:
                SigCluLs.append(Hit)
                CluName = 'Clu' + Hit.split('Clu')[-1]
                Center = float(Clu2center[CluName].split('-')[0])
                for CluN in Clu2center:
                    Center2 = float(Clu2center[CluN].split('-')[0])
                    Sign = Clu2center[CluN].split('-')[1]
                    # Other clusters sharing this hit's center are added too.
                    if Center == Center2 and CluName != CluN:
                        SigCluLs.append(target_tumor + CluN)
                    # Track the largest center seen so far; the root is
                    # updated only when that center equals this hit's.
                    if LarCen < Center2:
                        LarCen = Center2
                        if Center == Center2:
                            RootClu = target_tumor + CluN
                    # Original note: Pos for middle cut, Neg for K-means
                    elif LarCen <= Center2 and Sign == 'Pos':
                        LarCen = Center2
                        if Center == Center2:
                            RootClu = target_tumor + CluN
            else:
                HitCloLs.append(Hit)
                HitCloSeq_dic['#' + Hit] = Name2Seq['#' + Hit]

    # The root cluster acts as a base genotype, not as a combinable one.
    if RootClu != '':
        SigCluLs.remove(RootClu)
        HitCloLs.append(RootClu)

    if SigCluLs != []:
        CluCombLs, IDend = self.combinations([], SigCluLs, 0, {})
    else:
        CluCombLs = {}

    if RootClu != '' or CluCombLs != {}:
        print('make cluster comb')
        CloCan2Seq = {}
        Got_Candidate = 'n'
        for Root in HitCloLs:
            RootSeq = Name2Seq['#' + Root]
            LenSeq = len(RootSeq)
            RootMut = Align.GetMutPos(RootSeq)
            CloCan2Seq['#' + Root] = RootSeq
            Got_Candidate = 'y'
            if CluCombLs != {}:
                for ID in CluCombLs:
                    CluLs = CluCombLs[ID]
                    CluN = ''
                    MutPosLs = []
                    for Clu in CluLs:
                        Seq = Name2Seq['#' + Clu]
                        CluMut = Align.GetMutPos(Seq)
                        MutPosLs += CluMut
                        CluN += Clu.replace(cluster_prefix, 'Clu')
                    MutPosLs = list(set(MutPosLs))
                    # Skip combinations sharing a mutation with the root.
                    Go = 'y'
                    for Mut in MutPosLs:
                        if RootMut.count(Mut) != 0:
                            Go = 'n'
                    if Go == 'y':
                        AllMutPosLs = MutPosLs + RootMut
                        Seq = Align.ModSeq('A' * LenSeq, AllMutPosLs, 'T',
                                           LenSeq)
                        Redun_ls = Align.find_redundant(Seq, HitCloSeq_dic)
                        if Redun_ls == []:
                            CloCan2Seq['#' + target_tumor + Root.replace(
                                cluster_prefix, 'Clu') + CluN] = Seq
                            Got_Candidate = 'y'

        if Got_Candidate == 'y':
            # list(...) keeps this a real list on Python 3, where keys()
            # returns a view; on Python 2 it is an equivalent copy.
            Can_list = list(CloCan2Seq.keys())
            new_seq = Align.UpMeg(CloCan2Seq, Can_list)
            alt_frequency = []
            CNVls = self.CNV_info[target_tumor]
            Len = len(CNVls)
            TuMatPosi = []
            tumor_genotype = ''
            # NOTE(review): nesting reconstructed -- here only positions
            # marked 'normal' contribute to alt_frequency and to
            # tumor_genotype; confirm against the original indentation.
            for c in range(Len):
                if CNVls[c] == 'normal':
                    alt_frequency.append(self.v_obs[target_tumor][c])
                    if self.v_obs[target_tumor][c] > 0:
                        TuMatPosi.append(c)
                        tumor_genotype += 'T'
                    else:
                        tumor_genotype += 'A'

            clone_frequency = CloneFrequencyComputer_cnv1(
                {}, {}, {}, self.freq_cutoff, {})
            # Original note on the arguments:
            # PreAbsCNV, clone_order, SNV_seq, Tu2CloFre
            MutWildAlleleCount_noCNV = (
                clone_frequency.make_mut_wild_allele_count_noCNV(
                    {}, Can_list, CloCan2Seq))
            Cmatrix_noCNV, Cmatrix_noCNV_dic = clone_frequency.make_Min(
                Can_list, CloCan2Seq, MutWildAlleleCount_noCNV)
            Clone2Freq = clone_frequency.do_nnls0(Cmatrix_noCNV, Can_list,
                                                  alt_frequency)

            out2 = ['#MEGA', '!Title SNVs;', '!Format datatype=dna;', ' ']
            AllMut = []
            NewClone2Freq = {}
            CluHit = 'n'
            for Clo0 in Clone2Freq:
                NewClone2Freq[Clo0] = Clone2Freq[Clo0]
                if Clone2Freq[Clo0] > 0.02:
                    SeqMutPos = Align.GetMutPos(CloCan2Seq['#' + Clo0])
                    # TuSeq is reused here as a 'y'/'n' flag: 'n' when the
                    # clone lacks any of the tumor's mutated positions.
                    TuSeq = 'y'
                    for Mut in SeqMutPos:
                        if TuMatPosi.count(Mut) != 0:
                            AllMut.append(Mut)
                    for Mut in TuMatPosi:
                        if SeqMutPos.count(Mut) == 0:
                            TuSeq = 'n'

                    # Look for an original sequence identical to this clone
                    # over the first Len positions (last match wins).
                    Iden = 'n'
                    for OriClo in self.OriAnc2Seq0:
                        Dif = 'n'
                        for c in range(Len):
                            if self.OriAnc2Seq0[OriClo][c] != CloCan2Seq[
                                    '#' + Clo0][c]:
                                Dif = 'y'
                        if Dif == 'n':
                            Iden = OriClo

                    if Iden != 'n':
                        # Report under the original name and move the
                        # frequency onto it.
                        out2 += [Iden, self.OriAnc2Seq0[Iden]]
                        NewClone2Freq[Iden[1:]] = Clone2Freq[Clo0]
                        NewClone2Freq[Clo0] = 0
                    elif TuSeq == 'n':
                        out2 += [
                            '#' + Clo0.replace(target_tumor + target_tumor,
                                               target_tumor),
                            CloCan2Seq['#' + Clo0]
                        ]
                        if Clo0.find('Clu') != -1 and Clo0.find('REP') == -1:
                            CluHit = 'y'
                    else:
                        # Clone covers all tumor mutations: report it under
                        # the tumor's own name.
                        out2 += ['#' + target_tumor, CloCan2Seq['#' + Clo0]]
                        NewClone2Freq[target_tumor] = Clone2Freq[Clo0]
                        NewClone2Freq[Clo0] = 0

            AllMut = list(set(AllMut))
            # Some tumor mutation is covered by no kept clone: add the
            # tumor genotype itself.
            if len(AllMut) < len(TuMatPosi):
                out2 += ['#' + target_tumor, tumor_genotype]
            if CluHit == 'y':
                return out2, NewClone2Freq
    return [], {}