def remove_ancestral_decomposed(self, remove_tumor_and_rename_decomposed_seq,
                                Error_rate, tumor_seqs):
    """Filter out decomposed ('Clu') clones and re-add missing tumors.

    A sequence whose name contains 'Clu' is dropped when, compared with any
    other sequence (except '#hg19' and itself):

    * the other sequence is also a 'Clu' sequence and
      ``Align.CountAdditionalMut(seq1, seq2)`` is 0, or
    * the other sequence is not a 'Clu' sequence and that count divided by
      the sequence length is below ``Error_rate``.

    Args:
        remove_tumor_and_rename_decomposed_seq: sequence list understood by
            ``MegaAlignment.name2seq``.
        Error_rate: threshold for the per-site count against non-'Clu'
            sequences.
        tumor_seqs: sequence list of the tumor genotypes; a tumor is appended
            when no kept sequence name starts with it (text before 'Clu').

    Returns:
        A list starting with the MEGA header lines, followed by alternating
        name / sequence entries and finally ``'#hg19'`` with an all-'A'
        sequence.
    """
    Align = MegaAlignment()
    SeqOrderIni, Meg2Seq = Align.name2seq(
        remove_tumor_and_rename_decomposed_seq)
    TuLs, Tu2Seq = Align.name2seq(tumor_seqs)
    good_seq = ['#MEGA', '!Title SNVs;', '!Format datatype=dna;', ' ']

    removed = set()
    seq_len = None  # length used for the trailing all-'A' '#hg19' entry
    for name1 in SeqOrderIni:
        if 'Clu' not in name1:
            continue
        seq1 = Meg2Seq[name1]
        seq_len = len(seq1)
        for name2 in SeqOrderIni:
            if name2 == '#hg19' or name1 == name2:
                continue
            Additional_mut_num1 = Align.CountAdditionalMut(
                seq1, Meg2Seq[name2])
            Der = 1.0 * Additional_mut_num1 / len(seq1)
            if 'Clu' in name2:
                # Against another cluster clone an exact zero is required.
                if Additional_mut_num1 == 0:
                    removed.add(name1)
            elif Der < Error_rate:
                removed.add(name1)

    added_prefixes = set()
    for Name in SeqOrderIni:
        if Name not in removed:
            good_seq += [Name, Meg2Seq[Name]]
            added_prefixes.add(Name.split('Clu')[0])
    for Tu in TuLs:
        if Tu not in added_prefixes:
            good_seq += [Tu, Tu2Seq[Tu]]

    if seq_len is None:
        # No 'Clu' sequence was present: the original code hit an unbound
        # local here. Fall back to any available sequence length.
        if SeqOrderIni:
            seq_len = len(Meg2Seq[SeqOrderIni[0]])
        elif TuLs:
            seq_len = len(Tu2Seq[TuLs[0]])
        else:
            seq_len = 0
    good_seq += ['#hg19', 'A' * seq_len]
    return good_seq
def __init__(self, seqs, num_support_position, Cell2PPls, initial_seq_builder,
             OutFileName):
    """Keep the inputs and pre-parse both alignments.

    Args:
        seqs: sequence list parsed into ``CellLs`` / ``Cell2Seq`` and also
            passed to ``MegaAlignment.AddNormal``.
        num_support_position: stored as ``self.cut``.
        Cell2PPls: stored unchanged.
        initial_seq_builder: sequence list parsed into ``Cell2iniSeq``.
        OutFileName: stored as ``self.out_file_name``.
    """
    self.cut = num_support_position
    aligner = MegaAlignment()
    self.ini_seqs_builder = seqs

    cell_names, cell_to_seq = aligner.name2seq(seqs)
    self.CellLs = cell_names
    self.Cell2Seq = cell_to_seq
    # Number of sites is taken from the first listed cell.
    first_cell = self.CellLs[0]
    self.SNVnum = len(self.Cell2Seq[first_cell])

    self.InMeg = aligner.AddNormal(seqs)
    _unused_order, self.Cell2iniSeq = aligner.name2seq(initial_seq_builder)

    self.Cell2PPls = Cell2PPls
    self.out_file_name = OutFileName
# remove_tumor_and_rename_decomposed: merge tumor sequences, per-tumor
# decomposed clone lists and previously inferred clones into one sequence list.
#
# Parameters (as used in the body):
#   tumor2seqs_with_decompose -- dict keyed by tumor name; each value is a
#       sequence list (parsed with name2seq) or [] when nothing was decomposed.
#   seqs_with_ancestor -- sequence list parsed into IniMeg2Seq ('#name' -> seq).
#   tumor_seqs -- sequence list parsed into TuMeg2Seq; also handed to
#       identify_similar_seq(tumor_seqs, 0) / make_similar_seq_dic.
#   REP -- appended as 'REP' + str(REP) to decomposed names containing '#Clu'.
#   clone_frequency_for_seqs_with_ancestor -- dict keyed by 'T-' + tumor whose
#       values map clone name -> frequency; only entries > 0 are considered.
# Returns: Align.RmRedunSeq(outAllSeq) with ['#hg19', 'A' * SNVNum] appended.
#
# Names listed in Tu2IdenTu['T-' + Tu] for every decomposed tumor are collected
# in RmCloLs; tumors and clones whose tumor prefix is in RmCloLs are skipped,
# as are names containing '#Node' / starting with 'Node'.
# NOTE(review): the header entry is 'MEGA' (no leading '#') while several other
# builders in this file emit '#MEGA' -- confirm RmRedunSeq does not care.
# NOTE(review): indentation was lost on this line, so the nesting of the
# 'if DeComCloLs != []' / 'else' pair cannot be confirmed from here.
def remove_tumor_and_rename_decomposed( self, tumor2seqs_with_decompose, seqs_with_ancestor, tumor_seqs, REP, clone_frequency_for_seqs_with_ancestor): Align = MegaAlignment() SeqOrderIni, IniMeg2Seq = Align.name2seq(seqs_with_ancestor) TuLs, TuMeg2Seq = Align.name2seq(tumor_seqs) SNVNum = len(TuMeg2Seq[TuLs[0]]) IdenLs = Align.identify_similar_seq(tumor_seqs, 0) Tu2IdenTu = Align.make_similar_seq_dic(IdenLs) outAllSeq = ['MEGA', '!Title SNVs;', '!Format datatype=dna;', ' '] RmCloLs = [] for Tu in tumor2seqs_with_decompose: DeComCloLs = tumor2seqs_with_decompose[Tu] if DeComCloLs != []: IdenTu = Tu2IdenTu['T-' + Tu] RmCloLs += IdenTu RmCloLs = list(set(RmCloLs)) Done = [] for Tu in tumor2seqs_with_decompose: DeComCloLs = tumor2seqs_with_decompose[Tu] if RmCloLs.count(Tu) == 0: if Done.count('#' + Tu) == 0: outAllSeq += ['#' + Tu, TuMeg2Seq['#' + Tu]] Done.append('#' + Tu) if DeComCloLs != []: DecomCloOrder, Clu2Seq = Align.name2seq(DeComCloLs) for Clu in Clu2Seq: Seq = Clu2Seq[Clu] TuClu = Clu[1:].split('Clu')[0] Code = TuClu in RmCloLs if Code != True and Clu.find('#Node') == -1: if Clu.find('#Clu') != -1: Clu = '#' + Tu + Clu[1:] + 'REP' + str(REP) if Done.count(Clu) == 0: outAllSeq += [Clu, Seq] Done.append(Clu) else: HitCloLs = clone_frequency_for_seqs_with_ancestor['T-' + Tu] for Clo in HitCloLs: if HitCloLs[Clo] > 0: TuClo = Clo.split('Clu')[0] Code = TuClo in RmCloLs if Code != True and Clo[:4] != 'Node': if Done.count('#' + Clo) == 0: outAllSeq += ['#' + Clo, IniMeg2Seq['#' + Clo]] Done.append('#' + Clo) outAllSeq_without_redindant = Align.RmRedunSeq(outAllSeq) outAllSeq_without_redindant += ['#hg19', ('A' * SNVNum)] return outAllSeq_without_redindant
def EstimateSNVfre(self, Tu2CloFre, clone_seq0, ReadCount):
    """Compare expected and observed SNV frequencies for every tumor.

    For each site the expected frequency is the sum, over clones carrying
    'T' at that site, of half the clone frequency. The observed frequency is
    alt / (alt + ref) from ``ReadCount``.

    Args:
        Tu2CloFre: dict mapping a tumor key to {clone name: frequency}. The
            text after the last '-' of the key is used as the tumor name.
        clone_seq0: sequence list parsed with ``MegaAlignment.name2seq``;
            clones are looked up as '#' + clone name.
        ReadCount: dict with '<tumor>:ref' and '<tumor>:alt' entries, each an
            indexable sequence of values convertible with ``float``.

    Returns:
        ``(tumor2estSNV, tumor2diff)``: per-tumor lists of expected
        frequencies and of (expected - observed) differences.

    Raises:
        ZeroDivisionError: when alt + ref is 0 at some site.
    """
    Align = MegaAlignment()
    cloorder, clone_seq = Align.name2seq(clone_seq0)
    tumor2estSNV = {}
    tumor2diff = {}
    for tumor_key in Tu2CloFre:
        clone2frequency = Tu2CloFre[tumor_key]
        tumor = tumor_key.split('-')[-1]
        ref_counts = ReadCount[tumor + ':ref']
        alt_counts = ReadCount[tumor + ':alt']
        estSNVfreLs = []
        DiffLs = []
        for c in range(len(ref_counts)):
            estSNVfre = 0
            for Clo in clone2frequency:
                S = clone_seq['#' + Clo]
                frequency = clone2frequency[Clo]
                if 'e' in str(frequency):
                    # Values rendered in exponent notation are treated as 0
                    # (presumably negligible frequencies -- TODO confirm).
                    F = 0
                else:
                    # 2.0 forces true division: under Python 2 an integer
                    # frequency (e.g. 1) was floored to 0 by "/ 2".
                    F = frequency / 2.0
                if S[c] == 'T':
                    estSNVfre += F
            estSNVfreLs.append(estSNVfre)
            alt = float(alt_counts[c])
            ref = float(ref_counts[c])
            Obs = 1.0 * alt / (alt + ref)
            DiffLs.append(estSNVfre - Obs)
        tumor2estSNV[tumor] = estSNVfreLs
        tumor2diff[tumor] = DiffLs
    return tumor2estSNV, tumor2diff
def __init__(self, seqs_with_ancestor, v_obs, CNV_info, freq_cutoff):
    """Record the inputs and parse the clone alignment.

    Args:
        seqs_with_ancestor: sequence list parsed with
            ``MegaAlignment.name2seq`` into ``ini_clone_order`` and
            ``ini_clone_seq``.
        v_obs: stored unchanged as ``self.v_obs``.
        CNV_info: stored as ``self._CNV_file``.
        freq_cutoff: stored as ``self.CutOff``.
    """
    self.CutOff = freq_cutoff
    parsed_alignment = MegaAlignment().name2seq(seqs_with_ancestor)
    self.ini_clone_order, self.ini_clone_seq = parsed_alignment
    self._CNV_file = CNV_info
    self.v_obs = v_obs
def findcombohit(self, seq_builder):
    """Return 'y' if any sequence name contains 'Clu', otherwise 'n'.

    Args:
        seq_builder: sequence list parsed with ``MegaAlignment.name2seq``.
    """
    aligner = MegaAlignment()
    names, _name_to_seq = aligner.name2seq(seq_builder)
    has_cluster_name = any('Clu' in name for name in names)
    if has_cluster_name:
        return 'y'
    return 'n'
def extract_hitseq(self, seq_buil, CloFre, Cut):
    """Collect the sequences of clones whose frequency exceeds ``Cut``.

    Args:
        seq_buil: sequence list parsed with ``MegaAlignment.name2seq``.
        CloFre: dict mapping clone name (without '#') to frequency.
        Cut: exclusive lower bound on the frequency.

    Returns:
        dict mapping '#' + clone name to its sequence.
    """
    aligner = MegaAlignment()
    _order, name_to_seq = aligner.name2seq(seq_buil)
    selected = {}
    for clone_name, frequency in CloFre.items():
        if not frequency > Cut:
            continue
        key = '#' + clone_name
        selected[key] = name_to_seq[key]
    return selected
def __init__(self, cluster_information, original_seq, tumor_seq, tsp_list,
             clone_frequency_cutoff, CNV_info, ReadCountTable):
    """Store the decomposition inputs and derive lookup tables from them.

    Args:
        cluster_information: stored as ``self.Tu2Cluster``.
        original_seq: sequence list parsed into ``OriAncOrder`` /
            ``OriAnc2Seq0``.
        tumor_seq: sequence list parsed into ``TOrder`` / ``T2Seq`` and used
            to build ``self.identical_seq``.
        tsp_list: passed to ``tsp_information``; its
            ``tumor2alt_frequency()`` result becomes ``self.v_obs``.
        clone_frequency_cutoff: stored as both ``CloFreCutOff`` and
            ``freq_cutoff``.
        CNV_info: stored unchanged.
        ReadCountTable: stored unchanged.
    """
    # Inputs that are kept verbatim.
    self.Tu2Cluster = cluster_information
    self.CNV_info = CNV_info
    self.ReadCountTable = ReadCountTable

    aligner = MegaAlignment()
    original_parsed = aligner.name2seq(original_seq)
    self.OriAncOrder, self.OriAnc2Seq0 = original_parsed
    tumor_parsed = aligner.name2seq(tumor_seq)
    self.TOrder, self.T2Seq = tumor_parsed
    self.SharePosi = aligner.GetSharePosi1(self.OriAnc2Seq0, 'T')

    self.all_tsp = tsp_information(tsp_list)
    self.CloFreCutOff = clone_frequency_cutoff
    self.v_obs = self.all_tsp.tumor2alt_frequency()

    similar_groups = aligner.identify_similar_seq(tumor_seq, 0)
    self.identical_seq = aligner.make_similar_seq_dic(similar_groups)
    self.freq_cutoff = self.CloFreCutOff
# finalize_results: combine original and decomposed clone results into one
# sequence list and one tumor -> {clone: frequency} table.
#
# Parameters (as used in the body):
#   decomposed_seq_builder / origianl_seq_builder -- sequence lists parsed with
#       name2seq into DecomSeqDic / OriSeqDic ('#name' -> seq).
#   decomposed_Tumor2Clone_frequency / original_Tumor2Clone_frequency -- dicts
#       mapping tumor -> {clone name: frequency}.
#   REP -- integer; used as 'REP' + str(REP) suffix and as REP - 1 in a match.
# Returns: (Align.UpMeg(NewCloSeqDic, []), NewCloFre).
#
# Per tumor: when the tumor is absent from, or empty in, the decomposed table
# the original clones with frequency > 0 are copied. Otherwise decomposed
# clones with frequency > 0 are used; a clone whose name contains 'Clu' but no
# 'REP', or contains 'REP' + str(REP - 1), is stored under name + 'REP' +
# str(REP); other clones keep their name and take the original sequence when
# OriSeqDic has it.
# NOTE(review): dict.has_key exists only in Python 2; 'in' is the portable form.
# NOTE(review): indentation was lost, so whether 'NewCloFre[Tu] = CloFre' runs
# for every tumor or only in the last branch cannot be confirmed from here.
def finalize_results(self, decomposed_seq_builder, decomposed_Tumor2Clone_frequency, origianl_seq_builder, original_Tumor2Clone_frequency, REP): Align = MegaAlignment() Ls, DecomSeqDic = Align.name2seq(decomposed_seq_builder) # print Ls Ls, OriSeqDic = Align.name2seq(origianl_seq_builder) NewCloSeqDic = {} NewCloFre = {} # print decomposed_Tumor2Clone_frequency,original_Tumor2Clone_frequency for Tu in original_Tumor2Clone_frequency: if decomposed_Tumor2Clone_frequency.has_key(Tu) != True: CloFre = original_Tumor2Clone_frequency[Tu] for Clo in CloFre: if CloFre[Clo] > 0: NewCloSeqDic['#' + Clo] = OriSeqDic['#' + Clo] elif decomposed_Tumor2Clone_frequency[Tu] == {}: CloFre = original_Tumor2Clone_frequency[Tu] for Clo in CloFre: if CloFre[Clo] > 0: NewCloSeqDic['#' + Clo] = OriSeqDic['#' + Clo] else: CloFre0 = decomposed_Tumor2Clone_frequency[Tu] CloFre = {} for Clo in CloFre0: Fre = CloFre0[Clo] if Fre > 0: if (Clo.find('Clu') != -1 and Clo.find('REP') == -1) or Clo.find('REP' + str(REP - 1)) != -1: CloFre[Clo + 'REP' + str(REP)] = Fre NewCloSeqDic['#' + Clo + 'REP' + str(REP)] = DecomSeqDic['#' + Clo] else: CloFre[Clo] = Fre if OriSeqDic.has_key('#' + Clo) == True: NewCloSeqDic['#' + Clo] = OriSeqDic['#' + Clo] else: NewCloSeqDic['#' + Clo] = DecomSeqDic['#' + Clo] NewCloFre[Tu] = CloFre rename_seq_builder = Align.UpMeg(NewCloSeqDic, []) # open('AA','r').readlines() return rename_seq_builder, NewCloFre
def __init__(self, tumor_seq, tsp_list, mao_file):
    """Parse the tumor alignment and load the observed SNV frequencies.

    Args:
        tumor_seq: sequence list parsed with ``MegaAlignment.name2seq`` into
            ``tumor_list`` / ``tumor2seq``.
        tsp_list: stored, and passed to ``tsp_information`` whose
            ``tumor2alt_frequency()`` result becomes ``self.Tu2SNV``.
        mao_file: stored unchanged.
    """
    aligner = MegaAlignment()
    names, name_to_seq = aligner.name2seq(tumor_seq)
    self.tumor_list = names
    self.tumor2seq = name_to_seq
    # Sequence length is read from the first listed tumor.
    self.Len = len(self.tumor2seq[self.tumor_list[0]])

    self.mao_file = mao_file
    self.tsp_list = tsp_list
    self.Tu2SNV = tsp_information(tsp_list).tumor2alt_frequency()
def add_back_CNVSNV(self, DecomTu2Seq_builder_sub, CNV_information,
                    original_seqs_builder_all,
                    original_Tumor2Clone_frequency, tsp_list):
    """Expand sub-alignment clone sequences back to all SNV positions.

    For a tumor with a non-empty decomposed sequence list, each clone
    sequence (which covers only positions whose CNV entry is 'normal') is
    expanded to the full length of the tumor's CNV list: 'normal' positions
    take the next character from the sub-sequence, other positions become
    'A' when the observed frequency there is 0 and '?' otherwise. If the
    clone name already exists in the original alignment, the original
    sequence is used instead of the expanded one.

    For a tumor with an empty list, the original clones with frequency > 0
    are copied unless a sequence of that name was already stored.

    Args:
        DecomTu2Seq_builder_sub: dict mapping tumor -> sequence list (or []).
        CNV_information: dict mapping tumor -> per-position CNV labels.
        original_seqs_builder_all: sequence list of the original clones.
        original_Tumor2Clone_frequency: dict keyed by 'T-' + tumor mapping
            clone name -> frequency.
        tsp_list: passed to ``tsp_information`` to obtain observed
            frequencies per tumor.

    Returns:
        The sequence list built by ``MegaAlignment.UpMeg`` from the
        collected sequences.
    """
    all_tsp = tsp_information(tsp_list)
    v_obs = all_tsp.tumor2alt_frequency()
    Seq_all_dic = {}
    Align = MegaAlignment()
    Original_clols, Original_clodic_all = Align.name2seq(
        original_seqs_builder_all)
    for Tumor in DecomTu2Seq_builder_sub:
        Seq_builder_sub = DecomTu2Seq_builder_sub[Tumor]
        if Seq_builder_sub != []:
            SNVfre_list = v_obs[Tumor]
            CloLs, Clo2Seq = Align.name2seq(Seq_builder_sub)
            CNVinfo = CNV_information[Tumor]
            Len = len(CNVinfo)
            for Clo in Clo2Seq:
                Seq_sub = Clo2Seq[Clo]
                c_seq = 0  # cursor into the sub-sequence ('normal' sites only)
                pieces = []
                for c_all in range(Len):
                    if CNVinfo[c_all] == 'normal':
                        pieces.append(Seq_sub[c_seq])
                        c_seq += 1
                    elif SNVfre_list[c_all] == 0:
                        pieces.append('A')
                    else:
                        pieces.append('?')
                # "in" replaces dict.has_key, which Python 3 removed.
                if Clo in Original_clodic_all:
                    Seq_all_dic[Clo] = Original_clodic_all[Clo]
                else:
                    Seq_all_dic[Clo] = ''.join(pieces)
        else:
            CloFre = original_Tumor2Clone_frequency['T-' + Tumor]
            for Clo in CloFre:
                if CloFre[Clo] > 0 and '#' + Clo not in Seq_all_dic:
                    Seq_all_dic['#' + Clo] = Original_clodic_all['#' + Clo]
    decom_all_seq_builder = Align.UpMeg(Seq_all_dic, [])
    return decom_all_seq_builder
# ReNameCloFreMeg: rename clones and rebuild the sequence list and the
# tumor -> {clone: frequency} table with the new names.
#
# Parameters (as used in the body):
#   seqs -- sequence list parsed with name2seq into Clo2Seq ('#name' -> seq).
#   CloFre -- dict tumor -> {clone name: frequency}. When it equals {}, a
#       'T-A' entry giving every clone frequency 1 is inserted INTO THE
#       CALLER'S dict (the argument is mutated).
#   Name -- 'list' selects names built by concatenating tumor + rank for every
#       tumor the clone hits; any other value selects 'Clone<N>' numbering in
#       order of first appearance.
# Returns: (out, NewT2C2F, NewCloOrder) where out starts with the MEGA header
#   and a '#hg19' all-'A' entry, followed by renamed clones.
#
# Clones per tumor are ordered by CloneFrequencyAnalizer.Sort(HitClo, Clo2Fre)
# (the inline comment says "from large frequency").
# NOTE(review): TuLs is built from Tu[3:] and sorted but never read afterwards,
# while the naming loop uses Tu[2:] -- confirm which prefix length is intended.
# NOTE(review): indentation was lost on this line; nesting is not recoverable
# from here.
def ReNameCloFreMeg(self, seqs, CloFre, Name): Align = MegaAlignment() CloFreAnalize = CloneFrequencyAnalizer() NameOrder, Clo2Seq = Align.name2seq(seqs) if CloFre == {}: CloFre['T-A'] = {} for Clo in Clo2Seq: CloFre['T-A'][Clo[1:]] = 1 # print Clo2Seq,seqs Len = len(Clo2Seq[NameOrder[0]]) out = [ '#MEGA', '!Title SNVs;', '!Format datatype=dna;', ' ', '#hg19', 'A' * Len ] TuLs = [] for Tu in CloFre: TuLs.append(Tu[3:]) TuLs.sort() Old2NewCloLs = {} Old2NewCloNum = {} CloOrder = [] Num = 1 for Tu in CloFre: Clo2Fre = CloFre[Tu] HitClo = [] for Clo in Clo2Fre: if Clo2Fre[Clo] > 0: HitClo.append(Clo) Tu = Tu[2:] C = 1 CloLs, Fre2Clo = CloFreAnalize.Sort(HitClo, Clo2Fre) #from large frequency for Clo in CloLs: Code = Clo in Old2NewCloLs if Code != True: Old2NewCloLs[Clo] = '' Old2NewCloNum[Clo] = 'Clone' + str(Num) CloOrder.append(Clo) Num += 1 Old2NewCloLs[Clo] += Tu + str(C) C += 1 if Name == 'list': Old2NewClo = Old2NewCloLs else: Old2NewClo = Old2NewCloNum NewCloOrder = [] NewT2C2F = {} for Clo in CloOrder: NewCloOrder.append(Old2NewClo[Clo]) out += ['#' + Old2NewClo[Clo], Clo2Seq['#' + Clo]] #+'\n' for Tu in CloFre: C2F = CloFre[Tu] NewC2F = {} for C in C2F: if C2F[C] > 0: NewC2F[Old2NewClo[C]] = C2F[C] NewT2C2F[Tu] = NewC2F return out, NewT2C2F, NewCloOrder
# find_decomposed_clone: classify the clones decomposed in round REP using
# their branch lengths in a Newick string, and drop tip clones that have no
# additional mutation relative to some other clone.
#
# Parameters (as used in the body):
#   no_back_para_mut_decomposed_seq -- sequence list parsed with name2seq.
#   REP -- round number; only names containing 'Clu' and ending in
#       'REP' + str(REP) are considered.
#   Tree -- Newick text; the branch length of a clone is read character by
#       character after '<name>:' up to the next ',' or ')'.
# Returns: (NewDecom, RmDecom, NewClo2Seq_buil)
#   NewDecom -- 'n' if no clone matched, 'y' if at least one matched clone has
#       a non-zero branch length, otherwise 'anc'.
#   RmDecom -- tip clones for which CountAdditionalMut(tip, other) == 0 for
#       some other clone.
#   NewClo2Seq_buil -- the input list when RmDecom is empty, else
#       Align.UpMeg of the clones not in RmDecom.
#
# Posi = Tree.find(Clo[1:] + ':') + len(Clo): len(Clo) equals the length of the
# searched text, so Posi lands just after the colon.
# NOTE(review): if the name is not in Tree, find() returns -1 and the scan
# starts at an unrelated offset -- confirm callers guarantee presence.
# NOTE(review): indentation was lost on this line; nesting is not recoverable
# from here.
def find_decomposed_clone(self, no_back_para_mut_decomposed_seq, REP, Tree): Align = MegaAlignment() CloLs, Clo2Seq = Align.name2seq(no_back_para_mut_decomposed_seq) DecTipLs = [] DecLs = [] DecAncLs = [] RmDecom = [] for Clo in CloLs: # ClosestAnc = Align.find_closest_anc(Clo,Clo2Seq) # if ClosestAnc!='' : # DecAncLs.appned(Clo) if Clo.find('Clu') != -1: ID = 'REP' + str(REP) In = -1 * len(ID) if Clo[In:] == ID: DecLs.append(Clo) Posi = Tree.find(Clo[1:] + ':') + len(Clo) # print Tree[Posi] Go = 'y' BraLen = '' while Go == 'y': BraLen += Tree[Posi] if Tree[Posi] == ',' or Tree[Posi] == ')': Go = 'n' Posi += 1 # print Clo,BraLen if float(BraLen[:-1]) == 0: DecAncLs.append(Clo) ###### else: DecTipLs.append(Clo) # print DecLs,DecAncLs if DecLs == []: NewDecom = 'n' elif DecTipLs != []: NewDecom = 'y' for Tip in DecTipLs: TipSeq = Clo2Seq[Tip] OriTu = Tip.split('Clu')[0] # TipMutC Anc = 'n' for Clo in Clo2Seq: if Clo != Tip: # and OriTu!=Clo: UniNum = Align.CountAdditionalMut(TipSeq, Clo2Seq[Clo]) if UniNum == 0: Anc = 'y' if Anc == 'y': RmDecom.append(Tip) else: NewDecom = 'anc' # NewDecom='anc' # for Dclo in DecLs: # if DecAncLs.count(Dclo)==0: NewDecom='y' # print Clo2Seq.keys() if RmDecom == []: NewClo2Seq_buil = no_back_para_mut_decomposed_seq else: NewCloDic = {} for Clo in Clo2Seq: if RmDecom.count(Clo) == 0: NewCloDic[Clo] = Clo2Seq[Clo] NewClo2Seq_buil = Align.UpMeg(NewCloDic, []) return NewDecom, RmDecom, NewClo2Seq_buil
def add_back_anc(self, Sub_seq_builder, All_seq_builder):
    """Overlay sub-alignment sequences onto the full alignment.

    Every name present in ``All_seq_builder`` is kept; its sequence is taken
    from ``Sub_seq_builder`` when that alignment has the same name, otherwise
    from ``All_seq_builder``. Names that appear only in the sub-alignment are
    not added.

    Args:
        Sub_seq_builder: sequence list parsed with
            ``MegaAlignment.name2seq``.
        All_seq_builder: sequence list parsed with
            ``MegaAlignment.name2seq``.

    Returns:
        The sequence list built by ``MegaAlignment.UpMeg`` from the merged
        name -> sequence mapping.
    """
    Align = MegaAlignment()
    _, Sub = Align.name2seq(Sub_seq_builder)
    _, All = Align.name2seq(All_seq_builder)
    Clo2Seq = {}
    for Clo in All:
        # "in" replaces dict.has_key, which Python 3 removed.
        Clo2Seq[Clo] = Sub[Clo] if Clo in Sub else All[Clo]
    return Align.UpMeg(Clo2Seq, [])
def __init__(self, ini_seq_builder, v_obs, clone_frequencies, CNV,
             freq_cutoff):
    """Store the inputs and parse the initial clone alignment.

    Args:
        ini_seq_builder: sequence list parsed with
            ``MegaAlignment.name2seq`` into ``clone_order`` / ``clone_seq``.
        v_obs: stored unchanged.
        clone_frequencies: stored as ``self.Tu2CloFre``.
        CNV: stored as ``self._CNV_file``.
        freq_cutoff: stored as both ``freq_cutoff`` and ``CloFreCutOff``.
    """
    self.freq_cutoff = freq_cutoff
    self.Tu2CloFre = clone_frequencies
    self.CloFreCutOff = self.freq_cutoff
    self.v_obs = v_obs

    parsed_alignment = MegaAlignment().name2seq(ini_seq_builder)
    self.clone_order, self.clone_seq = parsed_alignment
    self._CNV_file = CNV
    # Site count is read from the first listed clone.
    first_clone = self.clone_order[0]
    self.snv_num = len(self.clone_seq[first_clone])
# get_decomposed_seq: cluster SNVs per tumor, try to decompose each tumor's
# genotype into candidate clones, and return the resulting clone sequences.
#
# Reads from self: tumor_seqs, ini_seq_builder, v_obs, Tu2CloFre, _CNV_file,
#   freq_cutoff, clone_seq.
# Steps visible in the body:
#   1. SNPClusterGenerator_cnv1(...).cluster_cnv() gives a per-tumor cluster
#      structure (inline comment: Tu2Cluster={tumor:[[seq_builder,{tumor:{clone frequency}}]]}).
#   2. For tumors with a non-empty cluster entry, get_candidate_decomposed_clones
#      is called; a non-empty second return value marks a successful
#      decomposition and its sequences are merged into AllhitWithDecom.
#      Otherwise the tumor's original hit clones (extract_hitseq with
#      self.freq_cutoff) are merged instead.
#   3. If nothing was decomposed, or Align.find_redundant reports that a
#      decomposed tumor genotype still matches a sequence in AllhitWithDecom,
#      the original self.clone_seq is returned with an explanatory message.
# Returns: (name -> sequence dict, status message string).
# NOTE(review): indentation was lost on this line, so the exact nesting of the
# final 'return' statements relative to the loop cannot be confirmed here.
def get_decomposed_seq(self): Align=MegaAlignment() TuLs, Tu2Seq = Align.name2seq(self.tumor_seqs) print('make SNV clusters') clusters = SNPClusterGenerator_cnv1(self.ini_seq_builder, self.v_obs, self.Tu2CloFre, self._CNV_file, self.freq_cutoff) Tumor_cluster_dic = clusters.cluster_cnv() #Tu2Cluster={tumor:[[seq_builder,{tumor:{clone frequency}}]]} print('Decompose incorrect sample genotype clones') AllhitWithDecom={} All_convol_tuseq=[] DecomLs=[] for Tu in Tumor_cluster_dic: ClusterInfo = Tumor_cluster_dic[Tu] if ClusterInfo != []: HitWithDecomSeq_build,convol_tuseq = self.get_candidate_decomposed_clones(Tu,ClusterInfo,Tu2Seq['#'+Tu]) if convol_tuseq!='': A1,HitWithDecomSeq_dic=Align.name2seq(HitWithDecomSeq_build) AllhitWithDecom.update(HitWithDecomSeq_dic) All_convol_tuseq.append(convol_tuseq) DecomLs.append(Tu) else: Original_hit_seq_dic = self.extract_hitseq(self.ini_seq_builder,self.Tu2CloFre['T-'+Tu],self.freq_cutoff) AllhitWithDecom.update(Original_hit_seq_dic) else: Original_hit_seq_dic = self.extract_hitseq(self.ini_seq_builder,self.Tu2CloFre['T-'+Tu],self.freq_cutoff) AllhitWithDecom.update(Original_hit_seq_dic) if DecomLs==[]: return self.clone_seq,'no decomposed clone was made' else: for ConvTuSeq in All_convol_tuseq: Redun_ls=Align.find_redundant(ConvTuSeq,AllhitWithDecom) if Redun_ls!=[]: return self.clone_seq,'tumor genotype that was decomposed was hit in different tumor: failed decomposition' return AllhitWithDecom,'decomposed'+str(DecomLs)
def __init__(self, ini_seq_builder, tsp_list, clone_frequencies, CNV,
             freq_cutoff):
    """Store the inputs, load observed frequencies and parse the alignment.

    Args:
        ini_seq_builder: sequence list parsed with
            ``MegaAlignment.name2seq`` into ``clone_order`` / ``clone_seq``.
        tsp_list: stored, and wrapped in ``tsp_information`` whose
            ``tumor2alt_frequency()`` result becomes ``self.v_obs``.
        clone_frequencies: stored as ``self.Tu2CloFre``.
        CNV: stored as ``self._CNV_file``.
        freq_cutoff: stored as both ``freq_cutoff`` and ``CloFreCutOff``.
    """
    self.tsp_list = tsp_list
    self.freq_cutoff = freq_cutoff
    self.Tu2CloFre = clone_frequencies

    tsp_reader = tsp_information(tsp_list)
    self.all_tsp = tsp_reader
    self.CloFreCutOff = self.freq_cutoff
    self.v_obs = tsp_reader.tumor2alt_frequency()

    parsed_alignment = MegaAlignment().name2seq(ini_seq_builder)
    self.clone_order, self.clone_seq = parsed_alignment
    self._CNV_file = CNV
# _remove_redund_seqs: collapse sequences that have zero differences
# (per self._count_diff_num) into a single entry and pick one name for each
# group.
#
# Parameter: Meg -- sequence list parsed with name2seq.
# Returns: list starting with the three MEGA header lines followed by
#   alternating chosen-name / sequence entries.
#
# Every sequence is compared with every sequence (including itself), so the
# pairwise step is quadratic in the number of sequences.
# Name choice within a group, as coded in the if/elif chain: the first name is
# the starting pick; '#hg19' always wins once seen; a name containing neither
# 'Node' nor 'Clu' replaces the pick; a pick containing 'Node' is replaced by
# the next candidate; among 'Clu' names, one containing 'REP' replaces one
# without.
# NOTE(review): unlike other header lists in this file, this header has no
# trailing ' ' element -- confirm consumers accept both.
# NOTE(review): Len is computed but not used in the visible code.
# NOTE(review): indentation was lost on this line; nesting is not recoverable
# from here.
def _remove_redund_seqs(self, Meg): print 'removing redundant seqs...' Align = MegaAlignment() NameOrder, Name2Seq = Align.name2seq(Meg) out2 = ['#MEGA', '!Title SNVs;', '!Format datatype=dna;'] c = 0 Name2IdenLs = {} SeqNum = len(NameOrder) Len = len(Name2Seq[NameOrder[0]]) while c < SeqNum: Ref = NameOrder[c] RefSeq = Name2Seq[Ref] Name2IdenLs[Ref] = [Ref] Tc = 0 while Tc < SeqNum: Tar = NameOrder[Tc] TarSeq = Name2Seq[Tar] DifC = self._count_diff_num(RefSeq, TarSeq) if DifC == 0: Name2IdenLs[Ref].append(Tar) Tc += 1 c += 1 Done = [] for Name in Name2Seq: Code = Name in Done if Code != True: IdenLs = Name2IdenLs[Name] GoodName = '' for i in IdenLs: if GoodName == '': GoodName = i elif i == '#hg19': GoodName = i elif GoodName == '#hg19': pass elif i.find('Node') == -1 and i.find('Clu') == -1: GoodName = i elif GoodName.find('Node') == -1 and GoodName.find( 'Clu') == -1: pass elif GoodName.find('Node') != -1: GoodName = i elif i.find('Node') != -1: pass elif i.find('Clu') != -1 and GoodName.find( 'Clu') != -1 and i.find( 'REP') != -1 and GoodName.find('REP') == -1: GoodName = i else: pass out2 += [GoodName, Name2Seq[Name]] Done += IdenLs return out2
# clone_to_tumor_phylogeny: choose the clones to keep per tumor, then prune and
# root the clone tree accordingly.
#
# Parameters (as used in the body):
#   OriginalNwk -- tree passed to self.PruneTree together with the keep list.
#   Tu2CloFre -- dict tumor -> {clone name: frequency}; only frequency > 0 counts.
#   CloSeqLs -- sequence list parsed with name2seq ('#name' -> seq).
# Returns: (Pruned_Root, Keep2TuLs) where Keep2TuLs maps kept clone -> list of
#   tumors it was kept for ('hg19' is always present with an empty list).
#
# Within a tumor, a clone is marked not-kept when, against some other clone of
# the same tumor, the fraction of sites where it has 'T' and the other has 'A'
# is below 0.05. The clone with the most mutated positions
# (len(Align.GetMutPos(seq))) is tracked as LarClo and is also kept.
# NOTE(review): the 0.05 threshold is hard-coded.
# NOTE(review): uses Python 2 print statements.
# NOTE(review): indentation was lost on this line, so whether the LarClo
# bookkeeping runs once per tumor or once per clone cannot be confirmed here.
def clone_to_tumor_phylogeny(self, OriginalNwk, Tu2CloFre, CloSeqLs): KeepLs = ['hg19'] Keep2TuLs = {'hg19': []} Align = MegaAlignment() CloOr, CloSeq = Align.name2seq(CloSeqLs) print Tu2CloFre for Tu in Tu2CloFre: CloFre = Tu2CloFre[Tu] CloLs = [] for Clo in CloFre: if CloFre[Clo] > 0: CloLs.append(Clo) LarClo = '' LarMut = 0 for Clo0 in CloLs: Seq0 = CloSeq['#' + Clo0] MutC = len(Align.GetMutPos(Seq0)) if MutC > LarMut: LarMut = MutC LarClo = Clo0 Keep = 'y' for Clo1 in CloLs: if Clo0 != Clo1: Seq1 = CloSeq['#' + Clo1] UniMutNum = 0 Len = len(Seq1) c = 0 while c < Len: if Seq0[c] == 'T' and Seq1[c] == 'A': UniMutNum += 1 c += 1 Pro = 1.0 * UniMutNum / Len if Pro < 0.05: Keep = 'n' if Keep == 'y': if KeepLs.count(Clo0) == 0: KeepLs.append(Clo0) Keep2TuLs[Clo0] = [] Keep2TuLs[Clo0].append(Tu) #KeepLs.append(LarClo) if KeepLs.count(LarClo) == 0: KeepLs.append(LarClo) Keep2TuLs[LarClo] = [] Keep2TuLs[LarClo].append(Tu) RmLs = [] for Clo in CloSeq: if KeepLs.count(Clo[1:]) == 0: RmLs.append(Clo[1:]) print 'remove ancestral clones', RmLs print 'tumor ls for each clone', Keep2TuLs Pruned = self.PruneTree(OriginalNwk, KeepLs) Pruned_Root = self.RootTree(Pruned) return Pruned_Root, Keep2TuLs
def compare_good_posi_number(self, Initial, After, IniSeq_buil, AftSeq_buil): IniCou = self.count_good_posi(Initial) AftCou = self.count_good_posi(After) Align = MegaAlignment() CloLs, IniSeq_dic = Align.name2seq(IniSeq_buil) CloLs, AftSeq_dic = Align.name2seq(AftSeq_buil) ShareIni = Align.GetSharePosi1(IniSeq_dic, 'A') ShareAft = Align.GetSharePosi1(AftSeq_dic, 'A') print IniCou, AftCou, len(ShareIni), len(ShareAft) if IniCou > AftCou or len(ShareAft) > len(ShareIni): AfterGood = 'n' else: AfterGood = 'y' return AfterGood
def __init__(self, seqs_with_ancestor, tsp_list, CNV_info, freq_cutoff,
             ReadCountTable):
    """Store the cutoff and, unless called with an empty ``{}``, the inputs.

    When ``seqs_with_ancestor`` equals ``{}`` only ``self.CutOff`` is set, so
    the object can be created just to use its helper methods.

    Args:
        seqs_with_ancestor: sequence list parsed with
            ``MegaAlignment.name2seq``, or ``{}`` to skip initialisation.
        tsp_list: stored before ``self.make_readcount()`` is called.
        CNV_info: dict; stored as ``self._CNV_file``. The length of its first
            value becomes ``self.CNVnum``.
        freq_cutoff: stored as ``self.CutOff``.
        ReadCountTable: dict; stored. The length of its first value becomes
            ``self.SNVnum``.
    """
    self.CutOff = freq_cutoff
    if seqs_with_ancestor == {}:
        # Helper-only instance: nothing else to set up.
        return

    aligner = MegaAlignment()
    self.ini_seq_builder = seqs_with_ancestor
    parsed_alignment = aligner.name2seq(self.ini_seq_builder)
    self.ini_clone_order, self.ini_clone_seq = parsed_alignment
    self.tsp_list = tsp_list
    self.make_readcount()
    self._CNV_file = CNV_info
    self.ReadCountTable = ReadCountTable

    first_read_key = list(ReadCountTable)[0]
    self.SNVnum = len(ReadCountTable[first_read_key])
    first_cnv_key = list(CNV_info)[0]
    self.CNVnum = len(CNV_info[first_cnv_key])
def find_new_clone(self, new_seq_buil, old_seq_buil):
    """Report whether every new clone sequence already exists in the old set.

    Args:
        new_seq_buil: sequence list of the new clones.
        old_seq_buil: sequence list of the previous clones.

    Returns:
        'y' when ``Align.find_redundant`` finds a match in the old set for
        every new sequence other than '#hg19'; 'n' as soon as one new
        sequence has no match.
    """
    aligner = MegaAlignment()
    _, new_seqs = aligner.name2seq(new_seq_buil)
    _, old_seqs = aligner.name2seq(old_seq_buil)

    all_known = 'y'
    for clone_name, clone_seq in new_seqs.items():
        if clone_name == '#hg19':
            continue
        matches = aligner.find_redundant(clone_seq, old_seqs)
        if matches == []:
            all_known = 'n'
    return all_known
def add_cluster_Cmat(self, seq_list):
    """Build per-clone coefficient vectors from a sequence list.

    Each clone gets one value per site: 0.5 where its sequence has 'T',
    otherwise 0. The resulting dict is converted with
    ``self.convert_Cmatdic_to_mat``.

    Args:
        seq_list: sequence list parsed with ``MegaAlignment.name2seq``.

    Returns:
        ``(Cmat_mat, Cmat_dic, Cmat_clone_order)`` where ``Cmat_dic`` maps
        the clone name without its first character to the value list, and
        the other two come from ``convert_Cmatdic_to_mat``.
    """
    aligner = MegaAlignment()
    clone_names, name_to_seq = aligner.name2seq(seq_list)

    Cmat_dic = {}
    for full_name in clone_names:
        short_name = full_name[1:]
        genotype = name_to_seq['#' + short_name]
        Cmat_dic[short_name] = [
            0.5 if base == 'T' else 0 for base in genotype
        ]

    Cmat_mat, Cmat_clone_order = self.convert_Cmatdic_to_mat(Cmat_dic)
    return Cmat_mat, Cmat_dic, Cmat_clone_order
# MLancetor: infer ancestral sequences for a tree and store the results on self.
#
# Parameters (as used in the body):
#   seqs -- sequence list parsed with name2seq; also assigned to
#       MegaAncestor.alignment_file.
#   Nwk -- assigned to MegaAncestor.input_tree_file.
# Sets on self: CellLs, Cell2Seq, SNVnum (length of the first cell's sequence),
#   ancestor_states, offspring2ancestor, code2cell (from
#   retrieve_ancestor_states()), RescaledTree (from get_scaledNWK()),
#   nodeid2seq, offspring2ancestor_withou_redunSeq (from self.RemoveRedun()).
# Returns: nothing.
#
# nodeid2seq[node] is built by taking, for each of the first SNVnum entries of
# ancestor_states[node], the text before the first tab.
# NOTE(review): presumably that leading field is the most likely nucleotide --
# verify against MegaAncestor's output format.
# NOTE(review): the print calls dump whole dictionaries; they look like debug
# output.
# NOTE(review): indentation was lost on this line, so whether the trailing
# prints sit inside the node loop cannot be confirmed from here.
def MLancetor(self, seqs, Nwk): Align = MegaAlignment() # seqs = open(seqsFile,'r').readlines() self.CellLs, self.Cell2Seq = Align.name2seq(seqs) # print ('h',self.CellLs) # print (self.Cell2Seq) # open('A','r').readlines() self.SNVnum = len(self.Cell2Seq[self.CellLs[0]]) # self.InMeg = Align.AddNormal(seqs) InferAncestor = MegaAncestor() InferAncestor.alignment_file = seqs InferAncestor.input_tree_file = Nwk self.ancestor_states, self.offspring2ancestor, cell2code, self.code2cell = InferAncestor.retrieve_ancestor_states( ) self.RescaledTree = InferAncestor.get_scaledNWK() self.nodeid2seq = {} # print (self.ancestor_states) print('SNV count', self.SNVnum) # open('A','r').readlines() for node in self.ancestor_states: Seq = '' States = self.ancestor_states[node] c = 0 while c < self.SNVnum: # print (node) # print (c) # print (States) Nuc = States[c].split('\t')[0] Seq += Nuc c += 1 self.nodeid2seq[node] = Seq # print (RescaledTree) print(self.nodeid2seq) print(self.offspring2ancestor) print(self.code2cell) self.offspring2ancestor_withou_redunSeq = self.RemoveRedun()
# BranchDecClone: build a maximum-parsimony tree for the clone sequences, find
# positions flagged 'ToWild' by the tree builder, and for each tumor create
# "cut" copies of hit clones with those positions reset to 'A'.
#
# Parameters (as used in the body):
#   seq_list -- clone sequence list; parsed with name2seq, written to
#       'Test.meg', and passed to MegaMP.do_mega_mp.
#   clone_frequency -- dict tumor key -> {clone: frequency}; the tumor name is
#       the key without its first two characters.
#   Tu2CNV -- dict tumor name -> per-position CNV labels; a position is only
#       changed where the label is 'normal' or 'Bad-normal'.
# Reads from self: tsp_list, mao_file, tumor2seq, Len.
# Returns: (outSeqMaj, NewT2C2F) from Align.CombSimClo when at least one
#   'ToWild' position exists, otherwise the inputs (seq_list, clone_frequency).
#
# A flagged position is changed in a clone only when another hit clone of the
# same tumor carries 'T' there and is not itself listed for that position.
# Changed clones are stored as '<clone>Cut<tumor>' with the original
# frequency; unchanged hit clones get frequency 1 in NewC2F.
# NOTE(review): side effect -- 'Test.meg' is written on every call.
# NOTE(review): when do_mega_mp does not return True only a message is
# printed; Good_posi_info is then undefined in the code that follows -- confirm
# whether this path should return or raise.
# NOTE(review): 'id' shadows the builtin; TuSeq, NewCloLs1 and NewT2Cls are
# assigned but not read in the visible code.
# NOTE(review): indentation was lost on these lines; nesting is not
# recoverable from here.
def BranchDecClone(self, seq_list, clone_frequency, Tu2CNV): Align = MegaAlignment() TumorSampleExtract = tsp_information(self.tsp_list) CloFreAna = CloneFrequencyAnalizer() CloOrder, Clo2Seq = Align.name2seq(seq_list) Align.save_mega_alignment_to_file('Test.meg', seq_list) tree_builder = MegaMP() tree_builder.mao_file = self.mao_file id = 'branchdec_mega_alignment' status = tree_builder.do_mega_mp(seq_list, id) if status == True: seqs_with_ancestor, tree, nade_map, mask_seq, Good_posi_info = tree_builder.alignment_least_back_parallel_muts( True ) # True will execute code to remove redundant seqs (True is default) else: print 'failed to run megaMP' BadPosiLs = [] #multiple mutations BadPosi2ChnageCloLs = {} for c in Good_posi_info: Posi_Inf = Good_posi_info[c] if Posi_Inf != ['Good']: if Posi_Inf[0] == 'ToWild': BadPosiLs.append(c) BadPosi2ChnageCloLs[c] = Posi_Inf[1][0] print 'bad positions', BadPosiLs #,BadPosi2ChnageCloLs if BadPosiLs != []: NewT2C2F = {} NewT2Cls = {} for Tu in clone_frequency: NewC2F = {} single_tsp_list = TumorSampleExtract.make_single_tsp_list(Tu) CloFreDic = clone_frequency[Tu] CNV = Tu2CNV[Tu[2:]] Tu = Tu[2:] TuSeq = self.tumor2seq['#' + Tu] NewCloLs = [] NewCloLs1 = [] for Clo in CloFreDic: #original hit clo for the tumor ChangeOptions = 'n' # print Tu,CloFreDic if CloFreDic[Clo] > 0: CSeq0 = Clo2Seq['#' + Clo] ChangePosi = [] #list to fix multiple mutaitons NewBadPosi = [ ] #remove fixed multiple mutations from BadExtMutPosi for Bad in BadPosi2ChnageCloLs: if BadPosi2ChnageCloLs[Bad].count( '#' + Clo) != 0 and (CNV[Bad] == 'normal' or CNV[Bad] == 'Bad-normal'): Change = 'n' for Oth in CloFreDic: #find multiple mutations at the external branch if Oth != Clo and CloFreDic[Oth] > 0: Soth = Clo2Seq['#' + Oth] if Soth[Bad] == 'T' and BadPosi2ChnageCloLs[ Bad].count('#' + Oth) == 0: Change = 'y' if Change == 'y': ChangePosi.append(Bad) else: NewBadPosi.append(Bad) print 'change positions', Tu, ChangePosi if ChangePosi != []: #fix multiple 
mutaitons # print 'hhh' CutCloSeq = Align.ModSeq(CSeq0, ChangePosi, 'A', self.Len) NewCloLs.append(Clo + 'Cut' + Tu) NewC2F[Clo + 'Cut' + Tu] = CloFreDic[Clo] Clo2Seq['#' + Clo + 'Cut' + Tu] = CutCloSeq ChangeOptions = 'y' if ChangeOptions == 'n': NewC2F[Clo] = 1 NewT2C2F[Tu] = NewC2F # print Clo2Seq hitseq_align, hitclone_frequency = CloFreAna.ListHitCloAndSeq( NewT2C2F, Clo2Seq) outSeqMaj, outSeqAmb, NewT2C2F = Align.CombSimClo( hitseq_align, hitclone_frequency, 0.0) # print outSeqMaj, NewT2C2F return outSeqMaj, NewT2C2F else: return seq_list, clone_frequency
# AdjDecClo: apply per-position corrections to clone sequences based on the
# back/parallel mutation report BackFor.
#
# Parameters (as used in the body):
#   ID, NodeMap -- not read in the visible code.
#   SeqLs -- sequence list parsed with name2seq into Anc2Seq.
#   BackFor -- indexable by position; each entry is ['Good'] or
#       [change, [list_a, list_b]] where change is 'ToMut' or 'ToWild' and the
#       lists hold clone names.
#   Tree -- passed to TreeAnalizer.Get_branch_lenghth(Tree, name).
# Returns: Align.RmRedunSeq(outNew) with a '#hg19' all-'A' entry appended.
#
# For every non-'Good' position one of the two clone lists is selected (the
# choice depends on whether each list is a single name, whether the names
# contain 'Clu', and whether both share the same prefix before 'Clu'); the
# selected clones are scheduled to get 'T' ('ToMut') or 'A' ('ToWild') there.
# Corrections are applied only to clones that are not '#Node...' entries,
# have a recorded change, and whose branch length is >= 1; all other
# non-node sequences are copied unchanged.
# NOTE(review): the header entry is 'MEGA' (no leading '#') -- confirm
# RmRedunSeq does not depend on it.
# NOTE(review): the final len(Anc2Seq[Clo]) relies on the loop variable of the
# last loop, so an empty Anc2Seq would fail here.
# NOTE(review): dict.has_key exists only in Python 2.
# NOTE(review): indentation was lost on this line; nesting is not recoverable
# from here.
def AdjDecClo(self, ID, SeqLs, NodeMap, BackFor, Tree): Align = MegaAlignment() Ao, Anc2Seq = Align.name2seq(SeqLs) Len = len(Anc2Seq[Ao[0]]) outNew = ['MEGA', '!Title SNVs;', '!Format datatype=dna;', ' '] Clu2Change = {} for Name in Anc2Seq: if Name.find('Clu') != -1: Clu2Change[Name] = {} Posi = 0 while Posi < Len: i = BackFor[Posi] if i != ['Good']: Change = i[0] ChangeCloLs = i[1][0] #list # print i,Posi A = i[1][0][0].split('Clu')[0] B = i[1][1][0].split('Clu')[0] # print A,B if (len(i[1][0]) == 1 and len(i[1][1]) == 1 and i[1][0][0].find('Clu') == -1 and i[1][1][0].find('Clu') != -1): ChangeCloLs = i[1][1] #list if A == B: ChangeCloLs = i[1][0] else: ChangeCloLs = i[1][0] #list if A == B: ChangeCloLs = i[1][1] #print ChangeCloLs if Change == 'ToMut': #BC>0: #back for Clo in ChangeCloLs: # if Clo.find('Clu')!=-1: if Clu2Change.has_key(Clo) != True: Clu2Change[Clo] = {} Clu2Change[Clo][Posi] = 'T' if Change == 'ToWild': #MC>0: #multi for Clo in ChangeCloLs: # if Clo.find('Clu')!=-1: if Clu2Change.has_key(Clo) != True: Clu2Change[Clo] = {} Clu2Change[Clo][Posi] = 'A' Posi += 1 # print Clu2Change # open('AAA','r').readlines() for Clo in Anc2Seq: if Clo.find('#Node') == -1: TreeAna = TreeAnalizer() BraLen = TreeAna.Get_branch_lenghth(Tree, Clo[1:]) # if Clo.find('Clu')!=-1 and BraLen>=1: if BraLen >= 1 and Clu2Change.has_key(Clo): Change = Clu2Change[Clo] CluSeq = Anc2Seq[Clo] Len = len(CluSeq) c = 0 NewSeq = '' while c < Len: Code = c in Change if Code == True: NewSeq += Change[c] else: NewSeq += CluSeq[c] c += 1 outNew += [Clo, NewSeq] else: outNew += [Clo, Anc2Seq[Clo]] outNew_without_redundant_seq = Align.RmRedunSeq(outNew) outNew_without_redundant_seq += ['#hg19', 'A' * len(Anc2Seq[Clo])] # open('AA','r').readlines() return outNew_without_redundant_seq
# Script fragment: the visible text appears to begin and end mid-flow, and its
# indentation was lost, so only the individual steps are described.
#   - The corrected alignment is saved to OutMegFile[:-4] + 'Correct2.meg'.
#   - PredictCellGenotype('Correct', MEGAseqs_Corrected_1, Cut2) is built and
#     Correct_error5() / get_PP_for_selected_nuc_corr() are called on it.
#   - OutMegFile is derived from InFile (last 4 characters replaced by
#     '_BEAM.meg'); the 'Correct2.meg' file and InFile are read back as lines.
#   - For each cell name from name2seq(In), the file
#     'All_alignment_PPseq-<CellC>.csv' in the current directory is read; the
#     cell name is the first double-quoted field of its first line; from the
#     fourth line on, columns 1 and 4 are parsed as floats and stored as
#     {'A': ..., 'T': ...}; the CSV is then deleted.
# NOTE(review): the path is joined with a literal backslash, which only works
# on Windows -- os.path.join would be portable.
# NOTE(review): CellC is not incremented in the visible text; confirm it is
# advanced once per cell further on.
# NOTE(review): files are opened without being closed explicitly, and 'dir'
# shadows the builtin.
Cell2PPselected = PP2.get_PP_for_selected_nuc_corr() Align.save_mega_alignment_to_file(OutMegFile[:-4]+'Correct2.meg', MEGAseqs_Corrected_1) print('Compute final PP') PP2 = PredictCellGenotype('Correct', MEGAseqs_Corrected_1, Cut2) MEGAseqs_Corrected_2 = PP2.Correct_error5() Cell2PPselected = PP2.get_PP_for_selected_nuc_corr() print('clone annotation') In0=InFile OutMegFile=In0[:-4]+'_BEAM.meg' dir = os.getcwd() MEGAseqs_Corrected=OutMegFile[:-4]+'Correct2.meg' MEGAseqs_Corrected_1 = open(MEGAseqs_Corrected,'r').readlines() In=open(In0,'r').readlines() CellLs, Cell2Seq = Align.name2seq(In) Cell2PPls={} dir=os.getcwd() CellC=1 for Cell in CellLs: PPoutF=dir+'\\All_alignment_PPseq-'+str(CellC)+'.csv' PPout=open(PPoutF,'r').readlines() CellN=PPout[0].split('\"')[1] PPout=PPout[3:] PPls=[] for i in PPout: i=i.strip().split(',') PP={'A':float(i[1]),'T':float(i[4])} PPls.append(PP) Cell2PPls[CellN]=PPls os.remove(PPoutF)
# get_candidate_decomposed_clones (cluster-info variant): build candidate clone
# genotypes by combining SNV clusters with a root genotype, then keep them
# only if the frequency regression still selects a cluster-derived clone.
#
# Parameters (as used in the body):
#   target_tumor -- tumor name; used as key into self.v_obs / self._CNV_file
#       and as prefix of new clone names.
#   CluInf_tu -- indexable; element 0 is a sequence list parsed with name2seq.
#   Tuseq -- the tumor's own genotype sequence.
# Reads from self: clone_seq, v_obs, _CNV_file, freq_cutoff.
# Returns: (sequence list, Tuseq) on success, or (CluInf_tu[0], '') when no
#   usable decomposition was found.
#
# Visible flow: names containing 'Clu' other than '#Clu0' are the clusters to
# combine (self.combinations). Each root ('#Clu0' or a name without 'Clu') is
# merged with every cluster combination whose mutated positions do not overlap
# the root's; a merged genotype is kept only if Align.find_redundant finds no
# identical sequence in self.clone_seq. Candidates plus self.clone_seq are
# regressed with CloneFrequencyComputer_cnv1(...).regress_cnv(); findcombohit
# checks whether a 'Clu' name is among the hits. If the tumor genotype itself
# is among the hits, the regression is repeated without the sequences
# identical to it.
# NOTE(review): print(a, b, ...) with several arguments prints a tuple under
# Python 2 -- confirm the intended interpreter.
# NOTE(review): indentation was lost on these lines; nesting (in particular
# which 'else' belongs to which 'if') is not recoverable from here.
def get_candidate_decomposed_clones(self, target_tumor, CluInf_tu,Tuseq): Align = MegaAlignment() NameOrder, Name2Seq = Align.name2seq(CluInf_tu[0]) LenSeq = len(Name2Seq[NameOrder[0]]) SigCluLs=[] for Name in NameOrder: #Root is the first cluster or initial candidate clone if Name!='#Clu0' and Name.find('Clu')!=-1: SigCluLs.append(Name) CluCombLs,IDend=self.combinations([],SigCluLs,0,{}) print(target_tumor,'make cluster comb',SigCluLs,CluCombLs,NameOrder) if CluCombLs!={}: CloCan2Seq={} Got_Candidate='n' for Root in NameOrder: #Root is the first cluster or initial candidate clone if Root=='#Clu0' or Root.find('Clu')==-1: RootSeq=Name2Seq[Root] if Root=='#Clu0': CloCan2Seq['#'+target_tumor+'Clu0']=RootSeq #Root is candidate clone RootMut=Align.GetMutPos(RootSeq) Got_Candidate='y' if CluCombLs!={}: for ID in CluCombLs: CluLs=CluCombLs[ID] CluN='' MutPosLs=[] for Clu in CluLs: Seq=Name2Seq[Clu] CluMut=Align.GetMutPos(Seq) MutPosLs+= CluMut CluN+=Clu.replace('#','') Good='y' for Mut in MutPosLs: if RootMut.count(Mut)!=0: Good='n' if Good=='y': AllMutPosLs=MutPosLs+RootMut Seq=Align.ModSeq('A'*LenSeq,AllMutPosLs,'T',LenSeq) Redun_ls=Align.find_redundant(Seq,self.clone_seq) #all other clones #### if Redun_ls==[]: CloCan2Seq['#'+target_tumor+Root.replace('#','')+CluN]=Seq if CloCan2Seq!={}: CloCan2Seq.update(self.clone_seq) Can_list=list(CloCan2Seq.keys()) new_seq = Align.UpMeg(CloCan2Seq,Can_list) clone_frequency_combo = CloneFrequencyComputer_cnv1(new_seq, {target_tumor:self.v_obs[target_tumor]}, {target_tumor:self._CNV_file[target_tumor]}, self.freq_cutoff) clone_frequency_combo.regress_cnv() CluComboHit=self.findcombohit(clone_frequency_combo.hitclone_seq_builder) if CluComboHit=='y': print('test the quality of clustercombo, by removing tumor seq (if any)') hit_seq_ls,hit_seq_dic=Align.name2seq(clone_frequency_combo.hitclone_seq_builder) Tuseq_ls=Align.find_redundant(Tuseq,hit_seq_dic) if Tuseq_ls==[]: print('tumor genotype did not hit, so clustercombo is good') 
return clone_frequency_combo.hitclone_seq_builder,Tuseq else: print('tumor genotype was hit, so test if clustercombo still hit without tumor genotype: testing if clustercombo genotypes fit well') Tuseq_ls=Align.find_redundant(Tuseq,CloCan2Seq) sub_hit_seq=[] for seqname in CloCan2Seq: if Tuseq_ls.count(seqname)==0:sub_hit_seq+=[seqname,CloCan2Seq[seqname]] clone_frequency_combo_new = CloneFrequencyComputer_cnv1(sub_hit_seq, {target_tumor:self.v_obs[target_tumor]}, {target_tumor:self._CNV_file[target_tumor]}, self.freq_cutoff) clone_frequency_combo_new.regress_cnv() CluComboHit=self.findcombohit(clone_frequency_combo_new.hitclone_seq_builder) if CluComboHit=='y': return clone_frequency_combo_new.hitclone_seq_builder,Tuseq else: return CluInf_tu[0],'' else: return CluInf_tu[0] ,'' else: return CluInf_tu[0],'' return CluInf_tu[0],''
# get_candidate_decomposed_clones (self.ClusterInfo variant): combine a tumor's
# SNV clusters into candidate clone genotypes, fit their frequencies with
# non-negative least squares on CNV-free positions, and return the renamed
# hits if a new cluster-derived clone is among them.
#
# Parameter: target_tumor -- tumor name.
# Reads from self: ClusterInfo (element 0: cluster name -> '<center>-<sign>'
#   text, element 1: clone/cluster name -> frequency, element 2: sequence
#   list), identical_seq, T2Seq, CNV_info, v_obs, freq_cutoff, OriAnc2Seq0.
# Returns: (out2, NewClone2Freq) when a hit clone name contains 'Clu' without
#   'REP'; otherwise ([], {}).
#
# Visible flow:
#   - Hits with frequency > 0.02 are split into this tumor's clusters
#     (names starting with target_tumor + 'Clu' and lacking 'REP') and other
#     hit clones. Clusters sharing the same center value are added too, and
#     one cluster is chosen as RootClu based on the largest center value.
#   - Cluster combinations (self.combinations) are merged with each root/hit
#     clone when their mutated positions do not overlap and the merged
#     genotype is not already among the hit clones.
#   - Observed frequencies at positions whose CNV label is 'normal' are fitted
#     with clone_frequency.do_nnls0; fitted clones above 0.02 are written to
#     out2, renamed to an identical original clone when one exists, or to the
#     tumor name when the clone carries every mutated tumor position.
#   - If the fitted clones do not cover all mutated tumor positions, the tumor
#     genotype itself is appended.
# NOTE(review): the 0.02 thresholds are hard-coded rather than taken from
# self.freq_cutoff / self.CloFreCutOff -- confirm this is intended.
# NOTE(review): TuSeq is first assigned a sequence and later reused as a
# 'y'/'n' flag; TuIdentical_seq is not read in the visible code.
# NOTE(review): Can_list = CloCan2Seq.keys() is a list only under Python 2.
# NOTE(review): indentation was lost on these lines; nesting is not
# recoverable from here.
def get_candidate_decomposed_clones(self, target_tumor): Align = MegaAlignment() CluInf_tu = self.ClusterInfo #[target_tumor] NameOrder, Name2Seq = Align.name2seq(CluInf_tu[2]) # print target_tumor, CluInf_tu[0],CluInf_tu[1] HitCloCluLs = CluInf_tu[1] #['T-'+target_tumor] TuIdentical_seq = self.identical_seq['T-' + target_tumor] LenSeq = len(Name2Seq[NameOrder[0]]) TuSeq = self.T2Seq['#' + target_tumor] Clu2center = CluInf_tu[0] SigCluLs = [] HitCloLs = [] HitCloSeq_dic = {} RootClu = '' LarCen = 0.0 for Hit in HitCloCluLs: if HitCloCluLs[Hit] > 0.02: if Hit[:len(target_tumor + 'Clu')] == target_tumor + 'Clu' and Hit.find( 'REP') == -1: SigCluLs.append(Hit) CluName = 'Clu' + Hit.split('Clu')[-1] Center = float(Clu2center[CluName].split('-')[0]) for CluN in Clu2center: Center2 = float(Clu2center[CluN].split('-')[0]) Sign = Clu2center[CluN].split('-')[1] if Center == Center2 and CluName != CluN: SigCluLs.append(target_tumor + CluN) if LarCen < Center2: # and Sign=='Pos': #Pos for middle cut, Neg for K-means LarCen = Center2 if Center == Center2: RootClu = target_tumor + CluN elif LarCen <= Center2 and Sign == 'Pos': #Pos for middle cut, Neg for K-means LarCen = Center2 if Center == Center2: RootClu = target_tumor + CluN else: HitCloLs.append(Hit) HitCloSeq_dic['#' + Hit] = Name2Seq['#' + Hit] # print 'cluls0',SigCluLs, HitCloLs, RootClu if RootClu != '': SigCluLs.remove(RootClu) HitCloLs.append(RootClu) # print 'cluls',SigCluLs, HitCloLs, RootClu if SigCluLs != []: CluCombLs, IDend = self.combinations([], SigCluLs, 0, {}) else: CluCombLs = {} # print CluCombLs if RootClu != '' or CluCombLs != {}: print 'make cluster comb' CloCan2Seq = {} Got_Candidate = 'n' for Root in HitCloLs: RootSeq = Name2Seq['#' + Root] LenSeq = len(RootSeq) RootMut = Align.GetMutPos(RootSeq) CloCan2Seq['#' + Root] = RootSeq Got_Candidate = 'y' if CluCombLs != {}: for ID in CluCombLs: CluLs = CluCombLs[ID] # print 'try make combo',Root,CluLs CluN = '' MutPosLs = [] for Clu in CluLs: Seq = 
Name2Seq['#' + Clu] CluMut = Align.GetMutPos(Seq) MutPosLs += CluMut CluN += Clu.replace(target_tumor + 'Clu', 'Clu') MutPosLs = list(set(MutPosLs)) Go = 'y' for Mut in MutPosLs: if RootMut.count(Mut) != 0: Go = 'n' if Go == 'y': AllMutPosLs = MutPosLs + RootMut Seq = Align.ModSeq('A' * LenSeq, AllMutPosLs, 'T', LenSeq) Redun_ls = Align.find_redundant(Seq, HitCloSeq_dic) if Redun_ls == []: CloCan2Seq['#' + target_tumor + Root.replace( target_tumor + 'Clu', 'Clu') + CluN] = Seq Got_Candidate = 'y' if Got_Candidate == 'y': Can_list = CloCan2Seq.keys() # print 'find the good comb',Can_list new_seq = Align.UpMeg(CloCan2Seq, Can_list) alt_frequency = [] CNVls = self.CNV_info[target_tumor] Len = len(CNVls) c = 0 TuMatPosi = [] tumor_genotype = '' while c < Len: if CNVls[c] == 'normal': alt_frequency.append(self.v_obs[target_tumor][c]) if self.v_obs[target_tumor][c] > 0: TuMatPosi.append(c) tumor_genotype += 'T' else: tumor_genotype += 'A' c += 1 clone_frequency = CloneFrequencyComputer_cnv1({}, {}, {}, self.freq_cutoff, {}) MutWildAlleleCount_noCNV = clone_frequency.make_mut_wild_allele_count_noCNV( {}, Can_list, CloCan2Seq) #PreAbsCNV, clone_order, SNV_seq, Tu2CloFre Cmatrix_noCNV, Cmatrix_noCNV_dic = clone_frequency.make_Min( Can_list, CloCan2Seq, MutWildAlleleCount_noCNV) Clone2Freq = clone_frequency.do_nnls0(Cmatrix_noCNV, Can_list, alt_frequency) out2 = ['#MEGA', '!Title SNVs;', '!Format datatype=dna;', ' '] AllMut = [] NewClone2Freq = {} CluHit = 'n' for Clo0 in Clone2Freq: NewClone2Freq[Clo0] = Clone2Freq[Clo0] if Clone2Freq[Clo0] > 0.02: SeqMutPos = Align.GetMutPos(CloCan2Seq['#' + Clo0]) TuSeq = 'y' for Mut in SeqMutPos: if TuMatPosi.count(Mut) != 0: AllMut.append(Mut) for Mut in TuMatPosi: if SeqMutPos.count(Mut) == 0: TuSeq = 'n' Iden = 'n' for OriClo in self.OriAnc2Seq0: c = 0 Dif = 'n' while c < Len: if self.OriAnc2Seq0[OriClo][c] != CloCan2Seq[ '#' + Clo0][c]: Dif = 'y' c += 1 if Dif == 'n': Iden = OriClo if Iden != 'n': out2 += [Iden, 
self.OriAnc2Seq0[Iden]] NewClone2Freq[Iden[1:]] = Clone2Freq[Clo0] NewClone2Freq[Clo0] = 0 elif TuSeq == 'n': out2 += [ '#' + Clo0.replace(target_tumor + target_tumor, target_tumor), CloCan2Seq['#' + Clo0] ] if Clo0.find('Clu') != -1 and Clo0.find( 'REP') == -1: CluHit = 'y' else: out2 += [ '#' + target_tumor, CloCan2Seq['#' + Clo0] ] NewClone2Freq[target_tumor] = Clone2Freq[Clo0] NewClone2Freq[Clo0] = 0 AllMut = list(set(AllMut)) if len(AllMut) < len(TuMatPosi): out2 += ['#' + target_tumor, tumor_genotype] if CluHit == 'y': # print 'Decomposed!' ,target_tumor,NewClone2Freq,out2 return out2, NewClone2Freq return [], {}
# remove_insignificant_clones: per tumor, drop clones whose private mutations
# do not differ significantly in observed frequency from the mutations they
# share with another clone.
#
# Parameters (as used in the body):
#   v_obs -- dict tumor -> per-position observed frequencies.
#   CloFre_clone -- dict keyed by 'T-' + tumor mapping clone -> frequency.
#   clone_seq_builder -- sequence list parsed with name2seq ('#name' -> seq).
#   Tu2CNV -- dict tumor -> per-position CNV labels; only 'normal' positions
#       are used.
#   Cut -- p-value threshold; a pair with P > Cut is treated as insignificant.
# Returns: (Align.UpMeg(new_clone_seq_dic, []), new_clone_freq) holding only
#   clones with frequency > 0 that were not flagged for their tumor.
#
# Visible flow: hit clones are ordered by decreasing mutation count. For each
# ordered pair (C1 before C2), frequencies at positions where both have 'T'
# (Share) and where only C1 has 'T' (Unique) are compared with
# scipy.stats.ttest_ind(..., equal_var=False); P is the last element of the
# result. P is forced to 1 when either group has fewer than 3 values or fewer
# than 1% of the sites. On an insignificant pair C1 is flagged if its name
# contains 'Clu' and C2's does not, otherwise C2 is flagged.
# NOTE(review): outAncAll accumulates a tab-separated report and OutAncAll
# names 'SigTest.txt', but no write occurs in the visible code.
# NOTE(review): dict.has_key and the print statement are Python 2 only.
# NOTE(review): indentation was lost on these lines; nesting is not
# recoverable from here.
def remove_insignificant_clones(self, v_obs, CloFre_clone, clone_seq_builder, Tu2CNV, Cut): Align = MegaAlignment() OutAncAll = 'SigTest.txt' outAncAll = 'tumor\tDecsendant-Ancestor\tSNV posi\tType\tObsFre\n' Clone_list, clone_seq_dic = Align.name2seq(clone_seq_builder) new_clone_freq = {} new_clone_seq_dic = {} for tumor in v_obs: CNV = Tu2CNV[tumor] Clo2Fre = CloFre_clone['T-' + tumor] ObsFre = v_obs[tumor] clone_order = [] MutNum2Clo = {} MutNum_ls = [] for Clo in Clo2Fre: if Clo2Fre[Clo] > 0: MutPosLs = Align.GetMutPos(clone_seq_dic['#' + Clo]) MutNum = len(MutPosLs) if MutNum2Clo.has_key(MutNum) != True: MutNum2Clo[MutNum] = [] MutNum2Clo[MutNum].append(Clo) MutNum_ls.append(MutNum) MutNum_ls = list(set(MutNum_ls)) MutNum_ls.sort(reverse=True) for MutNum in MutNum_ls: clone_order += MutNum2Clo[MutNum] CloNum = len(clone_order) C1Max = CloNum - 1 InsigLs = [] C1 = 0 while C1 < C1Max: Clo1 = clone_seq_dic['#' + clone_order[C1]] num_sites = len(Clo1) Min_num = 0.01 * num_sites C2 = C1 + 1 while C2 < CloNum: Clo2 = clone_seq_dic['#' + clone_order[C2]] Share = [] Unique = [] c = 0 while c < num_sites: if CNV[c] == 'normal': if Clo1[c] == 'T' and Clo2[c] == 'T': Share.append(ObsFre[c]) outAncAll += tumor + '\t' + clone_order[ C1] + '-' + clone_order[C2] + '\t' + str( c) + '\tShare\t' + str( ObsFre[c]) + '\n' elif Clo1[c] == 'T' and Clo2[c] == 'A': Unique.append(ObsFre[c]) outAncAll += tumor + '\t' + clone_order[ C1] + '-' + clone_order[C2] + '\t' + str( c) + '\tUnique\t' + str( ObsFre[c]) + '\n' c += 1 if (len(Share) < 3 or len(Unique) < 3) or (len(Share) < Min_num or len(Unique) < Min_num): P = 1 else: P = scipy.stats.ttest_ind(Share, Unique, equal_var=False) P = P[-1] if P > Cut: if clone_order[C1].find('Clu') != -1 and clone_order[ C2].find('Clu') == -1: InsigLs.append(clone_order[C1]) else: InsigLs.append(clone_order[C2]) C2 += 1 C1 += 1 InsigLs = list(set(InsigLs)) if InsigLs != []: print 'insignificant clones', tumor, InsigLs new_clone_fre_in = {} for Clo in 
Clo2Fre: if Clo2Fre[Clo] > 0 and InsigLs.count(Clo) == 0: new_clone_fre_in[Clo] = Clo2Fre[Clo] new_clone_seq_dic['#' + Clo] = clone_seq_dic['#' + Clo] new_clone_freq['T-' + tumor] = new_clone_fre_in new_seq_builder = Align.UpMeg(new_clone_seq_dic, []) return new_seq_builder, new_clone_freq