Ejemplo n.º 1
0
    def do_mega_mp(self, alignment_builder, mega_id):
        """Write the alignment, run the external tree command and load its trees.

        The alignment is saved to ``self._alignment_file``, the command
        returned by ``self._command_line_string()`` is executed with
        ``os.system`` and, if ``self._newick_file`` exists afterwards, every
        line of that file is echoed and appended to ``self._newick_trees``.

        Args:
            alignment_builder: alignment data handed unchanged to
                ``MegaAlignment.save_mega_alignment_to_file``.
            mega_id: identifier forwarded to ``self._update_file_names``.

        Returns:
            True if the Newick output file was produced, otherwise False.
        """
        self._newick_trees = []
        print('constructing MP tree')
        self._update_file_names(mega_id)
        Align = MegaAlignment()
        Align.save_mega_alignment_to_file(self._alignment_file,
                                          alignment_builder)

        # The exit status of the command is ignored; success is judged only
        # by the presence of the Newick output file.
        os.system(self._command_line_string())
        if not os.path.isfile(self._newick_file):
            return False

        # ``with`` guarantees the handle is released even if reading fails.
        with open(self._newick_file, 'r') as nf:
            tree_lines = nf.readlines()
        print('MP tree(s):')
        for line in tree_lines:
            print(line)
            self._newick_trees.append(line)
        self._retrieve_ancestral_states()

        # NOTE: temporary files are left in place; self._cleanup_temp_files()
        # is deliberately not called here.
        return True
Ejemplo n.º 2
0
    def do_mega_pp(self, alignment_builder, tree_builder, mega_id):
        """Run the external PP computation and copy its CSV output locally.

        The alignment and the tree string are written to the files named by
        ``self._alignment_file`` / ``self._input_tree_file``, the command
        from ``self._command_line_string()`` is executed, and every file
        matching ``self._pp_file[:-4] + 'seq-*.csv'`` is copied into the
        current working directory under its base name.

        Args:
            alignment_builder: alignment data handed unchanged to
                ``MegaAlignment.save_mega_alignment_to_file``.
            tree_builder: tree text written with ``self.save_str_to_file``.
            mega_id: identifier forwarded to ``self._update_file_names``.

        Returns:
            True if at least one matching CSV file was found, else False.
        """
        print('computing PP')
        self._update_file_names(mega_id)

        Align = MegaAlignment()
        Align.save_mega_alignment_to_file(self._alignment_file,
                                          alignment_builder)
        self.save_str_to_file(tree_builder, self._input_tree_file)

        os.system(self._command_line_string())

        pp_files = glob.glob(self._pp_file[:-4] + 'seq-*.csv')
        for pp_file in pp_files:
            # os.path.basename understands the separator of the running
            # platform; splitting on a literal backslash only stripped the
            # directory on Windows-style paths.
            target = os.path.basename(pp_file)
            # Copying a file onto itself makes shutil.copyfile raise, so a
            # result that already lives in the working directory is skipped.
            if os.path.abspath(pp_file) != os.path.abspath(target):
                shutil.copyfile(pp_file, target)

        return bool(pp_files)
Ejemplo n.º 3
0
    def do_mega_mp(self, alignment_builder, mega_id):
        """Run the external tree command and pick the best ancestral alignment.

        The alignment is saved to ``self._alignment_file`` and the command
        from ``self._command_line_string()`` is executed. If
        ``self._newick_file`` exists afterwards, its lines are stored in
        ``self._newick_trees``, the ancestral states are retrieved and
        ``self.best_align_result`` is set from
        ``MakeAncSeqMPMin.get_best_alignment``. Temporary files are cleaned
        up whether or not a tree was produced.

        Args:
            alignment_builder: alignment data handed unchanged to
                ``MegaAlignment.save_mega_alignment_to_file``.
            mega_id: identifier forwarded to ``self._update_file_names``.

        Returns:
            True if the Newick output file was produced, otherwise False.
        """
        self._newick_trees = []
        print('constructing MP tree')
        result = False
        self._update_file_names(mega_id)
        Align = MegaAlignment()
        Align.save_mega_alignment_to_file(self._alignment_file,
                                          alignment_builder)
        os.system(self._command_line_string())
        if os.path.isfile(self._newick_file):
            result = True
            # ``with`` guarantees the handle is released even if reading fails.
            with open(self._newick_file, 'r') as nf:
                self._newick_trees.extend(nf.readlines())
            # The file list is collected before the states are retrieved,
            # matching the original statement order.
            files = self._get_ancestral_states_files()
            self._retrieve_ancestral_states()
            seq_maker = MakeAncSeqMPMin()

            # NOTE(review): this reads ``self.newick_trees`` (no leading
            # underscore) while the list filled above is
            # ``self._newick_trees`` - presumably a property defined on the
            # class; confirm.
            self.best_align_result = seq_maker.get_best_alignment(
                files, self._mega_id, True, self.newick_trees)

        self._cleanup_temp_files()
        return result
Ejemplo n.º 4
0
 def save_without_cloneID(self, Cell2BestSeq):
     """Save the per-cell sequences to ``self.out_file_name``.

     ``Cell2BestSeq`` and ``self.CellLs`` are passed to
     ``MegaAlignment.UpMeg``; its result is written with
     ``save_mega_alignment_to_file``.
     """
     aligner = MegaAlignment()
     # These two objects are created but never used afterwards; the
     # instantiations are retained because their constructors are not
     # visible from here.
     tree_builder = MegaML()
     tree_analyzer = TreeAnalizer()
     aligner.save_mega_alignment_to_file(
         self.out_file_name, aligner.UpMeg(Cell2BestSeq, self.CellLs))
Ejemplo n.º 5
0
    def do_mega_ml(self, alignment_builder, mega_id):
        """Run the external tree command and keep the first tree it writes.

        The alignment is saved to ``self._alignment_file`` and the command
        from ``self._command_line_string()`` is executed. If
        ``self._newick_file`` exists and is not empty, its first line is
        stored in ``self._newick_trees`` (a single string, not a list) and
        echoed.

        Args:
            alignment_builder: alignment data handed unchanged to
                ``MegaAlignment.save_mega_alignment_to_file``.
            mega_id: identifier forwarded to ``self._update_file_names``.

        Returns:
            True if a tree line was read from the Newick file, else False.
            An existing but empty output file counts as a failure instead
            of raising IndexError.
        """
        print('constructing ML tree')
        result = False
        self._update_file_names(mega_id)
        print(self._alignment_file)
        Align = MegaAlignment()
        Align.save_mega_alignment_to_file(self._alignment_file,
                                          alignment_builder)

        os.system(self._command_line_string())
        if os.path.isfile(self._newick_file):
            print('ML tree:')
            # Only the first line is needed; ``with`` closes the handle even
            # when reading fails.
            with open(self._newick_file, 'r') as nf:
                first_line = nf.readline()
            if first_line:
                result = True
                self._newick_trees = first_line
                print(self._newick_trees)

        # NOTE: ancestral states are not retrieved and temporary files are
        # not cleaned up by this method.
        return result
Ejemplo n.º 6
0
    def save_with_cloneID(self, CellSeqDic):
        """Save cell sequences labelled with the clone each cell belongs to.

        For every clone in ``self.Clone2CellLs`` and every cell listed under
        it, a ``#<cell>_{<clone>}`` name line followed by
        ``CellSeqDic['#' + cell]`` is added after a fixed MEGA header. The
        result is written to ``self.out_file_name`` with its last four
        characters replaced by ``_withCloneID.meg``.
        """
        aligner = MegaAlignment()
        records = ['#MEGA', '!Title SNVs;', '!Format datatype=dna;', ' ']
        for clone_name, cells in self.Clone2CellLs.items():
            for cell in cells:
                records.extend(
                    ['#' + cell + '_{' + clone_name + '}',
                     CellSeqDic['#' + cell]])

        target = self.out_file_name[:-4] + '_withCloneID.meg'
        aligner.save_mega_alignment_to_file(target, records)
    def regress_cnv(self):
        """Estimate clone frequencies per tumor using only 'normal' positions.

        For every tumor in ``self.v_obs`` the positions whose entry in
        ``self._CNV_file[tumor]`` is not ``'normal'`` are dropped from both
        the observed values and every sequence in ``self.ini_clone_seq``.
        The reduced data are passed through
        ``make_mut_wild_allele_count_noCNV``, ``make_Min`` and ``do_nnls0``.

        Side effects:
            * ``self.tumor2CNVSNVposi[tumor]`` - list of dropped positions.
            * ``self.Cmatrix_noCNV_mat`` / ``self.Cmatrix_noCNV_dic`` -
              results of ``make_Min``; overwritten on every iteration, so
              they hold the values of the last tumor processed.
            * ``self.Tumor2Clone_frequency['T-' + tumor]`` - result of
              ``do_nnls0``.
            * ``self.hitclone_seq_builder`` - ``UpMeg`` output for every
              clone with a frequency above zero in at least one tumor.
            * Files ``Ini_freq.txt`` and ``Ini.meg`` are written.
        """
        Align = MegaAlignment()
        CloFreAna = CloneFrequencyAnalizer()
        self.Tumor2Clone_frequency = {}
        HitCloSeq_dic = {}
        self.tumor2CNVSNVposi = {}
        for tumor in self.v_obs:
            v_obs_single = self.v_obs[tumor]
            CNVls = self._CNV_file[tumor]
            Len = len(CNVls)

            # Split positions into kept observations and removed indices.
            v_obs_single_sub = []
            RmSNVPosi = []
            for posi, state in enumerate(CNVls):
                if state == 'normal':
                    v_obs_single_sub.append(v_obs_single[posi])
                else:
                    RmSNVPosi.append(posi)

            # A set gives constant-time membership tests instead of calling
            # list.count() for every position of every clone.
            removed = set(RmSNVPosi)
            Seq_dic_sub = {}
            for Clo in self.ini_clone_order:
                OldSeq = self.ini_clone_seq[Clo]
                Seq_dic_sub[Clo] = ''.join(
                    OldSeq[posi] for posi in range(Len)
                    if posi not in removed)

            self.tumor2CNVSNVposi[tumor] = RmSNVPosi
            MutWildAlleleCount_noCNV = self.make_mut_wild_allele_count_noCNV(
                {}, self.ini_clone_order,
                Seq_dic_sub)  # PreAbsCNV, clone_order, SNV_seq, Tu2CloFre
            Cmatrix_noCNV, Cmatrix_noCNV_dic = self.make_Min(
                self.ini_clone_order, Seq_dic_sub, MutWildAlleleCount_noCNV)
            self.Cmatrix_noCNV_mat = Cmatrix_noCNV
            self.Cmatrix_noCNV_dic = Cmatrix_noCNV_dic
            Clone2Freq = self.do_nnls0(Cmatrix_noCNV, self.ini_clone_order,
                                       v_obs_single_sub)
            self.Tumor2Clone_frequency['T-' + tumor] = Clone2Freq
            for Clo in Clone2Freq:
                # ``in`` replaces dict.has_key(), which Python 3 removed.
                # NOTE(review): sequences are looked up with a '#' prefix
                # here but without one in the loop above - presumably
                # do_nnls0 returns names stripped of '#'; confirm.
                if Clone2Freq[Clo] > 0 and '#' + Clo not in HitCloSeq_dic:
                    HitCloSeq_dic['#' + Clo] = self.ini_clone_seq['#' + Clo]
        self.hitclone_seq_builder = Align.UpMeg(HitCloSeq_dic, [])
        CloFreAna.save_frequency_table_to_file('Ini_freq.txt',
                                               self.Tumor2Clone_frequency, [])
        Align.save_mega_alignment_to_file('Ini.meg', self.hitclone_seq_builder)
Ejemplo n.º 8
0
    def do_mega_ancestor(self):
        """Run the external ancestral-sequence command.

        The current values of ``self._alignment_file`` and
        ``self._input_tree_file`` are treated as the alignment data and the
        tree text. After ``self._update_file_names('Ancestor')`` they are
        written to the files those attributes then name, and the command
        from ``self._command_line_string()`` is executed.

        Returns:
            True if ``self._ancestor_file`` exists afterwards, else False.
        """
        print('infer ancestral sequences')

        # Both values must be read before _update_file_names() is called,
        # which presumably repoints these attributes at new file names.
        source_alignment = self._alignment_file
        source_tree = self._input_tree_file
        self._update_file_names('Ancestor')

        MegaAlignment().save_mega_alignment_to_file(self._alignment_file,
                                                    source_alignment)
        self.save_str_to_file(source_tree, self._input_tree_file)

        os.system(self._command_line_string())
        return os.path.isfile(self._ancestor_file)
Ejemplo n.º 9
0
    def BranchDecClone(self, seq_list, clone_frequency, Tu2CNV):
        """Split clones at positions the MP tree analysis reports as 'ToWild'.

        An MP tree is built from ``seq_list``;
        ``alignment_least_back_parallel_muts(True)`` then supplies
        per-position information. Positions whose information starts with
        ``'ToWild'`` are treated as bad positions. For every tumor and every
        clone with a frequency above zero, a bad position is rewritten to
        ``'A'`` in a tumor-specific copy of the clone (named
        ``<clone>Cut<tumor>``) when

        * the clone is listed for that position,
        * the tumor's CNV state there is ``'normal'`` or ``'Bad-normal'``,
        * another clone present in the same tumor carries ``'T'`` there and
          is not itself listed for the position.

        Args:
            seq_list: alignment passed to ``MegaAlignment.name2seq`` and to
                the tree builder.
            clone_frequency: mapping tumor name -> {clone name: frequency};
                the first two characters of each tumor name are stripped
                before it is used as a key elsewhere.
            Tu2CNV: mapping stripped tumor name -> per-position CNV states.

        Returns:
            ``(sequences, clone_frequencies)``. The inputs are returned
            unchanged when the tree could not be built or when no bad
            position was found; otherwise the first and third results of
            ``Align.CombSimClo`` are returned.
        """
        Align = MegaAlignment()
        TumorSampleExtract = tsp_information(self.tsp_list)
        CloFreAna = CloneFrequencyAnalizer()
        CloOrder, Clo2Seq = Align.name2seq(seq_list)
        Align.save_mega_alignment_to_file('Test.meg', seq_list)
        tree_builder = MegaMP()
        tree_builder.mao_file = self.mao_file

        status = tree_builder.do_mega_mp(seq_list, 'branchdec_mega_alignment')
        if not status:
            # Without a tree there is no position information to work on;
            # falling through would raise NameError on Good_posi_info, so
            # the input is handed back untouched instead.
            print('failed to run megaMP')
            return seq_list, clone_frequency
        # True will execute code to remove redundant seqs (True is default)
        (seqs_with_ancestor, tree, nade_map, mask_seq,
         Good_posi_info) = tree_builder.alignment_least_back_parallel_muts(
             True)

        BadPosiLs = []  # multiple mutations
        BadPosi2ChnageCloLs = {}
        for c in Good_posi_info:
            Posi_Inf = Good_posi_info[c]
            if Posi_Inf != ['Good'] and Posi_Inf[0] == 'ToWild':
                BadPosiLs.append(c)
                BadPosi2ChnageCloLs[c] = Posi_Inf[1][0]
        print('bad positions %s' % (BadPosiLs,))
        if not BadPosiLs:
            return seq_list, clone_frequency

        NewT2C2F = {}
        for Tu in clone_frequency:
            NewC2F = {}
            # NOTE(review): the next result and TuSeq below are never used;
            # the call and the lookup are retained in case they are relied
            # on for side effects or as an implicit key check - confirm and
            # remove.
            single_tsp_list = TumorSampleExtract.make_single_tsp_list(Tu)
            CloFreDic = clone_frequency[Tu]
            Tu = Tu[2:]
            CNV = Tu2CNV[Tu]
            TuSeq = self.tumor2seq['#' + Tu]

            for Clo in CloFreDic:  # original hit clones for the tumor
                clone_was_cut = False
                if CloFreDic[Clo] > 0:
                    CSeq0 = Clo2Seq['#' + Clo]
                    ChangePosi = []  # positions to fix (multiple mutations)
                    for Bad in BadPosi2ChnageCloLs:
                        affected = BadPosi2ChnageCloLs[Bad]
                        if ('#' + Clo) not in affected:
                            continue
                        if CNV[Bad] not in ('normal', 'Bad-normal'):
                            continue
                        # Look for another clone of this tumor that carries
                        # the mutation without being flagged itself.
                        for Oth in CloFreDic:
                            if Oth == Clo or CloFreDic[Oth] <= 0:
                                continue
                            Soth = Clo2Seq['#' + Oth]
                            if Soth[Bad] == 'T' and ('#' + Oth) not in affected:
                                ChangePosi.append(Bad)
                                break
                    print('change positions %s %s' % (Tu, ChangePosi))
                    if ChangePosi:  # fix multiple mutations
                        CutCloSeq = Align.ModSeq(CSeq0, ChangePosi, 'A',
                                                 self.Len)
                        NewC2F[Clo + 'Cut' + Tu] = CloFreDic[Clo]
                        Clo2Seq['#' + Clo + 'Cut' + Tu] = CutCloSeq
                        clone_was_cut = True

                if not clone_was_cut:
                    # NOTE(review): uncut clones get the constant 1 rather
                    # than CloFreDic[Clo], including clones whose frequency
                    # is not above zero - looks intentional as a presence
                    # flag, but confirm.
                    NewC2F[Clo] = 1
            NewT2C2F[Tu] = NewC2F

        hitseq_align, hitclone_frequency = CloFreAna.ListHitCloAndSeq(
            NewT2C2F, Clo2Seq)
        outSeqMaj, outSeqAmb, NewT2C2F = Align.CombSimClo(
            hitseq_align, hitclone_frequency, 0.0)
        return outSeqMaj, NewT2C2F
Ejemplo n.º 10
0
# Script body: the input file is run through PredictCellGenotype three
# times, each pass consuming the output of the previous one, and the results
# of the first two passes are saved as MEGA alignments.
Align = MegaAlignment()
In = sys.argv[1]
# The last four characters of the input name are replaced, i.e. a
# three-letter extension is assumed.
OutMegFile = In[:-4] + '_BEAM.meg'

Cut2 = 0.7  # PP cut-off

dir = os.getcwd()
InFile = In
# From here on ``In`` holds the lines of the input file; ``with`` makes sure
# the handle is closed instead of being left to the garbage collector.
with open(InFile, 'r') as in_handle:
    In = in_handle.readlines()

print('correct FPs and FNs')
PP2 = PredictCellGenotype('Correct', In, Cut2)
MEGAseqs_Corrected = PP2.Correct_error5()
Cell2PPselected = PP2.get_PP_for_selected_nuc_corr()
Align.save_mega_alignment_to_file(OutMegFile[:-4] + 'Correct1.meg',
                                  MEGAseqs_Corrected)

print('correct FPs and FNs 2')
PP2 = PredictCellGenotype('Correct', MEGAseqs_Corrected, Cut2)
MEGAseqs_Corrected_1 = PP2.Correct_error5()
Cell2PPselected = PP2.get_PP_for_selected_nuc_corr()
Align.save_mega_alignment_to_file(OutMegFile[:-4] + 'Correct2.meg',
                                  MEGAseqs_Corrected_1)

print('Compute final PP')
PP2 = PredictCellGenotype('Correct', MEGAseqs_Corrected_1, Cut2)
MEGAseqs_Corrected_2 = PP2.Correct_error5()
Cell2PPselected = PP2.get_PP_for_selected_nuc_corr()

print('clone annotation')
In0 = InFile
OutMegFile = In0[:-4] + '_BEAM.meg'
Ejemplo n.º 11
0
        # Fragment of a larger routine: names such as final_seq, final_clofre,
        # OutFile, Align, CloFreAna, tree_builder, params and v_obs are
        # defined outside the visible code.
        print('clone decomposition is complete!')

        # Rename clones before the significance test.
        final_seq1, final_clone_frequency1, final_clone_order1 = OutFile.ReNameCloFreMeg(
            final_seq, final_clofre, 'number')  ###

        print('test clone hit and remove insignificant clones')
        significant_clone = cluster_test()
        final_seq, final_clofre = significant_clone.remove_insignificant_clones_add(
            v_obs, final_clone_frequency1, final_seq1, CNV_information_test,
            Significant_cutoff)
        print('making output files')

        # Rename again, this time on the filtered result, and write the final
        # alignment and frequency table next to the input.
        final_seq1, final_clone_frequency1, final_clone_order1 = OutFile.ReNameCloFreMeg(
            final_seq, final_clofre, 'number')

        Align.save_mega_alignment_to_file(params.input_id + '_CloneFinder.meg',
                                          final_seq1)
        CloFreAna.save_frequency_table_to_file(
            params.input_id + '_CloneFinder.txt', final_clone_frequency1, [])

        # NOTE(review): ``id`` shadows the builtin of the same name.
        id = 'final'
        status = tree_builder.do_mega_mp(final_seq1, id)
        # Nothing below runs when the tree could not be built; no message is
        # printed in that case.
        if status == True:
            A1, tree = tree_builder.alignment_least_back_parallel_muts()
            Rooted = AnalyzeTree.RootTree(tree)
            # The ancestor inference is fed the final sequences and the
            # rooted tree through plain attribute assignment.
            InferAncestor = MegaAncestor()
            InferAncestor.alignment_file = final_seq1
            InferAncestor.input_tree_file = Rooted

            ancestor_states, offspring2ancestor, cell2code, code2cell = InferAncestor.retrieve_ancestor_states(
            )
            RescaledTree = InferAncestor.get_scaledNWK()
Ejemplo n.º 12
0
    # Fragment: final_seq1, id, tree_builder, Align, CloFreAna, params, v_obs
    # and the cut-off values are defined outside the visible code.
    status = tree_builder.do_mega_mp(final_seq1, id)
    if status == True:
        seqs_with_ancestor, tree, nade_map, mask_seq, Initial_Good_posi_info = tree_builder.alignment_least_back_parallel_muts(
            True
        )  # True will execute code to remove redundant seqs (True is default)
        print 'best alignment'
    else:
        print 'failed to run megaMP'
    # NOTE(review): on the failure path above ``mask_seq`` is not assigned
    # here, so the next statement raises NameError unless it was bound
    # earlier outside this fragment - confirm.
    AA, mask_seq_comb, clone_freq = Align.CombSimClo(mask_seq,
                                                     final_clone_frequency1, 0)
    print 'test clone hit and remove insignificant clones'
    significant_clone = cluster_test()
    significant_seq, significant_clone_frequency = significant_clone.remove_insignificant_clones(
        v_obs, clone_freq, mask_seq_comb, CNV_information_test,
        Significant_cutoff)
    # Write the filtered alignment and frequency table next to the input.
    Align.save_mega_alignment_to_file(params.input_id + '_CloneFinder.meg',
                                      significant_seq)
    CloFreAna.save_frequency_table_to_file(
        params.input_id + '_CloneFinder.txt', significant_clone_frequency, [])

#######################
# Delete the intermediate files of the run; a missing file still raises.
for temp_path in (params.input_id + '.txt',
                  params.input_id + '-CNV.txt',
                  'Ini.meg',
                  'Ini_freq.txt',
                  'Test.meg'):
    os.remove(temp_path)

# Report the finish time and the elapsed time since ``startTime``.
timeFile = params.input_id + '_summary.txt'
endTime = datetime.datetime.now()
print(endTime)
totalTime = endTime - startTime
print(totalTime)
Ejemplo n.º 13
0
 def get_tree_with_branchLen(self, ID):
     """Write the rescaled tree and the node sequences for ``ID``.

     ``self.RescaledTree`` is passed to ``self.GetOut`` with the file name
     ``ID + '.nwk'``; ``self.nodeid2seq`` is converted with
     ``MegaAlignment.UpMeg`` and saved as ``ID + '_NodeSeq.meg'``.
     """
     aligner = MegaAlignment()
     self.GetOut(ID + '.nwk', self.RescaledTree)
     node_records = aligner.UpMeg(self.nodeid2seq, [])
     aligner.save_mega_alignment_to_file(ID + '_NodeSeq.meg', node_records)