コード例 #1
0
    def make_trees(self, force=False):
        """Build one phyloXML tree per histone-type directory of seed files.

        Every directory yielded by ``os.walk(self.seed_directory)`` after the
        first (the base path) is treated as one histone type.  Its ``*.fasta``
        seeds are merged, with gaps removed, into ``<type>.fasta`` inside
        ``self.trees_path``, aligned with ``muscle``, passed to ``clustalw2``
        with ``-tree`` and the resulting ``<type>_aligned.ph`` is converted to
        ``<type>_no_features.xml``.

        force -- when false (default), a histone type whose
                 ``<type>_no_features.xml`` already exists is skipped.

        NOTE(review): the exit codes of ``muscle`` and ``clustalw2`` are not
        checked; a failed run only surfaces when ``Phylo.convert`` cannot read
        the expected ``.ph`` file.
        """
        for i, (root, _, files) in enumerate(os.walk(self.seed_directory)):
            if i == 0:  # skip base path
                continue
            hist_type = os.path.basename(root)
            # Single-argument print() gives the same output on Python 2 and 3.
            print("Creating tree for {}".format(hist_type))

            final_tree_name = os.path.join(self.trees_path, "{}_no_features.xml".format(hist_type))
            if not force and os.path.isfile(final_tree_name):
                continue

            if not os.path.exists(self.trees_path):
                os.makedirs(self.trees_path)

            # Combine all variants for a core histone type into one unaligned fasta file
            combined_seed_file = os.path.join(self.trees_path, "{}.fasta".format(hist_type))
            combined_seed_aligned = os.path.join(self.trees_path, "{}_aligned.fasta".format(hist_type))
            with open(combined_seed_file, "w") as combined_seed:
                for seed in files:
                    if not seed.endswith(".fasta"):
                        continue
                    for s in SeqIO.parse(os.path.join(self.seed_directory, hist_type, seed), "fasta"):
                        s.seq = s.seq.ungap("-")
                        SeqIO.write(s, combined_seed, "fasta")

            # Create trees and convert them to phyloxml
            tree = os.path.join(self.trees_path, "{}_aligned.ph".format(hist_type))
            subprocess.call(["muscle", "-in", combined_seed_file, '-out', combined_seed_aligned])
            # Build the command once so the echoed line and the executed
            # command can never drift apart.
            clustalw_cmd = [
                "clustalw2",
                "-infile={}".format(combined_seed_aligned),
                "-outfile={}".format(final_tree_name),
                '-tree',
            ]
            print(" ".join(clustalw_cmd))
            subprocess.call(clustalw_cmd)
            Phylo.convert(tree, 'newick', final_tree_name, 'phyloxml')
コード例 #2
0
	def gen_bootstrap_tree(self):
		"""Run the RAxML bootstrap step and store its trees in nexus format.

		Does nothing but log a message when ``self.bootstrap_tree`` already
		exists.  Otherwise runs ``raxmlHPC-PTHREADS`` with ``-b`` and
		``-# 100`` on the core alignment (or on the recombination-filtered
		alignment), exits the process with status 1 on a non-zero exit code,
		and converts ``RAxML_bootstrap.bootstrap`` to ``self.bootstrap_tree``.
		"""
		if os.path.isfile(self.bootstrap_tree):
			self.logger.info("Bootstrap RAxML trees have already been generated. Skipping this step...")
			return

		# Substitution model arguments.
		model_args = ['-m', 'GTRGAMMA'] if self.model == True else ['-V', '-m', 'GTRCAT']

		# Alignment fed to RAxML depends on whether recombination filtering ran.
		if self.no_recombination_filter == True:
			alignment = os.path.join(self.core_align, 'core.trimmed.aln')
		else:
			alignment = os.path.join(self.recomb_filter, "filtered_core_aln.filtered_polymorphic_sites.fasta")

		self.logger.info("Running bootstrap analysis...")
		command = ["raxmlHPC-PTHREADS", "-T", str(self.threads)]
		command.extend(model_args)
		command.extend(["-n", "bootstrap", "-s", alignment])
		command.extend([
			"-p", str(random.randint(10000, 99999)),
			"-b", str(random.randint(10000, 99999)),
			"-#", "100",
			'-w', self.phylogenetic_trees,
		])
		exit_code = snpiphy.run_command(command)
		if exit_code != 0:
			self.logger.error("RAxML bootstrap has failed.")
			sys.exit(1)

		raxml_output = os.path.join(self.phylogenetic_trees, "RAxML_bootstrap.bootstrap")
		Phylo.convert(raxml_output, 'newick', self.bootstrap_tree, 'nexus')
コード例 #3
0
def buildTree(tree, qualifier, rooted):
    """Convert a newick tree to phyloXML lines wrapped in recPhylo tags.

    tree      -- newick text; a string or an iterable of strings, written
                 out with ``writelines``.
    qualifier -- "s" wraps the lines as ``<recPhylo><spTree>…</spTree>``
                 (recPhylo is left open); "g" wraps them as
                 ``<recGeneTree>…</recGeneTree></recPhylo>``; any other
                 value adds no wrapper.
    rooted    -- when true, the first remaining line is replaced by a
                 ``<phylogeny rooted="true">`` tag.

    Returns the list of output lines.
    """
    import shutil
    import tempfile

    # Use a private scratch directory rather than fixed "temp"/"temp2" files
    # in the current directory: nothing in the CWD is overwritten and the
    # scratch files are removed even when the conversion fails.
    scratch_dir = tempfile.mkdtemp()
    try:
        newick_path = os.path.join(scratch_dir, "temp")
        phyloxml_path = os.path.join(scratch_dir, "temp2")
        with open(newick_path, "w+") as temper:
            temper.writelines(tree)
        Phylo.convert(newick_path, 'newick', phyloxml_path, 'phyloxml')
        with open(phyloxml_path, "r") as specFile:
            lines = specFile.readlines()
    finally:
        shutil.rmtree(scratch_dir, ignore_errors=True)

    lines.pop(0)  # remove first line
    lines.pop()  # remove last line

    # Inserting the recPhylo information to the gene/species tree
    if rooted:
        lines.pop(0)
        lines.insert(0, '\t<phylogeny rooted="true">\n')

    # NOTE(review): a loop of the form `for line in lines: line = "\t" + line`
    # only rebinds the loop variable and never indents anything, so no such
    # loop is kept here and the returned lines are not re-indented.  If
    # per-line indentation is wanted, use
    # `lines = ["\t" + line for line in lines]`.
    if qualifier == "s":
        lines.insert(0, "<recPhylo>\n\t<spTree>\n")
        lines.append("\t</spTree>\n")
    elif qualifier == "g":
        lines.insert(0, "\t<recGeneTree>\n")
        lines.append("\t</recGeneTree>\n</recPhylo>\n")

    return lines
コード例 #4
0
	def gen_final_tree(self):
		"""Draw bootstrap support onto the best tree and save it as nexus.

		Does nothing but log a message when ``self.final_unrooted_tree``
		already exists.  Otherwise runs ``raxmlHPC-PTHREADS -f b`` with the
		bootstrap replicates in ``RAxML_bootstrap.bootstrap`` and the
		reference tree (initial best tree, or the recombination-filtered
		tree), exits the process with status 1 on a non-zero exit code, and
		converts ``RAxML_bipartitions.final`` to ``self.final_unrooted_tree``.
		"""
		if os.path.isfile(self.final_unrooted_tree) == False:
			if self.model == True:
				options = ['-m', 'GTRGAMMA']
			else:
				options = ['-V','-m', 'GTRCAT']
			# Reference tree that receives the bootstrap support values.
			if self.no_recombination_filter == True:
				options += ["-t",os.path.join(self.initial_trees, "RAxML_bestTree.initial_trees")]
			else:
				options += ["-t",os.path.join(self.recomb_filter, "filtered_core_aln.final_tree.tre")]
			self.logger.info("Generating final tree...")
			# The run name is given exactly once: it must be "final" because
			# RAxML_bipartitions.final is the file read below.  A second,
			# conflicting "-n bootstrap" must not be passed alongside it.
			ec = snpiphy.run_command([
									"raxmlHPC-PTHREADS",
									"-T",str(self.threads) ] +
									options +
									[ "-p",str(random.randint(10000,99999)),
									"-f", "b",
									"-z", os.path.join(self.phylogenetic_trees, "RAxML_bootstrap.bootstrap"),
									"-n", "final",
									'-w', self.phylogenetic_trees
									])
			if ec != 0:
				self.logger.error("Final RAxML tree generation has failed.")
				sys.exit(1)
			Phylo.convert(os.path.join(self.phylogenetic_trees, 'RAxML_bipartitions.final'), 'newick', self.final_unrooted_tree, 'nexus')
		else:
			self.logger.info("Final RAxML tree has already been generated. Skipping this step...")
コード例 #5
0
    def download1(self):
        """Run the search and write tree/table outputs into the output folder.

        Reads the input paths from ``self.filepath`` ('input1', 'input2') and
        the output directory from ``self.outpath['output']``.  When no second
        input is given (empty string) a consensus tree is built from the
        first input and used as the guide; otherwise the second input is the
        guide.  Three result files, prefixed with a timestamp stored in
        ``self.unique``, are written, the output directory's files are added
        to ``self.listWidget`` and a message box reports success.  Any
        exception is reported in a warning box instead.
        """
        import time
        self.unique = str(int(time.time()))

        try:
            open_file1 = self.filepath['input1']
            open_file2 = self.filepath['input2']
            save_file = self.outpath['output']

            # `with` guarantees the three handles are closed even when the
            # search or one of the table writers raises.
            with open(os.path.join(save_file, f"{self.unique}_out.table.txt"),
                      "w+") as out_table, \
                 open(os.path.join(save_file, f"{self.unique}_out.table_tr2.spart"),
                      "w+") as out_table1, \
                 open(os.path.join(save_file, f"{self.unique}_out.tree.tre"),
                      "w+") as out_tree:

                if open_file2 == "":
                    print("run tree search + guide search")
                    ctr = build_consensus(open_file1)
                    with open(open_file1 + "_rtc", "w+") as f:
                        f.write(ctr.decode())
                    from Bio import Phylo
                    guide_file = open_file1[0:-4] + "guide"
                    Phylo.convert(open_file1 + "_rtc", "newick",
                                  guide_file, "newick")
                else:
                    guide_file = open_file2

                # Both modes share the search and the output writing.
                res = search(open_file1, guide_file)
                print("write: %s" % out_tree.name)
                print(res, file=out_tree)
                print("write: %s" % out_table.name)
                print(create_table(res), file=out_table)
                print(create_table_spart(res, open_file1), file=out_table1)

            self.toolButton_3.setEnabled(True)
            for entry in os.listdir(save_file):
                if os.path.isfile(os.path.join(save_file, entry)):
                    self.listWidget.addItem(entry)

        except Exception as e:
            QMessageBox.warning(
                self, "Warning",
                f"Please check data type, analysis is failed because {e}")
            return

        QMessageBox.information(self, "Information",
                                "The analysis is successfully")
コード例 #6
0
    def populate_from_newick(self, phylo_file_buffer):
        """
        Build the basic graph from newick input.

        The newick data is converted to phyloXML in an in-memory buffer,
        which is then handed to ``populate_from_phyloxml``; that method's
        result is returned.
        """
        converted = StringIO()
        Phylo.convert(phylo_file_buffer, "newick", converted, "phyloxml")
        # Rewind so the phyloXML reader starts at the beginning of the buffer.
        converted.seek(0)
        graph = self.populate_from_phyloxml(phylo_file_buffer=converted)
        return graph
コード例 #7
0
ファイル: treestore.py プロジェクト: bendmorris/rdf-treestore
    def add_trees(self, tree_file, format, tree_uri=None, rooted=False,
        taxonomy=None, tax_root=None):
        '''Convert trees residing in a text file into RDF, and add them to the
        underlying RDF store with a context node for retrieval.

        The tree is first written as a temporary CDAO file inside
        ``self.load_dir`` (labelled with ``taxonomy`` when one is given,
        copied when it is already CDAO, converted otherwise), then loaded
        through the Virtuoso bulk loader.  The temporary file is removed once
        loading finishes or fails.

        Example:
        >>> treestore.add_trees('test.newick', 'newick', 'http://www.example.org/test/')
        '''

        if tree_uri is None:
            tree_uri = os.path.basename(tree_file)
        else:
            tree_uri = self.uri_from_id(tree_uri)

        # Temp file name derived from the current time.
        digest = sha.sha()
        digest.update(str(time.time()))
        tempfile_name = '%s.cdao' % digest.hexdigest()
        temp_path = os.path.join(self.load_dir, tempfile_name)

        if taxonomy:
            # label higher-order taxa before adding
            phylogeny = bp.read(tree_file, format)
            if isinstance(taxonomy, basestring):
                taxonomy = self.get_trees(self.uri_from_id(taxonomy))[0]
            phylolabel.label_tree(phylogeny, taxonomy, tax_root=tax_root)
            with open(temp_path, 'w') as output_file:
                bp._io.write([phylogeny], output_file, 'cdao')
        elif format == 'cdao':
            # if it's already in CDAO format, just copy it
            if not os.path.abspath(tree_file) == os.path.abspath(temp_path):
                shutil.copy(tree_file, temp_path)
        else:
            # otherwise, convert to CDAO
            bp.convert(tree_file, format, temp_path, 'cdao',
                       tree_uri=tree_uri, rooted=rooted)

        try:
            # run the bulk loader to load the CDAO tree into Virtuoso
            cursor = self.get_cursor()

            # NOTE(review): tree_uri is interpolated into the statement text
            # unescaped; confirm callers never pass untrusted URIs.
            load_stmt = "ld_dir ('%s', '%s', '%s')" % (
                os.path.abspath(self.load_dir), tempfile_name, tree_uri)
            print(load_stmt)
            cursor.execute(load_stmt)

            update_stmt = "rdf_loader_run()"
            print(update_stmt)
            cursor.execute(update_stmt)

            # the next treestore add may not work if you don't explicitly delete
            # the bulk load list from the Virtuoso db after it's done
            cursor.execute('DELETE FROM DB.DBA.load_list')
        finally:
            # Remove the temporary CDAO file even when the loader raises.
            os.remove(temp_path)
コード例 #8
0
    def add_trees(self, tree_file, format, tree_uri=None, bulk_loader=None, puid=False, rooted=False):
        '''Convert trees residing in a text file into RDF, and add them to the
        underlying RDF store with a context node for retrieval.

        tree_uri    -- context URI; defaults to the base name of ``tree_file``.
        bulk_loader -- when truthy, the tree is written to ``temp.cdao`` in
                       ``treestore_dir`` and loaded through the Virtuoso bulk
                       loader; otherwise it is converted straight into the
                       RDF model.
        puid        -- when truthy and ``tree_uri`` has no URI scheme, a URI
                       containing the SHA digest of the file content is used.
        rooted      -- passed through to the CDAO conversion.

        Example:
        >>> treestore.add_trees('test.newick', 'newick', 'http://www.example.org/test/')
        '''

        if tree_uri is None: tree_uri = os.path.basename(tree_file)

        # Create a pseudo-unique URI for trees, if the tree name is not a URI already:
        if puid and not re.match(r'\w+://', tree_uri):
            # `with` closes the handle instead of leaving it to the garbage collector.
            with open(tree_file) as tree_handle:
                content_digest = sha.new(tree_handle.read()).hexdigest()
            tree_uri = 'http://phylotastic.org/hack2/%s/%s' % (content_digest, tree_uri)

        if not bulk_loader:
            bp.convert(tree_file, format, RDF.Model(self.rdf_store), 'cdao',
                       tree_uri=tree_uri, context=tree_uri, rooted=rooted)
            return

        temp_path = os.path.join(treestore_dir, 'temp.cdao')
        if format == 'cdao':
            # Already CDAO: copy it unless it is the temp file itself.
            if not os.path.abspath(tree_file) == os.path.abspath(temp_path):
                shutil.copy(tree_file, temp_path)
        else:
            bp.convert(tree_file, format, temp_path, 'cdao',
                       tree_uri=tree_uri, rooted=rooted)

        cursor = self.get_cursor()

        # NOTE(review): tree_uri is interpolated into the statement text
        # unescaped; confirm callers never pass untrusted URIs.
        load_stmt = "ld_dir ('%s', 'temp.cdao', '%s')" % (
            os.path.abspath(treestore_dir), tree_uri)
        print(load_stmt)
        cursor.execute(load_stmt)

        update_stmt = "rdf_loader_run()"
        print(update_stmt)
        cursor.execute(update_stmt)

        cursor.execute('DELETE FROM DB.DBA.load_list')
コード例 #9
0
ファイル: main.py プロジェクト: ragnarlodbrok1992/obiekt122
 def convertTreeFile(self, inputTextEdit, outputTextEdit):
     """Convert the chosen tree file and show the result in ``outputTextEdit``.

     When no file has been chosen yet, the open-file dialog is shown first.
     The conversion only runs when a file name and an input format are set
     and the input and output formats differ.  The converted file name is
     the chosen name with the input-format extension replaced by the
     output-format extension; it is stored in ``self.convertedFileName``.
     """
     # Compare by value: `is ''` tests object identity, which is never true
     # for an empty string-like object that is not the literal itself.
     if self.chosenFileName == '':
         self.showOpenFileDialog(inputTextEdit)

     # convert
     if (self.chosenInputFormat != self.chosenOutputFormat
             and self.chosenFileName != ''
             and self.chosenInputFormat != ''):

         self.convertedFileName = str(self.chosenFileName).replace(
             '.' + str(self.chosenInputFormat),'.' + str(self.chosenOutputFormat))

         Phylo.convert(str(self.chosenFileName), str(self.chosenInputFormat),
                       self.convertedFileName, str(self.chosenOutputFormat))

         with open(self.convertedFileName, 'r') as f:
             data = f.read()
             outputTextEdit.setText(data)
コード例 #10
0
	def build_initial_tree(self):
		"""Build the initial RAxML tree from the core alignment as nexus.

		Does nothing but log a message when ``self.init_tree`` already
		exists.  Otherwise runs ``raxmlHPC-PTHREADS`` with ``-# 20`` on
		``core.trimmed.aln``, exits the process with status 1 on a non-zero
		exit code, and converts ``RAxML_bestTree.initial_trees`` to
		``self.init_tree``.
		"""
		if os.path.isfile(self.init_tree):
			self.logger.info("Initial RAxML tree has already been generated. Skipping this step...")
			return

		# Substitution model arguments.
		model_args = ['-m', 'GTRGAMMA'] if self.model == True else ['-V', '-m', 'GTRCAT']

		self.logger.info("Building inital phylogenetic tree...")
		command = ["raxmlHPC-PTHREADS", "-T", str(self.threads)]
		command.extend(model_args)
		command.extend([
			"-p", str(random.randint(10000, 99999)),
			"-#", "20",
			"-s", os.path.join(self.core_align, 'core.trimmed.aln'),
			"-n", "initial_trees",
			'-w', self.initial_trees,
		])
		exit_code = snpiphy.run_command(command)
		if exit_code != 0:
			self.logger.error("RAxML initial tree building has failed.")
			sys.exit(1)

		best_tree = os.path.join(self.initial_trees, "RAxML_bestTree.initial_trees")
		Phylo.convert(best_tree, 'newick', self.init_tree, 'nexus')
コード例 #11
0
ファイル: buildtrees.py プロジェクト: Klortho/HistoneDB
    def make_trees(self):
        """Build one phyloXML tree per core-histone directory of seed files.

        Every directory yielded by ``os.walk(self.seed_directory)`` after the
        first (the parent directory) is treated as one core histone.  Its
        ``*.fasta`` seeds are merged, with gaps removed, into one fasta file
        in ``self.trees_path``, aligned with ``muscle``, passed to
        ``clustalw2 -tree`` and the resulting ``<name>_aligned.ph`` is
        converted to ``<name>_no_features.xml``.
        """
        for i, (root, _, files) in enumerate(os.walk(self.seed_directory)):
            if i == 0:
                # Skip parent directory, only allow variant hmms to be built/searched.
                # Checked before logging so no tree is announced for the parent.
                continue
            core_histone = os.path.basename(root)
            # Single-argument print() gives the same output on Python 2 and 3.
            print("Creating tree for {}".format(core_histone))

            # The combined fasta is opened for writing below, so the output
            # directory has to exist first.
            if not os.path.isdir(self.trees_path):
                os.makedirs(self.trees_path)

            # Combine all variants for a core histone type into one unaligned fasta file
            combined_seed_file = os.path.join(self.trees_path, "{}.fasta".format(core_histone))
            combined_seed_aligned = os.path.join(self.trees_path, "{}_aligned.fasta".format(core_histone))
            with open(combined_seed_file, "w") as combined_seed:
                for seed in files:
                    if not seed.endswith(".fasta"):
                        continue
                    for s in SeqIO.parse(os.path.join(self.seed_directory, core_histone, seed), "fasta"):
                        s.seq = s.seq.ungap("-")
                        SeqIO.write(s, combined_seed, "fasta")

            tree = os.path.join(self.trees_path, "{}_aligned.ph".format(core_histone))
            subprocess.call(["muscle", "-in", combined_seed_file, '-out', combined_seed_aligned])
            subprocess.call(["clustalw2", "-infile={}".format(combined_seed_aligned), '-tree'])
            Phylo.convert(tree, 'newick',
                          os.path.join(self.trees_path, "{}_no_features.xml".format(core_histone)), 'phyloxml')
コード例 #12
0
    def make_trees(self):
        """Build one phyloXML tree per core-histone directory of seed files.

        Every directory yielded by ``os.walk(self.seed_directory)`` after the
        first (the parent directory) is treated as one core histone.  Its
        ``*.fasta`` seeds are merged, with gaps removed, into one fasta file
        in ``self.trees_path``, aligned with ``muscle``, passed to
        ``clustalw2 -tree`` and the resulting ``<name>_aligned.ph`` is
        converted to ``<name>_no_features.xml``.
        """
        for i, (root, _, files) in enumerate(os.walk(self.seed_directory)):
            if i == 0:
                # Skip parent directory, only allow variant hmms to be built/searched.
                # Checked before logging so no tree is announced for the parent.
                continue
            core_histone = os.path.basename(root)
            print("Creating tree for", core_histone)

            # The combined fasta is opened for writing below, so the output
            # directory has to exist first.
            if not os.path.isdir(self.trees_path):
                os.makedirs(self.trees_path)

            # Combine all variants for a core histone type into one unaligned fasta file
            combined_seed_file = os.path.join(self.trees_path, "{}.fasta".format(core_histone))
            combined_seed_aligned = os.path.join(self.trees_path, "{}_aligned.fasta".format(core_histone))
            with open(combined_seed_file, "w") as combined_seed:
                for seed in files:
                    if not seed.endswith(".fasta"):
                        continue
                    for s in SeqIO.parse(os.path.join(self.seed_directory, core_histone, seed), "fasta"):
                        s.seq = s.seq.ungap("-")
                        SeqIO.write(s, combined_seed, "fasta")

            tree = os.path.join(self.trees_path, "{}_aligned.ph".format(core_histone))
            subprocess.call(["muscle", "-in", combined_seed_file, '-out', combined_seed_aligned])
            subprocess.call(["clustalw2", "-infile={}".format(combined_seed_aligned), '-tree'])
            Phylo.convert(tree, 'newick',
                          os.path.join(self.trees_path, "{}_no_features.xml".format(core_histone)), 'phyloxml')
コード例 #13
0
 def test_convert(self):
     """Round-trip a tree newick -> nexus -> phyloxml -> newick."""
     phyloxml_buf = StringIO()
     newick_buf = StringIO()
     # Each stage reads the rewound output of the stage before it.
     stages = (
         (EX_NEWICK, 'newick', self.mem_file, 'nexus'),
         (self.mem_file, 'nexus', phyloxml_buf, 'phyloxml'),
         (phyloxml_buf, 'phyloxml', newick_buf, 'newick'),
     )
     for source, source_format, target, target_format in stages:
         Phylo.convert(source, source_format, target, target_format)
         target.seek(0)
     final_tree = Phylo.read(newick_buf, 'newick')
     self.assertEqual(len(final_tree.get_terminals()), 28)
コード例 #14
0
ファイル: test_Phylo.py プロジェクト: zellera93/biopython
 def test_convert(self):
     """Round-trip a tree newick -> nexus -> phyloxml -> newick."""
     nexus_buf = StringIO()
     phyloxml_buf = BytesIO()  # the phyloXML stage uses a bytes buffer
     newick_buf = StringIO()
     # Each stage reads the rewound output of the stage before it.
     stages = (
         (EX_NEWICK, 'newick', nexus_buf, 'nexus'),
         (nexus_buf, 'nexus', phyloxml_buf, 'phyloxml'),
         (phyloxml_buf, 'phyloxml', newick_buf, 'newick'),
     )
     for source, source_format, target, target_format in stages:
         Phylo.convert(source, source_format, target, target_format)
         target.seek(0)
     final_tree = Phylo.read(newick_buf, 'newick')
     self.assertEqual(len(final_tree.get_terminals()), 28)
コード例 #15
0
 def test_convert(self):
     """Round-trip a tree newick -> nexus -> phyloxml -> newick."""
     buffers = [StringIO(), StringIO(), StringIO()]
     formats = ["newick", "nexus", "phyloxml", "newick"]
     # Walk the format chain; every output buffer is rewound so it can
     # serve as the input of the following conversion.
     source = EX_NEWICK
     for step, target in enumerate(buffers):
         Phylo.convert(source, formats[step], target, formats[step + 1])
         target.seek(0)
         source = target
     final_tree = Phylo.read(source, "newick")
     self.assertEqual(len(final_tree.get_terminals()), 28)
コード例 #16
0
ファイル: test_Phylo.py プロジェクト: kehey/biopython
 def test_convert(self):
     """Round-trip a tree newick -> nexus -> phyloxml -> newick."""
     nexus_out, phyloxml_out, newick_out = StringIO(), StringIO(), StringIO()
     # (input, input format, output, output format) for each conversion;
     # the output is rewound after every step so the next one can read it.
     conversions = [
         (EX_NEWICK, "newick", nexus_out, "nexus"),
         (nexus_out, "nexus", phyloxml_out, "phyloxml"),
         (phyloxml_out, "phyloxml", newick_out, "newick"),
     ]
     for in_file, in_format, out_file, out_format in conversions:
         Phylo.convert(in_file, in_format, out_file, out_format)
         out_file.seek(0)
     round_tripped = Phylo.read(newick_out, "newick")
     self.assertEqual(len(round_tripped.get_terminals()), 28)
コード例 #17
0
 def test_convert(self):
     """Round-trip a tree newick -> nexus -> phyloxml -> newick."""
     nexus_buf = StringIO()
     newick_buf = StringIO()
     # The phyloXML stage uses a bytes buffer when running on Python 3.
     if sys.version_info[0] == 3:
         from io import BytesIO
         phyloxml_buf = BytesIO()
     else:
         phyloxml_buf = StringIO()
     # Each stage reads the rewound output of the stage before it.
     stages = (
         (EX_NEWICK, 'newick', nexus_buf, 'nexus'),
         (nexus_buf, 'nexus', phyloxml_buf, 'phyloxml'),
         (phyloxml_buf, 'phyloxml', newick_buf, 'newick'),
     )
     for source, source_format, target, target_format in stages:
         Phylo.convert(source, source_format, target, target_format)
         target.seek(0)
     final_tree = Phylo.read(newick_buf, 'newick')
     self.assertEqual(len(final_tree.get_terminals()), 28)
コード例 #18
0
ファイル: PrepareTree.py プロジェクト: marghederiu/protwis
    def treeDo(self,
               d,
               infile,
               branches,
               family,
               Additional_info,
               famdict=None):
        """Convert a newick tree to phyloXML and rewrite it line by line.

        d               -- directory path; 'rong', 'raw.xml' and 'out.xml'
                           are written inside it.
        infile          -- newick text; every '-' is removed before it is
                           written to 'rong' and converted.
        branches        -- when True, every branch length other than 0.0 is
                           replaced by 0.1.
        family          -- passed to ``self.get_family_meta``.
        Additional_info -- passed to ``self.get_tree_data``.
        famdict         -- stored on ``self.famdict``.

        The rewritten XML goes to ``d + '/out.xml'``; the result of
        ``self.drawColorPanel()`` is stored in ``self.box``.  Nothing is
        returned.
        """
        self.famdict = famdict
        # Splitting and re-joining on '/' leaves the path unchanged.
        d = '/'.join(d.split('/'))
        z = infile
        # NOTE(review): this handle is never closed explicitly; the file is
        # read back by Phylo.convert two lines below.
        w = open(d + '/rong', 'w').write(z.replace('-', ''))
        raw = open(d + '/raw.xml', 'w')
        Phylo.convert(d + '/rong', 'newick', raw, 'phyloxml')
        raw.close()
        xml = open(d + '/raw.xml', 'r').readlines()
        # NOTE(review): `out` is never closed explicitly in this method.
        out = open(d + '/out.xml', 'w')
        self.get_tree_data(Additional_info)
        if self.build != False:
            self.rings['class']['include'] = False
            #self.rings['ligand']['include']=False
            #self.rings['family']['include']=False
        self.get_family_meta(family)
        charts = self.get_charts()
        self.get_colours()
        self.build_legend()
        self.get_styles()
        # flag:       set by the rooted='false' line; makes the next '>1.0<'
        #             value become '>0.0<'.
        # flag2:      [name, chart] of the latest <name> line, consumed by
        #             the next <branch_length> line.
        # stylesflag: set by the rooted='false' line; makes the <render>
        #             block get written before the following line.
        flag = False
        flag2 = ''
        stylesflag = False
        for line in xml:
            if stylesflag == True:
                out.write("<render>" + charts + "<styles>" + self.styles +
                          "</styles></render>")
                stylesflag = False
            ################# Remove header trash #######################
            if 'phyloxml' in line:
                line = line.split('phyloxml')[0] + 'phyloxml>'
            line = line.replace('\"', '\'').replace('phy:', '')
            ################# Remove forced rooting #####################
            if flag == True:
                if '>1.0<' in line:
                    line = line.replace('>1.0<', '>0.0<')
                    flag = False
            if "rooted='false'" in line:
                flag = True
                stylesflag = True
            ################# Force even branch lengths #################
            if branches == True:
                if '<branch_length>' in line:
                    if '<branch_length>0.0</branch_length>' not in line:
                        number = line.split('>')[1].split('<')[0]
                        line = line.replace(number, '0.1')
            ################# Reformat names ############################
            if '<name>' in line:
                name = line.split('<')[1].split('>')[1]
                chart = '<chart>'
                for ring in self.rings:
                    # NOTE(review): 'include' is compared with the string
                    # 'True' here, while the assignment near the top of this
                    # method stores the boolean False — confirm which type
                    # the rings dictionary actually holds.
                    if self.rings[ring]['include'] == 'True':
                        if self.rings[ring]['color_type'] == 'single':
                            if self.prots[name]['acc'] in self.rings[ring][
                                    'items']:
                                chart += '<%s>%s_true</%s>' % (ring, ring,
                                                               ring)
                            else:
                                chart += '<%s>%s_false</%s>' % (ring, ring,
                                                                ring)
                        else:
                            chart += '<' + ring + '>' + self.prots[name][
                                ring] + '</' + ring + '>'
                chart += '</chart>'
                flag2 = [name, chart]
                # Swap the tree label for the display name and tag the
                # element with the entry's 'acc' value as its bgStyle.
                line = line.replace(name, self.prots[name]['name']).replace(
                    '<name', "<name bgStyle='%s'" % self.prots[name]['acc'])
            ############## Add annotations and descriptions #############
            if '<branch_length>' in line:
                line = line.replace('>1E05<', '>0.00001<').replace('-', '')
                if '>0.0<' in line and flag == True:
                    line = line.replace('>0.0<', '>0.00001<')
                if flag2 != '':
                    # Append the annotation and chart of the <name> line
                    # remembered in flag2 to this branch-length line.
                    line = line.strip(
                        '\n'
                    ) + ' <annotation><desc>' + self.prots[
                        flag2[0]]['desc'] + ' (' + self.prots[flag2[0]][
                            'species'] + ')' + '</desc><uri>/protein/' + self.prots[
                                flag2[0]][
                                    'link'] + '</uri> </annotation>' + flag2[1]
                    flag2 = ''
            out.write(line)

        self.box = self.drawColorPanel()
コード例 #19
0
# Positional argument: path of the tree file to read.
parser.add_argument("i", help='input tree file')

# Optional string arguments, registered in order: (flag, help text, default).
for flag, description, fallback in (
        ("-o", 'output tree file', 'output.tre'),
        ("-formatIn", 'input tree format', 'newick'),
        ("-formatOut", 'tree format for output', 'nexus'),
):
    parser.add_argument(flag, help=description, type=str, default=fallback)

args = parser.parse_args()

# Input file/format and output file/format, in Phylo.convert argument order.
if1, if2 = args.i, args.formatIn
of1, of2 = args.o, args.formatOut

Phylo.convert(if1, if2, of1, of2)

コード例 #20
0
from Bio import Phylo

# Read a newick tree, print its structure and draw it as ASCII art.
tree = Phylo.read("simple.dnd", "newick")
print(tree)
Phylo.draw_ascii(tree)

# Same ASCII rendering for a second newick file.
tree2 = Phylo.read("int_node_labels.nwk", "newick")
Phylo.draw_ascii(tree2)

# Convert the second file to phyloXML and print every tree read back from it.
Phylo.convert("int_node_labels.nwk", "newick", "tree.xml", "phyloxml")
for parsed_tree in Phylo.parse("tree.xml", "phyloxml"):
    print(parsed_tree)

from Bio.Phylo.PhyloXML import Phylogeny

# Turn the first tree into a Phylogeny object and draw it.
treep = Phylogeny.from_tree(tree)
Phylo.draw(treep)

# Colour the root, the common ancestor of E and F, and one indexed clade,
# then draw the tree again.
treep.root.color = "gray"
mrca = treep.common_ancestor({"name": "E"}, {"name": "F"})
mrca.color = "salmon"
treep.clade[0, 1].color = "blue"
Phylo.draw(treep)
コード例 #21
0
def map_seqs(record_list,
             tree_file,
             file_format,
             subset_size,
             overlapping,
             binary='dcm'):
    """
    Generate a map of the sequences in sets, of at most 'subset_size', with the
    specified overlapping using the padded-Recursive-DMC3 decomposition (PRD)
    from DACTAL system. If 'tree_file' contains a relative path, the current
    working directory will be used to get the absolute path.

    Arguments :
        record_list  ( list )
            List of SeqRecord objects (from Biopython).
        tree_file  ( string )
            Input tree file.
        file_format  ( string )
            Tree file format.
        subset_size  ( int )
            Maximum subset size.
        overlapping  ( int )
            Number of overlapping sequences between any two resultant subsets.
        binary  ( Optional[string] )
            Name or path of the DCM binary file.

    Returns :
        dict
            Dictionary with the set identifiers as keys and the corresponding
            sequences as values in lists of SeqRecord objects.

    Raises :
        ValueError
            When 'subset_size' < (4 * 'overlapping').
        RuntimeError
            If the call to the dcm command raises an exception.
        IOError
            If the dcm tool can't generate a decomposition for the 'subset_size'
            and 'overlapping' values given.

    * The tree file format must be supported by Bio.Phylo.
    """
    if subset_size < (4 * overlapping):
        raise ValueError('The maximum subset size must be greater than or ' \
                         'equal to 4 times the overlapping value')
    # else : # subset_size >= (4 * overlapping)
    infile_path = get_abspath(tree_file)
    tmpfile = None
    try:
        # If the input file format is not supported by the PRD process,
        # convert it to a temporary supported file
        if file_format.lower() != 'newick':
            tmpfile = tempfile.NamedTemporaryFile()
            Phylo.convert(infile_path, file_format, tmpfile.name, 'newick')
            infile_path = tmpfile.name
        # The first decomposition process will be always executed, so there
        # is no need to overload this stage with the multiprocess generation
        set_list, further_decomp = _prd_decomposition(infile_path,
                                                      subset_size,
                                                      str(overlapping),
                                                      binary)
        # Parallelization of the recursive decomposition of the different
        # subtrees. All new subtrees are attached to 'further_decomp' file
        # list so we can launch at most one process per core, speeding up the
        # whole process
        start = 0
        to_process = len(further_decomp) - start
        pool = multiprocessing.Pool(processes=NUMCORES)
        try:
            while to_process > 0:
                end = start + min(to_process, NUMCORES)
                results = [
                    pool.apply_async(_prd_decomposition,
                                     args=(
                                         further_decomp[i],
                                         subset_size,
                                         str(overlapping),
                                         binary,
                                     )) for i in range(start, end)
                ]
                # Collect the results of all the processes launched
                for pool_result in results:
                    output = pool_result.get()
                    set_list += output[0]
                    further_decomp += output[1]
                start = end
                to_process = len(further_decomp) - start
        finally:
            # Always release the worker processes, also when a task failed.
            pool.close()
            pool.join()
        # Remove all the temporary files created for the multiprocessing stage
        for file_path in further_decomp:
            os.remove(file_path)
    finally:
        # Closing the NamedTemporaryFile deletes the converted tree file.
        if tmpfile is not None:
            tmpfile.close()
    record_dict = {record.id: record for record in record_list}
    # Map all the resultant sets with an unique set id and replace the sequence
    # ids by their corresponding Bio.SeqRecord object
    set_dict = {}
    num_zeros = len(str(len(set_list)))
    for index, seq_id_list in enumerate(set_list, 1):
        set_id = 'prdset{}'.format(str(index).zfill(num_zeros))
        set_dict[set_id] = [record_dict[seq_id] for seq_id in seq_id_list]
    return set_dict
コード例 #22
0
def reconcile_tree(gene_tree_file, reconciled_file, rec_tag, pfam_id, db):
    """Annotate a family's gene tree with taxonomy ids and run Forester GSDI.

    gene_tree_file  -- path prefix for the temporary uncompressed gene tree.
    reconciled_file -- path of the reconciled output; gzipped when produced.
    rec_tag         -- path prefix for the 'ids.pickle', 'txid.xml' and
                       other intermediate files.
    pfam_id         -- family identifier; selects '<tree_folder>/<pfam_id>.nw.gz'.
    db              -- passed to the taxonomy lookup helpers.

    Besides its arguments the function reads names that are not defined in
    this block: pplacer_flag, pplacer_queries, tree_folder,
    all_species_txids, merged_taxid, best_taxid_map, lib_path and
    species_tree_data_path.  It also adds entries to best_taxid_map.

    NOTE(review): paths are interpolated into shell command strings and the
    ids pickle is loaded with pickle.load; both are only safe for trusted
    paths and files.
    """
    # With pplacer_flag == 1 and an existing ids pickle, stop early when
    # every queried gene is already listed in it.
    if (os.path.isfile(rec_tag + 'ids.pickle')) and (pplacer_flag == 1):
        id_information = pickle.load(open(rec_tag + 'ids.pickle', 'rb'))
        existing_genes = id_information['existing_genes']
        # NOTE(review): Sequnces and p_ids are assigned but never used.
        Sequnces = []
        p_ids = []
        new_genes = set([w['id'] for w in pplacer_queries[pfam_id]])
        if not (new_genes - set(existing_genes)):
            print "All %s Genes for family %s have already been placed in the reconciled tree." % (
                len(new_genes), pfam_id)
            print "Skip Reconciliation for %s" % pfam_id
            return

    txid_file = rec_tag + 'txid.xml'
    # (Re)build the taxonomy-annotated tree when the pickle or the gzipped
    # result is missing, or whenever pplacer_flag == 1.
    if not (os.path.isfile(rec_tag + 'ids.pickle')) or not (
            os.path.isfile(reconciled_file + '.gz')) or (pplacer_flag == 1):
        print "Running Reconciliation for: %s" % pfam_id

        # Random suffix for the temporary uncompressed tree file.
        rand_id = random.randint(1000000, 9999999)
        subprocess.check_call("gunzip -c %s/%s.nw.gz > %s.%d" %
                              (tree_folder, pfam_id, gene_tree_file, rand_id),
                              shell=True)
        tree = ete2.PhyloTree('%s.%d' % (gene_tree_file, rand_id), format=0)
        tree.resolve_polytomy()
        tree.write(format=0, outfile=txid_file + '.tmp.nw')
        if os.path.exists('%s.%d' % (gene_tree_file, rand_id)):
            subprocess.check_call("rm  %s.%d" % (gene_tree_file, rand_id),
                                  shell=True)

        Phylo.convert(txid_file + '.tmp.nw', 'newick', txid_file + '.tmp.xml',
                      'phyloxml')
        treexml = PhyloXMLIO.read(open(txid_file + '.tmp.xml', 'r'))
        tree = treexml[0]
        treexml.attributes.pop('schemaLocation',
                               None)  # not supported by Forester
        tree.rooted = True
        my_ids = set([])
        my_query_by_taxid = {}
        # Attach a taxonomy (id and scientific name) to every leaf.
        for leaf in tree.clade.find_clades(terminal=True):
            # Leaf names are split on '/'; the first part is used for lookup.
            up_name = leaf.name.split('/')[0]
            tax_id, tax_name = find_tax_id_unip(up_name, db)
            # Ids outside all_species_txids are remapped through merged_taxid
            # and the best_taxid_map cache, or resolved by find_best_taxid.
            if tax_id not in all_species_txids:
                if tax_id in merged_taxid.keys():
                    tax_id = merged_taxid[tax_id]
                    tax_name = find_tax_name(tax_id, db)
                if tax_id in best_taxid_map.keys():
                    tax_id = best_taxid_map[tax_id]
                    tax_name = find_tax_name(tax_id, db)
                else:
                    tax_id0 = tax_id
                    tax_id, tax_name = find_best_taxid(tax_id, db)
                    if tax_id > 0:
                        best_taxid_map[tax_id0] = tax_id
            # A negative id is looked up in merged_taxid by its absolute value.
            if tax_id < 0:
                if (-tax_id) in merged_taxid.keys():
                    tax_id = merged_taxid[-tax_id]
                    tax_name = find_tax_name(tax_id, db)
            if tax_id in my_query_by_taxid:
                my_query_by_taxid[tax_id].append(up_name)
            else:
                my_query_by_taxid[tax_id] = [up_name]
            my_ids.add(tax_id)
            my_tax_id = PhyloXML.Id(tax_id, provider='ncbi_taxonomy')
            taxon = PhyloXML.Taxonomy(id=my_tax_id)
            taxon.scientific_name = tax_name
            leaf._set_taxonomy(taxon)
        PhyloXMLIO.write(treexml, open(txid_file, 'w'))
        os.system('rm ' + txid_file + '.tmp.nw')
        os.system('rm ' + txid_file + '.tmp.xml')
        print "Taxid file done for: %s" % pfam_id
        # Keep only the ids found in all_species_txids, and the genes that
        # map to them.
        existing_ids = list(set(my_ids) & set(all_species_txids))
        existing_genes = [
            g for txid in my_query_by_taxid.keys()
            for g in my_query_by_taxid[txid] if txid in existing_ids
        ]
        pickle.dump(
            {
                'pfam_id': pfam_id,
                'existing_ids': existing_ids,
                'existing_genes': existing_genes
            }, open(rec_tag + 'ids.pickle', 'wb'))
        print "Pickle file done for: %s" % pfam_id

    # Remove a stale result, run the Forester gsdi application, gzip the new
    # result when one was produced, then delete the intermediate files.
    if os.path.exists(reconciled_file):
        os.system('rm ' + reconciled_file)
    os.system(
        "java -Xmx4g -cp %s/forester_1038.jar org.forester.application.gsdi -g %s %s/ncbi_2_fixed.xml %s"
        % (lib_path, txid_file, species_tree_data_path, reconciled_file))
    if os.path.exists(reconciled_file):
        if os.path.exists(reconciled_file + '.gz'):
            subprocess.check_call("rm  %s.gz" % (reconciled_file), shell=True)
        subprocess.check_call("gzip %s" % (reconciled_file), shell=True)
    os.system('rm ' + rec_tag + 'reconciled_species_tree_used.xml')
    os.system('rm ' + rec_tag + 'reconciled_gsdi_log.txt')
    os.system('rm ' + txid_file)
    print "Reconciliation file done for: %s" % pfam_id
コード例 #23
0
...     print(tree.name)


from cStringIO import StringIO

treedata = "(A, (B, C), (D, E))"
handle = StringIO(treedata)
tree = Phylo.read(handle, "newick")

#tree = Phylo.read(StringIO("(A, (B, C), (D, E))"), "newick")

tree1 = Phylo.read('example1.xml', 'phyloxml')
tree2 = Phylo.read('example2.xml', 'phyloxml')
Phylo.write([tree1, tree2], 'example-both.xml', 'phyloxml')

Phylo.convert('example.nhx', 'newick', 'example2.nex', 'nexus')

tree = Phylo.parse('phyloxml_examples.xml', 'phyloxml').next()
print(tree)
Phylogeny(description='phyloXML allows to use either a "branch_length"
attribute or element to indicate branch lengths.', name='example from
Prof. Joe Felsenstein's book "Inferring Phylogenies"')
    Clade()
        Clade(branch_length=0.06)
            Clade(branch_length=0.102, name='A')
            Clade(branch_length=0.23, name='B')
        Clade(branch_length=0.4, name='C')
...
<img src="Phylo-draw-apaf1.png" title="fig:Rooted phylogram, via Phylo.draw" alt="Rooted phylogram, via Phylo.draw" width="500" />

tree = Phylo.read('apaf.xml', 'phyloxml')
コード例 #24
0
    sys.stdout.flush()
    
    start_time = time.time()
    t.add_trees('tests/bird%s.new' % s, 'newick', 'test%s' % s)
    add_times[n] = time.time() - start_time
    print '\t', ti(add_times[n]),
    sys.stdout.flush()

    start_time = time.time()
    tree = t.serialize_trees('test%s' % s)
    retrieve_times[n] = time.time() - start_time
    print '\t', ti(retrieve_times[n]),
    sys.stdout.flush()

    start_time = time.time()
    bp.convert('tests/bird%s.new' % s, 'newick', 'tests/bird%s.cdao' % s, 'cdao')
    write_times[n] = time.time() - start_time
    print '\t', ti(write_times[n]),
    sys.stdout.flush()

    stringio = StringIO()
    start_time = time.time()
    bp.write(bp.read('tests/bird%s.cdao' % s, 'cdao'), stringio, 'newick')
    parse_times[n] = time.time() - start_time
    print '\t', ti(parse_times[n])
    sys.stdout.flush()

# Gather the per-phase timing tables into one dict keyed by phase name.
# The tables are module-level variables named "<phase>_times"; look them up
# in the module namespace directly rather than evaluating a code string.
data = {}
for term in ('add', 'retrieve', 'write', 'parse'):
    data[term] = globals()['%s_times' % term]
コード例 #25
0
import sys
from Bio import SeqIO, AlignIO, Phylo
from Bio.Alphabet import generic_protein, generic_dna

# Positional command-line arguments, in order: conversion family
# ('seq', 'align' or 'tree'), input path, output path, input format,
# output format.
options = sys.argv[1:]
incheck, infile, outfile, intype, outtype = (options[i] for i in range(5))

if incheck == 'tree':
    Phylo.convert(infile, intype, outfile, outtype)
elif incheck in ('seq', 'align'):
    # SeqIO and AlignIO are called the same way here, alphabet included.
    converter = SeqIO if incheck == 'seq' else AlignIO
    converter.convert(infile, intype, outfile, outtype, generic_dna)
コード例 #26
0
ファイル: nexus2newick.py プロジェクト: wangdi2014/bioconvert
 def _method_biopython(self, *args, **kwargs):
     """Convert ``self.infile`` (Nexus) into ``self.outfile`` (Newick) via Biopython.

     Extra positional and keyword arguments are accepted but not used.
     """
     _log.warning("biopython methods rounds up values (5 digits)")
     from Bio import Phylo
     source, target = self.infile, self.outfile
     Phylo.convert(source, "nexus", target, "newick")
コード例 #27
0
ファイル: bayesian.py プロジェクト: Chamalie-UOM/BackEnd
    def bayesian(self, file_name, data_type):
        """Build a Bayesian tree with MrBayes and save it in Newick format.

        A MrBayes batch file is written next to ``file_name`` and executed
        through ``MrBayesCommandline``. The resulting consensus tree
        (``<base>.nex.con.tre``) is stripped of its bracketed ``&prob`` and
        ``&length_mean`` annotations and converted to
        ``<base>_tree_bayesian.nw``. The batch file, the intermediate Nexus
        tree and every ``<base>.nex*`` file are deleted afterwards.

        Args:
            file_name: Path of the input data; its extension-less form
                (``<base>``) is the prefix of every generated file.
            data_type: ``'DNA'`` selects the CPU / double-precision BEAGLE
                settings; any other value selects the GPU / single-precision
                ones.
        """
        # The return value is not needed here. The batch script below executes
        # ``<base>.nex``, which this call is presumably responsible for
        # producing -- TODO confirm against ``converter``.
        self.converter(file_name, data_type)
        base = os.path.splitext(file_name)[0]
        bat_file = base + '_batch.txt'

        # Only the BEAGLE configuration differs between the two data types.
        if data_type == 'DNA':
            beagle_settings = [
                'set usebeagle=yes beagledevice=cpu beagleprecision=double\n',
                'set  beaglescaling=dynamic beaglesse=yes\n',
            ]
        else:
            beagle_settings = [
                'set usebeagle=yes beagledevice=gpu\n',
                'set beagleprecision=single beaglescaling=dynamic\n',
            ]
        seq = (
            ['set autoclose=yes nowarn=yes\n']
            + beagle_settings
            + [
                'execute ' + base + '.nex\n',
                'lset nst=6 rates=gamma\n',
                'mcmc ngen=10000 savebrlens=no samplefreq=10\n',
                'sump burnin = 250\n',
                'sumt burnin = 250\n',
                'quit',
            ]
        )

        # Context manager guarantees the batch file is flushed and closed
        # before MrBayes reads it.
        with open(bat_file, "w") as batch:
            batch.writelines(seq)

        mrbayes_cline = MrBayesCommandline(execute=bat_file,
                                           log='log.txt',
                                           end='')
        mrbayes_cline()
        os.remove(bat_file)

        tree_file = base + '_tree.nexus'
        os.rename(base + '.nex.con.tre', tree_file)

        with open(tree_file, 'r') as handle:
            article_text = handle.read()
        # Drop the bracketed annotations so only plain branch data remains.
        article_text = re.sub(r'\[&prob=.*?\]:', ':', article_text)
        article_text = re.sub(r'\[&length_mean=.*?}\]', '', article_text)

        with open(tree_file, 'w') as handle:
            handle.write(article_text)

        tree_base = os.path.splitext(tree_file)[0]

        Phylo.convert(tree_file, 'nexus', tree_base + '_bayesian.nw', 'newick')
        os.remove(tree_file)
        for filename in glob.glob(base + '.nex*'):
            os.remove(filename)
コード例 #28
0
from Bio import Phylo
import sys

# Positional command-line arguments, in order: input path, input format,
# output path, output format.
infile, intype, outfile, outtype = (sys.argv[i] for i in (1, 2, 3, 4))

Phylo.convert(infile, intype, outfile, outtype)
コード例 #29
0
def _remove_quietly(path):
    """Delete *path* if possible; a missing or undeletable file is ignored."""
    try:
        os.remove(path)
    except OSError:
        pass


def reconcile_tree(gene_tree_file,reconciled_file,rec_tag,pfam_id,db):
    """Annotate a family's gene tree with taxonomy ids and reconcile it with GSDI.

    When the id cache (``rec_tag + 'ids.pickle'``) or the gzipped reconciled
    tree is missing, or when ``pplacer_flag`` is 1, the gene tree
    ``<tree_folder>/<pfam_id>.nw.gz`` is unpacked, its polytomies resolved,
    converted to phyloXML, and every leaf is tagged with an NCBI taxonomy id
    and scientific name. The tagged tree is written to ``rec_tag + 'txid.xml'``
    and the id cache is rewritten. The forester GSDI tool is then run on that
    file and, if it produced ``reconciled_file``, the result is gzipped.
    Intermediate files are deleted on a best-effort basis.

    If ``pplacer_flag`` is 1 and every gene queued for this family in
    ``pplacer_queries`` is already listed in the id cache, nothing is done.

    Args:
        gene_tree_file: Path prefix for the temporary unpacked gene tree.
        reconciled_file: Output path of the reconciled tree (before gzip).
        rec_tag: Path prefix for the cache, taxonomy and log files.
        pfam_id: Family identifier; selects the input tree and queued genes.
        db: Handle passed through to the taxonomy lookup helpers.

    Raises:
        subprocess.CalledProcessError: if ``gunzip`` or ``gzip`` exits non-zero.
    """
    ids_pickle = rec_tag + 'ids.pickle'
    if os.path.isfile(ids_pickle) and pplacer_flag == 1:
        with open(ids_pickle, 'rb') as handle:
            id_information = pickle.load(handle)
        existing_genes = id_information['existing_genes']
        new_genes = set(w['id'] for w in pplacer_queries[pfam_id])
        if not (new_genes - set(existing_genes)):
            print("All %s Genes for family %s have already been placed in the reconciled tree." % (len(new_genes), pfam_id))
            print("Skip Reconciliation for %s" % pfam_id)
            return

    txid_file = rec_tag + 'txid.xml'
    if not os.path.isfile(ids_pickle) or not os.path.isfile(reconciled_file + '.gz') or pplacer_flag == 1:
        print("Running Reconciliation for: %s" % pfam_id)

        # A random suffix keeps the unpacked copy apart from other files
        # derived from the same gene_tree_file prefix.
        rand_id = random.randint(1000000, 9999999)
        unzipped_tree = '%s.%d' % (gene_tree_file, rand_id)
        # Argument list + stdout redirection instead of a shell string, so
        # paths containing spaces or shell metacharacters are handled safely.
        with open(unzipped_tree, 'wb') as unzipped:
            subprocess.check_call(
                ["gunzip", "-c", "%s/%s.nw.gz" % (tree_folder, pfam_id)],
                stdout=unzipped)
        tree = ete2.PhyloTree(unzipped_tree, format=0)
        tree.resolve_polytomy()
        tree.write(format=0, outfile=txid_file + '.tmp.nw')
        if os.path.exists(unzipped_tree):
            os.remove(unzipped_tree)

        Phylo.convert(txid_file + '.tmp.nw', 'newick', txid_file + '.tmp.xml', 'phyloxml')
        with open(txid_file + '.tmp.xml', 'r') as handle:
            treexml = PhyloXMLIO.read(handle)
        tree = treexml[0]
        treexml.attributes.pop('schemaLocation', None)  # not supported by Forester
        tree.rooted = True
        my_ids = set()
        my_query_by_taxid = {}
        for leaf in tree.clade.find_clades(terminal=True):
            up_name = leaf.name.split('/')[0]
            tax_id, tax_name = find_tax_id_unip(up_name, db)
            if tax_id not in all_species_txids:
                if tax_id in merged_taxid:
                    tax_id = merged_taxid[tax_id]
                    tax_name = find_tax_name(tax_id, db)
                if tax_id in best_taxid_map:
                    tax_id = best_taxid_map[tax_id]
                    tax_name = find_tax_name(tax_id, db)
                else:
                    tax_id0 = tax_id
                    tax_id, tax_name = find_best_taxid(tax_id, db)
                    # Only positive results are remembered for later leaves.
                    if tax_id > 0:
                        best_taxid_map[tax_id0] = tax_id
            if tax_id < 0:
                if (-tax_id) in merged_taxid:
                    tax_id = merged_taxid[-tax_id]
                    tax_name = find_tax_name(tax_id, db)
            my_query_by_taxid.setdefault(tax_id, []).append(up_name)
            my_ids.add(tax_id)
            my_tax_id = PhyloXML.Id(tax_id, provider='ncbi_taxonomy')
            taxon = PhyloXML.Taxonomy(id=my_tax_id)
            taxon.scientific_name = tax_name
            leaf._set_taxonomy(taxon)
        # Close the handle explicitly: the Java step below reads this file.
        with open(txid_file, 'w') as handle:
            PhyloXMLIO.write(treexml, handle)
        _remove_quietly(txid_file + '.tmp.nw')
        _remove_quietly(txid_file + '.tmp.xml')
        print("Taxid file done for: %s" % pfam_id)

        existing_id_set = set(my_ids) & set(all_species_txids)
        existing_ids = list(existing_id_set)
        existing_genes = [
            g
            for txid, genes in my_query_by_taxid.items()
            if txid in existing_id_set
            for g in genes
        ]
        with open(ids_pickle, 'wb') as handle:
            pickle.dump(
                {
                    'pfam_id': pfam_id,
                    'existing_ids': existing_ids,
                    'existing_genes': existing_genes,
                },
                handle)
        print("Pickle file done for: %s" % pfam_id)

    # Remove any stale result so the existence check below reflects this run.
    _remove_quietly(reconciled_file)
    # Left as a shell command line: its exit status is deliberately ignored,
    # exactly as before.
    os.system("java -Xmx4g -cp %s/forester_1038.jar org.forester.application.gsdi -g %s %s/ncbi_2_fixed.xml %s"%(lib_path, txid_file, species_tree_data_path, reconciled_file))
    if os.path.exists(reconciled_file):
        if os.path.exists(reconciled_file + '.gz'):
            os.remove(reconciled_file + '.gz')
        subprocess.check_call(["gzip", reconciled_file])
    _remove_quietly(rec_tag + 'reconciled_species_tree_used.xml')
    _remove_quietly(rec_tag + 'reconciled_gsdi_log.txt')
    _remove_quietly(txid_file)
    print("Reconciliation file done for: %s" % pfam_id)
コード例 #30
0
    def collapsing_nodes(self, 
                    collapse_node_by_branch=True,
                    collapse_node_by_pattern=True,
                    collapse_branch_len=0):
        """Collapse internal nodes of ``self.tree_obj`` and export it as phyloXML.

        The tree is re-rooted on its midpoint outgroup. Internal nodes are then
        optionally collapsed (their children detached) when the mean distance
        from the node to its tips is at most ``collapse_branch_len``, and/or
        when all tips below the node share the same heatmap pattern taken from
        ``self.mt_df``. A collapsed node's name gets the '='-joined names of
        its tips appended. The tree is written as Newick to
        ``<self.xml_tree>.temp_newick``, converted to phyloXML at
        ``self.xml_tree`` and parsed into ``self.dom``.

        Args:
            collapse_node_by_branch: Enable collapsing by mean tip distance.
            collapse_node_by_pattern: Enable collapsing by shared pattern.
            collapse_branch_len: Distance threshold for branch collapsing.

        Side effects:
            Raises the interpreter recursion limit to 100000.
        """

        def mean(array):
            return sum(array)/float(len(array))

        def cache_distances(tree):
            ''' precalculate distances of all nodes to the root''' 
            node2rootdist = {tree: 0}
            for node in tree.iter_descendants('preorder'):
                node2rootdist[node] = node.dist + node2rootdist[node.up]
                # NOTE(review): the cumulative distance is stored under the
                # feature name `dist`, the same attribute read as the branch
                # length on the line above -- confirm this overwrite is intended.
                node.add_features(dist=node2rootdist[node])
            return node2rootdist


        def cache_heatmap(tree):
            ''' precalculate heatmap patterns of all nodes ''' 
            node2pattern = {tree: 'X'}
            df = self.mt_df

            for node in tree.iter_descendants('postorder'):
                if node.is_leaf():
                    ptn = 'X'
                    try:
                        name = node.name
                        if '=' in node.name:
                            name = node.name.split('=')[0]
                        text = "".join(df[name].astype(str).to_list())
                        if text:
                            ptn = text
                    except Exception:
                        # Best effort: a leaf whose pattern cannot be built
                        # keeps 'X'. Unlike a bare `except:`, this no longer
                        # swallows KeyboardInterrupt / SystemExit.
                        pass

                    node2pattern[node] = ptn
                    node.add_features(ptn = ptn)

                ptn = node2pattern[node]

                if node.up in node2pattern:
                    if node2pattern[node.up]:
                        # if the node has a different pattern, change concensus pattern to 'X'.
                        if node2pattern[node.up] != ptn:
                            node2pattern[node.up] = 'X'
                    else:
                        node2pattern[node.up] = ptn
                else:
                    # init pattern of internal node
                    node2pattern[node.up] = ptn

            return node2pattern


        def collapse_by_len(tree, min_dist):
            # cache the tip content of each node to reduce the number of times the tree is traversed
            node2tips = tree.get_cached_content()
            root_distance = cache_distances(tree)

            for node in tree.get_descendants('preorder'):
                if not node.is_leaf():
                    avg_distance_to_tips = mean([root_distance[tip]-root_distance[node]
                                                for tip in node2tips[node]])

                    if avg_distance_to_tips <= min_dist:
                        # rename
                        node.name += '='.join([tip.name for tip in node2tips[node]])
                        # label
                        node.add_features(collapsed_dist=True)


        def collapse_by_pattern(tree):
            # cache the tip content of each node to reduce the number of times the tree is traversed
            node2tips = tree.get_cached_content()
            node_pattern = cache_heatmap(tree)
            
            for node in tree.get_descendants('preorder'):
                if not node.is_leaf():
                    if node_pattern[node] != 'X':
                        # rename
                        node.name += '='.join([tip.name for tip in node2tips[node]])
                        # label
                        node.add_features(collapsed_ptn=True)


        # increase recursion limit
        sys.setrecursionlimit(100000)
        t = self.tree_obj
        R = t.get_midpoint_outgroup()
        t.set_outgroup(R)

        if collapse_node_by_branch:
            # label nodes that will be collapsed
            collapse_by_len(t, collapse_branch_len)
            # collapsed nodes are labeled, so you locate them and prune them
            for n in t.search_nodes(collapsed_dist=True):
                for ch in n.get_children():
                    ch.detach()

        if collapse_node_by_pattern:
            # label nodes that will be collapsed
            collapse_by_pattern(t)
            # collapsed nodes are labeled, so you locate them and prune them
            for n in t.search_nodes(collapsed_ptn=True):
                for ch in n.get_children():
                    ch.detach()

        # write the processed tree to a temporary Newick file
        proc_tree = f'{self.xml_tree}.temp_newick'
        t.write(outfile=proc_tree, format=5)

        # converting to phyxml format
        Phylo.convert(proc_tree, 'newick', self.xml_tree, 'phyloxml')
        
        # parse phyloXML tree to DOM
        self.dom = minidom.parse(self.xml_tree)
コード例 #31
0
ファイル: import_tree.py プロジェクト: WebValley2014/DataViz
	ts.layout_fn = my_layout # Use custom layout
	return ts
#


# ts = TreeStyle()
ts = build_vis()
# ts.show_leaf_name = True
# ts.mode = "c"
# ts.arc_start = -180 # 0 degrees = 3 o'clock
# ts.arc_span = 180

# Swap name and alias on every node except those named "NoName".
for node in t.traverse("postorder"):
	if node.name != "NoName":
		node.name, node.alias = node.alias, node.name

t.write(features=["name", "dist", "alias"], outfile="prova2.nw", format=1)
A = tree_to_phyloxml(t)
# Context manager closes prova.xml even if the write fails.
with open("prova.xml", "w") as text_file:
	text_file.write(A)
from Bio import Phylo
Phylo.convert('prova2.nw', 'newick', 'prova3.xml', 'phyloxml')

t.show(tree_style=ts)
コード例 #32
0
__author__ = 'Lovecraft'
#!/usr/bin/python

import sys

# The Newick tree to convert is named by the first command-line argument.
filename = sys.argv[1]

from Bio import Phylo

# The Nexus copy is written next to the input, with ".tre" appended to its name.
nexus_target = filename + ".tre"
Phylo.convert(filename, 'newick', nexus_target, 'nexus')
コード例 #33
0
	def filter_recombinant_positions(self):
		"""Filter recombinant positions with gubbins and save the tree as Nexus.

		Does nothing (beyond logging) when ``self.init_filtered_tree`` already
		exists. Otherwise ``run_gubbins.py`` is run from inside
		``self.recomb_filter`` on ``core.trimmed.aln``, starting from
		``self.init_tree``:

		* if ``self.tree_builder`` is ``'fasttree'``, FastTree is used and a
		  failure aborts the program;
		* otherwise the default builder is tried first, then ``hybrid``, then
		  ``fasttree``, removing leftover ``core.trimmed.aln.*`` files between
		  attempts; the program aborts only if all three fail.

		The final gubbins tree is converted from Newick to Nexus at
		``self.init_filtered_tree``.

		Raises:
			SystemExit: with status 1 when every applicable gubbins run fails.
		"""
		if os.path.isfile(self.init_filtered_tree):
			self.logger.info("Recombination filtering by gubbins has already been done. Skipping this step...")
			return

		if self.model == True:
			options = ['-r', 'GTRGAMMA']
		else:
			options = []

		def run_gubbins(tree_builder=None, model_options=()):
			# Assemble and run one gubbins command line; returns its exit code.
			command = ["run_gubbins.py", "-v"]
			if tree_builder is not None:
				command += ['--tree_builder', tree_builder]
			command += ['-s', self.init_tree]
			command += list(model_options)
			command += [
				'-p', os.path.join(self.recomb_filter, 'filtered_core_aln'),
				'-c', str(self.threads),
				os.path.join(self.core_align, 'core.trimmed.aln'),
			]
			return snpiphy.run_command(command)

		def remove_partial_outputs():
			# Clear files left by a failed run. Names are removed relative to
			# the current directory, so this relies on the enclosing
			# ``snpiphy.cd(self.recomb_filter)`` block.
			for leftover in os.listdir(self.recomb_filter):
				if leftover.startswith('core.trimmed.aln.'):
					os.remove(leftover)

		with snpiphy.cd(self.recomb_filter):
			self.logger.info("Scanning and filtering recombination positions with gubbins...")
			if self.tree_builder == 'fasttree':
				ec = run_gubbins('fasttree')
				if ec != 0:
					self.logger.error("Running gubbins using fasttree method has failed. Please examine your alignment or consider removing highly divergent sequences. Additionally consider using a different reference sequence.")
					sys.exit(1)
			else:
				# Previously this call concatenated a list with the string
				# '-s' and raised TypeError before gubbins could start.
				ec = run_gubbins(model_options=options)
				if ec != 0:
					self.logger.warn("Recombination filtering using the RAxML only method has failed. Retrying with FastTree for first iteration.")
					remove_partial_outputs()
					ec = run_gubbins('hybrid', options)
					if ec != 0:
						self.logger.warn("Recombination filtering using hybrid RAxML/FastTree method has failed. Retrying with FastTree for all iterations.")
						remove_partial_outputs()
						ec = run_gubbins('fasttree')
						if ec != 0:
							self.logger.error("Running gubbins using all methods have failed. Please examine your alignment or consider removing highly divergent sequences. Additionally consider using a different reference sequence.")
							sys.exit(1)
		Phylo.convert(os.path.join(self.recomb_filter, "filtered_core_aln.final_tree.tre"), 'newick', self.init_filtered_tree, 'nexus')
		self.logger.info("Recombination filtering by gubbins has completed successfully.")
コード例 #34
0
ファイル: Spetree.py プロジェクト: yuzhenpeng/TREEasy
def run_Spetree(pwd, species_namefile, gene_namefile, boot_value, roottaxon, Net_num, cross_value, thread_number,
                message_queue=None):
    """Infer species trees/networks from per-gene IQ-TREE consensus trees.

    Steps, in order (external tools are run through ``os.system``):

    1. Concatenate ``aln_seqs/*.contree`` and rename genes via
       ``gene_namechange``.
    2. Keep the trees accepted by ``bootstrap_check``, root them on
       ``roottaxon`` and write three tree lists (for ASTRAL/SNAQ,
       MP-EST/PhyloNet and STELLS2).
    3. Rename sequences in the alignments, concatenate them with AMAS and
       run IQ-TREE and RAxML-NG on the concatenation.
    4. Run ASTRAL, MP-EST and STELLS2.
    5. Write the pairwise Robinson-Foulds values of the five resulting
       trees to ``tree_compare.csv``.
    6. Run SNAQ (through Julia) and PhyloNet.

    Args:
        pwd: Prefix prepended, by plain string concatenation, to every path
            used here (so presumably it ends with a path separator).
        species_namefile: Passed to ``spe_namechange``.
        gene_namefile: Passed to ``gene_namechange``.
        boot_value: Passed to ``bootstrap_check``.
        roottaxon: Outgroup leaf name, or several names separated by commas.
        Net_num: Used as ``hmax`` for SNAQ and as the numeric argument of
            PhyloNet's ``InferNetwork_ML_CV``.
        cross_value: Value of PhyloNet's ``-cv`` option.
        thread_number: Thread count handed to IQ-TREE, RAxML-NG, STELLS2
            and PhyloNet.
        message_queue: Optional object with a ``put`` method that receives
            progress messages.
    """
    os.system('cat %saln_seqs/*.contree > %sall_iqtree.contree' % (pwd, pwd))

    # NOTE(review): this handle is never read or closed; `infile` is rebound
    # a few lines below.
    infile = open(pwd + "all_iqtree.contree", 'r')

    ###change gene name#####
    global control_mpest
    global taxonmap_phylonet
    control_mpest = ['', 0, 0]
    n_gene = 0
    taxonmap_phylonet = ['']
    dic_name = {}
    dic_name = gene_namechange(pwd, gene_namefile)
    # NOTE(review): this handle is not closed before `infile` is rebound in
    # the PHYLONET section.
    infile = open(pwd + 'all_iqtree_spename.contree', 'r')

    outfile_btstraped = open(pwd + 'all_iqtree_btstraped.txt', 'w')  # Input For ASTRAL, SNAQ
    outfile_rooted = open(pwd + 'all_iqtree_rooted.txt', 'w')  # Input For MPEST,PhyloNet
    outfile_namechange_non_branch = open(pwd + 'all_iqtree_namechange_nonbranch.txt', 'w')  # Input For STELLS2

    ###check bootstrap and rootted tree###
    for line in infile:
        t = Tree(line)
        if bootstrap_check(t, boot_value) == False:
            continue
        n_gene += 1
        s = t.write(format=3)
        s = s.replace('NoName', '')
        outfile_btstraped.write(s + '\n')
        print line
        if "," in roottaxon:
            root_taxon = roottaxon.strip().split(',')
            root_taxon = t.get_common_ancestor(root_taxon)
            # A tree that cannot be rooted on the common ancestor is left out
            # of the rooted and STELLS2 lists, but it was already counted in
            # n_gene and written to the bootstrapped list above.
            try:
                t.set_outgroup(root_taxon)
            except:
                continue
        else:
            t.set_outgroup(t & roottaxon)
        s = t.write(format=3)
        s = s.replace('NoName', '')
        outfile_rooted.write(s + '\n')
        s = t.write(format=8)
        s = s.replace('NoName', '')
        outfile_namechange_non_branch.write(spe_namechange(species_namefile, s) + '\n')
    control_mpest[2] = n_gene
    outfile_btstraped.close()
    outfile_rooted.close()
    outfile_namechange_non_branch.close()

    ######inputs for species tree inferring######

    ####Concat RUNNING####
    # One seed is shared by the IQ-TREE and RAxML-NG runs below.
    random_seed_number = randint(0, 2**32)
    # Rewrite the FASTA headers in place: ">key" becomes ">dic_name[key]".
    for key in dic_name:
        os.system(
            """ sed -i "s/%s/%s/g" `grep "%s" -rl %saln_seqs/*.aln` """ % (">" + key, ">" + dic_name[key], ">" + key, pwd))

    os.system("rm -rf %sCONCAT" % (pwd))
    os.system("mkdir %sCONCAT" % (pwd))
    os.system("python AMAS.py concat -i %saln_seqs/*.aln -f fasta -d dna" % (pwd))
    os.system("mv concatenated.out %sCONCAT/" % (pwd))
    os.system("iqtree -s %s -bb 1000 -redo -nt %d -m MFP -seed %d 1>%s_ML_iqtree.log" % (
    pwd + "CONCAT/concatenated.out", thread_number, random_seed_number, pwd + "CONCAT/concatenated.out"))

    if message_queue:
        message_queue.put("Raxml running...")

    ####Raxml RUNNING####
    os.system("raxml-ng --all --msa %s  --model GTR+G+FO --tree pars{10} --bs-trees autoMRE{1000} --seed %d --threads %d --prefix %s_raxml_ng --bs-cutoff 0.01" %(pwd+"CONCAT/concatenated.out", random_seed_number,thread_number,pwd + "CONCAT/concatenated.out"))

    if message_queue:
        message_queue.put("ASTRAL running...")

    ###ASTRAL RUNNING###
    os.system('rm -rf %sASTRAL' % (pwd))
    os.system('mkdir %sASTRAL' % (pwd))
    outfile = open(pwd + 'ASTRAL/species_name_ASTRAL.txt', 'w')
    outfile.write(control_mpest[0])
    outfile.close()
    os.system(
        'java -jar astral.5.6.3.jar -i %s -o %sASTRAL/%s -a %sASTRAL/species_name_ASTRAL.txt 2> %sASTRAL/run_ASTRAL.log' \
        % (pwd + "all_iqtree_btstraped.txt", pwd, "ASTRAL_output.txt", pwd, pwd))
    if message_queue:
        message_queue.put("ASTRAL done")
        message_queue.put("MP_EST running...")

    ###MP_EST RUNNING###

    os.system('rm -rf %sMP_EST' % (pwd))
    os.system('mkdir %sMP_EST' % (pwd))
    outfile = open(pwd + 'MP_EST/control.file', 'w')
    outfile.write('%s\n0\n%s\n5\n%s %s\n%s0' % (
    pwd + 'all_iqtree_rooted.txt', str(int(random() * 10000000)), str(control_mpest[2]), str(control_mpest[1]),
    control_mpest[0]))
    outfile.close()
    os.system('mpest %sMP_EST/control.file 1> %sMP_EST/run_MPEST.log' % (pwd, pwd))
    os.system('mv %sall_iqtree_rooted.txt_* %sMP_EST/' % (pwd, pwd))

    # Convert the MP-EST result from Nexus to Newick and keep only the first
    # line of the converted file.
    Phylo.convert(pwd+'MP_EST/all_iqtree_rooted.txt_besttree.tre', 'nexus', pwd+'MP_EST/all_iqtree_rooted.txt_besttree.tmp.nex', 'newick')
    os.system('head -n1 %sMP_EST/all_iqtree_rooted.txt_besttree.tmp.nex >%sMP_EST/all_iqtree_rooted.txt_besttree.nex' % (pwd, pwd)) 
    if message_queue:
        message_queue.put("MP_EST done")
        message_queue.put("STELLS2 running...")

    ###STELLS2 RUNNING###

    os.system('rm -rf %sSTELLS2' % (pwd))
    os.system('mkdir %sSTELLS2' % (pwd))
    os.system('stells-v2 -t %d -g %sall_iqtree_namechange_nonbranch.txt > %sSTELLS2/STELLS2_output.txt' % (
    thread_number, pwd, pwd))
    os.system('mv %sall_iqtree_namechange_nonbranch.txt-nearopt.trees %sSTELLS2/' % (pwd, pwd))
    # Extract the reported tree from the STELLS2 log and terminate it with ';'.
    os.system('grep "the inferred MLE species tree" %sSTELLS2/STELLS2_output.txt | cut -f 2- -d ":" | sed "s/$/;/" > %sSTELLS2/STELLS2_output_tree.txt' %(pwd, pwd))

    if message_queue:
        message_queue.put("STELLS2 done")
        message_queue.put("SNAQ running...")

    ###TREE COMPARE###
    tree_files = [pwd+'CONCAT/concatenated.out.contree', pwd+'CONCAT/concatenated.out_raxml_ng.raxml.bestTree', pwd+'ASTRAL/ASTRAL_output.txt',pwd+'MP_EST/all_iqtree_rooted.txt_besttree.nex', pwd+'STELLS2/STELLS2_output_tree.txt']
    # NOTE(review): `trees` is indexed below, so this relies on map()
    # returning a list (Python 2 behaviour).
    trees = map(Tree, tree_files)
    n_trees = len(tree_files)
    mat = np.zeros((n_trees, n_trees), dtype=np.float)
    for i in range(n_trees):
        for j in range(n_trees):
            mat[i][j] = (trees[i].robinson_foulds(trees[j], unrooted_trees=True))[0]
    np.savetxt(pwd+'tree_compare.csv', mat, delimiter=",", fmt='%.3e')

    ###SNAQ RUNNING###

    os.system('rm -rf %sSNAQ' % (pwd))
    os.system('mkdir %sSNAQ' % (pwd))
    outfile = open(pwd + 'SNAQ/snaq_con.jl', 'w')
    outfile.write("""using Pkg\nPkg.add("PhyloNetworks")\n#Pkg.update()\nusing PhyloNetworks\nd=readTrees2CF("%sall_iqtree_btstraped.txt");\n 
    T=readTopology("%sASTRAL/ASTRAL_output.txt");\n 
    net2=snaq!(T,d,hmax=%d, filename="net2_snaq");""" % (pwd, pwd, Net_num))
    outfile.close()
    os.system('julia %sSNAQ/snaq_con.jl 1> %sSNAQ/run_SNAQ.log' % (pwd, pwd))
    # The SNAQ outputs are moved from the current working directory.
    os.system('mv net2* %sSNAQ/' % (pwd))
    os.system('mv summaryTreesQuartets.txt %sSNAQ/' % (pwd))
    os.system('mv tableCF.txt %sSNAQ/' % (pwd))
    if message_queue:
        message_queue.put("SNAQ done")
        message_queue.put("PHYLONET running...")


    ###PHYLONET RUNNING###

    os.system('rm -rf %sPHYLONET' % (pwd))
    os.system('mkdir %sPHYLONET' % (pwd))
    outfile = open(pwd + 'PHYLONET/phylonet_con.txt', 'w')
    outfile.write('#NEXUS\n\nBEGIN TREES;\n\n')
    infile = open(pwd + 'all_iqtree_rooted.txt', 'r')
    # Each rooted gene tree becomes one "TREE gt<n> = ..." entry.
    n = 0
    for line in infile:
        outfile.write('TREE gt%d = %s' % (n, line))
        n += 1
    infile.close()
    print n
    outfile.write('\nEND;\n\nBEGIN PHYLONET;\nInferNetwork_ML_CV (all) %d -pl %d -cv %d -a <%s>; \n\nEND;' % (
    Net_num, thread_number, cross_value, taxonmap_phylonet[0][:-1]))
    outfile.close()
    os.system('java -jar PhyloNet_3.6.8.jar %sPHYLONET/phylonet_con.txt 1>%sPHYLONET/PHYLONET_output.txt' % (pwd, pwd))
    if message_queue:
        message_queue.put("PHYLONET done")
        message_queue.put("ALL DONE")
コード例 #35
0
def convert_tree(input_file, input_filetype, output_file, output_filetype):
    """Convert a tree file from one format to another via ``Phylo.convert``.

    The four arguments are forwarded unchanged, in the order given. Nothing
    is returned.
    """
    Phylo.convert(input_file, input_filetype, output_file, output_filetype)
コード例 #36
0
ファイル: PrepareTree.py プロジェクト: protwis/protwis
    def treeDo(self, d, infile, branches, family, Additional_info, famdict=None):
        """Turn a Newick tree into an annotated phyloXML file at ``d/out.xml``.

        The Newick text is written (with every "-" removed) to ``d/rong``,
        converted to phyloXML at ``d/raw.xml``, and then rewritten line by line
        into ``d/out.xml`` while injecting a render/styles block, per-protein
        chart elements, display names and annotation links. Finally the colour
        panel is drawn into ``self.box``.

        Args:
            d: Directory that receives the intermediate and output files.
            infile: The Newick tree text itself (not a path).
            branches: When True, every branch length other than 0.0 is
                replaced by 0.1.
            family: Passed to ``get_family_meta``.
            Additional_info: Passed to ``get_tree_data``.
            famdict: Stored on ``self.famdict``.
        """
        self.famdict = famdict
        d = "/".join(d.split("/"))
        z = infile
        # NOTE(review): the handle is not closed explicitly; the conversion
        # below reads this file straight away.
        w = open(d + "/rong", "w").write(z.replace("-", ""))
        raw = open(d + "/raw.xml", "w")
        Phylo.convert(d + "/rong", "newick", raw, "phyloxml")
        raw.close()
        xml = open(d + "/raw.xml", "r").readlines()
        # NOTE(review): `out` is not closed anywhere in this method.
        out = open(d + "/out.xml", "w")
        self.get_tree_data(Additional_info)
        if self.build != False:
            self.rings["class"]["include"] = False
            # self.rings['ligand']['include']=False
            # self.rings['family']['include']=False
        self.get_family_meta(family)
        charts = self.get_charts()
        self.get_colours()
        self.build_legend()
        self.get_styles()
        # flag: set once the rooted='false' line has been seen.
        # flag2: [name, chart] of the most recent <name> line, consumed by
        #        the next <branch_length> line.
        # stylesflag: the render/styles block is written before the line
        #        that follows the rooted='false' line.
        flag = False
        flag2 = ""
        stylesflag = False
        for line in xml:
            if stylesflag == True:
                out.write("<render>" + charts + "<styles>" + self.styles + "</styles></render>")
                stylesflag = False
            ################# Remove header trash #######################
            if "phyloxml" in line:
                line = line.split("phyloxml")[0] + "phyloxml>"
            line = line.replace('"', "'").replace("phy:", "")
            ################# Remove forced rooting #####################
            if flag == True:
                if ">1.0<" in line:
                    line = line.replace(">1.0<", ">0.0<")
                    flag = False
            if "rooted='false'" in line:
                flag = True
                stylesflag = True
            ################# Force even branch lengths #################
            if branches == True:
                if "<branch_length>" in line:
                    if "<branch_length>0.0</branch_length>" not in line:
                        number = line.split(">")[1].split("<")[0]
                        line = line.replace(number, "0.1")
            ################# Reformat names ############################
            if "<name>" in line:
                name = line.split("<")[1].split(">")[1]
                chart = "<chart>"
                for ring in self.rings:
                    # NOTE(review): compared against the string "True", while
                    # the assignment near the top of this method stores the
                    # boolean False -- confirm the expected type of "include".
                    if self.rings[ring]["include"] == "True":
                        if self.rings[ring]["color_type"] == "single":
                            if self.prots[name]["acc"] in self.rings[ring]["items"]:
                                chart += "<%s>%s_true</%s>" % (ring, ring, ring)
                            else:
                                chart += "<%s>%s_false</%s>" % (ring, ring, ring)
                        else:
                            chart += "<" + ring + ">" + self.prots[name][ring] + "</" + ring + ">"
                chart += "</chart>"
                flag2 = [name, chart]
                line = line.replace(name, self.prots[name]["name"]).replace(
                    "<name", "<name bgStyle='%s'" % self.prots[name]["acc"]
                )
            ############## Add annotations and descriptions #############
            if "<branch_length>" in line:
                line = line.replace(">1E05<", ">0.00001<").replace("-", "")
                if ">0.0<" in line and flag == True:
                    line = line.replace(">0.0<", ">0.00001<")
                if flag2 != "":
                    line = (
                        line.strip("\n")
                        + " <annotation><desc>"
                        + self.prots[flag2[0]]["desc"]
                        + " ("
                        + self.prots[flag2[0]]["species"]
                        + ")"
                        + "</desc><uri>/protein/"
                        + self.prots[flag2[0]]["link"]
                        + "</uri> </annotation>"
                        + flag2[1]
                    )
                    flag2 = ""
            out.write(line)

        self.box = self.drawColorPanel()
コード例 #37
0
ファイル: tree_v2.3.py プロジェクト: KonstantinovDK/-NGSchool
#3_here you can specify which nodes should be removed
dell_node=["PPE","OSI"]

#3_here you can specify which nodes should be kept
save_node=["AT","SL","OL","OS","ZM","PP"]


list_legend=["a1","a2","a3","a4","a5","a6","a7","a8","a9","a11","a12","a13","a14","a15","a16","a17","a18","a19","a20","a21","a22","a23",]
seq_seq="MMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMM"
# Pass every non-hidden Newick file in 1_tree_nwk through Phylo.convert
# (Newick to Newick) and write the result under the same name in 4_tree_nwk.
files_all_in = os.listdir(adres+"/for_pic/1_tree_nwk/")
for file in files_all_in:
    if file.startswith("."):
        # skip hidden entries
        continue
    Phylo.convert(adres+"/for_pic/1_tree_nwk/"+file, 'newick', adres+"/for_pic/4_tree_nwk/"+file, 'newick')

def get_example_tree(File):
    adres=os.getcwd()
    file_out_supliment = open(adres+"/out_spliment/"+File, 'w')
    node_file = open(adres+"/node/"+File, 'w')
    # Create a random tree and add to each leaf a random set of motifs
    # from the original set
    #t = Tree("( (A, B, C, D, E, F, G), H, I);")
    #Считываем все домены
    domain_all_legend={}
    file_all_domen=os.listdir(adres+"/for_pic/1_tree_nwk/") 
    file_all_domen.remove(".DS_Store")
    file_all_domen.sort()
    i=0
    for file_domain in file_all_domen:
コード例 #38
0
 def extract_newick_string(self):
     """Return the Nexus tree at ``self.tree_sim_result_path`` as a stripped Newick string."""
     buffer = io.StringIO()
     try:
         Phylo.convert(self.tree_sim_result_path, 'nexus', buffer, 'newick')
         return buffer.getvalue().strip()
     finally:
         buffer.close()
コード例 #39
0
ファイル: do_analysis.py プロジェクト: mjeltsch/VEGFC
def gettree(sequence_dictionary):
    """Build and print a species tree restricted to the given sequences.

    Steps, all working on files in the current directory:

    1. Use 'tree.tre' (downloaded with wget when it is missing) and rewrite
       it through Bio.Phylo as 'tree.newick'.
    2. Indent it with the external ``nw_indent`` tool.
    3. Replace every ``<Species_name>_ott<digits>`` node name with the
       matching key of ``sequence_dictionary``.
    4. Prune the tree down to the species that were found.
    5. Re-attach the species that was not found as the outgroup and print
       the resulting tree.

    Args:
        sequence_dictionary: mapping of node name -> record, where
            ``record[2]`` is the species name (spaces are turned into
            underscores before matching). The caller's mapping is not
            modified; entries are dropped from a copy.

    Returns:
        None. The tree is printed and the intermediate files are left on disk.

    Notes:
        If several species are missing from the tree, all of them are
        dropped and only the last one is used as the outgroup. If none is
        missing, the pruned tree is printed without an outgroup (the
        original code crashed with an unbound ``outgroup_key`` here).
    """
    def replace_node_name(filename_old, filename_new, regex, new_nodename):
        """Replace ``regex`` with ``new_nodename`` in ``filename_old`` in place.

        ``filename_new`` is used as the scratch file. Returns True when the
        content changed, i.e. the pattern matched at least once.
        """
        with open(filename_old, 'r') as f_old, open(filename_new,
                                                    'w') as f_new:
            content = f_old.read()
            # A callable replacement keeps the node name literal, so
            # backslashes in it are not read as group references.
            new_content = re.sub(regex, lambda _match: new_nodename, content,
                                 flags=re.M)
            f_new.write(new_content)
        # Overwrites the old file in a single step
        os.replace(filename_new, filename_old)
        return content != new_content

    def remove_key(dictionary, key):
        """Return a copy of ``dictionary`` without ``key``."""
        new_dictionary = dict(dictionary)
        del new_dictionary[key]
        return new_dictionary

    def add_outgroup(TREEFILE_FINAL, TREEFILE_FINAL_WITH_OUTGROUP,
                     outgroup_key):
        """Write ``(<tree>,<outgroup_key>);`` to the second file."""
        with open(TREEFILE_FINAL, 'r') as f_old:
            content = f_old.read().rstrip()
        # Drop the terminating semicolon only if it is really there, so a
        # trailing newline cannot make us cut into the tree itself.
        if content.endswith(';'):
            content = content[:-1]
        with open(TREEFILE_FINAL_WITH_OUTGROUP, 'w') as f_new:
            f_new.write("(" + content + "," + outgroup_key + ");")

    # Vertebrate tree from open tree of life (download is dysfunctional since end of 2018, use a local copy)
    TREEFILE_IN = 'tree.tre'
    if not os.path.isfile(TREEFILE_IN):
        URL = "https://tree.opentreeoflife.org/opentree/default/download_subtree/ottol-id/801601/Vertebrata"
        execute_subprocess("Download phylogenetic species tree",
                           "wget -O tree.tre " + URL)
    TREEFILE_OUT = 'tree.newick'
    TREEFILE_INDENT = 'tree_indented.newick'
    TREEFILE_TMP = 'tree_indented_tmp.newick'
    TREEFILE_FINAL = 'tree_final.newick'
    TREEFILE_FINAL_WITH_OUTGROUP = 'tree_final_outgroup.newick'
    # This conversion from newick to newick format should not be necessary, but without
    # it, ETE chokes on the newick file...
    Phylo.convert(TREEFILE_IN, 'newick', TREEFILE_OUT, 'newick')
    # Indents nodes and puts each node on a single line to facilitate checking of string replacements
    execute_subprocess(
        "Indenting newick format:\n",
        "nw_indent " + TREEFILE_OUT + " > " + TREEFILE_INDENT + "\n")

    # Replace tree of life nodenames with own nodenames
    outgroup_key = None
    outgroup_value = None
    # Iterate over a snapshot: sequence_dictionary is rebound to a reduced
    # copy whenever a species is missing.
    for key, value in list(sequence_dictionary.items()):
        # Escape the species name so characters such as "." or "(" in it
        # are matched literally.
        species_pattern = re.escape(value[2].replace(' ', '_')) + "_ott[0-9]*"
        if replace_node_name(TREEFILE_INDENT, TREEFILE_TMP, species_pattern,
                             key):
            continue
        print(value[2] +
              " not found in tree! Removing from sequence dictionary.")
        # The only sequence not in the tree file should be the outgroup. Store it in
        # temporary variables, remove it (for tree pruning), and add it back later.
        outgroup_key = key
        outgroup_value = value
        sequence_dictionary = remove_key(sequence_dictionary, key)

    t = Tree(TREEFILE_INDENT, format=1, quoted_node_names=True)
    # Remove from the full phylogenetic tree all species except those in the prune_list
    prune_list = list(sequence_dictionary)
    t.prune(prune_list)
    # Write the tree (format = 9 means only leaf names)
    t.write(format=9, outfile=TREEFILE_FINAL)

    if outgroup_key is None:
        # Every species was found, so there is nothing to attach as outgroup
        print("No outgroup candidate: all species were found in the tree.")
    else:
        # Add the outgroup back (to the local copy of the dictionary).
        sequence_dictionary[outgroup_key] = outgroup_value
        # Add manually the outgroup to the tree file
        add_outgroup(TREEFILE_FINAL, TREEFILE_FINAL_WITH_OUTGROUP,
                     outgroup_key)
        # Re-read modified tree file
        t = Tree(TREEFILE_FINAL_WITH_OUTGROUP, format=1)
        # Set the outgroup
        t.set_outgroup(outgroup_key)
    # Display tree
    print(t)
コード例 #40
0
        print("By Lee Bergstrand\n")
        print("Usage: " + sys.argv[0] + " <PhyloTree.nwk>")
        print("Examples: " + sys.argv[0] + " PhyloTree.nwk\n")
        exit(1)  # Aborts program. (exit(1) indicates that an error occurred)


# ===========================================================================================================
# Main program code:

# House keeping...
import os  # needed below for os.path.splitext

argsCheck(2)  # Checks if the number of arguments are correct.

# Stores file one for input checking.
print(">> Opening Newicktree...")
inFile = sys.argv[1]
# Strip only the final extension. Splitting on the first "." would turn
# "./PhyloTree.nwk" into ".xml" and break on dotted directory names.
outFile = os.path.splitext(inFile)[0] + ".xml"

# File extension check (warning only; the conversion is still attempted)
if not inFile.endswith(".nwk"):
    print("[Warning] " + inFile + " may not be a Newick file!")

print(">> Converting to PhyloXML...")
# Converts Newick to PhyloXML.
try:
    Phylo.convert(inFile, 'newick', outFile, 'phyloxml')
except IOError:
    print("Failed to open " + inFile + " or " + outFile)
    sys.exit(1)  # Non-zero exit status signals the failure to the caller

print(">> Done...")
コード例 #41
0
# Mark the tree (created earlier in the script) as rooted.
tree.rooted = True
# Phylo.draw(tree)

# Colouring requires the PhyloXML flavour of the tree object, so convert first.
tree = tree.as_phyloxml()
print(type(tree))
# Colour the root clade blue and draw the tree.
tree.root.color = "blue"
Phylo.draw(tree)
# Look up the most recent common ancestor of the clades named "A" and "D",
# colour it salmon and draw the tree again.
mrca = tree.common_ancestor({"name": "A"}, {"name": "D"})
mrca.color = "salmon"
Phylo.draw(tree)

# Save the coloured tree in PhyloXML format (file name has no extension).
Phylo.write(tree, "tree_phyloxml", "phyloxml")

# Converting a tree file to another format: Newick "simple.dnd" is written
# as NeXML to "simple.xml".
Phylo.convert("simple.dnd", "newick", "simple.xml", "nexml")

# format() returns the tree serialised as text; it does not create a file.
tree1 = tree.format("newick")
print(tree1)

tree2 = tree.format("nexml")
# Visual separator between the two printed serialisations.
print("\n \n \n \n \n")
print(tree2)

# Number of leaves (terminal clades):
number = tree.count_terminals()
print(number)

# Depth of every clade; with unit_branch_lengths=True each branch presumably
# counts as 1 rather than its real length (confirm in the Bio.Phylo docs).
lengths = tree.depths(unit_branch_lengths=True)
コード例 #42
0
ファイル: import.py プロジェクト: WebValley2014/Pipeline
		if header:
			header = False
			continue
		i = i.strip()
		i = i.split("\t")
		j = i[1].split(";")
		current = t
		# associations[i[0]] = 
		for k in range(0,len(j)):
			# print k
			pos = search_by_name(current, j[k])
			if pos == None:
				print "Adding " + j[k]
				# print pos
				pos = current.add_child(TreeNode())
				pos.dist=0.2
				pos.name = j[k]
				pos.alias = i[0]
			current = pos
	h.close()

	for node in t.traverse("postorder"):
		if node.name == "NoName":
			pass
		else:
	  		node.name = "_" + str(node.alias)

	t.write(features=["name"], outfile="temp.nw", format=1)
	Phylo.convert('temp.nw', 'newick', sys.argv[2], 'phyloxml')
#
コード例 #43
0
ファイル: newick2nexus.py プロジェクト: AnotherSimon/phyloma
#!/usr/bin/env python

import sys
from Bio import Phylo

# Read the Newick tree file named by the first command-line argument and
# write it to standard output in NEXUS format.
Phylo.convert(sys.argv[1], 'newick', sys.stdout, 'nexus')
コード例 #44
0
ファイル: PrepareTree.py プロジェクト: 25352697/protwis
 def treeDo(self,d, infile,branches,family,Additional_info, famdict=None):
     """Convert a Newick tree to PhyloXML and write an annotated copy.

     Files created inside directory ``d``:
       * ``rong``    - the Newick text with every '-' removed
       * ``raw.xml`` - Bio.Phylo's PhyloXML conversion of ``rong``
       * ``out.xml`` - ``raw.xml`` rewritten line by line with render
         styles, display names, ring charts and annotations added

     Every file is closed before the method continues, so ``out.xml`` is
     completely on disk when ``drawColorPanel`` is called.

     Args:
         d: path of the working directory (string, no trailing slash).
         infile: the tree as Newick text (not a file name).
         branches: when equal to True, every branch length other than
             0.0 is replaced by 0.1.
         family: forwarded to ``self.get_family_meta``.
         Additional_info: forwarded to ``self.get_tree_data``.
         famdict: stored unchanged on ``self.famdict``.

     Side effects:
         Sets ``self.famdict`` and ``self.box``; may switch off the
         'class' ring in ``self.rings``.
     """
     self.famdict=famdict
     with open(d+'/rong','w') as newick_file:
         newick_file.write(infile.replace('-',''))
     with open(d+'/raw.xml','w') as raw:
         Phylo.convert(d+'/rong','newick',raw,'phyloxml')
     with open(d+'/raw.xml','r') as raw:
         xml = raw.readlines()
     # out.xml is opened before the helper calls below, as in the original
     # code, so the order in which files are touched does not change.
     with open(d+'/out.xml','w') as out:
         self.get_tree_data(Additional_info)
         if self.build !=False:
             self.rings['class']['include']=False
             #self.rings['ligand']['include']=False
             #self.rings['family']['include']=False
         self.get_family_meta(family)
         charts=self.get_charts()
         self.get_colours()
         self.build_legend()
         self.get_styles()
         # flag: a rooted='false' line was seen, the forced root length is pending
         flag = False
         # flag2: [name, chart] of the last <name> line, waiting for its
         # <branch_length> line; '' when nothing is pending
         flag2 = ''
         # stylesflag: emit the <render> block before the next line
         stylesflag=False
         for line in xml:
             if stylesflag == True:
                 out.write("<render>"+charts+"<styles>"+self.styles+"</styles></render>")
                 stylesflag = False
             ################# Remove header trash #######################
             if 'phyloxml' in line:
                 line = line.split('phyloxml')[0]+'phyloxml>'
             line = line.replace('\"','\'').replace('phy:','')
             ################# Remove forced rooting #####################
             if flag == True:
                 if '>1.0<' in line:
                     line=line.replace('>1.0<','>0.0<')
                     flag = False
             if "rooted='false'" in line:
                 flag = True
                 stylesflag=True
             ################# Force even branch lengths #################
             if branches == True:
                 if '<branch_length>' in line:
                     if '<branch_length>0.0</branch_length>' not in line:
                         number = line.split('>')[1].split('<')[0]
                         line = line.replace(number,'0.1')
             ################# Reformat names ############################
             if '<name>' in line:
                 name = line.split('<')[1].split('>')[1]
                 chart = '<chart>'
                 for ring in self.rings:
                     # NOTE(review): compared with the string 'True', while
                     # the 'class' ring above is switched off with the
                     # boolean False - confirm the stored type.
                     if self.rings[ring]['include']=='True':
                         if self.rings[ring]['color_type']=='single':
                             if self.prots[name]['acc'] in self.rings[ring]['items']:
                                 chart += '<%s>%s_true</%s>' %(ring,ring,ring)
                             else:
                                 chart += '<%s>%s_false</%s>' %(ring,ring,ring)
                         else:
                             chart += '<'+ring+'>'+self.prots[name][ring]+'</'+ring+'>'
                 chart += '</chart>'
                 flag2 = [name,chart]
                 line = line.replace(name,self.prots[name]['name']).replace('<name', "<name bgStyle='%s'" %self.prots[name]['acc'])
             ############## Add annotations and descriptions #############
             if '<branch_length>' in line:
                 line=line.replace('>1E05<','>0.00001<').replace('-','')
                 if '>0.0<' in line and flag == True:
                     line=line.replace('>0.0<','>0.00001<')
                 if flag2 != '':
                     line = line.strip('\n')+' <annotation><desc>'+self.prots[flag2[0]]['desc']+' ('+self.prots[flag2[0]]['species']+')'+'</desc><uri>/protein/'+self.prots[flag2[0]]['link']+'</uri> </annotation>'+flag2[1]
                     flag2=''
             out.write(line)

     self.box = self.drawColorPanel()
コード例 #45
0
ファイル: convert_tree.py プロジェクト: JinfengChen/Rice_pop
from Bio import Phylo
# Rewrite 'test.3000.nhx' as 'test.3000.newick'; both sides are handled
# with Bio.Phylo's 'newick' format.
Phylo.convert('test.3000.nhx', 'newick', 'test.3000.newick', 'newick')
コード例 #46
0
ファイル: PRD.py プロジェクト: JAlvarezJarreta/MEvoLib
def map_seqs ( record_list, tree_file, file_format, subset_size, overlapping,
               binary = 'dcm' ) :
    """
    Generate a map of the sequences in sets, of at most 'subset_size', with the
    specified overlapping using the padded-Recursive-DCM3 decomposition (PRD)
    from DACTAL system. If 'tree_file' contains a relative path, the current
    working directory will be used to get the absolute path.

    Arguments :
        record_list  ( list )
            List of SeqRecord objects (from Biopython).
        tree_file  ( string )
            Input tree file.
        file_format  ( string )
            Tree file format.
        subset_size  ( int )
            Maximum subset size.
        overlapping  ( int )
            Number of overlapping sequences between any two resultant subsets.
        binary  ( Optional[string] )
            Name or path of the DCM binary file.

    Returns :
        dict
            Dictionary with the set identifiers as keys and the corresponding
            sequences as values in lists of SeqRecord objects.

    Raises :
        ValueError
            When 'subset_size' < (4 * 'overlapping').
        RuntimeError
            If the call to the dcm command raises an exception.
        IOError
            If the dcm tool can't generate a decomposition for the 'subset_size'
            and 'overlapping' values given.

    * The tree file format must be supported by Bio.Phylo.
    * The worker pool used for the recursive decomposition is always shut
      down before returning, also when a decomposition fails.
    """
    if ( subset_size < (4 * overlapping) ) :
        raise ValueError('The maximum subset size must be greater than or ' \
                         'equal to 4 times the overlapping value')
    # else : # subset_size >= (4 * overlapping)
    # If the input file format is not supported by the PRD process, convert it
    # to a temporary supported file
    infile_path = get_abspath(tree_file)
    if ( file_format.lower() != 'newick' ) :
        # Keep 'tmpfile' referenced: the temporary file is deleted as soon as
        # the object is closed or garbage collected
        tmpfile = tempfile.NamedTemporaryFile()
        Phylo.convert(infile_path, file_format, tmpfile.name, 'newick')
        infile_path = tmpfile.name
    # The first decomposition process will be always executed, so there is no
    # need to overload this stage with the multiprocess generation
    set_list, further_decomp = _prd_decomposition(infile_path, subset_size,
                                                  str(overlapping), binary)
    # Parallelization of the recursive decomposition of the different subtrees.
    # All new subtrees are attached to 'further_decomp' file list so we can
    # launch at most one process per core, speeding up the whole process
    start = 0
    pool = multiprocessing.Pool(processes=NUMCORES)
    try :
        # 'further_decomp' grows while it is being consumed, so its length is
        # re-read on every round
        while ( start < len(further_decomp) ) :
            end = min(len(further_decomp), start + NUMCORES)
            results = [pool.apply_async(_prd_decomposition,
                                        args=(further_decomp[i], subset_size,
                                              str(overlapping), binary,))
                               for i in range(start, end)]
            # Collect the results of all the processes launched
            for pool_result in results :
                output = pool_result.get()
                set_list += output[0]
                further_decomp += output[1]
            start = end
    finally :
        # On success every result has already been collected, so nothing is
        # lost by terminating; on failure the pending work is abandoned.
        # Either way the worker processes are not leaked.
        pool.terminate()
        pool.join()
    # Remove all the temporary files created for the multiprocessing stage
    for file_path in further_decomp :
        os.remove(file_path)
    record_dict = {record.id: record  for record in record_list}
    # Map all the resultant sets with an unique set id and replace the sequence
    # ids by their corresponding Bio.SeqRecord object
    set_dict = {}
    num_zeros = len(str(len(set_list)))
    for index, seq_id_list in enumerate(set_list, 1) :
        set_id = 'prdset{}'.format(str(index).zfill(num_zeros))
        set_dict[set_id] = [record_dict[seq_id]  for seq_id in seq_id_list]
    return ( set_dict )
コード例 #47
0
ファイル: main.py プロジェクト: ArtyomKaltovich/Phylo
import sys

import matplotlib.pyplot as plt
from Bio import Phylo

from utils import save_data_to_file

INPUT_FILE_NAME = "data/data.txt"
OUT_FILE_NAME = "data/data.phyloxml"


if __name__ == '__main__':
    sys.path.append(".")
    # Write the data to a file first: according to the original author, Phylo
    # does not accept strings here, and a tempfile did not work either (it
    # always ended up empty).
    save_data_to_file(INPUT_FILE_NAME)
    # Parse every Newick tree found in the input file.
    data = Phylo.parse(INPUT_FILE_NAME, "newick")
    fig = plt.figure(figsize=(25, 30))
    axes = fig.add_subplot(1, 1, 1)
    for tree in data:
        tree.ladderize()
        # Text rendering on stdout, then a matplotlib rendering on the shared axes
        Phylo.draw_ascii(tree)
        Phylo.draw(tree, do_show=False, axes=axes)
        # NOTE(review): the same two paths are written on every iteration, so
        # with several trees each save overwrites the previous one.
        fig.savefig("plot/tree.svg")
        fig.savefig("plot/tree.png")
    # Finally store the input trees in PhyloXML format as well.
    Phylo.convert(INPUT_FILE_NAME, "newick", OUT_FILE_NAME, "phyloxml")
コード例 #48
0
ファイル: __init__.py プロジェクト: JAlvarezJarreta/MEvoLib
def get_consensus_tree ( binary, infile, infile_format, args = 'default',
                         outfile = None, outfile_format = 'newick' ) :
    """
    Run a consensus tool over a file of phylogenetic trees and return the
    consensus tree it produces. The tool wrapper is selected through the
    lower-cased file name of 'binary'. A relative 'infile' is resolved against
    the current working directory. 'outfile' is handed to the wrapper's
    argument builder; 'outfile_format' is accepted but not used by the code
    of this function.

    Arguments :
        binary  ( string )
            Name or path of the consensus tool.
        infile  ( string )
            Input phylogenetic trees file.
        infile_format  ( string )
            Input file format.
        args  ( Optional[string] )
            Keyword or arguments for the consensus tool, without the infile
            and outfile arguments. 'default' is used when omitted.
            * For Consense, the second character will be used as separator of
            the different arguments.
        outfile  ( Optional[string] )
            Consensus tree output file.
        outfile_format  ( Optional[string] )
            Output file format. By default, NEWICK format.

    Returns :
        Bio.Phylo.BaseTree
            Resultant consensus tree.

    Raises :
        ValueError
            If the tool introduced isn't included in MEvoLib.
        IOError
            If the input path or the input file provided doesn't exist.
        RuntimeError
            If the call to the consensus tool command exits with an error.
        IOError
            If the consensus tool didn't generate a consensus tree (indicated by
            user's options/arguments).

    * The input file format must be supported by Bio.Phylo.
    * The output file format must be supported by Bio.Phylo.
    """
    # Only the executable's file name decides which tool wrapper is used
    tool_name = os.path.basename(binary).lower()
    if ( tool_name not in _CONS_TOOL_TO_LIB ) :
        raise ValueError('The consensus tool "{}" isn\'t included in ' \
                         'MEvoLib.PhyloAssemble'.format(tool_name))
    # Fetch every hook of the wrapper up front
    wrapper = _CONS_TOOL_TO_LIB[tool_name]
    supported_formats = wrapper.SPRT_INFILE_FORMATS
    build_args = wrapper.gen_args
    build_stdin = wrapper.gen_stdin_content
    fetch_results = wrapper.get_results
    remove_leftovers = wrapper.cleanup
    trees_path = get_abspath(infile)
    # An unsupported input format is converted into the tool's first supported
    # format; the temporary file lives as long as 'converted' is referenced
    if ( infile_format.lower() not in supported_formats ) :
        converted = tempfile.NamedTemporaryFile()
        Phylo.convert(trees_path, infile_format, converted.name,
                      supported_formats[0])
        trees_path = converted.name
    # Full command line, followed by the text to feed the tool on stdin
    command = [binary] + build_args(args, trees_path, outfile)
    stdin_text = build_stdin(args)
    with tempfile.NamedTemporaryFile(mode='w+') as stdin_file :
        stdin_file.write(stdin_text)
        # Rewind so the child process reads the text from the beginning
        stdin_file.seek(0)
        try :
            subprocess.check_call(command, stdin=stdin_file, stdout=DEVNULL,
                                  stderr=DEVNULL, universal_newlines=True)
        except subprocess.CalledProcessError as error :
            remove_leftovers(command)
            raise RuntimeError('Running "{}" raised an exception'
                               .format(' '.join(error.cmd)))
        # The tool finished cleanly: collect the tree, then tidy up
        consensus_tree = fetch_results(command)
        remove_leftovers(command)
        return ( consensus_tree )
コード例 #49
0
ファイル: BioMagick.py プロジェクト: LeeBergstrand/BioMagick
def direct_convert(settings, id_results, out_path, out_formats, alphabet):
	"""Convert identified input files with the matching BioPython converter.

	Two modes, selected by ``out_path``:

	* ``out_path is None``: only the first ``(path, format)`` pair of
	  ``id_results`` is converted, to ``out_formats[0]``. The result goes
	  to the scratch file "./conv.tmp", is printed, and the scratch file
	  is removed. A conversion ValueError ends the program with status 1.
	* otherwise: every input is converted to every format of
	  ``out_formats``. The output file is ``out_path`` + the input's base
	  name up to its first "." + the extension configured for the output
	  format. A conversion ValueError is reported and that input skipped.

	Args:
		settings: mapping of format name -> object exposing ``bioclass``
			("seq", "phylo" or "align") and ``extension``.
		id_results: mapping of input path -> identified format name.
		out_path: output directory, or None to print the result.
		out_formats: list of target format names.
		alphabet: forwarded to ``SeqIO.convert`` for "seq" formats only.

	Raises:
		Exception: in print mode, when the input format is "unidentified".
	"""
	if out_path is None:
		out_file = "./conv.tmp"
		# Only the first identified input and the first output format are used
		in_path, in_format = list(id_results.items())[0]
		out_format = out_formats[0]

		if in_format == "unidentified":
			raise Exception("Failed to identify the file")

		try:
			# The input format's bioclass selects the BioPython converter
			format_setting = settings[in_format]
			if format_setting.bioclass == "seq":
				SeqIO.convert(in_path, in_format.lower(), out_file, out_format, alphabet)
			elif format_setting.bioclass == "phylo":
				Phylo.convert(in_path, in_format.lower(), out_file, out_format)
			elif format_setting.bioclass == "align":
				AlignIO.convert(in_path, in_format.lower(), out_file, out_format)
			else:
				print("Error: invalid BioPython conversion class: %s" % format_setting.bioclass)
				sys.exit(1)
		except ValueError as e:
			print("Error in conversion of " + in_path + " to " + out_format + ": " + str(e))
			sys.exit(1)

		# Echo the converted data to stdout
		with open(out_file, "r") as tmp_file:
			print(tmp_file.read())

		os.remove(out_file)  # Is this really necessary?
	else:
		for out_format in out_formats:
			for in_path, in_format in id_results.items():
				# Build "<out_path><separator><input base name>" with the
				# separator of the current platform
				out_file = out_path
				if sys.platform == "win32":
					if out_file[-1] != "\\":
						out_file += "\\"

					out_file += ntpath.basename(in_path).split('.')[0]
				else:
					if out_file[-1] != "/":
						out_file += "/"

					out_file += os.path.basename(in_path).split('.')[0]

				# Append the extension configured for the output format
				out_extension = settings[out_format].extension
				out_file = out_file + "." + out_extension
				print("\nConverting %s file %s to %s file %s" % (in_format, in_path, out_format, out_file))

				try:
					# Same converter selection as in the print mode above
					format_setting = settings[in_format]
					if format_setting.bioclass == "seq":
						SeqIO.convert(in_path, in_format.lower(), out_file, out_format, alphabet)
					elif format_setting.bioclass == "phylo":
						Phylo.convert(in_path, in_format.lower(), out_file, out_format)
					elif format_setting.bioclass == "align":
						AlignIO.convert(in_path, in_format.lower(), out_file, out_format)
					else:
						print("Error: invalid BioPython conversion class: %s" % format_setting.bioclass)
						sys.exit(1)
				except ValueError as e:
					# Unlike the print mode, a failed conversion only skips this input
					print("\nError in conversion of " + in_path + " to " + out_format + ": " + str(e))
					print("Skipping " + in_path + " ...\n")
					continue