Example #1
0
    def _data_to_object(self, data, schema=None):
        """Parse tree data with DendroPy and attach the result to ``self._Tree``.

        Parameters
        ----------
        data :
            Tree data handed to ``DendropyTree.get(data=...)``.
        schema : str, optional
            Schema name passed to DendroPy. When omitted, "newick" and then
            "nexus" are attempted.

        Returns
        -------
        ``self._Tree``, after its ``_DendroPy`` attribute has been set and
        ``_construct()`` has been called.

        Raises
        ------
        Exception
            If no schema was given and none of the candidate schemas could
            parse ``data``.
        """
        schemas = [
            "newick",
            "nexus"
        ]
        if schema is None:
            parsed = False
            # Every candidate is attempted; when more than one parses, the
            # later one wins (same precedence this method has always had).
            for s in schemas:
                try:
                    tree = DendropyTree.get(data=data, schema=s)
                except Exception:
                    # This schema cannot read the data; move on to the next.
                    # ``Exception`` (not a bare except) so that Ctrl-C and
                    # interpreter shutdown are never swallowed.
                    continue
                else:
                    parsed = True
            if not parsed:
                raise Exception("""Tree data doesn't seem to be in a format that DendroPy can read.""")
        else:
            # An explicit schema is trusted: parser errors propagate as-is.
            tree = DendropyTree.get(data=data, schema=schema)

        # Kept outside any try block so that errors raised while constructing
        # the wrapper are not mistaken for a parse failure.
        self._Tree._DendroPy = tree
        self._Tree._construct()

        return self._Tree
Example #2
0
def add_outgroup(tree, relative_additional_height):
    """Return a tree in which ``tree`` and a new "outgroup" leaf share a new root.

    The outgroup's edge length is ``(1 + relative_additional_height)`` times
    ``tree.seed_node.distance_from_tip()``. The input tree's seed node is
    attached as a child of the new root, and the outgroup taxon is added to a
    deep copy of the input's taxon namespace.

    The returned tree is obtained by serialising the assembled tree with
    ``str()`` and parsing that text back as Newick.
    """
    height_factor = 1 + relative_additional_height
    outgroup_length = height_factor * tree.seed_node.distance_from_tip()

    outgroup_node = Node(taxon=Taxon("outgroup"), edge_length=outgroup_length)
    namespace = deepcopy(tree.taxon_namespace)
    namespace.add_taxon(outgroup_node.taxon)

    # New root with two children: the outgroup first, then the old root.
    root = Node()
    for child in (outgroup_node, tree.seed_node):
        root.add_child(child)

    assembled = Tree(taxon_namespace=namespace)
    assembled.seed_node = root

    # Using the assembled tree directly led to taxon namespace errors, so it
    # is round-tripped through its Newick text instead.
    return Tree.get(data=str(assembled) + ";", schema="newick")
Example #3
0
 def get_alignment_decomposition_tree(self, p_tree):
     """Return the PhylogeneticTree to use for alignment decomposition.

     When the ``alignment_decomposition_tree`` option is None, a new
     PhylogeneticTree wrapping ``Tree(p_tree.den_tree)`` is returned.
     Otherwise the option is read as a newick stream, reusing the taxon set
     of the root problem's subtree.

     Raises ValueError if a decomposition tree is supplied while ``p_tree``
     and the root problem's subtree differ in leaf count.
     """
     assert isinstance(p_tree, PhylogeneticTree)
     decomposition_input = self.options.alignment_decomposition_tree

     # No separate decomposition tree: work from the placement tree itself.
     if decomposition_input is None:
         return PhylogeneticTree(Tree(p_tree.den_tree))

     if p_tree.count_leaves() != self.root_problem.subtree.count_leaves():
         raise ValueError("Alignment decomposition tree can be different from placement tree only if placement subset size is set to the number of taxa (i.e. entire tree)")

     _LOG.info("Reading alignment decomposition input tree: %s" % (decomposition_input))
     root_taxa = self.root_problem.subtree.get_tree().taxon_set
     parsed = dendropy.Tree.get_from_stream(
         decomposition_input,
         schema="newick",
         preserve_underscores=True,
         taxon_set=root_taxa)
     return PhylogeneticTree(parsed)
Example #4
0
    def _data_to_object(self, data, schema=None):
        """Parse tree data with DendroPy and attach the result to ``self._Tree``.

        Parameters
        ----------
        data :
            Tree data handed to ``DendropyTree.get(data=...)``.
        schema : str, optional
            Schema name passed to DendroPy. When omitted, "newick" and then
            "nexus" are attempted.

        Returns
        -------
        ``self._Tree``, after its ``_DendroPy`` attribute has been set and
        ``_construct()`` has been called.

        Raises
        ------
        Exception
            If no schema was given and none of the candidate schemas could
            parse ``data``.
        """
        schemas = ["newick", "nexus"]
        if schema is None:
            parsed = False
            # Every candidate is attempted; when more than one parses, the
            # later one wins (same precedence this method has always had).
            for s in schemas:
                try:
                    tree = DendropyTree.get(data=data, schema=s)
                except Exception:
                    # This schema cannot read the data; move on to the next.
                    # ``Exception`` (not a bare except) so that Ctrl-C and
                    # interpreter shutdown are never swallowed.
                    continue
                else:
                    parsed = True
            if not parsed:
                raise Exception(
                    """Tree data doesn't seem to be in a format that DendroPy can read."""
                )
        else:
            # An explicit schema is trusted: parser errors propagate as-is.
            tree = DendropyTree.get(data=data, schema=schema)

        # Kept outside any try block so that errors raised while constructing
        # the wrapper are not mistaken for a parse failure.
        self._Tree._DendroPy = tree
        self._Tree._construct()

        return self._Tree
Example #5
0
    def build_subproblems(self):
        """Build the hierarchy of subproblems and return the root problem.

        Steps, in order:

        1. Read the alignment and tree, resolve polytomies and label edges.
        2. Create the root problem via ``_create_root_problem``.
        3. Decompose the tree into placement subsets and create one
           ``SeppProblem`` labelled ``P_<key>`` for each, passing
           ``self.root_problem`` as the second constructor argument
           (presumably the parent problem -- confirm against SeppProblem).
        4. Decompose each placement subtree into alignment subsets and create
           one ``SeppProblem`` labelled ``A_<pkey>_<akey>`` for each.
        5. Create the fragment chunk files and, for every problem yielded by
           ``self.root_problem.iter_leaves()``, one ``FC_...`` problem per
           chunk.

        Returns:
            ``self.root_problem``.

        Raises:
            AssertionError: if the tree/alignment have unexpected types or a
                decomposition produces an empty map.
        """
        (alignment, tree) = self.read_alignment_and_tree()

        # Distances are only computed when the distance option is not 1.
        # NOTE(review): self.distances is passed to decompose_tree below in
        # either case; presumably it is initialised elsewhere -- confirm.
        if options().distance != 1:
            self.compute_distances(alignment)

        assert isinstance(tree, PhylogeneticTree)
        assert isinstance(alignment, MutableAlignment)

        tree.get_tree().resolve_polytomies()
        # Label edges with numbers so that we could assemble things back
        # at the end
        tree.lable_edges()
        ''' Make sure size values are set, and are meaningful. '''
        self.check_and_set_sizes(alignment.get_num_taxa())

        self._create_root_problem(tree, alignment)
        ''' Decompose the tree based on placement subsets'''
        # The decomposition runs on a fresh PhylogeneticTree built from
        # Tree(tree.den_tree) rather than on `tree` itself.
        placement_tree_map = PhylogeneticTree(Tree(
            tree.den_tree)).decompose_tree(
                self.options.placement_size,
                strategy=self.strategy,
                # NOTE(review): under true division (Python 3) this is a
                # float -- confirm decompose_tree accepts a non-integer
                # minSize. The option name is spelled "facotr" here and must
                # match wherever the option is defined.
                minSize=self.options.placement_size /
                int(self.options.exhaustive.placementminsubsetsizefacotr),
                tree_map={},
                pdistance=1,
                decomp_strategy=self.decomp_strategy,
                distances=self.distances,
                maxDiam=None)
        # NOTE(review): the message reports self.minsubsetsize, but the
        # minSize passed above was derived from placement_size -- confirm
        # which value is meant to be shown.
        assert len(placement_tree_map) > 0, (
            "Tree could not be decomposed"
            " given the following settings; strategy:%s minsubsetsize:%s"
            " placement_size:%s" %
            (self.strategy, self.minsubsetsize, self.options.placement_size))
        _LOG.info("Breaking into %d placement subsets." %
                  len(placement_tree_map))
        ''' For placement subsets create a placement subproblem,
            and decompose further'''
        for (p_key, p_tree) in placement_tree_map.items():
            assert isinstance(p_tree, PhylogeneticTree)
            placement_problem = SeppProblem(p_tree.leaf_node_names(),
                                            self.root_problem)
            placement_problem.subtree = p_tree
            placement_problem.label = "P_%s" % str(p_key)
            # The count logged as "nodes" is the number of leaf node names.
            _LOG.debug(
                "Placement subset %s has %d nodes" %
                (placement_problem.label, len(p_tree.leaf_node_names())))
            ''' Further decompose to alignment subsets '''
            # NOTE(review): this call takes decomp_strategy from self.options,
            # while the placement decomposition above uses
            # self.decomp_strategy -- confirm the difference is intended.
            alignment_tree_map = PhylogeneticTree(Tree(
                p_tree.den_tree)).decompose_tree(
                    self.options.alignment_size,
                    strategy=self.strategy,
                    minSize=self.minsubsetsize,
                    tree_map={},
                    decomp_strategy=self.options.decomp_strategy,
                    pdistance=options().distance,
                    distances=self.distances,
                    maxDiam=self.options.maxDiam)
            assert len(alignment_tree_map) > 0, (
                "Tree could not be decomposed"
                " given the following settings; strategy:%s"
                " minsubsetsize:%s alignmet_size:%s" %
                (self.strategy, self.minsubsetsize,
                 self.options.alignment_size))

            _LOG.debug("Placement subset %s has %d alignment subsets: %s" %
                       (placement_problem.label, len(alignment_tree_map),
                        str(sorted(alignment_tree_map.keys()))))
            # Total leaf names summed over all alignment subsets of this
            # placement subset.
            _LOG.debug("Placement subset %s has %d taxa:" %
                       (placement_problem.label,
                        sum([
                            len(a_tree.leaf_node_names())
                            for a_tree in alignment_tree_map.values()
                        ])))
            for (a_key, a_tree) in alignment_tree_map.items():
                assert isinstance(a_tree, PhylogeneticTree)
                self.modify_tree(a_tree)
                # The new problem is not stored locally; presumably the
                # constructor registers it with placement_problem -- confirm.
                alignment_problem = SeppProblem(a_tree.leaf_node_names(),
                                                placement_problem)
                alignment_problem.subtree = a_tree
                alignment_problem.label = "A_%s_%s" % (str(p_key), str(a_key))

        _LOG.info("Breaking into %d alignment subsets." %
                  (len(list(self.root_problem.iter_leaves()))))
        ''' Divide fragments into chunks, to help achieve better parallelism'''
        fragment_chunk_files = self.create_fragment_files()
        self.root_problem.fragment_chunks = len(fragment_chunk_files)
        for alignment_problem in self.root_problem.iter_leaves():
            for afc in range(0, self.root_problem.fragment_chunks):
                # One chunk problem per fragment file, sharing the taxa and
                # subtree of the alignment problem it is created under.
                frag_chunk_problem = SeppProblem(alignment_problem.taxa,
                                                 alignment_problem)
                frag_chunk_problem.subtree = alignment_problem.subtree
                # "A_<p>_<a>" becomes "FC_<p>_<a>_<chunk index>".
                frag_chunk_problem.label = alignment_problem.label.replace(
                    "A_", "FC_") + "_" + str(afc)
                frag_chunk_problem.fragments = fragment_chunk_files[afc]

        _LOG.info("Breaking each alignment subset into %d fragment chunks." %
                  self.root_problem.fragment_chunks)
        _LOG.debug("Subproblem structure: %s" % str(self.root_problem))
        return self.root_problem
Example #6
0
    def launch_alignment(self, context_str=None):
        """Set up the merge for this job's subsets tree, recursing if needed.

        With exactly two nodes in ``self.tree`` the two matching subset jobs
        from ``self.pasta_team.subsets`` become this job's children and
        ``skip_merge`` is set to False. Otherwise the tree is rerooted near
        its centroid edge, one ``PASTAMergerJob`` is created per child of the
        new root (each on a copy of the tree that keeps only that child),
        ``skip_merge`` is set to True, and every new job is launched.

        Raises RuntimeError("PastaAligner Job killed") whenever ``self.killed``
        is found to be set.
        """
        if self.killed:
            raise RuntimeError("PastaAligner Job killed")

        self._reset_jobs()
        self.context_str = '' if context_str is None else context_str

        n_subsets = self.tree.count_nodes()
        _LOG.debug("Recursive merge on a branch with %d subsets" %
                   (n_subsets))
        log_prefix = "subsets tree: %s" % self.tree.compose_newick()[0:200]

        # Base case: two subsets are merged directly.
        if n_subsets == 2:
            tree_nodes = self.tree._tree.nodes()
            _LOG.debug("%s ... pairwise merge " % log_prefix)
            self.skip_merge = False
            first_job = self.pasta_team.subsets[tree_nodes[0].label]
            second_job = self.pasta_team.subsets[tree_nodes[1].label]
            self.subjob1 = first_job
            self.subjob2 = second_job
            for subset_job in (first_job, second_job):
                subset_job.add_parent(self)
                self.add_child(subset_job)
            return

        _LOG.debug("%s ... recursing further " % log_prefix)
        self.skip_merge = True

        # Reroot next to the centroid edge, preferring an internal node.
        centroid = self.tree.get_centroid_edge(spanning=True)
        if centroid.head_node.is_leaf():
            pivot = centroid.tail_node
        else:
            pivot = centroid.head_node
        self.tree._tree.reroot_at_node(pivot, suppress_unifurcations=False)
        _LOG.debug("rerooted to: %s ..." %
                   self.tree.compose_newick()[0:200])

        # One merge job per root child: detach the siblings, copy the tree,
        # then put the siblings back in reverse order of removal.
        root = self.tree._tree.seed_node
        root_children = root.child_nodes()
        spawned_jobs = []
        for kept in root_children:
            detached = [
                root.reversible_remove_child(sibling,
                                             suppress_unifurcations=False)
                for sibling in root_children
                if sibling != kept
            ]
            subtree = PhylogeneticTree(Tree(self.tree._tree))
            for removal in reversed(detached):
                root.reinsert_nodes(removal)
            _LOG.debug("child = %s ..." % subtree.compose_newick()[0:200])
            shared_dataset = self.multilocus_dataset.new_with_shared_meta()

            if subtree.count_nodes() == 2:
                pair = subtree._tree.nodes()
                parent_tmp_dir = self.get_pairwise_temp_dir(
                    pair[0].label, pair[1].label)
            else:
                parent_tmp_dir = self.tmp_base_dir
            job_settings = self.configuration()
            child_job = PASTAMergerJob(multilocus_dataset=shared_dataset,
                                       pasta_team=self.pasta_team,
                                       tree=subtree,
                                       tmp_base_dir=self.tmp_base_dir,
                                       tmp_dir_par=parent_tmp_dir,
                                       delete_temps2=False,
                                       **job_settings)
            child_job.add_parent(self)
            self.add_child(child_job)
            spawned_jobs.append(child_job)

        self.merge_job_list = spawned_jobs

        # Launch the new merge jobs, stopping as soon as a kill is seen.
        for queued_job in self.merge_job_list:
            if self.killed:
                raise RuntimeError("PastaAligner Job killed")
            queued_job.launch_alignment()

        self._merge_queued_event.set()

        if self.killed:
            raise RuntimeError("PastaAligner Job killed")
Example #7
0
# For every dataset, compare each method's predicted tree against the true
# tree for replicates R0..R19 and write one "method,replicate,..." line per
# comparison to result_<dataset>_nj.txt.
for data in dataset:
    # ``with`` guarantees the result file is closed even if reading or
    # comparing a tree raises part-way through.
    with open('result_{}_nj.txt'.format(data), 'w') as result_file:
        for method in distance_methods:
            for i in range(20):
                replicate = 'R' + str(i)
                true_tree_file = '../../{}/{}/R{}/rose.tt'.format(
                    data, data, i)
                predicted_tree_file = (data + '/' + method + '/R' + str(i)
                                       + '/out_tree.nwk')

                # A missing or empty prediction is recorded as an error row
                # instead of aborting the whole run.
                if (not os.path.isfile(predicted_tree_file)
                        or os.stat(predicted_tree_file).st_size == 0):
                    result_file.write(method + ',' + replicate + ',err,err\n')
                    continue

                # Both trees are read into the same taxon namespace.
                tree1 = Tree.get_from_path(
                    predicted_tree_file,
                    "newick",
                    taxon_namespace=tns)
                tree2 = Tree.get_from_path(
                    true_tree_file,
                    "newick",
                    taxon_namespace=tns)

                tree1.encode_bipartitions()
                tree2.encode_bipartitions()

                # Compare once and reuse the result for both the console
                # output and the result file.
                errors = treecompare.false_positives_and_negatives(
                    tree1, tree2)
                print(replicate, errors)
                result_file.write(
                    method + ',' + replicate + ','
                    + ','.join([str(x) for x in errors]) + '\n')
Example #8
0
 def nexus(self, data):
     """Parse ``data`` with DendroPy's "nexus" schema and hand the tree to ``_data_to_object``.

     Returns whatever ``self._data_to_object`` returns for the parsed tree.
     """
     parsed_tree = DendropyTree.get(data=data, schema="nexus")
     return self._data_to_object(parsed_tree)
Example #9
0
 def nexus(self, data):
     """Read nexus-formatted ``data`` into a DendroPy tree and convert it.

     The tree produced by ``DendropyTree.get(..., schema="nexus")`` is passed
     to ``self._data_to_object``, whose result is returned unchanged.
     """
     dendropy_tree = DendropyTree.get(schema="nexus", data=data)
     result = self._data_to_object(dendropy_tree)
     return result