Esempio n. 1
0
        def merge_nodes(source_arr, isall=False):
            mergers = np.array([[cost_gain(n1,n2, clade) if i1<i2 else (0.0,-1.0)
                                    for i1,n1 in enumerate(source_arr)]
                                for i2, n2 in enumerate(source_arr)])
            LH = 0
            while len(source_arr) > 1 + int(isall):
                # max possible gains of the cost when connecting the nodes:
                # this is only a rough approximation because it assumes the new node positions
                # to be optimal
                new_positions = mergers[:,:,0]
                cost_gains = mergers[:,:,1]
                # set zero to large negative value and find optimal pair
                np.fill_diagonal(cost_gains, -1e11)
                idxs = np.unravel_index(cost_gains.argmax(),cost_gains.shape)
                if (idxs[0] == idxs[1]) or cost_gains.max()<0:
                    self.logger("TreeTime._poly.merge_nodes: node is not fully resolved "+clade.name,4)
                    return LH

                n1, n2 = source_arr[idxs[0]], source_arr[idxs[1]]
                LH += cost_gains[idxs]

                new_node = Phylo.BaseTree.Clade()

                # fix positions and branch lengths
                new_node.time_before_present = new_positions[idxs]
                new_node.branch_length = clade.time_before_present - new_node.time_before_present
                new_node.clades = [n1,n2]
                n1.branch_length = new_node.time_before_present - n1.time_before_present
                n2.branch_length = new_node.time_before_present - n2.time_before_present

                # set parameters for the new node
                new_node.up = clade
                n1.up = new_node
                n2.up = new_node
                new_node.cseq = clade.cseq
                self._store_compressed_sequence_to_node(new_node)

                new_node.mutations = []
                new_node.mutation_length = 0.0
                new_node.branch_length_interpolator = BranchLenInterpolator(new_node, self.gtr, one_mutation=self.one_mutation)
                clade.clades.remove(n1)
                clade.clades.remove(n2)
                clade.clades.append(new_node)
                self.logger('TreeTime._poly.merge_nodes: creating new node as child of '+clade.name,3)
                self.logger("TreeTime._poly.merge_nodes: Delta-LH = " + str(cost_gains[idxs].round(3)), 3)

                # and modify source_arr array for the next loop
                if len(source_arr)>2: # if more than 3 nodes in polytomy, replace row/column
                    for ii in np.sort(idxs)[::-1]:
                        tmp_ind = np.arange(mergers.shape[0])!=ii
                        mergers = mergers[tmp_ind].swapaxes(0,1)
                        mergers = mergers[tmp_ind].swapaxes(0,1)

                    source_arr.remove(n1)
                    source_arr.remove(n2)
                    new_gains = np.array([[cost_gain(n1,new_node, clade) for n1 in source_arr]])
                    mergers = np.vstack((mergers, new_gains)).swapaxes(0,1)

                    source_arr.append(new_node)
                    new_gains = np.array([[cost_gain(n1,new_node, clade) for n1 in source_arr]])
                    mergers = np.vstack((mergers, new_gains)).swapaxes(0,1)
                else: # otherwise just recalculate matrix
                    source_arr.remove(n1)
                    source_arr.remove(n2)
                    source_arr.append(new_node)
                    mergers = np.array([[cost_gain(n1,n2, clade) for n1 in source_arr]
                                       for n2 in source_arr])

            return LH
Esempio n. 2
0
    def init_date_constraints(self, ancestral_inference=False, clock_rate=None, **kwarks):
        """
        Get the conversion coefficients between the dates and the branch
        lengths as they are used in ML computations. The conversion formula is
        assumed to be 'length = k*numdate + b'. For convenience, these
        coefficients as well as regression parameters are stored in the
        dates2dist object.

        ..Note:: that tree must have dates set to all nodes before calling this
        function. (This is accomplished by calling load_dates func).

        Parameters
        ----------

         ancestral_inference: bool
            Whether or not to reinfer ancestral sequences
            done by default when ancestral sequences are missing

        """
        self.logger("ClockTree.init_date_constraints...",2)
        self.tree.coalescent_joint_LH = 0
        if ancestral_inference or (not hasattr(self.tree.root, 'sequence')):
            self.infer_ancestral_sequences('ml', marginal=self.branch_length_mode=='marginal',
                                            sample_from_profile='root',**kwarks)

        # set the None  for the date-related attributes in the internal nodes.
        # make interpolation objects for the branches
        self.logger('ClockTree.init_date_constraints: Initializing branch length interpolation objects...',3)
        for node in self.tree.find_clades(order='postorder'):
            if node.up is None:
                node.branch_length_interpolator = None
            else:
                # copy the merger rate and gamma if they are set
                if hasattr(node,'branch_length_interpolator') and node.branch_length_interpolator is not None:
                    gamma = node.branch_length_interpolator.gamma
                    merger_cost = node.branch_length_interpolator.merger_cost
                else:
                    gamma = 1.0
                    merger_cost = None

                if self.branch_length_mode=='marginal':
                    node.profile_pair = self.marginal_branch_profile(node)

                node.branch_length_interpolator = BranchLenInterpolator(node, self.gtr,
                            pattern_multiplicity = self.multiplicity, min_width=self.min_width,
                            one_mutation=self.one_mutation, branch_length_mode=self.branch_length_mode)

                node.branch_length_interpolator.merger_cost = merger_cost
                node.branch_length_interpolator.gamma = gamma
        self.date2dist = utils.DateConversion.from_tree(self.tree, clock_rate)

        # make node distribution objects
        for node in self.tree.find_clades(order="postorder"):
            # node is constrained
            if hasattr(node, 'numdate_given') and node.numdate_given is not None:
                # set the absolute time before present in branch length units
                if np.isscalar(node.numdate_given):
                    tbp = self.date2dist.get_time_before_present(node.numdate_given)
                    node.date_constraint = Distribution.delta_function(tbp, weight=1.0, min_width=self.min_width)
                else:
                    tbp = self.date2dist.get_time_before_present(np.array(node.numdate_given))
                    node.date_constraint = Distribution(tbp, np.ones_like(tbp), is_log=False, min_width=self.min_width)

                if hasattr(node, 'bad_branch') and node.bad_branch==True:
                    self.logger("ClockTree.init_date_constraints -- WARNING: Branch is marked as bad"
                                ", excluding it from the optimization process"
                                " Will be optimized freely", 4, warn=True)
            else: # node without sampling date set
                node.numdate_given = None
                node.date_constraint = None
Esempio n. 3
0
    def init_date_constraints(self, ancestral_inference=False, slope=None, **kwarks):
        """
        Get the conversion coefficients between the dates and the branch
        lengths as they are used in ML computations. The conversion formula is
        assumed to be 'length = k*numdate_given + b'. For convenience, these
        coefficients as well as regression parameters are stored in the
        dates2dist object.

        Note: that tree must have dates set to all nodes before calling this
        function. (This is accomplished by calling load_dates func).

        Params:
            ancestral_inference: bool -- whether or not to reinfer ancestral sequences
                                 done by default when ancestral sequences are missing

        """
        self.logger("ClockTree.init_date_constraints...",2)

        if ancestral_inference or (not hasattr(self.tree.root, 'sequence')):
            self.infer_ancestral_sequences('ml',sample_from_profile='root',**kwarks)

        # set the None  for the date-related attributes in the internal nodes.
        # make interpolation objects for the branches
        self.logger('ClockTree.init_date_constraints: Initializing branch length interpolation objects...',3)
        for node in self.tree.find_clades():
            if node.up is None:
                node.branch_length_interpolator = None
            else:
                # copy the merger rate and gamma if they are set
                if hasattr(node,'branch_length_interpolator'):
                    gamma = node.branch_length_interpolator.gamma
                    merger_rate = node.branch_length_interpolator.merger_rate
                else:
                    gamma = 1.0
                    merger_rate = self.merger_rate_default
                node.branch_length_interpolator = BranchLenInterpolator(node, self.gtr, one_mutation=self.one_mutation)
                node.branch_length_interpolator.merger_rate = merger_rate
                node.branch_length_interpolator.gamma = gamma
        self.date2dist = utils.DateConversion.from_tree(self.tree, slope)

        # make node distribution objects
        for node in self.tree.find_clades():
            # node is constrained
            if hasattr(node, 'numdate_given') and node.numdate_given is not None:
                # set the absolute time before present in branch length units
                if not np.isscalar(node.numdate_given):
                    node.numdate_given = np.array(node.numdate_given)
                node.time_before_present = self.date2dist.get_time_before_present(node.numdate_given)
                if hasattr(node, 'bad_branch') and node.bad_branch==True:
                    self.logger("ClockTree.init_date_constraints -- WARNING: Branch is marked as bad"
                                ", excluding it from the optimization process"
                                " Will be optimized freely", 4, warn=True)
                    # if there are no constraints - log_prob will be set on-the-fly
                    node.msg_to_parent = None
                else:
                    if np.isscalar(node.numdate_given):
                        node.msg_to_parent = NodeInterpolator.delta_function(node.time_before_present, weight=1)
                    else:
                        node.msg_to_parent = NodeInterpolator(node.time_before_present,
                                                              np.ones_like(node.time_before_present), is_log=False)

            else: # node without sampling date set
                node.numdate_given = None
                node.time_before_present = None
                # if there are no constraints - log_prob will be set on-the-fly
                node.msg_to_parent = None