コード例 #1
0
    def pass_seq_to_node(self, seq, edge):
        """Store the end-of-branch sequences on the child node of ``edge``.

        When the father and child nodes carry the same set of ortholog
        groups, ``seq`` is stored unchanged.  Otherwise exactly one father
        ortholog group must have disappeared, and the change is interpreted
        as either a duplication (two new groups in the child, each receiving
        a deep copy of the parental sequence) or a loss (no new group).

        Args:
            seq: mapping from ortholog group to its sequence at the end of
                the branch.  It is modified in place and then stored in
                ``self.node_to_seq`` under the child node.
            edge: indexable whose first two items are the father node name
                and the child node name.

        Raises:
            AssertionError: if the edge does not match the tree, if more or
                fewer than one father ortholog group changed, if the new
                groups do not match the recorded duplication event, or if
                the number of new groups is neither 0 nor 2.
        """
        father_node, child_node = edge[0], edge[1]

        # The tree must agree that father_node is the parent of child_node.
        parent_clade = self.tree.find_parent_clade(child_node)
        assert parent_clade.name == father_node

        father_conf_parts = divide_configuration(
            self.tree.node_to_conf[father_node])
        child_conf_parts = divide_configuration(
            self.tree.node_to_conf[child_node])
        father_loc = set(father_conf_parts['loc'])
        child_loc = set(child_conf_parts['loc'])

        if father_loc != child_loc:
            vanished = father_loc - child_loc  # present in father only
            appeared = child_loc - father_loc  # present in child only

            # Only a single gene may duplicate or be lost per branch for now;
            # supporting tandem duplication would require allowing several.
            assert len(vanished) == 1

            n_appeared = len(appeared)
            if n_appeared == 2:
                # Duplication: the parental copy is replaced by two children.
                parental_orlg = list(vanished)[0]
                assert set(self.tree.dup_events[parental_orlg]) == appeared
                for daughter_orlg in appeared:
                    seq[daughter_orlg] = deepcopy(seq[parental_orlg])
                seq.pop(parental_orlg)
            elif n_appeared == 0:
                # Loss: the gene simply disappears from the child.
                seq.pop(list(vanished)[0])
            else:
                assert False  # no other kind of event is supported

        self.node_to_seq[child_node] = seq
コード例 #2
0
    def sim_root(self):
        """Draw a random sequence for every ortholog group at the root node.

        The result is stored in ``self.node_to_seq`` under the root node's
        name as a dict mapping ortholog group to sequence.

        * ``'HKY'``: ``self.nsites`` nucleotides are drawn using the ``Pi_A``,
          ``Pi_C``, ``Pi_G`` and ``Pi_T`` entries of the model parameters, and
          the value returned by ``draw_from_distribution`` is stored as is.
        * ``'MG94'``: ``self.nsites // 3`` codons are drawn from
          ``self.PMModel.codon_nonstop`` with weights proportional to the
          product of the ``Pi_*`` values of each codon's letters; the codons
          are then joined and stored as a flat list of single characters.

        For any other model name the root entry is left as an empty dict.
        """
        root_name = self.tree.phylo_tree.root.name
        root_conf = self.tree.node_to_conf[root_name]
        root_orlg = divide_configuration(root_conf)
        root_seqs = dict()
        self.node_to_seq[root_name] = root_seqs

        model_name = self.PMModel.name
        if model_name == 'HKY':
            distn = [self.PMModel.parameters['Pi_' + nt] for nt in 'ACGT']
            for orlg in root_orlg['loc']:
                root_seqs[orlg] = draw_from_distribution(
                    distn, self.nsites, 'ACGT')
        elif model_name == 'MG94':
            codons = self.PMModel.codon_nonstop
            # Unnormalised codon weight = product of its letters' Pi values.
            distn = [
                reduce(mul,
                       [self.PMModel.parameters['Pi_' + b] for b in codon], 1)
                for codon in codons
            ]
            distn = np.array(distn) / sum(distn)
            # Floor division keeps the codon count an integer even when true
            # division is in effect; with plain Python 2 ints it is the same
            # value the original ``/`` produced.
            n_codons = self.nsites // 3
            for orlg in root_orlg['loc']:
                codon_seq = draw_from_distribution(distn, n_codons, codons)
                root_seqs[orlg] = list(''.join(codon_seq))
コード例 #3
0
    def sim_one_branch(self, edge, display):
        """Simulate point mutations and IGC events along one branch.

        Starting from a deep copy of the sequences stored for ``edge[0]``,
        events are generated one at a time: the waiting time to the next
        event is exponential with rate ``Total_PM_rate + Total_IGC_init_rate``
        and the event type (point mutation or IGC) is drawn in proportion to
        those two rates.  Each event is applied to the working sequences and
        appended to the log file.  Once the accumulated time exceeds the
        branch length, the sequences are handed to ``pass_seq_to_node`` so
        that they are stored on ``edge[1]``.

        Args:
            edge: key of ``self.tree.edge_to_blen``; ``edge[0]`` is the
                already-simulated node and ``edge[1]`` the node to simulate.
                Its items are joined with ``'_'`` for the log entries.
            display: when truthy, the accumulated time is printed after each
                draw; the flag is also forwarded to the event helpers.

        Raises:
            AssertionError: if ``edge[0]`` has not been simulated yet, if
                ``edge[1]`` already has been, or if the event draw yields
                something other than 0 or 1.
        """
        # The parent end must already be simulated and the child end must not.
        assert (edge[0] in self.node_to_seq
                and not edge[1] in self.node_to_seq)
        blen = self.tree.edge_to_blen[edge]
        starting_seq = self.node_to_seq[edge[0]]
        conf = self.tree.node_to_conf[edge[0]]
        branch_orlg = divide_configuration(conf)

        # The working copy is mutated and later stored on the child node, so
        # it must not share memory with the parent's sequences.
        current_seq = deepcopy(starting_seq)

        # Restrict the IGC initiation / tract matrices to the ortholog groups
        # present on this branch.  The matrices are always allocated so that
        # the IGC branch below never refers to an undefined name.  A concrete
        # float dtype is used because passing the abstract ``np.floating`` as
        # a dtype is deprecated in NumPy.
        ordered_orlg = sorted(branch_orlg['loc'])
        n_orlg = len(ordered_orlg)
        branch_IGC_init_Q = np.zeros((n_orlg, n_orlg), dtype=float)
        branch_IGC_tract_Q = np.zeros((n_orlg, n_orlg), dtype=float)
        Total_IGC_init_Q = np.zeros((n_orlg, n_orlg), dtype=float)
        if n_orlg == 1:
            # A single copy has no IGC partner; the model is not consulted.
            Total_IGC_init_rate = 0.0
        else:
            for i, orlg_i in enumerate(ordered_orlg):
                for j, orlg_j in enumerate(ordered_orlg):
                    branch_IGC_init_Q[i, j] = self.IGCModel.Q_init[orlg_i,
                                                                   orlg_j]
                    branch_IGC_tract_Q[i, j] = self.IGCModel.Q_tract[orlg_i,
                                                                     orlg_j]
                    if i != j and branch_IGC_tract_Q[i, j] != 0:
                        # NOTE(review): `self.nsites / 3` floors under Python 2
                        # integer division when nsites is an int - confirm
                        # that this is the intended value.
                        Total_IGC_init_Q[i, j] = branch_IGC_init_Q[i, j] * (
                            self.nsites / 3 - 1 +
                            1.0 / branch_IGC_tract_Q[i, j])
            Total_IGC_init_rate = Total_IGC_init_Q.sum()

        cummulate_time = 0.0

        while cummulate_time < blen:
            # The total point mutation rate depends on the current sequences
            # and is recomputed before every event.  The total IGC rate is
            # context independent and therefore constant along the branch.
            seq_rate_dict, Total_PM_rate = self.get_mutation_rate(current_seq)
            Total_rate = Total_PM_rate + Total_IGC_init_rate

            if Total_rate == 0:
                # No event can ever occur; avoid dividing by zero below.
                break

            # Exponentially distributed waiting time until the next event.
            cummulate_time += np.random.exponential(1.0 / Total_rate)
            if display:
                print(cummulate_time)

            if cummulate_time > blen:
                break

            # Decide whether this is a point mutation (0) or an IGC event (1).
            event = draw_from_distribution(
                np.array([Total_PM_rate, Total_IGC_init_rate]) / Total_rate,
                1, range(2))

            if event == 0:
                current_seq, mutation_info = self.get_one_point_mutation(
                    current_seq, seq_rate_dict, display)
                to_write_info = ['_'.join(edge),
                                 str(cummulate_time)] + mutation_info
                self.append_to_log_file(to_write_info, 'PM')
            elif event == 1:
                current_seq, IGC_info = self.get_one_IGC_event(
                    current_seq, branch_IGC_init_Q, branch_IGC_tract_Q,
                    Total_IGC_init_Q, ordered_orlg, display)
                to_write_info = ['_'.join(edge),
                                 str(cummulate_time)] + IGC_info
                self.append_to_log_file(to_write_info, 'IGC')
            else:
                # Raised explicitly so the check survives `python -O`.
                raise AssertionError(
                    'draw_from_distribution returned an unexpected event')

        # Hand the sequences to the child node; gene duplication and loss
        # events between the two nodes are handled there.
        self.pass_seq_to_node(current_seq, edge)