Example #1
0
 def get_seq_at_step(self, step_idx, flanked=False):
     """
     Replay the first `step_idx` entries of `self.mutation_order` on the
     start sequence and return the resulting sequence.

     @param step_idx: number of mutation steps to replay; 0 returns the start sequence
     @param flanked: if True, `left_flank` and `right_flank` of the observed
                     sequence mutation are attached around the result

     @return the nucleotide sequence after the `step_idx`-th mutation
     """
     obs = self.obs_seq_mutation
     seq = obs.start_seq
     for step in range(step_idx):
         pos = self.mutation_order[step]
         # Each mutated position takes the nucleotide it has in the end sequence
         seq = mutate_string(seq, pos, obs.end_seq[pos])
     if not flanked:
         return seq
     return obs.left_flank + seq + obs.right_flank
Example #2
0
    def create_for_mutation_steps(
        self,
        seq_mut_order,
        left_update_region,
        right_update_region,
    ):
        """
        Walk through the whole mutation order, starting from the flanked start
        sequence, and collect the feature deltas produced at each step.

        @param seq_mut_order: ImputedSequenceMutations
        @param left_update_region: forwarded unchanged to `update_mutation_step`
        @param right_update_region: forwarded unchanged to `update_mutation_step`

        @return list of MultiFeatureMutationStep, one per entry of the mutation order;
                the first step carries an empty `neighbors_feat_old` dict

        @raise AssertionError: if the number of steps built differs from `num_mutations`
        """
        obs = seq_mut_order.obs_seq_mutation
        seq = obs.start_seq_with_flanks

        steps = []
        prev_pos = None
        prev_feats = dict()
        mutated = set()
        for step_idx, pos in enumerate(seq_mut_order.mutation_order):
            curr_feats, next_feats = self.update_mutation_step(
                step_idx,
                pos,
                prev_pos,
                seq_mut_order,
                seq,
                mutated,
                left_update_region=left_update_region,
                right_update_region=right_update_region,
            )
            feat_idx = self._get_mutating_pos_feat_idx(pos, seq, obs)
            step = MultiFeatureMutationStep(
                feat_idx,
                pos,
                neighbors_feat_old=prev_feats,
                neighbors_feat_new=curr_feats,
            )
            steps.append(step)

            # Apply the mutation; `pos` is unflanked, so shift it into flanked coordinates
            flanked_pos = pos + obs.left_flank_len
            seq = mutate_string(
                seq, flanked_pos, obs.end_seq_with_flanks[flanked_pos])
            mutated.add(pos)
            # The "future" features of this step are the "old" features of the next one
            prev_feats, prev_pos = next_feats, pos

        if len(steps) == obs.num_mutations:
            return steps
        raise AssertionError(
            "%d vs %d" % (len(steps), obs.num_mutations))
Example #3
0
    def create_remaining_mutation_steps(
        self,
        seq_mut_order,
        update_step_start,
        left_update_region,
        right_update_region,
    ):
        """
        Collect the feature deltas for mutation steps `update_step_start`
        through `num_mutations` - 1, starting from the flanked sequence
        returned by `seq_mut_order.get_seq_at_step(update_step_start, flanked=True)`.

        @param seq_mut_order: ImputedSequenceMutations
        @param update_step_start: which mutation step to start calculating features for
        @param left_update_region: forwarded unchanged to `update_mutation_step`
        @param right_update_region: forwarded unchanged to `update_mutation_step`

        @return list of MultiFeatureMutationStep, one per processed step; the first
                one carries an empty `neighbors_feat_old` dict. Empty list when
                `update_step_start` is not below `num_mutations`.
        """
        seq = seq_mut_order.get_seq_at_step(update_step_start, flanked=True)
        # Positions of all steps before the starting one count as already mutated
        mutated = set(seq_mut_order.mutation_order[:update_step_start])
        obs = seq_mut_order.obs_seq_mutation

        steps = []
        prev_pos = None
        prev_feats = dict()
        for step_idx in range(update_step_start, obs.num_mutations):
            pos = seq_mut_order.mutation_order[step_idx]
            curr_feats, next_feats = self.update_mutation_step(
                step_idx,
                pos,
                prev_pos,
                seq_mut_order,
                seq,
                mutated,
                left_update_region=left_update_region,
                right_update_region=right_update_region,
            )
            feat_idx = self._get_mutating_pos_feat_idx(pos, seq, obs)
            step = MultiFeatureMutationStep(
                feat_idx,
                pos,
                neighbors_feat_old=prev_feats,
                neighbors_feat_new=curr_feats,
            )
            steps.append(step)

            # Apply the mutation; `pos` is unflanked, so shift it into flanked coordinates
            flanked_pos = pos + obs.left_flank_len
            seq = mutate_string(
                seq, flanked_pos, obs.end_seq_with_flanks[flanked_pos])
            mutated.add(pos)
            # The "future" features of this step are the "old" features of the next one
            prev_feats, prev_pos = next_feats, pos
        return steps
Example #4
0
    def get_shuffled_mutation_steps_delta(
        self,
        seq_mut_order,
        update_step,
        flanked_seq,
        already_mutated_pos,
        left_update_region,
        right_update_region,
    ):
        """
        Compute the feature information for two consecutive mutation steps,
        `update_step` and `update_step` + 1, starting from `flanked_seq`.

        @param seq_mut_order: object providing `mutation_order` (indexable by step) and
                              `obs_seq_mutation`; presumably an ImputedSequenceMutations --
                              confirm against callers
        @param update_step: the index of the mutation step being shuffled with the (`update_step` + 1)-th step
        @param flanked_seq: must be a FLANKED sequence; only the local name is rebound
                            when the first mutation is applied
        @param already_mutated_pos: set of positions that already mutated - dont calculate feature vals for these.
                                    Passed as-is to both `update_mutation_step` calls; this method
                                    itself does not add the first mutation position to it.
        @param left_update_region: forwarded unchanged to `update_mutation_step`
        @param right_update_region: forwarded unchanged to `update_mutation_step`

        @return a tuple with the feature index at this mutation step and the
                MultiFeatureMutationStep of the next mutation step
        """
        obs = seq_mut_order.obs_seq_mutation
        first_mutation_pos = seq_mut_order.mutation_order[update_step]
        second_mutation_pos = seq_mut_order.mutation_order[update_step + 1]

        # First step: only the "future" features are needed; they become the
        # "old" neighbor features of the second step.
        _, feat_dict_future = self.update_mutation_step(
            update_step,
            first_mutation_pos,
            None,
            seq_mut_order,
            flanked_seq,
            already_mutated_pos,
            left_update_region=left_update_region,
            right_update_region=right_update_region,
        )
        first_mut_pos_feat_idx = self._get_mutating_pos_feat_idx(
            first_mutation_pos, flanked_seq, obs)

        # Apply the first mutation; positions are unflanked, so shift into flanked coordinates
        curr_mutation_pos = first_mutation_pos + obs.left_flank_len
        flanked_seq = mutate_string(
            flanked_seq,
            curr_mutation_pos,
            obs.end_seq_with_flanks[curr_mutation_pos],
        )

        # Second step: evaluated on the mutated sequence, with the first position
        # passed as the previously mutated one; no future dict is requested.
        feat_dict_curr, _ = self.update_mutation_step(
            update_step + 1,
            second_mutation_pos,
            first_mutation_pos,
            seq_mut_order,
            flanked_seq,
            already_mutated_pos,
            calc_future_dict=False,
            left_update_region=left_update_region,
            right_update_region=right_update_region,
        )
        second_mut_pos_feat_idx = self._get_mutating_pos_feat_idx(
            second_mutation_pos, flanked_seq, obs)

        return first_mut_pos_feat_idx, MultiFeatureMutationStep(
            second_mut_pos_feat_idx,
            second_mutation_pos,
            neighbors_feat_old=feat_dict_future,
            neighbors_feat_new=feat_dict_curr,
        )
    def simulate(self,
                 start_seq,
                 left_flank=None,
                 right_flank=None,
                 censoring_time=None,
                 percent_mutated=None,
                 with_replacement=False,
                 obs_seq_mutation=None):
        """
        Simulate mutations on a sequence until a stopping criterion is met.

        Without replacement the simulation also stops once every position has
        mutated. With replacement positions are never exhausted, so at least
        one of `censoring_time` / `percent_mutated` must be given.

        @param start_seq: string for the original sequence; includes flanks unless they are provided by left_flank/right_flank
        @param left_flank: the left flank
        @param right_flank: the right flank
                            (flanks are cut from `start_seq` only when BOTH are None)
        @param censoring_time: how long to mutate the sequence for; the first sampled mutation
                               whose time exceeds it is discarded and the simulation stops
        @param percent_mutated: stop once the number of mutations reaches
                                `percent_mutated * len(start_seq)` (i.e. it is used as a fraction
                                of the unflanked length)
        @param with_replacement: True = a position can mutate multiple times, False = a position can mutate at most once
        @param obs_seq_mutation: forwarded unchanged to `feature_generator.create_for_sequence`

        @return FullSequenceMutations, ending sequence and entire history of mutations

        @raise ValueError: if `with_replacement` is True for a non-empty sequence and neither
                           `censoring_time` nor `percent_mutated` is given (the simulation
                           would never terminate)
        """
        if left_flank is None and right_flank is None:
            # No flanks supplied: carve them off both ends of start_seq.
            # The right boundary is computed from the length (not a negative
            # index) so that a zero-length right flank yields an empty string.
            left_len = self.feature_generator.max_left_motif_flank_len
            right_start = len(
                start_seq) - self.feature_generator.max_right_motif_flank_len
            left_flank = start_seq[:left_len]
            right_flank = start_seq[right_start:]
            start_seq = start_seq[left_len:right_start]

        pos_to_mutate = set(range(len(start_seq)))
        if (with_replacement and pos_to_mutate and censoring_time is None
                and percent_mutated is None):
            # pos_to_mutate never shrinks with replacement, so without a
            # stopping criterion the loop below could not end.
            raise ValueError(
                "simulate() with with_replacement=True requires censoring_time "
                "or percent_mutated; otherwise the simulation never terminates")

        # Without replacement, features are only requested for positions that
        # can still mutate; the set is updated in place inside the loop.
        feat_kwargs = {"obs_seq_mutation": obs_seq_mutation}
        if not with_replacement:
            feat_kwargs["do_feat_vec_pos"] = pos_to_mutate

        mutations = []
        intermediate_seq = start_seq
        last_mutate_time = 0
        while pos_to_mutate:
            # TODO: For speedup, we don't need to recalculate all the features.
            feature_vec_dict = self.feature_generator.create_for_sequence(
                intermediate_seq,
                left_flank,
                right_flank,
                **feat_kwargs)

            mutate_time_delta, mutate_pos, nucleotide_target = self._sample_mutation(
                feature_vec_dict, intermediate_seq, pos_to_mutate)
            mutate_time = last_mutate_time + mutate_time_delta

            # Both stopping checks run before the sampled mutation is recorded,
            # so the mutation that trips a criterion is dropped.
            if censoring_time is not None and censoring_time < mutate_time:
                break
            if percent_mutated is not None and len(
                    mutations) >= percent_mutated * len(start_seq):
                break

            last_mutate_time = mutate_time

            if not with_replacement:
                pos_to_mutate.remove(mutate_pos)

            mutations.append(
                MutationEvent(
                    mutate_time,
                    mutate_pos,
                    nucleotide_target,
                ))

            intermediate_seq = mutate_string(intermediate_seq, mutate_pos,
                                             nucleotide_target)
        return FullSequenceMutations(
            start_seq,
            intermediate_seq,
            left_flank,
            right_flank,
            mutations,
        )