Exemple #1
0
def simplest_background_model(markov_order=0, M=4):
    """
    Build a single-state HMM: the state is its own only successor and every
    one of its M emission entries gets its own parameter of value 1/M.

    @arg markov_order: passed as markov_order to hmm.ModelByStates.
    @arg M: passed as M to hmm.ModelByStates; also the number of emission
        entries that are filled in.
    @return: The hmm.ModelByStates instance that was built.
    """
    background = hmm.ModelByStates(M=M, markov_order=markov_order)
    only_state = background.add_state()

    # the state always transitions back to itself
    self_loop = background.add_parameter(1.)
    only_state.add_successor(only_state, self_loop)

    # a separate parameter per symbol, all starting at the uniform value
    for symbol in xrange(M):
        emission = background.add_parameter(1. / M)
        only_state.b[symbol] = emission
    return background
Exemple #2
0
 def create_uniform_background_model(self):
     """
     @return: A HMM with one mosaic with uniform emission probabilities.

     The single state is added through self.add_fully_parameterised_state and
     is then made its own successor with a transition parameter of 1.0.
     """
     model = hmm.ModelByStates(self.M, self.order)
     # divide by the alphabet size (not a literal 4) so the emission
     # distribution is uniform and sums to one whatever self.M is
     self.add_fully_parameterised_state(
             model,
             emission_dist = numpy.ones(self.M) / float(self.M)
     )
     # the only state always returns to itself
     transition_param = model.add_parameter(1.0)
     model.states[0].add_successor(model.states[0], transition_param)
     return model
Exemple #3
0
 def create_background_mosaic_model(self, num_mosaics, p_transition, dirichlet_prior_strength):
     """
     Create a mosaic model in which every state is connected to every state.

     @arg num_mosaics: how many states are added to the model.
     @arg p_transition: value of the single parameter shared by all transitions
         between two different states; self-transitions share one parameter of
         value 1.0 - p_transition.
     @arg dirichlet_prior_strength: scales a vector of self.M ones to form the
         argument of hmm.dirichlet_draw, which supplies each state's emissions.
     @return: The hmm.ModelByStates instance that was built.
     """
     mosaic_model = hmm.ModelByStates(self.M, self.order)
     switch_param = mosaic_model.add_parameter(p_transition)
     stay_param = mosaic_model.add_parameter(1.0 - p_transition)

     # one state per mosaic, each with its own freshly drawn emission distribution
     for _ in xrange(num_mosaics):
         drawn_emissions = hmm.dirichlet_draw(numpy.ones(self.M)*dirichlet_prior_strength)
         self.add_fully_parameterised_state(mosaic_model, emission_dist = drawn_emissions)

     # link every ordered pair of states, including each state to itself
     for source in mosaic_model.states:
         for target in mosaic_model.states:
             shared_param = stay_param if source == target else switch_param
             source.add_successor(target, shared_param)
     return mosaic_model
Exemple #4
0
 def new_model_by_states(self):
     """
     @return: A new hmm.ModelByStates constructed from self.M and self.order.
     """
     fresh_model = hmm.ModelByStates(self.M, self.order)
     return fresh_model
Exemple #5
0
 def __init__(self):
     "Store a hmm.ModelByStates, constructed with no arguments, as self.model."
     default_model = hmm.ModelByStates()
     self.model = default_model
def build_hmm(freqs, gaps, p_binding_site=0.01):
    """
    Build a HMM representing the gapped PWM with the given frequencies and gaps. Cannot handle PWMs with consecutive gaps
    or gaps at beginning or end.

    The model has one background state plus, for each PWM position, one state
    on the positive strand and one on the negative strand.

    @arg freqs: one sequence of emission values per PWM position. Presumably
        4 values per position to match the model's M=4 - TODO confirm.
    @arg gaps: one value per PWM position. A value below 1. marks the position
        as optional: the value itself parameterises the transition into the
        position and 1. - value the transition that jumps over it.
    @arg p_binding_site: the background state moves to the start of each
        strand's motif with a parameter of p_binding_site / 2. and stays in the
        background with 1. - p_binding_site. Defaults to 0.01.
    @return: The hmm.ModelByStates instance that was built.
    @raise ValueError: if freqs and gaps differ in length, or if an optional
        position is first, last or adjacent to another optional position.
    """
    if len(gaps) != len(freqs):
        raise ValueError('Frequencies and gaps must be same length.')
    K = len(gaps)

    # create model
    model = hmm.ModelByStates(M=4, markov_order=0)

    # add background state: it is the initial state and emits uniformly
    # through a single shared parameter
    bg = model.add_state()
    bg.pi = model.add_parameter(1.)
    uniform_param = model.add_parameter(.25)
    for m in xrange(bg.M):
        bg.b[m] = uniform_param

    # add the binding site states in positive and negative directions
    positive_states = [model.add_state() for i in xrange(K)]
    negative_states = [model.add_state() for i in xrange(K)]

    # connect background to initial binding site states; the negative strand
    # is traversed from the last position down to the first
    binding_param = model.add_parameter(p_binding_site / 2.)
    not_binding_param = model.add_parameter(1. - p_binding_site)
    bg.add_successor(positive_states[0], binding_param)
    bg.add_successor(negative_states[-1], binding_param)
    bg.add_successor(bg, not_binding_param)
    always_one_param = model.add_parameter(1.)
    positive_states[-1].add_successor(bg, always_one_param)
    negative_states[0].add_successor(bg, always_one_param)

    # set up emissions: both strands share one parameter per (position, base);
    # the negative state stores it at the mirrored index -b - 1
    for freq, positive_state, negative_state in zip(freqs, positive_states,
                                                    negative_states):
        for b, f in enumerate(freq):
            emission_param = model.add_parameter(f)
            positive_state.b[b] = emission_param
            negative_state.b[-b - 1] = emission_param

    # set up transitions
    for k, gap in enumerate(gaps):
        # the boundary tests come first so that gaps[k - 1] and gaps[k + 1]
        # are only evaluated for interior positions
        if gap < 1. and (0 == k or K - 1 == k or gaps[k - 1] < 1.
                         or gaps[k + 1] < 1.):
            raise ValueError(
                'Gaps cannot be at first or last character nor next to another gap.'
            )
        if gap < 1.:
            # optional position: its predecessor either enters it or jumps
            # straight to the position beyond it
            gap_param = model.add_parameter(gap)
            non_gap_param = model.add_parameter(1. - gap)
            positive_states[k - 1].add_successor(positive_states[k], gap_param)
            positive_states[k - 1].add_successor(positive_states[k + 1],
                                                 non_gap_param)
            negative_states[k + 1].add_successor(negative_states[k - 1],
                                                 non_gap_param)
            negative_states[k + 1].add_successor(negative_states[k], gap_param)
        else:
            # mandatory position: always entered from the neighbouring
            # position on each strand
            if 0 != k:
                positive_states[k - 1].add_successor(positive_states[k],
                                                     always_one_param)
            if K - 1 != k:
                negative_states[k + 1].add_successor(negative_states[k],
                                                     always_one_param)

    return model
def build_model_by_states(freqs, gaps, p_binding_site=0.001):
    """
    Build a HMM representing the gapped PWM with the given frequencies and gaps. Can handle consecutive gaps
    and gaps at beginning or end.

    The model has one background state plus, for each PWM position, one state
    on the positive strand and one on the negative strand.

    @arg freqs: per-position emission weights. Must support .T and
        .sum(axis=1) (e.g. a 2-D numpy array); each row is normalised to sum
        to one before use.
    @arg gaps: one value per PWM position; must support slicing. gaps[m]
        weights the transition into position m and 1. - gaps[m] the
        probability mass carried on past it.
    @arg p_binding_site: the background state stays in the background with a
        parameter of 1. - p_binding_site; p_binding_site / 2. is the total
        weight distributed over the entry transitions of each strand.
    @return: The hmm.ModelByStates instance that was built.
    @raise ValueError: if freqs and gaps differ in length.
    """
    if len(gaps) != len(freqs):
        raise ValueError('Frequencies and gaps must be same length.')
    K = len(gaps)

    # normalise frequencies
    freqs = (freqs.T / freqs.sum(axis=1)).T

    # create model
    model = hmm.ModelByStates(M=4, markov_order=0)

    # add background state: it is the initial state and emits uniformly
    # through a single shared parameter
    bg = model.add_state()
    bg.pi = model.add_parameter(1.)
    uniform_param = model.add_parameter(.25)
    for m in xrange(bg.M):
        bg.b[m] = uniform_param

    # add the binding site states in positive and negative directions
    positive_states = [model.add_state() for i in xrange(K)]
    negative_states = [model.add_state() for i in xrange(K)]

    # connect background to initial binding site states
    # NOTE(review): binding_param is created without a value, and the four
    # motif entry/exit transitions added here (bg -> positive_states[0],
    # bg -> negative_states[-1], positive_states[-1] -> bg,
    # negative_states[0] -> bg) are added again by setup_transitions below.
    # Confirm how hmm treats a valueless parameter and a repeated successor.
    binding_param = model.add_parameter()
    not_binding_param = model.add_parameter(1. - p_binding_site)
    bg.add_successor(positive_states[0], binding_param)
    bg.add_successor(negative_states[-1], binding_param)
    bg.add_successor(bg, not_binding_param)
    always_one_param = model.add_parameter(1.)
    positive_states[-1].add_successor(bg, always_one_param)
    negative_states[0].add_successor(bg, always_one_param)

    # set up emissions: both strands share one parameter per (position, base);
    # the negative state stores it at the mirrored index -b - 1
    for freq, positive_state, negative_state in zip(freqs, positive_states,
                                                    negative_states):
        for b, f in enumerate(freq):
            emission_param = model.add_parameter(f)
            positive_state.b[b] = emission_param
            negative_state.b[-b - 1] = emission_param

    # set up transitions
    def setup_transitions(states, gaps):
        # states and gaps are given in the order the strand is traversed;
        # k == -1 stands for the background state that precedes position 0
        for k in xrange(-1, K):
            if -1 == k:
                k_state = bg
                p_skip = p_binding_site / 2.
            else:
                k_state = states[k]
                p_skip = 1.
            # p_skip is the weight of having skipped every position between
            # k and m; position m is entered with weight p_skip * gaps[m]
            for m in xrange(k + 1, K):
                gap_param = model.add_parameter(p_skip * gaps[m])
                k_state.add_successor(states[m], gap_param)
                p_skip *= (1. - gaps[m])
                if 0. == p_skip:
                    # nothing beyond position m is reachable from k
                    break
            if p_skip > 0.:
                # every remaining position can be skipped, so the source state
                # returns to the background with the left-over weight. Use
                # k_state rather than states[k]: for k == -1 the latter would
                # index the last motif state instead of the background state.
                # NOTE(review): for k == -1 this adds a second bg -> bg
                # transition next to not_binding_param.
                k_state.add_successor(bg, model.add_parameter(p_skip))

    setup_transitions(positive_states, gaps)
    # the negative strand is traversed from the last position to the first
    setup_transitions(negative_states[::-1], gaps[::-1])

    return model
Exemple #8
0
    def create(self, p_gap, emissions):
        """
        Build the model for the motif and its reverse complement.

        @arg p_gap: the probability of a gap.
        @arg emissions: the emission distributions of the bases (including the gap);
        must have length self.K.
        @returns: A tuple (model, starts, ends). The model is defined by its states.
        starts is a list of two model state indices: position 0 of the positive motif
        and position self.K - 1 of the negative motif. ends is a list of two model
        state indices: position self.K - 1 of the positive motif and position 0 of
        the negative motif.
        """
        model = hmm.ModelByStates(M=self.M, markov_order=self.markov_order)

        # the model needs one state for every index the map can produce
        for _ in xrange(self.num_states()):
            model.add_state()

        def state_at(position, positive):
            "Look up the model state for a motif position on the given strand."
            return model.states[self.map.model_idx(position, positive)]

        # the three transition parameters shared by all links
        p_certain = model.add_parameter(1.)
        p_enter_gap = model.add_parameter(p_gap)
        p_skip_gap = model.add_parameter(1. - p_gap)

        # positive strand: position k leads to position k + 1
        for k in xrange(self.K - 1):
            if k + 1 == self.gap_index:
                # the next position is the gap: either enter it or jump over it
                state_at(k, True).add_successor(state_at(k + 1, True), p_enter_gap)
                state_at(k, True).add_successor(state_at(k + 2, True), p_skip_gap)
            else:
                state_at(k, True).add_successor(state_at(k + 1, True), p_certain)

        # negative strand: position k + 1 leads to position k
        for k in xrange(self.K - 1):
            if k == self.gap_index:
                # the next position is the gap: either enter it or jump over it
                state_at(k + 1, False).add_successor(state_at(k, False), p_enter_gap)
                state_at(k + 1, False).add_successor(state_at(k - 1, False), p_skip_gap)
            else:
                state_at(k + 1, False).add_successor(state_at(k, False), p_certain)

        # hand each position's emissions to both of its strand states
        assert len(emissions) == self.K
        for k, base_emissions in enumerate(emissions):
            self._set_emissions(model, state_at(k, True), state_at(k, False),
                                base_emissions)

        last = self.K - 1
        starts = [
            self.map.model_idx(k=0, positive=True),
            self.map.model_idx(k=last, positive=False),
        ]
        ends = [
            self.map.model_idx(k=last, positive=True),
            self.map.model_idx(k=0, positive=False),
        ]
        return (model, starts, ends)