def simplest_background_model(markov_order=0, M=4):
    """
    Build a one-state HMM whose single state always transitions to itself.

    @arg markov_order: Markov order handed to hmm.ModelByStates.
    @arg M: Alphabet size handed to hmm.ModelByStates. Emission slots
        0..M-1 of the state each receive their own parameter with value 1/M.
    @return: The newly built hmm.ModelByStates.
    """
    model = hmm.ModelByStates(M=M, markov_order=markov_order)
    background = model.add_state()

    # The self-transition parameter is registered before the emission ones.
    self_loop = model.add_parameter(1.)
    background.add_successor(background, self_loop)

    # Each symbol gets a separate (unshared) parameter.
    for symbol in xrange(M):
        emission = model.add_parameter(1. / M)
        background.b[symbol] = emission

    return model
def create_uniform_background_model(self):
    """
    Create a single-state HMM with a uniform emission distribution.

    The state is added through self.add_fully_parameterised_state and is
    given a self-transition whose parameter is 1.0.

    @return: A HMM with one mosaic with uniform emission probabilities.
    """
    model = hmm.ModelByStates(self.M, self.order)

    # Divide by the alphabet size (not a hard-coded 4) so that the
    # distribution sums to one for every self.M.
    uniform_emissions = numpy.ones(self.M) / float(self.M)
    self.add_fully_parameterised_state(
        model,
        emission_dist=uniform_emissions
    )

    # The only state loops back onto itself with probability one.
    transition_param = model.add_parameter(1.0)
    only_state = model.states[0]
    only_state.add_successor(only_state, transition_param)
    return model
def create_background_mosaic_model(self, num_mosaics, p_transition, dirichlet_prior_strength):
    """
    Create a fully connected mosaic model.

    @arg num_mosaics: Number of states (mosaics) to add.
    @arg p_transition: Initial value of the single parameter shared by every
        transition between two different states. Every self-transition
        shares a second parameter initialised to 1 - p_transition.
    @arg dirichlet_prior_strength: Scalar multiplied into a vector of
        self.M ones to form the argument of hmm.dirichlet_draw, which
        supplies each state's emission distribution.
    @return: The hmm.ModelByStates that was built.
    """
    model = hmm.ModelByStates(self.M, self.order)

    # Two parameters are shared by all transitions: switch and stay.
    switch_param = model.add_parameter(p_transition)
    stay_param = model.add_parameter(1.0 - p_transition)

    for _ in xrange(num_mosaics):
        prior = numpy.ones(self.M) * dirichlet_prior_strength
        self.add_fully_parameterised_state(
            model,
            emission_dist=hmm.dirichlet_draw(prior)
        )

    # Connect every ordered pair of states, including each state to itself.
    for source in model.states:
        for target in model.states:
            param = stay_param if source == target else switch_param
            source.add_successor(target, param)

    return model
def new_model_by_states(self):
    """
    @return: A new hmm.ModelByStates built from self.M and self.order.
    """
    fresh_model = hmm.ModelByStates(self.M, self.order)
    return fresh_model
def __init__(self):
    """
    Store a hmm.ModelByStates, constructed with no arguments, as self.model.
    """
    model = hmm.ModelByStates()
    self.model = model
def build_hmm(freqs, gaps, p_binding_site=0.01):
    """
    Build a HMM representing the gapped PWM with the given frequencies and gaps.

    Cannot handle PWMs with consecutive gaps or gaps at beginning or end.

    The model consists of one background state (every emission shares one
    parameter of value .25) plus K states for the motif read in the positive
    direction and K states for the negative direction. Negative states share
    emission parameters with the positive ones but at the mirrored symbol
    index (-b - 1); presumably this is the complement under an A,C,G,T
    ordering - confirm against the alphabet in use.

    @arg freqs: Sequence of length K; freqs[k][b] is the initial emission
        value for symbol b at position k.
    @arg gaps: Sequence of length K. gaps[k] < 1 marks position k as
        optional; the value is the probability of visiting that position
        (1 - gaps[k] is the probability of skipping it).
    @arg p_binding_site: Probability of leaving the background state for a
        binding site, split equally between the two directions. Defaults to
        the value that used to be hard-coded (0.01).
    @return: The hmm.ModelByStates that was built.
    @raise ValueError: If freqs and gaps differ in length, or if an optional
        position is first, last or adjacent to another optional position.
    """
    if len(gaps) != len(freqs):
        raise ValueError('Frequencies and gaps must be same length.')
    K = len(gaps)

    # Validate the gap layout up front so nothing is built for bad input.
    # The first two tests short-circuit before gaps[k + 1] can be out of range.
    for k, gap in enumerate(gaps):
        if gap < 1. and (0 == k or K - 1 == k or gaps[k - 1] < 1. or gaps[k + 1] < 1.):
            raise ValueError(
                'Gaps cannot be at first or last character nor next to another gap.'
            )

    # create model
    model = hmm.ModelByStates(M=4, markov_order=0)

    # add background state
    bg = model.add_state()
    bg.pi = model.add_parameter(1.)
    uniform_param = model.add_parameter(.25)
    for m in xrange(bg.M):
        bg.b[m] = uniform_param

    # add the binding site states in positive and negative directions
    positive_states = [model.add_state() for i in xrange(K)]
    negative_states = [model.add_state() for i in xrange(K)]

    # connect background to initial binding site states; the negative
    # direction is read from position K-1 down to position 0
    binding_param = model.add_parameter(p_binding_site / 2.)
    not_binding_param = model.add_parameter(1. - p_binding_site)
    bg.add_successor(positive_states[0], binding_param)
    bg.add_successor(negative_states[-1], binding_param)
    bg.add_successor(bg, not_binding_param)

    # the final state of each direction always returns to the background
    always_one_param = model.add_parameter(1.)
    positive_states[-1].add_successor(bg, always_one_param)
    negative_states[0].add_successor(bg, always_one_param)

    # set up emissions: one parameter per (position, symbol), shared by the
    # positive state and the mirrored slot of the negative state
    for freq, positive_state, negative_state in zip(freqs, positive_states, negative_states):
        for b, f in enumerate(freq):
            emission_param = model.add_parameter(f)
            positive_state.b[b] = emission_param
            negative_state.b[-b - 1] = emission_param

    # set up transitions
    for k, gap in enumerate(gaps):
        if gap < 1.:
            # optional position: its predecessor either visits it or jumps
            # straight over it
            gap_param = model.add_parameter(gap)
            non_gap_param = model.add_parameter(1. - gap)
            positive_states[k - 1].add_successor(positive_states[k], gap_param)
            positive_states[k - 1].add_successor(positive_states[k + 1], non_gap_param)
            negative_states[k + 1].add_successor(negative_states[k - 1], non_gap_param)
            negative_states[k + 1].add_successor(negative_states[k], gap_param)
        else:
            # mandatory position: always entered from its neighbour
            if 0 != k:
                positive_states[k - 1].add_successor(positive_states[k], always_one_param)
            if K - 1 != k:
                negative_states[k + 1].add_successor(negative_states[k], always_one_param)

    return model
def build_model_by_states(freqs, gaps, p_binding_site=0.001):
    """
    Build a HMM representing the gapped PWM with the given frequencies and gaps.

    Can handle consecutive gaps and gaps at beginning or end.

    The model has one background state plus K states for each direction of
    the motif. From any state (or from the background) the chain may jump to
    any later position m, with probability equal to the chance of skipping
    every position in between times gaps[m]. Whatever probability is left
    after the last position leads back to the background.

    Every edge is added exactly once. In particular the probability of
    skipping the whole motif is folded into the background self-loop, so the
    transitions out of the background sum to one.

    @arg freqs: Per-position symbol frequencies; each row is normalised to
        sum to one. Must support .T and .sum(axis=1) (e.g. a 2-D numpy
        array with K rows).
    @arg gaps: Sequence of length K; gaps[m] is the probability that
        position m is visited (1 - gaps[m] that it is skipped).
    @arg p_binding_site: Probability of leaving the background for a binding
        site, split equally between the two directions.
    @return: The hmm.ModelByStates that was built.
    @raise ValueError: If freqs and gaps differ in length.
    """
    if len(gaps) != len(freqs):
        raise ValueError('Frequencies and gaps must be same length.')
    K = len(gaps)

    # normalise frequencies
    freqs = (freqs.T / freqs.sum(axis=1)).T

    # create model
    model = hmm.ModelByStates(M=4, markov_order=0)

    # add background state
    bg = model.add_state()
    bg.pi = model.add_parameter(1.)
    uniform_param = model.add_parameter(.25)
    for m in xrange(bg.M):
        bg.b[m] = uniform_param

    # add the binding site states in positive and negative directions
    positive_states = [model.add_state() for i in xrange(K)]
    negative_states = [model.add_state() for i in xrange(K)]

    # Probability that every position of the motif is skipped. It is the
    # same for both directions because it is a product over all positions.
    p_skip_all = 1.
    for gap in gaps:
        p_skip_all *= 1. - gap

    # Background self-loop: not entering a site at all, plus entering one
    # (in either direction) and skipping all of its positions.
    not_binding_param = model.add_parameter(
        1. - p_binding_site + p_binding_site * p_skip_all
    )
    bg.add_successor(bg, not_binding_param)

    # set up emissions: one parameter per (position, symbol), shared by the
    # positive state and the mirrored slot of the negative state
    for freq, positive_state, negative_state in zip(freqs, positive_states, negative_states):
        for b, f in enumerate(freq):
            emission_param = model.add_parameter(f)
            positive_state.b[b] = emission_param
            negative_state.b[-b - 1] = emission_param

    # set up transitions
    def setup_transitions(states, gaps):
        # k == -1 stands for the background state entering the motif.
        for k in xrange(-1, K):
            if -1 == k:
                k_state = bg
                p_skip = p_binding_site / 2.
            else:
                k_state = states[k]
                p_skip = 1.
            for m in xrange(k + 1, K):
                # reach position m having skipped everything in between
                gap_param = model.add_parameter(p_skip * gaps[m])
                k_state.add_successor(states[m], gap_param)
                p_skip *= (1. - gaps[m])
                if 0. == p_skip:
                    # position m is mandatory: nothing beyond it is reachable
                    break
            # Remaining mass returns to the background. For the background
            # itself (k == -1) it is already part of the self-loop above;
            # indexing states[-1] here would wrongly hit the last motif state.
            if k >= 0 and p_skip > 0.:
                k_state.add_successor(bg, model.add_parameter(p_skip))

    setup_transitions(positive_states, gaps)
    # the negative direction visits the positions in reverse order
    setup_transitions(negative_states[::-1], gaps[::-1])

    return model
def create(self, p_gap, emissions):
    """
    Build the HMM for a motif with one optional (gap) base, covering both
    the motif and its reverse complement.

    @arg p_gap: the probability of a gap.
    @arg emissions: the emission distributions of the bases (including the
        gap); its length must equal self.K.
    @returns: A tuple (model, starts, ends). The model is defined by its
        states. starts is a list [positive_start, negative_start] holding
        the model indices of the first state of the positive motif and of
        the first state of the negative motif. ends is the matching list
        [positive_end, negative_end] of last-state indices.
    """
    model = hmm.ModelByStates(M=self.M, markov_order=self.markov_order)

    # add enough states to the model
    for _ in xrange(self.num_states()):
        model.add_state()

    # the three parameters shared by all transitions
    always = model.add_parameter(1.)
    take_gap = model.add_parameter(p_gap)
    skip_gap = model.add_parameter(1. - p_gap)

    index_of = self.map.model_idx

    def state_at(k, positive):
        # Look up the model state for base k on the requested strand.
        return model.states[index_of(k, positive)]

    # positive strand: base k leads to base k + 1
    for k in xrange(self.K - 1):
        source = state_at(k, True)
        if k + 1 == self.gap_index:
            # the next base is the gap: either visit it or jump past it
            source.add_successor(state_at(k + 1, True), take_gap)
            source.add_successor(state_at(k + 2, True), skip_gap)
        else:
            source.add_successor(state_at(k + 1, True), always)

    # negative strand: base k + 1 leads to base k
    for k in xrange(self.K - 1):
        source = state_at(k + 1, False)
        if k == self.gap_index:
            # base k is the gap: either visit it or jump on to base k - 1
            source.add_successor(state_at(k, False), take_gap)
            source.add_successor(state_at(k - 1, False), skip_gap)
        else:
            source.add_successor(state_at(k, False), always)

    # fill in the emission distributions
    assert len(emissions) == self.K
    for k, base_emissions in enumerate(emissions):
        self._set_emissions(
            model,
            state_at(k, True),
            state_at(k, False),
            base_emissions
        )

    # the negative strand starts at base K - 1 and finishes at base 0
    starts = [
        self.map.model_idx(k=0, positive=True),
        self.map.model_idx(k=self.K - 1, positive=False),
    ]
    ends = [
        self.map.model_idx(k=self.K - 1, positive=True),
        self.map.model_idx(k=0, positive=False),
    ]
    return (model, starts, ends)