def new_model(self, emission_dists = None, dirichlet_strength = 10.0):
    """
    Create a model made of the background states plus one chain of PSSM
    states for each strand orientation.

    @arg emission_dists: The emission distributions of the PSSM states, one
        per state. If None then self.emission_dists is used and, if that is
        None as well, self.K distributions are drawn at random.
    @arg dirichlet_strength: The random distributions are drawn with
        hmm.dirichlet_draw(numpy.ones(4)*dirichlet_strength).
    @return: The object created by self.background_model_creator with the
        PSSM states and their transitions added to it.
    """
    model = self.background_model_creator(self.order, self.num_background_mosaics)

    # have we been given any emission dist?
    # ('is None' rather than '== None': comparing a numpy array with None is
    # done element by element and the result cannot be used as a condition)
    if emission_dists is None:
        # no - were we initialised with some distribution?
        if self.emission_dists is None:
            # no - create a random one
            emission_dists = [
                hmm.dirichlet_draw(numpy.ones(4)*dirichlet_strength)
                for _ in range(self.K)
            ]
        else:
            # yes - use it
            emission_dists = self.emission_dists

    # add the states for the positive strand orientation
    positive_states = [
        self.model_builder.add_order_0_parameterised_state(
            model,
            pi=self.p_binding_site,
            emission_dist=dist
        )
        for dist in emission_dists
    ]

    # add the states for the negative strand orientation: one reverse
    # complement state per positive state, then reversed so that
    # negative_states[0] is derived from the last positive state
    negative_states = [
        self.model_builder.add_order_0_rev_comp_state(
            model,
            positive_state,
            pi=self.p_binding_site
        )
        for positive_state in positive_states
    ]
    negative_states.reverse()

    # connect the background to both first states with equal prob
    # (the first num_background_mosaics entries of model.states are taken to
    # be the background states)
    binding_param = model.add_parameter(self.p_binding_site/2)
    for bg in range(self.num_background_mosaics):
        model.states[bg].add_successor(positive_states[0], binding_param)
        model.states[bg].add_successor(negative_states[0], binding_param)

    # connect the states in the pssm together
    one_param = model.add_parameter(1.0)
    for k in range(self.K-1):
        positive_states[k].add_successor(positive_states[k+1], one_param)
        negative_states[k].add_successor(negative_states[k+1], one_param)

    # connect the last states back to the background, sharing the exit
    # equally between the background states
    to_background_param = model.add_parameter(1.0/self.num_background_mosaics)
    for bg in range(self.num_background_mosaics):
        positive_states[-1].add_successor(model.states[bg], to_background_param)
        negative_states[-1].add_successor(model.states[bg], to_background_param)

    return model
def emission_dist_including_n_mer(self, n_mer, strength=9.0, offset=0, dirichlet_strength = 10.0):
    '''
    Get emission distributions that incorporate the n-mer.

    Draws self.K_prime distributions from a dirichlet of given strength
    (default 10) then adds the n-mer in the middle (default # copies of
    n-mer = 9): for each base of the n-mer, strength is added to that base's
    entry of the matching distribution, which is then renormalised to sum
    to 1.

    @arg n_mer: The n-mer. Each element is used as an index into an
        emission distribution.
    @arg strength: The amount added to the entry of each n-mer base.
    @arg offset: Currently ignored - the value passed in is always replaced
        by the offset that centres the n-mer.
    @arg dirichlet_strength: The distributions are drawn with
        hmm.dirichlet_draw(numpy.ones(4)*dirichlet_strength).
    @return: A list of self.K_prime emission distributions.
    '''
    emission_dists = [
        hmm.dirichlet_draw(numpy.ones(4)*dirichlet_strength)
        for k in xrange(self.K_prime)
    ]
    n = len(n_mer)
    # NOTE(review): this overwrites the offset argument, so callers cannot
    # move the n-mer away from the centre - confirm that this is intended.
    # Floor division keeps the offset an integer whatever division semantics
    # are in force; a float here would not be usable as a list index below.
    offset = (self.K - n)//2
    for i, base in enumerate(n_mer):
        # only even indices are touched (presumably the odd ones belong to
        # the optional gap states - confirm against the model layout)
        idx = 2*(i+offset)
        # the array must be writeable for the in-place updates below
        emission_dists[idx].setflags(write=True)
        emission_dists[idx][base] += strength
        emission_dists[idx] /= emission_dists[idx].sum()
    return emission_dists
def create_mosaic_model(
    num_mosaics,
    p_transition,
    alphabet_size,
    order,
    dirichlet_prior_strength=None
):
    """
    Create a mosaic model.

    Each mosaic is one fully parameterised state with its own pair of
    transition parameters: one for staying in the mosaic and one that is
    shared by the transitions to every other mosaic (this effectively ties
    these transition probabilities together). A model with a single mosaic
    only gets a self transition with a parameter of 1.

    @arg num_mosaics: The number of mosaics.
    @arg p_transition: The value given to the parameter used for each
        transition from a mosaic to a different mosaic. The parameter for
        staying in a mosaic is given the value 1 - p_transition.
    @arg alphabet_size: The size of the output alphabet.
    @arg order: The Markov order of the output.
    @arg dirichlet_prior_strength: The strength of the uniform prior the
        emission probabilities are drawn from. If None then every emission
        entry is set to 1 / alphabet_size instead.
    @return: The model.
    """
    builder = hmm.pssm.ModelBuilder(order, alphabet_size=alphabet_size)
    model = builder.new_model_by_states()

    # add one state per mosaic
    for _ in range(num_mosaics):
        if dirichlet_prior_strength is None:
            emission_dist = numpy.ones(model.M) / alphabet_size
        else:
            emission_dist = hmm.dirichlet_draw(numpy.ones(model.M)*dirichlet_prior_strength)
        builder.add_fully_parameterised_state(
            model,
            pi=1./num_mosaics,
            emission_dist=emission_dist
        )

    if 1 == num_mosaics:
        # nowhere else to go
        model.states[0].add_successor(model.states[0], model.add_parameter(1.))
    else:
        for s1 in model.states:
            transition_param = model.add_parameter(p_transition)
            no_transition_param = model.add_parameter(1.0 - p_transition)
            for s2 in model.states:
                # a conditional expression rather than 'x and a or b', which
                # would pick transition_param whenever no_transition_param
                # happened to be falsy
                s1.add_successor(
                    s2,
                    no_transition_param if s1 == s2 else transition_param
                )

    return model
def create_background_mosaic_model(self, num_mosaics, p_transition, dirichlet_prior_strength):
    """
    Create a mosaic model.

    The model has num_mosaics fully parameterised states whose emissions are
    drawn with hmm.dirichlet_draw(numpy.ones(self.M)*dirichlet_prior_strength).
    All the states share two transition parameters: p_transition for moving
    to a different state and 1 - p_transition for staying in the same state.

    @arg num_mosaics: The number of mosaic states to add.
    @arg p_transition: The value of the parameter on every transition
        between two different states.
    @arg dirichlet_prior_strength: The strength of the uniform dirichlet the
        emissions are drawn from.
    @return: The model.
    """
    model = hmm.ModelByStates(self.M, self.order)

    # the two shared parameters are created before any state is added
    transition_param = model.add_parameter(p_transition)
    no_transition_param = model.add_parameter(1.0 - p_transition)

    # one state per mosaic, each with freshly drawn emissions
    for _ in xrange(num_mosaics):
        prior = numpy.ones(self.M)*dirichlet_prior_strength
        self.add_fully_parameterised_state(
            model,
            emission_dist = hmm.dirichlet_draw(prior)
        )

    # fully connect the states
    for source in model.states:
        for target in model.states:
            param = no_transition_param if source == target else transition_param
            source.add_successor(target, param)

    return model
def create_mosaic_model(num_mosaics, p_transition, alphabet_size, order, dirichlet_prior_strength=None):
    """
    Create a mosaic model.

    Each mosaic is one fully parameterised state with its own pair of
    transition parameters: one for staying in the mosaic and one that is
    shared by the transitions to every other mosaic (this effectively ties
    these transition probabilities together). A model with a single mosaic
    only gets a self transition with a parameter of 1.

    @arg num_mosaics: The number of mosaics.
    @arg p_transition: The value given to the parameter used for each
        transition from a mosaic to a different mosaic. The parameter for
        staying in a mosaic is given the value 1 - p_transition.
    @arg alphabet_size: The size of the output alphabet.
    @arg order: The Markov order of the output.
    @arg dirichlet_prior_strength: The strength of the uniform prior the
        emission probabilities are drawn from. If None then every emission
        entry is set to 1 / alphabet_size instead.
    @return: The model.
    """
    builder = hmm.pssm.ModelBuilder(order, alphabet_size=alphabet_size)
    model = builder.new_model_by_states()

    for _ in range(num_mosaics):
        # add the state
        if dirichlet_prior_strength is None:
            emission_dist = numpy.ones(model.M) / alphabet_size
        else:
            emission_dist = hmm.dirichlet_draw(
                numpy.ones(model.M) * dirichlet_prior_strength)
        builder.add_fully_parameterised_state(
            model, pi=1. / num_mosaics, emission_dist=emission_dist)

    if 1 == num_mosaics:
        # a single mosaic can only go back to itself
        model.states[0].add_successor(model.states[0], model.add_parameter(1.))
        return model

    for s1 in model.states:
        transition_param = model.add_parameter(p_transition)
        no_transition_param = model.add_parameter(1.0 - p_transition)
        for s2 in model.states:
            # 'a if cond else b' instead of 'cond and a or b': the old form
            # fell through to transition_param whenever no_transition_param
            # was falsy
            if s1 == s2:
                param = no_transition_param
            else:
                param = transition_param
            s1.add_successor(s2, param)

    return model
def new_model(self, emission_dists = None, dirichlet_strength = 10.0, generate_uniform=False):
    """
    Create a model made of the background states plus one chain of gapped
    PSSM states for each strand orientation.

    In each chain the state at index 2*k either moves to the state at index
    2*k+1 (parameter value self.p_gap) or skips it and moves straight to the
    state at index 2*k+2 (parameter value 1 - self.p_gap). The state at
    index 2*k+1 always moves on to the state at index 2*k+2.

    @arg emission_dists: The emission distributions of the PSSM states;
        there must be self.K_prime of them. If None then
        self.emission_dists is used and, if that is None as well, random
        distributions are drawn. Ignored when generate_uniform is set.
    @arg dirichlet_strength: The random distributions are drawn with
        hmm.dirichlet_draw(numpy.ones(4)*dirichlet_strength).
    @arg generate_uniform: If true every state gets the uniform emission
        distribution .25 * numpy.ones(4).
    @return: The object created by self.background_model_creator with the
        PSSM states and their transitions added to it.
    @raise RuntimeError: If the number of emission distributions is not
        self.K_prime.
    """
    model = self.background_model_creator(self.order, self.num_background_mosaics)

    # are we generating a uniform emission model?
    if generate_uniform:
        emission_dists = [ .25 * numpy.ones(4) for _ in range(self.K_prime) ]
    # have we been given any emission dist?
    # ('is None' rather than '== None': comparing a numpy array with None is
    # done element by element and the result cannot be used as a condition)
    elif emission_dists is None:
        # no - were we initialised with some distribution?
        if self.emission_dists is None:
            # no - create a random one
            emission_dists = [
                hmm.dirichlet_draw(numpy.ones(4)*dirichlet_strength)
                for _ in range(self.K_prime)
            ]
        else:
            # yes - use it
            emission_dists = self.emission_dists

    if len(emission_dists) != self.K_prime:
        raise RuntimeError('Wrong number of emissions')

    # add the states for the positive strand orientation
    positive_states = [
        self.model_builder.add_order_0_parameterised_state(
            model,
            pi=self.p_binding_site,
            emission_dist=dist
        )
        for dist in emission_dists
    ]

    # add the states for the negative strand orientation: one reverse
    # complement state per positive state, then reversed so that
    # negative_states[0] is derived from the last positive state
    negative_states = [
        self.model_builder.add_order_0_rev_comp_state(
            model,
            positive_state,
            pi=self.p_binding_site
        )
        for positive_state in positive_states
    ]
    negative_states.reverse()

    # connect the background to both first states with equal prob
    # (the first num_background_mosaics entries of model.states are taken to
    # be the background states)
    binding_param = model.add_parameter(self.p_binding_site/2)
    for bg in range(self.num_background_mosaics):
        model.states[bg].add_successor(positive_states[0], binding_param)
        model.states[bg].add_successor(negative_states[0], binding_param)

    # connect the states in the pssm together
    one_param = model.add_parameter(1.0)
    gap_params = [ model.add_parameter(self.p_gap) for _ in range(self.K-1) ]
    non_gap_params = [ model.add_parameter(1.0-self.p_gap) for _ in range(self.K-1) ]
    for k in range(self.K-1):
        # the negative chain runs the other way so it uses the gap
        # parameters in reverse order
        rev_k = self.K-2-k
        positive_states[2*k+1].add_successor(positive_states[2*k+2], one_param)
        negative_states[2*k+1].add_successor(negative_states[2*k+2], one_param)
        positive_states[2*k].add_successor(positive_states[2*k+1], gap_params[k])
        negative_states[2*k].add_successor(negative_states[2*k+1], gap_params[rev_k])
        positive_states[2*k].add_successor(positive_states[2*k+2], non_gap_params[k])
        negative_states[2*k].add_successor(negative_states[2*k+2], non_gap_params[rev_k])

    # connect the last states back to the background, sharing the exit
    # equally between the background states
    to_background_param = model.add_parameter(1.0/self.num_background_mosaics)
    for bg in range(self.num_background_mosaics):
        positive_states[-1].add_successor(model.states[bg], to_background_param)
        negative_states[-1].add_successor(model.states[bg], to_background_param)

    return model
print "Seeding the random number generator with %d" % seed # seed all the RNGs that we use hmm.seed_rng(seed) numpy.random.seed(seed) # create something to build the gapped pssms builder = single_gap.SingleGappedPssmBuilder(K=K, gap_position=K / 2, markov_order=0, M=4) # create our emission distributions dirichlet_prior_strengths = [0.01, 0.1, 1.0] emissions = [ numpy.array([hmm.dirichlet_draw(numpy.ones(builder.M) * strength) for k in xrange(builder.K)]) for strength in dirichlet_prior_strengths ] gap_emissions = [hmm.dirichlet_draw(numpy.ones(builder.M) * strength) for strength in dirichlet_prior_strengths] # create out single gapped pssms pssms = [builder.create(p_gap, non_gap, gap) for non_gap, gap in zip(emissions, gap_emissions)] # create our complete models (by adding a background model) p_binding_site = exp_sites_per_sequence / L models = [ hmm.as_model( single_gap.add_to_simple_background_model( model=pssm[0], in_states=pssm[1], out_states=pssm[2], p_binding_site=p_binding_site
complete_model.states[0].add_successor( complete_model.states[1 + in_state], binding_site_transition_param) for out_state in out_states: complete_model.states[1 + out_state].add_successor( complete_model.states[0], back_to_bg_transition_param) complete_model.states[0].pi = complete_model.add_parameter(1.) return complete_model if '__main__' == __name__: import numpy # build a single gapped pssm with some random emissions builder = SingleGappedPssmBuilder(K=6, gap_index=1, markov_order=0, M=4) emissions = numpy.array([ hmm.dirichlet_draw(numpy.ones(builder.M) * .1) for k in xrange(builder.K) ]) emissions[builder.gap_index] = hmm.dirichlet_draw( numpy.ones(builder.M) * .3) model_by_states, in_states, out_states = builder.create( p_gap=.6, emissions=emissions) # create a background model and add the single gapped pssm to it complete_model = add_to_simple_background_model(model_by_states, in_states, out_states, p_binding_site=.01) # convert to other type of model model = hmm.as_model(complete_model)
back_to_bg_transition_param ) complete_model.states[0].pi = complete_model.add_parameter(1.) return complete_model if '__main__' == __name__: import numpy # build a single gapped pssm with some random emissions builder = SingleGappedPssmBuilder(K=6, gap_index=1, markov_order=0, M=4) emissions = numpy.array( [ hmm.dirichlet_draw(numpy.ones(builder.M) * .1) for k in xrange(builder.K) ] ) emissions[builder.gap_index] = hmm.dirichlet_draw(numpy.ones(builder.M) * .3) model_by_states, in_states, out_states = builder.create( p_gap=.6, emissions=emissions ) # create a background model and add the single gapped pssm to it complete_model = add_to_simple_background_model( model_by_states, in_states, out_states, p_binding_site=.01)