def _model_for_L_mer(self, L_mer, gap_index, p_binding_site):
    """
    Build a model whose emissions are initialised from the given L-mer.

    The L-mer is converted to a per-position nucleotide distribution, which
    is written into an otherwise uniform ``(self.options.K, 4)`` emission
    matrix. The gapped PSSM built from that matrix is then combined with a
    simple background model.

    :param L_mer: The sequence used to seed the emissions.
    :param gap_index: Gap position, passed to both ``self._make_builder``
        and ``nucleo_dist_from_mer``.
    :param p_binding_site: Passed through unchanged to
        ``single_gap.add_to_simple_background_model``.
    :returns: The result of ``hmm.as_model`` applied to the combined model.
    """
    # Where the L-mer starts inside the model, and the builder for that layout.
    n_bases = len(L_mer)
    offset, model_builder = self._make_builder(gap_index, n_bases)

    # Per-position distribution derived from the L-mer (with pseudo-counts).
    seeded_rows = nucleo_dist_from_mer(
        seq_to_numpy(L_mer),
        self.options.pseudo_count_for_model_initialisation,
        gap_index=gap_index
    )

    # Every position starts uniform over the 4 columns; the seeded rows then
    # overwrite their slice.
    # NOTE(review): the slice is n_bases + 1 rows long, presumably because
    # nucleo_dist_from_mer returns an extra row for the gap - confirm.
    emissions = numpy.ones((self.options.K, 4)) / 4.
    emissions[offset:offset + n_bases + 1] = seeded_rows

    # Build the gapped PSSM and embed it in the background model.
    pssm, in_states, out_states = model_builder.create(
        p_gap=.5,
        emissions=emissions
    )
    with_background = single_gap.add_to_simple_background_model(
        model=pssm,
        in_states=in_states,
        out_states=out_states,
        p_binding_site=p_binding_site
    )
    return hmm.as_model(with_background)
def model_for_initialisation_K_mer(self, K_mer, p_binding_site):
    """
    Build a model whose non-gap emissions are initialised from the given K-mer.

    Each of the ``self.K`` rows starts at ``self.initialisation_pseudo_count``
    in all 4 columns. For every position in ``K_mer`` the observed base's
    column is incremented by 1.0; a base code of 4 instead adds 0.25 to all
    four columns of that row. The gap position gets a uniform emission vector.

    :param K_mer: Iterable of integer base codes; values other than 4 are
        used directly as column indices.
    :param p_binding_site: Passed through unchanged to
        ``single_gap.add_to_simple_background_model``.
    :returns: The result of ``hmm.as_model`` applied to the combined model.
    """
    # Pseudo-count baseline for every (position, base) cell.
    non_gap = numpy.ones((self.K, 4)) * self.initialisation_pseudo_count

    for position, base in enumerate(K_mer):
        if base == 4:
            # Presumably 4 encodes an unknown base - verify against the
            # caller. Its unit of mass is spread evenly over the row.
            non_gap[position] += 0.25
            continue
        non_gap[position, base] += 1.0

    # Uniform distribution over the 4 bases for the gap state.
    uniform_gap = numpy.ones((4,)) / 4.0

    pssm, in_states, out_states = self.builder.create(
        p_gap=0.5,
        non_gap_emissions=non_gap,
        gap_emissions=uniform_gap
    )
    with_background = single_gap.add_to_simple_background_model(
        model=pssm,
        in_states=in_states,
        out_states=out_states,
        p_binding_site=p_binding_site
    )
    return hmm.as_model(with_background)
numpy.array([hmm.dirichlet_draw(numpy.ones(builder.M) * strength) for k in xrange(builder.K)]) for strength in dirichlet_prior_strengths ] gap_emissions = [hmm.dirichlet_draw(numpy.ones(builder.M) * strength) for strength in dirichlet_prior_strengths] # create out single gapped pssms pssms = [builder.create(p_gap, non_gap, gap) for non_gap, gap in zip(emissions, gap_emissions)] # create our complete models (by adding a background model) p_binding_site = exp_sites_per_sequence / L models = [ hmm.as_model( single_gap.add_to_simple_background_model( model=pssm[0], in_states=pssm[1], out_states=pssm[2], p_binding_site=p_binding_site ) ) for pssm in pssms ] # write our logos # convert to sequences and write fasta def tag(sample_idx): return "K%d-g%.2f-N%d-L%d-seed%d-%d" % (K, p_gap, N, L, seed, sample_idx) print "Writing logos" for i, model in enumerate(models): emissions, gap_probs = builder.get_emissions_and_gap_probabilities(model, offset=1)