Esempio n. 1
0
    def _model_for_L_mer(self, L_mer, gap_index, p_binding_site):
        """
        Create a model whose emissions are initialised from this L-mer.

        The L-mer is turned into an emission distribution by
        ``nucleo_dist_from_mer`` and written into an otherwise uniform
        ``(self.options.K, 4)`` emission matrix, starting at the position
        returned by ``self._make_builder``. The builder then creates the
        model with a gap probability of 0.5, and that model is added to a
        simple background model.

        Parameters:
          L_mer: the sequence used to seed the emissions.
          gap_index: passed to both ``self._make_builder`` and
            ``nucleo_dist_from_mer``.
          p_binding_site: passed to
            ``single_gap.add_to_simple_background_model``.

        Returns the result of ``hmm.as_model`` on the combined model.
        """
        # the builder also tells us where in the K positions the L-mer starts
        n_bases = len(L_mer)
        offset, model_builder = self._make_builder(gap_index, n_bases)

        # emission distribution derived from the L-mer itself
        mer_emissions = nucleo_dist_from_mer(
            seq_to_numpy(L_mer),
            self.options.pseudo_count_for_model_initialisation,
            gap_index=gap_index,
        )

        # every position starts uniform over the 4 bases; the rows covered by
        # the L-mer are then overwritten
        emissions = numpy.ones((self.options.K, 4)) / 4.
        # the window is one row longer than the L-mer - presumably the extra
        # row is the gap position; TODO confirm against nucleo_dist_from_mer
        window_end = offset + n_bases + 1
        emissions[offset:window_end] = mer_emissions

        # build the binding-site model and wrap it in the background model
        pssm, in_states, out_states = model_builder.create(p_gap=.5, emissions=emissions)
        with_background = single_gap.add_to_simple_background_model(
            model=pssm,
            in_states=in_states,
            out_states=out_states,
            p_binding_site=p_binding_site,
        )
        return hmm.as_model(with_background)
Esempio n. 2
0
 def model_for_initialisation_K_mer(self, K_mer, p_binding_site):
     """
     Create a model whose non-gap emissions are initialised from this K-mer.

     Each of the ``self.K`` positions starts with
     ``self.initialisation_pseudo_count`` on all 4 bases. For every
     position of the K-mer, 1.0 is added to the observed base, or 0.25 to
     every base when the base code is 4. The weights are handed to
     ``self.builder.create`` as they are; nothing is normalised in this
     method. The gap emissions are uniform.

     Parameters:
       K_mer: iterable of integer base codes, used to index the columns
         of the emission matrix.
       p_binding_site: passed to
         ``single_gap.add_to_simple_background_model``.

     Returns the result of ``hmm.as_model`` on the combined model.
     """
     # pseudo-count on every base of every position
     non_gap_emissions = numpy.ones((self.K, 4)) * self.initialisation_pseudo_count
     for position, base in enumerate(K_mer):
         if base != 4:
             # an ordinary base: all the extra weight goes on that base
             non_gap_emissions[position, base] += 1.0
         else:
             # code 4 is presumably an unknown base (TODO confirm): spread
             # the extra weight evenly over the 4 bases
             non_gap_emissions[position] += 0.25

     # the gap position emits all 4 bases with equal probability
     uniform_gap_emissions = numpy.ones(4) / 4.0

     pssm, in_states, out_states = self.builder.create(
         p_gap=0.5,
         non_gap_emissions=non_gap_emissions,
         gap_emissions=uniform_gap_emissions,
     )
     with_background = single_gap.add_to_simple_background_model(
         model=pssm,
         in_states=in_states,
         out_states=out_states,
         p_binding_site=p_binding_site,
     )
     return hmm.as_model(with_background)
Esempio n. 3
0
    numpy.array([hmm.dirichlet_draw(numpy.ones(builder.M) * strength) for k in xrange(builder.K)])
    for strength in dirichlet_prior_strengths
]
# one gap emission distribution per prior strength, each obtained from
# hmm.dirichlet_draw with a constant parameter vector of length builder.M
gap_emissions = [hmm.dirichlet_draw(numpy.ones(builder.M) * strength) for strength in dirichlet_prior_strengths]


# create our single gapped pssms: pair each set of non-gap emissions with the
# gap emissions drawn for the same prior strength
pssms = [builder.create(p_gap, non_gap, gap) for non_gap, gap in zip(emissions, gap_emissions)]


# create our complete models (by adding a background model)
# L is divided as a float so that the probability is not floored to 0 under
# Python 2 when exp_sites_per_sequence is also an integer; the value is
# unchanged whenever the division was already a true division
p_binding_site = exp_sites_per_sequence / float(L)
# each entry of pssms is indexed as (model, in_states, out_states)
models = [
    hmm.as_model(
        single_gap.add_to_simple_background_model(
            model=pssm[0],
            in_states=pssm[1],
            out_states=pssm[2],
            p_binding_site=p_binding_site,
        )
    )
    for pssm in pssms
]


# write our logos
# convert to sequences and write fasta
def tag(sample_idx):
    """
    Return the tag string for one sample.

    The tag is built from the module-level settings K, p_gap, N, L and seed,
    followed by the sample index.
    """
    fields = (K, p_gap, N, L, seed, sample_idx)
    return "K%d-g%.2f-N%d-L%d-seed%d-%d" % fields


print "Writing logos"
for i, model in enumerate(models):
    emissions, gap_probs = builder.get_emissions_and_gap_probabilities(model, offset=1)