Beispiel #1
0
 def __init__(self, T, hidden_state_objects, dp_filenames):
     """
     Set up the model and the three dynamic programming streams.
     If a filename is None then that stream will be done in memory.
     The dynamic programming streams are the forward stream,
     the backward stream, and the scaling factor stream.
     @param T: a transition object
     @param hidden_state_objects: a conformant list of hidden state objects
     @param dp_filenames: a tuple of (f, s, b); each is a filename or None
     """
     self.model = Model(T, hidden_state_objects)
     # A None filename selects an in-memory stream; otherwise use the disk.
     def make_stream(converter, filename):
         if filename is None:
             return lineario.SequentialStringIO(converter)
         return lineario.SequentialDiskIO(converter, filename)
     # initialize the streams for dynamic programming,
     # each with the data type appropriate for that stream
     f_name, s_name, b_name = dp_filenames
     self.f_stream = make_stream(lineario.FloatTupleConverter(), f_name)
     self.s_stream = make_stream(lineario.FloatConverter(), s_name)
     self.b_stream = make_stream(lineario.FloatTupleConverter(), b_name)
Beispiel #2
0
 def test_external_file_model_compatibility(self):
     """
     Test disk-backed streams for dynamic programming.

     The posterior computed through the external model with
     file-backed streams must match the posterior computed by the
     in-memory reference model.
     """
     import os
     # define the dishonest casino model
     fair_state = HMM.HiddenDieState(1 / 6.0)
     loaded_state = HMM.HiddenDieState(0.5)
     M = np.array([[0.95, 0.05], [0.1, 0.9]])
     T = TransitionMatrix.MatrixTransitionObject(M)
     hidden_states = [fair_state, loaded_state]
     # define a sequence of observations
     observations = [1, 2, 6, 6, 1, 2, 3, 4, 5, 6]
     # define the observation stream
     o_converter = lineario.IntConverter()
     o_stream = lineario.SequentialStringIO(o_converter)
     o_stream.open_write()
     for x in observations:
         o_stream.write(x)
     o_stream.close()
     # create the reference hidden markov model object
     hmm_old = HMM.TrainedModel(M, hidden_states)
     # create the testing hidden markov model object
     names = ('tmp_f.tmp', 'tmp_s.tmp', 'tmp_b.tmp')
     hmm_new = ExternalModel(T, hidden_states, names)
     try:
         # get posterior distributions from both models
         distributions_old = hmm_old.scaled_posterior_durbin(observations)
         hmm_new.init_dp(o_stream)
         distributions_new = list(hmm_new.posterior())
         # assert that the distributions are the same
         self.assertTrue(np.allclose(distributions_old, distributions_new))
     finally:
         # remove the temporary dynamic programming files
         for name in names:
             if os.path.exists(name):
                 os.remove(name)
Beispiel #3
0
def get_response_content(fs):
    """
    Compute per-position posterior inferences for an observation stream.
    @param fs: a FieldStorage object containing the cgi arguments
    @return: the response text as a single string
    """
    out = StringIO()
    lines = Util.get_stripped_lines(StringIO(fs.param_field))
    model = DGRP.Model()
    model.from_lines(lines)
    # see how the states interact with the observations
    states = (model.get_recent_state(), model.get_ancient_state(),
              model.get_misaligned_state(fs.misalignment_effect),
              model.get_garbage_state())
    # define the transition object
    nstates = len(states)
    prandom = min(1.0, (nstates / (nstates - 1.0)) / fs.region_size)
    T = TransitionMatrix.UniformTransitionObject(prandom, nstates)
    # use StringIO objects for storage
    hmm = ExternalHMM.ExternalModel(T, states, (None, None, None))
    converter = lineario.IntTupleConverter()
    o_stream = lineario.SequentialStringIO(converter, fs.data_field)
    hmm.init_dp(o_stream)
    o_stream.open_read()
    # The sequencing error rate does not depend on the position,
    # so the per-nucleotide log probabilities are loop invariants.
    r = model.seqerr
    log_Pr = math.log(r / 4.0)
    log_PA = math.log(1 - 3 * r / 4.0)
    try:
        for p, obs in itertools.izip(hmm.posterior(), o_stream.read_forward()):
            p_recent, p_ancient, p_misaligned, p_garbage = p
            # get the prior probability of polymorphism conditional on state
            p_recent_AA = states[0].get_posterior_distribution(obs)[2]
            p_ancient_AA = states[1].get_posterior_distribution(obs)[2]
            # compute the posterior probability of a polymorphism
            posterior_polymorphism = 0
            posterior_polymorphism += p_recent * p_recent_AA
            posterior_polymorphism += p_ancient * p_ancient_AA
            # Given that a polymorphism occurred,
            # get the probability distribution over the
            # three non-reference nucleotides.
            logs = [
                obs[1] * log_PA + obs[2] * log_Pr + obs[3] * log_Pr,
                obs[1] * log_Pr + obs[2] * log_PA + obs[3] * log_Pr,
                obs[1] * log_Pr + obs[2] * log_Pr + obs[3] * log_PA
            ]
            # NOTE(review): scipy.misc.logsumexp was moved to
            # scipy.special in newer scipy releases — confirm the
            # installed scipy version still exposes it here.
            condmaxpost = math.exp(max(logs) - scipy.misc.logsumexp(logs))
            # get the posterior probability distribution
            maxpost = posterior_polymorphism * condmaxpost
            # show the inference for this position
            print >> out, obs, p, maxpost
    finally:
        o_stream.close()
    return out.getvalue()