def __init__(self, T, hidden_state_objects, dp_filenames):
    """
    Set up the model and the three dynamic programming streams.
    The streams are the forward stream, the scaling factor stream,
    and the backward stream, in the order used by dp_filenames.
    A stream whose filename is None is done in memory;
    otherwise the stream uses the named file.
    @param T: a transition object
    @param hidden_state_objects: a conformant list of hidden state objects
    @param dp_filenames: a tuple of (f, s, b); each is a filename or None
    """
    def open_stream(converter, filename):
        # None selects the in-memory stream; anything else names a file
        if filename is None:
            return lineario.SequentialStringIO(converter)
        return lineario.SequentialDiskIO(converter, filename)

    self.model = Model(T, hidden_state_objects)
    # each stream has its own converter that defines its data type
    forward_converter = lineario.FloatTupleConverter()
    scaling_converter = lineario.FloatConverter()
    backward_converter = lineario.FloatTupleConverter()
    # build the streams in forward, scaling, backward order
    forward_name, scaling_name, backward_name = dp_filenames
    self.f_stream = open_stream(forward_converter, forward_name)
    self.s_stream = open_stream(scaling_converter, scaling_name)
    self.b_stream = open_stream(backward_converter, backward_name)
def test_external_file_model_compatibility(self):
    """
    Test named file streams for dynamic programming.
    The posterior distributions from the external model, which is given
    a filename for each dynamic programming stream, are compared to
    the posterior distributions from the reference model.
    """
    import os
    import shutil
    import tempfile
    # define the dishonest casino model
    fair_state = HMM.HiddenDieState(1 / 6.0)
    loaded_state = HMM.HiddenDieState(0.5)
    M = np.array([[0.95, 0.05], [0.1, 0.9]])
    T = TransitionMatrix.MatrixTransitionObject(M)
    hidden_states = [fair_state, loaded_state]
    # define a sequence of observations
    observations = [1, 2, 6, 6, 1, 2, 3, 4, 5, 6]
    # define the observation stream
    o_converter = lineario.IntConverter()
    o_stream = lineario.SequentialStringIO(o_converter)
    o_stream.open_write()
    for x in observations:
        o_stream.write(x)
    o_stream.close()
    # create the reference hidden markov model object
    hmm_old = HMM.TrainedModel(M, hidden_states)
    # Put the stream files in a private scratch directory so that
    # the test does not leave files behind in the working directory.
    scratch_dir = tempfile.mkdtemp()
    try:
        # create the testing hidden markov model object
        names = tuple(
                os.path.join(scratch_dir, name)
                for name in ('tmp_f.tmp', 'tmp_s.tmp', 'tmp_b.tmp'))
        hmm_new = ExternalModel(T, hidden_states, names)
        # get posterior distributions
        distributions_old = hmm_old.scaled_posterior_durbin(observations)
        hmm_new.init_dp(o_stream)
        distributions_new = list(hmm_new.posterior())
    finally:
        # remove the scratch files even when the comparison setup fails
        shutil.rmtree(scratch_dir, ignore_errors=True)
    # assert that the distributions are the same
    self.assertTrue(np.allclose(distributions_old, distributions_new))
def get_response_content(fs):
    """
    Write one line of inference per observed position.
    Each output line shows the observation, the posterior distribution
    over the four hidden states, and the posterior probability of
    the most probable non-reference nucleotide.
    @param fs: a FieldStorage object containing the cgi arguments
    @return: the response text
    """
    out = StringIO()
    lines = Util.get_stripped_lines(StringIO(fs.param_field))
    model = DGRP.Model()
    model.from_lines(lines)
    # see how the states interact with the observations
    states = (
            model.get_recent_state(),
            model.get_ancient_state(),
            model.get_misaligned_state(fs.misalignment_effect),
            model.get_garbage_state())
    # define the transition object
    nstates = len(states)
    prandom = min(1.0, (nstates / (nstates - 1.0)) / fs.region_size)
    T = TransitionMatrix.UniformTransitionObject(prandom, nstates)
    # use StringIO objects for storage
    hmm = ExternalHMM.ExternalModel(T, states, (None, None, None))
    converter = lineario.IntTupleConverter()
    o_stream = lineario.SequentialStringIO(converter, fs.data_field)
    hmm.init_dp(o_stream)
    # These log probabilities depend only on the model,
    # so compute them once instead of once per position.
    r = model.seqerr
    log_Pr = math.log(r / 4.0)
    log_PA = math.log(1 - 3 * r / 4.0)
    o_stream.open_read()
    try:
        for p, obs in itertools.izip(
                hmm.posterior(), o_stream.read_forward()):
            # the posterior has one entry per hidden state
            p_recent, p_ancient, p_misaligned, p_garbage = p
            # get the probability of polymorphism conditional on state;
            # this is entry 2 of the per-state posterior distribution
            p_recent_AA = states[0].get_posterior_distribution(obs)[2]
            p_ancient_AA = states[1].get_posterior_distribution(obs)[2]
            # compute the posterior probability of a polymorphism
            posterior_polymorphism = 0
            posterior_polymorphism += p_recent * p_recent_AA
            posterior_polymorphism += p_ancient * p_ancient_AA
            # Given that a polymorphism occurred,
            # get the probability distribution over the
            # three non-reference nucleotides.
            logs = [
                    obs[1] * log_PA + obs[2] * log_Pr + obs[3] * log_Pr,
                    obs[1] * log_Pr + obs[2] * log_PA + obs[3] * log_Pr,
                    obs[1] * log_Pr + obs[2] * log_Pr + obs[3] * log_PA]
            # Normalize in log space and keep the largest probability.
            # NOTE(review): newer SciPy releases provide logsumexp as
            # scipy.special.logsumexp -- confirm the installed version.
            condmaxpost = math.exp(max(logs) - scipy.misc.logsumexp(logs))
            # get the posterior probability distribution
            maxpost = posterior_polymorphism * condmaxpost
            # show the inference for this position
            print >> out, obs, p, maxpost
    finally:
        # close the observation stream even if a position fails
        o_stream.close()
    return out.getvalue()