def learn_likelihoods_progress(i, n, m, A, B, pi, F, X_train, nEmissionDim,
                               g_mu, g_sig, nState):
    if nEmissionDim >= 2:
        ml = ghmm.HMMFromMatrices(F, ghmm.MultivariateGaussianDistribution(F), A, B, pi)
    else:
        ml = ghmm.HMMFromMatrices(F, ghmm.GaussianDistribution(F), A, B, pi)

    l_likelihood_mean = 0.0
    l_likelihood_mean2 = 0.0
    l_statePosterior = np.zeros(nState)

    for j in xrange(n):
        g_post = np.zeros(nState)
        g_lhood = 0.0
        g_lhood2 = 0.0
        prop_sum = 0.0

        for k in xrange(1, m):
            final_ts_obj = ghmm.EmissionSequence(F, X_train[j][:k * nEmissionDim])
            logp = ml.loglikelihoods(final_ts_obj)[0]
            # print 'Log likelihood:', logp
            post = np.array(ml.posterior(final_ts_obj))

            # Weight each prefix length k by a Gaussian proposal centered at g_mu
            k_prop = norm(loc=g_mu, scale=g_sig).pdf(k)
            g_post += post[k - 1] * k_prop
            g_lhood += logp * k_prop
            g_lhood2 += logp * logp * k_prop
            prop_sum += k_prop

        l_statePosterior += g_post / prop_sum / float(n)
        l_likelihood_mean += g_lhood / prop_sum / float(n)
        l_likelihood_mean2 += g_lhood2 / prop_sum / float(n)

    return i, l_statePosterior, l_likelihood_mean, \
        np.sqrt(l_likelihood_mean2 - l_likelihood_mean ** 2)

def set_hmm_object(self, A, B, pi, out_a_num=None, vec_num=None,
                   mat_num=None, u_denom=None):
    """Set the HMM's model parameters (A, B, pi) and, optionally, the
    Baum-Welch accumulators used for partial fitting."""
    if self.nEmissionDim == 1:
        self.ml = ghmm.HMMFromMatrices(self.F, ghmm.GaussianDistribution(self.F),
                                       A, B, pi)
    else:
        self.ml = ghmm.HMMFromMatrices(self.F, ghmm.MultivariateGaussianDistribution(self.F),
                                       A, B, pi)
    self.A = A
    self.B = B
    self.pi = pi

    try:
        self.ml.setBaumWelchParams(out_a_num, vec_num, mat_num, u_denom)
    except:
        print "Install Daehyung's custom ghmm if you want partial-fit functionality."

    return self.ml

def _train(self, seq, trans, emi, num_possible_states,
           pseudo_transitions=False, start_at_zero=False):
    """Uses the given parameters to train a multinomial HMM to represent
    the given sequences of observations. Uses Baum-Welch training.
    Please override if special training is necessary for your QSR.

    :param seq: the sequence of observations represented by alphabet symbols
    :param trans: the transition matrix as a numpy array
    :param emi: the emission matrix as a numpy array
    :param num_possible_states: the total number of possible states

    :return: the HMM generated via Baum-Welch training
    """
    print 'Generating HMM:'
    print seq
    print '\tCreating symbols...'
    symbols = self.generate_alphabet(num_possible_states)
    if start_at_zero:
        startprob = np.zeros(num_possible_states)
        startprob[0] = 1
    else:
        startprob = np.ones(num_possible_states)
        startprob = startprob / np.sum(startprob)
    print startprob
    print '\t\t', symbols
    print '\tCreating HMM...'
    hmm = gh.HMMFromMatrices(symbols, gh.DiscreteDistribution(symbols),
                             trans.tolist(), emi.tolist(), startprob.tolist())
    print '\tTraining...'
    hmm.baumWelch(self._create_sequence_set(seq, symbols))
    if pseudo_transitions:
        print '\tAdding pseudo transitions...'
        # Give every transition allowed by the prior a small constant mass
        pseudo = deepcopy(trans)
        pseudo[pseudo > 0.] = 1.
        pseudo = pseudo / (float(len(seq) + 1))
        trans_trained, emi, start = hmm.asMatrices()
        trans_trained = np.array(trans_trained) + pseudo
        hmm = gh.HMMFromMatrices(symbols, gh.DiscreteDistribution(symbols),
                                 trans_trained.tolist(), emi, start)
        hmm.normalize()

    return hmm

def computeLikelihood(idx, A, B, pi, F, X, nEmissionDim, nState, startIdx=1,
                      bPosterior=False, converted_X=False, cov_type='full'):
    '''
    This function will be deprecated. Please use computeLikelihoods.
    '''
    if nEmissionDim >= 2:
        ml = ghmm.HMMFromMatrices(F, ghmm.MultivariateGaussianDistribution(F), A, B, pi)
        if cov_type == 'diag' or cov_type.find('diag') >= 0:
            ml.setDiagonalCovariance(1)
    else:
        ml = ghmm.HMMFromMatrices(F, ghmm.GaussianDistribution(F), A, B, pi)

    if converted_X is False:
        X_test = util.convert_sequence(X, emission=False)
        X_test = np.squeeze(X_test)
        X_test = X_test.tolist()
    else:
        X_test = X

    l_idx = []
    l_likelihood = []
    l_posterior = []

    for i in xrange(startIdx, len(X_test) / nEmissionDim):
        final_ts_obj = ghmm.EmissionSequence(F, X_test[:i * nEmissionDim])

        try:
            logp = ml.loglikelihood(final_ts_obj)
            if bPosterior:
                post = np.array(ml.posterior(final_ts_obj))
        except:
            print "Unexpected profile!! GHMM cannot handle too low probability. Underflow?"
            l_idx.append(i)
            l_likelihood.append(-100000000)
            if bPosterior:
                if len(l_posterior) == 0:
                    l_posterior.append(list(pi))
                else:
                    l_posterior.append(l_posterior[-1])
            ## return False, False # anomaly
            continue

        l_idx.append(i)
        l_likelihood.append(logp)
        if bPosterior:
            l_posterior.append(post[i - 1])

    if bPosterior:
        return idx, l_idx, l_likelihood, l_posterior
    else:
        return idx, l_idx, l_likelihood

def reset(self):
    """Reset the HMM object
    """
    [A, B, pi] = self.ml.asMatrices()
    if self.nEmissionDim == 1:
        self.ml = ghmm.HMMFromMatrices(self.F, ghmm.GaussianDistribution(self.F),
                                       A, B, pi)
    else:
        self.ml = ghmm.HMMFromMatrices(self.F, ghmm.MultivariateGaussianDistribution(self.F),
                                       A, B, pi)
    self.A = A
    self.B = B
    self.pi = pi

def markov_model(self):
    mm = ghmm.HMMFromMatrices(
        self.F,
        ghmm.MultivariateGaussianDistribution(self.F),
        self.transition_probabilities,
        self.observation_probabilities,
        self.initial_probabilities)
    # print ".>" + str(mm.asMatrices())
    return mm

def get_hidden_markov_model(mixture_model, guess_t_matrix):
    """Get an (unoptimized) hidden Markov model from the mixture model and a
    guess at the transition matrix.

    The guess transition matrix is typically created by summing over the
    outer product of time-pairs of membership vectors.
    """
    # Emission probabilities for the HMM, using their very silly
    # matrix arrangement
    emissions = [[mixture_model.means_[j], mixture_model.covars_[j].flatten()]
                 for j in xrange(mixture_model.n_components)]

    # Initial transition matrix
    if isinstance(guess_t_matrix, scipy.sparse.csr.csr_matrix):
        guess_t_matrix = guess_t_matrix.todense()
    guess_t_matrix = guess_t_matrix.tolist()

    # Initial occupancy
    # TODO: figure out if initial occupancy matters
    initial_occupancy = ([1.0 / mixture_model.n_components]
                         * mixture_model.n_components)

    # Set up the distribution
    g_float = ghmm.Float()
    g_distribution = ghmm.MultivariateGaussianDistribution(g_float)

    # Put it all together
    model = ghmm.HMMFromMatrices(g_float, g_distribution, guess_t_matrix,
                                 emissions, initial_occupancy)
    return model

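# Hedged sketch (not from the original source): one way to build the "guess"
# transition matrix the docstring above describes, by summing outer products
# of consecutive membership vectors. `memberships` is assumed to be a
# (T x n_components) array such as mixture_model.predict_proba(trajectory).
def guess_transition_matrix(memberships):
    import numpy as np
    counts = np.zeros((memberships.shape[1], memberships.shape[1]))
    for t in xrange(len(memberships) - 1):
        counts += np.outer(memberships[t], memberships[t + 1])
    # Row-normalize so each row is a probability distribution
    return counts / counts.sum(axis=1)[:, np.newaxis]
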
def _predict_next(self):
    """Fit an HMM to the observed move history and predict the next move.

    :returns: the index of the predicted next move
    """
    # Random (row-stochastic) initialization of the HMM parameters
    a_init = normalize_stoch_map(np.random.rand(self._n_hid, self._n_hid))
    b_init = normalize_stoch_map(np.random.rand(self._n_hid, self._n_sym ** 2))
    pi_init = normalize_stoch_map(np.random.rand(self._n_hid))
    hmm = gh.HMMFromMatrices(self._alphab, gh.DiscreteDistribution(self._alphab),
                             a_init, b_init, pi_init)
    obs = gh.EmissionSequence(self._alphab, self._memory)
    hmm.baumWelch(obs)

    # Propagate the final forward variable one step through the transition
    # matrix to get the predictive state distribution
    alpha = hmm.forward(obs)[0][-1]
    trans = hmm.asMatrices()[0]
    alpha = np.dot(alpha, trans)

    # Mix the per-state emission distributions under that state distribution
    next_moves_dist = np.zeros(self._n_sym ** 2)
    for i in range(self._n_hid):
        next_moves_dist += np.asarray(hmm.getEmission(i)) * alpha[i]
    next_moves_dist = next_moves_dist[self._conversion_array]
    next_move = np.argmax(np.sum(next_moves_dist, axis=0))
    return np.where(self._rules[next_move] == -1)[0][0]

def newModel(states, randomize=True, startAtFirstState=False, feedForward=True):
    """newModel(states, randomize, startAtFirstState, feedForward)

    Make a new random model.
    """
    pi = [1.0 / states] * states
    if startAtFirstState:
        pi = [0] * states
        pi[0] = 1

    aMat = numpy.zeros((states, states), float)
    bMat = numpy.zeros((states, 2), float)

    if randomize:
        for i in range(states):
            for j in range(states):
                aMat[i][j] = random.random()
                # A feed-forward model only allows the i -> i+1 transition
                if feedForward and (j != i + 1):
                    aMat[i][j] = 0
                if feedForward and (j == i + 1):
                    aMat[i][j] = 1
            for j in range(2):
                bMat[i][j] = random.random()

    # Smooth so no probability is exactly zero
    aMat += 0.01
    bMat += 0.01

    m = ghmm.HMMFromMatrices(ghmm.Float(),
                             ghmm.GaussianDistribution(ghmm.Float()),
                             aMat, bMat, pi)
    return m

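# Hedged usage sketch (illustrative, not from the original source): build a
# 5-state feed-forward model that always starts in state 0, sample one
# sequence from it, and inspect the smoothed parameter matrices.
def exampleNewModel():
    m = newModel(5, randomize=True, startAtFirstState=True, feedForward=True)
    seq = m.sampleSingle(20)  # one ghmm.EmissionSequence of length 20
    print m.asMatrices()      # the (smoothed) A, B and pi
    return seq
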
def train(self):
    # This tells GHMM every possible value that it will be seeing
    alphabet = ghmm.Alphabet(list(set(self.events)))
    alphaLen = len(alphabet)

    # Initialize the probabilities of transitioning from each state to each
    # other state. There is probably a better way to do this, but this is
    # nice and simple.
    trans_prob = 1.0 / alphaLen
    trans = [[trans_prob for row in range(alphaLen)] for col in range(alphaLen)]

    # Initialize the probabilities of seeing each output from each state.
    # Again, there is probably a better way to do this, but this is simple.
    emiss_prob = 1.0 / alphaLen
    emiss = [[emiss_prob for row in range(alphaLen)] for col in range(alphaLen)]

    # Some grease to get GHMM to work
    pi = [1.0 / alphaLen] * alphaLen

    # The sequence of musical events gathered from the music
    train_seq = ghmm.EmissionSequence(alphabet, self.events)

    # Generate the model of the data
    m = ghmm.HMMFromMatrices(alphabet, ghmm.DiscreteDistribution(alphabet),
                             trans, emiss, pi)

    # Train the model based on the training sequence
    m.baumWelch(train_seq)

    return (m, alphabet)

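# Hedged follow-up sketch (not part of the original class): once train()
# returns, new event sequences can be sampled from the model. The assumption
# here is that iterating a sampled EmissionSequence yields internal symbol
# indices, which alphabet.external() maps back to the original event values.
def sample_events(model, alphabet, length=32):
    sampled = model.sampleSingle(length)
    return [alphabet.external(sym) for sym in sampled]
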
def trainHMM(self, seq, trans, emi, qtc_type='qtcc'):
    """Uses the given parameters to train a multinomial HMM to represent
    the given sequences"""
    if qtc_type == 'qtcb':
        state_num = 11
    elif qtc_type == 'qtcc':
        state_num = 83
    elif qtc_type == 'qtcbc':
        state_num = 92
    else:
        raise QtcException("trainHMM: Unknown qtc type: {!r}".format(qtc_type))

    print 'Generating HMM:'
    print '\tCreating symbols...'
    symbols = self.generateAlphabet(state_num)
    startprob = np.zeros(state_num)
    startprob[0] = 1
    print '\t\t', symbols
    print '\tCreating HMM...'
    qtc_hmm = gh.HMMFromMatrices(symbols, gh.DiscreteDistribution(symbols),
                                 trans.tolist(), emi.tolist(),
                                 startprob.tolist())
    print '\tTraining...'
    qtc_hmm.baumWelch(self.createSequenceSet(seq, symbols))

    return qtc_hmm

def _train(self, seq, trans, emi, num_possible_states):
    """Uses the given parameters to train a multinomial HMM to represent
    the given sequences of observations. Uses Baum-Welch training.
    Please override if special training is necessary for your QSR.

    :param seq: the sequence of observations represented by alphabet symbols
    :param trans: the transition matrix as a numpy array
    :param emi: the emission matrix as a numpy array
    :param num_possible_states: the total number of possible states

    :return: the HMM generated via Baum-Welch training
    """
    print 'Generating HMM:'
    print '\tCreating symbols...'
    symbols = self._generate_alphabet(num_possible_states)
    startprob = np.zeros(num_possible_states)
    startprob[0] = 1
    print '\t\t', symbols
    print '\tCreating HMM...'
    hmm = gh.HMMFromMatrices(symbols, gh.DiscreteDistribution(symbols),
                             trans.tolist(), emi.tolist(), startprob.tolist())
    print '\tTraining...'
    hmm.baumWelch(self._create_sequence_set(seq, symbols))

    return hmm

def baum_welch(self, obs_seqs):
    self.model = ghmm.HMMFromMatrices(self.emission_domain, self.emission_distr,
                                      self.A, self.B, self.pi)
    obs_seqs_set = self.get_seqs_set(obs_seqs)
    self.model.baumWelch(obs_seqs_set)
    self.st_st = self.merge_states()

def create_model(self, flag, number_states):
    A, B, pi = self.calculate_A_B_pi(number_states, flag)

    # generate model from parameters
    model = ghmm.HMMFromMatrices(self.F, ghmm.GaussianDistribution(self.F), A, B, pi)
    # model = ghmm.HMMFromMatrices(F, ghmm.MultivariateGaussianDistribution(F), A, B, pi)

    return model

def computeLikelihoods(idx, A, B, pi, F, X, nEmissionDim, nState, startIdx=2,
                       bPosterior=False, converted_X=False, cov_type='full'):
    '''
    Input:
    - X: dimension x length
    '''
    if nEmissionDim >= 2:
        ml = ghmm.HMMFromMatrices(F, ghmm.MultivariateGaussianDistribution(F), A, B, pi)
        if cov_type == 'diag':
            ml.setDiagonalCovariance(1)
    else:
        ml = ghmm.HMMFromMatrices(F, ghmm.GaussianDistribution(F), A, B, pi)

    X_test = util.convert_sequence(X, emission=False)
    X_test = np.squeeze(X_test)

    l_idx = []
    l_likelihood = []
    l_posterior = []

    for i in xrange(startIdx, len(X[0])):
        final_ts_obj = ghmm.EmissionSequence(F, X_test[:i * nEmissionDim].tolist())

        try:
            logp = ml.loglikelihood(final_ts_obj)
            if bPosterior:
                post = np.array(ml.posterior(final_ts_obj))
            l_likelihood.append(logp)
            if bPosterior:
                l_posterior.append(post[i - 1])
        except:
            print "Unexpected profile!! GHMM cannot handle too low probability. Underflow?"
            ## return False, False # anomaly
            ## continue # we keep the state as the previous one
            l_likelihood.append(-1000000000000)
            if bPosterior:
                if len(l_posterior) == 0:
                    l_posterior.append(list(pi))
                else:
                    l_posterior.append(l_posterior[-1])

        l_idx.append(i)

    if bPosterior:
        return idx, l_idx, l_likelihood, l_posterior
    else:
        return idx, l_idx, l_likelihood

def computeLikelihood(F, k, data, g_mu, g_sig, nEmissionDim, A, B, pi):
    if nEmissionDim >= 2:
        hmm_ml = ghmm.HMMFromMatrices(F, ghmm.MultivariateGaussianDistribution(F), A, B, pi)
    else:
        hmm_ml = ghmm.HMMFromMatrices(F, ghmm.GaussianDistribution(F), A, B, pi)

    final_ts_obj = ghmm.EmissionSequence(F, data)
    logp = hmm_ml.loglikelihoods(final_ts_obj)[0]
    post = np.array(hmm_ml.posterior(final_ts_obj))

    k_prop = norm(loc=g_mu, scale=g_sig).pdf(k)
    g_post = post[k - 1] * k_prop
    g_lhood = logp * k_prop
    g_lhood2 = logp * logp * k_prop
    prop_sum = k_prop

    # print np.shape(g_post), np.shape(g_lhood), np.shape(g_lhood2), np.shape(prop_sum)
    return g_post, g_lhood, g_lhood2, prop_sum

def create_0order_hmm(nb_seq, nb_residues, first_letters, motif):
    """
    Create a 0-order HMM initialized from a MEME result.

    :arg nb_seq: Number of sequences used by MEME
    :type nb_seq: int
    :arg nb_residues: Number of residues used by MEME
    :type nb_residues: int
    :arg first_letters: Number of occurrences of ACGT at the beginning of
        sequences used by MEME
    :type first_letters: dict of str->int
    :arg motif: PFM as a Biopython motif to be used to initialize the TFFM
    :type motif: :class:`Bio.motifs`

    :returns: The constructed HMM
    :rtype: :class:`ghmm.DiscreteEmissionHMM`
    """
    # The first (background) state emits uniformly at random
    emissions = [[0.25, 0.25, 0.25, 0.25]]
    # Complete the emissions with the actual motif frequencies
    if motif.instances:
        # motif.counts is computed directly when creating the motif from
        # instances
        nb_hits = len(motif.instances)
    else:
        nb_hits = nb_seq
    for position in xrange(len(motif)):
        frequencies = []
        for letter in "ACGT":
            # Add-one smoothing of the column counts
            freq = (motif.counts[letter][position] + 1.) / (nb_hits + 4.)
            frequencies.append(freq)
        emissions.append(frequencies)

    # Background transitions
    transitions = []
    background_to_background = 1. - float(nb_seq) / nb_residues
    background_to_foreground = 1. - background_to_background
    transitions.append([background_to_background, background_to_foreground]
                       + [0.] * (len(motif) - 1))
    # Core transitions
    for position in xrange(1, len(motif)):
        transitions.append([0.] * (position + 1) + [1.]
                           + [0.] * (len(motif) - position - 1))
    # Final transitions
    transitions.append([1.] + [0.] * len(motif))
    # Starting probabilities
    initials = [1.] + [0.] * len(motif)
    return ghmm.HMMFromMatrices(ghmm.Alphabet(ALPHABET),
                                ghmm.DiscreteDistribution(ghmm.Alphabet(ALPHABET)),
                                transitions, emissions, initials)

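# Hedged usage sketch (assumes Biopython and the module-level ACGT ALPHABET
# used above; the instance strings and counts are made up for illustration):
# build a motif from aligned hits and initialize the 0-order HMM from it.
def example_create_0order():
    from Bio import motifs
    from Bio.Seq import Seq
    instances = [Seq("TACAA"), Seq("TACGC"), Seq("TACAC"), Seq("AACCC")]
    motif = motifs.create(instances)
    # nb_seq, nb_residues and first_letters would normally come from MEME
    return create_0order_hmm(nb_seq=4, nb_residues=100,
                             first_letters={'A': 1, 'C': 0, 'G': 0, 'T': 3},
                             motif=motif)
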
def ghmm_from_discrete_hmm(hmm):
    hmm = deepcopy(hmm)
    domain = ghmm.Alphabet(range(hmm.alphabetSize))
    trans = hmm.transitionMatrix
    init = hmm.initialProbabilities
    emissions = [d.probabilities for d in hmm.emissionDistributions]
    return ghmm.HMMFromMatrices(emissionDomain=domain,
                                distribution=ghmm.DiscreteDistribution(domain),
                                A=trans, B=emissions, pi=init)

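# Hedged sketch (stand-in input, not from the original source): any object
# exposing alphabetSize, transitionMatrix, initialProbabilities and emission
# distributions with a .probabilities attribute satisfies the contract of
# ghmm_from_discrete_hmm above.
class _StubDiscreteDistribution(object):
    def __init__(self, probabilities):
        self.probabilities = probabilities

class _StubDiscreteHMM(object):
    alphabetSize = 2
    transitionMatrix = [[0.7, 0.3], [0.4, 0.6]]
    initialProbabilities = [0.5, 0.5]
    emissionDistributions = [_StubDiscreteDistribution([0.9, 0.1]),
                             _StubDiscreteDistribution([0.2, 0.8])]

# ghmm_model = ghmm_from_discrete_hmm(_StubDiscreteHMM())
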
def __init__(self, A, B, Pi, observations):
    if len(A) == len(Pi):
        self.states = range(len(A))
        # The "alphabet" comprising action indices
        self.sigma = ghmm.Alphabet(observations)
        self.initA = A
        self.initB = B
        self.initPi = Pi
        self.ghmmModel = ghmm.HMMFromMatrices(self.sigma,
                                              ghmm.DiscreteDistribution(self.sigma),
                                              self.initA, self.initB, self.initPi)
    else:
        prettyPrint("Unable to initialize model. Unequal number of states", "error")
        return

def setUp(self):
    '''Create a simple dice rolling HMM'''
    self.sigma = g.IntegerRange(1, 7)
    self.A = [[0.9, 0.1], [0.3, 0.7]]
    efair = [1.0 / 6] * 6
    eloaded = [3.0 / 13, 3.0 / 13, 2.0 / 13, 2.0 / 13, 2.0 / 13, 1.0 / 13]
    self.B = [efair, eloaded]
    self.pi = [0.5] * 2
    self.m = g.HMMFromMatrices(self.sigma, g.DiscreteDistribution(self.sigma),
                               self.A, self.B, self.pi)

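# Hedged companion test (illustrative, not from the original suite): decode
# a short roll sequence with Viterbi and score it against the model built in
# setUp; runs of low numbers should favor the loaded-die state, which puts
# extra mass on 1 and 2.
def test_viterbi_decoding(self):
    rolls = [6, 5, 1, 1, 2, 1, 2, 1, 1, 3]
    seq = g.EmissionSequence(self.sigma, rolls)
    path, path_logp = self.m.viterbi(seq)
    logp = self.m.loglikelihood(seq)
    print path, path_logp, logp
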
def getHMM(emissionDomain, distribution, A, B, pi, name=None):
    """
    Takes HMM-style parameter matrices and returns an HMM object which was
    initialised with a GHMM object built from these parameters.
    """
    hmm = ghmm.HMMFromMatrices(emissionDomain, distribution, A, B, pi,
                               hmmName=name)
    return HMM(hmm, 1)

def test_ghmm(self):
    # this is being extended to also support mixtures of multivariate gaussians
    # Interpretation of the B matrix for the multivariate gaussian case
    # (Example with three states and two mixture components with two dimensions):
    # B = [
    #       [["mu111","mu112"],["sig1111","sig1112","sig1121","sig1122"],
    #        ["mu121","mu122"],["sig1211","sig1212","sig1221","sig1222"],
    #        ["w11","w12"] ],
    #       [["mu211","mu212"],["sig2111","sig2112","sig2121","sig2122"],
    #        ["mu221","mu222"],["sig2211","sig2212","sig2221","sig2222"],
    #        ["w21","w22"] ],
    #       [["mu311","mu312"],["sig3111","sig3112","sig3121","sig3122"],
    #        ["mu321","mu322"],["sig3211","sig3212","sig3221","sig3222"],
    #        ["w31","w32"] ],
    # ]
    #
    # ["mu311","mu312"] is the mean vector of the two dimensional
    # gaussian in state 3, mixture component 1
    # ["sig1211","sig1212","sig1221","sig1222"] is the covariance
    # matrix of the two dimensional gaussian in state 1, mixture component 2
    # ["w21","w22"] are the weights of the mixture components in state 2
    # For states with only one mixture component, an implicit weight
    # of 1.0 is assumed
    import ghmm
    F = ghmm.Float()
    Abig = [[0.0, 1.0], [1.0, 0.0]]
    Bbig = [[[1.0, 1.0, 1.0],
             [0.9, 0.4, 0.2, 0.4, 2.2, 0.5, 0.2, 0.5, 1.0]],
            [[10.0, 10.0, 10.0],
             [1.0, 0.2, 0.8, 0.2, 2.0, 0.6, 0.8, 0.6, 0.9]]]
    piBig = [0.5, 0.5]
    modelBig = ghmm.HMMFromMatrices(F, ghmm.MultivariateGaussianDistribution(F),
                                    Abig, Bbig, piBig)
    seq_set = modelBig.sample(10, 100, seed=3586662)
    e = modelBig.sampleSingle(1)
    print [x for x in e]

    # get log P(seq | model)
    logp = modelBig.loglikelihood(e)
    print logp

    # calculate the Viterbi path
    path = modelBig.viterbi(e)
    print path

    # re-estimate the model parameters from the sampled set
    modelBig.baumWelch(seq_set, 500, 0.0001)

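# Hedged illustration (layout only, matching the comment block in test_ghmm;
# whether a given ghmm build accepts mixture B matrices depends on the
# extension that comment mentions): two states, two 2-D components each.
def mixture_B_layout_example():
    A = [[0.5, 0.5], [0.5, 0.5]]
    B = [
        [[0.0, 0.0], [1.0, 0.0, 0.0, 1.0],   # state 1, component 1: mu, flat cov
         [5.0, 5.0], [1.0, 0.0, 0.0, 1.0],   # state 1, component 2
         [0.6, 0.4]],                        # state 1 mixture weights
        [[10.0, 10.0], [2.0, 0.5, 0.5, 2.0],
         [-5.0, -5.0], [1.0, 0.0, 0.0, 1.0],
         [0.3, 0.7]],
    ]
    pi = [0.5, 0.5]
    return A, B, pi
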
def fit(self, X_train, A=None, B=None, pi=None, B_dict=None, verbose=False):
    if A is None:
        if verbose:
            print "Generate new A matrix"
        # Transition probability matrix (Initial transition probability, TODO?)
        A = self.init_trans_mat(self.nState).tolist()

    if B is None:
        if verbose:
            print "Generate new B matrix"
        # We should think about multivariate Gaussian pdf.
        self.mu, self.sig = self.vectors_to_mean_sigma(X_train, self.nState)

        # Emission probability matrix
        B = np.hstack([self.mu, self.sig]).tolist()  # Must be [i,:] = [mu, sig]

    if pi is None:
        # pi - initial probabilities per state
        ## pi = [1.0/float(self.nState)] * self.nState
        pi = [0.] * self.nState
        pi[0] = 1.0

    # HMM model object
    self.ml = ghmm.HMMFromMatrices(self.F, ghmm.GaussianDistribution(self.F), A, B, pi)
    ## print "Run Baum Welch method with (samples, length)", X_train.shape
    train_seq = X_train.tolist()
    final_seq = ghmm.SequenceSet(self.F, train_seq)
    self.ml.baumWelch(final_seq, 10000)

    [self.A, self.B, self.pi] = self.ml.asMatrices()
    self.A = np.array(self.A)
    self.B = np.array(self.B)

    ## self.mean_path_plot(mu[:,0], sigma[:,0])
    ## print "Completed to fitting", np.array(final_seq).shape

    # state range
    self.state_range = np.arange(0, self.nState, 1)

    # Pre-computation for PHMM variables
    self.mu_z = np.zeros((self.nState))
    self.mu_z2 = np.zeros((self.nState))
    self.mu_z3 = np.zeros((self.nState))
    self.var_z = np.zeros((self.nState))
    self.sig_z3 = np.zeros((self.nState))

    for i in xrange(self.nState):
        zp = self.A[i, :] * self.state_range
        self.mu_z[i] = np.sum(zp)
        self.mu_z2[i] = self.mu_z[i] ** 2
        # self.mu_z3[i] = self.mu_z[i]**3
        self.var_z[i] = np.sum(zp * self.state_range) - self.mu_z[i] ** 2

def ghmm_from_gaussian_hmm(hmm):
    hmm = deepcopy(hmm)
    domain = ghmm.Float()
    trans = hmm.transitionMatrix.tolist()
    init = hmm.initialProbabilities.tolist()
    emissions = [map(float, [d.mean, d.variance])
                 for d in hmm.emissionDistributions]
    # print init
    # print trans
    # print emissions
    return ghmm.HMMFromMatrices(emissionDomain=domain,
                                distribution=ghmm.GaussianDistribution(domain),
                                A=trans, B=emissions, pi=init)

def ghmm_from_multivariate_continuous_hmm(hmm):
    hmm = deepcopy(hmm)
    domain = ghmm.Float()
    trans = hmm.transitionMatrix.tolist()
    init = hmm.initialProbabilities.tolist()
    emissions = [[d.mean.tolist(), d.variance.flatten().tolist()]
                 for d in hmm.emissionDistributions]
    # print init
    # print trans
    # print emissions
    return ghmm.HMMFromMatrices(emissionDomain=domain,
                                distribution=ghmm.MultivariateGaussianDistribution(domain),
                                A=trans, B=emissions, pi=init)

def _new_model(n_features, n_states, means, covars, topology):
    # Generate emissions
    emissions = []
    for i in range(n_states):
        emission = [means[i].tolist(), covars[i].ravel().tolist()]
        emissions.append(emission)

    # Create model
    domain = impl.Float()
    transitions = transition_matrix(n_states, topology).tolist()
    pi = start_probabilities(n_states, topology)
    distribution = impl.MultivariateGaussianDistribution(domain)
    model = impl.HMMFromMatrices(domain, distribution, transitions, emissions, pi)

    return model

def __create_hmm_from_dict(self, dictionary, qsr_type, num_symbols):
    """Creates an HMM from its dictionary representation.

    :param dictionary: a dict holding the transition matrix, emission matrix
        and start probabilities under the keys self.TRANS, self.EMI and
        self.START
    :param qsr_type: the QSR type used to look up the alphabet generator
    :param num_symbols: the number of symbols in the alphabet

    :return: the ghmm hmm object
    """
    symbols = self.hmm_types_available[qsr_type]().generate_alphabet(num_symbols)
    hmm = gh.HMMFromMatrices(symbols, gh.DiscreteDistribution(symbols),
                             dictionary[self.TRANS], dictionary[self.EMI],
                             dictionary[self.START])
    return hmm

def trainHMM(hmmState):
    '''
    Train HMM with the given chromosome.
    '''
    print >> sys.stderr, printTime(), "Train HMM with one chromosome."

    T = [[0.9, 0.1], [0.1, 0.9]]
    e1 = [0.1, 0.9]
    e0 = [0.9, 0.1]
    E = [e0, e1]
    pi = [0.9, 0.1]  # initial 10% are peak?
    sigma = ghmm.IntegerRange(0, 2)  # 0, 1
    m = ghmm.HMMFromMatrices(sigma, ghmm.DiscreteDistribution(sigma), T, E, pi)
    m.baumWelch(ghmm.EmissionSequence(sigma, hmmState))

    print >> sys.stderr, printTime(), "Train HMM finished."
    print >> sys.stderr
    return m

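# Hedged usage sketch (synthetic input, not from the original source; relies
# on the printTime helper used above): train on a binary track and recover
# the peak region with Viterbi decoding.
def examplePeakCalling():
    hmmState = [0] * 50 + [1] * 10 + [0] * 40
    m = trainHMM(hmmState)
    sigma = ghmm.IntegerRange(0, 2)
    path, _ = m.viterbi(ghmm.EmissionSequence(sigma, hmmState))
    return path  # 1s mark the decoded peak states
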
def conditional_prob(self, x):
    '''
    Input
    @ x: dim x length
    Output
    @ A list of conditional probabilities P(x_t|x_s,lambda)

    Only single sample works
    '''
    from scipy.stats import norm, entropy

    # logp from all features
    X_test = util.convert_sequence2(x, emission=False)
    X_test = np.squeeze(X_test)
    final_ts_obj = ghmm.EmissionSequence(self.F, X_test.tolist())
    logp_all = self.ml.loglikelihood(final_ts_obj)

    # feature-wise conditional probability
    cond_prob = []
    for i in xrange(self.nEmissionDim):  # per feature
        # Remove feature i from every state's mean vector and covariance
        B = copy.copy(self.B)
        for j in xrange(self.nState):
            B[j][0] = [b for idx, b in enumerate(B[j][0]) if idx != i]
            B_arr = copy.copy(B[j][1])
            B_arr = np.array(B_arr).reshape((self.nEmissionDim, self.nEmissionDim))
            B_arr = np.delete(B_arr, (i), axis=0)
            B_arr = np.delete(B_arr, (i), axis=1)
            B[j][1] = B_arr.flatten().tolist()

        ml_src = ghmm.HMMFromMatrices(self.F,
                                      ghmm.MultivariateGaussianDistribution(self.F),
                                      self.A, B, self.pi)

        # logp from the remaining features
        X_test = util.convert_sequence2([x[j] for j in xrange(len(x)) if j != i],
                                        emission=False)
        X_test = np.squeeze(X_test)
        final_ts_obj = ghmm.EmissionSequence(self.F, X_test.tolist())
        logp_src = ml_src.loglikelihood(final_ts_obj)

        # P(x_i | x_rest) = P(x_all) / P(x_rest): a difference in log space
        cond_prob.append(logp_all - logp_src)

        if np.isnan(cond_prob[-1]) or np.isinf(cond_prob[-1]):
            print "NaN in conditional probabilities: ", np.shape(x)
            return None

    return np.array(cond_prob)

def predict_from_single_seq(self, x, ref_num):
    '''
    Input
    @ x: length #samples x known steps
    Output
    @ observation distribution: nDimension
    '''
    # New emission restricted to the reference dimension (partial sequence)
    B = []
    for i in xrange(self.nState):
        B.append([self.B[i][0][ref_num],
                  self.B[i][1][ref_num * self.nEmissionDim + ref_num]])

    ml = ghmm.HMMFromMatrices(self.F, ghmm.GaussianDistribution(self.F),
                              self.A, B, self.pi)

    if type(x) is not list:
        x = x.tolist()
    final_ts_obj = ghmm.EmissionSequence(self.F, x)

    try:
        (alpha, scale) = ml.forward(final_ts_obj)
    except:
        print "No alpha is available !!"
        sys.exit()

    x_pred = []
    for i in xrange(self.nEmissionDim):
        if i == ref_num:
            x_pred.append(x[-1])
        else:
            # Conditional Gaussian mean of dimension i given the reference
            # dimension, averaged over the forward state distribution:
            # m_j = mu_i + Sig_it / Sig_tt * (x_t - mu_t)
            src_cov_idx = ref_num * self.nEmissionDim + ref_num
            tgt_cov_idx = ref_num * self.nEmissionDim + i
            t_o = 0.0
            for j in xrange(self.nState):
                m_j = self.B[j][0][i] + \
                    self.B[j][1][tgt_cov_idx] / self.B[j][1][src_cov_idx] * \
                    (x[-1] - self.B[j][0][ref_num])
                t_o += alpha[-1][j] * m_j
            x_pred.append(t_o)

    return x_pred