Code example #1
def learn_likelihoods_progress(i, n, m, A, B, pi, F, X_train, nEmissionDim, g_mu, g_sig, nState):
    if nEmissionDim >= 2:
        ml = ghmm.HMMFromMatrices(F, ghmm.MultivariateGaussianDistribution(F), A, B, pi)
    else:
        ml = ghmm.HMMFromMatrices(F, ghmm.GaussianDistribution(F), A, B, pi)

    l_likelihood_mean = 0.0
    l_likelihood_mean2 = 0.0
    l_statePosterior = np.zeros(nState)

    for j in xrange(n):    

        g_post = np.zeros(nState)
        g_lhood = 0.0
        g_lhood2 = 0.0
        prop_sum = 0.0

        for k in xrange(1, m):
            final_ts_obj = ghmm.EmissionSequence(F, X_train[j][:k*nEmissionDim])
            logp = ml.loglikelihoods(final_ts_obj)[0]
            # print 'Log likelihood:', logp
            post = np.array(ml.posterior(final_ts_obj))

            k_prop = norm(loc=g_mu, scale=g_sig).pdf(k)
            g_post += post[k-1] * k_prop
            g_lhood += logp * k_prop
            g_lhood2 += logp * logp * k_prop

            prop_sum  += k_prop

        l_statePosterior += g_post / prop_sum / float(n)
        l_likelihood_mean += g_lhood / prop_sum / float(n)
        l_likelihood_mean2 += g_lhood2 / prop_sum / float(n)

    return i, l_statePosterior, l_likelihood_mean, np.sqrt(l_likelihood_mean2 - l_likelihood_mean**2)
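A hypothetical driver for the function above (the variable names and the Gaussian prior values are assumptions, not from the original project): it scores training slice 0 with a progress prior centered at step g_mu.

import ghmm
import numpy as np
from scipy.stats import norm

# X_train, max_steps, A, B, pi are assumed prepared elsewhere
F = ghmm.Float()  # emission domain shared with the snippet above
idx, post, lhood_mu, lhood_std = learn_likelihoods_progress(
    0, len(X_train), max_steps, A, B, pi, F, X_train,
    nEmissionDim=2, g_mu=10.0, g_sig=2.0, nState=len(A))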
Code example #2
    def set_hmm_object(self,
                       A,
                       B,
                       pi,
                       out_a_num=None,
                       vec_num=None,
                       mat_num=None,
                       u_denom=None):
        """Set HMM's hyper parameters
        """
        if self.nEmissionDim == 1:
            self.ml = ghmm.HMMFromMatrices(self.F, ghmm.GaussianDistribution(self.F), \
                                           A, B, pi)
        else:
            self.ml = ghmm.HMMFromMatrices(self.F, ghmm.MultivariateGaussianDistribution(self.F), \
                                           A, B, pi)
        self.A = A
        self.B = B
        self.pi = pi

        try:
            self.ml.setBaumWelchParams(out_a_num, vec_num, mat_num, u_denom)
        except:
            print "Install Daehyung's custom ghmm if you want partial fit functionalities."

        return self.ml
Code example #3
    def _train(self,
               seq,
               trans,
               emi,
               num_possible_states,
               pseudo_transitions=False,
               start_at_zero=False):
        """Uses the given parameters to train a multinominal HMM to represent
        the given seqences of observations. Uses Baum-Welch training.
        Please override if special training is necessary for your QSR.

        :param seq: the sequence of observations represented by alphabet symbols
        :param trans: the transition matrix as a numpy array
        :param emi: the emission matrix as a numpy array
        :param num_possible_states: the total number of possible states

        :return: the HMM generated via Baum-Welch training
        """

        print 'Generating HMM:'
        print seq
        print '\tCreating symbols...'
        symbols = self.generate_alphabet(num_possible_states)
        if start_at_zero:
            startprob = np.zeros(num_possible_states)
            startprob[0] = 1
        else:
            startprob = np.ones(num_possible_states)
            startprob = startprob / np.sum(startprob)
        print startprob
        print '\t\t', symbols
        print '\tCreating HMM...'
        hmm = gh.HMMFromMatrices(symbols, gh.DiscreteDistribution(symbols),
                                 trans.tolist(), emi.tolist(),
                                 startprob.tolist())
        print '\tTraining...'
        hmm.baumWelch(self._create_sequence_set(seq, symbols))

        if pseudo_transitions:
            print '\tAdding pseudo transitions...'
            pseudo = deepcopy(trans)
            pseudo[pseudo > 0.] = 1.
            pseudo = pseudo / (float(len(seq) + 1))

            trans_trained, emi, start = hmm.asMatrices()
            trans_trained = np.array(trans_trained) + pseudo

            hmm = gh.HMMFromMatrices(symbols, gh.DiscreteDistribution(symbols),
                                     trans_trained.tolist(), emi, start)

            hmm.normalize()

        return hmm
Code example #4
def computeLikelihood(idx, A, B, pi, F, X, nEmissionDim, nState, startIdx=1, \
                      bPosterior=False, converted_X=False, cov_type='full'):
    '''
    This function will be deprecated. Please use computeLikelihoods.
    '''

    if nEmissionDim >= 2:
        ml = ghmm.HMMFromMatrices(F, ghmm.MultivariateGaussianDistribution(F),
                                  A, B, pi)
        if 'diag' in cov_type:
            ml.setDiagonalCovariance(1)
    else:
        ml = ghmm.HMMFromMatrices(F, ghmm.GaussianDistribution(F), A, B, pi)

    if converted_X is False:
        X_test = util.convert_sequence(X, emission=False)
        X_test = np.squeeze(X_test)
        X_test = X_test.tolist()
    else:
        X_test = X

    l_idx = []
    l_likelihood = []
    l_posterior = []

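    # Score growing prefixes of the test sequence: step i evaluates the first
    # i observations (i*nEmissionDim flattened values) against the model.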
    for i in xrange(startIdx, len(X_test) / nEmissionDim):
        final_ts_obj = ghmm.EmissionSequence(F, X_test[:i * nEmissionDim])

        try:
            logp = ml.loglikelihood(final_ts_obj)
            if bPosterior: post = np.array(ml.posterior(final_ts_obj))
        except:
            print "Unexpected profile!! GHMM cannot handle too low probability. Underflow?"

            l_idx.append(i)
            l_likelihood.append(-100000000)
            if bPosterior:
                if len(l_posterior) == 0: l_posterior.append(list(pi))
                else: l_posterior.append(l_posterior[-1])
            ## return False, False # anomaly
            continue

        l_idx.append(i)
        l_likelihood.append(logp)
        if bPosterior: l_posterior.append(post[i - 1])

    if bPosterior:
        return idx, l_idx, l_likelihood, l_posterior
    else:
        return idx, l_idx, l_likelihood
Code example #5
    def reset(self):
        """Reset the HMM object
        """
        [A, B, pi] = self.ml.asMatrices()

        if self.nEmissionDim == 1:
            self.ml = ghmm.HMMFromMatrices(self.F, ghmm.GaussianDistribution(self.F), \
                                           A, B, pi)
        else:
            self.ml = ghmm.HMMFromMatrices(self.F, ghmm.MultivariateGaussianDistribution(self.F), \
                                           A, B, pi)
        self.A = A
        self.B = B
        self.pi = pi
Code example #6
 def markov_model(self):
     mm = ghmm.HMMFromMatrices(
         self.F, ghmm.MultivariateGaussianDistribution(self.F),
         self.transition_probabilities, self.observation_probabilities,
         self.initial_probabilities)
     #print ".>"+str(mm.asMatrices())
     return mm
Code example #7
def get_hidden_markov_model(mixture_model, guess_t_matrix):
    """Get an (unoptomized) hidden markov model from the mixture model and
    a guess at the transition matrix.

    The guess transition matrix is typically created by summing over the
    outer product of time-pairs of membership vectors.
    """

    # Emission probabilities for HMM, using their very silly
    # matrix arrangement
    emissions = [[mixture_model.means_[j], mixture_model.covars_[j].flatten()]
                 for j in xrange(mixture_model.n_components)]

    # Initial transition matrix
    if isinstance(guess_t_matrix, scipy.sparse.csr.csr_matrix):
        guess_t_matrix = guess_t_matrix.todense()
        guess_t_matrix = guess_t_matrix.tolist()

    # Initial occupancy
    # Todo: figure out if initial occupancy matters
    initial_occupancy = ([1.0 / mixture_model.n_components] *
                         mixture_model.n_components)

    # Set up distribution
    g_float = ghmm.Float()
    g_distribution = ghmm.MultivariateGaussianDistribution(g_float)

    # Put it all together
    model = ghmm.HMMFromMatrices(g_float, g_distribution, guess_t_matrix,
                                 emissions, initial_occupancy)
    return model
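A minimal sketch of the docstring's recipe for the guess transition matrix (the memberships array, shape (n_frames, n_components), is an assumption, e.g. obtained from mixture_model.predict_proba):

import numpy as np

def guess_transition_matrix(memberships):
    n_components = memberships.shape[1]
    t_matrix = np.zeros((n_components, n_components))
    # Sum the outer products of membership vectors at consecutive time steps
    for t in xrange(len(memberships) - 1):
        t_matrix += np.outer(memberships[t], memberships[t + 1])
    # Row-normalize so each row is a probability distribution
    t_matrix /= t_matrix.sum(axis=1, keepdims=True)
    return t_matrix.tolist()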
Code example #8
File: players.py Project: dsuess/stuff
    def _predict_next(self):
        """@todo: Docstring for _predict_next.
        :returns: @todo

        """
        a_init = normalize_stoch_map(np.random.rand(self._n_hid, self._n_hid))
        b_init = normalize_stoch_map(
            np.random.rand(self._n_hid, self._n_sym**2))
        pi_init = normalize_stoch_map(np.random.rand(self._n_hid))
        hmm = gh.HMMFromMatrices(self._alphab,
                                 gh.DiscreteDistribution(self._alphab), a_init,
                                 b_init, pi_init)
        obs = gh.EmissionSequence(self._alphab, self._memory)
        hmm.baumWelch(obs)

        alpha = hmm.forward(obs)[0][-1]
        trans = hmm.asMatrices()[0]
        alpha = np.dot(alpha, trans)
        next_moves_dist = np.zeros(self._n_sym**2)
        for i in range(self._n_hid):
            next_moves_dist += np.asarray(hmm.getEmission(i)) * alpha[i]
        next_moves_dist = next_moves_dist[self._conversion_array]
        next_move = np.argmax(np.sum(next_moves_dist, axis=0))

        return np.where(self._rules[next_move] == -1)[0][0]
Code example #9
def newModel(states, randomize = True, startAtFirstState = False, \
            feedForward = True):
    """newModel(states, obs, sigma)
    Make a new random model.
    """
    pi = [1.0 / states] * states

    if startAtFirstState:
        pi = [0] * states
        pi[0] = 1

    aMat = numpy.zeros((states, states), float)
    bMat = numpy.zeros((states, 2), float)

    if randomize:
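        # With feedForward, the random draws below are overridden so the chain
        # is strictly left-to-right: state i always transitions to state i+1.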
        for i in range(states):
            for j in range(states):
                aMat[i][j] = random.random()
                if feedForward and (j != i + 1):
                    aMat[i][j] = 0
                if feedForward and (j == i + 1):
                    aMat[i][j] = 1

            for j in range(2):
                bMat[i][j] = random.random()

    aMat += 0.01
    bMat += 0.01

    m = ghmm.HMMFromMatrices(ghmm.Float(), \
                                ghmm.GaussianDistribution(ghmm.Float()), \
                                aMat, bMat, pi)
    return m
Code example #10
    def train(self):
        # This tells GHMM every possible value that it will be seeing
        alphabet = ghmm.Alphabet(list(set(self.events)))
        alphaLen = len(alphabet)

        # Initialize the probabilities of transitioning from each state to each other
        # state. There is probably a better way to do this, but this is nice and simple.
        trans_prob = 1.0 / (alphaLen)
        trans = [[trans_prob for row in range(alphaLen)]
                 for col in range(alphaLen)]

        # Initialize the probabilities of seeing each output from each state.
        # Again, there is probably a better way to do this, but this is simple.
        emiss_prob = 1.0 / (alphaLen)
        emiss = [[emiss_prob for row in range(alphaLen)]
                 for col in range(alphaLen)]

        # Some grease to get GHMM to work
        pi = [1.0 / alphaLen] * alphaLen

        # The sequence of musical events gathered from the music
        train_seq = ghmm.EmissionSequence(alphabet, self.events)

        # Generate the model of the data
        m = ghmm.HMMFromMatrices(alphabet, ghmm.DiscreteDistribution(alphabet),
                                 trans, emiss, pi)

        # Train the model based on the training sequence
        m.baumWelch(train_seq)

        return (m, alphabet)
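A hypothetical usage sketch (the owning object and the contents of self.events are assumptions): train on a short event list, then sample new events from the model.

trainer.events = ['C4', 'E4', 'G4', 'C4', 'E4', 'G4']
m, alphabet = trainer.train()
print m.sampleSingle(8)  # eight newly generated musical events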
Code example #11
    def trainHMM(self, seq, trans, emi, qtc_type='qtcc'):
        """Uses the given parameters to train a multinominal HMM to represent the given seqences"""

        if qtc_type == 'qtcb':
            state_num = 11
        elif qtc_type == 'qtcc':
            state_num = 83
        elif qtc_type == 'qtcbc':
            state_num = 92
        else:
            raise QtcException(
                "trainHMM: Unknown qtc type: {!r}".format(qtc_type))

        print 'Generating HMM:'
        print '\tCreating symbols...'
        symbols = self.generateAlphabet(state_num)
        startprob = np.zeros((state_num))
        startprob[0] = 1
        print '\t\t', symbols
        print '\tCreating HMM...'
        qtc_hmm = gh.HMMFromMatrices(symbols, gh.DiscreteDistribution(symbols),
                                     trans.tolist(), emi.tolist(),
                                     startprob.tolist())
        print '\tTraining...'
        qtc_hmm.baumWelch(self.createSequenceSet(seq, symbols))

        return qtc_hmm
Code example #12
    def _train(self, seq, trans, emi, num_possible_states):
        """Uses the given parameters to train a multinominal HMM to represent
        the given seqences of observations. Uses Baum-Welch training.
        Please override if special training is necessary for your QSR.

        :param seq: the sequence of observations represented by alphabet symbols
        :param trans: the transition matrix as a numpy array
        :param emi: the emission matrix as a numpy array
        :param num_possible_states: the total number of possible states

        :return: the HMM generated via Baum-Welch training
        """

        print 'Generating HMM:'
        print '\tCreating symbols...'
        symbols = self._generate_alphabet(num_possible_states)
        startprob = np.zeros(num_possible_states)
        startprob[0] = 1
        print '\t\t', symbols
        print '\tCreating HMM...'
        hmm = gh.HMMFromMatrices(symbols, gh.DiscreteDistribution(symbols),
                                 trans.tolist(), emi.tolist(),
                                 startprob.tolist())
        print '\tTraining...'
        hmm.baumWelch(self._create_sequence_set(seq, symbols))

        return hmm
Code example #13
 def baum_welch(self, obs_seqs):
     self.model = ghmm.HMMFromMatrices(self.emission_domain,
                                       self.emission_distr, self.A, self.B,
                                       self.pi)
     obs_seqs_set = self.get_seqs_set(obs_seqs)
     self.model.baumWelch(obs_seqs_set)
     self.st_st = self.merge_states()
Code example #14
    def create_model(self, flag, number_states):
          
        A, B, pi = self.calculate_A_B_pi(number_states, flag)

        # generate models from parameters
        model = ghmm.HMMFromMatrices(self.F,ghmm.GaussianDistribution(self.F), A, B, pi)
        #model = ghmm.HMMFromMatrices(F,ghmm.MultivariateGaussianDistribution(F), A, B, pi)
        return model
Code example #15
def computeLikelihoods(idx, A, B, pi, F, X, nEmissionDim, nState, startIdx=2, \
                       bPosterior=False, converted_X=False, cov_type='full'):
    '''
    Input:
    - X: dimension x length
    '''

    if nEmissionDim >= 2:
        ml = ghmm.HMMFromMatrices(F, ghmm.MultivariateGaussianDistribution(F),
                                  A, B, pi)
        if cov_type == 'diag': ml.setDiagonalCovariance(1)
    else:
        ml = ghmm.HMMFromMatrices(F, ghmm.GaussianDistribution(F), A, B, pi)

    X_test = util.convert_sequence(X, emission=False)
    X_test = np.squeeze(X_test)

    l_idx = []
    l_likelihood = []
    l_posterior = []

    for i in xrange(startIdx, len(X[0])):
        final_ts_obj = ghmm.EmissionSequence(
            F, X_test[:i * nEmissionDim].tolist())

        try:
            logp = ml.loglikelihood(final_ts_obj)
            if bPosterior: post = np.array(ml.posterior(final_ts_obj))
            l_likelihood.append(logp)
            if bPosterior: l_posterior.append(post[i - 1])
        except:
            print "Unexpected profile!! GHMM cannot handle too low probability. Underflow?"
            ## return False, False # anomaly
            ## continue
            # we keep the state as the previous one
            l_likelihood.append(-1000000000000)
            if bPosterior:
                if len(l_posterior) == 0: l_posterior.append(list(pi))
                else: l_posterior.append(l_posterior[-1])

        l_idx.append(i)

    if bPosterior: return idx, l_idx, l_likelihood, l_posterior
    else: return idx, l_idx, l_likelihood
Code example #16
def computeLikelihood(F, k, data, g_mu, g_sig, nEmissionDim, A, B, pi):
    if nEmissionDim >= 2:
        hmm_ml = ghmm.HMMFromMatrices(F, ghmm.MultivariateGaussianDistribution(F), A, B, pi)
    else:
        hmm_ml = ghmm.HMMFromMatrices(F, ghmm.GaussianDistribution(F), A, B, pi)

    final_ts_obj = ghmm.EmissionSequence(F, data)
    logp = hmm_ml.loglikelihoods(final_ts_obj)[0]
    post = np.array(hmm_ml.posterior(final_ts_obj))

    k_prop = norm(loc=g_mu, scale=g_sig).pdf(k)
    g_post = post[k-1] * k_prop
    g_lhood = logp * k_prop
    g_lhood2 = logp * logp * k_prop
    prop_sum = k_prop

    # print np.shape(g_post), np.shape(g_lhood), np.shape(g_lhood2), np.shape(prop_sum)

    return g_post, g_lhood, g_lhood2, prop_sum
Code example #17
File: tffm_module.py Project: sakatani/TFFM
def create_0order_hmm(nb_seq, nb_residues, first_letters, motif):
    """
    Create a 0-order HMM initialized from MEME result

    :arg nb_seq: Number of sequences used by MEME
    :type nb_seq: int
    :arg nb_residues: Number of residues used by MEME
    :type nb_residues: int
    :arg first_letters: Number of occurrences of ACGT at the beginning of
        sequences used by MEME
    :type first_letters: dict of str->int
    :arg motif: PFM as a Biopython motif to be used to initialize the TFFM
    :type motif: :class:`Bio.motifs`

    :returns: The constructed HMM
    :rtype: :class:`ghmm.DiscreteEmissionHMM`

    """

    # The first state is random
    emissions = [[0.25, 0.25, 0.25, 0.25]]
    # Complete the emissions with the actual motif frequencies
    if motif.instances:
        # The motif.counts is computed directly when creating the motif from
        # instances
        nb_hits = len(motif.instances)
    else:
        nb_hits = nb_seq
    for position in xrange(len(motif)):
        frequencies = []
        for letter in "ACGT":
            freq = (motif.counts[letter][position] + 1.) / (nb_hits + 4.)
            frequencies.append(freq)
        emissions.append(frequencies)

    # Background transitions
    transitions = []
    background_to_background = 1. - float(nb_seq) / nb_residues
    background_to_foreground = 1. - background_to_background
    transitions.append(
        [background_to_background, background_to_foreground] + [0.] *
        (len(motif) - 1))
    # Core transitions
    for position in xrange(1, len(motif)):
        transitions.append(
            [0.] * (position + 1) + [1.] + [0.] * (len(motif) - position - 1))
    # Final transitions now
    transitions.append([1.] + [0.] * len(motif))
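    # For a length-3 motif the resulting transition matrix is, schematically:
    #   [[b2b, b2f, 0, 0],  # background loops or enters the motif
    #    [0,   0,   1, 0],  # motif position 1 -> position 2
    #    [0,   0,   0, 1],  # motif position 2 -> position 3
    #    [1,   0,   0, 0]]  # last position returns to the background state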

    # Starting proba
    initials = [1.] + [0.] * len(motif)
    return ghmm.HMMFromMatrices(ghmm.Alphabet(ALPHABET),
                                ghmm.DiscreteDistribution(
                                    ghmm.Alphabet(ALPHABET)),
                                transitions, emissions, initials)
Code example #18
def ghmm_from_discrete_hmm(hmm):
    hmm = deepcopy(hmm)
    domain = ghmm.Alphabet(range(hmm.alphabetSize))
    trans = hmm.transitionMatrix
    init = hmm.initialProbabilities
    emissions = [d.probabilities for d in hmm.emissionDistributions]
    return ghmm.HMMFromMatrices(emissionDomain=domain,
                                distribution=ghmm.DiscreteDistribution(domain),
                                A=trans,
                                B=emissions,
                                pi=init)
Code example #19
 def __init__(self, A, B, Pi, observations):
     if len(A) == len(Pi):
         self.states = range(len(A))
         self.sigma = ghmm.Alphabet(observations) # The "alphabet" comprising action indices
         self.initA = A
         self.initB = B
         self.initPi = Pi
         self.ghmmModel = ghmm.HMMFromMatrices(self.sigma, ghmm.DiscreteDistribution(self.sigma), self.initA, self.initB, self.initPi)
     else:
         prettyPrint("Unable to initialize model. Unequal number of states", "error")
         return
Code example #20
 def setUp(self):
     '''Create a simple dice rolling HMM'''
     self.sigma = g.IntegerRange(1, 7)
     self.A = [[0.9, 0.1], [0.3, 0.7]]
     efair = [1.0 / 6] * 6
     eloaded = [3.0 / 13, 3.0 / 13, 2.0 / 13, 2.0 / 13, 2.0 / 13, 1.0 / 13]
     self.B = [efair, eloaded]
     self.pi = [0.5] * 2
     self.m = g.HMMFromMatrices(self.sigma,
                                g.DiscreteDistribution(self.sigma), self.A,
                                self.B, self.pi)
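A follow-on sketch (not from the original test suite) exercising the model built in setUp: sample a roll sequence, then decode the fair/loaded state path with Viterbi.

 def test_viterbi_roundtrip(self):
     seq = self.m.sampleSingle(20)     # 20 simulated die rolls
     path, logp = self.m.viterbi(seq)  # most likely hidden state path
     self.assertEqual(len(path), 20)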
Code example #21
def getHMM(emissionDomain, distribution, A, B, pi, name=None):
    """
    Takes HMM-style parameter matrices and returns a HMM object which was
    intialised with a GHMM object using these parameters.
    """
    hmm = ghmm.HMMFromMatrices(emissionDomain,
                               distribution,
                               A,
                               B,
                               pi,
                               hmmName=name)
    return HMM(hmm, 1)
Code example #22
    def test_ghmm(self):
        # this is being extended to also support mixtures of multivariate gaussians
        # Interpretation of B matrix for the multivariate gaussian case
        # (Example with three states and two mixture components with two dimensions):
        #  B = [
        #       [["mu111","mu112"],["sig1111","sig1112","sig1121","sig1122"],
        #        ["mu121","mu122"],["sig1211","sig1212","sig1221","sig1222"],
        #        ["w11","w12"] ],
        #       [["mu211","mu212"],["sig2111","sig2112","sig2121","sig2122"],
        #        ["mu221","mu222"],["sig2211","sig2212","sig2221","sig2222"],
        #        ["w21","w22"] ],
        #       [["mu311","mu312"],["sig3111","sig3112","sig3121","sig3122"],
        #        ["mu321","mu322"],["sig3211","sig3212","sig3221","sig3222"],
        #        ["w31","w32"] ],
        #      ]
        #
        # ["mu311","mu312"] is the mean vector of the two dimensional
        # gaussian in state 3, mixture component 1
        # ["sig1211","sig1212","sig1221","sig1222"] is the covariance
        # matrix of the two dimensional gaussian in state 1, mixture component 2
        # ["w21","w22"] are the weights of the mixture components
        # in state 2
        # For states with only one mixture component, an implicit weight
        # of 1.0 is assumed

        import ghmm
        F = ghmm.Float()

        Abig = [[0.0, 1.0], [1.0, 0.0]]
        Bbig = [[[1.0, 1.0, 1.0],
                 [0.9, 0.4, 0.2, 0.4, 2.2, 0.5, 0.2, 0.5, 1.0]],
                [[10.0, 10.0, 10.0],
                 [1.0, 0.2, 0.8, 0.2, 2.0, 0.6, 0.8, 0.6, 0.9]]]
        piBig = [0.5, 0.5]
        modelBig = ghmm.HMMFromMatrices(
            F, ghmm.MultivariateGaussianDistribution(F), Abig, Bbig, piBig)
        seq_set = modelBig.sample(10, 100, seed=3586662)

        e = modelBig.sampleSingle(1)
        print [x for x in e]

        # get log P(seq | model)
        logp = modelBig.loglikelihood(e)
        print logp

        # calculate viterbi path
        path = modelBig.viterbi(e)
        print path

        # train model parameters on the sampled sequence set
        modelBig.baumWelch(seq_set, 500, 0.0001)
Code example #23
    def fit(self, X_train, A=None, B=None, pi=None, B_dict=None, verbose=False):

        if A is None:        
            if verbose: print "Generate new A matrix"                
            # Transition probability matrix (Initial transition probability, TODO?)
            A = self.init_trans_mat(self.nState).tolist()

        if B is None:
            if verbose: print "Generate new B matrix"                                            
            # We should think about multivariate Gaussian pdf.        
            self.mu, self.sig = self.vectors_to_mean_sigma(X_train, self.nState)

            # Emission probability matrix
            B = np.hstack([self.mu, self.sig]).tolist() # Must be [i,:] = [mu, sig]
                
        if pi is None:            
            # pi - initial probabilities per state 
            ## pi = [1.0/float(self.nState)] * self.nState
            pi = [0.] * self.nState
            pi[0] = 1.0

        # HMM model object
        self.ml = ghmm.HMMFromMatrices(self.F, ghmm.GaussianDistribution(self.F), A, B, pi)
        
        ## print "Run Baum Welch method with (samples, length)", X_train.shape
        train_seq = X_train.tolist()
        final_seq = ghmm.SequenceSet(self.F, train_seq)        
        self.ml.baumWelch(final_seq, 10000)

        [self.A,self.B,self.pi] = self.ml.asMatrices()
        self.A = np.array(self.A)
        self.B = np.array(self.B)

        ## self.mean_path_plot(mu[:,0], sigma[:,0])        
        ## print "Completed to fitting", np.array(final_seq).shape
        
        # state range
        self.state_range = np.arange(0, self.nState, 1)

        # Pre-computation for PHMM variables
        self.mu_z   = np.zeros((self.nState))
        self.mu_z2  = np.zeros((self.nState))
        self.mu_z3  = np.zeros((self.nState))
        self.var_z  = np.zeros((self.nState))
        self.sig_z3 = np.zeros((self.nState))
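        # mu_z[i] / var_z[i] are the mean and variance of the next-state index
        # under the transition distribution A[i,:], precomputed for the PHMM.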
        for i in xrange(self.nState):
            zp             = self.A[i,:]*self.state_range
            self.mu_z[i]   = np.sum(zp)
            self.mu_z2[i]  = self.mu_z[i]**2
            #self.mu_z3[i]  = self.mu_z[i]**3
            self.var_z[i]  = np.sum(zp*self.state_range) - self.mu_z[i]**2
Code example #24
def ghmm_from_gaussian_hmm(hmm):
    hmm = deepcopy(hmm)
    domain = ghmm.Float()
    trans = hmm.transitionMatrix.tolist()
    init = hmm.initialProbabilities.tolist()
    emissions = [map(float, [d.mean, d.variance]) for d in hmm.emissionDistributions]
    # print init
    # print trans
    # print emissions
    return ghmm.HMMFromMatrices(emissionDomain=domain,
                                distribution=ghmm.GaussianDistribution(domain),
                                A=trans,
                                B=emissions,
                                pi=init)
Code example #25
def ghmm_from_multivariate_continuous_hmm(hmm):
    hmm = deepcopy(hmm)
    domain = ghmm.Float()
    trans = hmm.transitionMatrix.tolist()
    init = hmm.initialProbabilities.tolist()
    emissions = [[d.mean.tolist(), d.variance.flatten().tolist()] for d in hmm.emissionDistributions]
    # print init
    # print trans
    # print emissions
    return ghmm.HMMFromMatrices(emissionDomain=domain,
                                distribution=ghmm.MultivariateGaussianDistribution(domain),
                                A=trans,
                                B=emissions,
                                pi=init)
Code example #26
def _new_model(n_features, n_states, means, covars, topology):
    # Generate emissions
    emissions = []
    for i in range(n_states):
        emission = [means[i].tolist(), covars[i].ravel().tolist()]
        emissions.append(emission)

    # Create model
    domain = impl.Float()
    transitions = transition_matrix(n_states, topology).tolist()
    pi = start_probabilities(n_states, topology)
    distribution = impl.MultivariateGaussianDistribution(domain)
    model = impl.HMMFromMatrices(domain, distribution, transitions, emissions, pi)
    return model
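The transition_matrix and start_probabilities helpers are not shown above; a minimal sketch of what they might return for a hypothetical 'left-to-right' topology (an assumption, not the original implementation):

import numpy as np

def transition_matrix(n_states, topology):
    if topology == 'left-to-right':
        # Half the mass stays, half advances; the last state absorbs.
        A = 0.5 * np.eye(n_states) + 0.5 * np.eye(n_states, k=1)
        A[-1, -1] = 1.0
        return A
    # Ergodic fallback: uniform transitions
    return np.full((n_states, n_states), 1.0 / n_states)

def start_probabilities(n_states, topology):
    if topology == 'left-to-right':
        return [1.0] + [0.0] * (n_states - 1)
    return [1.0 / n_states] * n_states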
Code example #27
    def __create_hmm_from_dict(self, dictionary, qsr_type, num_symbols):
        """Creates a hmm from the xml representation. Not nice to use tempfile
        but not otherwise possible due to hidden code and swig in ghmm.

        :param xml: The xml string

        :return: the ghmm hmm object
        """
        symbols = self.hmm_types_available[qsr_type]().generate_alphabet(
            num_symbols)
        hmm = gh.HMMFromMatrices(symbols, gh.DiscreteDistribution(symbols),
                                 dictionary[self.TRANS], dictionary[self.EMI],
                                 dictionary[self.START])

        return hmm
Code example #28
File: wlncRNA.py Project: tomkp75/ngslib
 def trainHMM(hmmState):
     ''' Train HMM with the given chromosome. '''
     print >> sys.stderr, printTime(), "Train HMM with one chromosome."
     T = [[0.9, 0.1], [0.1, 0.9]]
     e1 = [0.1, 0.9]
     e0 = [0.9, 0.1]
     E = [e0, e1]
     pi = [0.9, 0.1]  # initial 10% are peak?
     sigma = ghmm.IntegerRange(0, 2)  # 0, 1
     m = ghmm.HMMFromMatrices(sigma, ghmm.DiscreteDistribution(sigma), T, E,
                              pi)
     m.baumWelch(ghmm.EmissionSequence(sigma, hmmState))
     print >> sys.stderr, printTime(), "Train HMM finished."
     print >> sys.stderr
     return m
Code example #29
    def conditional_prob(self, x):
        '''
        Input
        @ x: dim x length
        Output
        @ A list of conditional probabilities P(x_t|x_s,lambda)

        Only single sample works
        '''
        from scipy.stats import norm, entropy

        # logp from all features
        X_test = util.convert_sequence2(x, emission=False)
        X_test = np.squeeze(X_test)
        final_ts_obj = ghmm.EmissionSequence(self.F, X_test.tolist())
        logp_all = self.ml.loglikelihood(final_ts_obj)

        # feature-wise conditional probability
        cond_prob = []
        for i in xrange(self.nEmissionDim):  # per feature

            B = copy.copy(self.B)
            for j in xrange(self.nState):
                B[j][0] = [b for idx, b in enumerate(B[j][0]) if idx != i]
                B_arr = copy.copy(B[j][1])
                B_arr = np.array(B_arr).reshape(
                    (self.nEmissionDim, self.nEmissionDim))
                B_arr = np.delete(B_arr, (i), axis=0)
                B_arr = np.delete(B_arr, (i), axis=1)
                B[j][1] = B_arr.flatten().tolist()
            ml_src = ghmm.HMMFromMatrices(self.F, ghmm.MultivariateGaussianDistribution(self.F), \
                                          self.A, B, self.pi)

            # logp from remains
            X_test = util.convert_sequence2([ x[j] for j in xrange(len(x)) if j != i ], \
                                            emission=False)
            X_test = np.squeeze(X_test)
            final_ts_obj = ghmm.EmissionSequence(self.F, X_test.tolist())
            logp_src = ml_src.loglikelihood(final_ts_obj)

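            # log P(x_i | x_-i) = log P(x) - log P(x_-i): ml_src marginalizes
            # feature i out of B, so the difference of log-likelihoods is the
            # conditional log-probability of feature i given the rest.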
            cond_prob.append(logp_all - logp_src)

            if np.isnan(cond_prob[-1]) or np.isinf(cond_prob[-1]):
                print "NaN in conditional probabilities: ", np.shape(x)
                return None

        return np.array(cond_prob)
Code example #30
    def predict_from_single_seq(self, x, ref_num):
        '''
        Input
        @ x: length #samples x known steps
        Output
        @ observation distribution: nDimension
        '''

        # new emission for partial sequence
        B = []
        for i in xrange(self.nState):
            B.append([
                self.B[i][0][ref_num],
                self.B[i][1][ref_num * self.nEmissionDim + ref_num]
            ])

        ml = ghmm.HMMFromMatrices(self.F, ghmm.GaussianDistribution(self.F), \
                                  self.A, B, self.pi)

        if type(x) is not list: x = x.tolist()
        final_ts_obj = ghmm.EmissionSequence(self.F, x)

        try:
            (alpha, scale) = ml.forward(final_ts_obj)
        except:
            print "No alpha is available !!"
            sys.exit()

        x_pred = []
        for i in xrange(self.nEmissionDim):
            if i == ref_num:
                x_pred.append(x[-1])
            else:
                src_cov_idx = ref_num * self.nEmissionDim + ref_num
                tgt_cov_idx = ref_num * self.nEmissionDim + i

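                # Conditional mean of a bivariate Gaussian in state j:
                # E[x_i | x_ref] = mu_i + Sigma(i,ref)/Sigma(ref,ref) * (x_ref - mu_ref),
                # averaged over states weighted by the forward variable alpha.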
                t_o = 0.0
                for j in xrange(self.nState):
                    m_j = self.B[j][0][i] + \
                      self.B[j][1][tgt_cov_idx]/self.B[j][1][src_cov_idx]*\
                      (x[-1]-self.B[j][0][ref_num])
                    t_o += alpha[-1][j] * m_j
                x_pred.append(t_o)

        return x_pred