import ghmm
import numpy as np
from scipy.stats import norm


def learn_likelihoods_progress(i, n, m, A, B, pi, F, X_train, nEmissionDim, g_mu, g_sig, nState):
    if nEmissionDim >= 2:
        ml = ghmm.HMMFromMatrices(F, ghmm.MultivariateGaussianDistribution(F), A, B, pi)
    else:
        ml = ghmm.HMMFromMatrices(F, ghmm.GaussianDistribution(F), A, B, pi)

    l_likelihood_mean = 0.0
    l_likelihood_mean2 = 0.0
    l_statePosterior = np.zeros(nState)

    for j in xrange(n):    

        g_post = np.zeros(nState)
        g_lhood = 0.0
        g_lhood2 = 0.0
        prop_sum = 0.0

        for k in xrange(1, m):
            final_ts_obj = ghmm.EmissionSequence(F, X_train[j][:k*nEmissionDim])
            logp = ml.loglikelihoods(final_ts_obj)[0]
            # print 'Log likelihood:', logp
            post = np.array(ml.posterior(final_ts_obj))

            k_prop = norm(loc=g_mu, scale=g_sig).pdf(k)
            g_post += post[k-1] * k_prop
            g_lhood += logp * k_prop
            g_lhood2 += logp * logp * k_prop

            prop_sum  += k_prop

        l_statePosterior += g_post / prop_sum / float(n)
        l_likelihood_mean += g_lhood / prop_sum / float(n)
        l_likelihood_mean2 += g_lhood2 / prop_sum / float(n)

    return i, l_statePosterior, l_likelihood_mean, np.sqrt(l_likelihood_mean2 - l_likelihood_mean**2)
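The function above weights each per-timestep log-likelihood by a Gaussian over progress, norm(loc=g_mu, scale=g_sig).pdf(k), and accumulates weighted first and second moments. A minimal sketch of that weighting with hypothetical numbers:

import numpy as np
from scipy.stats import norm

logps = np.array([-5.0, -4.2, -3.9, -4.5])   # hypothetical log P(x_1..k | model), k = 1..4
ks = np.arange(1, len(logps) + 1)
weights = norm(loc=2.0, scale=1.5).pdf(ks)   # Gaussian progress weights, e.g. g_mu=2.0, g_sig=1.5

mean = np.sum(logps * weights) / np.sum(weights)
mean2 = np.sum(logps ** 2 * weights) / np.sum(weights)
std = np.sqrt(mean2 - mean ** 2)             # the same mean/std pair the function returns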
    def markov_model(self):
        mm = ghmm.HMMFromMatrices(
            self.F, ghmm.MultivariateGaussianDistribution(self.F),
            self.transition_probabilities, self.observation_probabilities,
            self.initial_probabilities)
        # print ".>" + str(mm.asMatrices())
        return mm
    def set_hmm_object(self,
                       A,
                       B,
                       pi,
                       out_a_num=None,
                       vec_num=None,
                       mat_num=None,
                       u_denom=None):
        """Set HMM's hyper parameters
        """
        if self.nEmissionDim == 1:
            self.ml = ghmm.HMMFromMatrices(self.F, ghmm.GaussianDistribution(self.F), \
                                           A, B, pi)
        else:
            self.ml = ghmm.HMMFromMatrices(self.F, ghmm.MultivariateGaussianDistribution(self.F), \
                                           A, B, pi)
        self.A = A
        self.B = B
        self.pi = pi

        try:
            self.ml.setBaumWelchParams(out_a_num, vec_num, mat_num, u_denom)
        except AttributeError:
            print "Install Daehyung's custom ghmm if you want partial-fit functionality."

        return self.ml
Example #4
def get_hidden_markov_model(mixture_model, guess_t_matrix):
    """Get an (unoptomized) hidden markov model from the mixture model and
    a guess at the transition matrix.

    The guess transition matrix is typically created by summing over the
    outer product of time-pairs of membership vectors.
    """

    # Emission probabilities for the HMM, using GHMM's idiosyncratic
    # layout: [mean vector, flattened covariance] per state
    emissions = [[mixture_model.means_[j], mixture_model.covars_[j].flatten()]
                 for j in xrange(mixture_model.n_components)]

    # Initial transition matrix
    if scipy.sparse.issparse(guess_t_matrix):
        guess_t_matrix = guess_t_matrix.todense()
        guess_t_matrix = guess_t_matrix.tolist()

    # Initial occupancy
    # TODO: figure out if initial occupancy matters
    initial_occupancy = ([1.0 / mixture_model.n_components] *
                         mixture_model.n_components)

    # Set up distribution
    g_float = ghmm.Float()
    g_distribution = ghmm.MultivariateGaussianDistribution(g_float)

    # Put it all together
    model = ghmm.HMMFromMatrices(g_float, g_distribution, guess_t_matrix,
                                 emissions, initial_occupancy)
    return model
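The docstring above says the guess transition matrix is typically built by summing outer products of time-adjacent membership vectors. A hedged sketch of that construction (the memberships array is hypothetical input, not part of this function):

import numpy as np

memberships = np.random.dirichlet([1.0] * 3, size=100)  # T=100 membership vectors, 3 components
guess = np.zeros((3, 3))
for t in range(len(memberships) - 1):
    guess += np.outer(memberships[t], memberships[t + 1])
guess /= guess.sum(axis=1, keepdims=True)               # row-normalize into transition probabilities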
    def create_model(self, flag, number_states):
          
        A, B, pi = self.calculate_A_B_pi(number_states, flag)

        # generate models from parameters
        #model = ghmm.HMMFromMatrices(self.F,ghmm.GaussianDistribution(self.F), A, B, pi)
        model = ghmm.HMMFromMatrices(self.F, ghmm.MultivariateGaussianDistribution(self.F), A, B, pi)
        model.normalize()
        return model
Example #6
    def test_ghmm(self):
        # this is being extended to also support mixtures of multivariate gaussians
        # Interpretation of B matrix for the multivariate gaussian case
        # (Example with three states and two mixture components with two dimensions):
        #  B = [
        #       [["mu111","mu112"],["sig1111","sig1112","sig1121","sig1122"],
        #        ["mu121","mu122"],["sig1211","sig1212","sig1221","sig1222"],
        #        ["w11","w12"] ],
        #       [["mu211","mu212"],["sig2111","sig2112","sig2121","sig2122"],
        #        ["mu221","mu222"],["sig2211","sig2212","sig2221","sig2222"],
        #        ["w21","w22"] ],
        #       [["mu311","mu312"],["sig3111","sig3112","sig3121","sig3122"],
        #        ["mu321","mu322"],["sig3211","sig3212","sig3221","sig3222"],
        #        ["w31","w32"] ],
        #      ]
        #
        # ["mu311","mu312"] is the mean vector of the two dimensional
        # gaussian in state 3, mixture component 1
        # ["sig1211","sig1212","sig1221","sig1222"] is the covariance
        # matrix of the two dimensional gaussian in state 1, mixture component 2
        # ["w21","w22"] are the weights of the mixture components
        # in state 2
        # For states with only one mixture component, an implicit weight
        # of 1.0 is assumed

        import ghmm
        F = ghmm.Float()

        Abig = [[0.0, 1.0], [1.0, 0.0]]
        Bbig = [[[1.0, 1.0, 1.0],
                 [0.9, 0.4, 0.2, 0.4, 2.2, 0.5, 0.2, 0.5, 1.0]],
                [[10.0, 10.0, 10.0],
                 [1.0, 0.2, 0.8, 0.2, 2.0, 0.6, 0.8, 0.6, 0.9]]]
        piBig = [0.5, 0.5]
        modelBig = ghmm.HMMFromMatrices(
            F, ghmm.MultivariateGaussianDistribution(F), Abig, Bbig, piBig)
        seq_set = modelBig.sample(10, 100, seed=3586662)

        e = modelBig.sampleSingle(1)
        print [x for x in e]

        # get log P(seq | model)
        logp = modelBig.loglikelihood(e)
        print logp

        # calculate the Viterbi path
        path = modelBig.viterbi(e)
        print path

        # train model parameters
        modelBig.baumWelch(seq_set, 500, 0.0001)
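For the mixture case the test's comment describes, a concrete B would look like the sketch below (two states, two 2-D components per state; all numbers illustrative, not from the original test):

B_mix = [
    [[0.0, 0.0], [1.0, 0.0, 0.0, 1.0],   # state 0, component 0: mean, flattened covariance
     [3.0, 3.0], [1.0, 0.0, 0.0, 1.0],   # state 0, component 1
     [0.5, 0.5]],                        # component weights w01, w02
    [[5.0, 5.0], [1.0, 0.0, 0.0, 1.0],   # state 1, component 0
     [8.0, 8.0], [1.0, 0.0, 0.0, 1.0],   # state 1, component 1
     [0.5, 0.5]],                        # component weights w11, w12
]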
Example #7
def _new_model(n_features, n_states, means, covars, topology):
    # Generate emissions
    emissions = []
    for i in range(n_states):
        emission = [means[i].tolist(), covars[i].ravel().tolist()]
        emissions.append(emission)

    # Create model
    domain = impl.Float()
    transitions = transition_matrix(n_states, topology).tolist()
    pi = start_probabilities(n_states, topology)
    distribution = impl.MultivariateGaussianDistribution(domain)
    model = impl.HMMFromMatrices(domain, distribution, transitions, emissions, pi)
    return model
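_new_model assumes transition_matrix and start_probabilities helpers. A hedged sketch of what they might look like for a left-to-right topology (the names match the calls above, but the bodies are assumptions, not the original implementation):

import numpy as np

def transition_matrix(n_states, topology):
    if topology == 'left-to-right':
        T = np.zeros((n_states, n_states))
        for i in range(n_states):
            T[i, i] = 0.5                           # self-loop
            T[i, min(i + 1, n_states - 1)] += 0.5   # advance (last state absorbing)
        return T
    return np.full((n_states, n_states), 1.0 / n_states)  # ergodic fallback

def start_probabilities(n_states, topology):
    if topology == 'left-to-right':
        return [1.0] + [0.0] * (n_states - 1)       # always start in the first state
    return [1.0 / n_states] * n_states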
    def reset(self):
        """Reset the HMM object
        """
        [A, B, pi] = self.ml.asMatrices()

        if self.nEmissionDim == 1:
            self.ml = ghmm.HMMFromMatrices(self.F, ghmm.GaussianDistribution(self.F), \
                                           A, B, pi)
        else:
            self.ml = ghmm.HMMFromMatrices(self.F, ghmm.MultivariateGaussianDistribution(self.F), \
                                           A, B, pi)
        self.A = A
        self.B = B
        self.pi = pi
Example #9
from copy import deepcopy

import ghmm


def ghmm_from_multivariate_continuous_hmm(hmm):
    hmm = deepcopy(hmm)
    domain = ghmm.Float()
    trans = hmm.transitionMatrix.tolist()
    init = hmm.initialProbabilities.tolist()
    emissions = [[d.mean.tolist(), d.variance.flatten().tolist()] for d in hmm.emissionDistributions]
    # print init
    # print trans
    # print emissions
    return ghmm.HMMFromMatrices(emissionDomain=domain,
                                distribution=ghmm.MultivariateGaussianDistribution(domain),
                                A=trans,
                                B=emissions,
                                pi=init)
def computeLikelihood(idx, A, B, pi, F, X, nEmissionDim, nState, startIdx=1, \
                      bPosterior=False, converted_X=False, cov_type='full'):
    '''
    This function will be deprecated. Please, use computeLikelihoods.
    '''

    if nEmissionDim >= 2:
        ml = ghmm.HMMFromMatrices(F, ghmm.MultivariateGaussianDistribution(F),
                                  A, B, pi)
        if 'diag' in cov_type:
            ml.setDiagonalCovariance(1)
    else:
        ml = ghmm.HMMFromMatrices(F, ghmm.GaussianDistribution(F), A, B, pi)

    if converted_X is False:
        X_test = util.convert_sequence(X, emission=False)
        X_test = np.squeeze(X_test)
        X_test = X_test.tolist()
    else:
        X_test = X

    l_idx = []
    l_likelihood = []
    l_posterior = []

    for i in xrange(startIdx, len(X_test) / nEmissionDim):
        final_ts_obj = ghmm.EmissionSequence(F, X_test[:i * nEmissionDim])

        try:
            logp = ml.loglikelihood(final_ts_obj)
            if bPosterior: post = np.array(ml.posterior(final_ts_obj))
        except:
            print "Unexpected profile!! GHMM cannot handle too low probability. Underflow?"

            l_idx.append(i)
            l_likelihood.append(-100000000)
            if bPosterior:
                if len(l_posterior) == 0: l_posterior.append(list(pi))
                else: l_posterior.append(l_posterior[-1])
            ## return False, False # anomaly
            continue

        l_idx.append(i)
        l_likelihood.append(logp)
        if bPosterior: l_posterior.append(post[i - 1])

    if bPosterior:
        return idx, l_idx, l_likelihood, l_posterior
    else:
        return idx, l_idx, l_likelihood
    def conditional_prob(self, x):
        '''
        Input
        @ x: dim x length
        Output
        @ A list of conditional probabilities P(x_t|x_s,lambda)

        Only single sample works
        '''
        from scipy.stats import norm, entropy

        # logp from all features
        X_test = util.convert_sequence2(x, emission=False)
        X_test = np.squeeze(X_test)
        final_ts_obj = ghmm.EmissionSequence(self.F, X_test.tolist())
        logp_all = self.ml.loglikelihood(final_ts_obj)

        # feature-wise conditional probability
        cond_prob = []
        for i in xrange(self.nEmissionDim):  # per feature

            # deep copy: a shallow copy would mutate self.B when B[j][0] is reassigned below
            B = copy.deepcopy(self.B)
            for j in xrange(self.nState):
                B[j][0] = [b for idx, b in enumerate(B[j][0]) if idx != i]
                B_arr = copy.copy(B[j][1])
                B_arr = np.array(B_arr).reshape(
                    (self.nEmissionDim, self.nEmissionDim))
                B_arr = np.delete(B_arr, (i), axis=0)
                B_arr = np.delete(B_arr, (i), axis=1)
                B[j][1] = B_arr.flatten().tolist()
            ml_src = ghmm.HMMFromMatrices(self.F, ghmm.MultivariateGaussianDistribution(self.F), \
                                          self.A, B, self.pi)

            # logp from remains
            X_test = util.convert_sequence2([ x[j] for j in xrange(len(x)) if j != i ], \
                                            emission=False)
            X_test = np.squeeze(X_test)
            final_ts_obj = ghmm.EmissionSequence(self.F, X_test.tolist())
            logp_src = ml_src.loglikelihood(final_ts_obj)

            cond_prob.append(logp_all - logp_src)

            if np.isnan(cond_prob[-1]) or np.isinf(cond_prob[-1]):
                print "NaN in conditional probabilities: ", np.shape(x)
                return None

        return np.array(cond_prob)
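The marginalization trick above drops feature i by deleting the i-th row and column of each state's covariance, then scores the remaining features with the reduced HMM. The covariance surgery in isolation (illustrative numbers):

import numpy as np

cov_flat = [4.0, 1.0, 0.5, 1.0, 3.0, 0.2, 0.5, 0.2, 2.0]  # hypothetical flattened 3x3 covariance
cov = np.array(cov_flat).reshape(3, 3)
marg = np.delete(np.delete(cov, 1, axis=0), 1, axis=1)    # drop feature 1
print marg.flatten().tolist()                             # 2x2 marginal covariance, flattened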
def computeLikelihoods(idx, A, B, pi, F, X, nEmissionDim, nState, startIdx=2, \
                       bPosterior=False, converted_X=False, cov_type='full'):
    '''
    Input:
    - X: dimension x length
    '''

    if nEmissionDim >= 2:
        ml = ghmm.HMMFromMatrices(F, ghmm.MultivariateGaussianDistribution(F),
                                  A, B, pi)
        if cov_type == 'diag': ml.setDiagonalCovariance(1)
    else:
        ml = ghmm.HMMFromMatrices(F, ghmm.GaussianDistribution(F), A, B, pi)

    X_test = util.convert_sequence(X, emission=False)
    X_test = np.squeeze(X_test)

    l_idx = []
    l_likelihood = []
    l_posterior = []

    for i in xrange(startIdx, len(X[0])):
        final_ts_obj = ghmm.EmissionSequence(
            F, X_test[:i * nEmissionDim].tolist())

        try:
            logp = ml.loglikelihood(final_ts_obj)
            if bPosterior: post = np.array(ml.posterior(final_ts_obj))
            l_likelihood.append(logp)
            if bPosterior: l_posterior.append(post[i - 1])
        except:
            print "Unexpected profile!! GHMM cannot handle too low probability. Underflow?"
            ## return False, False # anomaly
            ## continue
            # we keep the state as the previous one
            l_likelihood.append(-1000000000000)
            if bPosterior:
                if len(l_posterior) == 0: l_posterior.append(list(pi))
                else: l_posterior.append(l_posterior[-1])

        l_idx.append(i)

    if bPosterior: return idx, l_idx, l_likelihood, l_posterior
    else: return idx, l_idx, l_likelihood
Example #13
def computeLikelihood(F, k, data, g_mu, g_sig, nEmissionDim, A, B, pi):
    if nEmissionDim >= 2:
        hmm_ml = ghmm.HMMFromMatrices(F, ghmm.MultivariateGaussianDistribution(F), A, B, pi)
    else:
        hmm_ml = ghmm.HMMFromMatrices(F, ghmm.GaussianDistribution(F), A, B, pi)

    final_ts_obj = ghmm.EmissionSequence(F, data)
    logp = hmm_ml.loglikelihoods(final_ts_obj)[0]
    post = np.array(hmm_ml.posterior(final_ts_obj))

    k_prop = norm(loc=g_mu, scale=g_sig).pdf(k)
    g_post = post[k-1] * k_prop
    g_lhood = logp * k_prop
    g_lhood2 = logp * logp * k_prop
    prop_sum = k_prop

    # print np.shape(g_post), np.shape(g_lhood), np.shape(g_lhood2), np.shape(prop_sum)

    return g_post, g_lhood, g_lhood2, prop_sum
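This variant returns the partial sums for a single timestep k; a caller is expected to accumulate them over k and normalize, mirroring learn_likelihoods_progress above. A hedged usage sketch (F, A, B, pi, g_mu, g_sig, nEmissionDim, m, and the observation list obs are assumed to be defined as in the surrounding snippets):

import numpy as np

g_post_sum, lhood_sum, lhood2_sum, prop_total = 0.0, 0.0, 0.0, 0.0
for k in xrange(1, m):
    g_post, g_lhood, g_lhood2, prop_sum = computeLikelihood(
        F, k, obs[:k * nEmissionDim], g_mu, g_sig, nEmissionDim, A, B, pi)
    g_post_sum += g_post
    lhood_sum += g_lhood
    lhood2_sum += g_lhood2
    prop_total += prop_sum

mean = lhood_sum / prop_total
std = np.sqrt(lhood2_sum / prop_total - mean ** 2)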
Example #14
    def fit(self, xData, A=None, B=None, pi=None, cov_mult=None,
            ml_pkl=None, use_pkl=False):

        if ml_pkl is None:
            ml_pkl = os.path.join(os.path.dirname(__file__), 'ml_temp_n.pkl')            
        
        if cov_mult is None:
            cov_mult = [1.0]*(self.nEmissionDim**2)

        # Daehyung: What is the shape and type of input data?
        X = [np.array(data) for data in xData]

        if A is None:
            if self.verbose: print "Generating a new A matrix"
            # Transition probability matrix (Initial transition probability, TODO?)
            A = self.init_trans_mat(self.nState).tolist()

        if B is None:
            if self.verbose: print "Generating a new B matrix"
            # We should think about multivariate Gaussian pdf.  

            mus, cov = self.vectors_to_mean_cov(X, self.nState)

            for i in xrange(self.nEmissionDim):
                for j in xrange(self.nEmissionDim):
                    cov[:, j, i] *= cov_mult[self.nEmissionDim*i + j]

            if self.verbose:
                for i, mu in enumerate(mus):
                    print 'mu%i' % i, mu
                print 'cov', cov
                
            # Emission probability matrix
            B = [0] * self.nState
            for i in range(self.nState):
                B[i] = [[mu[i] for mu in mus]]
                B[i].append(cov[i].flatten().tolist())
        if pi is None:
            # pi - initial probabilities per state 
            ## pi = [1.0/float(self.nState)] * self.nState
            pi = [0.0] * self.nState
            pi[0] = 1.0

        # print 'Generating HMM'
        # HMM model object
        self.ml = ghmm.HMMFromMatrices(self.F, ghmm.MultivariateGaussianDistribution(self.F), A, B, pi)
        # print 'Creating Training Data'
        X_train = self.convert_sequence(X) # Training input
        X_train = X_train.tolist()
        
        if self.verbose: print 'Run Baum Welch method with (samples, length)', np.shape(X_train)
        final_seq = ghmm.SequenceSet(self.F, X_train)
        ## ret = self.ml.baumWelch(final_seq, loglikelihoodCutoff=2.0)
        ret = self.ml.baumWelch(final_seq, 10000)
        print 'Baum Welch return:', ret
        if np.isnan(ret): return 'Failure'

        [self.A, self.B, self.pi] = self.ml.asMatrices()
        self.A = np.array(self.A)
        self.B = np.array(self.B)

        #--------------- learning for anomaly detection ----------------------------
        [A, B, pi] = self.ml.asMatrices()
        n, m = np.shape(X[0])
        self.nGaussian = self.nState

        if self.check_method == 'change' or self.check_method == 'globalChange':
            # Get maximum change of loglikelihood over whole time
            ll_delta_logp = []
            for j in xrange(n):    
                l_logp = []                
                for k in xrange(1, m):
                    final_ts_obj = ghmm.EmissionSequence(self.F, X_train[j][:k*self.nEmissionDim])
                    logp         = self.ml.loglikelihoods(final_ts_obj)[0]

                    l_logp.append(logp)
                l_delta_logp = np.array(l_logp[1:]) - np.array(l_logp[:-1])                    
                ll_delta_logp.append(l_delta_logp)

            self.l_mean_delta = np.mean(abs(np.array(ll_delta_logp).flatten()))
            self.l_std_delta = np.std(abs(np.array(ll_delta_logp).flatten()))

            if self.verbose: 
                print "mean_delta: ", self.l_mean_delta, " std_delta: ", self.l_std_delta
        
        
        if self.check_method == 'global' or self.check_method == 'globalChange':
            # Get average loglikelihood threshold over whole time

            l_logp = []
            for j in xrange(n):
                for k in xrange(1, m):
                    final_ts_obj = ghmm.EmissionSequence(self.F, X_train[j][:k*self.nEmissionDim])
                    logp         = self.ml.loglikelihoods(final_ts_obj)[0]

                    l_logp.append(logp)

            self.l_mu = np.mean(l_logp)
            self.l_std = np.std(l_logp)
            
        elif self.check_method == 'progress':
            # Get average loglikelihood threshold wrt progress

            if os.path.isfile(ml_pkl) and use_pkl:
                if self.verbose: print 'Load detector parameters'
                d = ut.load_pickle(ml_pkl)
                self.l_statePosterior = d['state_post'] # time x state division
                self.ll_mu            = d['ll_mu']
                self.ll_std           = d['ll_std']
            else:
                if self.cluster_type == 'time':                
                    if self.verbose: print 'Beginning parallel job'
                    self.std_coff  = 1.0
                    g_mu_list = np.linspace(0, m-1, self.nGaussian) #, dtype=np.dtype(np.int16))
                    g_sig = float(m) / float(self.nGaussian) * self.std_coff
                    r = Parallel(n_jobs=-1)(delayed(learn_likelihoods_progress)(i, n, m, A, B, pi, self.F, X_train,
                                                                           self.nEmissionDim, g_mu_list[i], g_sig, self.nState)
                                                                           for i in xrange(self.nGaussian))
                    if self.verbose: print 'Completed parallel job'
                    l_i, self.l_statePosterior, self.ll_mu, self.ll_std = zip(*r)

                elif self.cluster_type == 'state':
                    self.km = None                    
                    self.ll_mu = None
                    self.ll_std = None
                    self.ll_mu, self.ll_std = self.state_clustering(X)
                    path_mat  = np.zeros((self.nState, m*n))
                    likelihood_mat = np.zeros((1, m*n))
                    self.l_statePosterior=None
                    
                d = dict()
                d['state_post'] = self.l_statePosterior
                d['ll_mu'] = self.ll_mu
                d['ll_std'] = self.ll_std
                ut.save_pickle(d, ml_pkl)
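The 'progress' branch stores one (ll_mu, ll_std) pair per progress-Gaussian. A hedged sketch of how these statistics might be used at detection time (the matching rule below, nearest stored state posterior, is an assumption rather than the original detector):

import numpy as np

def anomaly_score(logp, post, l_statePosterior, ll_mu, ll_std, coff=2.0):
    # pick the progress bin whose stored state posterior is closest to the
    # current one, then test the log-likelihood against mu - coff * std
    dists = [np.linalg.norm(np.array(post) - np.array(sp)) for sp in l_statePosterior]
    j = int(np.argmin(dists))
    return (ll_mu[j] - coff * ll_std[j]) - logp   # positive score suggests an anomaly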
Example #15
    def fit(self,
            xData1,
            xData2,
            xData3,
            A=None,
            B=None,
            pi=None,
            cov_mult=[100.0] * 9,
            verbose=False,
            ml_pkl='ml_temp.pkl',
            use_pkl=False):
        ml_pkl = os.path.join(os.path.dirname(__file__), ml_pkl)
        X1 = np.array(xData1)
        X2 = np.array(xData2)
        X3 = np.array(xData3)

        if A is None:
            if verbose: print "Generating a new A matrix"
            # Transition probability matrix (Initial transition probability, TODO?)
            A = self.init_trans_mat(self.nState).tolist()

            # print 'A', A

        if B is None:
            if verbose: print "Generating a new B matrix"
            # We should think about multivariate Gaussian pdf.

            mu1, mu2, mu3, cov = self.vectors_to_mean_cov(
                X1, X2, X3, self.nState)
            cov[:, 0, 0] *= cov_mult[0]  #1.5 # to avoid No convergence warning
            cov[:, 1, 0] *= cov_mult[1]  #5.5 # to avoid No convergence warning
            cov[:, 2, 0] *= cov_mult[2]
            cov[:, 0, 1] *= cov_mult[3]
            cov[:, 1, 1] *= cov_mult[4]
            cov[:, 2, 1] *= cov_mult[5]
            cov[:, 0, 2] *= cov_mult[6]
            cov[:, 1, 2] *= cov_mult[7]
            cov[:, 2, 2] *= cov_mult[8]

            print 'mu1:', mu1
            print 'mu2:', mu2
            print 'mu3:', mu3
            print 'cov', cov

            # Emission probability matrix
            B = [0.0] * self.nState
            for i in range(self.nState):
                B[i] = [[mu1[i], mu2[i], mu3[i]],
                        [
                            cov[i, 0, 0], cov[i, 0, 1], cov[i, 0, 2],
                            cov[i, 1, 0], cov[i, 1, 1], cov[i, 1, 2],
                            cov[i, 2, 0], cov[i, 2, 1], cov[i, 2, 2]
                        ]]
        if pi is None:
            # pi - initial probabilities per state
            ## pi = [1.0/float(self.nState)] * self.nState
            pi = [0.0] * self.nState
            pi[0] = 1.0

        # HMM model object
        self.ml = ghmm.HMMFromMatrices(
            self.F, ghmm.MultivariateGaussianDistribution(self.F), A, B, pi)
        X_train = self.convert_sequence(X1, X2, X3)  # Training input
        X_train = X_train.tolist()

        print 'Run Baum Welch method with (samples, length)', np.shape(X_train)
        final_seq = ghmm.SequenceSet(self.F, X_train)
        ## ret = self.ml.baumWelch(final_seq, loglikelihoodCutoff=2.0)
        ret = self.ml.baumWelch(final_seq, 10000)
        print 'Baum Welch return:', ret

        [self.A, self.B, self.pi] = self.ml.asMatrices()
        self.A = np.array(self.A)
        self.B = np.array(self.B)
        # print 'B\'s shape:', self.B.shape, self.B[0].shape, self.B[1].shape
        # print B[0]
        # print B[1]

        #--------------- learning for anomaly detection ----------------------------
        [A, B, pi] = self.ml.asMatrices()
        n, m = np.shape(X1)
        self.nGaussian = self.nState

        # Get average loglikelihood threshold wrt progress
        self.std_coff = 1.0
        g_mu_list = np.linspace(0, m - 1,
                                self.nGaussian)  #, dtype=np.dtype(np.int16))
        g_sig = float(m) / float(self.nGaussian) * self.std_coff

        print 'g_mu_list:', g_mu_list
        print 'g_sig:', g_sig

        ######################################################################################
        if os.path.isfile(ml_pkl) and use_pkl:
            with open(ml_pkl, 'rb') as f:
                d = pickle.load(f)
                self.l_statePosterior = d[
                    'state_post']  # time x state division
                self.ll_mu = d['ll_mu']
                self.ll_std = d['ll_std']
        else:
            n_jobs = -1
            print 'Beginning parallel job'
            r = Parallel(n_jobs=n_jobs)(delayed(learn_likelihoods_progress)(
                i, n, m, A, B, pi, self.F, X_train, self.nEmissionDim,
                g_mu_list[i], g_sig, self.nState)
                                        for i in xrange(self.nGaussian))
            print 'Completed parallel job'
            l_i, self.l_statePosterior, self.ll_mu, self.ll_std = zip(*r)

            d = dict()
            d['state_post'] = self.l_statePosterior
            d['ll_mu'] = self.ll_mu
            d['ll_std'] = self.ll_std
            with open(ml_pkl, 'wb') as f:
                pickle.dump(d, f, protocol=pickle.HIGHEST_PROTOCOL)
    def fit(self,
            xData1,
            xData2=None,
            A=None,
            B=None,
            pi=None,
            cov_mult=(1.0, 1.0, 1.0, 1.0),
            verbose=False,
            ml_pkl='ml_temp.pkl',
            use_pkl=False):
        ml_pkl = os.path.join(os.path.dirname(__file__), ml_pkl)
        X1 = np.array(xData1)
        X2 = np.array(xData2)

        if A is None:
            if verbose: print "Generating a new A matrix"
            # Transition probability matrix (Initial transition probability, TODO?)
            A = self.init_trans_mat(self.nState).tolist()

        if B is None:
            if verbose: print "Generating a new B matrix"
            # We should think about multivariate Gaussian pdf.

            mu1, mu2, cov = self.vectors_to_mean_cov(X1, X2, self.nState)
            cov[:, 0, 0] *= cov_mult[0]  #1.5 # to avoid No convergence warning
            cov[:, 1, 0] *= cov_mult[1]  #5.5 # to avoid No convergence warning
            cov[:, 0, 1] *= cov_mult[2]  #5.5 # to avoid No convergence warning
            cov[:, 1, 1] *= cov_mult[3]  #5.5 # to avoid No convergence warning

            # Emission probability matrix
            B = [0.0] * self.nState
            for i in range(self.nState):
                B[i] = [[mu1[i], mu2[i]],
                        [
                            cov[i, 0, 0], cov[i, 0, 1],
                            cov[i, 1, 0], cov[i, 1, 1]
                        ]]

        if pi is None:
            # pi - initial probabilities per state
            ## pi = [1.0/float(self.nState)] * self.nState
            pi = [0.0] * self.nState
            pi[0] = 1.0

        # HMM model object
        self.ml = ghmm.HMMFromMatrices(
            self.F, ghmm.MultivariateGaussianDistribution(self.F), A, B, pi)
        X_train = self.convert_sequence(X1, X2)  # Training input
        X_train = X_train.tolist()

        print 'Run Baum Welch method with (samples, length)', np.shape(X_train)
        final_seq = ghmm.SequenceSet(self.F, X_train)
        ## ret = self.ml.baumWelch(final_seq, loglikelihoodCutoff=2.0)
        ret = self.ml.baumWelch(final_seq, 10000)
        print 'Baum Welch return:', ret

        [self.A, self.B, self.pi] = self.ml.asMatrices()
        self.A = np.array(self.A)
        self.B = np.array(self.B)

        #--------------- learning for anomaly detection ----------------------------
        [A, B, pi] = self.ml.asMatrices()
        n, m = np.shape(X1)
        self.nGaussian = self.nState

        if self.check_method == 'change' or self.check_method == 'globalChange':
            # Get maximum change of loglikelihood over whole time
            ll_delta_logp = []
            for j in xrange(n):
                l_logp = []
                for k in xrange(1, m):
                    final_ts_obj = ghmm.EmissionSequence(
                        self.F, X_train[j][:k * self.nEmissionDim])
                    logp = self.ml.loglikelihoods(final_ts_obj)[0]

                    l_logp.append(logp)
                l_delta_logp = np.array(l_logp[1:]) - np.array(l_logp[:-1])
                ll_delta_logp.append(l_delta_logp)

            self.l_mean_delta = np.mean(abs(np.array(ll_delta_logp).flatten()))
            self.l_std_delta = np.std(abs(np.array(ll_delta_logp).flatten()))

            print "mean_delta: ", self.l_mean_delta, " std_delta: ", self.l_std_delta

        if self.check_method == 'global' or self.check_method == 'globalChange':
            # Get average loglikelihood threshold over whole time

            l_logp = []
            for j in xrange(n):
                for k in xrange(1, m):
                    final_ts_obj = ghmm.EmissionSequence(
                        self.F, X_train[j][:k * self.nEmissionDim])
                    logp = self.ml.loglikelihoods(final_ts_obj)[0]

                    l_logp.append(logp)

            self.l_mu = np.mean(l_logp)
            self.l_std = np.std(l_logp)

        elif self.check_method == 'progress':
            # Get average loglikelihood threshold wrt progress
            self.std_coff = 1.0
            g_mu_list = np.linspace(
                0, m - 1, self.nGaussian)  #, dtype=np.dtype(np.int16))
            g_sig = float(m) / float(self.nGaussian) * self.std_coff

            ######################################################################################
            if os.path.isfile(ml_pkl) and use_pkl:
                with open(ml_pkl, 'rb') as f:
                    d = pickle.load(f)
                    self.l_statePosterior = d[
                        'state_post']  # time x state division
                    self.ll_mu = d['ll_mu']
                    self.ll_std = d['ll_std']
            else:
                n_jobs = -1
                r = Parallel(n_jobs=n_jobs)(
                    delayed(learn_likelihoods_progress)(
                        i, n, m, A, B, pi, self.F, X_train, self.nEmissionDim,
                        g_mu_list[i], g_sig, self.nState)
                    for i in xrange(self.nGaussian))
                l_i, self.l_statePosterior, self.ll_mu, self.ll_std = zip(*r)

                d = dict()
                d['state_post'] = self.l_statePosterior
                d['ll_mu'] = self.ll_mu
                d['ll_std'] = self.ll_std
                with open(ml_pkl, 'wb') as f:
                    pickle.dump(d, f, protocol=pickle.HIGHEST_PROTOCOL)
    def fit(self, xData, A=None, B=None, pi=None, cov_mult=None,
            ml_pkl=None, use_pkl=False, cov_type='full', fixed_trans=0,\
            shuffle=False):
        '''
        Input :
        - xData: dimension x sample x length
        Issues:
        - If NaN is returned, the cause is usually one of the following:
        -- the covariance is too low
        -- the range of xData is too small (you have to scale it up)
        '''

        # Daehyung: What is the shape and type of input data?
        if shuffle:
            X = xData
            X = np.swapaxes(X, 0, 1)
            id_list = range(len(X))
            random.shuffle(id_list)
            X = np.array(X)[id_list]
            X = np.swapaxes(X, 0, 1)
        else:
            X = [np.array(data) for data in xData]
        nData = len(xData[0])

        param_dict = {}

        # Load pre-trained HMM without training
        if use_pkl and ml_pkl is not None and os.path.isfile(ml_pkl):
            if self.verbose: print "Load HMM parameters without train the hmm"

            param_dict = ut.load_pickle(ml_pkl)
            self.A = param_dict['A']
            self.B = param_dict['B']
            self.pi = param_dict['pi']
            if self.nEmissionDim == 1:
                self.ml = ghmm.HMMFromMatrices(self.F, ghmm.GaussianDistribution(self.F), \
                                               self.A, self.B, self.pi)
            else:
                self.ml = ghmm.HMMFromMatrices(self.F, ghmm.MultivariateGaussianDistribution(self.F), \
                                               self.A, self.B, self.pi)

            out_a_num = param_dict.get('out_a_num', None)
            vec_num = param_dict.get('vec_num', None)
            mat_num = param_dict.get('mat_num', None)
            u_denom = param_dict.get('u_denom', None)
            if out_a_num is not None:
                self.ml.setBaumWelchParams(out_a_num, vec_num, mat_num,
                                           u_denom)

            return True
        else:

            if ml_pkl is None:
                ml_pkl = os.path.join(os.path.dirname(__file__),
                                      'ml_temp_n.pkl')

            if cov_mult is None:
                cov_mult = [1.0] * (self.nEmissionDim**2)

            if A is None:
                if self.verbose: print "Generating a new A matrix"
                # Transition probability matrix (Initial transition probability, TODO?)
                A = util.init_trans_mat(self.nState).tolist()

            if B is None:
                if self.verbose: print "Generating a new B matrix"
                # We should think about multivariate Gaussian pdf.

                mus, cov = util.vectors_to_mean_cov(X,
                                                    self.nState,
                                                    self.nEmissionDim,
                                                    cov_type=cov_type)
                ## print np.shape(mus), np.shape(cov)

                # cov: state x dim x dim
                for i in xrange(self.nEmissionDim):
                    for j in xrange(self.nEmissionDim):
                        cov[:, i, j] *= cov_mult[self.nEmissionDim * i + j]

                if self.verbose:
                    for i, mu in enumerate(mus):
                        print 'mu%i' % i, mu
                    ## print 'cov', cov

                # Emission probability matrix
                B = [0] * self.nState
                for i in range(self.nState):
                    if self.nEmissionDim > 1:
                        B[i] = [[mu[i] for mu in mus]]
                        B[i].append(cov[i].flatten().tolist())
                    else:
                        B[i] = [np.squeeze(mus[0][i]), float(cov[i])]
            if pi is None:
                # pi - initial probabilities per state
                ## pi = [1.0/float(self.nState)] * self.nState
                pi = [0.0] * self.nState
                pi[0] = 1.0

            # print 'Generating HMM'
            # HMM model object
            if self.nEmissionDim == 1:
                self.ml = ghmm.HMMFromMatrices(self.F, ghmm.GaussianDistribution(self.F), \
                                               A, B, pi)
            else:
                self.ml = ghmm.HMMFromMatrices(self.F, ghmm.MultivariateGaussianDistribution(self.F), \
                                               A, B, pi)
            if cov_type == 'diag': self.ml.setDiagonalCovariance(1)

            # print 'Creating Training Data'
            X_train = util.convert_sequence(X)  # Training input
            X_train = X_train.tolist()
            if self.verbose: print "training data size: ", np.shape(X_train)

            if self.verbose:
                print 'Run Baum Welch method with (samples, length)', np.shape(
                    X_train)
            final_seq = ghmm.SequenceSet(self.F, X_train)
            ## ret = self.ml.baumWelch(final_seq, loglikelihoodCutoff=2.0)
            ret = self.ml.baumWelch(final_seq,
                                    10000)  #, fixedTrans=fixed_trans)
            if np.isnan(ret):
                print 'Baum Welch return:', ret
                return 'Failure'
            print 'Baum Welch return:', ret / float(nData)

            [self.A, self.B, self.pi] = self.ml.asMatrices()
            self.A = np.array(self.A)
            self.B = np.array(self.B)

            param_dict['A'] = self.A
            param_dict['B'] = self.B
            param_dict['pi'] = self.pi

            try:
                [out_a_num, vec_num, mat_num,
                 u_denom] = self.ml.getBaumWelchParams()
                param_dict['out_a_num'] = out_a_num
                param_dict['vec_num'] = vec_num
                param_dict['mat_num'] = mat_num
                param_dict['u_denom'] = u_denom
            except:
                print "Install new ghmm!!"

            if ml_pkl is not None: ut.save_pickle(param_dict, ml_pkl)
            return ret / float(nData)
Example #18
File: hmm.py  Project: 2050utopia/hwr
    def _get_hmm(self, hmm):
        return ghmm.HMMFromMatrices(
            DOMAIN, ghmm.MultivariateGaussianDistribution(DOMAIN), hmm.A,
            hmm.B, hmm.pi)
Example #19
#           B = [
#        [["mu111","mu112"],["sig1111","sig1112","sig1121","sig1122"],
#         ["mu121","mu122"],["sig1211","sig1212","sig1221","sig1222"],
#         ["w11","w12"] ],
#        [["mu211","mu212"],["sig2111","sig2112","sig2121","sig2122"],
#         ["mu221","mu222"],["sig2211","sig2212","sig2221","sig2222"],
#         ["w21","w22"] ],
#        [["mu311","mu312"],["sig3111","sig3112","sig3121","sig3122"],
#         ["mu321","mu322"],["sig3211","sig3212","sig3221","sig3222"],
#         ["w31","w32"] ],
#       ]
import ghmm

F = ghmm.Float()
A = [[0.9, 0.1], [0.3, 0.7]]  # transition matrix (illustrative values; the original snippet was truncated)
B = [[[5.0], [2.0], [6.0], [1.0], [0.3, 0.7]],
     [[2.0], [1.0], [1.5], [0.4], [0.4, 0.7]]]  # parameters of mixture models

pi = [0.1, 0.1]  # initial probabilities per state
model = ghmm.HMMFromMatrices(F, ghmm.MultivariateGaussianDistribution(F), A, B,
                             pi)
# modify model parameters (examples)
p = model.getInitial(0)
model.setInitial(0, 0.5)
# re-set transition from state 0 to state 1
trans = model.getTransition(0, 1)
model.setTransition(0, 1, 0.6)
# re-setting emission of state 0 component 1
model.setEmission(0, 1, [[4.0], [2.0]])

model.normalize()  # re-normalize model parameters
print model
#    for 2 dimensional data, the data structure is like this [x11, x12, x21, x22, x31, x32...]
seq = ghmm.EmissionSequence(F, [5.5, 0.1])
#  sample single sequence of length 50
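The interleaved layout mentioned in the comment above can be produced from a dim x length array with a transpose and flatten; a minimal sketch:

import numpy as np

X = np.array([[1.0, 2.0, 3.0],      # feature 1 over time
              [10.0, 20.0, 30.0]])  # feature 2 over time
flat = X.T.flatten().tolist()       # [1.0, 10.0, 2.0, 20.0, 3.0, 30.0] = [x11, x12, x21, x22, x31, x32]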