Example #1
def fit_batch(traj_data, n_components=2, subsample_factor=1,
              features=['speed', 'rotation'], **kwargs):
    '''
    Fits model to concatenated traj_data
    Args:
        traj_data - list of paths of training dataset (trajectory csv)
        n_components - number of hidden states
        subsample_factor - subsample factor to apply to all files
        features - columns to fit model to
        **kwargs passed to GaussianHMM
    Returns:
        model - fitted model
    '''
    # Concatenate data
    feature_list = []
    lengths_list = []
    for path in traj_data:
        X, l = features_from_csv(path, features=features,
                                 subsample_factor=subsample_factor)
        feature_list.append(X)
        lengths_list.append(l)
    print('Concatenating features...')
    X = np.vstack(feature_list)
    l = np.hstack(lengths_list)

    # Fit HMM
    print('Fitting model...')
    model = GaussianHMM(n_components, **kwargs)
    model.fit(X, lengths=l)

    return model
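A usage sketch for the helper above (file paths hypothetical; extra keyword arguments flow through to GaussianHMM):

paths = ['trial_01.csv', 'trial_02.csv']  # hypothetical trajectory files
model = fit_batch(paths, n_components=3, subsample_factor=2,
                  covariance_type="diag", n_iter=500)
print(model.means_)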
Example #2
 def addModel(self, nom, data, nbEtats, n_iter, startprob_prior=None, transmat_prior=None):
     '''
     Adds a model to tabModels

     Parameters:
     nom = name of the model
     data = three-dimensional array representing a cluster of movements, each movement itself holding positions
     nbEtats = number of hidden states for each model
     n_iter = number of iterations for the Baum-Welch algorithm
     startprob_prior = prior initial state distribution
     transmat_prior = prior transition matrix over the states
     '''
     model = GaussianHMM(nbEtats, covariance_type="diag", n_iter=n_iter, startprob_prior=startprob_prior, transmat_prior=transmat_prior)      
     model.fit(data)
     verif_set_transMat(model)
     taille = len(self.tabModels)
     if(taille == 0):
         self.tabModels.append([nom])
         self.tabModels[0].append(model)
         return
     for i in range(taille):        
         if(self.tabModels[i][0] == nom):
             self.tabModels[i].append(model)
             return
     self.tabModels.append([nom])
     self.tabModels[-1].append(model)
Example #3
	def fit(self):

		if self.verbose:
			print("[Clustering] Clearing old model and segmentation")
		
		self.segmentation = []
		self.model = []


		new_segments = []
		new_model = []

		g = GaussianHMM(n_components=self.n_components)

		all_demos = self._demonstrations[0]
		lens = [np.shape(self._demonstrations[0])[0]]
		for i in range(1, len(self._demonstrations)):
			all_demos = np.concatenate([all_demos,self._demonstrations[i]])
			lens.append(np.shape(self._demonstrations[i])[0])

		g.fit(all_demos, lengths=lens)
			
		for d in self._demonstrations:
			new_segments.append(self.findTransitions(g.predict(d)))
			#print g.predict(d)
			new_model.append(g)

		self.segmentation = new_segments
		self.model = new_model
Example #4
def main(args):
    x, X = loadDiffRows(args.diffFile)
    model = GaussianHMM(n_components=3,
                        covariance_type="diag",
                        n_iter=100000000000)
    # Note: with default init_params, fit() re-initializes and re-estimates
    # transmat_, so the structure set here is discarded during training;
    # the forbidden transitions are therefore zeroed out again after fitting.
    model.transmat_ = numpy.array([[0.5, 0.5, 0.0],
                                   [0.0, 0.5, 0.5],
                                   [0.0, 0.0, 1.0]])
    model.fit(X)
    print(model.transmat_)
    model.transmat_[0][2] = 0.
    model.transmat_[1][0] = 0.
    model.transmat_[2][0] = 0.
    model.transmat_[2][1] = 0.
    
    exp = args.outFile.split('/')[-1].split('_')[0]
    with open(args.outFile, 'w') as fout:
        print('exp\tbin\treads\tstate', file=fout)
        for seq in X:
            hiddenStates = model.predict(seq)
            for idx,v in enumerate(zip(x,hiddenStates)):
                r,h = v
                print(exp + '\t' + str(idx) + '\t'
                      + str(r) + '\t' + str(h),
                      file=fout)
Example #5
    def select(self):
        warnings.filterwarnings("ignore", category=DeprecationWarning)

        # I'm going to use the same general skeleton I had been using for
        # the previous methods
        # Variables to hold update scores
        best_dic = -math.inf
        best_model = GaussianHMM()

        # Iterate across a range of model states
        for num_hidden_states in range(self.min_n_components,
                                       self.max_n_components + 1):

            try:

                # Fit a model based on state
                model = GaussianHMM(n_components=num_hidden_states, n_iter=100)
                model.fit(self.X, self.lengths)  # What are we fitting on?
                #model = self.base_model(num_hidden_states)

                # Compute elements we need. Full equation:
                # DIC = log(P(X(i))) - 1/(M-1) * SUM(log(P(X(all but i))))
                sigma_scores = 0
                M = 0

                #pdb.set_trace()

                # FIXME
                # Get WITHIN sample logL
                logL = model.score(self.X, self.lengths)

                # Compute sum(logL) for all words that are not the X[i]
                for word in self.hwords:

                    if word != self.this_word:

                        # Pull values to score this word
                        temp_X, temp_length = self.hwords[word]

                        # Score
                        sigma_scores += model.score(temp_X, temp_length)
                        M += 1

                # Now we compute DIC
                current_dic = logL - (1.0 / (float(M)) * sigma_scores)

                # Control flow to update DIC score
                if current_dic >= best_dic:
                    #print("We're updating scores!")
                    best_model, best_dic = model, current_dic

                else:
                    #print("We're NOT updating scores...")
                    continue

            except:
                #print("We missed the try block")
                continue

        return best_model
Example #6
def train():
    models = []

    for label in [
            x for x in os.listdir(training_files)
            if os.path.isdir(os.path.join(training_files, x))
    ]:
        features = None

        print('calculating', label)

        for filename in os.listdir(os.path.join(training_files, label)):
            filepath = os.path.join(training_files, label, filename)

            feature = count_mfcc(filepath)

            if features is None:
                features = feature
            else:
                features = np.append(features, feature, axis=0)

        print(len(os.listdir(os.path.join(training_files, label))))

        model = GaussianHMM(n_components=3, n_iter=1000)
        model.fit(features)
        models.append((model, label))
        model = None

    # save training model
    pickle.dump(models, open(pickle_1, 'wb'))

    print('done training')
    return 'done training'
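A matching prediction step could reload the pickled (model, label) pairs and score a new file under each per-label HMM; a sketch reusing count_mfcc and pickle_1 from above:

def classify(filepath):
    models = pickle.load(open(pickle_1, 'rb'))
    feature = count_mfcc(filepath)
    # score() returns the log-likelihood; the best-scoring label wins
    scores = [(model.score(feature), label) for model, label in models]
    return max(scores)[1]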
Example #7
    def train(self, k, train_set, valid_set):
        train_wavs, train_folds, train_labels = zip(*list(chain(*train_set)))
        train_wavs, train_folds, train_labels = np.array(train_wavs), np.array(
            train_folds), np.array(train_labels)

        train_sample = len(train_wavs)
        train_x, _ = self.fix_frame(train_sample, train_wavs, train_folds,
                                    train_labels)

        # Test Model
        valid_wavs, valid_folds, valid_labels = zip(*valid_set)
        valid_wavs, valid_folds, valid_labels = np.array(valid_wavs), np.array(
            valid_folds), np.array(valid_labels)

        valid_sample = len(valid_wavs)
        valid_x, valid_y = self.fix_frame(valid_sample, valid_wavs,
                                          valid_folds, valid_labels)

        if config.isPCA:
            pca = PCA(n_components=config.n_pca)
            pca.fit(train_x)
            train_x = pca.transform(train_x)
            valid_x = pca.transform(valid_x)

        hmm = GaussianHMM(n_components=self.component)
        hmm.fit(train_x)
        joblib.dump(hmm, f"{self.model_path}/hmm10-{k}.pkl")

        score = purity_score(np.argmax(valid_y, axis=1),
                             np.argmax(hmm.predict_proba(valid_x), axis=1))
        print('Accuracy:{0:.3f}'.format(score))
Example #8
def hidden_markov_model(hidden_states_count, train, test, time, sample_size,
                        data_name, f):
    #Create an HMM and fit it to data
    model = GaussianHMM(algorithm='map',
                        n_components=hidden_states_count,
                        covariance_type='diag',
                        n_iter=10000)
    model.fit(train)

    #Decode the optimal sequence of internal hidden state (Viterbi)
    hidden_states = model.predict(test)

    #Prob next step
    prob_next_step = model.transmat_[hidden_states[-1], :]

    #Generate new sample (visible, hidden)
    X, Z = model.sample(sample_size)

    #Plot Data
    plot_time_series(test, hidden_states, hidden_states_count, None,
                     data_name + ' - Predict')
    points = get_points(model)
    plot_gaussians(train, points, hidden_states_count,
                   data_name + ' - Gaussian Predict')
    #    plot_time_series(X, Z, hidden_states_count, None, title=data_name+' - Sample')

    #Write Data
    f.write('\n' + data_name + '\n')
    f.write('Transition Matrix:\n' + str(model.transmat_) + '\n')
    f.write('\nNext Step ' + str(prob_next_step) + '\n')
    for point in points:
        f.write('\nHidden Variable No. ' + str(point['no']) + '\n\tMean: ' +
                str(point['mean']) + '\n\tSigma: ' + str(point['sigma']) +
                '\n')
    f.write('\n#######################################\n')
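Since prob_next_step is a distribution over hidden states, a quick point forecast can be read off the fitted model; a sketch (assumes the model object above):

next_state = int(np.argmax(prob_next_step))          # most likely next hidden state
expected_obs = np.dot(prob_next_step, model.means_)  # probability-weighted mean observation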
Example #9
def clusterMatrix(Cij, n_components, covariance_type='diag', n_iter=1000):
    '''
    clusterMatrix(Cij, n_components, covariance_type = 'diag', n_iter = 1000)

    applies a GaussianHMM clustering to the processed genomewide interchromosomal
    contact matrix Cij as generated by constructClusterContactMatrix

    :param Cij:             processed genomewide interchromosomal contact matrix
                            as generated by constructClusterContactMatrix
    :param n_components:    number of clusters to find
    :param covariance_type: type of the covariance matrix to use (see hmmlearn documentation for more details)
    :param n_iter:          number of iterations allowed

    :return:                numpy.array containing numbers from 0 to n_components - 1
                            specifying the cluster to which each bin belongs and the model with
                            which it was calculated (i.e. fitted hmmlearn.hmm.GaussianHMM object)
    '''
    # initializing HMM object
    model = GaussianHMM(n_components=n_components,
                        covariance_type=covariance_type,
                        n_iter=n_iter,
                        verbose=True)

    # fitting parameters
    model.fit(Cij)

    # compute the most likely state sequence using viterbi
    clusters = model.predict(Cij)

    return clusters, model
Example #10
 def fit_HMM(self,error_metric):
     print "Looking for optimal number of states and fitting HMM"
     for i in xrange(2,5):
         candidate = GaussianHMM(n_components=i, covariance_type="full", n_iter=1000)
         candidate.fit(self.X_train)
         if error_metric == HMM_MAD:
             error = HMM_MAD(candidate,self.X_test)
             if i == 2:
                 best_guess = error
                 best_model = candidate
                 opt_n_states = i
             else:
                 if error < best_guess:
                     opt_n_states = i
                     best_model = candidate
                     best_guess = error
         else:
             error = error_metric(candidate,self.X_test)
             if i == 2:
                 best_guess = error
                 best_model = candidate
                 opt_n_states = i
             else:
                 if error > best_guess:
                     opt_n_states = i
                     best_model = candidate
                     best_guess = error
     self.model = best_model
     self.n_states = opt_n_states
     print "Done. Lowest error of {} achieved with {} states".format(best_guess, opt_n_states)
 def train(self):
     print('start train')
     Hstate_num = list(range(len(self.p_state)))
     Ostate_num = list(range(len(self.p_state)))
     Ostate = []
     global value, index
     for (index, value) in enumerate(self.p_state):
         Ostate += value  # observation state sequence
         Hstate_num[index] = len(
             set(np.array(value).reshape(1, len(value))[0]))
         Ostate_num[index] = len(value)
     self.Ostate = Ostate
     self.Hstate_num = Hstate_num
     self.n = int(round(np.array(Hstate_num).mean()))  # number of hidden states
     model = GaussianHMM(n_components=self.n,
                         n_iter=1000,
                         init_params="mcs",
                         covariance_type="full")
     model.fit(np.array(Ostate), lengths=Ostate_num)
     s = model.transmat_.sum(axis=1).tolist()
     try:
         print('transmat')
         model.transmat_[s.index(0.0)] = np.array([1.0 / self.n] * self.n)
     except ValueError:
         pass
     self.model = model
Example #12
    def select(self):
        warnings.filterwarnings("ignore", category=DeprecationWarning)
        # TODO implement model selection using CV

        # create a temporary neg infinity best score
        best_score = float('-Inf')
        # create an empty best number of states
        best_num_components = None
        # define the number of splits for splitting method
        n_splits = 3
        # set the split method to kfold
        split_method = KFold(n_splits=n_splits)
        # loop through the options for number of components aka hidden states
        for num_components in range(self.min_n_components,
                                    self.max_n_components + 1):
            # store the scores for this number of components
            n_scores = []
            # test if the length of the sequences is smaller than the number of splits
            if len(self.sequences) < n_splits:
                #print('sequences {} less than splits {} -> skipping...'.format(len(self.sequences), n_splits))
                # skip this test of folds
                continue
            # loop through the training and testing folds of the sequences
            for train_idx, test_idx in split_method.split(self.sequences):
                # get the training data with combine sequence utility function
                x_train, lengths_train = combine_sequences(
                    train_idx, self.sequences)
                # get the testing data with combine sequence utility function
                x_test, lengths_test = combine_sequences(
                    test_idx, self.sequences)
                # add try/except to eliminate non-viable models
                try:
                    # create the model
                    model = GaussianHMM(n_components=num_components,
                                        n_iter=1000,
                                        random_state=self.random_state)
                    # fit the model
                    model.fit(x_train, lengths_train)
                    # calculate the score of the model aka Log Likelihood
                    score = model.score(x_test, lengths_test)
                    # add score to list
                    n_scores.append(score)
                except:
                    pass
            # calculate the mean score
            n_mean = np.mean(n_scores)
            # test if this mean is better than current best score
            if n_mean > best_score:
                # update the best number of components
                best_num_components = num_components
                # update the best score
                best_score = n_mean
        # test if a best model was found
        if best_num_components is not None:
            #print('best number of components found -> {}'.format(best_num_components))
            return self.base_model(best_num_components)
        else:
            #print('best number of components not found -> returning constant {}'.format(self.min_n_components))
            return self.base_model(self.min_n_components)
Example #13
def test_GaussHMM_decode(cases: str) -> None:
    np.random.seed(12346)
    cases = int(cases)
    i = 1
    N_decimal = 4
    max_iter = 100
    tol=1e-3
    while i < cases:
        n_samples = np.random.randint(10, 50)
        hidden_states = np.random.randint(3, 6)
        n_features = np.random.randint(4, 9)
        X = []
        lengths = []
        for _ in range(n_samples):
            seq_length = np.random.randint(4, 9)
            this_x = np.random.rand(seq_length,n_features)

            X.append(this_x)
            lengths.append(seq_length)

        hmm_gold = GaussianHMM(n_components=hidden_states,
                               covariance_type='full',
                               algorithm='viterbi',
                               n_iter=max_iter,
                               tol=tol)

        X_gold = np.concatenate(X)
        hmm_gold.fit(X_gold, lengths)

        gold_means = hmm_gold.means_
        gold_pi = hmm_gold.startprob_
        gold_n_features = hmm_gold.n_features
        gold_transmat = hmm_gold.transmat_
        gold_covars = hmm_gold.covars_

        hmm_mine = GaussHMM(hidden_states=hidden_states,
                               A=gold_transmat,
                               n_features=gold_n_features,
                               means=gold_means,
                               covar=gold_covars,
                               pi=gold_pi,
                               tol=tol,
                               max_iter=max_iter)
        gold_logprob,gold_state_seq = hmm_gold.decode(X_gold, lengths)
        mine_logprob_list = []
        mine_state_seq_list = []
        for this_x in X:
            this_logprob, this_state_seq = hmm_mine.decode(this_x)
            mine_logprob_list.append(this_logprob)
            mine_state_seq_list.append(this_state_seq)
        mine_logprob = sum(mine_logprob_list)
        mine_state_seq = np.concatenate(mine_state_seq_list)
        assert_almost_equal(mine_logprob, gold_logprob, decimal=N_decimal)
        assert_almost_equal(mine_state_seq, gold_state_seq, decimal=N_decimal)
        i+=1

    print('Successfully testing the decode function in Gaussian HMM!')
Example #14
def HMM_algo():
    for n in hiddenstates:
        for ts in timestamp:
            print("Combination: ", (n, ts))
            hmm = GaussianHMM(n_components=n)
            feature_vector = feature_extraction(train_data)
            hmm.fit(feature_vector)
            mse = predict_for_next_n_days(test_data, 150, ts, hmm)
            print(mse)
Example #15
def HMM_feat(ts, hmm_n=4):
    X = np.column_stack([ts])
    model = GaussianHMM(n_components=hmm_n, covariance_type="diag", 
                        n_iter=1000)
    model.fit(X)
    id = np.argsort(model.means_, axis=0).T[0]
    return np.concatenate((np.reshape(model.transmat_[id, :][:, id], -1),
                           np.reshape(model.covars_[id], -1),
                           np.reshape(model.means_[id], -1)))
Example #16
def getHiddenStatus(data):
    """
    使用Gaussian HMM对数据进行建模,并得到预测值
    """
    cols = ["r_5", "r_20", "a_5", "a_20"]
    model = GaussianHMM(n_components=3, covariance_type="full", n_iter=1000,
        random_state=2010)
    model.fit(data[cols])
    hiddenStatus = model.predict(data[cols])
    return hiddenStatus
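A usage sketch (the DataFrame here is synthetic but carries the column names the function expects):

import numpy as np
import pandas as pd

data = pd.DataFrame(np.random.randn(300, 4),
                    columns=["r_5", "r_20", "a_5", "a_20"])
print(getHiddenStatus(data)[:10])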
Example #17
    def select(self):

        # Use these variables to store best model
        bestLogL = None
        bestModel = None

        # Iterate over all possible models
        for num_states in range(self.min_n_components,
                                self.max_n_components + 1):

            try:
                # Define split method. Use n_splits = 3 wherever possible. This call needs to be
                # in try/except block, as it throws out exception for n_split = 1. To improve CV
                # performance, we can increase split count up to len(self.sequences), but this
                # will also hit performance significantly
                split_method = KFold(n_splits=(len(self.sequences) if (
                    len(self.sequences) < 3) else 3))

                cnt = 0
                sumLogL = 0

                # Create new Gaussian HMM
                hmm_model = GaussianHMM(n_components=num_states,
                                        covariance_type="diag",
                                        n_iter=1000,
                                        random_state=self.random_state,
                                        verbose=self.verbose)

                if self.verbose:
                    print("model created for {} with {} states".format(
                        self.this_word, num_states))

                # fit model with training sequences from KFold and calculate SUM logL
                for cv_train_idx, cv_test_idx in split_method.split(
                        self.sequences):

                    hmm_model.fit(
                        *combine_sequences(cv_train_idx, self.sequences))
                    sumLogL += hmm_model.score(
                        *combine_sequences(cv_test_idx, self.sequences))
                    cnt += 1

                # Calculate average LogL
                avgLogL = sumLogL / cnt

                # Maximize average logL to find best model
                if bestLogL is None or avgLogL > bestLogL:
                    bestModel = hmm_model
                    bestLogL = avgLogL
            except:
                if self.verbose:
                    print("failure on {} with {} states".format(
                        self.this_word, num_states))

        return bestModel
Example #18
class GaussHMM:
    def __init__(self, init):
        self.init = init

    def fit(self, signals, channels):

        # Fit briefly on 100 samples just to initialize the model's internals;
        # the learned parameters are overwritten with empirical estimates below.
        self.hmm = GaussianHMM(n_components=len(self.init),
                               covariance_type="full",
                               n_iter=100)
        self.hmm.fit(np.array(signals).reshape([-1, 1])[:100])
        self.hmm.means_ = self.get_mean(signals, channels)
        self.hmm.covars_ = self.get_cov(signals, channels)
        self.hmm.startprob_ = self.init
        self.hmm.transmat_ = self.markov_p_trans(channels)

    def predict(self, signals):
        pred = self.hmm.predict(signals.reshape([-1, 1]))
        return pred

    def predict_proba(self, signals):
        prob = self.hmm.predict_proba(signals.reshape([-1, 1])).round(3)
        return prob

    def get_mean(self, signals, channels):

        sig_mean = []
        for chan_i in range(len(np.unique(channels))):
            sig_mean.append(signals[channels == chan_i].mean())

        return np.array(sig_mean).reshape([-1, 1])

    def get_cov(self, signals, channels):

        sig_cov = []
        for chan_i in range(len(np.unique(channels))):
            sig_cov.append(np.cov(signals[channels == chan_i]))

        return np.array(sig_cov).reshape([-1, 1, 1])

    def markov_p_trans(self, states):
        max_state = np.max(states)
        states_next = np.roll(states, -1)
        matrix = []
        for i in range(max_state + 1):
            current_row = np.histogram(states_next[states == i],
                                       bins=np.arange(max_state + 2))[0]
            if np.sum(current_row
                      ) == 0:  # if a state doesn't appear in states...
                current_row = np.ones(max_state + 1) / (
                    max_state + 1)  # ...use uniform probability
            else:
                current_row = current_row / np.sum(
                    current_row)  # normalize to 1
            matrix.append(current_row)
        return np.array(matrix)
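markov_p_trans only counts empirical transitions (note that np.roll pairs the last state with the first); a quick standalone check with a synthetic sequence:

import numpy as np

states = np.array([0, 0, 1, 1, 2, 2, 0, 1, 2, 0])
print(GaussHMM(init=None).markov_p_trans(states))
# row i = observed frequencies of transitions i -> j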
Example #19
def NMF_HMM(N, X):
    nmf = NMF(N + 4)
    hmm = HMM(N)

    nmf.fit(X)
    Transition = nmf.components_

    hmm.fit(Transition.T)
    P = hmm.predict(Transition.T)

    return (P, Transition)
Example #20
    def select(self):
        warnings.filterwarnings("ignore", category=DeprecationWarning)

        # TODO implement model selection using CV
        #print("**** train for word: ", self.this_word)
        #print("number of sequences: ", len(self.sequences))
        if len(self.sequences) <= 2:
            return self.base_model(self.n_constant)

        max_score = float('-inf')
        best_n = None

        for n in range(self.min_n_components, self.max_n_components + 1):
            try:
                model = GaussianHMM(n_components=n,
                                    covariance_type="diag",
                                    n_iter=1000,
                                    random_state=self.random_state,
                                    verbose=False)
                scores = []

                split_method = KFold(n_splits=min(3, len(self.sequences)))

                for cv_train_idx, cv_test_idx in split_method.split(
                        self.sequences):
                    train_X, train_lengths = combine_sequences(
                        cv_train_idx, self.sequences)
                    test_X, test_lengths = combine_sequences(
                        cv_test_idx, self.sequences)
                    model.fit(train_X, train_lengths)
                    scores.append(model.score(test_X, test_lengths))

                cv_score = np.mean(scores)

                if max_score < cv_score:
                    max_score = cv_score
                    best_n = n

            except:
                pass

        if best_n is None:
            #all fails
            best_n = 3

        best_model = GaussianHMM(n_components=best_n,
                                 covariance_type="diag",
                                 n_iter=1000,
                                 random_state=self.random_state,
                                 verbose=False)

        best_model.fit(self.X, self.lengths)

        return best_model
Example #21
    def select(self):
        warnings.filterwarnings("ignore", category=DeprecationWarning)

        # Split into 3 folds when number of sequences for a word is atleast three
        # Make separate list of train and test folds in train_folds and test_folds
        n_splits = min(3, len(self.sequences))
        if n_splits > 1:
            split_method = KFold(n_splits)
            train_folds = list()
            test_folds = list()
            for cv_train, cv_test in split_method.split(self.sequences):
                train_folds.append(cv_train)
                test_folds.append(cv_test)

        best_score = -float('inf')
        best_model = None
        n_components = None

        for n in range(self.min_n_components, self.max_n_components + 1):
            model = GaussianHMM(n_components=n,
                                n_iter=1000,
                                random_state=self.random_state,
                                verbose=False)
            mean_logL = 0.0
            # If number of sequences is one, we train and score on the same single fold
            if n_splits == 1:
                try:
                    fit_model = model.fit(self.X, self.lengths)
                    mean_logL = fit_model.score(self.X, self.lengths)
                except:
                    continue
            else:
                count = 0  #count keeps record of the total number of folds which were successfully tested and scored.
                for ii in range(
                        len(train_folds)):  #  Iterate and train over each fold
                    train_X, train_lengths = combine_sequences(
                        train_folds[ii], self.sequences)
                    test_X, test_lengths = combine_sequences(
                        test_folds[ii], self.sequences)
                    try:
                        fit_model = model.fit(train_X, train_lengths)
                        mean_logL += fit_model.score(test_X, test_lengths)
                        count += 1
                    except:
                        continue
                if count > 0:
                    mean_logL = mean_logL / count  # Take mean of all the scores
                else:
                    mean_logL = float("-inf")  # no fold trained and scored successfully

            if mean_logL > best_score:
                best_score = mean_logL
                n_components = n
                best_model = fit_model

        return best_model
Example #22
 def HMMSe(self, globalstatenumber, X):
     'Generate a hidden-state sequence using an HMM'
     model = GaussianHMM(n_components=globalstatenumber,
                         n_iter=1000,
                         random_state=1,
                         covariance_type='diag',
                         params='stcm',
                         init_params='stcm')
     model.fit(X)
     hidden_states = model.predict(X)
     return hidden_states
Example #23
    def select(self):
        """ select the best model for self.this_word based on
        average log Likelihood of cross-validation folds for n between 
        self.min_n_components and self.max_n_components

        :return: GaussianHMM object
        """
        warnings.filterwarnings("ignore", category=DeprecationWarning)

        if len(self.sequences) < 3:
            logging.warning(
                "Not enough sequences to split into folds - using self.n_constant states"
            )
            return self.base_model(self.n_constant)

        results = list()
        split_method = KFold()
        for num_components in range(self.min_n_components,
                                    self.max_n_components + 1):
            score = 0
            model = GaussianHMM(n_components=num_components,
                                covariance_type="diag",
                                n_iter=1000,
                                random_state=self.random_state,
                                verbose=False)
            logging.debug(
                "Built model with {} hidden states".format(num_components))
            folds_run = 0
            for cv_train_idx, cv_test_idx in split_method.split(
                    self.sequences):
                try:
                    logging.debug(
                        "Train fold indices:{} Test fold indices:{}".format(
                            cv_train_idx,
                            cv_test_idx))  # view indices of the folds
                    train_X, train_lengths = combine_sequences(
                        cv_train_idx, self.sequences)
                    cv_X, cv_lengths = combine_sequences(
                        cv_test_idx, self.sequences)
                    model.fit(train_X, train_lengths)
                    score += model.score(cv_X, cv_lengths)
                    folds_run += 1
                except Exception as e:
                    logging.warning("{} caught: {}".format(type(e), e))
                    pass
            logging.debug(
                "Adding score={} and model to results list".format(score))
            if score > 0 and folds_run > 0:
                results.append((score / folds_run, model))
        if len(results) == 0:
            return self.base_model(self.n_constant)
        else:
            return max(results, key=lambda result: result[0])[1]
Example #24
    def select(self):
        warnings.filterwarnings("ignore", category=DeprecationWarning)

        # TODO implement model selection based on DIC scores
        M = len(self.words)

        models = []

        dic_score = float("-inf")
        scores_data = {}

        for n in range(self.min_n_components, self.max_n_components + 1):
            try:
                model = GaussianHMM(n_components=n, n_iter=1000).fit(self.X, self.lengths)
                logL = model.score(self.X, self.lengths)

                sum_logL = 0

                for word in self.words:
                    if word == self.this_word:
                        continue

                    if not scores_data.__contains__(word):
                        scores_data[word] = {}

                    try:
                        if n not in scores_data[word]:
                            X, lengths = self.all_word_Xlengths[word]
                            # Score the competing word under the model trained on
                            # this_word; refitting here would overwrite that model
                            ll = model.score(X, lengths)

                            scores_data[word][n] = ll

                        sum_logL += scores_data[word][n]
                    except:
                        continue

                dic = logL - sum_logL / (M - 1)

                if dic_score < dic:
                    dic_score = dic
                    if len(models) == 0:
                        models.append(model)
                    else:
                        models[0] = model
            except:
                continue

        if len(models) > 0:
            return models[0]

        return None
Example #25
    def select(self):
        warnings.filterwarnings("ignore", category=DeprecationWarning)

        # TODO implement model selection using CV
        split_method = KFold()
        best_score = -float("inf")
        best_model = None

        # if the length of sequences is less than 3,
        # it's meaningless to do KFold so here's an exception
        if len(self.sequences) < 3:
            # iterate through given components arguments
            for n in range(self.min_n_components, self.max_n_components + 1):
                try:
                    model = GaussianHMM(n_components=n,
                                        random_state=self.random_state)
                    model = model.fit(self.X, self.lengths)
                    score = model.score(self.X, self.lengths)
                    if score > best_score:
                        best_score = score
                        best_model = model
                except:
                    pass
            return best_model

        # else (length of sequences is 3 or longer)
        for cv_train_idx, cv_test_idx in split_method.split(self.sequences):
            # get training sample
            train_X, train_lengths = combine_sequences(cv_train_idx,
                                                       self.sequences)
            # get testing sample
            test_X, test_lengths = combine_sequences(cv_test_idx,
                                                     self.sequences)

            # iterate through given components
            for n in range(self.min_n_components, self.max_n_components + 1):
                try:
                    model = GaussianHMM(n_components=n,
                                        random_state=self.random_state)
                    # train on training samples
                    model = model.fit(train_X, train_lengths)
                    # test on testing samples
                    score = model.score(test_X, test_lengths)
                    # score is just a simple log-likelihood score
                    # so it's bigger the better
                    if score > best_score:
                        best_score = score
                        best_model = model
                except:
                    pass

        return best_model
Example #26
def HMM_Stack_Transform(k, X):
    hmm = HMM(k, random_state=1)

    hmm.fit(X.T)
    P = hmm.predict(X.T)

    C = Compress_Progression(P)

    ST = np.zeros((np.shape(hmm.means_)[1], len(C)))
    for i in range(len(C)):
        ST[:, i] = hmm.means_[C[i]]

    return normalize(ST, norm='l1', axis=0)
Example #27
def hmmlearnHMM():
    model = GaussianHMM(3, "full")
    model.fit(input_data.iloc[:, :-1])

    actual = input_data.iloc[:, -1]
    pred = model.predict(input_data.iloc[:, :-1])
    pred = [2 if x == 2 else x for x in pred]
    pred = [7 if x == 1 else x for x in pred]
    pred = [3 if x == 0 else x for x in pred]

    accuracy = sum(pred == actual) / float(len(actual))

    print(accuracy)
Example #28
def model_run(name):
    msft = db[name]
    msft_cursor = msft.find({})

    data = []
    time = datetime.datetime(2020, 6, 15, 9, 30, 00)
    for i in msft_cursor:
        if i['time'] >= time:
            data.append(float(i['price'].replace(',', '')))

    data = np.array(data)
    segments = segment.topdownsegment(data, fit.interpolate,
                                      fit.sumsquared_error, max_error)
    box_feature = build_box_feature(segments, data)
    print(box_feature)
    train_data, test_data = train_test_split(box_feature,
                                             test_size=0.33,
                                             shuffle=False)
    feature_vector = feature_extract(train_data)
    print(feature_vector)

    hmm = GaussianHMM(n_components=4)
    hmm.fit(feature_vector)
    possibile_outcomes = compute_all_possible_outcomes(n_steps_frac_low,
                                                       n_steps_frac_high,
                                                       n_steps_frac_change)
    predict = predict_close_price_for_days(500, name)


Example #29
    def select(self):
        '''
        Selects the best model as determined by the average score
        across n_splits cross-validation sets
        '''
        warnings.filterwarnings("ignore", category=DeprecationWarning)

        best_score = float("-inf")
        best_model = None
        for num_states in range(self.min_n_components,
                                self.max_n_components + 1):
            hmm_model = GaussianHMM(n_components=num_states,
                                    covariance_type="diag",
                                    n_iter=1000,
                                    random_state=self.random_state,
                                    verbose=False)
            scores = []
            try:
                n_splits = self.get_n_splits()
                split_method = KFold(n_splits=n_splits)
                for cv_train_idx, cv_test_idx in split_method.split(
                        self.sequences):
                    # train
                    train_x, train_length = combine_sequences(
                        cv_train_idx, self.sequences)
                    hmm_model.fit(train_x, train_length)
                    # test
                    test_x, test_length = combine_sequences(
                        cv_test_idx, self.sequences)
                    # evaluate
                    score = self.score(hmm_model, test_x, test_length)
                    scores.append(score)
                    if self.verbose:
                        print(
                            "model created for {} with {} states, withs score: {}"
                            .format(self.this_word, num_states, score))
            except ValueError as e:
                if self.verbose:
                    print("ValueError error({0}):".format(e))
                    print("failure on {} with {} states".format(
                        self.this_word, num_states))
                scores = []

            if len(scores) > 0:
                mean_score = np.mean(scores)
                if mean_score > best_score:
                    best_score = mean_score
                    best_model = hmm_model

        return best_model
Example #30
def hmm_gen_TS(N, hmm_n=4):
    TS = np.zeros([N,T])
    model = GaussianHMM(n_components=hmm_n, covariance_type="diag", n_iter=1000)
    for n in range(N):
        if n%10 == 0:
            p('generating {0}th sample by HMM'.format(n))
        r = np.random.randint(0, data_all.shape[0])
        ts_real = np.array(data_all.loc[r, '0':])
        X = np.column_stack([ts_real])
        model.fit(X)
        ts, Z = model.sample(T)
        ts = ts[:,0]
        TS[n,:] = (ts-np.mean(ts))/np.std(ts)
    return TS
Example #31
def test_GaussHMM_posterior(cases: str) -> None:
    np.random.seed(12346)
    cases = int(cases)
    i = 1
    N_decimal = 4
    max_iter = 100
    tol=1e-3
    while i < cases:
        n_samples = np.random.randint(10, 50)
        hidden_states = np.random.randint(3, 6)
        n_features = np.random.randint(4, 9)
        X = []
        lengths = []
        for _ in range(n_samples):
            seq_length = np.random.randint(4, 9)
            this_x = np.random.rand(seq_length,n_features)

            X.append(this_x)
            lengths.append(seq_length)

        hmm_gold = GaussianHMM(n_components=hidden_states,
                               covariance_type='full',
                               n_iter=max_iter,
                               tol=tol)

        X_gold = np.concatenate(X)
        hmm_gold.fit(X_gold, lengths)

        gold_means = hmm_gold.means_
        gold_pi = hmm_gold.startprob_
        gold_n_features = hmm_gold.n_features
        gold_transmat = hmm_gold.transmat_
        gold_covars = hmm_gold.covars_

        hmm_mine = GaussHMM(hidden_states=hidden_states,
                               A=gold_transmat,
                               n_features=gold_n_features,
                               means=gold_means,
                               covar=gold_covars,
                               pi=gold_pi,
                               tol=tol,
                               max_iter=max_iter)
        _,gold_posteriors = hmm_gold.score_samples(X_gold, lengths)
        mine_posterior_list = [hmm_mine.posterior(this_x) for this_x in X]
        mine_posterior_list = np.concatenate(mine_posterior_list)
        assert_almost_equal(mine_posterior_list, gold_posteriors, decimal=N_decimal)
        i+=1

    print('Successfully testing the function of computing posteriors in Gaussian HMM!')
Example #32
    def select(self):
        warnings.filterwarnings("ignore", category=DeprecationWarning)

        # TODO implement model selection using CV

        num_splits = 2

        if num_splits > len(self.sequences):
            return None  # Cannot proceed, not enough data

        best_num_states = self.n_constant
        best_average_logL_score = float("-inf")

        for num_states in range(self.min_n_components,
                                self.max_n_components + 1):

            all_scores = []
            split_method = KFold(n_splits=num_splits)

            for cv_train_idx, cv_test_idx in split_method.split(
                    self.sequences):
                train_X, train_lengths = combine_sequences(
                    cv_train_idx, self.sequences)
                test_X, test_lengths = combine_sequences(
                    cv_test_idx, self.sequences)

                try:

                    hmm_model = GaussianHMM(n_components=num_states,
                                            covariance_type="diag",
                                            n_iter=1000,
                                            random_state=self.random_state,
                                            verbose=False)

                    hmm_model.fit(train_X, train_lengths)
                    score = hmm_model.score(test_X, test_lengths)
                    all_scores.append(score)
                except:
                    pass

            logL = float("-inf")
            if len(all_scores) > 0:
                logL = np.average(all_scores)

            if logL > best_average_logL_score:
                best_average_logL_score = logL
                best_num_states = num_states

        return self.base_model(best_num_states)
Example #33
    def select(self):

        # Use these variables to store best model
        bestDIC = None
        bestModel = None

        # Iterate over all possible models
        for num_states in range(self.min_n_components,
                                self.max_n_components + 1):

            try:
                # Create new Gaussian HMM
                hmm_model = GaussianHMM(n_components=num_states,
                                        covariance_type="diag",
                                        n_iter=1000,
                                        random_state=self.random_state,
                                        verbose=self.verbose)

                if self.verbose:
                    print("model created for {} with {} states".format(
                        self.this_word, num_states))

                # Fit model with current data
                hmm_model.fit(self.X, self.lengths)

                # Calculate logL
                logL = hmm_model.score(self.X, self.lengths)

                otherScores = 0

                # Calculate likelihood SUM for all other words
                for otherWord in self.hwords:
                    if otherWord != self.this_word:
                        otherScores += hmm_model.score(*self.hwords[otherWord])

                # Calculate DIC using the formula: DIC = log(P(X(i))) - 1/(M-1) * SUM(log(P(X(all but i))))
                dic = logL - (float(1) / (len(self.hwords) - 1)) * otherScores

                # Find model with highest DIC
                if bestDIC is None or dic > bestDIC:
                    bestModel = hmm_model
                    bestDIC = dic
            except:
                if self.verbose:
                    print("failure on {} with {} states".format(
                        self.this_word, num_states))

        return bestModel
Example #34
def train(start_date, end_date, mode, input_days, span, n_components):

    training_x, _ = DataGenerator.make_features(start_date,
                                                end_date,
                                                mode=mode,
                                                input_days=input_days,
                                                span=span,
                                                is_training=True)

    # TODO: set model parameters
    model = GaussianHMM(n_components, n_iter=100)
    model.fit(training_x)

    # TODO: fix pickle file name
    filename = 'team11_model.pkl'
    pickle.dump(model, open(filename, 'wb'))
Example #35
def fit_hmm(df, n_components, features=['speed', 'rotation'],
            **kwargs):
    '''
    Fits a Gaussian HMM to the velocity data
    Args:
        df - dataframe containing positional data to be processed
        n_components - number of hidden states
        features - features to use in model fitting
        **kwargs passed to GaussianHMM
    Returns:
        model
    '''
    X, lengths = get_features(df, features=features)
    model = GaussianHMM(n_components, **kwargs)
    model.fit(X, lengths=lengths)

    return model
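A usage sketch (assumes a positional DataFrame df that get_features can process):

X, lengths = get_features(df, features=['speed', 'rotation'])
model = fit_hmm(df, n_components=3, covariance_type="diag", n_iter=500)
states = model.predict(X, lengths)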
Example #36
    def test_backward_with_hmmlearn(self):
        r = np.random.randn
        obs = [np.array([[-600 + r(), 100 + r()], [-300 + r(), 200 + r()], [0 + r(), 300 + r()]]) for _ in range(10)]
        # Note: fitting on a list of arrays and the private attributes below
        # rely on an older hmmlearn API
        hmm = GaussianHMM(n_components=3)
        hmm.fit(obs)

        # Calculate bwdlattice using hmmlearn algorithm
        framelogprob = hmm._compute_log_likelihood(obs[0])
        start = timeit.default_timer()
        bwdlattice1 = hmm._do_backward_pass(framelogprob)
        print('hmmlearn took %fs' % (timeit.default_timer() - start))

        # Calculate bwdlattice using fhmm algorithm with #chains = 1. This should yield the exact same results
        start = timeit.default_timer()
        bwdlattice2 = np.zeros(bwdlattice1.shape)
        fhmmc._backward(obs[0].shape[0], 1, hmm.n_components, [(x,) for x in range(hmm.n_components)],
                        hmm._log_startprob.reshape(1, 3), hmm._log_transmat.reshape(1, 3, 3), framelogprob, bwdlattice2)
        print('fhmm took %fs' % (timeit.default_timer() - start))
        self.assertTrue(np.allclose(bwdlattice1, bwdlattice2))
Example #37
class HmmClassifier():
    def __init__(self, referenceSeqs, inputSeq):
        self.referenceSeqs = referenceSeqs
        self.inputSeq = inputSeq

        # feel free to change this model
        self.model = GaussianHMM(n_components=2, covariance_type="full", n_iter=2000)

    def predict(self):
        probs = []
        for referenceSeq in self.referenceSeqs:
            #print "reference: {}".format(referenceSeq)
            self.model.fit(referenceSeq)
            hidden_states = self.model.predict(referenceSeq)
            prob = self.model.score(self.inputSeq)
            probs.append(prob)

        # return the index of the max prob
        return probs.index(max(probs))
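A usage sketch with synthetic 2-D sequences (shapes arbitrary):

import numpy as np

refs = [np.random.randn(50, 2), np.random.randn(50, 2) + 3.0]
query = np.random.randn(20, 2)
clf = HmmClassifier(refs, query)
print(clf.predict())  # index of the best-scoring reference sequence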
Example #38
File: hmm.py  Project: mkdmkk/infaas
class HMM:
    __slots__ = [
        "model"
    ]

    def __init__(self):
        pass


    def draw(self, data):
        figure()
        plot(range(len(data)),data,alpha=0.8,color='red')
        show()


    def train(self, data, n_components):
        print("Training Data: %s" % data)
        self.data = data
        self.model = GaussianHMM(n_components, algorithm='viterbi', covariance_type='diag')
        X = np.reshape(data, (len(data), 1))
        self.model = self.model.fit(X)

        self.hidden_states = self.model.predict(X)
        print("Sequence of States: " % self.hidden_states)


    def eval(self, obs):
        print("Testing Data: %s" % obs)
        X = np.reshape(obs, (len(obs),1))
        print("Eval: %s" % str(self.model.score(X)))


    def plot(self):
        fig = figure(facecolor="white")
        ax = fig.add_subplot(111)

        for i in range(self.model.n_components):
            # use fancy indexing to plot data in each state
            idx = (self.hidden_states == i)
            ax.plot(np.array(range(len(self.data)))[idx], np.array(self.data)[idx], '.', label="State %d" % (i+1))

        ax.legend()
        show()
Example #39
def runHmm(patient_record,date_list,group_id,empirical_states):
###############################################################################
# Processing the data
	max_state_number = (group_id+1)*10
	
	X = np.zeros(shape=(max(len(patient_record),2),20))
	index = 0
	for date in date_list:
		tmp_list = []
		#print(date)
		for key, value in patient_record[date].items():
			tmp_list.append(value)
		X[index] = np.array(tmp_list)
		index+=1
		
	# if no lab test is available, train with an all zero array
	if X.shape[0]  == 0:
		X = np.zeros(shape=(2,20))
	elif X.shape[0] == 1:
		X[1] = np.zeros(shape=(1,20))
		
	#print(X)	
	#print(X.shape)
	
###############################################################################
# Run Gaussian HMM
	print("fitting to HMM and decoding ...")
	n_components = 2
	
	# make an HMM instance and execute fit
	model = GaussianHMM(n_components, covariance_type="diag", n_iter=1000)
	
	# Train the HMM n times to avoid local minima
	max_score = 0
	max_proba_states = []
	transmat = [[]]
	n = 2
	for i in range(1,n):
		model.fit(X)
		score = model.decode(X)[0]
		if i==1 or max_score < score:
			max_score = score
			max_proba_states = model.predict(X)
			transmat = model.transmat_
		
		'''	
		print "score", score
		# predict the optimal sequence of internal hidden state
		hidden_states = model.predict(X)
		print hidden_states
		'''
	# end multiple training
	
	#print max_score, max_proba_states, transmat
	
	# Compare the state with empirical states
	max_proba_states = max_proba_states.tolist()
	max_proba_states_inver = []
	for s in max_proba_states:
		max_proba_states_inver.append(0 if s == 1 else 1)
	
	#print empirical_states, max_proba_states, max_proba_states_inver
	
	difference_state = np.subtract(np.array(max_proba_states),np.array(empirical_states)).tolist()
	difference_state_inver = np.subtract(np.array(max_proba_states_inver),np.array(empirical_states)).tolist()
	
	difference = np.sum(np.power(difference_state,2))
	difference_inver = np.sum(np.power(difference_state_inver,2))
	
	#print difference, difference_inver
	
	if(difference_inver < difference):
		max_proba_states = max_proba_states_inver
	# end switch bits
	
	# Predict future state
	future_states_proba = np.dot([0,1],transmat)
	future_state = 0
	if future_states_proba[1] > future_states_proba[0]:
		future_state = 1	
	# End
	
	result_states = max_proba_states + [future_state for i in range(0, max_state_number - len(max_proba_states))]
	
	return result_states
	'''
	state = [0,1]
	transmat = np.array(model.transmat_)
	
	print np.dot(state,transmat)
	
	print np.array(model.transmat_)
	
	#print (hidden_states)
	#print (hidden_states.shape)
	'''
		
	print("done\n")
Example #40
File: casml.py  Project: evenmarbles/rlpy
class CasmlApproximator(FunctionApproximator):
    """

    """

    class _RetentionMethod(RetentionMethod):
        """The retention method for the transition case base implementation for :class:`Casml`.

        When the new problem-solving experience can be stored or not stored in memory,
        depending on the revision outcomes and the CBR policy regarding case retention.

        Parameters
        ----------
        owner : CaseBase
            A pointer to the owning case base.
        tau : float, optional
            The maximum permitted error when comparing most similar solution.
            Default is 0.8.
        sigma : float, optional
            The maximum permitted error when comparing actual with estimated
            transitions. Default is 0.2
        plot_retention_method : callable, optional
            Callback function plotting the retention step. Default is None.

        Notes
        -----
        The Casml retention method for the transition case base considers query cases as
        predicted correctly if both:

        1. the difference between the actual and the estimated transitions are less
           than or equal to the permitted error :math:`\\sigma`:

           .. math::

              d(\\text{case}.\\Delta_\\text{state}, T(s_{i-1}, a_{i-1}) <= \\sigma

        2. and the query case is within the maximum permitted error :math:`\\tau` of
           the most similar solution case:

           .. math::

              d(\\text{case}, 1\\text{NN}(C_T, \\text{case})) <= \\tau

        """

        def __init__(self, owner, tau=None, sigma=None, plot_retention_method=None):
            super(CasmlApproximator._RetentionMethod, self).__init__(owner, plot_retention_method,
                                                                     {'tau': tau, 'sigma': sigma})

            self._tau = tau if tau is not None else 0.8
            """:type: float"""

            self._sigma = sigma if sigma is not None else 0.2
            """:type: float"""

        def execute(self, features, matches, plot=True):
            """Execute the retention step.

            Parameters
            ----------
            features : list[tuple[str, ndarray]]
                A list of features of the form (`feature_name`, `data_points`).
            matches : dict[str, dict[int, tuple[float, ndarray]]]
                The solution identified through the similarity measure.
            plot: bool, optional
                Plot the data during the retention step.

            Returns
            -------
            int :
                The case id if the case was retained, -1 otherwise.

            """
            f = dict(features)

            do_add = True
            if matches:
                for id_, val in matches['state'].items():
                    delta_error = np.linalg.norm(self._owner.get_feature('delta_state', id_).value - f['delta_state'])
                    if delta_error <= self._sigma:
                        # At least one of the cases in the case base correctly estimated the query case,
                        # the query case does not add any new information, do not add.
                        do_add = False
                        break

            basis_id = -1
            if do_add or list(matches['state'].values())[0][0] > self._tau:
                basis_id = self._owner.insert(features, matches)

            if plot:
                self.plot_data(features, matches)

            return basis_id

    class Approximation(FunctionApproximator.Approximation):
        """

        """

        def __init__(self, approximator, state, act, kernelfn):
            super(CasmlApproximator.Approximation, self).__init__(state)

            self._act = act

            self._approximator = approximator
            """:type: CasmlApproximator"""

            self._kernelfn = kernelfn
            self._sum = 0.0

            self._neighbors = []
            """:type: list"""
            self._deltas = []
            """:type: list"""

            self.update(state.features, act.features)

        def __del__(self):
            assert (self.state, Hashable(self._act.features)) not in self._approximator._queries
            # noinspection PyTypeChecker
            # if not next((True for elem in self._approximator._fit_X if np.all(elem == self.state.features)), False):
            if (self.state, Hashable(self._act.features)) not in self._approximator._bases:
                self._approximator._querycb.remove([('state', self.state.features), ('act', self._act.features)])

        def include(self, d, state, delta):
            assert d >= 0
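            # Keep the neighbor list sorted by distance: insert the new neighbor
            # if it is closer than the current farthest, otherwise append it only
            # when its kernel weight is still a sufficient fraction of the total.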

            val = (d, state)
            if len(self._neighbors) <= 0 or val < self._neighbors[-1]:
                # noinspection PyTypeChecker
                # if not next((True for (dist, v) in self._neighbors if dist == d and np.all(v == state)), False):
                ind = bisect.bisect_left(self._neighbors, val)
                bisect.insort(self._neighbors, val)
                self._deltas.insert(ind, delta)
                self._compute_weights()
                self.dispatch('average_change')
            else:
                assert self._sum > 0.0
                w = self._kernelfn(d)
                if w / self._sum >= self._approximator._minfraction:
                    self._neighbors.append(val)
                    self._deltas.append(delta)
                    self._compute_weights()
                    self.dispatch('average_change')

        def update(self, state, act):
            neighbors = dict(self._approximator._basiscb.retrieve([('state', state), ('act', act)]))
            if 'state' in neighbors:
                self._deltas = [self._approximator._basiscb.get_feature('delta_state', id_).value for id_ in
                                neighbors['state'].iterkeys()]
                self._neighbors = neighbors['state'].values()

            self._compute_weights()

        def _compute_weights(self):
            self._weights.clear()
            self._sum = 0.0

            i = 0
            total = 0.0

            # Calculate the successor states from the current state and each solution's
            # delta state, weighting them by the kernel and by the HMM's probability of
            # observing the transition <state, state + delta>.
            for (d, succ), delta in zip(self._neighbors, self._deltas):
                w = self._kernelfn(d)
                if self._sum == 0.0 or w / self._sum >= self._approximator._minfraction:
                    sequence = [np.asarray(self._state.features), np.asarray(self._state.features + delta)]

                    proba = np.exp(self._approximator._hmm.score(sequence))
                    self._weights[MDPState.create(succ)] = (w, proba)
                    self._sum += w
                    total += proba
                    i += 1
                else:
                    break
            del self._neighbors[i:]
            del self._deltas[i:]

            # Normalize the transition probabilities over the retained neighbors.
            if total > 0.0:
                for succ, (w, p) in self._weights.iteritems():
                    self._weights[succ] = (w, p / total)

            # sequences = np.zeros((len(self._neighbors), 2, len(self._state)), dtype=float)
            #
            # for i, delta in enumerate(self._deltas):
            #     sequences[i, 0] = np.array(self._state.features)
            #     sequences[i, 1] = np.asarray(self._state.features + delta)
            #
            # # use HMM to calculate probability for observing sequence <current_state, next_state>
            # # noinspection PyTypeChecker
            # weights = np.exp(self._approximator._hmm.score(sequences))
            # for (_, succ), w in zip(self._neighbors, weights):
            #     self._weights[MDPState.create(succ)] = w
            #
            # sum_ = weights.sum()
            # for (_, succ), w in zip(self._neighbors, weights):
            #     if len(weights) <= 1:
            #         w *= 0.9
            #     self._weights[MDPState.create(succ)] = w / sum_

    # -----------------------------
    # CasmlApproximator
    # -----------------------------
    def __init__(self, feature_metadata, minfraction, scale, kernelfn, tau=None, sigma=None, ncomponents=1, n_iter=1):
        super(CasmlApproximator, self).__init__()

        self._minfraction = minfraction
        self._scale = scale
        self._kernelfn = kernelfn
        self._new_sequence = True

        #: Contains all the existing CasmlApproximations created by
        #: this CasmlApproximator. The keys serve as both queries and
        #: bases (queries are a superset of bases), so a datum may be
        #: None if the associated key is just a basis, not a query.
        self._queries = weakref.WeakValueDictionary()
        """:type: dict[tuple[MDPState, MDPAction], Approximation]"""
        #: The subset of keys of queries that are also bases.
        #: The order in which the bases have been received is preserved
        self._bases = set()
        """:type: set[tuple[MDPState, Hashable]"""
        self._fit_X = []
        """:type: list[ndarray]"""

        #: The case base maintaining the observations in the form
        #:     c = <s, a, ds>, where ds = s_{i+1} - s_i
        #: to identify possible successor states.
        self._basiscb = CaseBase(feature_metadata,
                                 retention_method=self._RetentionMethod,
                                 retention_method_params=(tau, sigma), name='basiscb')
        """:type: CaseBase"""
        del feature_metadata['delta_state']
        #: Invariant: contains all the keys in queries
        self._querycb = CaseBase(feature_metadata, name='querycb')
        """:type: CaseBase"""
        #: The hidden Markov model maintaining the observations in the form
        #:     seq = <s_{i}, s_{i+1}>
        #: to reason on the transition probabilities of successor states.
        self._hmm = GaussianHMM(ncomponents, n_iter=n_iter)  # , covariance_type='full'
        """:type: GaussianHMM"""

        self._not_add_bases = 0
        self._not_add_count = 0

    def initialize(self):
        """Prepare for a new episode."""
        self._new_sequence = True

    def add_basis(self, state, act, succ=None):
        """Adds a state to the set of bases used to approximate query
        states.

        Parameters
        ----------
        state : MDPState
            The state to add
        act : MDPAction
            The action performed in that state
        succ : MDPState, optional
            The successor state, if one was observed.

        Returns
        -------
        MDPState :
            The approximated state.

        """
        # update the hmm with the new sequence
        self._fit_hmm(state, succ)

        # retain the case in the query case base
        features = [('state', state.features), ('act', act.features)]
        self._querycb.retain(features)

        a = Hashable(act.features)
        if (state, a) in self._bases:
            self._not_add_bases += 1
            return state

        self._bases.add((state, a))

        # retain the case in the basis case base
        if succ is None:
            succ = state
        delta = succ - state
        features.append(('delta_state', delta))
        basis_id = self._basiscb.run(features)

        if basis_id <= -1:
            self._not_add_count += 1

        if basis_id >= 0:
            if self._querycb.similarity_uses_knn:
                for c in self._querycb.itervalues():
                    try:
                        approx = self._queries[(MDPState.create(c['state'].value), Hashable(c['act'].value))]
                    except KeyError:
                        pass
                    else:
                        approx.update(c['state'].value, c['act'].value)
            else:
                neighbors = dict(self._querycb.retrieve([('state', state.features), ('act', act.features)]))
                for id_, (d, s) in neighbors['state'].iteritems():
                    try:
                        approx = self._queries[(MDPState.create(s), Hashable(neighbors['act'][id_][1]))]
                    except KeyError:
                        pass
                    else:
                        approx.include(d, state.features, delta)

        return state

    def approximate(self, state, act):
        """Approximates a given state using an Approximation.

        Parameters
        ----------
        state : MDPState
            The state to approximate.
        act : MDPAction
            The action performed in that state

        Returns
        -------
        Approximation :
            The Approximation approximating state.

        """
        self._querycb.retain([('state', state.features), ('act', act.features)])

        a = Hashable(act.features)
        try:
            approx = self._queries[(state, a)]
        except KeyError:
            approx = CasmlApproximator.Approximation(self, state, act, self._kernelfn)
            self._queries[(state, a)] = approx
        return approx

    def _fit_hmm(self, state, succ):
        # try:
        #     x = self._hmm._fit_X.copy()
        # except AttributeError:
        #     x = np.zeros(1, dtype=np.object)
        # else:
        #     if self._new_sequence:
        #         x = self._hmm._fit_X.tolist()
        #         x.append(np.zeros(1))
        #         x = np.array(x)
        #
        # if self._new_sequence:
        #     self._new_sequence = False
        #     x[-1] = np.hstack([np.reshape(state.features, (-1, state._nfeatures)).T])
        #
        # x[-1] = np.hstack([x[-1].tolist(), np.reshape(succ.features, (-1, succ._nfeatures)).T])
        # self._hmm.fit(x, n_init=1)

        if self._new_sequence:
            self._new_sequence = False
            self._fit_X.append([])
            self._fit_X[-1].append(state.features)

        if succ is not None:
            self._fit_X[-1].append(succ.features)
            self._hmm.fit(np.concatenate(self._fit_X), lengths=[len(x) for x in self._fit_X])
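The retention test described in the Notes of _RetentionMethod above reduces to two
scalar comparisons. A minimal standalone sketch of that decision rule (hypothetical
helper; nn_distance stands in for the distance to the most similar solution case
returned by the similarity measure):

import numpy as np

def should_retain(actual_delta, estimated_delta, nn_distance, sigma=0.2, tau=0.8):
    # Condition 1: a retrieved case already estimates the transition well
    # enough, i.e. d(case.delta_state, T(s, a)) <= sigma.
    correctly_estimated = np.linalg.norm(
        np.asarray(actual_delta) - np.asarray(estimated_delta)) <= sigma
    # Retain when the query adds new information (no correct estimate) or when
    # even the most similar solution case is farther away than tau.
    return (not correctly_estimated) or nn_distance > tau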
def predict_states(X, group_id, empirical_states):
	#print("fitting to HMM and decoding ...")
	max_state_number = (group_id+1)*10
	n_components = 2
	
	# make an HMM instance and execute fit
	model = GaussianHMM(n_components, covariance_type="diag", n_iter=1000)
	
	# Train the HMM several times and keep the best-scoring fit to avoid
	# local minima (with n = 2 this loop runs only once; see the sketch below)
	max_score = 0
	max_proba_states = []
	transmat = [[]]
	n = 2
	for i in range(1,n):
		model.fit(X)
		score = model.decode(X)[0]
		if i==1 or max_score < score:
			max_score = score
			max_proba_states = model.predict(X)
			transmat = model.transmat_
		
		'''
		print("score", score)
		# predict the optimal sequence of internal hidden states
		hidden_states = model.predict(X)
		print(hidden_states)
		'''
	# end multiple training
	
	#print max_score, max_proba_states, transmat
	
	# Compare the decoded states with the empirical states; HMM state labels
	# are arbitrary, so also test the label-swapped sequence and keep the
	# closer of the two.
	max_proba_states = max_proba_states.tolist()
	max_proba_states_inver = []
	for s in max_proba_states:
		max_proba_states_inver.append(0 if s == 1 else 1)
	
	#print empirical_states, max_proba_states, max_proba_states_inver
	
	difference_state = np.subtract(np.array(max_proba_states),np.array(empirical_states)).tolist()
	difference_state_inver = np.subtract(np.array(max_proba_states_inver),np.array(empirical_states)).tolist()
	
	difference = np.sum(np.power(difference_state,2))
	difference_inver = np.sum(np.power(difference_state_inver,2))
	
	#print difference, difference_inver
	
	if difference_inver < difference:
		max_proba_states = max_proba_states_inver
	# end switch bits
	
	# Predict the future state: with the chain assumed to be in state 1, the
	# row vector [0, 1] times the transition matrix gives the distribution
	# over the next state.
	future_states_proba = np.dot([0, 1], transmat)
	future_state = 0
	if future_states_proba[1] > future_states_proba[0]:
		future_state = 1
	
	# Pad with the predicted future state up to the expected sequence length
	result_states = max_proba_states + [future_state] * (max_state_number - len(max_proba_states))
	return result_states
예제 #42
0
y_test = test_set[1:]


# hmmlearn
####################################################################################
####################################################################################
####################################################################################

import numpy as np
from hmmlearn.hmm import GaussianHMM

new_x = np.asarray(x_train)

n_comps = 6
model = GaussianHMM(n_comps)
model.fit(new_x)
hidden_states = model.predict(new_x)


###############################################################################
# print trained parameters and plot

import pylab as pl
from matplotlib.finance import quotes_historical_yahoo
from matplotlib.dates import YearLocator, MonthLocator, DateFormatter

print("Transition matrix")
print(model.transmat_)
print()

print("means and vars of each hidden state")
std_devs = []

for i in range(len(spoken)):
	#print "fitting to HMM and decoding ..."

	n_components = 3
	arr = []

	# make an HMM instance and execute fit
	model = GaussianHMM(n_components, covariance_type="diag", n_iter=1000)

	all_features = []
	for j in range(n_samples):
		(rate, sig) = wav.read(fpaths[i][j])
		features = get_features(sig)
		arr.append(len(features))
		all_features.append(features)

	# fit once on all samples of this word: hmmlearn expects the sequences
	# stacked into a single array plus their individual lengths
	model.fit(np.vstack(all_features), lengths=[len(f) for f in all_features])

	models.append(model)
	means.append(np.mean(arr))
	std_devs.append(np.std(arr))
	#print("done\n")

correct_answers = []
with open('Test/'+test_folder+'/answer.txt') as answers:
    for entry in answers:
        correct_answers.append(entry.split())

tot_words = len(correct_answers)
right = 0.0
threshold = 1.5
diff = close_v[1:] - close_v[:-1]
dates = dates[1:]
close_v = close_v[1:]

# pack diff and volume for training
X = np.column_stack([diff, volume])

###############################################################################
# Run Gaussian HMM
print("fitting to HMM and decoding ...", end="")
n_components = 5

# make an HMM instance and execute fit
model = GaussianHMM(n_components, covariance_type="diag", n_iter=1000)

model.fit(X)

# predict the optimal sequence of internal hidden state
hidden_states = model.predict(X)

print("done\n")

###############################################################################
# print trained parameters and plot
print("Transition matrix")
print(model.transmat_)
print()

print("means and vars of each hidden state")
for i in range(n_components):
    print("%dth hidden state" % i)
    print("mean = ", model.means_[i])
    print("var = ", np.diag(model.covars_[i]))
    print()
예제 #45
0
            obs = obs[1:]
            obs = obs.T
            obs = scale(obs)

            model = GaussianHMM(algorithm='viterbi', covariance_type='diag', covars_prior=0.01,
                  covars_weight=1, init_params='mc', means_prior=0, means_weight=0,
                  min_covar=0.001, n_components=3, n_iter=1000, params='mc',
                  random_state=None, startprob_prior=1.0, tol=0.01, transmat_prior=1.0,
                  verbose=False)
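            # With init_params='mc' and params='mc', fit() re-initializes and
            # re-estimates only the means and covariances; the start and
            # transition matrices set below therefore stay fixed.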

            model.startprob_ = numpy.array([1., 0, 0])
            model.startprob_prior = model.startprob_
            model.transmat_ = numpy.array([[0.9, 0.1, 0], [0, 0.9, 0.1], [0, 0, 1]])
            model.transmat_prior = model.transmat_

            model.fit(obs)

            pi = model.startprob_
            A = model.transmat_
            w = numpy.ones((n, m), dtype=numpy.double)
            hmm_means = numpy.ones((n, m, d), dtype=numpy.double)
            hmm_means[0][0] = model.means_[0]
            hmm_means[1][0] = model.means_[1]
            hmm_means[2][0] = model.means_[2]
            hmm_covars = numpy.array([[ numpy.matrix(numpy.eye(d,d)) for j in xrange(m)] for i in xrange(n)])
            hmm_covars[0][0] = model.covars_[0]
            hmm_covars[1][0] = model.covars_[1]
            hmm_covars[2][0] = model.covars_[2]
            gmmhmm = GMHMM(n,m,d,A,hmm_means,hmm_covars,w,pi,init_type='user',verbose=False)

            # hidden_state = model.predict(obs)
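Because params='mc' excludes the transition matrix from re-estimation, the
left-to-right structure imposed above survives fitting. A quick standalone check
under assumed shapes (random stand-in data):

import numpy
from hmmlearn.hmm import GaussianHMM

obs = numpy.random.randn(200, 4)   # stand-in for the scaled observations
model = GaussianHMM(n_components=3, init_params='mc', params='mc', n_iter=50)
model.startprob_ = numpy.array([1.0, 0.0, 0.0])
model.transmat_ = numpy.array([[0.9, 0.1, 0.0],
                               [0.0, 0.9, 0.1],
                               [0.0, 0.0, 1.0]])
model.fit(obs)
# 't' is not in params, so EM never touches the transition matrix.
assert numpy.allclose(model.transmat_, [[0.9, 0.1, 0.0],
                                        [0.0, 0.9, 0.1],
                                        [0.0, 0.0, 1.0]])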
예제 #46
0
import numpy as np
import matplotlib.pyplot as plt
from hmmlearn.hmm import GaussianHMM

# Load the data from the input file
input_file = 'CNY.csv'
data = np.loadtxt(input_file, delimiter=',')

# Extract the required values
closing_values = np.array(data[:, 6])
volume_of_shares = np.array(data[:, 8])[:-1]

# Compute the daily percentage change in closing price
diff_percentage = 100.0 * np.diff(closing_values) / closing_values[:-1]

# Combine the percentage change with the volume of shares
X = np.column_stack((diff_percentage, volume_of_shares))

# Create and train the Gaussian hidden Markov model
print("Training the Gaussian HMM...")
model = GaussianHMM(n_components=5, covariance_type='diag', n_iter=1000)
model.fit(X)

# Use the model to generate data
num_samples = 500
samples, _ = model.sample(num_samples)
plt.plot(np.arange(num_samples), samples[:, 0], c='black')
plt.figure()
plt.plot(np.arange(num_samples), samples[:, 1], c='red')
plt.show()
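Besides generating new samples, the fitted model can also label the training
observations with their most likely hidden states. A minimal sketch reusing model
and X from above (assuming every state is visited at least once):

hidden_states = model.predict(X)
for i in range(model.n_components):
    mask = (hidden_states == i)
    print('State %d: mean change %.3f%%, mean volume %.0f'
          % (i, X[mask, 0].mean(), X[mask, 1].mean()))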
예제 #47
0
File: hmm.py  Project: xldenis/instabike
import pickle
import pylab as pl
import numpy as np
from hmmlearn.hmm import GaussianHMM
from matplotlib.dates import YearLocator, MonthLocator, DateFormatter
import nyc

###############################################################################
# print trained parameters and plot
###############################################################################

new_x = np.asarray(train_set)

n_comps = 6
model = GaussianHMM(n_comps)
model.fit(new_x)
hidden_states = model.predict(new_x)

print("means and vars of each hidden state")
for i in range(n_comps):
    print("%dth hidden state" % i)
    print("mean = ", model.means_[i])
    print("var = ", np.diag(model.covars_[i]))
    print()

years = YearLocator()   # every year
months = MonthLocator()  # every month
yearsFmt = DateFormatter('%Y')
fig = pl.figure()
ax = fig.add_subplot(111)