Example #1
    def get_hmms(self):

        for gesture_type in self.gesture_types:

            print_status("Get_Hmms",
                         "Fitting for gesture_type: " + gesture_type)

            ### Step 1: fill hmm_examples appropriately ###
            hmm_examples = []
            for gesture in self.gestures[gesture_type]:
                hmm_rep = gesture.get_hmm_rep()
                hmm_examples.append(hmm_rep)

            ### Step 2: fit parameters for the hmm ###
            hmm = GaussianHMM(self.num_hmm_states)
            hmm.fit(hmm_examples)

            ### Step 3: store the hmm in self.hmms ###
            self.hmms[gesture_type] = hmm

            print_inner_status(
                gesture_type,
                "predicted the following sequences: (score: sequence)")
            for example in hmm_examples:
                print "		", hmm.score(example), ": ", hmm.predict(example)
Example #2
def use_hmm(img_times, change_vals, fps=10, min_secs_for_train_to_pass=8):
    
    from sklearn.hmm import GaussianHMM    
    X = np.column_stack(change_vals)    
    n_components = 2
    model = GaussianHMM(n_components, covariance_type="diag", n_iter=1000)
    model.fit([X.T])
    
    #thresh = 10**-15
    #model.transmat_ = np.array([[1-thresh,thresh],[1-thresh,thresh]])
    hidden_states = model.predict(X.T)
    
    # print trained parameters and plot
    print("Transition matrix")
    print(model.transmat_)
    print()
    
    print("means and vars of each hidden state")
    for i in range(n_components):
        print("%dth hidden state" % i)
        print("mean = ", model.means_[i])
        print("var = ", np.diag(model.covars_[i]))
        print()    
    
    if model.means_[0][0] > model.means_[1][0]: # assume most frames have no train; swap labels so state 0 means "no train"
        hidden_states = 1 - hidden_states
        
    train_spotted = filter_out_short_motions(hidden_states, min_secs_for_train_to_pass, fps)
    
    plot_timeline(img_times, change_vals, hidden_states, train_spotted)
    
    utils.copy_image_subset(config.experiment_data_frames, config.experiment_output_frames_hmm, np.nonzero(train_spotted)[0])
    
    return train_spotted
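The mean-comparison flip above only handles two states. A small sketch (assuming means_ of shape (n_states, n_features), as in the API used here) that relabels any number of states by ascending first-feature mean, so state 0 is always the low-motion state:

import numpy as np

def relabel_states_by_mean(hidden_states, means):
    # order[i] is the state with the i-th smallest first-feature mean;
    # invert it so that mapping[old_label] = new_label.
    order = np.argsort(means[:, 0])
    mapping = np.empty_like(order)
    mapping[order] = np.arange(len(order))
    return mapping[hidden_states]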
Example #3
def predictWithHMM(index, window = 252):
    training_X = X[range(index-window,index),:]
    training_y = actual_y[range(index-window,index)]
    testing_X  = X[index,:].reshape(1,training_X.shape[1])
    testing_y  = y[index]

    # PCA DATA
    if perform_pca:
        pca        = PCA(n_components= pca_components)
        pca.fit(training_X)
        training_X = pca.transform(training_X)
        testing_X  = pca.transform(testing_X)


    model = GaussianHMM(n_components, "diag",n_iter=1000)
    model.fit([training_X])

    hidden_states          = model.predict(training_X)
    predicted_hidden_state = model.predict(testing_X)

    # PROBABILISTIC APPROACH (a possible version is sketched after this function)
    # pr = model.predict_proba(testing_X)
    # print pr

    prob = 0
    state_idx = (hidden_states == predicted_hidden_state)
    mean_val = np.mean(training_y[state_idx])

    return int(mean_val > 0), testing_y, prob
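The commented-out probabilistic branch could be filled in along these lines (a sketch, not the original author's code; predict_proba returns per-frame state posteriors in both the old sklearn.hmm and current hmmlearn APIs):

import numpy as np

def probabilistic_signal(model, training_X, training_y, testing_X):
    # Weight each training return by the posterior probability that its
    # frame occupies the state predicted for the test observation.
    posteriors = model.predict_proba(training_X)  # shape (window, n_states)
    test_state = model.predict(testing_X)[0]
    weights = posteriors[:, test_state]
    expected = np.average(training_y, weights=weights)
    return int(expected > 0), float(weights.mean())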
Example #4
def train(X, n_components):
    ###############################################################################
    # Run Gaussian HMM
    print ("fitting to HMM and decoding ...")

    # make an HMM instance and execute fit
    model = GaussianHMM(n_components, covariance_type="diag", n_iter=2000)

    model.fit([X])

    # predict the optimal sequence of internal hidden state
    hidden_states = model.predict(X)

    print ("done\n")

    ###############################################################################
    # print trained parameters and plot
    print ("Transition matrix")
    print (model.transmat_)
    print ()

    print ("means and vars of each hidden state")
    for i in range(n_components):
        print ("%dth hidden state" % i)
        print ("mean = ", model.means_[i])
        print ("var = ", np.diag(model.covars_[i]))
        print ()

    return hidden_states, model
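train() takes n_components as given. A hedged sketch of choosing it by BIC instead (hmmlearn-style single-array fit is assumed; the parameter count is an approximation for diagonal covariances):

import numpy as np
from hmmlearn.hmm import GaussianHMM  # assumed modern API

def select_n_components(X, candidates=(2, 3, 4, 5, 6, 7)):
    n, d = X.shape
    best_model, best_bic = None, np.inf
    for k in candidates:
        model = GaussianHMM(n_components=k, covariance_type="diag", n_iter=200)
        model.fit(X)
        # free parameters: start probs + transition rows + means + diag covars
        n_params = (k - 1) + k * (k - 1) + 2 * k * d
        bic = -2.0 * model.score(X) + n_params * np.log(n)
        if bic < best_bic:
            best_model, best_bic = model, bic
    return best_model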
Example #5
def predictWithHMM(index, window=252):
    training_X = X[range(index - window, index), :]
    training_y = actual_y[range(index - window, index)]
    testing_X = X[index, :].reshape(1, training_X.shape[1])
    testing_y = y[index]

    # PCA DATA
    if perform_pca:
        pca = PCA(n_components=pca_components)
        pca.fit(training_X)
        training_X = pca.transform(training_X)
        testing_X = pca.transform(testing_X)

    model = GaussianHMM(n_components, "diag", n_iter=1000)
    model.fit([training_X])

    hidden_states = model.predict(training_X)
    predicted_hidden_state = model.predict(testing_X)

    # PROBABILISTIC APPROACH
    # pr = model.predict_proba(testing_X)
    # print pr

    prob = 0
    state_idx = (hidden_states == predicted_hidden_state)
    mean_val = np.mean(training_y[state_idx])

    return int(mean_val > 0), testing_y, prob
Example #6
def create_hmm_by_label(label):
    
    seqs = get_sequences_by_label(label)
    
    n_states = 3
    hmm = GaussianHMM(n_states, covariance_type="diag", n_iter=1000)
    hmm.fit([seqs])
    
    return hmm
Example #7
    def run(self, protos):
        models = []
        for nstate, label, seq in protos:
            train  = self._training.run(seq)
            f1, f2 = self._feature.run(train, True)

            o = np.vstack((f1[:,1], f2)).T

            (start, trans) = self.init_left_right_model(nstate)
            clf = GaussianHMM(n_components=nstate, covariance_type=self._covar,
                              transmat=trans, startprob=start)
            clf.fit(np.array([o]))
            models.append({'id':label, 'model':clf})

        self._models = models
        return models
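init_left_right_model() is not shown in this snippet; a plausible reconstruction (an assumption about the helper, not the original code) builds the start vector and banded transition matrix of a left-right (Bakis) topology:

import numpy as np

def init_left_right_model(nstate):
    # Always start in state 0; each state may only self-loop or advance one
    # state to the right, the usual topology for gesture/sequence models.
    start = np.zeros(nstate)
    start[0] = 1.0
    trans = np.zeros((nstate, nstate))
    for i in range(nstate - 1):
        trans[i, i] = trans[i, i + 1] = 0.5
    trans[-1, -1] = 1.0
    return start, trans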
Example #8
def create_hmm_by_labels(labels, dbs):
    
    seqs_all= []
    for label in labels:
        seqs = get_sequences_by_label_multi_dbs(label, dbs)
        seqs_all.append(seqs)
    
    seqs_all = np.array(seqs_all)[0]
    
    #print seqs_all
    #print np.shape(seqs_all)

    n_states = 3
    hmm = GaussianHMM(n_states, covariance_type="full", n_iter=1000)
    hmm.fit(seqs_all)
    
    return hmm
Example #9
    def run(self, protos):
        models = []
        for nstate, label, seq in protos:
            train = self._training.run(seq)
            f1, f2 = self._feature.run(train, True)

            o = np.vstack((f1[:, 1], f2)).T

            (start, trans) = self.init_left_right_model(nstate)
            clf = GaussianHMM(n_components=nstate,
                              covariance_type=self._covar,
                              transmat=trans,
                              startprob=start)
            clf.fit(np.array([o]))
            models.append({'id': label, 'model': clf})

        self._models = models
        return models
Example #10
def HMM(data, sid, means_prior=None):
    # data is _not_ an event-frame, but an array
    # of the most recent trade events

    # Create scikit-learn model using the means
    # from the previous model as a prior
    model = GaussianHMM(HIDDEN_STATES, covariance_type="diag", n_iter=10, means_prior=means_prior, means_weight=0.5)

    # Extract variation and volume
    diff = data.variation[sid].values
    volume = data.volume[sid].values
    X = np.column_stack([diff, volume])

    if len(diff) < HIDDEN_STATES:
        return None

    # Estimate model
    model.fit([X])

    return model
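A usage sketch for HMM() above: chaining fits so each refit is regularised toward the previous model's state means (event_batches and sid stand in for the caller's data feed and are hypothetical; means_prior/means_weight are real GaussianHMM constructor arguments):

prior_means = None
for batch in event_batches:  # hypothetical stream of trade-event frames
    fitted = HMM(batch, sid, means_prior=prior_means)
    if fitted is not None:
        prior_means = fitted.means_  # warm-start the next refit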
Example #11
	def get_hmms (self):

		for gesture_type in self.gesture_types:

			print_status ("Get_Hmms", "Fitting for gesture_type: " + gesture_type)
			### Step 1: fill hmm_examples appropriately ###
			hmm_examples = []
			for gesture in self.gestures[gesture_type]:
				hmm_rep = gesture.get_hmm_rep ()
				hmm_examples.append (hmm_rep)

			### Step 2: fit parameters for the hmm ###
			hmm = GaussianHMM (self.num_hmm_states)
			hmm.fit (hmm_examples)

			### Step 3: store the hmm in self.hmms ###
			self.hmms[gesture_type] = hmm

			print_inner_status (gesture_type, "predicted the following sequences: (score: sequence)")
			for example in hmm_examples:
				print "		", hmm.score (example), ": ", hmm.predict (example)
Example #12
def gaussian_hmm_model(stock_market_quote, n_components=5):
    close_v = np.asarray(stock_market_quote.get_closing_price())
    volume = np.asanyarray(stock_market_quote.get_volume())
    volume = volume[:-1]
    diff = close_v[1:] - close_v[:-1]
    close_v = close_v[1:]
    X = np.column_stack([diff, volume])
    model = GaussianHMM(n_components, covariance_type="diag")
    model.fit([X])
    hidden_states = model.predict(X)
    
    print "Transition matrix"
    print model.transmat_
    print ""
    
    print "means and vars of each hidden state"
    for i in xrange(n_components):
        print "%dth hidden state" % i
        print "mean = ", model.means_[i]
        print "var = ", np.diag(model.covars_[i])
        print ""
    
    '''Visualization of Closing Price with respect to Volume, clustered by
    hidden states of data
    '''
    fig = mlp.figure()
    ax = fig.add_subplot(111)
    for i in xrange(n_components):
        idx = (hidden_states == i)
        ax.plot(volume[idx], close_v[idx], 'o', label="%dth hidden state" % i)
    ax.legend()
    ax.set_xlabel('Volume of Stock', fontsize=20)
    ax.set_ylabel('Closing Price of Stock', fontsize=20)
    ax.set_title("""Quote's Volume and closing volume change 
                    in different hidden states""")
    ax.grid(True)
    mlp.show()
Example #13
def hmm(samples):
	model = GaussianHMM(n_components=3)
	samples = samples.dropna()
	idx = samples.index
	if samples.values.ndim < 2:
		#import pdb; pdb.set_trace()
		m = samples.values.shape
		samples = samples.values.reshape(m[0],1)
	
	model.fit([samples])
	#_, states = model.decode(samples, algorithm='map')
	# NOTE: these are private sklearn.hmm internals, used here to get the
	# forward lattice for one-step-ahead state prediction
	framelogprob = model._compute_log_likelihood(samples)
	logprob, fwdlattice = model._do_forward_pass(framelogprob)
	
	n, _ = model.means_.shape
	frame = pd.DataFrame(
    	framelogprob, index=idx, columns=map(lambda x: "frame_"+str(x), range(n)) )
	forward = pd.DataFrame(
    	fwdlattice, index=idx, columns=map(lambda x: "forward_"+str(x), range(n)) )
	#import pdb; pdb.set_trace()
	predict = pd.DataFrame(
		(fwdlattice-framelogprob)[1:, :], index=idx[:-1], columns=map(lambda x: "predict_"+str(x), range(n)))
	#import pdb; pdb.set_trace()
	return model, frame.join(forward)
Example #14
def gaussian_hmm_model(stock_market_quote, n_components=5):
    close_v = np.asarray(stock_market_quote.get_closing_price())
    volume = np.asanyarray(stock_market_quote.get_volume())
    volume = volume[:-1]
    diff = close_v[1:] - close_v[:-1]
    close_v = close_v[1:]
    X = np.column_stack([diff, volume])
    model = GaussianHMM(n_components, covariance_type="diag")
    model.fit([X])
    hidden_states = model.predict(X)

    print "Transition matrix"
    print model.transmat_
    print ""

    print "means and vars of each hidden state"
    for i in xrange(n_components):
        print "%dth hidden state" % i
        print "mean = ", model.means_[i]
        print "var = ", np.diag(model.covars_[i])
        print ""
    '''Visualization of Closing Price with respect to Volume, clustered by
    hidden states of data
    '''
    fig = mlp.figure()
    ax = fig.add_subplot(111)
    for i in xrange(n_components):
        idx = (hidden_states == i)
        ax.plot(volume[idx], close_v[idx], 'o', label="%dth hidden state" % i)
    ax.legend()
    ax.set_xlabel('Volume of Stock', fontsize=20)
    ax.set_ylabel('Closing Price of Stock', fontsize=20)
    ax.set_title("""Quote's Volume and closing volume change 
                    in different hidden states""")
    ax.grid(True)
    mlp.show()
Example #15
def main():
    """
    First ARG: list of training files
    Second ARG: save name for model
    """
    file1 = sys.argv[1]
    outname = sys.argv[2]
    file_list = [f[0:-1] for f in open(file1, 'r')]
    models, transitions, priors = calc_transmat(file_list)
    hmm = GaussianHMM(
        transitions.shape[0],
        "full",
        #startprob=priors,
        n_iter=500,
        transmat=transitions,
        init_params='mcs',
        params='mcs',
    )
    feat, lab = load_feats_labels(file_list)
    #hmm.means_ = np.transpose(models['mean'])
    #hmm.covars_ = models['sigma']
    print 'Fitting'

    start = timeit.default_timer()
    hmm.fit([np.transpose(feat)])
    stop = timeit.default_timer()
    print 'Training Time: ' + str(stop - start)

    features, labels = load_feats_labels(['audio.arff'])
    _, seq = hmm.decode(np.transpose(features))
    #print filter(lambda(x,y): x==y, zip(labels, map(int2label, seq)))
    print len(filter(lambda (x, y): x == y, zip(labels, map(int2label, seq))))
    pickle.dump(hmm, open(outname, "wb"))
    plt.imshow(transitions, interpolation='nearest')
    plt.show()
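The final print relies on Python 2's tuple-unpacking lambda; a Python 3 equivalent of that accuracy count (a sketch) is:

# Count frames where the decoded label matches the reference label.
n_correct = sum(1 for ref, pred in zip(labels, map(int2label, seq)) if ref == pred)
print(n_correct)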
Example #16
def main():
    """
    First ARG: list of training files
    Second ARG: save name for model
    """
    file1 = sys.argv[1]
    outname = sys.argv[2]
    file_list = [f[0:-1] for f in open(file1,'r')]
    models, transitions, priors = calc_transmat(file_list)
    hmm = GaussianHMM(
        transitions.shape[0],
        "full",
        #startprob=priors,
        n_iter=500,
        transmat=transitions,
        init_params='mcs',
        params='mcs',
    )
    feat, lab = load_feats_labels(file_list)
    #hmm.means_ = np.transpose(models['mean'])
    #hmm.covars_ = models['sigma']
    print 'Fitting'

    start = timeit.default_timer()
    hmm.fit([np.transpose(feat)])
    stop = timeit.default_timer()
    print 'Training Time: ' + str(stop - start)

    features, labels = load_feats_labels(['audio.arff'])
    _, seq = hmm.decode(np.transpose(features))
    #print filter(lambda(x,y): x==y, zip(labels, map(int2label, seq)))
    print len(filter(lambda(x,y): x==y, zip(labels, map(int2label, seq))))
    pickle.dump(hmm, open(outname, "wb"))
    plt.imshow(transitions, interpolation='nearest')
    plt.show()
Example #17
class GaussianHmmLib:
    """
    ref: http://scikit-learn.org/0.14/auto_examples/applications/plot_hmm_stock_analysis.html
    https://www.quantopian.com/posts/inferring-latent-states-using-a-gaussian-hidden-markov-model
    bear market: smaller mean, higher variance
    bull market: higher mean, smaller variance
    """
    def __init__(self, dbhandler, *args, **kwargs):
        self.dbhandler = dbhandler
        self.sids = self.dbhandler.stock.ids
        self.n_components = int(kwargs.pop('n_components', 5))
        self.n_iter = int(kwargs.pop('n_iter', 1000))

    def run(self, data):
        sid = self.sids[0]
        self.dates = data[sid]['price'].values
        self.close_v = data[sid]['close_v'].values
        self.volume = data[sid]['volume'].values[1:]

        # take diff of close value
        # this makes len(diff) = len(close_t) - 1
        # therefore, other quantities also need to be shifted
        self.diff = self.close_v[1:] - self.close_v[:-1]

        # pack diff and volume for training
        self.X = np.column_stack([self.diff, self.volume])

        # make an HMM instance and execute fit
        self.model = GaussianHMM(self.n_components,
                                 covariance_type="diag",
                                 n_iter=self.n_iter)
        self.model.fit([self.X])  # n_iter is already set on the constructor

        # predict the optimal sequence of internal hidden state
        self.hidden_states = self.model.predict(self.X)

    def report(self):
        # print trained parameters and plot
        print "Transition matrix"
        print self.model.transmat_
        print ""

        print "means and vars of each hidden state"
        for i in xrange(self.n_components):
            print "%dth hidden state" % i
            print "mean = ", self.model.means_[i]
            print "var = ", np.diag(self.model.covars_[i])
            print ""

        years = YearLocator()  # every year
        months = MonthLocator()  # every month
        yearsFmt = DateFormatter('%Y')
        fig = plt.figure()
        ax = fig.add_subplot(111)

        for i in xrange(self.n_components):
            # use fancy indexing to plot data in each state
            idx = (self.hidden_states == i)
            ax.plot_date(self.dates[idx],
                         self.close_v[idx],
                         'o',
                         label="%dth hidden state" % i)
        ax.legend()

        # format the ticks
        ax.xaxis.set_major_locator(years)
        ax.xaxis.set_major_formatter(yearsFmt)
        ax.xaxis.set_minor_locator(months)
        ax.autoscale_view()

        # format the coords message box
        ax.fmt_xdata = DateFormatter('%Y-%m-%d')
        ax.fmt_ydata = lambda x: '$%1.2f' % x
        ax.grid(True)

        fig.autofmt_xdate()
        plt.savefig("gaussianhmm_%s.png" % (self.sids[0]))
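Applying the docstring's bear/bull rule after run() could look like the sketch below (an assumption built on the fitted attributes above; states are ranked by the mean of the close-price-diff feature):

import numpy as np

def label_regimes(model):
    # Lowest-mean state reads as bearish, highest-mean as bullish, per the
    # class docstring; intermediate states are left unlabeled.
    order = np.argsort(model.means_[:, 0])
    return {int(order[0]): "bear", int(order[-1]): "bull"}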
Example #18
# therefore, other quantities also need to be shifted
diff = close_v[1:] - close_v[:-1]
dates = dates[1:]
close_v = close_v[1:]

# pack diff and volume for training
X = np.column_stack([diff, volume])

###############################################################################
# Run Gaussian HMM
print "fitting to HMM and decoding ...",
n_components = 5

# make an HMM instance and execute fit
model = GaussianHMM(n_components, "diag", n_iter=1000)
model.fit([X])

# predict the optimal sequence of internal hidden state
hidden_states = model.predict(X)

print "done\n"

###############################################################################
# print trained parameters and plot
print "Transition matrix"
print model.transmat_
print ""

print "means and vars of each hidden state"
for i in xrange(n_components):
    print "%dth hidden state" % i
Example #19
adaptor = XMLAdaptorMultiWindow1()
for file_path in glob.glob(source):
    observation_sequence = adaptor.convert(file_path)
    if observation_sequence:
        observation_sequence.save(output_dir + "training_observations/" + os.path.basename(file_path) + '.csv')

print "Loading observations from CSV..."

# Load observation sequences from CSV
observation_sequences, filenames = readObservationSequences(output_dir + "training_observations/*.csv", return_filenames=True)
training_sequences = [ observation_sequence.getNumpyArray() for observation_sequence in observation_sequences ]

print "Training Multivariate Gaussian HMM model..."
n_components = 3
model = GaussianHMM(n_components, covariance_type="full", n_iter=10)
model.fit(training_sequences)

# save Gaussian HMM model to file
model_dir = output_dir + '%sstates/' % n_components
mkdir_p(model_dir)
serialiser = HMMSerialiser(model, feature_names=adaptor.getFeatures())
serialiser.saveXML(model_dir + 'model.xml')
 
print "Tagging observation sequences..."
tagged_sequences_dir = model_dir + "tagged_sequences/"
mkdir_p(tagged_sequences_dir)
for i, training_sequence in enumerate(training_sequences):
    hidden_state_sequence = model.predict(training_sequence)
    for j, state in enumerate(hidden_state_sequence):
        observation_sequences[i].getObservation(j).setState("H%s" % state)
    # save tagged sequence to file
    if save:
        filename = filenames[i].replace('.csv', '.tagged.csv')
        observation_sequences[i].save(save + os.sep + filename, include_state=True)
Example #20
    return likelihood_of_training_data, observations_per_state

### PROTOTYPE ROUTINE

print "Loading training data..."
# Load observation sequences from CSV
observation_sequences, filenames = readObservationSequences(training_data, return_filenames=True)
training_sequences = [ observation_sequence.getNumpyArray() for observation_sequence in observation_sequences ]

print "Training multivariate Gaussian HMM (base model)..."
# Implements (1.), (2.)
base_model = GaussianHMM(n_states, covariance_type=covariance_type, n_iter=num_EM_iterations)
base_model.fit(training_sequences)
# save base model
print "\tSaving base model to file..."
saveModel(base_model, 'base_model', observation_sequences[0].getFeatureNames())
# tag training data using base model
print "\tTagging training data using base model..."
# Implements (3.), (4.)
likelihood_of_training_data, observations_per_state = tagTrainingData( base_model, 
                                                                       training_sequences, 
                                                                       list(observation_sequences), # pass a copy 
                                                                       save='base_model/tagged_training_data', 
                                                                       filenames=filenames )
print "\tTotal log lokelihood of the training data according to base model: %.4f" % likelihood_of_training_data

previous_model = base_model
for i in range(num_GMM_models):
Example #21
def makeGaussHMM(d):
    for i in range(len(d)):
        d[i] = normalize(d[i])
    new_mod = GaussianHMM(4, n_iter = 10000)
    new_results = new_mod.fit(d)
    return new_results
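normalize() is not shown; a plausible stand-in (an assumption, not the original helper) z-scores each feature column of one sequence:

import numpy as np

def normalize(seq):
    # Hypothetical helper: zero-mean, unit-variance per feature column.
    seq = np.asarray(seq, dtype=float)
    return (seq - seq.mean(axis=0)) / (seq.std(axis=0) + 1e-12)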
Example #22
#n_features = sum(good_features2)
n_features = X_new.shape[1]

print(n_features)
# # clf = svm.SVC()
# # clf.fit(X_new, y)

# hmm = MultinomialHMM()
# pos = np.where(np.diff(y) != 0)[0]
# d = np.hstack([0, pos+1, len(y)])
# lens = np.diff(d)
# hmm.fit(X_new, y, lens)

hmm = GaussianHMM(n_components=20)
hmm.fit([X_new])

clusters = pred = hmm.predict(X_new)

# neigh = KNeighborsClassifier(n_neighbors=10, weights='distance')
# scores = cross_validation.cross_val_score(neigh, X_new, y, cv=5)
# print(scores)
# # neigh.fit(X_new, y)

# good_features = ETC.feature_importances_ >= 0.0005
# print(np.sum(good_features))
# X_new2 = X[..., good_features]

# n_features = 20
# pca = PCA(n_components = n_features)
# pca.fit(X_new)
Example #23
        algorithm="viterbi",
        covariance_type="full",
        covars_prior=0.01,
        covars_weight=1,
        means_prior=None,
        means_weight=0,
        n_components=5,
        random_state=None,
        startprob=None,
        startprob_prior=1.0,
        transmat=None,
        transmat_prior=1.0,
        n_iter=1000,
    )

    print "Fitting model..."

    model.fit([farm1.get_output()])

    print "Predicting hidden states..."
    hidden_states = model.predict(farm1.get_output())

    print "Transition matrix"
    print model.transmat_
    print ""
    print "mean and vars of the hidden states"
    for i in range(5):
        print "%dth hidden state" % i
        print "mean = ", model.means_[i]
        print "var = ", np.diag(model.covars_[i])
        print ""
Example #24
class GaussianHmmLib:
    """
    ref: http://scikit-learn.org/0.14/auto_examples/applications/plot_hmm_stock_analysis.html
    https://www.quantopian.com/posts/inferring-latent-states-using-a-gaussian-hidden-markov-model
    bear market: smaller mean, higher variance
    bull market: higher mean, smaller variance
    """

    def __init__(self, dbhandler, *args, **kwargs):
        self.dbhandler = dbhandler
        self.sids = self.dbhandler.stock.ids
        self.n_components = int(kwargs.pop('n_components', 5))
        self.n_iter = int(kwargs.pop('n_iter', 1000))

    def run(self, data):
        sid = self.sids[0]
        self.dates = data[sid]['price'].values
        self.close_v = data[sid]['close_v'].values
        self.volume = data[sid]['volume'].values[1:]

        # take diff of close value
        # this makes len(diff) = len(close_t) - 1
        # therefore, other quantities also need to be shifted
        self.diff = self.close_v[1:] - self.close_v[:-1]

        # pack diff and volume for training
        self.X = np.column_stack([self.diff, self.volume])

        # make an HMM instance and execute fit
        self.model = GaussianHMM(self.n_components, covariance_type="diag", n_iter=self.n_iter)
        self.model.fit([self.X])  # n_iter is already set on the constructor

        # predict the optimal sequence of internal hidden state
        self.hidden_states = self.model.predict(self.X)

    def report(self):
        # print trained parameters and plot
        print "Transition matrix"
        print self.model.transmat_
        print ""

        print "means and vars of each hidden state"
        for i in xrange(self.n_components):
            print "%dth hidden state" % i
            print "mean = ", self.model.means_[i]
            print "var = ", np.diag(self.model.covars_[i])
            print ""

        years = YearLocator()   # every year
        months = MonthLocator()  # every month
        yearsFmt = DateFormatter('%Y')
        fig = plt.figure()
        ax = fig.add_subplot(111)

        for i in xrange(self.n_components):
            # use fancy indexing to plot data in each state
            idx = (self.hidden_states == i)
            ax.plot_date(self.dates[idx], self.close_v[idx], 'o', label="%dth hidden state" % i)
        ax.legend()

        # format the ticks
        ax.xaxis.set_major_locator(years)
        ax.xaxis.set_major_formatter(yearsFmt)
        ax.xaxis.set_minor_locator(months)
        ax.autoscale_view()

        # format the coords message box
        ax.fmt_xdata = DateFormatter('%Y-%m-%d')
        ax.fmt_ydata = lambda x: '$%1.2f' % x
        ax.grid(True)

        fig.autofmt_xdate()
        plt.savefig("gaussianhmm_%s.png" %(self.sids[0]))
Example #25
    ax.set_xlabel("No. of Clusters")
    ax.set_ylabel("Information Loss")
    ax.set_xticks(range(start,end+1),minor=True)
    ax.legend()
    ax.grid(True,which='both')
    plt.show()
##############################################################################
# Run HMM
X_hmm = np.column_stack((y_train,X_train[['hour_of_day','weather','day_of_week']]))
#X_hmm = np.column_stack((y_train,X_train[['hour_of_day','weather']]))
#X_hmm = y_train
from sklearn.hmm import GaussianHMM
n_clusters = 9
#n_clusters = 17
model = GaussianHMM(n_clusters,covariance_type='diag',n_iter=1000)
model.fit([X_hmm])
hidden_states = model.predict(X_hmm)
viterbi_logprob, viterbi_states = model.decode(X_hmm)  # decode returns (logprob, state_sequence)
x_ax = np.asarray(range(len(X_hmm)))
x_ax = X_train['hour_of_day'] + X_train['day_of_week']*24
#x_ax = X_train['hour_of_day']
x_ax = np.asarray([item.to_datetime() for item in X_train.index])
def plot_HMM(n_clusters,hidden_states,x_ax,y_ax):
    #PLOT HIDDEN STATES
    fig = plt.figure()
    ax = fig.add_subplot(111)
    for i in xrange(n_clusters):
        print i
        idx = (hidden_states==i)
        if i<7:
            ax.plot(x_ax[idx],y_ax[idx],'o',label='%dth state'%i)
Example #26
close_v1 = close_v1[1:]
diff2 = close_v2[1:] - close_v2[:-1]
close_v2 = close_v2[1:]

# pack diff and volume for training
X1 = np.column_stack([diff1, volume1])
X2 = np.column_stack([diff2, volume2])
###############################################################################
# Run Gaussian HMM
print("fitting to HMM and decoding ...", end='')
n_components = 5
# make an HMM instance and execute fit
model1 = GaussianHMM(n_components, covariance_type="diag", n_iter=1000)
model2 = GaussianHMM(n_components, covariance_type="diag", n_iter=1000)

model1.fit([X1])
model2.fit([X2])

# predict the optimal sequence of internal hidden state
hidden_states1 = model1.predict(X1)
hidden_states2 = model2.predict(X2)

print("done\n")

# calculate similarity measure
states1 = range(n_components)
states2 = list(itertools.permutations(states1))
print(states1)
print(len(states2))
sims = []
for i in range(len(states2)):
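The loop is cut off here; one plausible continuation (an assumption about the original intent) scores each relabelling of model2's states by how closely the permuted transition matrix matches model1's:

import numpy as np

# Sketch of a possible similarity measure over state permutations.
for perm in states2:
    p = list(perm)
    permuted_transmat = model2.transmat_[np.ix_(p, p)]
    sims.append(-np.linalg.norm(model1.transmat_ - permuted_transmat))
best_perm = states2[int(np.argmax(sims))]
print(best_perm)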
Example #27

data = t1dmread('trimmedDataFiles/MYFILE101.no_gaps_trimmed.csv')
timeStamps101 = np.array(data['timestamp'])
skinTemp101 = np.array(data['skin temp'])
airTemp101 = np.array(data['air temp'])
steps101 = np.array(data['steps'])
hr101 = np.array(data['hr'])
cgm101 = np.array(data['cgm'])
normskintemp101 = skinTemp101 - airTemp101

toFit = np.column_stack([cgm101,normskintemp101])
print("Fitting to HMM")
n_components = 4
model = GaussianHMM(n_components,covariance_type='diag',n_iter=1000)
model.fit([toFit])

hidden_states = model.predict(toFit)

print("done\n")

print("Transition Matrix for Normed Skin Temperature")
print(model.transmat_)
print("\nMeans and variances of each hidden state: \n")

for i in range(n_components):
    print("%dth hidden state:" % i)
    print("Mean = ",model.means_[i])
    print("Variance = ",np.diag(model.covars_[i]))
    print()
Example #28
		if count < args.maf:
			trimmed_count = 0
		counts.append(trimmed_count)
		kmer_stash.append(kmer)
	i += 1

if not len(counts):
	sys.exit("No k-mer counts remain after filtering; check thresholds and try again.")

## fit HMM to counts
if len(args.mu) != len(args.sigmasq):
	sys.exit("Vectors of prior means and variances must be same length.")

counts = np.reshape(np.log1p(np.array(counts, dtype = "int")), (-1,1))
hmm = GaussianHMM( len(args.mu) )
hmm.fit([counts])

if args.verbose:
	sys.stderr.write("Fitting HMM to k-mer counts, assuming {} hidden states...\n".format(len(args.mu)))
	sys.stderr.write("means:\n" + str(hmm.means_) + "\n")
	sys.stderr.write("covariances:\n" + str(hmm.covars_) + "\n")
	sys.stderr.write("\n")
	sys.stderr.write("Processing possible variant sites...\n")
	sys.stderr.write("\trejecting haplotypes with read count < {}\n".format(args.maf))
	sys.stderr.write("\taccepting as TE/ME any haplotype with max count > {}\n".format(args.maxhits))

## find positions of transitions
states =  hmm.predict(counts)
breaks = np.where(np.diff(states))[0]
break_coords = []
break_kmers = []
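The snippet ends before break_coords and break_kmers are filled; a plausible continuation (an assumption) records the k-mer and the flanking states at each detected state change:

# Sketch: breaks[i] is the last index before a state change, so pair each
# boundary k-mer with the states on either side of it.
for b in breaks:
    break_kmers.append(kmer_stash[b])
    break_coords.append((int(b), int(states[b]), int(states[b + 1])))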
Example #29
def test_1():
    vm = VonMisesHMM(n_states=5)
    gm = GaussianHMM(n_components=5)
    X1 = np.random.randn(100, 2)
    yield lambda: vm.fit([X1])
    yield lambda: gm.fit([X1])
        print "Doing replicate", repInx, "/", numReps, "with", numState, "states"
        sys.stdout.flush()
        
        # cluster all the available data and use that as initial point
        means = cluster.KMeans(n_clusters=numState).fit(indata.iloc[:,0:num_data]).cluster_centers_
        cv = np.cov(indata.iloc[:,0:num_data].T)
        covars = mixture.distribute_covar_matrix_to_match_covariance_type(cv, "tied", num_data)
        covars[covars==0] = 1e-5
        
        model = GaussianHMM(numState, covariance_type="tied", n_iter=1000, init_params='abdefghijklnopqrstuvwxyzABDEFGHIJKLNOPQRSTUVWXYZ')
        model.means_ = means
        model.covars_ = covars
        
        print("Fitting model...")
        sys.stdout.flush()
        model.fit(data)

        print("Decoding states...")
        sys.stdout.flush()
        # do a loop over everything and record in one long array
        states = np.array([])
        score = 0
        for i in range(0, len(data)):
            hidden_states = model.decode(data[i])
            states = np.append(states, hidden_states[1])
            score = score + model.score(data[i])

        print("Saving data...")
        sys.stdout.flush()

        # save the states and LLH
Example #31
nclasses = len(np.unique(classlabels))
hmmclass = []
#print classlabels
print quantized_set.shape
for i in range(0, nclasses):
    newtrainset = []
    for k in range(0, len(classlabels)):
        if classlabels[k] == i:
            #print i
            #print k
            newtrainset.append(quantized_set[:, k])

    newtrainset = np.asarray(newtrainset)
    #print newtrainset.shape
    hmm = HMM(64)
    hmm.fit([newtrainset])
    hmmclass.append(hmm)

#print testingset.shape
rowdivision = datasample.shape[0]
t = []
for i in xrange(int(round(testingset.shape[0] / rowdivision))):
    t.append(
        quantize_data(testingset[rowdivision * i:rowdivision * (i + 1), :],
                      kmms))
#print t.shape
t = np.asarray(t)
rlabels = []
for ts in t:
    i = 0
    index = 0
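The classification loop above is truncated; a sketch of how it plausibly finishes (an assumption), scoring each quantized test block against every per-class HMM and keeping the best-scoring class:

import numpy as np

for ts in t:
    # The class whose HMM assigns the highest log-likelihood wins.
    scores = [h.score(ts) for h in hmmclass]
    rlabels.append(int(np.argmax(scores)))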
Example #32
            t, last_index = overlapped_samples(file_path, incident_reported_time=int(incident_time), overlap=5, window=10, with_end=2)
            if t is None:
                print file_path, 'is bad'
            else:
                model.means_ = means
                model.covars_ = covs
                print 'shape initial', np.shape(covs)
                '''
                best_seq = model.decode(t)
                print 'intial,', best_seq
                print 'final means', model.means_
                print 'initial trans', tmat
                print 'initial startprobs', smat, sum(smat)
                '''
                model.fit([t])
                best_seq = model.decode(t)
                print 'file', file_path
                print 'final,', best_seq
                #print 'final means', model.means_
                #print 'final trans', model.transmat_
                #print 'final startprob', model.startprob_

                if np.isnan(model.means_).any() == False and np.isnan(model.covars_).any() == False:
                    means = model.means_
                    covs = np.array([np.diag(model.covars_[0])])
                    for i in range(1, model.n_components):
                        covs = np.vstack((covs, [np.diag(model.covars_[i])]))
                    print 'shape after', np.shape(covs)
                    tmat = model.transmat_
Example #33
# therefore, other quantities also need to be shifted
diff = close_v[1:] - close_v[:-1]
dates = dates[1:]
close_v = close_v[1:]

# pack diff and volume for training
X = np.column_stack([diff, volume])

###############################################################################
# Run Gaussian HMM
print "fitting to HMM and decoding ...",
n_components = 5

# make an HMM instance and execute fit
model = GaussianHMM(n_components, "diag", n_iter=1000)
model.fit([X])

# predict the optimal sequence of internal hidden state
hidden_states = model.predict(X)

print "done\n"

###############################################################################
# print trained parameters and plot
print "Transition matrix"
print model.transmat_
print ""

print "means and vars of each hidden state"
for i in xrange(n_components):
    print "%dth hidden state" % i
Example #34
class HMM(object):
    '''
    class for creating and manipulating HMM model
    '''
    def __init__(self,**kwargs):
        if 'steam_obj' not in kwargs:
            self.steam_obj = Steam()
        else:
            self.steam_obj = kwargs['steam_obj']
        if 'weather_obj' not in kwargs:
            self.weather_obj = Weather()
        else:
            self.weather_obj = kwargs['weather_obj']
        steam_obj = self.steam_obj
        weather_obj = self.weather_obj
        hour_of_day = steam_obj.ts.index.map(lambda x: x.hour + (x.minute/60.0))
        day_of_week = steam_obj.ts.index.map(lambda x: x.dayofweek)
        df_hmm = pd.DataFrame({'steam':steam_obj.ts,'weather':weather_obj.ts, \
                'hour_of_day':hour_of_day,'day_of_week':day_of_week},index=steam_obj.ts.index)
        # it's important that the column order is maintained
        # while slicing the HMM model
        self.df_hmm,self.X_hmm = self.gen_meta_data(steam_obj,weather_obj) 
        if 'n_states' not in kwargs:
            self.plot_elbow(3,15)
        else:
            self.n_states = kwargs['n_states']

    def __len__(self):
        return len(self.X_hmm)

    def build_model(self):
        n_states = self.n_states
        X_hmm = self.X_hmm
        self.model = GaussianHMM(n_states,covariance_type='diag',n_iter=1000)
        self.model.fit([X_hmm])
        self.hidden_states = self.model.predict(X_hmm)

    def build_forecast_model(self):
        model = self.model
        n_states = self.n_states
        model_forecast = copy.deepcopy(model)
        model_forecast.n_features = model.n_features-1
        model_forecast._means_ = model.means_[:,1:]
        model_forecast._covars_ = model._covars_[:,1:]
        self.model_forecast = model_forecast

    def gen_meta_data(self,steam_obj=None,weather_obj=None):
        if steam_obj is not None:
            hour_of_day = steam_obj.ts.index.map(lambda x: x.hour + (x.minute/60.0))
            day_of_week = steam_obj.ts.index.map(lambda x: x.dayofweek)           
            df_hmm = pd.DataFrame({'steam':steam_obj.ts,'weather':weather_obj.ts, \
                        'hour_of_day':hour_of_day},index=steam_obj.ts.index)
            #df_hmm = pd.DataFrame({'steam':steam_obj.ts,'weather':weather_obj.ts, \
            #            'hour_of_day':hour_of_day,'day_of_week':day_of_week},index=steam_obj.ts.index)
           # X_hmm = df_hmm.as_matrix(columns=['steam','weather'])
            X_hmm = df_hmm.as_matrix(columns=['steam','weather','hour_of_day'])
            #X_hmm = df_hmm.as_matrix(columns=['steam','weather','hour_of_day','day_of_week'])
        else:
            hour_of_day = weather_obj.ts.index.map(lambda x: x.hour + (x.minute/60.0))
            day_of_week = weather_obj.ts.index.map(lambda x: x.dayofweek)           
            df_hmm = pd.DataFrame({'weather':weather_obj.ts, \
                    'hour_of_day':hour_of_day},index=weather_obj.ts.index)
            #df_hmm = pd.DataFrame({'weather':weather_obj.ts, \
            #        'hour_of_day':hour_of_day,'day_of_week':day_of_week},index=weather_obj.ts.index)
           # X_hmm = df_hmm.as_matrix(columns=['weather'])
            X_hmm = df_hmm.as_matrix(columns=['weather','hour_of_day'])
            #X_hmm = df_hmm.as_matrix(columns=['weather','hour_of_day','day_of_week'])
        return df_hmm,X_hmm

    def plot_model(self,x_ax=None,y_ax=None):
        X_hmm = self.X_hmm
        steam_ts = self.steam_obj.ts
        if x_ax is None:
            x_ax = np.asarray([item.to_datetime() for item in steam_ts.index])
        if y_ax is None:
            y_ax = X_hmm[:,0]
        hidden_states = self.hidden_states
        n_states = self.n_states
        fig = plt.figure()
        ax = fig.add_subplot(111)
        for i in xrange(n_states):
            print i
            idx = (hidden_states==i)
            if i<7:
                ax.plot(x_ax[idx],y_ax[idx],'o',label='%dth state'%i)
            elif i<14:
                ax.plot(x_ax[idx],y_ax[idx],'x',label='%dth state'%i)
            elif i<21:
                ax.plot(x_ax[idx],y_ax[idx],'+',label='%dth state'%i)
            elif i<28:
                ax.plot(x_ax[idx],y_ax[idx],'*',label='%dth state'%i)
        ax.set_title('%d State HMM'%(n_states))
        ax.legend()
        ax.set_ylabel('Load (Mlb/Hr)')
        ax.set_xlabel('Time')
        ax.grid(True)
        plt.show()


    def plot_elbow(self,start,end):
        '''
        Fit GMM and plot elbow using AIC & BIC
        '''
        from sklearn.mixture import GMM,DPGMM
        obs = self.X_hmm
        aics = []
        bics = []
        for i in range(start,end+1):
            n_iter=1000
            for j in range(1,11):
                g = GMM(n_components=i,n_iter=n_iter)
                g.fit(obs)
                print i
                converged =  g.converged_
                if converged:
                    print 'j:%d'%(j)
                    break
                n_iter += 1000
            aics.append(g.aic(obs))
            bics.append(g.bic(obs))
        if not converged:
            print 'Not Converged!!'
        fig = plt.figure()
        ax = fig.add_subplot(111)
        ax.plot(range(start,end+1),aics,label='AIC')
        ax.plot(range(start,end+1),bics,label='BIC')
        ax.set_xlabel("No. of Clusters")
        ax.set_ylabel("Information Loss")
        ax.set_xticks(range(start,end+1),minor=True)
        ax.legend()
        ax.grid(True,which='both')
        plt.show()
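A minimal usage sketch for the class above (the Steam()/Weather() defaults come from the constructor; n_states=9 is just an assumption):

h = HMM(n_states=9)       # falls back to the default Steam()/Weather() objects
h.build_model()           # fit the GaussianHMM and decode hidden states
h.build_forecast_model()  # copy of the model with the steam feature dropped
h.plot_model()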
Example #35
		#===[ Local Data ]===
		self.motion_sequences 	= _motion_sequences
		self.dataframes 		= [r.get_dataframe () for r in self.motion_sequences]
		self.feature_extractor 	= FeatureExtractor ()
		self.train ()


	# Function: fit
	# -------------
	# fits a generative model (HMM) to the data, returns it
	def fit_hmm (self):

		X = [self.feature_extractor.extract(x) for x in self.dataframes]
		print [x.shape for x in X]
		hmm = GaussianHMM ()
		hmm.fit (X)
		return hmm


	# Function: score_df
	# ------------------
	# scores a dataframe
	def score_df (self, df):

		return self.hmm.score (self.feature_extractor.extract (df))

	# Function: score
	# ---------------
	# returns a score of the current example, rep. as motion sequence
	def score (self, ms):
Example #36
                initial_map, initial_best_sep_map = model.decode(feat_from_list, algorithm='map')
                sum_initial_score += model.score(feat_from_list)
                sum_inital_ll += initial_ll
                sum_initial_map += initial_map
            else:
                remove_idx.append(idx)
                print 'too few samples in file', list_of_patient_file_paths[idx], np.shape(feat_from_list)
        print 'initial viterbi log-likelihood,', sum_inital_ll
        print 'initial score log-likelihood,', sum_initial_score
        print 'initial map log-likelihood', sum_initial_map
        remove_idx.sort()
        remove_idx.reverse()
        print 'removing...', remove_idx
        for r in remove_idx:
            del feats_as_list[r]
        model.fit(feats_as_list)
        sum_final_ll = 0.0
        sum_final_score = 0.0
        for feat_from_list in feats_as_list:
            print np.shape(feat_from_list)
            final_ll, final_best_seq = model.decode(feat_from_list)
            final_score = model.score(feat_from_list)
            sum_final_ll += final_ll
            sum_final_score += final_score
        print 'final viterbi log-likelihood,', sum_final_ll
        print 'final score log-likelihood,', sum_final_score

        #save all the files that have been generated by training

        mean_name = root + save_model_to + condition + '/' + condition + '-cond-' + feature + '-feat-' + str(n_states) + '-states-' + str(
            model.n_iter) + '-iter-mean.txt'
Example #37
File: HMM.py Project: ranulfo0s/HMM
c = 0.0014999999999999458
e = 0.001
d = 0.050

##EX transitions_prob = np.mat([row0=[a,c,d,c,d], row1=[e,a,b,e,e], row2=[c,d,a,c,d], row3=[d,c,c,a,d], row4=[d,c,d,c,a]])

transitions_prob = np.mat([[a, c, d, c, d], [e, a, b, e, e], [c, d, a, c, d], [d, c, c, a, d], [d, c, d, c, a]])


HMM = GaussianHMM(n_components=5, covariance_type="diag", transmat=transitions_prob)


#
# Must always fit the obs data before changing means and covars
#
HMM.fit([Resul])

HMM.means_ = np.identity(5)

HMM.covars_ = 0.2 * np.ones((5, 5))

# Use of LR probability to predict the states.
HResul = HMM.predict(Resul)

# Get the probability of success HMM
Hscore = comp(HResul, target)

# print HResul

print "HMM = "
print Hscore
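Overwriting means_ and covars_ after fit() discards what EM just learned. If the intent is fixed emission parameters under a trained start/transition structure, the params/init_params strings of the old sklearn.hmm constructor can express that; a hedged sketch reusing Resul and transitions_prob from above:

# Sketch: leave 'm' and 'c' out of params/init_params so fit() neither
# re-initialises nor updates the hand-set means and covariances.
HMM = GaussianHMM(n_components=5, covariance_type="diag",
                  transmat=transitions_prob,
                  init_params="", params="st")
HMM.means_ = np.identity(5)
HMM.covars_ = 0.2 * np.ones((5, 5))
HMM.fit([Resul])
HResul = HMM.predict(Resul)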
Example #38
def train_hmm(X):
    hmm = GaussianHMM(n_components=8)
    hmm.fit(X)
    print hmm.score(X[0])
    print np.shape(X[0])
    return hmm
Example #39
def test_1():
    vm = VonMisesHMM(n_states=5)
    gm = GaussianHMM(n_components=5)
    X1 = np.random.randn(100,2)
    yield lambda: vm.fit([X1])
    yield lambda: gm.fit([X1])
Example #40
diff = close_v[1:] - close_v[:-1]
dates = dates[1:]
close_v = close_v[1:]

# pack diff and volume for training
X = np.column_stack([diff, volume])

###############################################################################
# Run Gaussian HMM
print "fitting to HMM and decoding ...",
n_components = 2

# make an HMM instance and execute fit
model = GaussianHMM(n_components, covariance_type="diag", n_iter=1000)

model.fit([X])

# predict the optimal sequence of internal hidden state
hidden_states = model.predict(X)

print "done\n"

###############################################################################
# print trained parameters and plot
print "Transition matrix"
print model.transmat_
print ""

print "means and vars of each hidden state"
for i in xrange(n_components):
    print "%dth hidden state" % i
Example #41
    model = GaussianHMM(algorithm='viterbi',
                        covariance_type='full',
                        covars_prior=0.01,
                        covars_weight=1,
                        means_prior=None,
                        means_weight=0,
                        n_components=5,
                        random_state=None,
                        startprob=None,
                        startprob_prior=1.0,
                        transmat=None,
                        transmat_prior=1.0,
                        n_iter=1000)

    print "Fitting model..."

    model.fit([farm1.get_output()])

    print "Predicting hidden states..."
    hidden_states = model.predict(farm1.get_output())

    print "Transition matrix"
    print model.transmat_
    print ""
    print "mean and vars of the hidden states"
    for i in range(5):
        print "%dth hidden state" % i
        print "mean = ", model.means_[i]
        print "var = ", np.diag(model.covars_[i])
        print ""
Example #42
		if row[1] != 'close':
			#list = []
			#for i in range(len(row)-2):
			#	list.append(float(row[i+1]))
			label = float(row[7])
			volume.append(float(row[2]))
			if label > 0:
				indices.append(1)
			else:
				indices.append(0)
			#matrix.append(list)

X = numpy.column_stack([numpy.array(indices), numpy.array(volume)])
model = GaussianHMM(2, covariance_type="diag", n_iter=1000)

model.fit([X])


"""
reading the data to be classified
"""
with open('hackathon-master/AAPL-test.csv', 'rb') as csvfile:
	data = csv.reader(csvfile, delimiter=',')
	#matrix = []
	volume = []
	labels = []
	for row in data:
		if row[1] != 'close':
			list = []
			volume.append(float(row[2]))
			#for i in range(len(row)-2):
Example #43
        counts.append(trimmed_count)
        kmer_stash.append(kmer)
    i += 1

if not len(counts):
    sys.exit(
        "No k-mer counts remain after filtering; check thresholds and try again."
    )

## fit HMM to counts
if len(args.mu) != len(args.sigmasq):
    sys.exit("Vectors of prior means and variances must be same length.")

counts = np.reshape(np.log1p(np.array(counts, dtype="int")), (-1, 1))
hmm = GaussianHMM(len(args.mu))
hmm.fit([counts])

if args.verbose:
    sys.stderr.write(
        "Fitting HMM to k-mer counts, assuming {} hidden states...\n".format(
            len(args.mu)))
    sys.stderr.write("means:\n" + str(hmm.means_) + "\n")
    sys.stderr.write("covariances:\n" + str(hmm.covars_) + "\n")
    sys.stderr.write("\n")
    sys.stderr.write("Processing possible variant sites...\n")
    sys.stderr.write("\trejecting haplotypes with read count < {}\n".format(
        args.maf))
    sys.stderr.write(
        "\taccepting as TE/ME any haplotype with max count > {}\n".format(
            args.maxhits))