def seqHMM():
    """Fit a MultinomialHMM on the module-level `input_data` and print
    training-set accuracy.

    Relies on module-level names being defined before the call:
    `input_data` (DataFrame: features in all but the last column, labels
    in the last) and `lengths` (per-sequence lengths for seqlearn).
    """
    clf = MultinomialHMM()
    clf.fit(input_data.iloc[:, :-1], input_data.iloc[:, -1], lengths)
    pred = clf.predict(input_data.iloc[:, :-1])
    actual = input_data.iloc[:, -1]
    # Fraction of positions where the predicted label matches the true one.
    accuracy = sum(pred == actual) / float(len(actual))
    # Call form instead of the Python-2-only print statement; with a single
    # argument the output is identical under Python 2 and 3.
    print(accuracy)
def test_hmm():
    """Sanity-check MultinomialHMM on the module-level (X, y, lengths) data."""
    n_features = X.shape[1]
    clf = MultinomialHMM()
    clf.fit(X, y, lengths)
    # The fitted class set and the training-set predictions must be exact.
    assert_array_equal(clf.classes_, ["Adj", "DT", "IN", "N", "V"])
    assert_array_equal(clf.predict(X), y)
    # Best-first decoding should reproduce the same labels on this data.
    clf.set_params(decode="bestfirst")
    assert_array_equal(clf.predict(X), y)
    # All learned log-probability tables must exponentiate to normalized
    # distributions (columns sum to 1).
    n_classes = len(clf.classes_)
    assert_array_almost_equal(np.ones(n_features),
                              np.exp(clf.coef_).sum(axis=0))
    assert_array_almost_equal(np.ones(n_classes),
                              np.exp(clf.intercept_trans_).sum(axis=0))
    assert_array_almost_equal(1., np.exp(clf.intercept_final_).sum())
    assert_array_almost_equal(1., np.exp(clf.intercept_init_).sum())
def hmm_pred(a, X_train, X_test, y_train, y_test):
    """Fit a hidden Markov model (MultinomialHMM) with smoothing `a` and
    score it on the test split.

    Returns [accuracy, precision, recall, F1], the last three as
    weighted averages over classes.
    """
    hmm = MultinomialHMM(alpha=a)
    # Every training sample is treated as its own length-1 sequence;
    # np.ones replaces the non-idiomatic [1 for i in y_train] list comp.
    hmm.fit(X_train, y_train, lengths=np.ones(len(y_train), dtype=int))
    y_pred = hmm.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred, average='weighted')
    recall = recall_score(y_test, y_pred, average='weighted')
    F1 = f1_score(y_test, y_pred, average='weighted')
    return [accuracy, precision, recall, F1]
def train_and_test_markov(decode, alpha, X_train, y_train,
                          sequence_length_train, X_test, y_test,
                          sequence_length_test, *args, **kwargs):
    """Train a MultinomialHMM and time both phases.

    Returns (fit_time_ms, predict_time_ms, accuracy_percent).
    Extra *args/**kwargs are accepted and ignored, keeping the signature
    interchangeable with sibling train_and_test_* helpers.
    """
    clf = MultinomialHMM(decode=decode, alpha=alpha)

    t0 = time.time()
    clf.fit(X_train, y_train, sequence_length_train)
    t1 = time.time()
    y_pred = clf.predict(X_test, sequence_length_test)
    t2 = time.time()

    fit_time = 1000 * (t1 - t0)
    pred_time = 1000 * (t2 - t1)
    accuracy = 100 * accuracy_score(y_pred, y_test)
    return (fit_time, pred_time, accuracy)
def _hmm(self, ind: Individual, train: Dataset, dev: Dataset):
    """Train a MultinomialHMM configured by the individual's genome and
    return its predictions on the dev split.

    Raises InvalidPipeline when the data is incompatible with the model
    (non-integer features or unseen categories); any other ValueError is
    re-raised unchanged.
    """
    train_lengths = [len(s) for s in train.sentences]
    xtrain, ytrain = train.by_word()
    xdev, _ = dev.by_word()
    dev_lengths = [len(s) for s in dev.sentences]
    try:
        hmm = MultinomialHMM(decode=ind.choose('viterbi', 'bestfirst'),
                             alpha=ind.nextfloat())
        hmm.fit(xtrain, ytrain, train_lengths)
        return hmm.predict(xdev, dev_lengths)
    except ValueError as e:
        # Both messages indicate data/model incompatibility; the original
        # duplicated the same raise across two elif branches — merged.
        if 'non-negative integers' in str(e) or 'unknown categories' in str(e):
            raise InvalidPipeline(str(e))
        raise
# Saves the image into a TXT file
for line in line_mapping:
    for word in line:
        if word["matrix"].shape[1] == 0:
            print("Zero matrix... Skipping...")
        else:
            # `with open(...)` replaces the deprecated `file(...)` builtin
            # and guarantees the handle is closed even if savetxt raises.
            with open('test.txt', 'a') as f_handle:
                np.savetxt(f_handle, word['matrix'], delimiter=" ", fmt="%i",
                           newline=" ", header='',
                           footer="" + word["word"] + "\n\n", comments='')

# Extracts features from the datasets
X_train, y_train, lengths_train = load_conll("test.txt", features)

# Models it as an HMM
clf = MultinomialHMM()
clf.fit(X_train, y_train, lengths_train)
print(X_train)
print(y_train)

# Validation after training
# NOTE(review): this re-reads the same file used for training, so the
# score below is a training-set score — confirm that is intentional.
X_test, y_test, lengths_test = load_conll("test.txt", features)
y_pred = clf.predict(X_test, lengths_test)
print(y_pred)

# Final score
# print(bio_f_score(y_test, y_pred))
model.fit(training_data, training_labels, training_data_length)

# ----- testing -------
test_data = []
test_labels = []
test_data_length = []

files = getDataFileNames("test")
for trainingFile in files:
    dataFile = pd.read_csv(DATA_FOLDER + trainingFile, header=0)
    data = [dataFile['accX'], dataFile['accY'], dataFile['accZ']]
    length = len(dataFile['accX'])
    # One length entry per channel (X, Y, Z columns share the same length).
    test_data_length.append([length, length, length])
    test_data.append(data)
    # Gesture label is inferred from the file name.
    if "updown" in trainingFile:
        test_labels.append("updown")
    elif "leftright" in trainingFile:
        test_labels.append("leftright")
    elif "rotateclock" in trainingFile:
        test_labels.append("rotateclockwise")

# BUG FIX: the two size reports were swapped — the data count was printed
# under "label size" and the label count under "data size".
print("data size:", len(test_data))
print("label size:", len(test_labels))

for index, t in enumerate(test_data):
    print("HMM prediction for " + str(test_labels[index]) + " = " +
          str(model.predict(t)))
# Flatten the nested ground-truth label sequences into one list.
y_ground_truth_flatten = [
    item for sublist in y_ground_truth_seqlearn for item in sublist
]

# change type to array
seqlearn_X_train = np.array(X_train_flatten)
seqlearn_y_ground_truth = np.array(y_ground_truth_flatten)

# HMM (seqlearn MultinomialHMM)
model_seqlearn = MultinomialHMM()

# training
model_seqlearn.fit(seqlearn_X_train, seqlearn_y_ground_truth, len_train)

# state prediction
y_pred_seqlearn = model_seqlearn.predict(X_test_flatten)

# print output time remarks
outputSteps(y_pred_seqlearn)

# state prediction for random sequence
y_pred_seqlearn_random = model_seqlearn.predict(X_random)

# state prediction for heuristic sequence
# BUG FIX: this assignment previously reused y_pred_seqlearn_random,
# silently discarding the random-sequence prediction made just above.
y_pred_seqlearn_heuristic = model_seqlearn.predict(X_heuristic)

# target names for evaluation (sorted for a stable ordering)
target_names = sorted(step_set)

# confusion matrix
X_train = (((X_tr[:, None] & (1 << np.arange(8)))) > 0).astype( int) # vector-> binary matrix Y_train = np.array(Y_train) # X_test = np.array(X_test).reshape(-1,1) X_te = np.array(X_test) X_test = (((X_te[:, None] & (1 << np.arange(8)))) > 0).astype(int) Y_test = np.array(Y_test) return [X_train, X_test, Y_train, Y_test] data = load_dataset() kf = SequenceKFold(seq_lengths(data[1]), 2) for tuple in kf: train_len = tuple[1] test_len = tuple[3] split = dataset_split(tuple[0], tuple[2]) #train the model clf = MultinomialHMM() clf.fit(split[0], split[2], train_len) #evaluate the model Y_pred = clf.predict(split[1], test_len) print('Accuracy:') print(clf.score(split[1], split[3], test_len)) print('Confusion matrix:') labels = list(data[2].values()) print(confusion_matrix(split[3], Y_pred, labels)) print('Report:') target_names = list(data[2].keys()) print(classification_report(split[3], Y_pred, target_names=target_names))
Y2 = mat1['Y']

mat_test = scipy.io.loadmat('test_subject1_psd04.mat')
test_X = mat_test['X']
true_label = np.loadtxt('test_subject1_true_label.csv', delimiter=",")

X = mat['X']
Y = mat['Y']
new_X = np.concatenate((X, X1, X2), axis=0)
new_Y = np.concatenate((Y, Y1, Y2), axis=0)

clf = MultinomialHMM()
# BUG FIX: the `lengths` argument must be a sequence of per-sequence
# lengths, not a bare int; the whole training set is one sequence here.
# (NOTE(review): [len(X), len(X1), len(X2)] may be the real intent —
# confirm whether the three files are separate recording sessions.)
clf.fit(new_X, new_Y, [len(new_X)])
clf.set_params(decode="bestfirst")
ans = clf.predict(test_X)
print('sub-1, custom', accuracy_score(ans, true_label))
print(confusion_matrix(true_label, ans))
# 1440/3504: subject 1 accuracy

# start subject-2
sub2_1 = scipy.io.loadmat('train_subject2_psd01.mat')
sub2_X1 = sub2_1['X']
sub2_Y1 = sub2_1['Y']
sub2_2 = scipy.io.loadmat('train_subject2_psd02.mat')
sub2_X2 = sub2_2['X']
sub2_Y2 = sub2_2['Y']
sub2_3 = scipy.io.loadmat('train_subject2_psd03.mat')
sub2_X3 = sub2_3['X']
from hmmlearn.hmm import GaussianHMM

input_data = pd.read_csv('../data/scaled_data/scaled_pca.csv')

d1 = pd.read_csv('../data/train_subject1_psd01.csv', header=None)
d2 = pd.read_csv('../data/train_subject1_psd02.csv', header=None)
d3 = pd.read_csv('../data/train_subject1_psd03.csv', header=None)
# input_data = pd.concat([d1, d2, d3], axis=0)
# (dead first assignment `lengths = [len(input_data)]` removed — it was
# immediately overwritten by the per-file lengths below)
lengths = [len(d1), len(d2), len(d3)]

clf = MultinomialHMM()
clf.fit(input_data.iloc[:, :-1], input_data.iloc[:, -1], lengths)
pred = clf.predict(input_data.iloc[:, :-1])
# BUG FIX: predictions cover all of input_data, so the reference labels
# must come from input_data's last column — the original compared
# against d3.iloc[:, -1], which has a different length.
actual = input_data.iloc[:, -1]
accuracy = sum(pred == actual) / float(len(actual))
print(accuracy)

# Random Forest
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_val_score
from sklearn import svm
import pandas as pd
import numpy as np
import pickle
padded_X[i][0:len(j)] = j import pdb; pdb.set_trace() print("TOTAL NUMBER OF SAMPLES: ", len(padded_X)) #separate into train and validate train_X = padded_X[:10000] train_Y = Y[:10000] val_X = padded_X[10000:] val_Y = Y[10000:] #fit the model to training data # print evaluate(model.predict(val_X), val_Y, val_X) # tuning on the dev set for the optimal number of hidden states best_accuracy = 0 best_number = 1 for n in range(50): model.fit(train_X, train_Y, n) accuracy = evaluate(model.predict(val_X), val_Y, val_X) print("n = ", n) print(accuracy) if accuracy > best_accuracy: best_accuracy = accuracy best_number = n # try to figure out how the HMM is learning. #evaluation on best number of hidden states model.fit(train_X, train_Y, best_number) #best accuracy: 0.48 print "Accuray: " , evaluate(model.predict(val_X), val_Y, val_X) print evaluate_with_output(model.predict(val_X), val_Y, val_X)