def main():
    X, y = loadDataSet()
    kf = KFold(n_splits=10, shuffle=True)
    kf.get_n_splits(X)
    sorted_indices = np.loadtxt('final_sorted_indices.txt', dtype=int)
    rLIMIT = 201
    f1 = [0 for r in range(0, rLIMIT)]
    for train_index, test_index in kf.split(X):
        # print("TRAIN:", train_index, "TEST:", test_index)
        X_train, X_test = X[train_index], X[test_index]
        y_train, y_test = y[train_index], y[test_index]
        for r in range(1, rLIMIT):
            # Keep only the top-r ranked features for this fold.
            selected_feature_indices = sorted_indices[:r]
            X_train_selected_features = X_train[:, selected_feature_indices]
            X_test_selected_features = X_test[:, selected_feature_indices]
            clf = GaussianNB()
            y_test_pred = clf.fit(X_train_selected_features, y_train).predict(X_test_selected_features)
            f1[r] += fscore(y_test, y_test_pred)
    # Average the accumulated F1 scores over the 10 folds and report them.
    for r in range(1, rLIMIT):
        f1[r] = f1[r] / 10
        print(f1[r])
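# The snippet above (and the similar KFold/GaussianNB snippets below) uses several
# names it does not define. A minimal sketch of the assumed imports, taking `fscore`
# to be an alias for sklearn's f1_score; the loadDataSet() stub is hypothetical and
# only serves to make the sketch self-contained:
import numpy as np
from sklearn.model_selection import KFold
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import f1_score as fscore


def loadDataSet():
    # Hypothetical stand-in: returns (X, y) as NumPy arrays, here synthetic data.
    from sklearn.datasets import make_classification
    return make_classification(n_samples=200, n_features=300, random_state=0)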
def evaluate(self, data):
    valid_y, valid_true = [], []
    for x_true, y_true in data:
        y_pred = self.model.predict(x_true).argmax(axis=1)
        y_true = np.argmax(y_true, axis=1)
        valid_y.extend(y_pred)
        valid_true.extend(y_true)
        # print(len(valid_y))
    valid_y, valid_true = np.array(valid_y), np.array(valid_true)
    right = (valid_true == valid_y).sum()
    acc = right / len(valid_y)
    res = fscore(valid_true, valid_y, average='macro')
    return acc, res[0], res[1], res[2]
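# evaluate() indexes the result of fscore() as a tuple (res[0], res[1], res[2]),
# so here `fscore` cannot be sklearn's f1_score; presumably it is an alias for
# precision_recall_fscore_support. A minimal usage sketch under that assumption:
import numpy as np
from sklearn.metrics import precision_recall_fscore_support as fscore

y_true = np.array([0, 1, 2, 1, 0])
y_pred = np.array([0, 1, 1, 1, 0])
precision, recall, f1, _ = fscore(y_true, y_pred, average='macro')
print(precision, recall, f1)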
def get_metrics(x, y, num_labels):
    """
    Get the F1 score and accuracy for predicted and target values.

    :param x: np.array
    :param y: np.array
    :param num_labels: number of unique labels in the dataset
    :returns: (total_f1_score: float, total_accuracy: float)
    """
    total_f1_score = 0
    total_accuracy = 0
    for inp, out in zip(x, y):
        f1 = fscore(inp, list(out), labels=np.arange(num_labels), average='weighted')
        total_f1_score += f1
        total_accuracy += get_accuracy(inp, out)
    # Average both metrics over all (prediction, target) pairs.
    return total_f1_score / len(x), total_accuracy / len(x)
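# get_metrics() relies on a get_accuracy() helper that is not shown here.
# A minimal sketch, assuming it is simply the fraction of positions where the
# two sequences agree (the original helper may differ):
import numpy as np


def get_accuracy(pred, target):
    pred, target = np.asarray(pred), np.asarray(list(target))
    return float((pred == target).mean())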
def main():
    X, y = loadDataSet()
    kf = KFold(n_splits=10, shuffle=True)
    kf.get_n_splits(X)
    for train_index, test_index in kf.split(X):
        # print("TRAIN:", train_index, "TEST:", test_index)
        X_train, X_test = X[train_index], X[test_index]
        y_train, y_test = y[train_index], y[test_index]
        # Per-class mean and variance of each training feature.
        y_train_index0 = np.where(y_train == 0)[0]
        y_train_index1 = np.where(y_train == 1)[0]
        mean0 = np.mean(X_train[y_train_index0], axis=0)
        var0 = np.var(X_train[y_train_index0], axis=0)
        mean1 = np.mean(X_train[y_train_index1], axis=0)
        var1 = np.var(X_train[y_train_index1], axis=0)
        # Score the features; np.argsort is ascending, so sorted_indices starts
        # with the features that have the lowest LGD values.
        lgd = computeLGD(mean0, mean1, var0, var1)
        sorted_indices = np.argsort(lgd)
        with open('sorted_indices.txt', 'a+') as file:
            for i in sorted_indices:
                file.write(str(i) + ' ')
            file.write('\n')
        # Evaluate GaussianNB on the top-r features for r = 1..200 and log the F1 scores.
        for r in range(1, 201):
            selected_feature_indices = sorted_indices[:r]
            X_train_selected_features = X_train[:, selected_feature_indices]
            X_test_selected_features = X_test[:, selected_feature_indices]
            clf = GaussianNB()
            y_test_pred = clf.fit(X_train_selected_features, y_train).predict(X_test_selected_features)
            f1 = fscore(y_test, y_test_pred)
            with open('f1score.txt', 'a+') as file:
                file.write(str(f1) + '\n')
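# computeLGD() is not defined in the snippet above and its exact formula is unknown.
# The function below is a hypothetical stand-in, not the original: it scores each
# feature by the symmetric KL divergence between the two class-conditional Gaussians
# implied by the per-class means and variances.
import numpy as np


def computeLGD(mean0, mean1, var0, var1, eps=1e-12):
    # Hypothetical per-feature separability score (symmetric KL between Gaussians).
    v0, v1 = var0 + eps, var1 + eps
    d2 = (mean0 - mean1) ** 2
    return 0.5 * ((v0 + d2) / v1 + (v1 + d2) / v0) - 1.0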
def crossValidate(X, y, nfold):
    kf = KFold(n_splits=nfold, shuffle=True)
    kf.get_n_splits(X)
    sorted_indices = np.loadtxt('final_sorted_indices.txt', dtype=int)
    r = 16  # number of top-ranked features to keep
    print("K-fold: K=", nfold)
    f1 = 0
    acc = 0
    prec = 0
    rec = 0
    spec = 0
    for train_index, test_index in kf.split(X):
        # print("TRAIN:", train_index, "TEST:", test_index)
        X_train, X_test = X[train_index], X[test_index]
        y_train, y_test = y[train_index], y[test_index]
        selected_feature_indices = sorted_indices[:r]
        X_train_selected_features = X_train[:, selected_feature_indices]
        X_test_selected_features = X_test[:, selected_feature_indices]
        clf = GaussianNB()
        y_pred = clf.fit(X_train_selected_features, y_train).predict(X_test_selected_features)
        # Accumulate the per-fold metrics.
        f1 += fscore(y_test, y_pred)
        acc += accuracy(y_test, y_pred)
        prec += precision(y_test, y_pred)
        rec += recall(y_test, y_pred)
        spec += specificity(y_test, y_pred)
    # Report each metric averaged over the folds.
    print('fscore', f1 / nfold)
    print('accuracy', acc / nfold)
    print('precision', prec / nfold)
    print('recall', rec / nfold)
    print('specificity', spec / nfold)
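# crossValidate() assumes accuracy/precision/recall/specificity helpers that are
# not defined above. A minimal sketch of one plausible set of definitions for
# binary labels, built on sklearn (specificity = TN / (TN + FP)); the original
# helpers may be implemented differently:
from sklearn.metrics import accuracy_score, precision_score, recall_score, confusion_matrix


def accuracy(y_true, y_pred):
    return accuracy_score(y_true, y_pred)


def precision(y_true, y_pred):
    return precision_score(y_true, y_pred)


def recall(y_true, y_pred):
    return recall_score(y_true, y_pred)


def specificity(y_true, y_pred):
    # True-negative rate from the 2x2 confusion matrix.
    tn, fp, fn, tp = confusion_matrix(y_true, y_pred, labels=[0, 1]).ravel()
    return tn / (tn + fp)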
def find_fs(model):
    predictions = model.predict([test_sent, test_x], batch_size=BATCH_SIZE)[0]
    predictions = predictions.argmax(axis=1)
    fs = fscore(test_y.argmax(axis=1), predictions)
    return fs
def score(preds, targs):
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        p, r, f1, _ = fscore(targs, preds, pos_label=None, average='weighted')
    return p, r, f1
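# score() also unpacks a 4-tuple, so `fscore` is again assumed to be an alias for
# precision_recall_fscore_support, as in the evaluate() sketch above. Example usage
# under that assumption:
import warnings
from sklearn.metrics import precision_recall_fscore_support as fscore

p, r, f1 = score(preds=[0, 1, 1, 0], targs=[0, 1, 0, 0])
print(p, r, f1)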
def f_score(true, pred):
    score = fscore(true, pred, average='weighted')
    return score