def score(self, X, y):
    """Evaluate the trained model on a labeled dataset.

    @X: NxD numpy array, one example per row;
    @y: 1D numpy array, ground-truth labels;
    returns a tuple (accuracy, precision, recall, F-measure).
    """
    predictions, _ = self.predict(X)
    # Remap the -1 class to 0 so predictions use the same label
    # encoding as y before computing the metrics.
    predictions[predictions == -1] = 0
    return (
        pm.accuracy(y, predictions),
        pm.precision(y, predictions),
        pm.recall(y, predictions),
        pm.F_measure(y, predictions),
    )
def oobTest(X, y, oobs, forest):
    """Evaluate the random forest classifier on its out-of-bag samples.

    @X: NxD numpy array, one example per row;
    @y: 1D numpy array, labels;
    @oobs: 2D list, oobs[i] holds the out-of-bag row indices of tree i;
    @forest: list, the trained trees;
    returns (accuracy, precision, recall, F-measure), each averaged over
    the trees, for binary problems; otherwise the averaged accuracy alone.
    """
    n_trees = len(forest)
    is_binary = len(np.unique(y)) == 2
    acc_sum = 0
    prec_sum = 0
    rec_sum = 0
    f_sum = 0
    for idx in range(n_trees):
        X_oob = X[oobs[idx]]
        y_oob = y[oobs[idx]]
        # NOTE(review): each tree's OOB set is scored with the FULL
        # forest here — confirm this is intended; classic OOB error
        # predicts a sample using only the trees that did not train on it.
        y_hat = predict(X_oob, forest)
        acc_sum += pm.accuracy(y_oob, y_hat)
        if is_binary:
            rec_sum += pm.recall(y_oob, y_hat)
            prec_sum += pm.precision(y_oob, y_hat)
            f_sum += pm.F_measure(y_oob, y_hat)
    if is_binary:
        return (acc_sum / n_trees, prec_sum / n_trees,
                rec_sum / n_trees, f_sum / n_trees)
    return acc_sum / n_trees
accuracy = 0 precision = 0 recall = 0 F_measure = 0 for train, val in kfold.split(data): X_train = data[train, :-1] y_train = data[train, -1] X_val = data[val, :-1] y_val = data[val, -1] knn = KNearestNeighbors() knn.train(X_train, y_train) y_pred = knn.predict(X_val, k+1, "l2") accuracy += pm.accuracy(y_val, y_pred) precision += pm.precision(y_val, y_pred) recall += pm.recall(y_val, y_pred) F_measure += pm.F_measure(y_val, y_pred) accuracy = accuracy/10 precision = precision/10 recall = recall/10 F_measure = F_measure/10 accuracy_hist[k] = accuracy precision_hist[k] = precision recall_hist[k] = recall F_measure_hist[k] = F_measure ''' import PCA data_2dim = PCA.PCA(data, 2) data_df = pd.DataFrame(data_2dim, index = data[:,-1])