def bpnn(X_train, Y_train, X_test, Y_test, params):
    """Train an MLP classifier and report its performance on the test set.

    Fits an ``MLPClassifier`` built from ``params``, predicts ``X_test``,
    prints precision / recall / F1 / accuracy together with the wall-clock
    time spent on fit + predict, and then shows a confusion-matrix heatmap
    followed by a ROC curve.

    :param X_train: training features
    :param Y_train: training labels
    :param X_test: test features
    :param Y_test: test labels; the positive class is the string ``'1'``
    :param params: mapping providing ``'hidden_layer_sizes'``,
        ``'activation'``, ``'learning_rate_init'`` and ``'solver'``
    :return: None (results are printed and plotted)
    """
    start = time.time()
    mlp = MLPClassifier(hidden_layer_sizes=params['hidden_layer_sizes'],
                        activation=params['activation'],
                        learning_rate_init=params['learning_rate_init'],
                        solver=params['solver'],
                        max_iter=100000)
    mlp.fit(X_train, Y_train)
    Y_pred = mlp.predict(X_test)
    end = time.time()

    # NOTE(review): `score` is presumably sklearn's
    # precision_recall_fscore_support; the support value is not used here.
    precision, recall, fscore, _ = score(Y_test, Y_pred, pos_label='1', average='binary')
    print('Precision: {} / Recall: {} / F1-Score: {} / Accuracy: {}'.format(
        round(precision, 3), round(recall, 3), round(fscore, 3),
        round(acs(Y_test, Y_pred), 3)))
    print("Execution time: " + str(end - start))

    class_label = ["0", "1"]
    # Pin the label order so the matrix is always 2x2 and its rows/columns
    # line up with the axis labels, even if a class is absent from Y_test.
    cm = confusion_matrix(Y_test, Y_pred, labels=class_label)
    df_cm = pd.DataFrame(cm, index=class_label, columns=class_label)
    sns.heatmap(df_cm, annot=True, fmt='d')
    plt.title("Confusion Matrix")
    plt.xlabel("Predicted Label")
    plt.ylabel("True Label")
    plt.show()

    # plot_roc_curve was removed in scikit-learn 1.2; prefer the display API
    # and only fall back to the legacy function on old installations.
    roc_display = getattr(sklearn.metrics, "RocCurveDisplay", None)
    if hasattr(roc_display, "from_estimator"):
        roc_display.from_estimator(mlp, X_test, Y_test)
    else:
        sklearn.metrics.plot_roc_curve(mlp, X_test, Y_test)
    plt.title("ROC Curve")
    plt.show()
def svm(X_train, Y_train, X_test, Y_test):
    """Train a default ``SVC`` and report its performance on the test set.

    Fits the classifier, predicts ``X_test``, prints precision / recall /
    F1 / accuracy together with the wall-clock time spent on fit + predict,
    and then shows a confusion-matrix heatmap followed by a ROC curve.

    :param X_train: training features
    :param Y_train: training labels
    :param X_test: test features
    :param Y_test: test labels; the positive class is the string ``'1'``
    :return: None (results are printed and plotted)
    """
    start = time.time()
    svclassifier = SVC()
    svclassifier.fit(X_train, Y_train)
    Y_pred = svclassifier.predict(X_test)
    end = time.time()

    # NOTE(review): `score` is presumably sklearn's
    # precision_recall_fscore_support; the support value is not used here.
    precision, recall, fscore, _ = score(Y_test, Y_pred, pos_label='1', average='binary')
    print('Precision: {} / Recall: {} / F1-Score: {} / Accuracy: {}'.format(
        round(precision, 3), round(recall, 3), round(fscore, 3),
        round(acs(Y_test, Y_pred), 3)))
    print("Execution Time: " + str(end - start))

    class_label = ["0", "1"]
    # Pin the label order so the matrix is always 2x2 and its rows/columns
    # line up with the axis labels, even if a class is absent from Y_test.
    cm = confusion_matrix(Y_test, Y_pred, labels=class_label)
    df_cm = pd.DataFrame(cm, index=class_label, columns=class_label)
    sns.heatmap(df_cm, annot=True, fmt='d')
    plt.title("Confusion Matrix")
    plt.xlabel("Predicted Label")
    plt.ylabel("True Label")
    plt.show()

    # plot_roc_curve was removed in scikit-learn 1.2; prefer the display API
    # and only fall back to the legacy function on old installations.
    roc_display = getattr(sklearn.metrics, "RocCurveDisplay", None)
    if hasattr(roc_display, "from_estimator"):
        roc_display.from_estimator(svclassifier, X_test, Y_test)
    else:
        sklearn.metrics.plot_roc_curve(svclassifier, X_test, Y_test)
    plt.title("ROC Curve")
    plt.show()
def kmeans(X_train, Y_train, X_test, Y_test, params):
    """Cluster with K-Means and score the clusters as class predictions.

    K-Means is unsupervised, so the cluster ids it returns are arbitrary
    integers with no fixed relation to the class labels. Each cluster is
    therefore translated into the majority training label of its members
    before the predictions are compared with ``Y_test``. Prints precision /
    recall / F1 / accuracy and the wall-clock time of fit + predict, then
    shows a confusion-matrix heatmap.

    :param X_train: training features
    :param Y_train: training labels, used only to name the clusters
    :param X_test: test features
    :param Y_test: test labels; the positive class is the string ``'1'``
    :param params: mapping providing ``'n_clusters'``
    :return: None (results are printed and plotted)
    """
    from collections import Counter

    start = time.time()
    clusterer = KMeans(n_clusters=params['n_clusters'], max_iter=100000)
    # KMeans.fit ignores y, so only the features are passed.
    clusterer.fit(X_train)
    test_clusters = clusterer.predict(X_test)
    end = time.time()

    # Majority vote per cluster over the training labels.
    votes = {}
    for cluster_id, label in zip(clusterer.labels_, Y_train):
        votes.setdefault(cluster_id, Counter())[str(label)] += 1
    cluster_to_label = {
        cluster_id: counts.most_common(1)[0][0]
        for cluster_id, counts in votes.items()
    }

    # Labels are strings to match the Y_test format; a cluster without any
    # training member falls back to its stringified id.
    Y_pred = [cluster_to_label.get(c, str(c)) for c in test_clusters]

    precision, recall, fscore, _ = score(Y_test, Y_pred, pos_label='1', average='binary')
    print('Precision: {} / Recall: {} / F1-Score: {} / Accuracy: {}'.format(
        round(precision, 3), round(recall, 3), round(fscore, 3),
        round(acs(Y_test, Y_pred), 3)))
    print("Execution Time: " + str(end - start))

    class_label = ["0", "1"]
    # Restrict the matrix to the two plotted classes so it is always 2x2,
    # whatever n_clusters is.
    cm = confusion_matrix(Y_test, Y_pred, labels=class_label)
    df_cm = pd.DataFrame(cm, index=class_label, columns=class_label)
    sns.heatmap(df_cm, annot=True, fmt='d')
    plt.title("Confusion Matrix")
    plt.xlabel("Predicted Label")
    plt.ylabel("True Label")
    plt.show()
def print_metrics(self, predicted_output):
    """
    Print a minimal set of classification metrics.

    Prints the accuracy, the four binary confusion-matrix counts (true
    positive, false negative, false positive, true negative) and the
    classification report (precision, recall, f1-score per class).

    The actual labels are read from ``self.y_test``; the predicted labels
    are passed in after running each algorithm.

    NOTE(review): ``cm``, ``acs`` and ``cr`` are presumably sklearn's
    ``confusion_matrix``, ``accuracy_score`` and ``classification_report``.
    The counts below assume a binary problem in which the positive class is
    the second label in sorted order (e.g. 1 out of {0, 1}) - confirm.

    :param predicted_output: Predicted labels
    """
    res = cm(self.y_test, predicted_output)
    # sklearn lays out the binary matrix as [[TN, FP], [FN, TP]]:
    # rows are the true classes, columns the predicted classes.
    tn = res[0][0]
    fp = res[0][1]
    fn = res[1][0]
    tp = res[1][1]
    print("Accuracy: ", acs(self.y_test, predicted_output))
    print("TP: ", tp, ", FN: ", fn, ", FP: ", fp, ", TN: ", tn)
    print(cr(self.y_test, predicted_output))
seq.compile(loss='binary_crossentropy', metrics=['accuracy'], optimizer='rmsprop')
seq.fit(xtrain, ztrain, validation_data=(xtest, ztest), epochs=2000, batch_size=16)


def _predict_classes(model, features):
    """Return hard class predictions of ``model`` for ``features``.

    ``Sequential.predict_classes`` was removed in TensorFlow 2.6. When the
    method is missing, its logic is reproduced here: argmax over the last
    axis for multi-unit outputs, a 0.5 threshold for a single output unit.
    """
    if hasattr(model, 'predict_classes'):
        return model.predict_classes(features)
    probabilities = model.predict(features)
    if probabilities.shape[-1] > 1:
        return probabilities.argmax(axis=-1)
    return (probabilities > 0.5).astype('int32')


# Cross-check the final model against the validation data to get the
# overall accuracy

# In[ ]:

from sklearn.metrics import accuracy_score as acs
print(acs(ztest, _predict_classes(seq, xtest)))

# Predict survivors from the test dataset

# In[ ]:

a = _predict_classes(seq, test)

# Prepare the result file that has to be submitted to the competition

# In[ ]:

o = pd.DataFrame(a, columns=['Survived'])
o.index = pd.read_csv('../input/test.csv')['PassengerId']
o.index.name = 'PassengerId'
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score as acs
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split

# Load the iris features and targets.
iris = load_iris()
x = iris.data
y = iris.target

# train_test_split returns the splits as (x_train, x_test, y_train, y_test);
# unpacking them in any other order pairs features with the wrong targets.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3, random_state=0)

# Fit an entropy-based random forest and predict the held-out split.
classifier = RandomForestClassifier(n_estimators=10, criterion='entropy', random_state=0)
classifier.fit(x_train, y_train)
y_pred = classifier.predict(x_test)

# Evaluate: confusion matrix and accuracy (true labels first, by convention).
cm = confusion_matrix(y_test, y_pred)
print(acs(y_test, y_pred))
# Search the `rfC` parameter grid for a random forest.
# NOTE(review): `grc` is presumably sklearn's GridSearchCV - confirm.
forest = RandomForestClassifier()
model = grc(forest, rfC)
model.fit(xtrain, ztrain)

# **Summarizing my findings**

# In[ ]:

print(model.best_params_, model.best_score_)

# **Checking the accuracy of my model using sklearn.metrics**

# In[ ]:

from sklearn.metrics import accuracy_score as acs
holdout_predictions = model.predict(xtest)
print(acs(ztest, holdout_predictions))

# **Preparing result as csv file**

# In[ ]:

# Predict on the test rows, using the same columns the model was trained on,
# then index the result by passenger id.
submission_predictions = model.predict(test[train.columns])
a = pd.DataFrame(submission_predictions)
a.index = pd.read_csv('../input/test.csv')['PassengerId']
a.columns = ['Survived']
a.index.name = 'PassengerId'

# In[ ]:

a.to_csv('result.csv')

# In[ ]:
while tree not in range(100): ##是字典说明还未完成分类 comparison = next(iter(tree)) ##取出第一个判断条件 tree = tree[comparison] ##第一个条件对应的字典 attribute_num = int(comparison.split()[0]) ##属性号 attribute_value = float(comparison.split()[1]) ##属性值 if X[index][attribute_num] <= attribute_value: tree = tree['<='] else: tree = tree['>'] res.append(tree) return res if __name__ == '__main__': iris = datasets.load_wine() ##用于手写数字识别只有87% X = iris.data y = iris.target X_train, X_test, y_train, y_test = tts(X, y, test_size=0.2, random_state=19901120, stratify=y) epsilon = 1e-2 DT = CreateDecisionTree(X_train, y_train, epsilon) print(DT) print(acs(Classifier(DT, X_test), y_test)) clf = dtc(criterion='entropy').fit(X_train, y_train) print(clf.score(X_test, y_test))
def main():
    """Run a Gaussian discriminant classifier chosen interactively.

    Loads and vectorizes the train/test CSV files, asks the user for the
    discriminant type (1, 2 or 3) and the prior probabilities, classifies
    the test set with ``mpp`` and reports precision / recall / F1 /
    accuracy, a confusion-matrix heatmap and the elapsed time.

    Exits with status 1 when the entered priors do not sum to 1.
    """
    # load data
    training_data = load_data.read_data("train.csv")
    testing_data = load_data.read_data("test.csv")
    testing_labels = load_data.read_data("submission.csv")
    X_train, X_test = load_data.vectorize_data(training_data, testing_data)
    X_train = X_train.toarray()
    X_test = X_test.toarray()
    # the label is taken from the last column of each table
    Y_train = np.array(training_data)[:, -1]
    Y_test = np.array(testing_labels)[:, -1]

    # the training and testing datasets should have the same dimension
    _, nftrain = X_train.shape
    _, nftest = X_test.shape
    assert nftrain == nftest

    # ask the user to input which discriminant function to use
    prompt = '''
    Type of discriminant functions supported assuming Gaussian pdf:
    1 - minimum Euclidean distance classifier
    2 - minimum Mahalanobis distance classifier
    3 - quadratic classifier
    '''
    print(prompt)
    # named `choice` rather than `str` so the builtin is not shadowed
    choice = input('Please input 1, 2, or 3: ')
    cases = int(choice)

    # ask the user to input prior probability that needs to sum to 1
    prop_str = input(
        "Please input prior probabilities in float numbers, separated by space, and they must add to 1: \n"
    )
    P = np.array([float(token) for token in prop_str.split()], dtype=float)
    Psum = P.sum()
    # Compare with a tolerance: decimal fractions are rarely exact in binary
    # floating point, so a strict `!= 1` rejects perfectly valid priors.
    if not np.isclose(Psum, 1.0):
        print("Prior probabilities do not add up to 1. Please check!")
        sys.exit(1)

    # derive the decision rule from the training set and apply on the test set
    t0 = time.time()  # start time
    Y_pred = mpp(X_train, Y_train, X_test, cases, P)
    t1 = time.time()  # ending time
    print(Y_pred)
    # convert predictions to string labels so they compare against Y_test
    Y_pred = Y_pred.astype("int")
    Y_pred = Y_pred.astype("str")

    # calculate accuracy
    precision, recall, fscore, _ = score(Y_test, Y_pred, pos_label='1', average='binary')
    print('Precision: {} / Recall: {} / F1-Score: {} / Accuracy: {}'.format(
        round(precision, 3), round(recall, 3), round(fscore, 3),
        round(acs(Y_test, Y_pred), 3)))

    cm = confusion_matrix(Y_test, Y_pred)
    class_label = ["0", "1"]
    df_cm = pd.DataFrame(cm, index=class_label, columns=class_label)
    sns.heatmap(df_cm, annot=True, fmt='d')
    plt.title("Confusion Matrix")
    plt.xlabel("Predicted Label")
    plt.ylabel("True Label")
    plt.show()

    print(f'The learning process takes {t1 - t0} seconds.')
from sklearn.ensemble import RandomForestRegressor reg = RandomForestRegressor(n_estimators=200, random_state=0) reg.fit(X_train, y_train) results.append(ac(y_test, reg.predict(X_test))) else : # this is for classification # Logisitic Classification from sklearn.linear_model import LogisticRegression cla = LogisticRegression() cla.fit(X_train_Scaler, y_train) results.append(acs(y_test, cla.predict(X_test_Scaler))) # K_Nearnest from sklearn.neighbors import KNeighborsClassifier cla = KNeighborsClassifier(n_neighbors=10) cla.fit(X_train_Scaler, y_train) results.append(acs(y_test, cla.predict(X_test_Scaler))) # SVM from sklearn.svm import SVC cla = SVC(kernel = 'linear',random_state=0) cla.fit(X_train_Scaler, y_train)
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.metrics import precision_recall_fscore_support as score
from sklearn.metrics import accuracy_score as acs
import matplotlib.pyplot as plt
import seaborn as sns

# In[7]:

# Fit a random forest on the vectorized training data and predict the test set.
rf = RandomForestClassifier(n_estimators=150, max_depth=None, n_jobs=-1)
rf_model = rf.fit(X_train_vect, y_train)
y_pred = rf_model.predict(X_test_vect)

# Binary metrics with 'male' as the positive class.
precision, recall, fscore, train_support = score(y_test, y_pred, pos_label='male', average='binary')
print('Precision: {} / Recall: {} / F1-Score: {} / Accuracy: {}'.format(
    round(precision, 3), round(recall, 3), round(fscore, 3),
    round(acs(y_test, y_pred), 3)))

# Making the Confusion Matrix
from sklearn.metrics import confusion_matrix

# Take the axis labels from the fitted model and pin the matrix to the same
# order, so every row/column is labelled with the class it actually holds.
class_label = list(rf_model.classes_)
cm = confusion_matrix(y_test, y_pred, labels=class_label)
df_cm = pd.DataFrame(cm, index=class_label, columns=class_label)
sns.heatmap(df_cm, annot=True, fmt='d')
plt.title("Confusion Matrix")
plt.xlabel("Predicted Label")
plt.ylabel("True Label")
plt.show()
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.metrics import precision_recall_fscore_support as score
from sklearn.metrics import accuracy_score as acs
import matplotlib.pyplot as plt
import seaborn as sns

# Train a 150-tree random forest and predict the vectorized test set.
rf = RandomForestClassifier(n_estimators=150, max_depth=None, n_jobs=-1)
rf_model = rf.fit(X_train_vect, y_train)
y_pred = rf_model.predict(X_test_vect)

# Binary metrics with the string label '1' as the positive class.
precision, recall, fscore, train_support = score(
    y_test, y_pred, pos_label='1', average='binary')
rounded_metrics = [
    round(value, 3)
    for value in (precision, recall, fscore, acs(y_test, y_pred))
]
print('Precision: {} / Recall: {} / F1-Score: {} / Accuracy: {}'.format(*rounded_metrics))

# Making the Confusion Matrix
from sklearn.metrics import confusion_matrix
class_label = ["ham", "spam"]
cm = confusion_matrix(y_test, y_pred)
df_cm = pd.DataFrame(cm, index=class_label, columns=class_label)

# Draw the matrix as an annotated heatmap with integer cell values.
sns.heatmap(df_cm, annot=True, fmt='d')
plt.title("Confusion Matrix")
plt.xlabel("Predicted Label")
plt.ylabel("True Label")
plt.show()
import matplotlib.pyplot as plt

# Plot the recorded ROC AUC values in PA_score1 (training run).
plt.plot(PA_score1, label='Passive Aggressive Classifier')
plt.title('ROCAUC-Passive Aggressive Classifier-Training')
plt.xlabel('Sample Size')
plt.ylabel('ROCAUC')
plt.legend()
plt.show()

# Binary metrics with the integer label 1 as the positive class.
precision, recall, fscore, train_support = score(y_l1, y_pred_l1, pos_label=1, average='binary')
print('Precision: {} / Recall: {} / F1-Score: {} / Accuracy: {}'.format(
    round(precision, 3), round(recall, 3), round(fscore, 3),
    round(acs(y_l1, y_pred_l1), 3)))

import seaborn as sns

# Making the Confusion Matrix
from sklearn.metrics import confusion_matrix

class_label = [1, 0]
# confusion_matrix sorts labels ascending ([0, 1]) by default, which is the
# reverse of class_label; pass the order explicitly so the heatmap axes
# describe the cells they are drawn next to.
cm = confusion_matrix(y_l1, y_pred_l1, labels=class_label)
df_cm = pd.DataFrame(cm, index=class_label, columns=class_label)
sns.heatmap(df_cm, annot=True, fmt='d')
plt.title("Confusion Matrix-Passive Aggressive Classifier-Trainig")
plt.xlabel("Predicted Label")
plt.ylabel("True Label")
plt.show()

# save
with open('PA_model1.pkl', 'wb') as f:
    pickle.dump(PA_model, f)
models.append(('RF', RandomForestClassifier())) models.append(('MNB', MultinomialNB())) models.append(('LDA', LinearDiscriminantAnalysis())) models.append(('SVM', SVC(gamma='auto', max_iter=3000, probability=True))) # evaluate each model in turn results = [] names = [] print('Testing Data') for name, model in models: model = model.fit(X_train, Y_train) y_pred = model.predict(X_validation) precision, recall, fscore, train_support = score(Y_validation, y_pred, pos_label='Yes', average='binary') accuracy = acs(Y_validation, y_pred) results.append(recall) results.append(fscore) results.append(accuracy) names.append(name) msg = "%s: %f %f %f" % (name, recall.mean(), fscore.mean(), accuracy.mean()) print(msg) print('\n') print('Training Data') for name, model in models: model = model.fit(X_train, Y_train) trainpred = model.predict(X_train) precision, recall, fscore, train_support = score(Y_train,
#--------------------------------- Validation of models

# 1. Random Forest Classifier
# More estimators have given better accuracy so far (20 vs. 200).
class_rnf_val = RandomForestClassifier(n_estimators=200, max_depth=None, n_jobs=-1)
rnf_val_model = class_rnf_val.fit(tfidf_train, y_train)
y_val_rnf_pred = rnf_val_model.predict(tfidf_validate)

# Binary metrics with the string label '5' as the positive class.
precision, recall, fscore, train_support = score(
    y_validate, y_val_rnf_pred, pos_label='5', average='binary')
rounded_metrics = [
    round(value, 3)
    for value in (precision, recall, fscore, acs(y_validate, y_val_rnf_pred))
]
print('Precision: {} / Recall: {} / F1-Score: {} / Accuracy: {}'.format(*rounded_metrics))

# Making the Confusion Matrix
class_label = ['1', '5']
cm = confusion_matrix(y_validate, y_val_rnf_pred)
df_cm = pd.DataFrame(cm, index=class_label, columns=class_label)

# Draw the matrix as an annotated heatmap with integer cell values.
sns.heatmap(df_cm, annot=True, fmt='d')
plt.title('Confusion Matrix')
plt.xlabel('Predicted Star')
plt.ylabel('Actual Star')
plt.show()

# 2. Logistic Regression
# Features are the nine columns before the last one; the target is the last
# column. Copy the slice so the encoded columns are written to an independent
# frame instead of a view of Total_employees (avoids SettingWithCopyWarning).
X = Total_employees.iloc[:, -10:-1].copy()
Y = Total_employees.iloc[:, -1]

from sklearn.preprocessing import LabelEncoder

# One encoder per column: refitting a single encoder would overwrite the
# 'dept' mapping and make it impossible to inverse-transform later.
dept_encoder = LabelEncoder()
X['dept'] = dept_encoder.fit_transform(X['dept'])
le = LabelEncoder()
X['salary'] = le.fit_transform(X['salary'])

from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.3, random_state=0)

#RandomFscore = []
from sklearn.ensemble import RandomForestClassifier
classifier = RandomForestClassifier(n_estimators=71, random_state=0)
classifier.fit(X_train, y_train)

# X_pred holds the predictions for the training rows, y_pred for the test rows.
X_pred = classifier.predict(X_train)
y_pred = classifier.predict(X_test)

#Testing the model accuracy (Training)
from sklearn.metrics import accuracy_score as acs
from sklearn.metrics import confusion_matrix as cm
score = acs(y_train, X_pred)
matrix = cm(y_train, X_pred)

#Testing the Model Validity
Tscore = acs(y_test, y_pred)
Tmatrix = cm(y_test, y_pred)
predictions = [] for row in x_test: label = self.closest(row) predictions.append(label) return predictions def closest(self, row): best_dist = eu(row, self.x_train[0]) best_index = 0 for i in range (1, len(self.x_train)): dist = eu(row, self.x_train[i]) if dist < best_dist: best_dist = dist best_index = i return self.y_train[best_index] from sklearn.datasets import load_iris iris = load_iris() x = iris.data y = iris.target from sklearn.cross_validation import train_test_split x_train, x_test, y_train, y_test = train_test_split(x, y, test_size = 0.2) clf = KNN() clf.fit(x_train, y_train) prediction = clf.predict(x_test) from sklearn.metrics import accuracy_score as acs print(acs(prediction, y_test))
""" #Decision Tree Algorithm of flower import pandas as pd data = pd.read_csv('Iris.csv', index_col=0) y = data.iloc[:, [-1]].values x = data.iloc[:, :-1] from sklearn.preprocessing import LabelEncoder X = LabelEncoder() y = X.fit_transform(y.ravel()) from sklearn.model_selection import train_test_split as tts X_train, X_test, y_train, y_test = tts(x, y, test_size=0.3) from sklearn.tree import DecisionTreeClassifier classifier = DecisionTreeClassifier() classifier.fit(X_train, y_train) X_pred = classifier.predict(X_train) #Predicting the Value of y y_pred = classifier.predict(X_test) #Displaying the result print(y_pred) #Testing the accuracy of the model from sklearn.metrics import accuracy_score as acs, confusion_matrix as cm y_pred_cm = cm(y_test, y_pred) print(y_pred_cm) #The accuracy score of the model y_pred_acs = acs(y_test, y_pred) print('The Model accuracy score is ', y_pred_acs)