def plot_just_right(X, y):
    """Fit a k=9 KNN classifier on (X, y) and display its decision regions."""
    model = KNeighborsClassifier(n_neighbors=9)
    model.fit(X, y)
    plt.figure(figsize=(10, 10))
    plot_decision_regions(X.values, y.values, clf=model, legend=2)
    plt.title("KNN (k=9)")
    plt.show()
def plot_super_flexible(X, y):
    """Fit a k=1 KNN (maximally flexible) on (X, y) and display its decision regions."""
    model = KNeighborsClassifier(n_neighbors=1)
    model.fit(X, y)
    plt.figure(figsize=(10, 10))
    plot_decision_regions(X.values, y.values, clf=model, legend=2)
    plt.title("KNN (k=1)")
    plt.show()
def plot_super_conservative(X, y):
    """Fit a logistic-regression model (linear boundary) on (X, y) and
    display its decision regions."""
    model = LogisticRegression()
    model.fit(X, y)
    plt.figure(figsize=(10, 10))
    plot_decision_regions(X.values, y.values, clf=model, legend=2)
    plt.title("Logistic Regression (LR)")
    plt.show()
def runPCA():
    """Load the UCI wine dataset, project it onto 2 principal components,
    fit a logistic regression on the projection, plot the training-set
    decision regions, and print the test-set accuracy.

    Returns None.
    """
    # Load the wine dataset.
    df_wine = pd.read_csv(
        'http://archive.ics.uci.edu/ml/machine-learning-databases/wine/wine.data',
        header=None)
    # Split features (columns 1..) from the class label (column 0).
    X, y = df_wine.iloc[:, 1:].values, df_wine.iloc[:, 0].values
    # 70% of the data for training, 30% for testing.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.3, random_state=0)
    # Standardize to zero mean / unit variance.
    sc = StandardScaler()
    X_train_std = sc.fit_transform(X_train)
    # BUG FIX: the original called fit_transform on the test set, re-fitting
    # the scaler on test data (leakage). The test set must be transformed
    # with the statistics learned from the training set only.
    X_test_std = sc.transform(X_test)
    pca = PCA(n_components=2)  # keep 2 principal components
    lr = LogisticRegression()  # logistic-regression classifier
    X_train_pca = pca.fit_transform(X_train_std)  # project train set onto PC subspace
    X_test_pca = pca.transform(X_test_std)        # project test set onto same subspace
    lr.fit(X_train_pca, y_train)
    plot_decision_regions(X_train_pca, y_train, clf=lr, legend=2)
    print(lr.score(X_test_pca, y_test))  # mean accuracy on the test set (~0.98)
    plt.xlabel('PC1')
    plt.ylabel('PC2')
    plt.legend(loc='lower left')
    plt.show()
    return
def decision_boundary(df):
    """Plot decision boundary for Logistic Regression.

    Standardizes the features, projects them from 47-d to 2-d with PCA,
    fits a logistic regression, and renders the regions via Streamlit.
    """
    features = df.drop(columns=['status'])
    target = df.status

    # Standardize every feature column.
    scaler = StandardScaler()
    scaled = pd.DataFrame(scaler.fit_transform(features),
                          columns=features.columns)

    X_train, _, y_train, _ = train_test_split(scaled, target, random_state=0)

    # Projection to 2d from 47d.
    pca = PCA(n_components=2)
    pca.fit(X_train.fillna(0))
    projected = pca.transform(X_train.fillna(0))
    df_train_pca = pd.DataFrame(data=projected, columns=['pca-1', 'pca-2'])

    model = LogisticRegression(max_iter=1000)
    model.fit(df_train_pca.fillna(0), y_train)

    plot_decision_regions(df_train_pca.values, y_train.values, clf=model,
                          res=0.02, zoom_factor=5)
    st.pyplot()
def SVC(self):
    """Train an RBF-kernel SVC on (self.X_train, self.y_train) and plot
    2-D decision-region slices with the third feature fixed at several
    values.

    Side effect: flattens self.y_train to 1-D in place.
    """
    from sklearn.svm import SVC
    # FIX: removed the unused make_pipeline / StandardScaler imports.

    self.y_train = self.y_train.flatten()

    # Training a classifier.
    svm = SVC(gamma='auto')
    svm.fit(self.X_train, self.y_train)

    # Plotting decision regions: one subplot per fixed value of feature 3.
    fig, axarr = plt.subplots(2, 2, figsize=(10, 8), sharex=True, sharey=True)
    values = [-4.0, -1.0, 1.0, 4.0]
    width = 0.75  # half-width of the slice around each fixed value
    for value, ax in zip(values, axarr.flat):
        plot_decision_regions(self.X_train, self.y_train, clf=svm,
                              filler_feature_values={2: value},
                              filler_feature_ranges={2: width},
                              legend=2, ax=ax)
        ax.set_xlabel('Feature 1')
        ax.set_ylabel('Feature 2')
        ax.set_title('Feature 3 = {}'.format(value))

    # Adding axes annotations.
    fig.suptitle('SVM on make_blobs')
    plt.tight_layout()
    plt.show()
def SVM_Linear_2(file):
    """Train a linear-kernel SVM on a synthetic 2-feature dataset, save the
    decision-region plot to *file*, and return formatted metric strings.

    Returns:
        tuple of str: (precision, recall, f1, confusion matrix) messages.
    """
    X, y = make_classification(n_features=2, n_redundant=0, n_informative=2,
                               n_samples=200, random_state=1,
                               n_clusters_per_class=1)
    rng = np.random.RandomState(2)
    X += 2 * rng.uniform(size=X.shape)

    # Linear-kernel SVM; normalize the data before training.
    C = 0.1
    clf = SVC(kernel="linear", C=C)
    X = StandardScaler().fit_transform(X)
    clf.fit(X, y)

    # Evaluate on the (training) data itself.
    y_pred = clf.predict(X)
    prec = precision_score(y_true=y, y_pred=y_pred, pos_label=1)
    rec = recall_score(y_true=y, y_pred=y_pred, pos_label=1)
    f1 = f1_score(y_true=y, y_pred=y_pred, pos_label=1)

    precision_score_ = "Precision score is : {:.2f}".format(prec)
    recall_score_ = "Recall score is : {:.2f}".format(rec)
    f1_score_ = "f1 score is : {:.2f}".format(f1)
    confusion_matrix_ = "Confusion matrix is :{}".format(
        confusion_matrix(y_pred=y_pred, y_true=y))

    plot_decision_regions(X, y, clf=clf, colors='orange,navy')
    plt.title("SVM with linear kernel")
    plt.savefig(file)
    plt.close()
    return precision_score_, recall_score_, f1_score_, confusion_matrix_
def main():
    """Train a k=20 KNN on the wine CSV, print test-set accuracy, and plot
    the decision regions learned from the training data."""
    wine_data_frame = pd.read_csv('wine_data_frame.csv')
    attrib_train, attrib_test, label_train, label_test = split_data_train_test(
        wine_data_frame)

    # Build and fit the classifier in one step.
    clf_result = KNeighborsClassifier(n_neighbors=20).fit(attrib_train,
                                                          label_train)

    # Predict the held-out labels and report prediction accuracy.
    label_pred = clf_result.predict(attrib_test)
    print(metrics.accuracy_score(label_test, label_pred))

    # Plot the regions over the training data.
    plot_decision_regions(attrib_train, label_train, clf=clf_result,
                          res=0.01, legend=2)
    plt.show()
def plot_tree_decision_regions(clf: DecisionTreeClassifier, fontsize=None):
    """Plot *clf*'s decision regions over the first two iris features,
    relabelling the legend with the species names."""
    fontsize = FONTSIZE if fontsize is None else fontsize

    X, y = datasets.load_iris(return_X_y=True)
    X = X[:, :2]  # sepal length / sepal width only
    labels = ['setosa', 'versicolor', 'virginica']

    fig, ax = plt.subplots(figsize=(10, 8))
    with plt.style.context({'lines.markersize': 10}):
        plot_decision_regions(X, y, clf, colors='C0,C1,C2', markers='ooo',
                              hide_spines=False, ax=ax)
    ax.set_xlabel('Sepal length (cm)', fontsize=fontsize)
    ax.set_ylabel('Sepal width (cm)', fontsize=fontsize)
    plt.xticks(fontsize=18)
    plt.yticks(fontsize=18)

    # Replace the numeric class labels in the legend with species names.
    leg = plt.legend(title='Iris species', fontsize=18)
    for idx, label in enumerate(labels):
        leg.get_texts()[idx].set_text(label)
    plt.setp(leg.get_title(), fontsize=fontsize)
    plt.show()
def do_pca_and_plot_decision_regions(clf):
    """Project the module-level train/test sets onto 2 principal
    components, fit *clf* on the projected training data, and plot its
    decision regions."""
    pca = PCA(n_components=2).fit(X_train)
    train_2d = pca.transform(X_train)
    test_2d = pca.transform(X_test)  # computed for parity with the original flow
    clf.fit(train_2d, y_train)
    plot_decision_regions(train_2d, y_train.to_numpy(), clf=clf, legend=2)
def SVM_RBF_Kernel_3(file):
    """Train an RBF-kernel SVM on a synthetic 2-feature dataset, save the
    decision-region plot to *file*, and return formatted metric strings.

    Returns:
        tuple of str: (precision, recall, f1, confusion matrix) messages.
    """
    X, y = make_classification(n_features=2, n_redundant=0, n_informative=2,
                               n_samples=200, random_state=1,
                               n_clusters_per_class=1)
    rng = np.random.RandomState(2)
    X += 2 * rng.uniform(size=X.shape)

    C = 0.1
    # 'rbf' is SVC's default kernel, so it need not be passed explicitly.
    clf = SVC(gamma=2., C=C)
    clf.fit(X, y)

    y_pred = clf.predict(X)
    prec = precision_score(y_true=y, y_pred=y_pred, pos_label=1)
    rec = recall_score(y_true=y, y_pred=y_pred, pos_label=1)
    f1 = f1_score(y_true=y, y_pred=y_pred, pos_label=1)
    precision_score_ = "Precision score is : {:.2f}".format(prec)
    recall_score_ = "Recall score is : {:.2f}".format(rec)
    f1_score_ = "f1 score is : {:.2f}".format(f1)
    confusion_matrix_ = "Confusion matrix is :{}".format(
        confusion_matrix(y_pred=y_pred, y_true=y))

    plot_decision_regions(X, y, clf=clf, colors='orange,navy')
    plt.title("SVM with rbf kernel")
    plt.savefig(file)
    plt.close()
    # FIX: the metric strings were computed but never returned, unlike the
    # parallel SVM_Linear_2; return them for consistency (backward
    # compatible — callers that ignored the None return are unaffected).
    return precision_score_, recall_score_, f1_score_, confusion_matrix_
def visualize_pathways_for_desease():
    """Plot per-pathway decision regions separating 'bc' samples.

    Reads FVA solutions, vectorizes the pathway features, and for each
    pathway plots a linear classifier's decision regions over its
    (min, max) feature pair.
    """
    X, y = DataReader().read_fva_solutions('fva_without.transports.txt')
    X = PathwayFvaDiffScaler().fit_transform(X, y)
    vect = DictVectorizer(sparse=False)
    X = vect.fit_transform(X, y)
    # Binary target: 1 for 'bc' samples, 0 otherwise.
    y = np.array([1 if i == 'bc' else 0 for i in y], dtype=int)

    if len(X) == 1:
        # Single sample: jitter it so a region can be drawn at all.
        X = X + np.reshape(np.random.normal(1, 100, size=len(X)), X.shape)
        clf = DecisionTreeClassifier(max_depth=2).fit(X, y)
        plot_decision_regions(X, y, clf=clf, res=0.5, legend=2)
        plt.xlabel(vect.feature_names_[0])
    else:
        # Feature names come in '<pathway>_min' / '<pathway>_max' pairs;
        # strip the 4-char suffix to recover each pathway name.
        for fn in set(map(lambda x: x[:-4], vect.feature_names_)):
            try:
                x = X[:, (vect.feature_names_.index('%s_min' % fn),
                          vect.feature_names_.index('%s_max' % fn))]
            # FIX: narrowed the bare except — only a missing feature name
            # (list.index raising ValueError) should skip the pathway.
            except ValueError:
                continue
            clf = LinearSVC(C=1e-4, random_state=43).fit(x, y)
            # Jitter after fitting so the scatter is readable.
            x = x + np.reshape(np.random.normal(1, 100, size=len(x) * 2),
                               x.shape)
            plot_decision_regions(X=x, y=y, clf=clf, legend=2, res=10)
            plt.xlabel('%s_min' % fn)
            plt.ylabel('%s_max' % fn)
            plt.show()
def draw_plot(clf, test, y_test):
    """Show *clf*'s decision regions over the test DataFrame.

    Relies on the module-level `classifier_type` string for the title.
    """
    plot_decision_regions(X=test.values, y=y_test.values, clf=clf, legend=2)
    plt.xlabel(test.columns[0], size=14)
    plt.ylabel(test.columns[1], size=14)
    plt.title(classifier_type + ' Decision Region Boundary', size=16)
    plt.show()
def main():
    """Fit softmax regression to two standardized iris features and plot
    both the decision regions and the training-cost curve."""
    from mlxtend.data import iris_data
    from mlxtend.plotting import plot_decision_regions
    import matplotlib.pyplot as plt

    # Loading Data: sepal length and petal width.
    X, y = iris_data()
    X = X[:, [0, 3]]

    # Standardize each feature to zero mean / unit variance.
    for col in (0, 1):
        X[:, col] = (X[:, col] - X[:, col].mean()) / X[:, col].std()

    lr = SoftmaxRegression(eta=0.01, epochs=10, minibatches=1, random_seed=0)
    lr.fit(X, y)

    plot_decision_regions(X, y, clf=lr)
    plt.title('Softmax Regression - Gradient Descent')
    plt.show()

    # Training cost per iteration.
    plt.plot(range(len(lr.cost_)), lr.cost_)
    plt.xlabel('Iterations')
    plt.ylabel('Cost')
    plt.show()
def plot_article_result_from_files(python_files, weights_files):
    """Recreate a 3x3 grid of decision-region plots from saved models.

    For each (model file, weights file) pair: dynamically import the model
    module from its path, rebuild the model via ``module.U3_U()``, load its
    pickled weights, and draw its decision regions into subplot i.

    NOTE(review): assumes at most 9 file pairs so the 3x3 grid fits —
    confirm with callers.
    """
    for i, (file_path, weight_path) in enumerate(zip(python_files, weights_files)):
        print('plotting', file_path)
        ax = plt.subplot(3, 3, i + 1)
        plt.minorticks_on()
        # Only the left column gets a y-label and only the bottom row an x-label.
        if i == 0 or i == 3 or i == 6:
            plt.ylabel('$x_2$')
        if i == 6 or i == 7 or i == 8:
            plt.xlabel('$x_1$')
        # Hide tick labels everywhere except the outer-left/bottom edges.
        if i < 6:
            plt.setp(ax.get_xticklabels(), visible=False)
        if i == 1 or i == 2 or i == 4 or i == 5 or i == 7 or i == 8:
            plt.setp(ax.get_yticklabels(), visible=False)
        # Import the model module by temporarily extending sys.path.
        path, file = os.path.split(file_path)
        sys.path.append(path)
        module_name = pathlib.Path(file).stem
        module = importlib.import_module(module_name)
        model = module.U3_U()
        # Load data for this subplot index.
        plain_x, plain_labels = article_2003_09887_data(i)
        plain_x = normalize_data(plain_x)
        model(plain_x[:2])  # forward pass — presumably builds the model before set_weights; verify
        # Load the pickled weights and install them.
        with open(weight_path, 'br') as w_file:
            weights = pickle.load(w_file)
        model.set_weights(weights)
        plot_decision_regions(X=plain_x, y=np.array(plain_labels, int),
                              clf=model, ax=ax, scatter_kwargs={'alpha': 0.5})
        sys.path.remove(path)  # remove the path of the file again
def main():
    """Train a k=5 KNN on the delay-time data, print the row-normalized
    confusion matrix, and save a decision-region plot to knn.png."""
    df = pd.read_csv("delay_time.csv")
    X = df[['mean', 'stdev']]
    Y = df['label'].map({'a': 0, 'b': 1, 'c': 2, 'd': 3, 'e': 4, 'f': 5})
    # Dividing the data: 80% as training data, 20% as testing data.
    X_train, X_test, Y_train, Y_test = train_test_split(
        X, Y, test_size=0.2, random_state=0
    )

    # knn. NOTE(review): fitted on the FULL dataset (X, Y) rather than
    # X_train — kept as-is, but the reported accuracy is optimistic.
    knn = KNeighborsClassifier(n_neighbors=5)
    knn.fit(X, Y)
    Y_pred = knn.predict(X_test)
    C = confusion_matrix(Y_test, Y_pred)

    # Normalization: divide each row by its true-class total.
    # FIX: np.float was removed in NumPy 1.24 — use the builtin float.
    # FIX: keepdims=True so the division normalizes rows; without it the
    # (n,) row sums broadcast across columns instead.
    NC = C / C.astype(float).sum(axis=1, keepdims=True)
    print(NC)
    for r in NC:
        for c in r:
            print("{}".format(c), end=",")

    # Plot the regions over the combined train + test points.
    X_combined = np.vstack((X_train, X_test))
    y_combined = np.hstack((Y_train, Y_test))
    plot_decision_regions(X_combined, y_combined, clf=knn)
    plt.xlabel('x []')
    plt.ylabel('y []')
    plt.xlim(-150, 50)
    plt.ylim(0, 70)
    plt.legend(loc='upper left')
    plt.savefig("knn.png")
    plt.close('all')
def svm_comparison(datas):
    """Fit a sigmoid-kernel SVC (C=0.5) on 2-component PCA features,
    save the decision-region plot, and print the confusion matrix."""
    # Feature matrix and target label ('hora_ideal').
    features = datas.drop(['hora_ideal'], axis=1).values
    target = datas["hora_ideal"].values

    # NOTE(review): PCA is fitted on the full dataset before the split —
    # preserved as-is.
    pca = PCA(n_components=2)
    reduced = pca.fit_transform(features)
    X_train2, X_test, y_train, y_test = train_test_split(reduced, target,
                                                         test_size=0.2)

    clf = SVC(kernel='sigmoid', C=0.5)
    clf.fit(X_train2, y_train)
    y_pred = clf.predict(X_test)

    # Plotting decision region.
    plt.figure(figsize=(8, 5), dpi=300)
    plot_decision_regions(X_train2, y_train, clf=clf, legend=2)

    # Adding axes annotations.
    plt.xlabel('Características')
    plt.ylabel('Objetivo')
    plt.title(
        "SVM:Límite de la región de decisión con 'kernel'= sigmoid y 'C' =0.5")
    plt.legend(bbox_to_anchor=(1.05, 1.0), loc='upper left')
    plt.tight_layout()
    plt.savefig('SVM_Limit.png', dpi=300)

    matriz = confusion_matrix(y_test, y_pred)
    print("Matriz de confusión")
    print(matriz)
    plt.show()
def plot_knn(data, grid, y_true, y_pred):
    """Plot the decision regions of the grid search's best estimator over
    the training data, then plot the confusion matrix for
    (y_true, y_pred)."""
    X_plot = data['X_train'].values
    # FIX: astype(np.integer) uses an abstract NumPy scalar type, which is
    # rejected by modern NumPy; cast with the builtin int instead.
    y_plot = data['y_train'].values.astype(int)
    plot_decision_regions(X_plot, y_plot, clf=grid.best_estimator_, legend=2,
                          scatter_kwargs=dict(s=20), markers='+o')
    cm = confusion_matrix(y_true, y_pred)
    fig, ax = plot_confusion_matrix(conf_mat=cm, show_absolute=True,
                                    show_normed=True, colorbar=True)
def displayData(X, y, grid=False, clf=None):
    """Scatter-plot a binary dataset and, optionally, a fitted SVM boundary.

    Positive samples (y == 1) are drawn as 'X' markers; negatives as yellow
    circles. If *clf* is given: a linear kernel is drawn as an explicit
    separating line, an rbf kernel via plot_decision_regions.

    Raises:
        TypeError: if clf has a kernel other than 'linear' or 'rbf'.
    """
    pos = y == 1
    neg = y == 0
    plt.plot(X[pos, 0], X[pos, 1], 'X', mew=1, ms=10, mec='k')
    plt.plot(X[neg, 0], X[neg, 1], 'o', mew=1, mfc='y', ms=10, mec='k')
    plt.grid(grid)
    if clf:
        if clf.kernel == 'linear':
            # Separating line: w0*x + w1*y + b = 0  =>  y = -(w0/w1)*x - b/w1.
            w = clf.coef_[0]
            a = -w[0] / w[1]
            xx = np.linspace(np.min(X[:, 0]), np.max(X[:, 0]), 10)
            yy = a * xx - clf.intercept_[0] / w[1]
            plt.plot(xx, yy, 'k-')
        elif clf.kernel == 'rbf':
            # Non-linear boundary: delegate to plot_decision_regions, then
            # clamp the axes to the data range.
            x_min, x_max = X[:, 0].min(), X[:, 0].max()
            y_min, y_max = X[:, 1].min(), X[:, 1].max()
            plot_decision_regions(X, y, clf=clf, legend=2)
            plt.xlim(x_min, x_max)
            plt.ylim(y_min, y_max)
            # h = 0.2  # Mesh step size
            # cm = plt.cm.RdBu
            # x_min, x_max = X[:, 0].min(), X[:, 0].max()
            # y_min, y_max = X[:, 1].min(), X[:, 1].max()
            # xx, yy = np.meshgrid(np.arange(x_min, x_max, h),
            #                      np.arange(y_min, y_max, h))
            # if hasattr(clf, "decision_function"):
            #     Z = clf.decision_function(np.c_[xx.ravel(), yy.ravel()])
            # else:
            #     Z = clf.predict_proba(np.c_[xx.ravel(), yy.ravel()])[:, 1]
            # Z = Z.reshape(xx.shape)
            # plt.contourf(xx, yy, Z, cmap=cm, alpha=.8)
        else:
            raise TypeError('Must be rbf or linear (for now)')
def KNN():
    """Train a k=10 KNN on PCA-reduced electrode data and plot its
    decision regions."""
    allData = pd.read_csv('Data_26and30_electrode.csv')
    allLabel = pd.read_csv('Data_26and30_electrode_label.csv').values.reshape(
        -1, )

    # PCA down to two components.
    # FIX: the data was fitted twice (pca.fit followed by pca.fit_transform);
    # a single fit_transform is sufficient.
    pca = PCA(n_components=2)
    newSet = pca.fit_transform(allData)

    # Data split, then standardization fitted on the training split only.
    X_train, X_test, y_train, y_test = train_test_split(newSet, allLabel)
    standardScaler = StandardScaler()
    standardScaler.fit(X_train)
    X_train = standardScaler.transform(X_train)
    X_test = standardScaler.transform(X_test)

    knn_model = KNeighborsClassifier(n_neighbors=10)
    knn_model.fit(X_train, y_train)
    knn_model.score(X_test, y_test)  # NOTE(review): score is computed but discarded
    # FIX: removed an unused single-sample prediction (y_predict).

    # Results visualization.
    # NOTE(review): regions are plotted over the UNscaled PCA data while the
    # model was trained on scaled data — kept as-is; confirm intent.
    plot_decision_regions(newSet, allLabel, clf=knn_model, legend=2)
    plt.xlabel('X')
    plt.ylabel('Y')
    plt.title('Knn model')
    plt.show()
def svm_graphs(mdls, data, test_n):
    """Fit each model in *mdls* on the (x, y) points in *data* and save one
    decision-region plot per model under SvmGraphsTest/.

    Args:
        mdls: iterable of three classifiers, ordered linear / rbf / poly.
        data: (points, labels) where points is an iterable of 2-sequences.
        test_n: test index used in the output filenames.
    """
    titles = ('LinearSVC (linear kernel)', 'SVC with RBF kernel',
              'SVC with polynomial (degree 3) kernel')

    # Assemble the feature matrix and label vector.
    X = np.asarray([[d[0], d[1]] for d in data[0]])
    Y = np.asarray(data[1])

    graph_models = [clf.fit(X, Y) for clf in mdls]
    for clf, ttl in zip(graph_models, titles):
        # FIX: start a fresh figure per model and close it after saving —
        # previously successive plots were drawn over the same figure.
        plt.figure()
        plot_decision_regions(X=X, y=Y, clf=clf, legend=2)
        # FIX: the raw coordinate lists were passed as axis labels; use
        # descriptive strings instead.
        plt.xlabel('x')
        plt.ylabel('y')
        plt.title(ttl)

        filename = 'SvmGraphsTest/Data' + str(test_n)
        if ttl == 'LinearSVC (linear kernel)':
            filename += 'LinearSvc'
        elif ttl == 'SVC with RBF kernel':
            filename += 'SvcRbf'
        else:
            filename += 'SvcPoly'
        filename += '.png'
        plt.savefig(filename)
        plt.close()
def plotter():
    """Read a new (x, y, class) point from the Tk entry fields, refit the
    Earth + LogisticRegression pipeline on all accumulated points, and
    refresh the displayed plot image."""
    global xlist, ylist
    plt.clf()

    # Pull the new sample from the UI and clear the fields.
    x_val = contentx.get()
    y_val = contenty.get()
    colo = contentc.get()
    xlist.append([float(x_val), float(y_val)])
    ylist.append(int(colo))
    contentx.set("")
    contenty.set("")
    contentc.set("")

    features = np.array(xlist)
    targets = np.array(ylist)

    # Refit the whole pipeline on every accumulated point.
    earth_classifier = Pipeline([('earth', Earth()),
                                 ('logistic', LogisticRegression())])
    earth_classifier.fit(features, targets)

    plot_decision_regions(features, targets, clf=earth_classifier, legend=2)
    plt.savefig('foo.png')
    root.photo_n = ImageTk.PhotoImage(Image.open('foo.png'))
    vlabel.configure(image=root.photo_n)
    print("Image Updated")
def main():
    """Grid-search an MLP on a noisy synthetic 2-feature dataset, report
    its generalization AUPRC, and plot decision regions with the unseen
    split highlighted."""
    seed = 0
    np.random.seed(seed)

    # Synthetic binary dataset with added uniform noise.
    X, y = make_classification(n_features=2, n_redundant=0, random_state=seed,
                               n_informative=2, n_clusters_per_class=1)
    X = X + np.random.uniform(-.5, .5, X.shape[0] * 2).reshape(X.shape)
    frame = pd.DataFrame(X, columns=["A", "B"])
    frame["Response"] = pd.Series(y)

    # Stratified 80/20 split into training and unseen data.
    train_df, unseen_df = train_test_split(frame.copy(), test_size=0.2,
                                           stratify=frame["Response"],
                                           random_state=seed)

    # 5) modelling
    mlp_param_grid = {'mlpc__hidden_layer_sizes': [(3), (6), (3, 3), (5, 5)],
                      'mlpc__learning_rate_init': [0.001, 0.01]}
    search = grid_search_MLP(train_df, mlp_param_grid, seed)
    print("Best parameter set: ", search.best_params_)

    # 6) retraining & assessment of generalization ability
    auprc = assess_generalization_auprc(search.best_estimator_, unseen_df)
    print("AUPRC: {:.2f}".format(auprc))

    plot_decision_regions(X=frame.iloc[:, :-1].values,
                          y=frame.iloc[:, -1].values,
                          clf=search.best_estimator_,
                          X_highlight=unseen_df.iloc[:, :-1].values,
                          scatter_highlight_kwargs={'s': 120,
                                                    'label': 'Test data',
                                                    'alpha': 0.7})
    plt.show()
def Highlighting_Test_Data_Points():
    """Train a linear SVM on the 2-cluster CSV data and plot its decision
    regions, highlighting the held-out test points."""
    from mlxtend.plotting import plot_decision_regions
    from mlxtend.preprocessing import shuffle_arrays_unison
    import matplotlib.pyplot as plt
    from sklearn.svm import SVC
    # FIX: removed the unused iris dataset load and its sklearn.datasets
    # import — the function plots the CSV data, not iris.

    data = pd.read_csv('2clstrain1200.csv', header=None)
    X, y = data.iloc[:, 0:2].values, data.iloc[:, 2].values
    # FIX: astype(np.integer) uses an abstract NumPy scalar type, rejected
    # by modern NumPy; cast with the builtin int instead.
    X = X.astype(int)
    y = y.astype(int)
    X, y = shuffle_arrays_unison(arrays=[X, y], random_seed=3)
    X_train, y_train = X[:700], y[:700]
    X_test, y_test = X[700:], y[700:]

    # Training a classifier.
    svm = SVC(C=0.5, kernel='linear')
    svm.fit(X_train, y_train)

    # Plotting decision regions, highlighting the test points.
    plot_decision_regions(X, y, clf=svm, legend=2, X_highlight=X_test)

    # Adding axes annotations.
    plt.xlabel('')
    plt.ylabel('')
    plt.title('SVM on Iris')
    plt.show()
def svm_classification():
    """Fit an SVC on the tf-idf tweet vectors, plot its decision regions,
    print recall / precision / micro-F1 on the test split, and return the
    test predictions."""
    df_test, df_train, df = tf_idf_vect_feature_vector()
    clf = SVC()
    clf.fit(df_train['tweets_vec'].tolist(), df_train['tag'].tolist())

    # Plot Decision Region using mlxtend's plotting function.
    plot_decision_regions(X=df_train['tweets_vec'].tolist(),
                          y=df_train['tag'].tolist(),
                          clf=clf, legend=2)
    plt.title('SVM Decision Region Boundary', size=16)
    plt.show()

    # FIX: predict() takes only the feature matrix; the original also
    # passed the labels as a second positional argument, which raises a
    # TypeError at runtime.
    predict = clf.predict(df_test['tweets_vec'].tolist())
    print(predict)
    print('Recall:', recall_score(df_test['tag'].tolist(), predict))
    print('Precision:', precision_score(df_test['tag'].tolist(), predict))
    f1 = f1_score(df_test['tag'].tolist(), predict, average='micro')
    print("f1_score is :", f1)
    return predict
def dt_classifier(max_depth=None):
    """Fit a decision tree (optionally depth-limited) on the module-level
    training data and show its decision regions."""
    tree = DecisionTreeClassifier(max_depth=max_depth)
    tree.fit(X_train, y_train)
    plot_decision_regions(X_train, y_train, clf=tree, legend=2,
                          markers='oooo^v')
    plt.show()
def plot(self):
    """Plot this object's decision regions, shading each region red in
    proportion to its (min-max normalized) weight of evidence.

    Returns:
        The matplotlib Figure containing the plot.
    """
    fig = plt.figure()
    # Collect WoE values in sorted-key order and normalize to [0, 1].
    ordered_keys = sorted(self.region_woe.keys())
    woe_norm = np.array([self.region_woe[k] for k in ordered_keys])
    woe_norm -= woe_norm.min()
    woe_norm /= woe_norm.max()
    # Map each normalized value to a '#RR0000' hex color string.
    colors = ','.join("#{:02x}".format(int(np.abs(w) * 255)) + "0000"
                      for w in woe_norm)
    plot_decision_regions(self.x, self.region.flatten(), clf=self, legend=2,
                          colors=colors)
    return fig
def plot_boundaries(X, y, clf):
    """Render *clf*'s decision regions over the two feature columns of *X*."""
    feature_names = X.columns
    plot_decision_regions(X=X.values, y=y.values, clf=clf, res=0.02, legend=2)
    # Adding axes annotations.
    plt.xlabel(feature_names[0])
    plt.ylabel(feature_names[1])
    plt.title('Predictions of the model')
    plt.show()
def plotResult(X_std, y, clf):
    """Show the Adaline-SGD decision regions over the standardized data."""
    plot_decision_regions(X_std, y, clf=clf)
    plt.title('Adaline - Stochastic Gradient Descent')
    plt.xlabel('sepal length [standardized]')
    plt.ylabel('petal length [standardized]')
    plt.legend(loc='upper left')
    plt.show()
def plot(data, labels, clf, title):
    """Plot *clf*'s decision regions over the aim-change data, unless
    plotting is globally disabled via PLOT_DATASETS."""
    if not PLOT_DATASETS:
        return
    plot_decision_regions(X=data, y=labels, clf=clf, legend=2)
    plt.xlabel('Maximum Aim Yaw Change (Degrees)', size=14)
    plt.ylabel('Maximum Aim Pitch Change (Degrees)', size=14)
    plt.title(title, size=16)
    plt.show()
def test_pass():
    """Smoke test: fitting on the first two features and plotting the
    decision regions should not raise."""
    two_features = X[:, :2]
    sr.fit(two_features, y)
    plot_decision_regions(X=two_features, y=y, clf=sr)