import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from plots import PlotFigures

# This program provides the same capabilities as the LogisticRegression.py program
# but uses a radial basis function (RBF) kernel for the learning algorithm rather
# than a sigmoid (logistic) function. This allows it to separate data into sets
# with non-linear boundaries.

iris = datasets.load_iris()
X = iris.data[:, [2, 3]]
y = iris.target

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3,
                                                    random_state=0)

# Standardize features using the training set's mean and standard deviation.
sc = StandardScaler()
sc.fit(X_train)
X_train_std = sc.transform(X_train)
X_test_std = sc.transform(X_test)

X_combined_std = np.vstack((X_train_std, X_test_std))
y_combined = np.hstack((y_train, y_test))

g = float(input("Input gamma: "))

svm = SVC(C=1.0, kernel='rbf', gamma=g, random_state=0, probability=True)
svm.fit(X_train_std, y_train)

PlotFigures.plot_decision_regions(X_combined_std, y_combined, classifier=svm,
                                  test_idx=range(105, 150))
plt.show()

for X_test_val in X_test_std:
    print(svm.predict_proba(X_test_val.reshape(1, -1)))
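# --- Illustrative aside (not part of the program above) ---
# The gamma prompted for above parameterizes the RBF kernel
# K(x, x') = exp(-gamma * ||x - x'||^2). This minimal sketch, with made-up
# sample points and a made-up gamma value, computes that formula by hand and
# checks it against scikit-learn's rbf_kernel helper.
import numpy as np
from sklearn.metrics.pairwise import rbf_kernel

a = np.array([[0.5, -1.2]])  # hypothetical standardized sample
b = np.array([[1.0, 0.3]])   # hypothetical standardized sample
gamma = 0.2                  # plays the same role as the value prompted for above

# RBF kernel by hand: exp(-gamma * squared Euclidean distance).
manual = np.exp(-gamma * np.sum((a - b) ** 2))
print(manual)                           # kernel value computed by hand
print(rbf_kernel(a, b, gamma=gamma))    # should print the same value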
import numpy as np
import matplotlib.pyplot as plt
from sklearn.svm import SVC
from plots import PlotFigures

# This program is a simulation of a data set with a non-linear separation region.
# The data comes from numpy's randn function, which is pseudo-random: the
# generator is seeded with the same number (0) each run, so the data set is
# identical every time. Each point is labeled by an XOR over the signs of its two
# coordinates, i.e. by which of the four quadrants it falls in. Because randn
# draws from a normal distribution with mean 0 and standard deviation 1, all 200
# points happen to fall inside the range (-3, 3) for this seed; that is a
# property of the fixed seed, not a guarantee of the distribution. Scikit-learn's
# SVC then separates the data into two distinct groups, and matplotlib.pyplot
# plots the colored output.

np.random.seed(0)
X_xor = np.random.randn(200, 2)
y_xor = np.logical_xor(X_xor[:, 0] > 0, X_xor[:, 1] > 0)
y_xor = np.where(y_xor, 1, -1)

g = float(input("Input gamma: "))
# The gamma value sets the width of the Gaussian (RBF) sphere of influence around
# each training point, and so varies how tightly the decision boundary wraps the
# two groups. See the help page for more details: help(sklearn.svm.SVC)

svm = SVC(kernel='rbf', C=10.0, gamma=g, random_state=0)
svm.fit(X_xor, y_xor)

# There is no train/test split here, so no test points are highlighted.
PlotFigures.plot_decision_regions(X_xor, y_xor, classifier=svm)
plt.show()
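# --- Illustrative aside (a sketch, assuming the same XOR data as above) ---
# Larger gamma shrinks each training point's sphere of influence, so the RBF
# boundary hugs the training data more tightly. Sweeping a few made-up gamma
# values and printing the training accuracy makes the effect visible: accuracy
# rises with gamma, which on training data alone can signal overfitting.
import numpy as np
from sklearn.svm import SVC

np.random.seed(0)
X_xor = np.random.randn(200, 2)
y_xor = np.where(np.logical_xor(X_xor[:, 0] > 0, X_xor[:, 1] > 0), 1, -1)

for gamma in (0.01, 0.1, 1.0, 10.0, 100.0):
    svm = SVC(kernel='rbf', C=10.0, gamma=gamma, random_state=0)
    svm.fit(X_xor, y_xor)
    print('gamma=%-6s training accuracy=%.3f' % (gamma, svm.score(X_xor, y_xor)))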
import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import Perceptron, LogisticRegression
from plots import PlotFigures

# Load the Iris petal features and split off 30% of the samples for testing.
iris = datasets.load_iris()
X = iris.data[:, [2, 3]]
y = iris.target
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3,
                                                    random_state=0)

# Standardize features using the training set's mean and standard deviation.
sc = StandardScaler()
sc.fit(X_train)
X_train_std = sc.transform(X_train)
X_test_std = sc.transform(X_test)

# Train a perceptron and count its errors on the test set.
# (Older scikit-learn versions spelled the max_iter parameter n_iter.)
ppn = Perceptron(max_iter=40, eta0=0.1, random_state=0)
ppn.fit(X_train_std, y_train)
y_pred = ppn.predict(X_test_std)
print('Misclassified samples: %d' % (y_test != y_pred).sum())

X_combined_std = np.vstack((X_train_std, X_test_std))
y_combined = np.hstack((y_train, y_test))
PlotFigures.plot_decision_regions(X=X_combined_std, y=y_combined,
                                  classifier=ppn, test_idx=range(105, 150))
plt.xlabel('petal length [standardized]')
plt.ylabel('petal width [standardized]')
plt.legend(loc='upper left')
plt.show()

# Trace how the class-1 logistic regression weights shrink as the inverse
# regularization strength C decreases. (10.0**c avoids numpy's error on
# raising an integer to a negative integer power.)
weights, params = [], []
for c in np.arange(-5, 5):
    lr = LogisticRegression(C=10.0**c, random_state=0)
    lr.fit(X_train_std, y_train)
    weights.append(lr.coef_[1])
    params.append(10.0**c)

weights = np.array(weights)
plt.plot(params, weights[:, 0], label='petal length')
plt.plot(params, weights[:, 1], linestyle='--', label='petal width')
plt.ylabel('weight coefficient')
plt.xlabel('C')
plt.legend(loc='upper left')
plt.xscale('log')
plt.show()
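# --- Illustrative aside (a self-contained sketch of the same idea) ---
# The plot above shows the L2-regularized weights shrinking as C falls. The
# same shrinkage can be read off numerically: refit at a few C values (chosen
# arbitrarily here) and print the class-1 weight vector.
import numpy as np
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression

iris = datasets.load_iris()
X = iris.data[:, [2, 3]]
y = iris.target
X_train, _, y_train, _ = train_test_split(X, y, test_size=0.3, random_state=0)
X_train_std = StandardScaler().fit_transform(X_train)

for c in (-5, 0, 4):
    lr = LogisticRegression(C=10.0 ** c, random_state=0)
    lr.fit(X_train_std, y_train)
    # Smaller C = stronger regularization = weights closer to zero.
    print('C=10^%-2d coef_[1]=%s' % (c, lr.coef_[1]))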