Example #1
import matplotlib.pyplot as plt
import numpy as np
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC

from plots import PlotFigures

# This project provides the same capabilities as the LogisticRegression.py program,
# but uses a radial basis function (RBF) kernel for the learning algorithm rather
# than a sigmoid function. This allows it to separate classes whose decision
# boundaries are non-linear.

iris = datasets.load_iris()
X = iris.data[:, [2,3]]
y = iris.target

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)
sc = StandardScaler()
sc.fit(X_train)
X_train_std = sc.transform(X_train)
X_test_std = sc.transform(X_test)

X_combined_std = np.vstack((X_train_std, X_test_std))
y_combined = np.hstack((y_train, y_test))

g = float(input("Input gamma: "))

svm = SVC(C=1.0, kernel='rbf', gamma=g, random_state=0, probability=True)
svm.fit(X_train_std, y_train)

PlotFigures.plot_decision_regions(X_combined_std, y_combined, classifier=svm, test_idx=range(105, 150))
plt.show()

# Print the class-membership probabilities for each standardized test sample.
for X_test_val in X_test_std:
    print(svm.predict_proba(X_test_val.reshape(1, -1)))
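
# As a side note, a minimal sketch (not part of the example above) of the RBF kernel
# the SVC uses: k(x, z) = exp(-gamma * ||x - z||^2). It shows how the gamma entered
# at the prompt scales similarity; the points and gamma values below are arbitrary.
def rbf_kernel_value(x, z, gamma):
    return np.exp(-gamma * np.sum((x - z) ** 2))

a = np.array([0.0, 0.0])
b = np.array([1.0, 1.0])
for gamma_value in (0.1, 1.0, 10.0):
    # Small gamma keeps the kernel near 1 (wide influence, smoother boundary);
    # large gamma drives it toward 0 (narrow influence, more complex boundary).
    print(gamma_value, rbf_kernel_value(a, b, gamma_value))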
Example #2
import matplotlib.pyplot as plt
import numpy as np
from sklearn.svm import SVC

from plots import PlotFigures

# This program is a simulation of a data set with a non-linear separation region.
# The data is drawn with numpy's randn function after seeding the generator with 0,
# so the same points are produced on every run. Each point is labeled by the XOR of
# the signs of its two coordinates: points in the first and third quadrants get one
# label, points in the second and fourth get the other, so no straight line can
# separate the classes. Because randn samples a standard normal (mean 0, standard
# deviation 1), nearly all points fall within roughly -3 to 3 on each axis.
# Scikit-learn's SVC class with an RBF kernel then separates the two groups, and
# matplotlib.pyplot plots the colored decision regions.

np.random.seed(0)
X_xor = np.random.randn(200, 2)
y_xor = np.logical_xor(X_xor[:, 0] > 0, X_xor[:, 1] > 0)
y_xor = np.where(y_xor, 1, -1)
g = float(input("Input gamma: "))
# The gamma value is the RBF kernel coefficient: it controls how far the influence
# of a single training sample reaches, and therefore how tightly the decision
# boundary wraps around the data (small gamma = smoother, large gamma = tighter).
# See the help page for more details: help(sklearn.svm.SVC)
svm = SVC(kernel='rbf', C=10.0, gamma=g, random_state=0)
svm.fit(X_xor, y_xor)

# There is no train/test split here, so no test samples are highlighted.
PlotFigures.plot_decision_regions(X_xor, y_xor, classifier=svm)
plt.show()

# Unlike Example #1, there is no held-out test set here, so no per-sample
# probabilities are printed.
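
# To back up the comment that the XOR data is not linearly separable, a small sketch
# (not part of the example above, and reusing the X_xor and y_xor arrays defined
# earlier) comparing a linear-kernel SVC with the RBF-kernel SVC; the training
# accuracy gap is the point, not the exact numbers.
for kernel_name in ('linear', 'rbf'):
    clf = SVC(kernel=kernel_name, C=10.0, gamma=0.1, random_state=0)
    clf.fit(X_xor, y_xor)
    # The linear kernel stays near chance on XOR; the RBF kernel fits it cleanly.
    print(kernel_name, clf.score(X_xor, y_xor))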
Example #3
import matplotlib.pyplot as plt
import numpy as np
from sklearn import datasets
from sklearn.linear_model import LogisticRegression, Perceptron
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

from plots import PlotFigures

# Train a perceptron on the standardized iris petal features, then examine how the
# regularization strength C affects the weights of a logistic regression model.

iris = datasets.load_iris()
X = iris.data[:, [2, 3]]
y = iris.target

X_train, X_test, y_train, y_test = train_test_split(X, y,
        test_size=0.3, random_state=0)

sc = StandardScaler()
sc.fit(X_train)
X_train_std = sc.transform(X_train)
X_test_std = sc.transform(X_test)

# Note: older scikit-learn versions named this parameter n_iter instead of max_iter.
ppn = Perceptron(max_iter=40, eta0=0.1, random_state=0)
ppn.fit(X_train_std, y_train)

y_pred = ppn.predict(X_test_std)
print('Misclassified samples: %d' % (y_test != y_pred).sum())

X_combined_std = np.vstack((X_train_std, X_test_std))
y_combined = np.hstack((y_train, y_test))
PlotFigures.plot_decision_regions(X=X_combined_std,
        y=y_combined, classifier=ppn, test_idx=range(105, 150))
plt.xlabel('petal length [standardized]')
plt.ylabel('petal width [standardized]')
plt.legend(loc='upper left')
plt.show()
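
# As a quick cross-check on the misclassification count printed above, a short
# sketch reporting the same result as an accuracy score (it reuses the y_test and
# y_pred variables defined earlier in this example).
from sklearn.metrics import accuracy_score

# accuracy = 1 - misclassified / len(y_test)
print('Accuracy: %.2f' % accuracy_score(y_test, y_pred))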

weights, params = [], []
for c in np.arange(-5, 5):
    # Use a float base: raising an integer to a negative numpy integer power raises an error.
    lr = LogisticRegression(C=10.0**c, random_state=0)
    lr.fit(X_train_std, y_train)
    weights.append(lr.coef_[1])
    params.append(10.0**c)

weights = np.array(weights)
plt.plot(params, weights[:, 0], label='petal length')
plt.plot(params, weights[:, 1], linestyle='--', label='petal width')