Example #1
# Plot the two classes (first 50 rows are Setosa, the next 50 Versicolor)
plt.scatter(X[:50, 0], X[:50, 1],
            color='red', marker='o',
            label='Setosa')
plt.scatter(X[50:100, 0], X[50:100, 1],
            color='blue', marker='x',
            label='Versicolor')

plt.title('Iris dataset features')
plt.xlabel('Sepal length [cm]')
plt.ylabel('Petal length [cm]')
plt.legend(loc='upper left')

plt.show()

# Train the Perceptron model
perceptron = Perceptron(eta=0.1, n_iter=10)
perceptron.fit(X, y)

# Plot the number of updates per epoch
plt.plot(range(1, len(perceptron.errors_) + 1), perceptron.errors_, marker='o')

plt.title('Perceptron - Updates per epoch')
plt.xlabel('Epochs')
plt.ylabel('Number of updates')

plt.show()

# Plot decision regions
plot_decision_regions(X, y, classifier=perceptron)

plt.title('Perceptron - Decision regions')
plt.xlabel('Sepal length [cm]')
plt.ylabel('Petal length [cm]')
plt.legend(loc='upper left')

plt.show()
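The Perceptron used here, with its eta/n_iter constructor and errors_ list, behaves like a hand-rolled implementation rather than sklearn.linear_model.Perceptron. A minimal sketch of such a class, assuming -1/1 class labels and small random weight initialization:

import numpy as np

class Perceptron:
    """Sketch of a perceptron: eta is the learning rate,
    n_iter the number of passes (epochs) over the training set."""

    def __init__(self, eta=0.1, n_iter=10, random_state=1):
        self.eta = eta
        self.n_iter = n_iter
        self.random_state = random_state

    def fit(self, X, y):
        rng = np.random.RandomState(self.random_state)
        # Small random initial weights; w_[0] acts as the bias unit
        self.w_ = rng.normal(loc=0.0, scale=0.01, size=1 + X.shape[1])
        self.errors_ = []  # misclassifications per epoch, plotted above
        for _ in range(self.n_iter):
            errors = 0
            for xi, target in zip(X, y):
                update = self.eta * (target - self.predict(xi))
                self.w_[1:] += update * xi
                self.w_[0] += update
                errors += int(update != 0.0)
            self.errors_.append(errors)
        return self

    def net_input(self, X):
        return np.dot(X, self.w_[1:]) + self.w_[0]

    def predict(self, X):
        # Unit step: class 1 at or above the threshold, -1 below
        return np.where(self.net_input(X) >= 0.0, 1, -1)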
Example #2
# Plot dataset
plt.scatter(X_xor[y_xor == 1, 0],
            X_xor[y_xor == 1, 1],
            c='b',
            marker='x',
            label='1')
plt.scatter(X_xor[y_xor == -1, 0],
            X_xor[y_xor == -1, 1],
            c='r',
            marker='s',
            edgecolor='black',
            label='-1')

plt.legend(loc='best')

plt.tight_layout()
plt.show()

# SVM classifier with Gaussian kernel (RBF) for non-linearly separable data
svm = SVC(kernel='rbf', C=1.0, gamma=0.10, random_state=1)
svm.fit(X_xor, y_xor)

# Plot decision regions
plot_decision_regions(X=X_xor, y=y_xor, classifier=svm)

plt.title('SVM - linearly inseparable')
plt.legend(loc='upper left')

plt.tight_layout()
plt.show()
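X_xor and y_xor are not defined in this snippet; a common way to build such a 2D XOR dataset (the sample count and seed here are assumptions) is:

import numpy as np

# Hypothetical XOR dataset: two Gaussian features, label +1 where
# exactly one of the two features is positive, -1 otherwise
rng = np.random.RandomState(1)
X_xor = rng.randn(200, 2)
y_xor = np.logical_xor(X_xor[:, 0] > 0, X_xor[:, 1] > 0)
y_xor = np.where(y_xor, 1, -1)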
Example #3
# Load the iris dataset (all 150 samples)
iris_data = datasets.load_iris()

# Separate train and test samples (train: 70%, test: 30% default) and apply feature standardization
X_train_std, X_test_std, y_train, y_test = init_dataset(dataset=iris_data)

# Random Forest classifier (an ensemble of Decision Trees) using Gini impurity as the split criterion
forest = RandomForestClassifier(criterion='gini',
                                n_estimators=25,
                                random_state=1,
                                n_jobs=2)
forest.fit(X_train_std, y_train)

# Put together train and test data
X_std = np.vstack((X_train_std, X_test_std))
y = np.hstack((y_train, y_test))

# Plot decision regions
plot_decision_regions(X=X_std,
                      y=y,
                      classifier=forest,
                      test_idx=range(105, 150))

plt.title('Random Forest - Scikit')
plt.xlabel('Petal length [standardized]')
plt.ylabel('Petal width [standardized]')
plt.legend(loc='upper left')

plt.tight_layout()
plt.show()
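init_dataset is a project-specific helper; judging from the comment above it and the test_idx=range(105, 150) split, it could be sketched roughly as follows (the two-feature selection matching the petal axis labels is an assumption):

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

def init_dataset(dataset, test_size=0.3, random_state=1):
    """Hypothetical helper: split a sklearn Bunch 70/30 and standardize."""
    X = dataset.data[:, [2, 3]]  # petal length and petal width
    y = dataset.target
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state, stratify=y)
    stds = StandardScaler()
    X_train_std = stds.fit_transform(X_train)
    X_test_std = stds.transform(X_test)
    return X_train_std, X_test_std, y_train, y_test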
Example #4
y = np.where(y == 'Iris-setosa', 0, 1)  # Encode Setosa as 0 and Versicolor as 1
X = df.iloc[
    0:100,
    [2, 3]].values  # Extract petal length (col 2) and petal width (col 3)

# Separate data for testing
X_train, X_test, y_train, y_test = train_test_split(X,
                                                    y,
                                                    test_size=0.3,
                                                    random_state=1,
                                                    stratify=y)

# Keep only classes 0 and 1 (here this keeps all rows, since y was binarized above)
X_train_01_subset = X_train[(y_train == 0) | (y_train == 1)]
y_train_01_subset = y_train[(y_train == 0) | (y_train == 1)]

# Train model
lrgd = LogisticRegressionGD(eta=0.05, n_iter=1000, random_state_seed=1)
lrgd.fit(X_train_01_subset, y_train_01_subset)

# Plot decision regions
plot_decision_regions(X=X_train_01_subset,
                      y=y_train_01_subset,
                      classifier=lrgd)

plt.title('Logistic Regression (GD) - Decision regions')
plt.xlabel('Petal length [standardized]')
plt.ylabel('Petal width [standardized]')
plt.legend(loc='upper left')

plt.tight_layout()
plt.show()
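LogisticRegressionGD is likewise a custom class (note the random_state_seed argument). A compact sketch consistent with that interface, assuming full-batch gradient descent on the logistic cost and 0/1 labels:

import numpy as np

class LogisticRegressionGD:
    """Hypothetical sketch: binary logistic regression trained with
    full-batch gradient descent."""

    def __init__(self, eta=0.05, n_iter=1000, random_state_seed=1):
        self.eta = eta
        self.n_iter = n_iter
        self.random_state_seed = random_state_seed

    def fit(self, X, y):
        rng = np.random.RandomState(self.random_state_seed)
        self.w_ = rng.normal(loc=0.0, scale=0.01, size=1 + X.shape[1])
        self.cost_ = []
        for _ in range(self.n_iter):
            net_input = np.dot(X, self.w_[1:]) + self.w_[0]
            # Sigmoid activation, clipped for numerical stability
            output = 1.0 / (1.0 + np.exp(-np.clip(net_input, -250, 250)))
            errors = y - output
            self.w_[1:] += self.eta * X.T.dot(errors)
            self.w_[0] += self.eta * errors.sum()
            # Negative log-likelihood cost per epoch
            cost = -y.dot(np.log(output)) - (1 - y).dot(np.log(1 - output))
            self.cost_.append(cost)
        return self

    def predict(self, X):
        return np.where(np.dot(X, self.w_[1:]) + self.w_[0] >= 0.0, 1, 0)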
Example #5
X, y = df_wine.iloc[:, 1:].values, df_wine.iloc[:, 0].values
X_train, X_test, y_train, y_test = train_test_split(X,
                                                    y,
                                                    test_size=0.3,
                                                    random_state=0,
                                                    stratify=y)

# Standardize the features
stds = StandardScaler()
X_train_std = stds.fit_transform(X_train)
X_test_std = stds.transform(X_test)

pca = PCA(n_components=2)
lr = LogisticRegression(multi_class='ovr', random_state=1, solver='lbfgs')

# Dimensionality reduction
X_train_pca = pca.fit_transform(X_train_std)
X_test_pca = pca.transform(X_test_std)

# Fit the logistic regression model on the reduced dataset
lr.fit(X_train_pca, y_train)

# Plot decision regions
plot_decision_regions(X_train_pca, y_train, classifier=lr)

plt.xlabel('PC1')
plt.ylabel('PC2')
plt.legend(loc='lower left')

plt.tight_layout()
plt.show()
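A quick follow-up worth adding here (not in the original listing): check how much variance the two principal components retain before trusting the 2D plot.

# Fraction of the total variance captured by PC1 and PC2
print(pca.explained_variance_ratio_)
print(f'Total: {pca.explained_variance_ratio_.sum():.3f}')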
Example #6

# Compare the ADALINE cost per epoch for two learning rates (two-panel figure)
fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(10, 4))

ada1 = AdalineGD(n_iter=10, eta=0.01).fit(X, y)
ax[0].plot(range(1, len(ada1.cost_) + 1), np.log10(ada1.cost_), marker='o')
ax[0].set_title('ADALINE_GD - eta=0.01')
ax[0].set_xlabel('Epochs')
ax[0].set_ylabel('log(Cost)')

ada2 = AdalineGD(n_iter=10, eta=0.0001).fit(X, y)
ax[1].plot(range(1, len(ada2.cost_) + 1), ada2.cost_, marker='o')
ax[1].set_title('ADALINE_GD - eta=0.0001')
ax[1].set_xlabel('Epochs')
ax[1].set_ylabel('Cost')

plt.show()

# Improve gradient descent through feature scaling: standardize each feature column
X_std = np.copy(X)
for j in range(X_std.shape[1]):  # loop over the feature columns, not ndim
    X_std[:, j] = (X[:, j] - X[:, j].mean()) / X[:, j].std()

# Train model
ada_gd = AdalineGD(n_iter=15, eta=0.01).fit(X_std, y)

# Plot decision regions
plot_decision_regions(X_std, y, classifier=ada_gd)

plt.title('ADALINE_GD - Decision regions')
plt.xlabel('Sepal length [standardized]')
plt.ylabel('Petal length [standardized]')
plt.legend(loc='upper left')

plt.show()

# Plot cost per epoch
plt.plot(range(1, len(ada_gd.cost_) + 1), ada_gd.cost_, marker='o')
plt.xlabel('Epochs')
plt.ylabel('Cost')
plt.title('ADALINE_GD - Cost per epoch')

plt.show()
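The manual per-column standardization above can be done equivalently with scikit-learn's StandardScaler; a minimal sketch:

from sklearn.preprocessing import StandardScaler

# Same result as the explicit (x - mean) / std loop above
X_std = StandardScaler().fit_transform(X)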
Example #7
# Separate train and test samples (train: 70%, test: 30% default) and apply feature standardization
X_train_std, X_test_std, y_train, y_test = init_dataset(dataset=iris_data)

# Decision Tree using Gini impurity as the information-gain criterion
tree_model = DecisionTreeClassifier(criterion='gini',
                                    max_depth=4,
                                    random_state=1)
tree_model.fit(X_train_std, y_train)

# Put together train and test data
X_std = np.vstack((X_train_std, X_test_std))
y = np.hstack((y_train, y_test))

# Plot decision regions
plot_decision_regions(X=X_std,
                      y=y,
                      classifier=tree_model,
                      test_idx=range(105, 150))

plt.title('Decision Tree - Scikit')
plt.xlabel('Petal length [standardized]')
plt.ylabel('Petal width [standardized]')
plt.legend(loc='upper left')

plt.tight_layout()
plt.show()

# Display tree graph using sklearn
# tree.plot_tree(tree_model)
# plt.show()

# Display tree graph using graphviz
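One way to do the graphviz rendering (a sketch; the class and feature names are assumptions taken from the plot labels) is sklearn's export_graphviz:

from sklearn.tree import export_graphviz
import graphviz

dot_data = export_graphviz(tree_model,
                           filled=True,
                           rounded=True,
                           class_names=['Setosa', 'Versicolor', 'Virginica'],
                           feature_names=['Petal length', 'Petal width'],
                           out_file=None)
graph = graphviz.Source(dot_data)
graph.render('tree')  # writes tree.pdf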
Example #8

# Load the wine data and split it (as in Example #5)
X, y = df_wine.iloc[:, 1:].values, df_wine.iloc[:, 0].values
X_train, X_test, y_train, y_test = train_test_split(X,
                                                    y,
                                                    test_size=0.3,
                                                    random_state=0,
                                                    stratify=y)

# Standardize the features
stds = StandardScaler()
X_train_std = stds.fit_transform(X_train)
X_test_std = stds.transform(X_test)

lda = LDA(n_components=2)
lr = LogisticRegression(multi_class='ovr', random_state=1, solver='lbfgs')

# Dimensionality reduction
X_train_lda = lda.fit_transform(X_train_std, y_train)
X_test_lda = lda.transform(X_test_std)

# Fit the logistic regression model on the reduced dataset
lr.fit(X_train_lda, y_train)

# Plot decision regions of train data
plot_decision_regions(X_train_lda, y_train, classifier=lr)

plt.xlabel('LD1')
plt.ylabel('LD2')
plt.legend(loc='lower left')

plt.tight_layout()
plt.show()

# Plot decision regions of test data
plot_decision_regions(X_test_lda, y_test, classifier=lr)

plt.xlabel('LD1')
plt.ylabel('LD2')
plt.legend(loc='lower left')

plt.tight_layout()
plt.show()
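As a quick numeric check to go with the test-set plot (a hypothetical addition), the classifier's accuracy on the LDA-projected test data:

# Accuracy of the logistic regression on the 2D LDA projection
print(f'Test accuracy: {lr.score(X_test_lda, y_test):.3f}')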