def weight_graph(regularization='l1'):
    """Plot the regularization path: weight coefficients versus C."""
    weights, params = [], []
    for c in np.arange(-4, 6):
        lr = LogisticRegression(penalty=regularization, C=10**c, random_state=0)
        lr.fit(X_train_std, y_train)
        weights.append(lr.coef_[1])
        params.append(10**c)

    weights = np.array(weights)
    for column, color in zip(range(weights.shape[1]), colors):
        plt.plot(params, weights[:, column],
                 label=columnsXY[column + 1], color=color)

    plt.axhline(0, color='black', linestyle='--', linewidth=3)
    plt.xlim([10**(-5), 10**5])
    plt.ylabel('weight coefficient')
    plt.xlabel('C')
    plt.xscale('log')
    title = 'regularization {}'.format(regularization)
    plt.title(title)
    # place the legend outside the axes so it does not cover the curves
    ax = plt.gca()
    ax.legend(loc='upper center', bbox_to_anchor=(1.38, 1.03),
              ncol=1, fancybox=True)
    ocr_utils.show_figures(plt, title + ' path')
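# A hypothetical usage sketch, not part of the original listing: plot the
# regularization path for both penalty types. It assumes X_train_std,
# y_train, colors, columnsXY, and ocr_utils are defined as above, and that
# the LogisticRegression solver supports the requested penalty (recent
# scikit-learn versions need e.g. solver='liblinear' for an L1 penalty).
weight_graph(regularization='l1')
weight_graph(regularization='l2')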
roc_auc = auc(x=fpr, y=tpr)
plt.plot(fpr, tpr, color=clr, linestyle=ls,
         label='%s (auc = %0.2f)' % (label, roc_auc))

plt.legend(loc='lower right')
plt.plot([0, 1], [0, 1], linestyle='--', color='gray', linewidth=2)
plt.xlim([-0.1, 1.1])
plt.ylim([-0.1, 1.1])
plt.grid()
title = 'Majority Vote Receiver Operating Characteristic Curve'
plt.title(title)
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
ocr_utils.show_figures(plt, title)

sc = StandardScaler()
X_train_std = sc.fit_transform(X_train)

from itertools import product

# build a grid over the first two standardized features for the
# decision-region plots
all_clf = [pipe1, clf2, pipe3, mv_clf]
x_min = X_train_std[:, 0].min() - 1
x_max = X_train_std[:, 0].max() + 1
y_min = X_train_std[:, 1].min() - 1
y_max = X_train_std[:, 1].max() + 1
xx, yy = np.meshgrid(np.arange(x_min, x_max, 0.1),
                     np.arange(y_min, y_max, 0.1))
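# A hedged sketch of how xx, yy, and all_clf are typically combined to draw
# the four decision regions, assuming a binary target coded 0/1; the
# clf_labels names below are assumptions for illustration, not taken from
# the original listing.
clf_labels = ['Logistic regression', 'Decision tree', 'KNN', 'Majority voting']
fig2, axarr = plt.subplots(nrows=2, ncols=2, sharex='col', sharey='row',
                           figsize=(7, 5))
for idx, clf, tt in zip(product([0, 1], [0, 1]), all_clf, clf_labels):
    clf.fit(X_train_std, y_train)
    # predict every grid point, then reshape back to the grid for contourf
    Z = clf.predict(np.c_[xx.ravel(), yy.ravel()]).reshape(xx.shape)
    axarr[idx[0], idx[1]].contourf(xx, yy, Z, alpha=0.3)
    axarr[idx[0], idx[1]].scatter(X_train_std[y_train == 0, 0],
                                  X_train_std[y_train == 0, 1],
                                  c='blue', marker='^', s=50)
    axarr[idx[0], idx[1]].scatter(X_train_std[y_train == 1, 0],
                                  X_train_std[y_train == 1, 1],
                                  c='red', marker='o', s=50)
    axarr[idx[0], idx[1]].set_title(tt)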
tot = sum(eigen_vals)
var_exp = [(i / tot) for i in sorted(eigen_vals, reverse=True)]
cum_var_exp = np.cumsum(var_exp)

# truncate for plotting
var_exp = var_exp[:20]
cum_var_exp = cum_var_exp[:2*n_components]

title = 'explained variance'
plt.bar(range(1, len(var_exp) + 1), var_exp, alpha=0.5, align='center',
        label='individual explained variance')
plt.step(range(1, len(cum_var_exp) + 1), cum_var_exp, where='mid',
         label='cumulative explained variance')
plt.ylabel('Explained variance ratio')
plt.xlabel('Principal components')
plt.legend(loc='best')
plt.tight_layout()
plt.title(title)
ocr_utils.show_figures(plt, title)

# Make a list of (eigenvalue, eigenvector) tuples
eigen_pairs = [(np.abs(eigen_vals[i]), eigen_vecs[:, i])
               for i in range(len(eigen_vals))]

# Sort the (eigenvalue, eigenvector) tuples from high to low; sort on the
# eigenvalue only, since comparing the eigenvector arrays on a tie would
# raise an error
eigen_pairs.sort(key=lambda k: k[0], reverse=True)

# Stack the two eigenvectors with the highest explained variance into the
# projection matrix W
w = np.hstack((eigen_pairs[0][1][:, np.newaxis],
               eigen_pairs[1][1][:, np.newaxis]))
print('Matrix W:\n', w[:2*n_components, :])

X_train_pca = X_train_image.dot(w)
print('projection of first dataset sample on first 2 eigenvectors {}'
      .format(X_train_image[0].dot(w)))
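# A hedged cross-check, not part of the original: the same projection can be
# obtained with scikit-learn's PCA. Individual components may come back with
# flipped signs relative to the manual eigendecomposition, which is expected
# since eigenvectors are only defined up to sign, and sklearn centers the
# data internally, so the comparison assumes X_train_image is mean-centered.
from sklearn.decomposition import PCA

pca = PCA(n_components=2)
X_train_pca_skl = pca.fit_transform(X_train_image)
print('sklearn explained variance ratio:', pca.explained_variance_ratio_)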
        # return the class label after the unit step (this fragment is
        # likely the tail of the AdalineGD predict method)
        return np.where(self.activation(X) >= 0.0, 1, -1)
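# For context, a minimal sketch of the class the fragment above belongs to,
# assuming the usual Adaline batch-gradient-descent structure (net_input
# computes the weighted sum, activation is the identity for Adaline). The
# names and details here are assumptions for illustration, not taken from
# the original listing.
import numpy as np

class AdalineGDSketch(object):
    """Minimal ADALINE trained with batch gradient descent."""

    def __init__(self, eta=0.01, n_iter=50):
        self.eta = eta          # learning rate
        self.n_iter = n_iter    # passes over the training set

    def fit(self, X, y):
        self.w_ = np.zeros(1 + X.shape[1])
        self.cost_ = []
        for _ in range(self.n_iter):
            output = self.activation(X)
            errors = y - output
            self.w_[1:] += self.eta * X.T.dot(errors)    # weight update
            self.w_[0] += self.eta * errors.sum()        # bias update
            self.cost_.append((errors**2).sum() / 2.0)   # SSE cost per epoch
        return self

    def net_input(self, X):
        return np.dot(X, self.w_[1:]) + self.w_[0]

    def activation(self, X):
        return self.net_input(X)  # identity activation for Adaline

    def predict(self, X):
        return np.where(self.activation(X) >= 0.0, 1, -1)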
title = 'Gradient Descent Learning rate 0.01'
fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(8, 4))

# a large learning rate overshoots the minimum: the cost diverges
ada1 = AdalineGD(n_iter=10, eta=0.01).fit(X, y)
ax[0].plot(range(1, len(ada1.cost_) + 1), np.log10(ada1.cost_), marker='o')
ax[0].set_xlabel('Epochs')
ax[0].set_ylabel('log(Sum-squared-error)')
ax[0].set_title('Adaline - Learning rate 0.01')

# a small learning rate converges, but slowly
ada2 = AdalineGD(n_iter=10, eta=0.0001).fit(X, y)
ax[1].plot(range(1, len(ada2.cost_) + 1), ada2.cost_, marker='o')
ax[1].set_xlabel('Epochs')
ax[1].set_ylabel('Sum-squared-error')
ax[1].set_title('Adaline - Learning rate 0.0001')

ocr_utils.show_figures(plt, title)

# standardize features (z-score each column)
X_std = np.copy(X)
X_std[:, 0] = (X[:, 0] - X[:, 0].mean()) / X[:, 0].std()
X_std[:, 1] = (X[:, 1] - X[:, 1].mean()) / X[:, 1].std()
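# A hedged aside, not from the original: assuming X has exactly the two
# feature columns standardized above, the manual z-scoring is equivalent to
# scikit-learn's StandardScaler (both use the population standard deviation,
# numpy's ddof=0). The scaler also remembers the fitted mean and scale, so
# the identical transform can later be applied to test data.
from sklearn.preprocessing import StandardScaler

sc = StandardScaler()
X_std_skl = sc.fit_transform(X)
# np.allclose(X_std, X_std_skl) should hold for the two standardized columns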