def solution(filename): # Считывание данных data = pd.read_csv(filename + '.csv') # data['class'] = pd.factorize(data['class'])[0] y = data['class'].values x = data.drop(['class'], axis=1).values # Разбиение данных на тестовую и тренировочную выборки # x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.15) # Алгоритм бустинга if filename == 'chips': learning_rate = 0.15 else: learning_rate = 1.15 model = AdaBoostClassifier(learning_rate=learning_rate, n_estimators=150) model.fit(x, y) # Построение графика для каждого шага алгоритма plt.figure() plt.scatter(x[:, 0], x[:, 1], c=colors(y), cmap=plt.cm.Paired) ax = plt.gca() xlim = ax.get_xlim() ylim = ax.get_ylim() xx = np.linspace(xlim[0], xlim[1], 30) yy = np.linspace(ylim[0], ylim[1], 30) YY, XX = np.meshgrid(yy, xx) xy = np.vstack([XX.ravel(), YY.ravel()]).T y_pred = model.staged_predict(x) decision_funcs = model.staged_decision_function(xy) i = 1 for _, func in zip(y_pred, decision_funcs): plt.clf() plt.scatter(x[:, 0], x[:, 1], c=colors(y), cmap=plt.cm.Paired) f = func.reshape(XX.shape) ax = plt.gca() ax.contour(XX, YY, f, colors='k', levels=[-1, 0, 1], alpha=0.5, linestyles=['--', '-', '--']) plt.legend([f'Step number {i}']) plt.savefig(filename + '/' + str(i) + '.png') i += 1
def test_sparse_classification(): # Check classification with sparse input. class CustomSVC(SVC): """SVC variant that records the nature of the training set.""" def fit(self, X, y, sample_weight=None): """Modification on fit caries data type for later verification.""" super(CustomSVC, self).fit(X, y, sample_weight=sample_weight) self.data_type_ = type(X) return self X, y = datasets.make_multilabel_classification(n_classes=1, n_samples=15, n_features=5, random_state=42) # Flatten y to a 1d array y = np.ravel(y) X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0) for sparse_format in [csc_matrix, csr_matrix, lil_matrix, coo_matrix, dok_matrix]: X_train_sparse = sparse_format(X_train) X_test_sparse = sparse_format(X_test) # Trained on sparse format sparse_classifier = AdaBoostClassifier( base_estimator=CustomSVC(probability=True), random_state=1, algorithm="SAMME" ).fit(X_train_sparse, y_train) # Trained on dense format dense_classifier = AdaBoostClassifier( base_estimator=CustomSVC(probability=True), random_state=1, algorithm="SAMME" ).fit(X_train, y_train) # predict sparse_results = sparse_classifier.predict(X_test_sparse) dense_results = dense_classifier.predict(X_test) assert_array_equal(sparse_results, dense_results) # decision_function sparse_results = sparse_classifier.decision_function(X_test_sparse) dense_results = dense_classifier.decision_function(X_test) assert_array_equal(sparse_results, dense_results) # predict_log_proba sparse_results = sparse_classifier.predict_log_proba(X_test_sparse) dense_results = dense_classifier.predict_log_proba(X_test) assert_array_equal(sparse_results, dense_results) # predict_proba sparse_results = sparse_classifier.predict_proba(X_test_sparse) dense_results = dense_classifier.predict_proba(X_test) assert_array_equal(sparse_results, dense_results) # score sparse_results = sparse_classifier.score(X_test_sparse, y_test) dense_results = dense_classifier.score(X_test, y_test) assert_array_equal(sparse_results, dense_results) # staged_decision_function sparse_results = sparse_classifier.staged_decision_function( X_test_sparse) dense_results = dense_classifier.staged_decision_function(X_test) for sprase_res, dense_res in zip(sparse_results, dense_results): assert_array_equal(sprase_res, dense_res) # staged_predict sparse_results = sparse_classifier.staged_predict(X_test_sparse) dense_results = dense_classifier.staged_predict(X_test) for sprase_res, dense_res in zip(sparse_results, dense_results): assert_array_equal(sprase_res, dense_res) # staged_predict_proba sparse_results = sparse_classifier.staged_predict_proba(X_test_sparse) dense_results = dense_classifier.staged_predict_proba(X_test) for sprase_res, dense_res in zip(sparse_results, dense_results): assert_array_equal(sprase_res, dense_res) # staged_score sparse_results = sparse_classifier.staged_score(X_test_sparse, y_test) dense_results = dense_classifier.staged_score(X_test, y_test) for sprase_res, dense_res in zip(sparse_results, dense_results): assert_array_equal(sprase_res, dense_res) # Verify sparsity of data is maintained during training types = [i.data_type_ for i in sparse_classifier.estimators_] assert all([(t == csc_matrix or t == csr_matrix) for t in types])
def test_sparse_classification(): # Check classification with sparse input. class CustomSVC(SVC): """SVC variant that records the nature of the training set.""" def fit(self, X, y, sample_weight=None): """Modification on fit caries data type for later verification.""" super(CustomSVC, self).fit(X, y, sample_weight=sample_weight) self.data_type_ = type(X) return self X, y = datasets.make_multilabel_classification(n_classes=1, n_samples=15, n_features=5, random_state=42) # Flatten y to a 1d array y = np.ravel(y) X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0) for sparse_format in [ csc_matrix, csr_matrix, lil_matrix, coo_matrix, dok_matrix ]: X_train_sparse = sparse_format(X_train) X_test_sparse = sparse_format(X_test) # Trained on sparse format sparse_classifier = AdaBoostClassifier( base_estimator=CustomSVC(probability=True), random_state=1, algorithm="SAMME").fit(X_train_sparse, y_train) # Trained on dense format dense_classifier = AdaBoostClassifier( base_estimator=CustomSVC(probability=True), random_state=1, algorithm="SAMME").fit(X_train, y_train) # predict sparse_results = sparse_classifier.predict(X_test_sparse) dense_results = dense_classifier.predict(X_test) assert_array_equal(sparse_results, dense_results) # decision_function sparse_results = sparse_classifier.decision_function(X_test_sparse) dense_results = dense_classifier.decision_function(X_test) assert_array_equal(sparse_results, dense_results) # predict_log_proba sparse_results = sparse_classifier.predict_log_proba(X_test_sparse) dense_results = dense_classifier.predict_log_proba(X_test) assert_array_equal(sparse_results, dense_results) # predict_proba sparse_results = sparse_classifier.predict_proba(X_test_sparse) dense_results = dense_classifier.predict_proba(X_test) assert_array_equal(sparse_results, dense_results) # score sparse_results = sparse_classifier.score(X_test_sparse, y_test) dense_results = dense_classifier.score(X_test, y_test) assert_array_equal(sparse_results, dense_results) # staged_decision_function sparse_results = sparse_classifier.staged_decision_function( X_test_sparse) dense_results = dense_classifier.staged_decision_function(X_test) for sprase_res, dense_res in zip(sparse_results, dense_results): assert_array_equal(sprase_res, dense_res) # staged_predict sparse_results = sparse_classifier.staged_predict(X_test_sparse) dense_results = dense_classifier.staged_predict(X_test) for sprase_res, dense_res in zip(sparse_results, dense_results): assert_array_equal(sprase_res, dense_res) # staged_predict_proba sparse_results = sparse_classifier.staged_predict_proba(X_test_sparse) dense_results = dense_classifier.staged_predict_proba(X_test) for sprase_res, dense_res in zip(sparse_results, dense_results): assert_array_equal(sprase_res, dense_res) # staged_score sparse_results = sparse_classifier.staged_score(X_test_sparse, y_test) dense_results = dense_classifier.staged_score(X_test, y_test) for sprase_res, dense_res in zip(sparse_results, dense_results): assert_array_equal(sprase_res, dense_res) # Verify sparsity of data is maintained during training types = [i.data_type_ for i in sparse_classifier.estimators_] assert all([(t == csc_matrix or t == csr_matrix) for t in types])
attributes[:, 1], c=colors(targetValues), cmap=pyplot.cm.Paired) axes = pyplot.gca() xLim = axes.get_xlim() yLim = axes.get_ylim() xs = numpy.linspace(xLim[0], xLim[1], 30) ys = numpy.linspace(yLim[0], yLim[1], 30) xCoords, yCoords = numpy.meshgrid(xs, ys) xyCoords = numpy.vstack([xCoords.ravel(), yCoords.ravel()]).T Z = classifier.staged_decision_function(xyCoords) iterationsCount = [] accuracies = [] os.mkdir(fileName) i = 1 for item, z in zip(targetValuePredictedFunctions, Z): pyplot.clf() pyplot.xlabel("Step number " + str(i)) pyplot.scatter(attributes[:, 0], attributes[:, 1], c=colors(targetValues), cmap=pyplot.cm.Paired)
# Plot the training points ax.scatter(X_train[:, 0], X_train[:, 1], c=y_train, cmap=cm_bright) # and testing points ax.scatter(X_test[:, 0], X_test[:, 1], c=y_test, cmap=cm_bright, alpha=0.6) ax.set_xlim(xx.min(), xx.max()) ax.set_ylim(yy.min(), yy.max()) ax.set_xticks(()) ax.set_yticks(()) i = 1 plt.show() clf.fit(X_train, y_train) score = clf.staged_score(X_test, y_test) step = 1 for S, Z, est in zip( score, clf.staged_decision_function(np.c_[xx.ravel(), yy.ravel()]), clf.estimators_): # iterate over classifiers ax = plt.subplot(1, 1, i) # Put the result into a color plot Z = Z.reshape(xx.shape) ax.contourf(xx, yy, Z, cmap=cm, alpha=.8) #size = np.repeat(2.0, X_train.shape[0]) + (6-2.0)*(est.sample_weight - est.sample_weight.min())/(est.sample_weight.max() - est.sample_weight.min() + 0.0001) size = np.repeat(2.0, X_train.shape[0]) size = 2 * np.pi * size**2 # Plot also the training points ax.scatter(X_train[:, 0], X_train[:, 1], c=y_train,