clf = GridSearchCV( SVC(), tuned_parameters, scoring='%s_macro' % score ) clf.fit(X_train, y_train) print("Best parameters set found on development set:") print() print(clf.best_params_) print() print("Grid scores on development set:") print() means = clf.cv_results_['mean_test_score'] stds = clf.cv_results_['std_test_score'] for mean, std, params in zip(means, stds, clf.cv_results_['params']): print("%0.3f (+/-%0.03f) for %r" % (mean, std * 2, params)) print() print("Detailed classification report:") print() print("The model is trained on the full development set.") print("The scores are computed on the full evaluation set.") print() y_true, y_pred = y_test, clf.predict(X_test) print(classification_report(y_true, y_pred)) print() # Note the problem is too easy: the hyperparameter plateau is too flat and the # output model is the same for precision and recall with ties in quality.
y_train = np.copy(y) y_train[unlabeled_set] = -1 # ############################################################################# # Learn with LabelSpreading lp_model = label_propagation.LabelSpreading(gamma=.25, max_iter=20) lp_model.fit(X, y_train) predicted_labels = lp_model.transduction_[unlabeled_set] true_labels = y[unlabeled_set] cm = confusion_matrix(true_labels, predicted_labels, labels=lp_model.classes_) print("Label Spreading model: %d labeled & %d unlabeled points (%d total)" % (n_labeled_points, n_total_samples - n_labeled_points, n_total_samples)) print(classification_report(true_labels, predicted_labels)) print("Confusion matrix") print(cm) # ############################################################################# # Calculate uncertainty values for each transduced distribution pred_entropies = stats.distributions.entropy(lp_model.label_distributions_.T) # ############################################################################# # Pick the top 10 most uncertain labels uncertainty_index = np.argsort(pred_entropies)[-10:] # ############################################################################# # Plot f = plt.figure(figsize=(7, 5))
# the pipeline instance should stored in a variable named clf clf = Pipeline([ ('vec', vectorizer), ('clf', Perceptron()), ]) # TASK: Fit the pipeline on the training set clf.fit(docs_train, y_train) # TASK: Predict the outcome on the testing set in a variable named y_predicted y_predicted = clf.predict(docs_test) # Print the classification report print( metrics.classification_report(y_test, y_predicted, target_names=dataset.target_names)) # Plot the confusion matrix cm = metrics.confusion_matrix(y_test, y_predicted) print(cm) #import matlotlib.pyplot as plt #plt.matshow(cm, cmap=plt.cm.jet) #plt.show() # Predict the result on some short new sentences: sentences = [ 'This is a language detection test.', 'Ceci est un test de d\xe9tection de la langue.', 'Dies ist ein Test, um die Sprache zu erkennen.',
plt.imshow(image, cmap=plt.cm.gray_r, interpolation='nearest') plt.title('Training: %i' % label) # To apply a classifier on this data, we need to flatten the image, to # turn the data in a (samples, feature) matrix: n_samples = len(digits.images) data = digits.images.reshape((n_samples, -1)) # Create a classifier: a support vector classifier classifier = svm.SVC(gamma=0.001) # We learn the digits on the first half of the digits classifier.fit(data[:n_samples // 2], digits.target[:n_samples // 2]) # Now predict the value of the digit on the second half: expected = digits.target[n_samples // 2:] predicted = classifier.predict(data[n_samples // 2:]) print("Classification report for classifier %s:\n%s\n" % (classifier, metrics.classification_report(expected, predicted))) print("Confusion matrix:\n%s" % metrics.confusion_matrix(expected, predicted)) images_and_predictions = list(zip(digits.images[n_samples // 2:], predicted)) for index, (image, prediction) in enumerate(images_and_predictions[:4]): plt.subplot(2, 4, index + 5) plt.axis('off') plt.imshow(image, cmap=plt.cm.gray_r, interpolation='nearest') plt.title('Prediction: %i' % prediction) plt.show()
from mrex.datasets import samples_generator from mrex.feature_selection import SelectKBest, f_regression from mrex.pipeline import make_pipeline from mrex.model_selection import train_test_split from mrex.metrics import classification_report print(__doc__) # import some data to play with X, y = samples_generator.make_classification(n_features=20, n_informative=3, n_redundant=0, n_classes=4, n_clusters_per_class=2) X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42) # ANOVA SVM-C # 1) anova filter, take 3 best ranked features anova_filter = SelectKBest(f_regression, k=3) # 2) svm clf = svm.LinearSVC() anova_svm = make_pipeline(anova_filter, clf) anova_svm.fit(X_train, y_train) y_pred = anova_svm.predict(X_test) print(classification_report(y_test, y_pred)) coef = anova_svm[:-1].inverse_transform(anova_svm['linearsvc'].coef_) print(coef)
) clf = clf.fit(X_train_pca, y_train) print("done in %0.3fs" % (time() - t0)) print("Best estimator found by grid search:") print(clf.best_estimator_) # ############################################################################# # Quantitative evaluation of the model quality on the test set print("Predicting people's names on the test set") t0 = time() y_pred = clf.predict(X_test_pca) print("done in %0.3fs" % (time() - t0)) print(classification_report(y_test, y_pred, target_names=target_names)) print(confusion_matrix(y_test, y_pred, labels=range(n_classes))) # ############################################################################# # Qualitative evaluation of the predictions using matplotlib def plot_gallery(images, titles, h, w, n_row=3, n_col=4): """Helper function to plot a gallery of portraits""" plt.figure(figsize=(1.8 * n_col, 2.4 * n_row)) plt.subplots_adjust(bottom=0, left=.01, right=.99, top=.90, hspace=.35) for i in range(n_row * n_col): plt.subplot(n_row, n_col, i + 1) plt.imshow(images[i].reshape((h, w)), cmap=plt.cm.gray) plt.title(titles[i], size=12) plt.xticks(())
logistic.C = 6000 # Training RBM-Logistic Pipeline rbm_features_classifier.fit(X_train, Y_train) # Training the Logistic regression classifier directly on the pixel raw_pixel_classifier = clone(logistic) raw_pixel_classifier.C = 100. raw_pixel_classifier.fit(X_train, Y_train) # ############################################################################# # Evaluation Y_pred = rbm_features_classifier.predict(X_test) print("Logistic regression using RBM features:\n%s\n" % (metrics.classification_report(Y_test, Y_pred))) Y_pred = raw_pixel_classifier.predict(X_test) print("Logistic regression using raw pixel features:\n%s\n" % (metrics.classification_report(Y_test, Y_pred))) # ############################################################################# # Plotting plt.figure(figsize=(4.2, 4)) for i, comp in enumerate(rbm.components_): plt.subplot(10, 10, i + 1) plt.imshow(comp.reshape((8, 8)), cmap=plt.cm.gray_r, interpolation='nearest') plt.xticks(())