def main():
    """Exercise 1: fit a decision tree on the iris data and evaluate it.

    Steps:
        1. split the data into training and test sets
        2. train the model (2b: plot its learning curve)
        3. collect the predictions on the test set
        4. measure accuracy (4b: plot the confusion matrix)
    """
    # Load the dataset.
    iris = load_iris()
    features = iris.data
    labels = iris.target

    # 1 - split the data into training and test sets
    features_train, features_test, labels_train, labels_test = train_test_split(
        features, labels
    )

    # 2 - train the model
    classifier = DecisionTreeClassifier()
    classifier.fit(features_train, labels_train)

    # 2b - learning curve, drawn from the full dataset
    plot_learning_curve(classifier, features, labels)

    # 3 - collect the predictions on the test set
    predictions = classifier.predict(features_test)

    # 4 - measure accuracy
    accuracy = accuracy_score(labels_test, predictions)
    print('Accuracy', accuracy)

    # 4b - plot the confusion matrix
    plot_confusion_matrix(labels_test, predictions)
    plt.show()
def test_train_sizes(self):
    # Smoke test: an explicit grid of training-set fractions is accepted.
    np.random.seed(0)
    estimator = LogisticRegression()
    sizes = np.linspace(0.1, 1.0, 8)
    plot_learning_curve(estimator, self.X, self.y, train_sizes=sizes)
def test_ax(self):
    # Without ax= the returned axes must differ from ours; with ax= the very
    # same axes object must be handed back.
    np.random.seed(0)
    estimator = LogisticRegression()
    _, own_ax = plt.subplots(1, 1)

    returned_ax = plot_learning_curve(estimator, self.X, self.y)
    assert own_ax is not returned_ax

    returned_ax = plot_learning_curve(estimator, self.X, self.y, ax=own_ax)
    assert own_ax is returned_ax
def create_learning_curve_chart(regressor, X_train, y_train):
    """Create learning curve chart.

    Tip:
        Check Sklearn-Neptune integration
        `documentation <https://docs-beta.neptune.ai/essentials/integrations/machine-learning-frameworks/sklearn>`_
        for the full example.

    Args:
        regressor (:obj:`regressor`):
            | Fitted sklearn regressor object
        X_train (:obj:`ndarray`):
            | Training data matrix
        y_train (:obj:`ndarray`):
            | The regression target for training

    Returns:
        ``neptune.types.File`` object that you can assign to run's ``base_namespace``,
        or ``None`` when the chart could not be created (the error is printed).

    Raises:
        AssertionError: If ``regressor`` does not pass the ``is_regressor`` check.

    Examples:
        .. code:: python3

            import neptune.new.integrations.sklearn as npt_utils

            rfr = RandomForestRegressor()
            rfr.fit(X_train, y_train)

            run = neptune.init(project='my_workspace/my_project')
            run['visuals/learning_curve'] = npt_utils.create_learning_curve_chart(rfr, X_train, y_train)
    """
    assert is_regressor(regressor), 'regressor should be sklearn regressor.'

    chart = None

    # Chart creation is best-effort: any failure is reported and None returned.
    try:
        fig, ax = plt.subplots()
        try:
            plot_learning_curve(regressor, X_train, y_train, ax=ax)
            chart = neptune.types.File.as_image(fig)
        finally:
            # Close the figure on the failure path too, so a plotting or
            # conversion error does not leave an open figure behind.
            plt.close(fig)
    except Exception as e:
        print('Did not log learning curve chart. Error: {}'.format(e))

    return chart
def log_learning_curve_chart(regressor, X_train, y_train, experiment=None):
    """Log learning curve chart.

    Make sure you created an experiment by using ``neptune.create_experiment()`` before you use this method.

    Tip:
        Check `Neptune documentation <https://docs.neptune.ai/integrations/scikit_learn.html>`_ for the full example.

    Args:
        regressor (:obj:`regressor`):
            | Fitted sklearn regressor object
        X_train (:obj:`ndarray`):
            | Training data matrix
        y_train (:obj:`ndarray`):
            | The regression target for training
        experiment (:obj:`neptune.experiments.Experiment`, optional, default is ``None``):
            | Neptune ``Experiment`` object to control to which experiment you log the data.
            | If ``None``, log to currently active, and most recent experiment.

    Returns:
        ``None``

    Raises:
        AssertionError: If ``regressor`` does not pass the ``is_regressor`` check.

    Examples:
        .. code:: python3

            rfr = RandomForestRegressor()
            rfr.fit(X_train, y_train)

            neptune.init('my_workspace/my_project')
            neptune.create_experiment()

            log_learning_curve_chart(rfr, X_train, y_train)
    """
    assert is_regressor(regressor), 'regressor should be sklearn regressor.'
    exp = _validate_experiment(experiment)

    # Logging is best-effort: any failure is reported instead of raised.
    try:
        fig, ax = plt.subplots()
        try:
            plot_learning_curve(regressor, X_train, y_train, ax=ax)
            exp.log_image('charts_sklearn', fig, image_name='Learning Curve')
        finally:
            # Close the figure on the failure path too, so a plotting or
            # logging error does not leave an open figure behind.
            plt.close(fig)
    except Exception as e:
        print('Did not log learning curve chart. Error: {}'.format(e))
def model_learningcurve(model, params, Xdic, y_train, out_fp):
    """Plot the learning curve of a model and save it as a PNG.

    Args:
        model: Estimator handed to ``plot_learning_curve``.
        params: Pair ``(select, modelname)``; ``select`` is the key used to
            look up the training matrix in ``Xdic``, and both parts go into
            the plot title and the output file name.
        Xdic: Mapping from ``select`` key to training feature matrix.
        y_train: Training targets.
        out_fp: Directory prefix the PNG is written under.
    """
    select, modelname = params
    features = Xdic[select]
    label = '{}.{}_classifier'.format(select, modelname)
    title = 'Learning Curve({})'.format(label)

    # NOTE(review): no ax is passed to plot_learning_curve below; confirm
    # that the figure opened here is the one it draws on and that gets saved.
    plt.figure()

    # Seaborn look-and-feel for the plot.
    sns.set()
    sns.set_context({"figure.figsize": (20, 20)})
    sns.set_context('talk')
    font_style = {
        'font.family': 'sans-serif',
        'font.sans-serif': ['Helvetica'],
    }
    sns.set_style('white', font_style)

    plot_learning_curve(model, features, y_train, title=title, scoring='precision')

    target = '{}/{}_learning_curve.png'.format(out_fp, label)
    plt.savefig(target, bbox_inches='tight')
def main():
    """Exercise 3: k-nearest-neighbours classifier on the wine dataset.

    Scales the features with ``RobustScaler``, trains a 3-neighbour classifier
    on a 33% training split, prints train and test accuracy and shows the
    learning curve computed on the full (scaled) dataset.
    """
    wine = load_wine()
    # print(wine['DESCR'])

    features = RobustScaler().fit_transform(wine.data)
    labels = wine.target
    print(features)
    print(labels)

    split = train_test_split(features, labels, train_size=0.33)
    features_train, features_test, labels_train, labels_test = split
    # Dump the four parts in the order returned by train_test_split.
    for part in split:
        print(part)

    knn = KNeighborsClassifier(n_neighbors=3)
    knn.fit(features_train, labels_train)

    train_predictions = knn.predict(features_train)
    test_predictions = knn.predict(features_test)

    train_accuracy = accuracy_score(labels_train, train_predictions)
    print("Train accuracy", train_accuracy)

    test_accuracy = accuracy_score(labels_test, test_predictions)
    print("Test accuracy", test_accuracy)

    plot_learning_curve(knn, features, labels)
    plt.show()
import matplotlib.pyplot as plt
import numpy as np
from scikitplot.estimators import plot_learning_curve
from sklearn.datasets import fetch_olivetti_faces, load_digits
from sklearn.linear_model import LogisticRegression
from sklearn.neural_network import MLPClassifier
from sklearn.tree import DecisionTreeClassifier

# theory intro
# a tour of the Google playground?

# Load the Olivetti faces data and pull out features and targets.
dataset = fetch_olivetti_faces()
X = dataset['data']
y = dataset['target']
# np.random.shuffle(y)

# Alternative network sizes to compare against the one used below:
# model = MLPClassifier(hidden_layer_sizes=[1], verbose=2)  # underfitting
# model = MLPClassifier(hidden_layer_sizes=[100, 50], verbose=2)  # fitting
model = MLPClassifier(hidden_layer_sizes=[500, 200], verbose=2)  # overfitting

plot_learning_curve(model, X, y)
plt.show()
def test_n_jobs(self):
    # Smoke test: the n_jobs keyword is accepted.
    np.random.seed(0)
    estimator = LogisticRegression()
    plot_learning_curve(estimator, self.X, self.y, n_jobs=-1)
(accuracy_score(Y_test, Y_hat, normalize=False))) print('Misclassified samples: %d' % (Y_test != Y_hat).sum()) print('Accuracy: %.2f' % accuracy_score(Y_test, Y_hat)) print('Accuracy: %.2f' % knn.score(X_test_std, Y_test)) plot_decision_regions(X=X_test_std, y=Y_test, classifier=knn) plt.xlabel('X0 [standardized]') plt.ylabel('X1 [standardized]') plt.legend(loc='upper left') plt.tight_layout() plt.show() title = "Learning Curves (KNN)" cv = ShuffleSplit(n_splits=100, test_size=0.2, random_state=0) estimator = knn plot_learning_curve(estimator, X, Y, title, cv=cv, n_jobs=4) plt.show() del knn # Iris print("Iris Test:") iris_dataset = datasets.load_iris() X = iris_dataset.data indice = sorted(np.random.choice(X.shape[1], 2, replace=False)) X = X[:, indice] # print("indice:", indice) # print("X:", X) Y = iris_dataset.target # print("Y:", Y) # print("Class lables:", np.unique(Y))
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from scikitplot.estimators import plot_learning_curve

df = pd.read_csv('pulsar_stars.csv')

# Separate the features from the label and min-max scale every feature column.
# The DataFrame's own min()/max() reduce column-wise, so the scaling needs no
# numpy import and does not depend on how numpy dispatches on a DataFrame.
x_data = df.drop(columns='target_class')
column_min = x_data.min()
column_max = x_data.max()
x = (x_data - column_min) / (column_max - column_min)
y = df.target_class.values
compare_score = []

# Training and testing split: hold out 20% of the rows.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=123)

# Fit the classifier and report its accuracy on the held-out rows, in percent.
lr = LogisticRegression()
lr.fit(x_train, y_train)
lr_score = lr.score(x_test, y_test) * 100
compare_score.append(lr_score)
print('Test accuracy: {}%'.format(lr_score))

# NOTE(review): the learning curve is drawn from the held-out split only;
# confirm this is intended rather than the full x / y.
plot_learning_curve(lr, x_test, y_test)
plt.show()
def test_random_state_and_shuffle(self):
    # Smoke test: random_state and shuffle keywords are accepted together.
    np.random.seed(0)
    estimator = LogisticRegression()
    plot_learning_curve(
        estimator,
        self.X,
        self.y,
        random_state=1,
        shuffle=True,
    )
def test_string_classes(self):
    # Smoke test: the targets are passed through convert_labels_into_string
    # before plotting.
    np.random.seed(0)
    estimator = LogisticRegression()
    string_labels = convert_labels_into_string(self.y)
    plot_learning_curve(estimator, self.X, string_labels)
def test_cv(self):
    # Smoke test: plotting works both without cv and with an explicit cv=5.
    np.random.seed(0)
    estimator = LogisticRegression()
    plot_learning_curve(estimator, self.X, self.y)
    plot_learning_curve(estimator, self.X, self.y, cv=5)
data = pd.read_csv('pulsar_stars.csv')

# Split the table into features and labels.
# Features: every column except the last one.
features = data.columns[:-1]
X = data[features]
# Output: the target_class column.
y = data.target_class

# Split the data with train_test_split; 20% of the rows are held out.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Build the classifier: AdaBoost on top of a decision tree.
classifier = AdaBoostClassifier(DecisionTreeClassifier())

# Training
classifier = classifier.fit(X_train, y_train)

# Predict the response for the test dataset.
y_pred = classifier.predict(X_test)
print(y_pred)

# Accuracy on the held-out rows, as a percentage rounded to two decimals.
score = round(metrics.accuracy_score(y_test, y_pred) * 100, 2)
print('Accuracy = {}%'.format(score))

plot_learning_curve(classifier, X_test, y_test)
plt.show()
from sklearn.svm import SVC
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from scikitplot.estimators import plot_learning_curve
import matplotlib.pyplot as plt

df = pd.read_csv('pulsar_stars.csv')

# Features are every column except the label; standardize them.
x_data = df.drop(columns='target_class')
X = StandardScaler().fit_transform(x_data)
y = df.target_class.values
compare_score = []

# Hold out 20% of the rows for testing.
x_train, x_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Fit the support vector classifier and score it on the held-out rows (percent).
svm = SVC(random_state=42, gamma='scale')
svm = svm.fit(x_train, y_train)
svm_score = svm.score(x_test, y_test) * 100
compare_score.append(svm_score)
print('Test accuracy: {}%'.format(svm_score))

plot_learning_curve(svm, x_test, y_test)
plt.show()