Example No. 1
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score
from scikitplot.estimators import plot_learning_curve
from scikitplot.metrics import plot_confusion_matrix
import matplotlib.pyplot as plt


def main():
    """
    Exercise 1
    """
    # load the dataset
    dataset = load_iris()
    x_data = dataset.data
    y_target = dataset.target

    # Exercise
    # 1 - split the data into train and test sets
    # 2 - train the model
    # 3 - collect the predictions on the test set
    # 4 - measure accuracy

    ## 1 - split the data into training and test sets
    x_train, x_test, y_train, y_test = train_test_split(x_data, y_target)

    ## 2 - train the model
    model = DecisionTreeClassifier()
    model.fit(x_train, y_train)

    ## 2b - learning curve
    plot_learning_curve(model, x_data, y_target)

    ## 3 - collect the predictions on the test set
    y_pred = model.predict(x_test)

    ## 4 - measure accuracy
    accuracy = accuracy_score(y_test, y_pred)
    print('Accuracy', accuracy)

    ## 4b - plot the confusion matrix
    plot_confusion_matrix(y_test, y_pred)
    plt.show()
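A small variation on step 1, as a sketch: train_test_split accepts random_state and stratify (both standard sklearn parameters), which makes the split, and therefore the reported accuracy, reproducible across runs.

# Hypothetical variant of step 1: fix the seed and preserve class balance.
x_train, x_test, y_train, y_test = train_test_split(
    x_data, y_target, test_size=0.25, random_state=42, stratify=y_target)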
Example No. 2
def test_train_sizes(self):
    np.random.seed(0)
    clf = LogisticRegression()
    plot_learning_curve(clf,
                        self.X,
                        self.y,
                        train_sizes=np.linspace(0.1, 1.0, 8))
Example No. 3
def test_ax(self):
    np.random.seed(0)
    clf = LogisticRegression()
    fig, ax = plt.subplots(1, 1)
    out_ax = plot_learning_curve(clf, self.X, self.y)
    assert ax is not out_ax
    out_ax = plot_learning_curve(clf, self.X, self.y, ax=ax)
    assert ax is out_ax
Example No. 4
def create_learning_curve_chart(regressor, X_train, y_train):
    """Create learning curve chart.

    Tip:
        Check Sklearn-Neptune integration
        `documentation <https://docs-beta.neptune.ai/essentials/integrations/machine-learning-frameworks/sklearn>`_
        for the full example.

    Args:
        regressor (:obj:`regressor`):
            | Fitted sklearn regressor object
        X_train (:obj:`ndarray`):
            | Training data matrix
        y_train (:obj:`ndarray`):
            | The regression target for training

    Returns:
        ``neptune.types.File`` object that you can assign to run's ``base_namespace``.

    Examples:
        .. code:: python3

            import neptune.new.integrations.sklearn as npt_utils

            rfr = RandomForestRegressor()
            rfr.fit(X_train, y_train)

            run = neptune.init(project='my_workspace/my_project')
            run['visuals/learning_curve'] = npt_utils.create_learning_curve_chart(rfr, X_train, y_train)
    """
    assert is_regressor(regressor), 'regressor should be sklearn regressor.'

    chart = None

    try:
        fig, ax = plt.subplots()
        plot_learning_curve(regressor, X_train, y_train, ax=ax)

        chart = neptune.types.File.as_image(fig)
        plt.close(fig)
    except Exception as e:
        print('Did not log learning curve chart. Error: {}'.format(e))

    return chart
Example No. 5
def log_learning_curve_chart(regressor, X_train, y_train, experiment=None):
    """Log learning curve chart.

    Make sure you created an experiment by using ``neptune.create_experiment()`` before you use this method.

    Tip:
        Check `Neptune documentation <https://docs.neptune.ai/integrations/scikit_learn.html>`_ for the full example.

    Args:
        regressor (:obj:`regressor`):
            | Fitted sklearn regressor object
        X_train (:obj:`ndarray`):
            | Training data matrix
        y_train (:obj:`ndarray`):
            | The regression target for training
        experiment (:obj:`neptune.experiments.Experiment`, optional, default is ``None``):
            | Neptune ``Experiment`` object to control to which experiment you log the data.
            | If ``None``, logs to the currently active (most recent) experiment.

    Returns:
        ``None``

    Examples:
        .. code:: python3

            rfr = RandomForestRegressor()
            rfr.fit(X_train, y_train)

            neptune.init('my_workspace/my_project')
            neptune.create_experiment()

            log_learning_curve_chart(rfr, X_train, y_train)
    """
    assert is_regressor(regressor), 'regressor should be sklearn regressor.'
    exp = _validate_experiment(experiment)

    try:
        fig, ax = plt.subplots()
        plot_learning_curve(regressor, X_train, y_train, ax=ax)
        exp.log_image('charts_sklearn', fig, image_name='Learning Curve')
        plt.close(fig)
    except Exception as e:
        print('Did not log learning curve chart. Error: {}'.format(e))
Example No. 6
import matplotlib.pyplot as plt
import seaborn as sns
from scikitplot.estimators import plot_learning_curve


def model_learningcurve(model, params, Xdic, y_train, out_fp):
    """Plot and save a learning curve for one (feature set, model) pair."""
    select, modelname = params
    X_train_fs = Xdic[select]
    plot_label = '%s.%s_classifier' % (select, modelname)
    name = 'Learning Curve(%s)' % (plot_label)
    plt.figure()
    sns.set()
    sns.set_context({"figure.figsize": (20, 20)})
    sns.set_context('talk')
    sns.set_style('white', {
        'font.family': 'sans-serif',
        'font.sans-serif': ['Helvetica']
    })
    plot_learning_curve(model,
                        X_train_fs,
                        y_train,
                        title=name,
                        scoring='precision')
    plt.savefig('%s/%s_learning_curve.png' % (out_fp, plot_label),
                bbox_inches='tight')
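The function above assumes params unpacks into a feature-selection key and a model name, and that Xdic maps those keys to training matrices. A hypothetical call might look like this (the names 'anova', 'rf', X_train_anova, and rf_model are illustrative, not from the source):

# Hypothetical usage; keys and variable names are illustrative only.
Xdic = {'anova': X_train_anova}   # feature-selected training matrices
params = ('anova', 'rf')          # (feature-selection key, model name)
model_learningcurve(rf_model, params, Xdic, y_train, out_fp='plots')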
Example No. 7
from sklearn.datasets import load_wine
from sklearn.preprocessing import RobustScaler
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score
from scikitplot.estimators import plot_learning_curve
import matplotlib.pyplot as plt


def main():
    """
    Exercise 3
    """
    dataset = load_wine()
    # print(dataset['DESCR'])

    scaler = RobustScaler()
    x_data = scaler.fit_transform(dataset.data)
    y_target = dataset.target

    print(x_data)
    print(y_target)

    # note: train_size=0.33 trains on only a third of the data; the rest is the test set
    x_train, x_test, y_train, y_test = train_test_split(x_data,
                                                        y_target,
                                                        train_size=0.33)

    print(x_train)
    print(x_test)
    print(y_train)
    print(y_test)

    model = KNeighborsClassifier(n_neighbors=3)
    model.fit(x_train, y_train)

    pred_train = model.predict(x_train)
    pred_test = model.predict(x_test)

    acc_train = accuracy_score(y_train, pred_train)
    print("Train accuracy", acc_train)
    acc_test = accuracy_score(y_test, pred_test)
    print("Test accuracy", acc_test)

    plot_learning_curve(model, x_data, y_target)
    plt.show()
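The train/test gap above comes from a single split; a minimal sketch, assuming you want a steadier estimate, is to average accuracy over several folds with sklearn's cross_val_score:

from sklearn.model_selection import cross_val_score

# Average accuracy over 5 folds instead of trusting one split.
scores = cross_val_score(model, x_data, y_target, cv=5)
print("CV accuracy: %.3f +/- %.3f" % (scores.mean(), scores.std()))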
Example No. 8
from sklearn.datasets import load_digits, fetch_olivetti_faces
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.neural_network import MLPClassifier
from scikitplot.estimators import plot_learning_curve
import matplotlib.pyplot as plt
import numpy as np

# theory intro
# a quick demo on the Google playground?

dataset = fetch_olivetti_faces()

X = dataset['data']
y = dataset['target']
#np.random.shuffle(y)

#model = MLPClassifier(hidden_layer_sizes=[1], verbose=2) # underfitting
#model = MLPClassifier(hidden_layer_sizes=[100, 50], verbose=2) # fitting
model = MLPClassifier(hidden_layer_sizes=[500, 200], verbose=2) # overfitting

plot_learning_curve(model, X, y)
plt.show()
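A minimal sketch, assuming you want the three commented-out configurations side by side: scikit-plot's plot_learning_curve accepts title and ax keywords (the ax keyword is exercised by the tests below), so each model can draw into its own subplot.

# Compare under-, well-, and over-parameterized networks on one figure.
configs = {'underfit': [1], 'fit': [100, 50], 'overfit': [500, 200]}
fig, axes = plt.subplots(1, 3, figsize=(15, 4))
for ax, (label, sizes) in zip(axes, configs.items()):
    mlp = MLPClassifier(hidden_layer_sizes=sizes)
    plot_learning_curve(mlp, X, y, title=label, ax=ax)
plt.show()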
Example No. 9
def test_n_jobs(self):
    np.random.seed(0)
    clf = LogisticRegression()
    plot_learning_curve(clf, self.X, self.y, n_jobs=-1)
Example No. 10
    print('Correctly classified samples: %d' %
          (accuracy_score(Y_test, Y_hat, normalize=False)))
    print('Misclassified samples: %d' % (Y_test != Y_hat).sum())
    print('Accuracy: %.2f' % accuracy_score(Y_test, Y_hat))
    print('Accuracy: %.2f' % knn.score(X_test_std, Y_test))

    plot_decision_regions(X=X_test_std, y=Y_test, classifier=knn)
    plt.xlabel('X0 [standardized]')
    plt.ylabel('X1 [standardized]')
    plt.legend(loc='upper left')
    plt.tight_layout()
    plt.show()

    title = "Learning Curves (KNN)"
    cv = ShuffleSplit(n_splits=100, test_size=0.2, random_state=0)
    estimator = knn
    plot_learning_curve(estimator, X, Y, title, cv=cv, n_jobs=4)
    plt.show()

    del knn

    # Iris
    print("Iris Test:")
    iris_dataset = datasets.load_iris()
    X = iris_dataset.data
    indice = sorted(np.random.choice(X.shape[1], 2, replace=False))
    X = X[:, indice]
    # print("indice:", indice)
    # print("X:", X)
    Y = iris_dataset.target
    # print("Y:", Y)
    # print("Class lables:", np.unique(Y))
Example No. 11
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from scikitplot.estimators import plot_learning_curve

df = pd.read_csv('pulsar_stars.csv')

# Set up x and y and normalize the data
x_data = df.drop(columns='target_class')
x = (x_data - np.min(x_data)) / (np.max(x_data) - np.min(x_data))  #scaling
y = df.target_class.values

compare_score = []

# training and testing split
x_train, x_test, y_train, y_test = train_test_split(x,
                                                    y,
                                                    test_size=0.2,
                                                    random_state=123)

lr = LogisticRegression()
lr.fit(x_train, y_train)

lr_score = lr.score(x_test, y_test) * 100
compare_score.append(lr_score)

print('Test accuracy: {}%'.format(lr_score))

plot_learning_curve(lr, x_test, y_test)
plt.show()
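Two notes on this snippet, as a sketch: the manual min-max step is equivalent to sklearn's MinMaxScaler, and passing the full x, y to plot_learning_curve lets its internal cross-validation see all the data rather than only the 20% test split.

from sklearn.preprocessing import MinMaxScaler

# Equivalent to the manual (x - min) / (max - min) step above.
x_scaled = MinMaxScaler().fit_transform(x_data)

# Learning curve over the full dataset; internal CV handles the splitting.
plot_learning_curve(lr, x_scaled, y)
plt.show()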
Example No. 12
def test_random_state_and_shuffle(self):
    np.random.seed(0)
    clf = LogisticRegression()
    plot_learning_curve(clf, self.X, self.y, random_state=1, shuffle=True)
Example No. 13
def test_string_classes(self):
    np.random.seed(0)
    clf = LogisticRegression()
    plot_learning_curve(clf, self.X, convert_labels_into_string(self.y))
Example No. 14
def test_cv(self):
    np.random.seed(0)
    clf = LogisticRegression()
    plot_learning_curve(clf, self.X, self.y)
    plot_learning_curve(clf, self.X, self.y, cv=5)
Example No. 15
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.ensemble import AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn import metrics
from scikitplot.estimators import plot_learning_curve

data = pd.read_csv('pulsar_stars.csv')

# We first decide on how to split the data into features and labels
# features:
features = data.columns[:-1]
X = data[features]
# output:
y = data.target_class

# Now we split the data using train_test_split, which requires choosing the split sizes
X_train, X_test, y_train, y_test = train_test_split(X,
                                                    y,
                                                    test_size=0.2,
                                                    random_state=42)

# We now make the classifier
classifier = AdaBoostClassifier(DecisionTreeClassifier())

# Training
classifier = classifier.fit(X_train, y_train)
# Predicting the response for the test dataset
y_pred = classifier.predict(X_test)
print(y_pred)
# Let's see how accurate our model is likely to be
score = round(metrics.accuracy_score(y_test, y_pred) * 100, 2)
print('Accuracy = {}%'.format(score))

plot_learning_curve(classifier, X_test, y_test)
plt.show()
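As with the previous pulsar example, the curve is arguably more informative over the full dataset than over the held-out 20%, since plot_learning_curve performs its own cross-validated splits internally; a minimal variant:

# Let the learning curve's internal cross-validation use all the data.
plot_learning_curve(classifier, X, y, cv=5)
plt.show()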
Example No. 16
from sklearn.svm import SVC
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from scikitplot.estimators import plot_learning_curve
import matplotlib.pyplot as plt

df = pd.read_csv('pulsar_stars.csv')
x_data = df.drop(columns='target_class')
X = StandardScaler().fit_transform(x_data)
y = df.target_class.values
compare_score = []

x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

svm = SVC(random_state=42, gamma='scale')
svm = svm.fit(x_train, y_train)

svm_score = svm.score(x_test, y_test) * 100
compare_score.append(svm_score)

print('Test accuracy: {}%'.format(svm_score))

plot_learning_curve(svm, x_test, y_test)
plt.show()