Exemple #1
0
def log_test_preds_proba(classifier, X_test, y_pred_proba=None, nrows=1000, experiment=None):
    """Log test predictions probabilities.

    Calculate and log test predictions probabilities and have them as csv file in the Neptune artifacts.

    If you pass ``y_pred_proba``, then predictions probabilities are logged without computing from ``X_test`` data.
    Otherwise they are computed with ``classifier.predict_proba(X_test)``. If that call raises,
    the error is printed and nothing is logged.

    Columns of the logged table are labelled with ``classifier.classes_``.

    Estimator should be fitted before calling this function.

    Path to predictions probabilities in the Neptune artifacts is 'csv/test_preds_proba.csv'.

    Make sure you created an experiment by using ``neptune.create_experiment()`` before you use this method.

    Tip:
        Check `Neptune documentation <https://docs.neptune.ai/integrations/scikit_learn.html>`_ for the full example.

    Args:
        classifier (:obj:`classifier`):
            | Scikit-learn classifier to compute predictions probabilities.
        X_test (:obj:`ndarray`):
            | Testing data matrix.
        y_pred_proba (:obj:`ndarray`, optional, default is ``None``):
            | Classifier predictions probabilities on test data.
        nrows (`int`, optional, default is 1000):
            | Number of rows to log (passed to ``DataFrame.head``).
        experiment (:obj:`neptune.experiments.Experiment`, optional, default is ``None``):
            | Neptune ``Experiment`` object to control to which experiment you log the data.
            | If ``None``, log to currently active, and most recent experiment.

    Returns:
        ``None``

    Examples:
        .. code:: python3

            rfc = RandomForestClassifier()
            rfc.fit(X_train, y_train)

            neptune.init('my_workspace/my_project')
            neptune.create_experiment()

            # Probabilities are computed from ``X_test``.
            log_test_preds_proba(rfc, X_test)

            # Or log probabilities that were computed beforehand.
            y_pred_proba = rfc.predict_proba(X_test)
            log_test_preds_proba(rfc, X_test, y_pred_proba=y_pred_proba)
    """
    assert is_classifier(classifier), 'Classifier should be sklearn classifier.'
    assert isinstance(nrows, int), 'nrows should be integer, {} was passed'.format(type(nrows))

    exp = _validate_experiment(experiment)

    if y_pred_proba is None:
        # Best effort: not every classifier exposes ``predict_proba``. Any failure
        # here is reported and the function exits without logging anything.
        try:
            y_pred_proba = classifier.predict_proba(X_test)
        except Exception as e:
            print('This classifier does not provide predictions probabilities. Error: {}'.format(e))
            return

    # One column per class, in the order given by the fitted classifier.
    df = pd.DataFrame(data=y_pred_proba, columns=classifier.classes_)
    log_csv('test_preds_proba', df.head(nrows), exp)
def log_cluster_labels(model, X, nrows=1000, experiment=None, **kwargs):
    """Fit KMeans on ``X`` and log the cluster index assigned to each sample.

    Any ``kwargs`` are applied to ``model`` with ``set_params`` and the model is then
    fitted with ``fit_predict(X)``, so the object you pass in is reconfigured and refit.
    The resulting labels are logged as a single ``cluster_labels`` column.

    Make sure you created an experiment by using ``neptune.create_experiment()`` before you use this method.

    Tip:
        Check `Neptune documentation <https://docs.neptune.ai/integrations/scikit_learn.html>`_ for the full example.

    Args:
        model (:obj:`KMeans`):
            | KMeans object.
        X (:obj:`ndarray`):
            | Training instances to cluster.
        nrows (`int`, optional, default is 1000):
            | Number of rows to log (passed to ``DataFrame.head``).
        experiment (:obj:`neptune.experiments.Experiment`, optional, default is ``None``):
            | Neptune ``Experiment`` object to control to which experiment you log the data.
            | If ``None``, log to currently active, and most recent experiment.
        kwargs:
            KMeans parameters, forwarded to ``model.set_params``.

    Returns:
        ``None``

    Examples:
        .. code:: python3

            km = KMeans(n_init=11, max_iter=270)
            X, y = make_blobs(n_samples=579, n_features=17, centers=7, random_state=28743)

            neptune.init('my_workspace/my_project')
            neptune.create_experiment()

            log_cluster_labels(km, X=X)
    """
    assert isinstance(model, KMeans), 'Model should be sklearn KMeans instance.'
    assert isinstance(nrows, int), 'nrows should be integer, {} was passed'.format(type(nrows))

    target_experiment = _validate_experiment(experiment)

    # Apply the overrides first so the fit below uses them.
    model.set_params(**kwargs)
    assignments = model.fit_predict(X)

    frame = pd.DataFrame({'cluster_labels': assignments})
    log_csv('cluster_labels', frame.head(nrows), target_experiment)
def log_test_predictions(estimator,
                         X_test,
                         y_test,
                         y_pred=None,
                         nrows=1000,
                         experiment=None):
    """Log test predictions.

    Calculate and log test predictions and have them as csv file in the Neptune artifacts.

    If you pass ``y_pred``, then predictions are logged without computing from ``X_test`` data.
    Otherwise they are computed with ``estimator.predict(X_test)``.

    Layout of the logged table depends on the rank of ``y_pred``:

    * 1-D (single output): columns ``y_true`` and ``y_pred``.
    * 2-D (multi output): for every output ``j`` a pair of columns
      ``y_test_output_j`` and ``y_pred_output_j``.
    * any other rank: nothing is logged.

    ``y_pred`` may be any array-like (ndarray, list, pandas object). In the multi-output
    case ``y_test`` may likewise be any 2-D array-like.

    Estimator should be fitted before calling this function.

    Path to predictions in the Neptune artifacts is 'csv/test_predictions.csv'.

    Make sure you created an experiment by using ``neptune.create_experiment()`` before you use this method.

    Tip:
        Check `Neptune documentation <https://docs.neptune.ai/integrations/scikit_learn.html>`_ for the full example.

    Args:
        estimator (:obj:`estimator`):
            | Scikit-learn estimator to compute predictions.
        X_test (:obj:`ndarray`):
            | Testing data matrix.
        y_test (:obj:`ndarray`):
            | Target for testing.
        y_pred (:obj:`ndarray`, optional, default is ``None``):
            | Estimator predictions on test data.
        nrows (`int`, optional, default is 1000):
            | Number of rows to log (passed to ``DataFrame.head``).
        experiment (:obj:`neptune.experiments.Experiment`, optional, default is ``None``):
            | Neptune ``Experiment`` object to control to which experiment you log the data.
            | If ``None``, log to currently active, and most recent experiment.

    Returns:
        ``None``

    Examples:
        .. code:: python3

            rfr = RandomForestRegressor()
            rfr.fit(X_train, y_train)

            neptune.init('my_workspace/my_project')
            neptune.create_experiment()

            log_test_predictions(rfr, X_test, y_test)
    """
    # Imported locally so this function does not rely on a module-level numpy
    # import; numpy is always installed alongside pandas.
    import numpy as np

    assert is_regressor(estimator) or is_classifier(estimator),\
        'Estimator should be sklearn regressor or classifier.'
    assert isinstance(nrows,
                      int), 'nrows should be integer, {} was passed'.format(
                          type(nrows))

    exp = _validate_experiment(experiment)

    if y_pred is None:
        y_pred = estimator.predict(X_test)

    # ``np.ndim`` also works for lists and pandas objects, unlike ``y_pred.shape``.
    n_dims = np.ndim(y_pred)

    if n_dims == 1:
        # single output: hand the caller's objects to pandas untouched
        df = pd.DataFrame(data={'y_true': y_test, 'y_pred': y_pred})
    elif n_dims == 2:
        # multi output: ``[:, j]`` slicing is only valid on ndarrays, so coerce
        # first (a no-op when ndarrays were passed in).
        test_arr = np.asarray(y_test)
        pred_arr = np.asarray(y_pred)
        columns = {}
        for j in range(pred_arr.shape[1]):
            columns['y_test_output_{}'.format(j)] = test_arr[:, j]
            columns['y_pred_output_{}'.format(j)] = pred_arr[:, j]
        df = pd.DataFrame(columns)
    else:
        # Predictions that are neither 1-D nor 2-D are not logged.
        return

    log_csv('test_predictions', df.head(nrows), exp)