def log_test_preds_proba(classifier, X_test, y_pred_proba=None, nrows=1000, experiment=None):
    """Log class probabilities predicted for the test data.

    The probabilities are saved as a csv file in the Neptune artifacts.
    When ``y_pred_proba`` is given, it is logged directly and ``X_test`` is not used;
    otherwise the probabilities are obtained from ``classifier.predict_proba(X_test)``.
    If that call fails, a message is printed and nothing is logged.

    The classifier must be fitted before calling this function.

    Path to predictions probabilities in the Neptune artifacts is 'csv/test_preds_proba.csv'.

    Make sure you created an experiment by using ``neptune.create_experiment()`` before you use this method.

    Tip:
        Check `Neptune documentation <https://docs.neptune.ai/integrations/scikit_learn.html>`_ for the full example.

    Args:
        classifier (:obj:`classifier`):
            | Fitted scikit-learn classifier; its ``classes_`` are used as column names.
        X_test (:obj:`ndarray`):
            | Testing data matrix.
        y_pred_proba (:obj:`ndarray`, optional, default is ``None``):
            | Precomputed classifier predictions probabilities on test data.
        nrows (`int`, optional, default is 1000):
            | Number of leading rows to log.
        experiment (:obj:`neptune.experiments.Experiment`, optional, default is ``None``):
            | Neptune ``Experiment`` object to control to which experiment you log the data.
            | If ``None``, log to currently active, and most recent experiment.

    Returns:
        ``None``

    Examples:
        .. code:: python3

            rfc = RandomForestClassifier()
            rfc.fit(X_train, y_train)

            neptune.init('my_workspace/my_project')
            neptune.create_experiment()

            log_test_preds_proba(rfc, X_test)
    """
    assert is_classifier(classifier), 'Classifier should be sklearn classifier.'
    assert isinstance(nrows, int), 'nrows should be integer, {} was passed'.format(type(nrows))

    target_experiment = _validate_experiment(experiment)

    probabilities = y_pred_proba
    if probabilities is None:
        # Not every classifier exposes predict_proba; report and bail out quietly.
        try:
            probabilities = classifier.predict_proba(X_test)
        except Exception as error:
            print('This classifier does not provide predictions probabilities. Error: {}'.format(error))
            return

    # One column per class, named after the classifier's class labels.
    proba_frame = pd.DataFrame(data=probabilities, columns=classifier.classes_)
    log_csv('test_preds_proba', proba_frame.head(nrows), target_experiment)
def log_cluster_labels(model, X, nrows=1000, experiment=None, **kwargs):
    """Log the cluster index assigned to each sample.

    Any ``kwargs`` are applied to ``model`` with ``set_params``, then the model is
    fitted on ``X`` with ``fit_predict`` and the resulting labels are logged
    as a single ``cluster_labels`` column.

    Make sure you created an experiment by using ``neptune.create_experiment()`` before you use this method.

    Tip:
        Check `Neptune documentation <https://docs.neptune.ai/integrations/scikit_learn.html>`_ for the full example.

    Args:
        model (:obj:`KMeans`):
            | KMeans object. Note that it is fitted on ``X`` by this function.
        X (:obj:`ndarray`):
            | Training instances to cluster.
        nrows (`int`, optional, default is 1000):
            | Number of leading rows to log.
        experiment (:obj:`neptune.experiments.Experiment`, optional, default is ``None``):
            | Neptune ``Experiment`` object to control to which experiment you log the data.
            | If ``None``, log to currently active, and most recent experiment.
        kwargs:
            KMeans parameters, set on ``model`` before fitting.

    Returns:
        ``None``

    Examples:
        .. code:: python3

            km = KMeans(n_init=11, max_iter=270)
            X, y = make_blobs(n_samples=579, n_features=17, centers=7, random_state=28743)

            neptune.init('my_workspace/my_project')
            neptune.create_experiment()

            log_cluster_labels(km, X=X)
    """
    assert isinstance(model, KMeans), 'Model should be sklearn KMeans instance.'
    assert isinstance(nrows, int), 'nrows should be integer, {} was passed'.format(type(nrows))

    target_experiment = _validate_experiment(experiment)

    # Parameter overrides must be in place before the model is fitted.
    model.set_params(**kwargs)
    assignments = model.fit_predict(X)

    label_frame = pd.DataFrame(data={'cluster_labels': assignments})
    log_csv('cluster_labels', label_frame.head(nrows), target_experiment)
def log_test_predictions(estimator, X_test, y_test, y_pred=None, nrows=1000, experiment=None):
    """Log test predictions.

    Calculate and log test predictions and have them as csv file in the Neptune artifacts.

    If you pass ``y_pred``, then predictions are logged without computing from ``X_test`` data.
    Otherwise they are computed with ``estimator.predict(X_test)``.

    Single-output predictions (1-D) are logged as two columns, ``y_true`` and ``y_pred``.
    Multi-output predictions (2-D) are logged as a pair of columns per output,
    ``y_test_output_<j>`` and ``y_pred_output_<j>``. Predictions of any other
    dimensionality are not logged.

    Estimator should be fitted before calling this function.

    Path to predictions in the Neptune artifacts is 'csv/test_predictions.csv'.

    Make sure you created an experiment by using ``neptune.create_experiment()`` before you use this method.

    Tip:
        Check `Neptune documentation <https://docs.neptune.ai/integrations/scikit_learn.html>`_ for the full example.

    Args:
        estimator (:obj:`estimator`):
            | Scikit-learn estimator to compute predictions.
        X_test (:obj:`ndarray`):
            | Testing data matrix.
        y_test (:obj:`ndarray`):
            | Target for testing. Any array-like is accepted.
        y_pred (:obj:`ndarray`, optional, default is ``None``):
            | Estimator predictions on test data. Any array-like is accepted.
        nrows (`int`, optional, default is 1000):
            | Number of rows to log.
        experiment (:obj:`neptune.experiments.Experiment`, optional, default is ``None``):
            | Neptune ``Experiment`` object to control to which experiment you log the data.
            | If ``None``, log to currently active, and most recent experiment.

    Returns:
        ``None``

    Examples:
        .. code:: python3

            rfr = RandomForestRegressor()
            rfr.fit(X_train, y_train)

            neptune.init('my_workspace/my_project')
            neptune.create_experiment()

            log_test_predictions(rfr, X_test, y_test)
    """
    # Imported locally so this function does not rely on a module-level numpy import.
    import numpy as np

    assert is_regressor(estimator) or is_classifier(estimator),\
        'Estimator should be sklearn regressor or classifier.'
    assert isinstance(nrows, int), 'nrows should be integer, {} was passed'.format(type(nrows))

    exp = _validate_experiment(experiment)

    if y_pred is None:
        y_pred = estimator.predict(X_test)

    # np.ndim works for lists and pandas objects too, unlike ``y_pred.shape``.
    n_dims = np.ndim(y_pred)

    if n_dims == 1:
        # single output: inputs are handed to pandas untouched
        df = pd.DataFrame(data={'y_true': y_test, 'y_pred': y_pred})
        log_csv('test_predictions', df.head(nrows), exp)
    elif n_dims == 2:
        # multi output: coerce to ndarrays so positional ``[:, j]`` slicing is valid
        # for DataFrames and nested lists as well (no-op for ndarray inputs)
        y_test_arr = np.asarray(y_test)
        y_pred_arr = np.asarray(y_pred)

        df = pd.DataFrame()
        for j in range(y_pred_arr.shape[1]):
            df['y_test_output_{}'.format(j)] = y_test_arr[:, j]
            df['y_pred_output_{}'.format(j)] = y_pred_arr[:, j]
        log_csv('test_predictions', df.head(nrows), exp)