def test_create_memmap_backed_data(monkeypatch):
    """Smoke-test ``create_memmap_backed_data`` and its atexit registration.

    Each call is expected to register exactly one cleanup callback via
    ``atexit.register``; the monkeypatched counter verifies that.
    """
    counter = RegistrationCounter()
    monkeypatch.setattr(atexit, 'register', counter)

    source = np.ones(3)

    # single array, default mmap mode
    mapped = create_memmap_backed_data(source)
    check_memmap(source, mapped)
    assert counter.nb_calls == 1

    # return_folder=True also hands back the temp folder holding the file
    mapped, folder = create_memmap_backed_data(source, return_folder=True)
    check_memmap(source, mapped)
    assert folder == os.path.dirname(mapped.filename)
    assert counter.nb_calls == 2

    # an explicit mmap_mode is forwarded to the memmap
    mode = 'r+'
    mapped = create_memmap_backed_data(source, mmap_mode=mode)
    check_memmap(source, mapped, mode)
    assert counter.nb_calls == 3

    # a list of arrays yields a list of memmaps (one registration total)
    arrays = [source, source + 1, source + 2]
    mapped_list = create_memmap_backed_data(arrays)
    for original, mapped in zip(arrays, mapped_list):
        check_memmap(original, mapped)
    assert counter.nb_calls == 4
def check_estimators_fit_returns_self(name, estimator_orig,
                                      readonly_memmap=False):
    """Check if self is returned when calling fit."""
    est = clone(estimator_orig)

    X, y = make_blobs(random_state=0, n_samples=9, n_features=4)
    y = (y > 1).astype(int)
    # some estimators want non-negative input
    X -= X.min()
    X = pairwise_estimator_convert_X(X, estimator_orig)
    y = multioutput_estimator_convert_y_2d(est, y)

    if readonly_memmap:
        X, y = create_memmap_backed_data([X, y])

    set_random_state(est)
    assert est.fit(X, y) is est
def check_classifiers_train(name, classifier_orig, readonly_memmap=False):
    """Run a battery of sanity checks on a classifier's training API.

    Fits a clone of ``classifier_orig`` on a binary blob problem and checks:
    fit rejects mismatched X/y lengths, ``classes_`` is set after fit,
    ``predict`` output shape and accuracy, and that ``decision_function``,
    ``predict_proba`` and ``predict_log_proba`` (when present) agree with
    ``predict`` and reject inputs with the wrong number of features.

    Parameters
    ----------
    name : str
        Estimator name, used in failure messages.
    classifier_orig : estimator
        Classifier instance to check (it is cloned, not modified).
    readonly_memmap : bool, default=False
        If True, fit on read-only memmap-backed data to check the
        classifier does not rely on writable input.
    """
    X_m, y_m = make_blobs(n_samples=300, random_state=0)
    X_m, y_m = shuffle(X_m, y_m, random_state=7)
    X_m = StandardScaler().fit_transform(X_m)
    # generate binary problem from multi-class one
    y_b = y_m[y_m != 2]
    X_b = X_m[y_m != 2]

    if readonly_memmap:
        X_b, y_b = create_memmap_backed_data([X_b, y_b])

    for (X, y) in [(X_b, y_b)]:
        classes = np.unique(y)
        n_classes = len(classes)
        n_samples, _ = X.shape
        classifier = clone(classifier_orig)
        X = pairwise_estimator_convert_X(X, classifier)
        y = multioutput_estimator_convert_y_2d(classifier, y)

        set_random_state(classifier)
        # raises error on malformed input for fit
        with assert_raises(ValueError, msg="The classifier {} does not "
                           "raise an error when incorrect/malformed input "
                           "data for fit is passed. The number of training "
                           "examples is not the same as the number of labels. "
                           "Perhaps use check_X_y in fit.".format(name)):
            classifier.fit(X, y[:-1])

        # fit
        classifier.fit(X, y)
        # with lists
        classifier.fit(X.tolist(), y.tolist())
        assert hasattr(classifier, "classes_")
        y_pred = classifier.predict(X)
        assert_equal(y_pred.shape, (n_samples,))

        # training set performance
        assert_greater(accuracy_score(y, y_pred), 0.83)

        # raises error on malformed input for predict
        msg = ("The classifier {} does not raise an error when the number of "
               "features in {} is different from the number of features in "
               "fit.")
        with assert_raises(ValueError, msg=msg.format(name, "predict")):
            classifier.predict(X.T)

        if hasattr(classifier, "decision_function"):
            try:
                # decision_function agrees with predict
                decision = classifier.decision_function(X)
                if n_classes == 2:
                    assert_equal(decision.shape, (n_samples, 1))
                    # np.int was removed in NumPy 1.24; the builtin int is
                    # what the alias always meant.
                    dec_pred = (decision.ravel() > 0).astype(int)
                    assert_array_equal(dec_pred, y_pred)
                else:
                    assert_equal(decision.shape, (n_samples, n_classes))
                    assert_array_equal(np.argmax(decision, axis=1), y_pred)

                # raises error on malformed input for decision_function
                with assert_raises(ValueError,
                                   msg=msg.format(name, "decision_function")):
                    classifier.decision_function(X.T)
            except NotImplementedError:
                pass

        if hasattr(classifier, "predict_proba"):
            # predict_proba agrees with predict
            y_prob = classifier.predict_proba(X)
            assert_equal(y_prob.shape, (n_samples, n_classes))
            assert_array_equal(np.argmax(y_prob, axis=1), y_pred)
            # check that probas for all classes sum to one
            assert_array_almost_equal(np.sum(y_prob, axis=1),
                                      np.ones(n_samples))
            # raises error on malformed input for predict_proba
            with assert_raises(ValueError,
                               msg=msg.format(name, "predict_proba")):
                classifier.predict_proba(X.T)
            if hasattr(classifier, "predict_log_proba"):
                # predict_log_proba is a transformation of predict_proba
                y_log_prob = classifier.predict_log_proba(X)
                assert_allclose(y_log_prob, np.log(y_prob), 8, atol=1e-9)
                assert_array_equal(np.argsort(y_log_prob),
                                   np.argsort(y_prob))