def check_classifier_ratio(clf, method, cv):
    """Check that a calibrated ClassifierRatio recovers p0/p1 for two close
    Gaussians, both when fitted from distributions and from raw (X, y).

    Parameters
    ----------
    clf : estimator
        Base classifier wrapped by ``CalibratedClassifierCV``.
    method : str
        Calibration method passed through to ``CalibratedClassifierCV``.
    cv : int or cross-validation generator
        CV strategy passed through to ``CalibratedClassifierCV``.
    """
    # Passing distributions directly
    p0 = Normal(mu=0.0)
    p1 = Normal(mu=0.1)
    ratio = ClassifierRatio(
        CalibratedClassifierCV(base_estimator=clf, method=method, cv=cv))
    ratio.fit(numerator=p0, denominator=p1, n_samples=10000)

    reals = np.linspace(-1, 1, num=100).reshape(-1, 1)
    # score is a negative-MSE-style criterion: close to 0 means a good fit
    assert ratio.score(reals, p0.pdf(reals) / p1.pdf(reals)) > -0.1
    # log-space prediction must agree with log of the linear-space prediction
    assert np.mean(np.abs(np.log(ratio.predict(reals)) -
                          ratio.predict(reals, log=True))) < 0.01

    # Passing X, y only
    X = np.vstack((p0.rvs(5000), p1.rvs(5000)))
    # NOTE: np.int was deprecated in NumPy 1.20 and removed in 1.24;
    # the builtin int is the documented replacement.
    y = np.zeros(10000, dtype=int)
    y[5000:] = 1
    ratio = ClassifierRatio(
        CalibratedClassifierCV(base_estimator=clf, method=method, cv=cv))
    ratio.fit(X=X, y=y)

    reals = np.linspace(-1, 1, num=100).reshape(-1, 1)
    assert ratio.score(reals, p0.pdf(reals) / p1.pdf(reals)) > -0.1
    assert np.mean(np.abs(np.log(ratio.predict(reals)) -
                          ratio.predict(reals, log=True))) < 0.01
def check_classifier_ratio(clf, method, cv):
    """Check that a calibrated ClassifierRatio recovers p0/p1 for two close
    Gaussians, both when fitted from distributions and from raw (X, y).

    Parameters
    ----------
    clf : estimator
        Base classifier wrapped by ``CalibratedClassifierCV``.
    method : str
        Calibration method passed through to ``CalibratedClassifierCV``.
    cv : int or cross-validation generator
        CV strategy passed through to ``CalibratedClassifierCV``.
    """
    # Passing distributions directly
    p0 = Normal(mu=0.0)
    p1 = Normal(mu=0.1)
    ratio = ClassifierRatio(
        CalibratedClassifierCV(base_estimator=clf, method=method, cv=cv))
    ratio.fit(numerator=p0, denominator=p1, n_samples=10000)

    reals = np.linspace(-1, 1, num=100).reshape(-1, 1)
    # score is a negative-MSE-style criterion: close to 0 means a good fit
    assert ratio.score(reals, p0.pdf(reals) / p1.pdf(reals)) > -0.1
    # log-space prediction must agree with log of the linear-space prediction
    assert np.mean(
        np.abs(np.log(ratio.predict(reals)) -
               ratio.predict(reals, log=True))) < 0.01

    # Passing X, y only
    X = np.vstack((p0.rvs(5000), p1.rvs(5000)))
    # NOTE: np.int was deprecated in NumPy 1.20 and removed in 1.24;
    # the builtin int is the documented replacement.
    y = np.zeros(10000, dtype=int)
    y[5000:] = 1
    ratio = ClassifierRatio(
        CalibratedClassifierCV(base_estimator=clf, method=method, cv=cv))
    ratio.fit(X=X, y=y)

    reals = np.linspace(-1, 1, num=100).reshape(-1, 1)
    assert ratio.score(reals, p0.pdf(reals) / p1.pdf(reals)) > -0.1
    assert np.mean(
        np.abs(np.log(ratio.predict(reals)) -
               ratio.predict(reals, log=True))) < 0.01
def test_classifier_ratio_identity():
    """A distribution divided by itself must yield a ratio of exactly one."""
    p = Normal(mu=0.0)
    ratio = ClassifierRatio(
        CalibratedClassifierCV(base_estimator=ElasticNetCV()))
    ratio.fit(numerator=p, denominator=p, n_samples=10000)

    grid = np.linspace(-0.5, 1.0, num=100).reshape(-1, 1)
    # The true ratio p/p is 1 everywhere, so the score should be perfect.
    assert ratio.score(grid, p.pdf(grid) / p.pdf(grid)) == 0.0
    # Predicted ratios are 1 in linear space and 0 in log space.
    expected_ones = np.ones(len(grid))
    assert_array_almost_equal(ratio.predict(grid), expected_ones)
    assert_array_almost_equal(ratio.predict(grid, log=True),
                              expected_ones - 1.0)
def make_ratio(num):
    """Fit a calibrated ClassifierRatio between ``Xs[num]`` (numerator class 0)
    and ``X1_`` (denominator class 1), print a diagnostic log-loss on the
    first 30% of the stacked sample, and return the fitted ratio.

    Parameters
    ----------
    num : int
        Index selecting the numerator sample from the module-level ``Xs``.

    Returns
    -------
    ClassifierRatio
        The fitted ratio estimator.
    """
    X_num = Xs[num]
    X_den = X1_
    X = np.vstack((X_num, X_den))
    # np.int was deprecated in NumPy 1.20 and removed in 1.24; use builtin int.
    y = np.zeros(len(X_num) + len(X_den), dtype=int)
    y[len(X_num):] = 1

    clf = KerasClassifier(make_model_join, nb_epoch=50, verbose=0)
    cv = StratifiedShuffleSplit(n_iter=1, test_size=0.5, random_state=1)
    ratio = ClassifierRatio(
        base_estimator=CalibratedClassifierCV(clf, cv=cv, bins=20),
        random_state=0)
    ratio.fit(X, y)

    # sklearn's log_loss signature is (y_true, y_pred); the original call
    # passed the predictions first and the labels second, i.e. swapped.
    # NOTE(review): X[:n_eval] is the head of the vstacked sample, so it may
    # contain a single class — confirm this slice is the intended hold-out.
    n_eval = int(len(X) * 0.3)
    preds = ratio.classifier_.classifiers_[0].predict(X[:n_eval])
    print('Loss {0} : {1}'.format(num, log_loss(y[:n_eval], preds)))
    return ratio
def make_ratio(num):
    """Fit a calibrated ClassifierRatio between ``Xs_s[num]`` (numerator,
    class 0) and ``X1_s`` (denominator, class 1) with an ExtraTrees base
    classifier, print a diagnostic log-loss, and return the fitted ratio.

    Parameters
    ----------
    num : int
        Index selecting the numerator sample from the module-level ``Xs_s``.

    Returns
    -------
    ClassifierRatio
        The fitted ratio estimator.
    """
    X_num = Xs_s[num]
    X_den = X1_s
    X = np.vstack((X_num, X_den))
    # np.int was deprecated in NumPy 1.20 and removed in 1.24; use builtin int.
    y = np.zeros(len(X_num) + len(X_den), dtype=int)
    y[len(X_num):] = 1

    clf = ExtraTreesClassifier(n_estimators=100, min_samples_split=20,
                               random_state=0, n_jobs=-1)
    # Alternative base classifier kept for reference:
    # clf = KerasClassifier(make_model_join, nb_epoch=50, verbose=0)
    cv = StratifiedShuffleSplit(n_iter=3, test_size=0.5, random_state=1)
    ratio = ClassifierRatio(
        base_estimator=CalibratedClassifierCV(clf, cv=cv, bins=20),
        random_state=0)
    ratio.fit(X, y)

    # sklearn's log_loss signature is (y_true, y_pred); the original call
    # passed the predictions first and the labels second, i.e. swapped.
    # NOTE(review): X[:n_eval] is the head of the vstacked sample, so it may
    # contain a single class — confirm this slice is the intended hold-out.
    n_eval = int(len(X) * 0.3)
    preds = ratio.classifier_.classifiers_[0].predict(X[:n_eval])
    print('Loss {0} : {1}'.format(num, log_loss(y[:n_eval], preds)))
    return ratio
def reconstruct_ratio_using_estimated_pdfs(classifier, classifier_parameters,
                                           cv_val=3,
                                           with_linear_transformation=False,
                                           add_variation=False,
                                           n_samples=50000,
                                           verbose=True,
                                           inverse_weights=False,
                                           test_by_ML_GB=False):
    """
    Reconstruct weights by discriminative classifiers (calibrated and
    non-calibrated) from `carl` on the generated samples defined by the
    function `generate_samples`.

    Parameters
    ----------
    classifier : estimator
        Base estimator; cloned and re-parameterized for every entry of
        ``classifier_parameters``.
    classifier_parameters : list of dict
        Parameter dictionaries applied to the clone via ``set_params``.
    cv_val : int
        Forwarded to ``run_verbose_info`` when ``verbose`` is true.
    with_linear_transformation, add_variation : bool
        Forwarded to ``generate_samples``.
    n_samples : int
        Number of samples to generate.
    verbose : bool
        Print parameters and draw per-fit diagnostics.
    inverse_weights : bool
        Also plot the scatter of inverse weights.
    test_by_ML_GB : bool
        Currently unused; kept for interface compatibility.
    """
    original, target, exact_weights, original_test, target_test, exact_weights_test = \
        generate_samples(with_linear_transformation=with_linear_transformation,
                         add_variation=add_variation,
                         n_samples=n_samples, verbose=verbose)

    predicted_weights = []
    for params in classifier_parameters:
        if verbose:
            # print() function instead of the Python 2 print statement,
            # consistent with the rest of the file.
            print("Used parameters ", params)
        classifier_clone = clone(classifier)
        classifier_clone.set_params(**params)
        ratio = ClassifierRatio(base_estimator=classifier_clone,
                                random_state=42)
        # Reformat X0 and X1 into training data: originals are class 1,
        # targets are class 0.
        X = numpy.vstack((original, target))
        y = numpy.array([1] * original.shape[0] + [0] * target.shape[0])
        # Fit the ratio
        ratio.fit(X, y)
        carl_weights_test = ratio.predict(original_test, log=False)
        # Infinite ratios (empty calibration bins) are zeroed out.
        carl_weights_test[numpy.isinf(carl_weights_test)] = 0.
        predicted_weights.append(carl_weights_test)
        # plot 1d distribution for test sample
        if verbose:
            run_verbose_info(original_test, target_test, carl_weights_test,
                             exact_weights_test, cv_val=cv_val)

    regime = [False]
    if inverse_weights:
        regime = [True, False]
    for inverse in regime:
        plt.figure(figsize=(len(classifier_parameters) * 5, 4))
        m = len(predicted_weights)
        for n, (weights, params) in enumerate(
                zip(predicted_weights, classifier_parameters)):
            plt.subplot(1, m, n + 1)
            if inverse:
                plot_scatter_weights(1. / exact_weights_test, 1. / weights,
                                     title="Inverse weights for\n" + str(params))
            else:
                plot_scatter_weights(exact_weights_test, weights,
                                     title="Weights for\n" + str(params))