def make_ratio(num):
    """Fit a calibrated classifier-based density ratio between Xs[num] and X1_.

    Builds a labelled dataset (0 = numerator rows, 1 = denominator rows),
    trains a Keras classifier wrapped in a calibrated ClassifierRatio,
    prints a log-loss diagnostic on the first 30% of the rows, and returns
    the fitted ratio object.

    Parameters
    ----------
    num : int
        Index into the module-level ``Xs`` list selecting the numerator sample.

    Returns
    -------
    ClassifierRatio
        The fitted ratio estimator.
    """
    X_num = Xs[num]
    X_den = X1_
    X = np.vstack((X_num, X_den))
    # np.int was deprecated in NumPy 1.20 and removed in 1.24; the builtin
    # int is the documented replacement and yields the same dtype.
    y = np.zeros(len(X_num) + len(X_den), dtype=int)
    y[len(X_num):] = 1

    clf = KerasClassifier(make_model_join, nb_epoch=50, verbose=0)

    cv = StratifiedShuffleSplit(n_iter=1, test_size=0.5, random_state=1)

    ratio = ClassifierRatio(
        base_estimator=CalibratedClassifierCV(clf, cv=cv, bins=20),
        random_state=0)
    ratio.fit(X, y)

    # NOTE(review): sklearn's log_loss signature is (y_true, y_pred); here the
    # predictions are passed first and the labels second, and hard class
    # predictions (not probabilities) are used — confirm the intended order
    # before relying on this diagnostic value.
    print('Loss {0} : {1}'.format(num, log_loss(ratio.classifier_.classifiers_[0].
                   predict(X[:int(len(X)*0.3)]), y[:int(len(X)*0.3)])))

    return ratio
def make_ratio(num):
    """Fit a calibrated classifier-based density ratio between Xs_s[num] and X1_s.

    Same construction as the Keras variant but using an ExtraTreesClassifier
    as the base discriminator: label 0 = numerator rows, 1 = denominator rows;
    the classifier is calibrated via CalibratedClassifierCV inside a
    ClassifierRatio, a log-loss diagnostic is printed, and the fitted ratio
    is returned.

    Parameters
    ----------
    num : int
        Index into the module-level ``Xs_s`` list selecting the numerator sample.

    Returns
    -------
    ClassifierRatio
        The fitted ratio estimator.
    """
    X_num = Xs_s[num]
    X_den = X1_s
    X = np.vstack((X_num, X_den))
    # np.int was deprecated in NumPy 1.20 and removed in 1.24; the builtin
    # int is the documented replacement and yields the same dtype.
    y = np.zeros(len(X_num) + len(X_den), dtype=int)
    y[len(X_num):] = 1

    clf = ExtraTreesClassifier(n_estimators=100, min_samples_split=20, random_state=0, n_jobs=-1)

    cv = StratifiedShuffleSplit(n_iter=3, test_size=0.5, random_state=1)

    ratio = ClassifierRatio(
        base_estimator=CalibratedClassifierCV(clf, cv=cv, bins=20),
        random_state=0)
    ratio.fit(X, y)

    # NOTE(review): sklearn's log_loss signature is (y_true, y_pred); here the
    # predictions are passed first and the labels second — confirm the
    # intended order before relying on this diagnostic value.
    print('Loss {0} : {1}'.format(num, log_loss(ratio.classifier_.classifiers_[0].
                   predict(X[:int(len(X)*0.3)]), y[:int(len(X)*0.3)])))

    return ratio
# Example #3
# 0
def test_decomposed_ratio_identity():
    """A decomposed ratio of a mixture against itself must be identically 1."""
    mix = Mixture(
        components=[Normal(mu=0.0), Normal(mu=0.25), Normal(mu=0.5)],
        weights=[0.45, 0.1, 0.45])

    ratio = DecomposedRatio(
        ClassifierRatio(CalibratedClassifierCV(base_estimator=ElasticNetCV())))
    ratio.fit(numerator=mix, denominator=mix, n_samples=10000)

    grid = np.linspace(-0.5, 1.0, num=100).reshape(-1, 1)
    # p/p == 1 everywhere, so the score is exact and the predicted ratio
    # should be 1 (log-ratio 0) at every grid point.
    assert ratio.score(grid, mix.pdf(grid) / mix.pdf(grid)) == 0.0
    assert_array_almost_equal(ratio.predict(grid), np.ones(len(grid)))
    assert_array_almost_equal(ratio.predict(grid, log=True),
                              np.zeros(len(grid)))
# Example #4
# 0
def test_classifier_ratio_identity():
    """Ratio of a distribution to itself is 1 everywhere (log-ratio 0)."""
    dist = Normal(mu=0.0)
    ratio = ClassifierRatio(
        CalibratedClassifierCV(base_estimator=ElasticNetCV()))
    ratio.fit(numerator=dist, denominator=dist, n_samples=10000)

    xs = np.linspace(-0.5, 1.0, num=100).reshape(-1, 1)
    # Identical numerator and denominator: exact score, unit ratio.
    assert ratio.score(xs, dist.pdf(xs) / dist.pdf(xs)) == 0.0
    assert_array_almost_equal(ratio.predict(xs), np.ones(len(xs)))
    assert_array_almost_equal(ratio.predict(xs, log=True), np.zeros(len(xs)))
# Example #5
# 0
def test_decomposed_ratio():
    """Decomposed ratio between two mixtures approximates the true pdf ratio."""
    parts = [Normal(mu=0.0), Normal(mu=0.25), Normal(mu=0.5)]
    numerator = Mixture(components=parts, weights=[0.45, 0.1, 0.45])
    denominator = Mixture(components=[parts[0]] + [parts[2]])

    ratio = DecomposedRatio(
        ClassifierRatio(CalibratedClassifierCV(base_estimator=ElasticNetCV())))
    ratio.fit(numerator=numerator, denominator=denominator, n_samples=10000)

    grid = np.linspace(-0.5, 1.0, num=100).reshape(-1, 1)
    # Fitted ratio should score close to the true pdf ratio, and predict()
    # must be internally consistent between linear and log outputs.
    assert ratio.score(grid, numerator.pdf(grid) / denominator.pdf(grid)) > -0.1
    deviation = np.abs(np.log(ratio.predict(grid)) -
                       ratio.predict(grid, log=True))
    assert np.mean(deviation) < 0.01
# Example #6
# 0
def test_classifier_ratio_identity():
    """Self-ratio sanity check: p/p is 1, so prediction is 1 and log is 0."""
    p_same = Normal(mu=0.0)
    calibrated = CalibratedClassifierCV(base_estimator=ElasticNetCV())
    ratio = ClassifierRatio(calibrated)
    ratio.fit(numerator=p_same, denominator=p_same, n_samples=10000)

    points = np.linspace(-0.5, 1.0, num=100).reshape(-1, 1)
    expected = p_same.pdf(points) / p_same.pdf(points)
    assert ratio.score(points, expected) == 0.0
    assert_array_almost_equal(ratio.predict(points), np.ones(len(points)))
    assert_array_almost_equal(ratio.predict(points, log=True),
                              np.zeros(len(points)))
# Example #7
# 0
def check_classifier_ratio(clf, method, cv):
    """Check a ClassifierRatio built from `clf` via both fitting interfaces.

    First fits the ratio directly from the numerator/denominator
    distributions, then from an explicit labelled sample (X, y), asserting in
    both cases that the score against the true pdf ratio is acceptable and
    that predict() is consistent between linear and log outputs.

    Parameters
    ----------
    clf : estimator
        Base classifier for CalibratedClassifierCV.
    method : str
        Calibration method forwarded to CalibratedClassifierCV.
    cv : int or cross-validation generator
        CV strategy forwarded to CalibratedClassifierCV.
    """
    # Passing distributions directly
    p0 = Normal(mu=0.0)
    p1 = Normal(mu=0.1)

    ratio = ClassifierRatio(CalibratedClassifierCV(base_estimator=clf,
                                                   method=method,
                                                   cv=cv))
    ratio.fit(numerator=p0, denominator=p1, n_samples=10000)

    reals = np.linspace(-1, 1, num=100).reshape(-1, 1)
    assert ratio.score(reals, p0.pdf(reals) / p1.pdf(reals)) > -0.1
    assert np.mean(np.abs(np.log(ratio.predict(reals)) -
                          ratio.predict(reals, log=True))) < 0.01

    # Passing X, y only
    X = np.vstack((p0.rvs(5000), p1.rvs(5000)))
    # np.int was deprecated in NumPy 1.20 and removed in 1.24; the builtin
    # int is the documented replacement and yields the same dtype.
    y = np.zeros(10000, dtype=int)
    y[5000:] = 1

    ratio = ClassifierRatio(CalibratedClassifierCV(base_estimator=clf,
                                                   method=method,
                                                   cv=cv))
    ratio.fit(X=X, y=y)

    reals = np.linspace(-1, 1, num=100).reshape(-1, 1)
    assert ratio.score(reals, p0.pdf(reals) / p1.pdf(reals)) > -0.1
    assert np.mean(np.abs(np.log(ratio.predict(reals)) -
                          ratio.predict(reals, log=True))) < 0.01
# Example #8
# 0
def reconstruct_ratio_using_estimated_pdfs(classifier,
                                           classifier_parameters,
                                           cv_val=3,
                                           with_linear_transformation=False,
                                           add_variation=False,
                                           n_samples=50000,
                                           verbose=True,
                                           inverse_weights=False,
                                           test_by_ML_GB=False):
    """
    Reconstruct weights by discriminative classifiers (calibrated and non-calibrated)
    from `carl` on the generated samples defined by function `generate_samples`.

    For each parameter set in `classifier_parameters`, a clone of `classifier`
    is configured, wrapped in a ClassifierRatio, fitted on the stacked
    original/target samples, and used to predict per-event weights on the
    test sample; the predicted weights are plotted against the exact ones.
    """
    original, target, exact_weights, original_test, target_test, exact_weights_test = \
        generate_samples(with_linear_transformation=with_linear_transformation,
                         add_variation=add_variation, n_samples=n_samples, verbose=verbose)

    predicted_weights = []
    for params in classifier_parameters:
        if verbose:
            # Python-2 print statements are syntax errors in Python 3 and
            # inconsistent with the print() calls used elsewhere in this file.
            print("Used parameters ", params)
        classifier_clone = clone(classifier)
        classifier_clone.set_params(**params)
        ratio = ClassifierRatio(base_estimator=classifier_clone,
                                random_state=42)

        # reformat X0 and X1 into training data: label 1 = original, 0 = target
        X = numpy.vstack((original, target))
        y = numpy.array([1] * original.shape[0] + [0] * target.shape[0])

        # fit the ratio
        ratio.fit(X, y)

        carl_weights_test = ratio.predict(original_test, log=False)
        # Replace infinite ratio predictions with 0 so plots stay finite.
        carl_weights_test[numpy.isinf(carl_weights_test)] = 0.
        predicted_weights.append(carl_weights_test)

        # plot 1d distribution for test sample
        if verbose:
            run_verbose_info(original_test,
                             target_test,
                             carl_weights_test,
                             exact_weights_test,
                             cv_val=cv_val)
    regime = [False]
    if inverse_weights:
        regime = [True, False]

    for inverse in regime:
        plt.figure(figsize=(len(classifier_parameters) * 5, 4))
        m = len(predicted_weights)
        for n, (weights, params) in enumerate(
                zip(predicted_weights, classifier_parameters)):
            plt.subplot(1, m, n + 1)
            if inverse:
                plot_scatter_weights(1. / exact_weights_test,
                                     1. / weights,
                                     title="Inverse weights for\n" +
                                     str(params))
            else:
                plot_scatter_weights(exact_weights_test,
                                     weights,
                                     title="Weights for\n" + str(params))
# Example #9
# 0
def check_classifier_ratio(clf, method, cv):
    """Check a ClassifierRatio built from `clf` via both fitting interfaces.

    Mirrors the earlier variant: first fits from the distributions directly,
    then from an explicit labelled (X, y) sample, asserting the score against
    the true pdf ratio and the consistency of linear vs. log predictions.

    Parameters
    ----------
    clf : estimator
        Base classifier for CalibratedClassifierCV.
    method : str
        Calibration method forwarded to CalibratedClassifierCV.
    cv : int or cross-validation generator
        CV strategy forwarded to CalibratedClassifierCV.
    """
    # Passing distributions directly
    p0 = Normal(mu=0.0)
    p1 = Normal(mu=0.1)

    ratio = ClassifierRatio(
        CalibratedClassifierCV(base_estimator=clf, method=method, cv=cv))
    ratio.fit(numerator=p0, denominator=p1, n_samples=10000)

    reals = np.linspace(-1, 1, num=100).reshape(-1, 1)
    assert ratio.score(reals, p0.pdf(reals) / p1.pdf(reals)) > -0.1
    assert np.mean(
        np.abs(np.log(ratio.predict(reals)) -
               ratio.predict(reals, log=True))) < 0.01

    # Passing X, y only
    X = np.vstack((p0.rvs(5000), p1.rvs(5000)))
    # np.int was deprecated in NumPy 1.20 and removed in 1.24; the builtin
    # int is the documented replacement and yields the same dtype.
    y = np.zeros(10000, dtype=int)
    y[5000:] = 1

    ratio = ClassifierRatio(
        CalibratedClassifierCV(base_estimator=clf, method=method, cv=cv))
    ratio.fit(X=X, y=y)

    reals = np.linspace(-1, 1, num=100).reshape(-1, 1)
    assert ratio.score(reals, p0.pdf(reals) / p1.pdf(reals)) > -0.1
    assert np.mean(
        np.abs(np.log(ratio.predict(reals)) -
               ratio.predict(reals, log=True))) < 0.01