Example #1
# Imports inferred from usage; the carl import path is an assumption, since
# the original snippet omits its imports.
import numpy as np
from numpy.testing import assert_array_almost_equal

from carl.distributions import Join, Normal


def test_join():
    p = Join(components=[Normal(mu=0), Normal(mu=1), Normal(mu=2)])
    assert p.ndim == 3
    assert len(p.parameters_) == 6

    X = p.rvs(10000, random_state=1)
    assert X.shape == (10000, 3)
    assert np.abs(np.mean(X[:, 0]) - 0.) < 0.05
    assert np.abs(np.mean(X[:, 1]) - 1.) < 0.05
    assert np.abs(np.mean(X[:, 2]) - 2.) < 0.05
    assert_array_almost_equal(-np.log(p.pdf(X)), p.nll(X))
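Since Join models independent components, its joint pdf should factorize into
the product of the marginal pdfs. A minimal sketch of that check, assuming the
same carl-style API as above (the column-slicing convention X[:, i:i+1] for
feeding one component is an assumption):

q0, q1, q2 = Normal(mu=0), Normal(mu=1), Normal(mu=2)
p = Join(components=[q0, q1, q2])
X = p.rvs(100, random_state=0)
# Product of the marginals, evaluated column by column.
expected = q0.pdf(X[:, 0:1]) * q1.pdf(X[:, 1:2]) * q2.pdf(X[:, 2:3])
assert_array_almost_equal(p.pdf(X), expected)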
Example #2
# Imports inferred from usage; the carl import path is an assumption.
import numpy as np
from numpy.testing import assert_array_almost_equal

from carl.distributions import Histogram, Join, Normal


def test_join_non_theano():
    h0 = Histogram(interpolation="linear", bins=30)
    h1 = Histogram(interpolation="linear", bins=30)
    h2 = Histogram(interpolation="linear", bins=30)

    h0.fit(Normal(mu=0).rvs(10000, random_state=0))
    h1.fit(Normal(mu=1).rvs(10000, random_state=1))
    h2.fit(Normal(mu=2).rvs(10000, random_state=2))

    p = Join(components=[h0, h1, h2])
    assert p.ndim == 3
    assert len(p.parameters_) == 0

    X = p.rvs(10000, random_state=1)
    assert X.shape == (10000, 3)
    assert np.abs(np.mean(X[:, 0]) - 0.) < 0.05
    assert np.abs(np.mean(X[:, 1]) - 1.) < 0.05
    assert np.abs(np.mean(X[:, 2]) - 2.) < 0.05
    assert_array_almost_equal(-np.log(p.pdf(X)), p.nll(X))
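The histogram components carry no Theano parameters, which is why parameters_
is empty here. A short sketch, under the same API assumptions, showing that a
fitted Histogram still exposes pdf like any other component:

h = Histogram(interpolation="linear", bins=30)
h.fit(Normal(mu=0).rvs(5000, random_state=0))
grid = np.linspace(-2., 2., 5).reshape(-1, 1)
print(h.pdf(grid))  # empirical density estimates on a small grid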
Example #3
# Imports inferred from usage (the carl import paths are an assumption);
# draw_distributions is a plotting helper defined elsewhere in the source
# script.
import corner
import matplotlib.pyplot as plt
import numpy
from sklearn.datasets import make_sparse_spd_matrix
from sklearn.utils import check_random_state

from carl.distributions import (Exponential, Join, LinearTransform, Mixture,
                                Normal)


def generate_samples(with_linear_transformation=False,
                     add_variation=False,
                     n_samples=50000,
                     verbose=True):
    """
    Generate 5 independent variables: two Gaussian, mixture of Gaussian, two exponents. 
    Two Gaussian have different means for original and target distributions.
    
    if with_linear_transformation is True then add linear transformation of generated 5 variables.
    
    if add_variation is True then add random values in variance to obtain gaussian pdf 
    for orignal and target samples not only with different mean but also with different variance.
    
    :param bool with_linear_transformation: apply or not linear transformation for samples features
    :param bool add_variation: make or not different variance for Gaussian distribution for original and target samples.
    :param int n_samples: number of generated samples for original/target distributions. For test samples 2*n_samples will be generated
    :param bool verbose: print and plot additional info during generation.
    
    :return: train original, train target, exact weights for train original, test original, test target, exact weights for test original
    """
    # define linear transformation matrix
    R = make_sparse_spd_matrix(5, alpha=0.5, random_state=7)

    variation_origin, variation_target = (0, 0)
    if add_variation:
        r = check_random_state(42)
        variation_origin, variation_target = r.uniform() / 3., r.uniform() / 3.

    p0 = Join(components=[
        Normal(mu=.5, sigma=1 + variation_origin),
        Normal(mu=-.5, sigma=3 + variation_origin),
        Mixture(components=[Normal(mu=-2, sigma=1),
                            Normal(mu=2, sigma=0.5)]),
        Exponential(inverse_scale=3.0),
        Exponential(inverse_scale=0.5)
    ])

    p1 = Join(components=[
        Normal(mu=0, sigma=1 + variation_target),
        Normal(mu=0, sigma=3 + variation_target),
        Mixture(components=[Normal(mu=-2, sigma=1),
                            Normal(mu=2, sigma=0.5)]),
        Exponential(inverse_scale=3.0),
        Exponential(inverse_scale=0.5)
    ])

    if with_linear_transformation:
        p0 = LinearTransform(p0, R)
        p1 = LinearTransform(p1, R)

    X0 = p0.rvs(n_samples, random_state=777)
    X1 = p1.rvs(n_samples, random_state=777)
    exact_weights = numpy.exp(p0.nll(X0) - p1.nll(X0))
    exact_weights[numpy.isinf(exact_weights)] = 0.

    # generate samples to test reweighting rule (to avoid overfitting)
    X0_roc = p0.rvs(2 * n_samples, random_state=777 * 2)
    X1_roc = p1.rvs(2 * n_samples, random_state=777 * 2)
    # Weighted with true ratios
    exact_weights_roc = numpy.exp(p0.nll(X0_roc) - p1.nll(X0_roc))
    exact_weights_roc[numpy.isinf(exact_weights_roc)] = 0.

    if verbose:
        print("Original distribution")
        fig = corner.corner(X0,
                            bins=20,
                            smooth=0.85,
                            labels=["X0", "X1", "X2", "X3", "X4"])
        plt.show()
        print("Target distribution")
        fig = corner.corner(X1,
                            bins=20,
                            smooth=0.85,
                            labels=["X0", "X1", "X2", "X3", "X4"])
        plt.show()
        print("Exact reweighting")
        # In this example we know p0(x) and p1(x) exactly, so we can compare
        # the approximate reweighting approaches against the exact weights.
        draw_distributions(X0, X1, exact_weights)

    return X0, X1, exact_weights, X0_roc, X1_roc, exact_weights_roc
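Since nll(x) = -log p(x), the exact weights exp(p0.nll(X0) - p1.nll(X0)) equal
p1(x) / p0(x) on the original sample, i.e. the true density ratio that
reweights the original distribution onto the target. A hedged usage sketch
(smaller n_samples only to keep it quick):

X0, X1, w, X0_test, X1_test, w_test = generate_samples(
    with_linear_transformation=True,
    add_variation=False,
    n_samples=10000,
    verbose=False)
# The weighted original sample should now match the target sample.
print(X0.shape, w.mean())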
Example #4
# Uses the same imports and the same draw_distributions helper as the
# previous example.
def generate_samples_for_blow_up_demo(n_samples=50000):
    """
    Generate 3 independent Gaussian variables and apply a linear transformation
    to them. The Gaussians have different means and different sigmas for the
    target and original distributions.

    This is an example of samples containing regions with many target samples
    but zero original samples. In such regions the exact reweighting rule blows
    up, and the same happens for the reweighting algorithms.

    :param int n_samples: number of generated samples for the original/target distributions. For the test samples, 2*n_samples are generated.

    :return: train original, train target, exact weights for train original, test original, test target, exact weights for test original
    """
    p0 = Join(components=[
        Normal(mu=1, sigma=0.7),
        Normal(mu=-1, sigma=0.7),
        Normal(mu=1, sigma=1.5)
    ])

    p1 = Join(components=[
        Normal(mu=0, sigma=0.7),
        Normal(mu=0, sigma=0.7),
        Normal(mu=0, sigma=1.5)
    ])

    R = make_sparse_spd_matrix(3, alpha=0.5, random_state=7)
    p0 = LinearTransform(p0, R)
    p1 = LinearTransform(p1, R)

    X0 = p0.rvs(n_samples, random_state=777)
    X1 = p1.rvs(n_samples, random_state=777)
    exact_weights = numpy.exp(p0.nll(X0) - p1.nll(X0))
    exact_weights[numpy.isinf(exact_weights)] = 1.

    # generate samples to test reweighting rule (to avoid overfitting)
    X0_roc = p0.rvs(2 * n_samples, random_state=777 * 2)
    X1_roc = p1.rvs(2 * n_samples, random_state=777 * 2)
    # Weighted with true ratios
    exact_weights_roc = numpy.exp(p0.nll(X0_roc) - p1.nll(X0_roc))
    exact_weights_roc[numpy.isinf(exact_weights_roc)] = 1.

    draw_distributions(X0, X1, numpy.ones(len(X0)))
    print "Exact weights are used (inf weights are set to 1)"
    draw_distributions(X0, X1, exact_weights)

    return X0, X1, exact_weights, X0_roc, X1_roc, exact_weights_roc
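A hedged usage sketch for the blow-up demo: wherever p0(x) is close to zero
while p1(x) is not, the ratio p1(x) / p0(x) explodes, which shows up as a
heavy right tail in the exact weights:

X0, X1, w, X0_test, X1_test, w_test = \
    generate_samples_for_blow_up_demo(n_samples=10000)
print(numpy.percentile(w, [50, 90, 99, 100]))  # heavy right tail where p0 ~ 0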
Example #5
# Simulator
#
# Imports inferred from usage (the carl paths are an assumption); true_theta
# and rng are assumed to be defined earlier in the source script.
import numpy as np
import theano

from carl.distributions import (Exponential, Join, LinearTransform, Mixture,
                                Normal)

A = theano.shared(true_theta[0], name="A")
B = theano.shared(true_theta[1], name="B")
R = np.array([[1.31229955, 0.10499961, 0.48310515, -0.3249938, -0.26387927],
              [0.10499961, 1.15833058, -0.55865473, 0.25275522, -0.39790775],
              [0.48310515, -0.55865473, 2.25874579, -0.52087938, -0.39271231],
              [-0.3249938, 0.25275522, -0.52087938, 1.4034925, -0.63521059],
              [-0.26387927, -0.39790775, -0.39271231, -0.63521059, 1.]])

p0 = LinearTransform(
    Join(components=[
        Normal(mu=A, sigma=1),
        Normal(mu=B, sigma=3),
        Mixture(components=[Normal(mu=-2, sigma=1),
                            Normal(mu=2, sigma=0.5)]),
        Exponential(inverse_scale=3.0),
        Exponential(inverse_scale=0.5)
    ]), R)


def simulator(theta, n_samples, random_state=None):
    # Update the shared Theano parameters in place, then sample from p0.
    A.set_value(theta[0])
    B.set_value(theta[1])
    return p0.rvs(n_samples, random_state=random_state)


X_obs = simulator(true_theta, 20000, random_state=rng)
n_params = len(true_theta)
n_features = X_obs.shape[1]
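Because A and B are Theano shared variables, evaluating the simulator at a new
theta only updates them in place; the distribution p0 is not rebuilt. A minimal
sketch, with true_theta and rng assumed to be defined as above:

X_alt = simulator([1.0, -1.0], 5000, random_state=rng)
print(X_alt.shape)  # (5000, 5): five features from the Join above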