Example #1
def parametric_likelihood(node, data, dtype=np.float64):
    assert len(node.scope) == 1, node.scope

    probs = np.ones((data.shape[0], 1), dtype=dtype)

    if data.shape[1] > 1:
        data = data[:, node.scope]

    assert data.shape[1] == 1, data.shape

    # NaN entries mark marginalized variables: their likelihood stays at 1
    marg_ids = np.isnan(data)

    if isinstance(node, (Gaussian, LogNormal, Exponential)):
        scipy_obj, params = get_scipy_obj_params(node)
        probs[~marg_ids] = scipy_obj.pdf(data[~marg_ids], **params)

    elif isinstance(node, Gamma):
        scipy_obj, params = get_scipy_obj_params(node)
        data_m = data[~marg_ids]
        data_m[data_m == 0] += POS_EPS
        probs[~marg_ids] = scipy_obj.pdf(data_m, **params)

    elif isinstance(node, (Poisson, Bernoulli, Geometric)):
        scipy_obj, params = get_scipy_obj_params(node)
        probs[~marg_ids] = scipy_obj.pmf(data[~marg_ids], **params)

    elif isinstance(node, NegativeBinomial):
        raise ValueError('Mismatch with scipy')
    elif isinstance(node, Hypergeometric):
        raise ValueError('Mismatch with wiki')
    elif isinstance(node, Categorical):
        # force integer casting of the categorical observations
        cat_data = data.astype(np.int64)
        assert np.all(np.equal(np.mod(cat_data[~marg_ids], 1), 0))
        # values outside the node's domain get probability 0
        out_domain_ids = cat_data >= node.k
        probs[~marg_ids & out_domain_ids] = 0
        probs[~marg_ids & ~out_domain_ids] = np.array(
            node.p)[cat_data[~marg_ids & ~out_domain_ids]]
    elif isinstance(node, CategoricalDictionary):
        dict_probs = [node.p.get(val, 0.0) for val in data[~marg_ids]]
        probs[~marg_ids] = dict_probs
    elif isinstance(node, Uniform):
        probs[~marg_ids] = node.density
    else:
        raise Exception("Unknown parametric " + str(type(node)))

    return probs
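
The masking pattern above is what implements marginalization: rows whose value is NaN keep a likelihood of 1, and every other row is filled with the matching SciPy density or mass. Below is a minimal standalone sketch of that pattern, with a plain scipy.stats.norm standing in for the node's get_scipy_obj_params machinery; the parameters and data are made up for illustration.

import numpy as np
from scipy import stats

# one column of data with a missing (marginalized) entry
data = np.array([[0.3], [np.nan], [-1.2]])

probs = np.ones((data.shape[0], 1))
marg_ids = np.isnan(data)                # True where the variable is marginalized
probs[~marg_ids] = stats.norm.pdf(data[~marg_ids], loc=0.0, scale=1.0)

print(probs)                             # the NaN row keeps likelihood 1.0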
Example #2
def discrete_likelihood(node, data=None, dtype=np.float64):
    probs, marg_ids, observations = leaf_marginalized_likelihood(node, data, dtype)
    scipy_obj, params = get_scipy_obj_params(node)
    probs[~marg_ids] = scipy_obj.pmf(observations, **params)
    # clip probabilities away from exactly 0 and 1 so later log computations stay finite
    probs[probs == 1.0] = 0.999999999
    probs[probs == 0.0] = 0.000000001
    return probs
Example #3
def continuous_multivariate_likelihood(node, data=None, dtype=np.float64):
    probs = np.ones((data.shape[0], 1), dtype=dtype)
    observations = data[:, node.scope]
    # multivariate leaves do not support marginalization here: no NaNs allowed
    assert not np.any(np.isnan(data))
    scipy_obj, params = get_scipy_obj_params(node)
    # allow_singular=True lets a degenerate (singular-covariance) distribution be evaluated
    probs[:, 0] = scipy_obj.pdf(observations, allow_singular=True, **params)
    return probs
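
The allow_singular=True keyword matches scipy.stats.multivariate_normal.pdf, so this helper presumably maps the multivariate leaf to that distribution. A standalone sketch of the same call, with made-up parameters and a deliberately rank-deficient covariance:

import numpy as np
from scipy.stats import multivariate_normal

observations = np.array([[0.0, 0.0], [1.0, 1.0]])
# singular covariance: the second dimension is a copy of the first
cov = np.array([[1.0, 1.0], [1.0, 1.0]])

pdf_vals = multivariate_normal.pdf(observations, mean=[0.0, 0.0], cov=cov, allow_singular=True)
print(pdf_vals)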
Example #4
def sample_parametric_node(node, n_samples, data, rand_gen):
    assert isinstance(node, Parametric)
    assert n_samples > 0

    X = None
    if isinstance(node, (Gaussian, Gamma, LogNormal, Poisson, Geometric, Exponential, Bernoulli)):

        scipy_obj, params = get_scipy_obj_params(node)

        X = scipy_obj.rvs(size=n_samples, random_state=rand_gen, **params)

    elif isinstance(node, Categorical):
        X = rand_gen.choice(np.arange(node.k), p=node.p, size=n_samples)

    elif isinstance(node, CategoricalDictionary):
        vals = list(node.p.keys())
        ps = list(node.p.values())
        X = rand_gen.choice(vals, p=ps, size=n_samples)

    else:
        raise Exception("Node type unknown: " + str(type(node)))

    return X
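
The SciPy branch reduces to drawing from the matching distribution with a shared NumPy random state, and the categorical branches reduce to rand_gen.choice over the node's support. A standalone sketch of both patterns, with made-up parameters:

import numpy as np
from scipy import stats

rand_gen = np.random.RandomState(1234)

# continuous leaf: delegate to SciPy's rvs, reusing the shared random state
gaussian_samples = stats.norm.rvs(size=5, random_state=rand_gen, loc=0.0, scale=1.0)

# categorical leaf: draw indices 0..k-1 according to a probability vector
p = [0.2, 0.5, 0.3]
categorical_samples = rand_gen.choice(np.arange(len(p)), p=p, size=5)

print(gaussian_samples)
print(categorical_samples)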
Example #5
    def assert_correct_node_sampling_continuous(self, node, samples, plot):
        node.scope = [0]
        rand_gen = np.random.RandomState(1234)
        samples_gen = sample_parametric_node(node, 1000000, rand_gen)

        if plot:
            import matplotlib.pyplot as plt
            fig, ax = plt.subplots(1, 1)

            x = np.linspace(np.min(samples), np.max(samples), 1000)
            ax.plot(x, likelihood(node, x.reshape(-1, 1)), 'r-', lw=2, alpha=0.6,
                    label=node.__class__.__name__ + ' pdf')
            ax.hist(samples, density=True, histtype='stepfilled', alpha=0.7, bins=1000)
            ax.legend(loc='best', frameon=False)
            plt.show()

        scipy_obj, params = get_scipy_obj_params(node)
        # Kolmogorov-Smirnov test: H0 = the samples follow the node's distribution.
        # We only reject H0 when p < 0.05, so the check passes for p >= 0.05.
        test_outside_samples = kstest(samples, lambda x: scipy_obj.cdf(x, **params))
        self.assertGreaterEqual(test_outside_samples.pvalue, 0.05)

        # the same KS check for the samples drawn via sample_parametric_node
        test_generated_samples = kstest(samples_gen, lambda x: scipy_obj.cdf(x, **params))
        self.assertGreaterEqual(test_generated_samples.pvalue, 0.05)
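
The same Kolmogorov-Smirnov check can be run outside the test harness. A minimal sketch with a plain normal distribution standing in for the node, and made-up parameters:

import numpy as np
from scipy import stats
from scipy.stats import kstest

rand_gen = np.random.RandomState(0)
samples = stats.norm.rvs(size=10000, random_state=rand_gen, loc=2.0, scale=0.5)

# H0: the samples were drawn from N(2.0, 0.5^2); reject when pvalue < 0.05
result = kstest(samples, lambda x: stats.norm.cdf(x, loc=2.0, scale=0.5))
print(result.pvalue >= 0.05)             # expected to hold for these samples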
Example #6
def gamma_likelihood(node, data=None, dtype=np.float64):
    probs, marg_ids, observations = leaf_marginalized_likelihood(node, data, dtype)

    # shift exact zeros by a small epsilon: the Gamma density can be zero or undefined at x = 0
    observations[observations == 0] += POS_EPS

    scipy_obj, params = get_scipy_obj_params(node)
    probs[~marg_ids] = scipy_obj.pdf(observations, **params)
    return probs
Example #7
def discrete_log_likelihood(node, data=None, dtype=np.float64, **kwargs):
    probs, marg_ids, observations = leaf_marginalized_log_likelihood(
        node, data, dtype)
    scipy_obj, params = get_scipy_obj_params(node)
    probs[~marg_ids] = scipy_obj.logpmf(observations, **params)
    # replace -inf log-probabilities (pmf == 0) with a large negative floor
    probs[np.isinf(probs)] = MIN_NEG
    return probs
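
Clamping -inf matters because a zero pmf would otherwise propagate -inf (and then NaN) through sums of log-likelihoods further up the network. A standalone sketch of the idea, with an arbitrary floor standing in for the library's MIN_NEG constant:

import numpy as np
from scipy import stats

MIN_NEG = -1e6                            # assumed stand-in for the library constant

observations = np.array([0, 1, 2])        # 2 is outside the Bernoulli support, so pmf == 0
log_probs = stats.bernoulli.logpmf(observations, p=0.3)

log_probs[np.isinf(log_probs)] = MIN_NEG  # keep downstream sums finite
print(log_probs)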
Example #8
def sample_parametric_node(node, n_samples, data, rand_gen):
    assert isinstance(node, Parametric)
    assert n_samples > 0

    X = None
    if isinstance(node, (Gaussian, Gamma, LogNormal, Poisson, Geometric, Exponential, Bernoulli)):

        scipy_obj, params = get_scipy_obj_params(node)

        X = scipy_obj.rvs(size=n_samples, random_state=rand_gen, **params)

    elif isinstance(node, Categorical):
        X = rand_gen.choice(np.arange(node.k), p=node.p, size=n_samples)

    else:
        raise Exception('Node type unknown: ' + str(type(node)))

    return X
Example #9
def gaussian_likelihood(node,
                        data=None,
                        dtype=np.float64,
                        bmarg=None,
                        ibm=None):
    probs, marg_ids, observations = leaf_marginalized_likelihood(
        node, data, dtype)
    scipy_obj, params = get_scipy_obj_params(node)
    if bmarg:
        # per-entry blend: where the indicator ibm is set use the Gaussian density,
        # elsewhere fall back to the Gaussian cdf at the observation
        ibm = ibm[:, node.scope]
        probs_reliable = np.expand_dims(scipy_obj.pdf(observations, **params), axis=1)
        probs_unreliable = np.expand_dims(
            scipy.stats.norm.cdf(observations, loc=params['loc'], scale=params['scale']), axis=1)
        probs = np.where(ibm, probs_reliable, probs_unreliable)
    else:
        probs[~marg_ids] = scipy_obj.pdf(observations, **params)
    return probs
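
The bmarg branch is a per-entry selection with np.where: the Gaussian density where the indicator is set, the Gaussian cdf elsewhere. A standalone sketch of that selection, with made-up observations and an arbitrary indicator mask:

import numpy as np
from scipy import stats

observations = np.array([0.1, 1.5, -0.3])
ibm = np.array([[True], [False], [True]])    # hypothetical per-entry indicator column

probs_reliable = np.expand_dims(stats.norm.pdf(observations, loc=0.0, scale=1.0), axis=1)
probs_unreliable = np.expand_dims(stats.norm.cdf(observations, loc=0.0, scale=1.0), axis=1)

# density where the indicator is set, cdf elsewhere
probs = np.where(ibm, probs_reliable, probs_unreliable)
print(probs)                                 # shape (3, 1)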
Example #10
def continuous_likelihood(node, data=None, dtype=np.float64, **kwargs):
    probs, marg_ids, observations = leaf_marginalized_likelihood(
        node, data, dtype)
    scipy_obj, params = get_scipy_obj_params(node)
    probs[~marg_ids] = scipy_obj.pdf(observations, **params)
    return probs
Example #11
def poisson_likelihood(node, data=None, dtype=np.float64):
    probs, marg_ids, observations = leaf_marginalized_likelihood(
        node, data, dtype)
    scipy_obj, params = get_scipy_obj_params(node)
    probs[~marg_ids] = scipy_obj.pmf(observations, **params)
    return probs
Example #12
def parametric_log_likelihood(node,
                              data,
                              dtype=np.float64,
                              context=None,
                              node_log_likelihood=None):
    assert len(node.scope) == 1, node.scope

    log_probs = np.zeros((data.shape[0], 1), dtype=dtype)

    if data.shape[1] > 1:
        data = data[:, node.scope]

    assert data.shape[1] == 1, data.shape

    # NaN entries mark marginalized variables: their log-likelihood stays at 0
    marg_ids = np.isnan(data)

    if isinstance(node, (Gaussian, LogNormal, Exponential, Beta, Gumbel, Laplace, Wald, Weibull)):
        scipy_obj, params = get_scipy_obj_params(node)
        log_probs[~marg_ids] = scipy_obj.logpdf(data[~marg_ids], **params)
    elif isinstance(node, Gamma):
        scipy_obj, params = get_scipy_obj_params(node)
        data_m = data[~marg_ids]
        # shift exact zeros by a small epsilon so the Gamma log-density stays finite
        data_m[data_m == 0] += POS_EPS
        log_probs[~marg_ids] = scipy_obj.logpdf(data_m, **params)

    elif isinstance(node, (Poisson, Bernoulli, Geometric)):
        scipy_obj, params = get_scipy_obj_params(node)
        log_probs[~marg_ids] = scipy_obj.logpmf(data[~marg_ids], **params)
    elif isinstance(node, NegativeBinomial):
        raise ValueError('Mismatch with scipy')
    elif isinstance(node, Hypergeometric):
        raise ValueError('Mismatch with wiki')
    elif isinstance(node, Categorical):
        # force integer casting of the categorical observations
        cat_data = data.astype(np.int64)
        assert np.all(np.equal(np.mod(cat_data[~marg_ids], 1), 0))
        # values outside the node's domain get log-probability LOG_ZERO
        out_domain_ids = cat_data >= node.k
        log_probs[~marg_ids & out_domain_ids] = LOG_ZERO
        log_probs[~marg_ids & ~out_domain_ids] = np.log(
            node.p)[cat_data[~marg_ids & ~out_domain_ids]]
    elif isinstance(node, Uniform):
        log_probs[~marg_ids] = np.log(node.density)
    else:
        raise Exception("Unknown parametric " + str(type(node)))

    return log_probs
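
The Categorical branch is a table lookup into log(node.p) with a guard for out-of-domain values. A standalone sketch of that lookup, with a made-up probability vector and an assumed LOG_ZERO floor:

import numpy as np

LOG_ZERO = -1e7                           # assumed stand-in for the library constant
p = np.array([0.1, 0.6, 0.3])             # k = 3 categories
data = np.array([[0.0], [2.0], [5.0], [np.nan]])

log_probs = np.zeros((data.shape[0], 1))
marg_ids = np.isnan(data)                 # NaN rows stay at log(1) = 0
cat_data = np.nan_to_num(data).astype(np.int64)

out_domain_ids = cat_data >= len(p)
log_probs[~marg_ids & out_domain_ids] = LOG_ZERO
log_probs[~marg_ids & ~out_domain_ids] = np.log(p)[cat_data[~marg_ids & ~out_domain_ids]]
print(log_probs.ravel())                  # [log(0.1), log(0.3), LOG_ZERO, 0.0]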