Esempio n. 1
0
 def test_n_zero(self):
     """Check the n == 0 corner case of the binomial distribution.

     binomial(0, p) should be zero for any p in [0, 1], both for a scalar
     n and for an array of zeros. This test addresses issue #3480.
     """
     expected = np.zeros(2, dtype='int')
     for prob in (0, .5, 1):
         # Scalar n: a single draw must be exactly zero.
         scalar_draw = rnd.binomial(0, prob)
         assert_(scalar_draw == 0)
         # Array n: every element of the draw must be zero.
         array_draw = rnd.binomial(expected, prob)
         np.testing.assert_array_equal(array_draw, expected)
Esempio n. 2
0
def make_propensity_based_simulated_labeler(treat_strength, con_strength, noise_level,
                                            base_propensity_scores, example_indices, exogeneous_con=0.,
                                            setting="simple", seed=42):
    """Build a labeler that attaches simulated treatments and binary outcomes.

    Per-example noise, outcome thresholds, extra confounding and treatment
    assignments are all drawn once, up front, after seeding NumPy's global
    RNG with ``seed``. The returned closure then looks those values up by
    example index.

    NOTE(review): the draws go through the bare name ``random``, presumably
    ``numpy.random``, so that the seed above governs them -- confirm against
    the file's imports.

    Args:
        treat_strength: forwarded unchanged to ``outcome_sim``.
        con_strength: forwarded unchanged to ``outcome_sim``.
        noise_level: forwarded unchanged to ``outcome_sim``.
        base_propensity_scores: array of propensity scores; its first
            dimension fixes how many values are drawn for each random
            quantity. It is passed through ``logit``, so presumably holds
            probabilities strictly between 0 and 1 -- confirm.
        example_indices: array of corpus-level indices; entry ``k`` is mapped
            to position ``k`` of the per-example arrays. All entries must be
            below 12000 (size of the lookup table built here).
        exogeneous_con: mixing weight; the propensity logit becomes
            ``(1 - exogeneous_con) * logit(base) + exogeneous_con * extra``
            where ``extra`` is standard-normal noise.
        setting: forwarded to ``outcome_sim`` as ``setting=``.
        seed: seed passed to ``np.random.seed``.

    Returns:
        A function taking a dict with an ``'index'`` entry and returning a
        new dict with the same entries plus ``'outcome'``, ``'y0'``, ``'y1'``
        and ``'treatment'``.
    """
    np.random.seed(seed)
    num_scores = base_propensity_scores.shape[0]

    # Keep the draws in this order so results for a given seed stay the same.
    all_noise = random.normal(0, 1, num_scores).astype(np.float32)
    all_thresholds = np.array(random.uniform(0, 1, num_scores), dtype=np.float32)

    extra_confounding = random.normal(0, 1, num_scores).astype(np.float32)

    all_propensity_scores = expit((1.-exogeneous_con)*logit(base_propensity_scores) + exogeneous_con * extra_confounding).astype(np.float32)
    all_treatments = random.binomial(1, all_propensity_scores).astype(np.int32)

    # Indices in the dataset refer to locations in the entire corpus,
    # but propensity scores will typically only include a subset of the examples,
    # so build a corpus-index -> position-in-subset lookup table.
    # NOTE: the table size is hard-coded; corpus indices must be < 12000.
    reindex_hack = np.zeros(12000, dtype=np.int32)
    reindex_hack[example_indices] = np.arange(example_indices.shape[0], dtype=np.int32)

    def labeler(data):
        """Return ``data`` extended with simulated treatment and outcome fields."""
        index = data['index']
        index_hack = tf.gather(reindex_hack, index)
        treatment = tf.gather(all_treatments, index_hack)
        confounding = 3.0 * (tf.gather(all_propensity_scores, index_hack) - 0.25)
        noise = tf.gather(all_noise, index_hack)

        y, y0, y1 = outcome_sim(treat_strength, con_strength, noise_level, tf.cast(treatment, tf.float32), confounding, noise, setting=setting)
        simulated_prob = tf.nn.sigmoid(y)
        y0 = tf.nn.sigmoid(y0)
        y1 = tf.nn.sigmoid(y1)
        # The thresholds are sized like the other per-example arrays, so they
        # must be looked up with the remapped index, not the raw corpus index
        # (which can select the wrong entry or fall outside the array).
        threshold = tf.gather(all_thresholds, index_hack)
        # Binarize: the outcome is 1 when the probability exceeds the threshold.
        simulated_outcome = tf.cast(tf.greater(simulated_prob, threshold), tf.int32)

        return {**data, 'outcome': simulated_outcome, 'y0': y0, 'y1': y1, 'treatment': treatment}

    return labeler
Esempio n. 3
0
def make_propensity_based_simulated_labeler(treat_strength,
                                            con_strength,
                                            noise_level,
                                            base_propensity_scores,
                                            example_indices,
                                            exogeneous_con=0.,
                                            setting="simple",
                                            seed=42):
    """Build a labeler that attaches simulated treatments and outcome scores.

    Per-example noise, extra confounding and treatment assignments are drawn
    once, up front, after seeding NumPy's global RNG with ``seed``. The
    returned closure looks those values up by example index and passes them
    to ``outcome_sim``.

    Args:
        treat_strength: forwarded unchanged to ``outcome_sim``.
        con_strength: forwarded unchanged to ``outcome_sim``.
        noise_level: forwarded unchanged to ``outcome_sim``.
        base_propensity_scores: array of propensity scores; its first
            dimension fixes how many values are drawn per random quantity.
        example_indices: array of corpus-level indices; entry ``k`` is mapped
            to position ``k`` of the per-example arrays. All entries must be
            below 422206 (size of the lookup table built here).
        exogeneous_con: mixing weight between the base propensity logit and
            the standard-normal extra confounding term.
        setting: forwarded to ``outcome_sim`` as ``setting=``.
        seed: seed passed to ``np.random.seed``.

    Returns:
        A function taking a dict with an ``'index'`` entry and returning a
        new dict with the same entries plus ``'outcome'``, ``'y0'``, ``'y1'``
        and ``'treatment'``.
    """
    np.random.seed(seed)

    # Keep the draws in this order so results for a given seed stay the same.
    all_noise = random.normal(
        0, 1, base_propensity_scores.shape[0]).astype(np.float32)
    extra_confounding = random.normal(
        0, 1, base_propensity_scores.shape[0]).astype(np.float32)

    # Blend the base logit with the exogenous term, then map back to (0, 1).
    blended_logits = ((1. - exogeneous_con) * logit(base_propensity_scores) +
                      exogeneous_con * extra_confounding)
    all_propensity_scores = expit(blended_logits).astype(np.float32)
    all_treatments = random.binomial(1, all_propensity_scores).astype(np.int32)

    # Indices in the dataset refer to locations in the entire corpus,
    # but propensity scores will typically only include a subset of the
    # examples, so build a corpus-index -> position-in-subset lookup table.
    subset_positions = np.arange(example_indices.shape[0], dtype=np.int32)
    reindex_hack = np.zeros(422206, dtype=np.int32)
    reindex_hack[example_indices] = subset_positions

    def labeler(data):
        """Return ``data`` extended with simulated treatment and outcome fields."""
        corpus_index = data['index']
        subset_index = tf.gather(reindex_hack, corpus_index)

        treatment = tf.gather(all_treatments, subset_index)
        propensity = tf.gather(all_propensity_scores, subset_index)
        confounding = propensity - 0.5
        noise = tf.gather(all_noise, subset_index)
        treatment_as_float = tf.cast(treatment, tf.float32)

        simulated_score, y0, y1 = outcome_sim(
            treat_strength, con_strength, noise_level,
            treatment_as_float, confounding, noise,
            setting=setting)

        simulated_fields = {
            'outcome': simulated_score,
            'y0': y0,
            'y1': y1,
            'treatment': treatment,
        }
        return {**data, **simulated_fields}

    return labeler
Esempio n. 4
0
 def test_binomial(self):
     """Check seeded binomial draws against recorded reference values."""
     # Re-seed so the draw below is reproducible.
     rnd.seed(self.seed, self.brng)
     expected = np.array([[43, 48], [55, 48], [46, 53]])
     # NOTE(review): n is passed as the float 100.123; presumably this also
     # exercises handling of a non-integer n -- confirm.
     drawn = rnd.binomial(100.123, .456, size=(3, 2))
     np.testing.assert_array_equal(drawn, expected)