def test_uniform_range_bounds(self):
    # Checks how rnd.uniform treats infinite and extreme-but-finite bounds.
    float_info = np.finfo('float')
    lowest, highest = float_info.min, float_info.max

    # An infinite endpoint on either side must be rejected with OverflowError.
    for low, high in ((-np.inf, 0), (0, np.inf)):
        np.testing.assert_raises(OverflowError, rnd.uniform, low, high)

    # The full finite float range must be accepted: a sample can be formed as
    # fmin*u + fmax*(1-u) for 0<u<1, which never leaves the representable range.
    rnd.uniform(lowest, highest)

    # (fmax / 1e17) - fmin is itself within range, so this must not raise either.
    rnd.uniform(low=lowest, high=highest / 1e17)
def make_propensity_based_simulated_labeler(treat_strength, con_strength, noise_level,
                                            base_propensity_scores, example_indices,
                                            exogeneous_con=0., setting="simple", seed=42):
    """Build a labeler that attaches simulated treatments and outcomes to examples.

    Treatments are drawn once, up front, from propensity scores derived from
    `base_propensity_scores`; outcomes are produced per example by `outcome_sim`
    (defined elsewhere) and thresholded against a per-example uniform draw.

    Args:
        treat_strength, con_strength, noise_level, setting: forwarded unchanged
            to `outcome_sim`.
        base_propensity_scores: 1-D array of propensity scores, one per scored
            example (its `shape[0]` sizes every per-example table below).
        example_indices: integer array giving, for each scored example, its
            index in the full corpus. Indices must be below 12000.
        exogeneous_con: mixing weight in logit space between the base
            propensity (weight `1 - exogeneous_con`) and standard-normal
            extra confounding (weight `exogeneous_con`).
        seed: seed passed to `np.random.seed` before any draws are made.

    Returns:
        A function `labeler(data)` that takes a dict containing an 'index'
        entry and returns a copy of it extended with 'outcome', 'y0', 'y1'
        and 'treatment'.
    """
    # NOTE: seeds NumPy's global generator; the draws below come from `random`
    # (presumably numpy.random -- confirm against the file's imports).
    np.random.seed(seed)
    num_examples = base_propensity_scores.shape[0]

    # The order of these draws determines the values obtained for a given seed.
    all_noise = random.normal(0, 1, num_examples).astype(np.float32)
    all_threshholds = np.array(random.uniform(0, 1, num_examples), dtype=np.float32)
    extra_confounding = random.normal(0, 1, num_examples).astype(np.float32)

    all_propensity_scores = expit(
        (1. - exogeneous_con) * logit(base_propensity_scores) + exogeneous_con * extra_confounding
    ).astype(np.float32)
    all_treatments = random.binomial(1, all_propensity_scores).astype(np.int32)

    # indices in dataset refer to locations in entire corpus,
    # but propensity scores will typically only include a subset of the examples
    # reindex_hack maps a corpus index to its row in the per-example tables above
    # (corpus indices that are not in example_indices map to row 0).
    reindex_hack = np.zeros(12000, dtype=np.int32)
    reindex_hack[example_indices] = np.arange(example_indices.shape[0], dtype=np.int32)

    def labeler(data):
        index = data['index']
        index_hack = tf.gather(reindex_hack, index)

        treatment = tf.gather(all_treatments, index_hack)
        confounding = 3.0 * (tf.gather(all_propensity_scores, index_hack) - 0.25)
        noise = tf.gather(all_noise, index_hack)

        y, y0, y1 = outcome_sim(treat_strength, con_strength, noise_level,
                                tf.cast(treatment, tf.float32), confounding, noise,
                                setting=setting)
        simulated_prob = tf.nn.sigmoid(y)
        y0 = tf.nn.sigmoid(y0)
        y1 = tf.nn.sigmoid(y1)

        # The threshold table has one row per scored example, like the other
        # tables, so it must be read through the remapped index. Using the raw
        # corpus index could select another example's row or fall outside the table.
        threshold = tf.gather(all_threshholds, index_hack)
        simulated_outcome = tf.cast(tf.greater(simulated_prob, threshold), tf.int32)

        return {**data, 'outcome': simulated_outcome, 'y0': y0, 'y1': y1, 'treatment': treatment}

    return labeler
def make_buzzy_based_simulated_labeler(treat_strength, con_strength, noise_level, setting="simple", seed=0):
    """Build a labeler that simulates outcomes confounded by the 'buzzy_title' flag.

    The treatment is read from the example itself ('theorem_referenced'); only
    the outcome and the two potential outcomes are simulated, via `outcome_sim`
    (defined elsewhere), to which `treat_strength`, `con_strength`,
    `noise_level` and `setting` are forwarded unchanged.

    Returns:
        A function `labeler(data)` that takes a dict with 'buzzy_title',
        'index' and 'theorem_referenced' entries and returns a copy of it
        extended with 'outcome', 'y0' and 'y1'.
    """
    # hardcode probability of theorem given buzzy / not_buzzy
    # (looked up by the value of 'buzzy_title' inside the labeler)
    theorem_prob_by_buzzy = np.array([0.27, 0.07], dtype=np.float32)

    # Seeds NumPy's global generator; noise is drawn before thresholds.
    np.random.seed(seed)
    table_size = 12000  # presumably an upper bound on the example index -- confirm
    noise_table = np.asarray(random.normal(0, 1, table_size), dtype=np.float32)
    threshold_table = np.asarray(random.uniform(0, 1, table_size), dtype=np.float32)

    def labeler(data):
        buzzy_flag = data['buzzy_title']
        example_index = data['index']
        treatment = tf.cast(data['theorem_referenced'], tf.float32)

        confounding = 3.0 * (tf.gather(theorem_prob_by_buzzy, buzzy_flag) - 0.25)
        noise = tf.gather(noise_table, example_index)

        y, y0, y1 = outcome_sim(treat_strength, con_strength, noise_level,
                                treatment, confounding, noise, setting=setting)
        outcome_prob = tf.nn.sigmoid(y)
        y0 = tf.nn.sigmoid(y0)
        y1 = tf.nn.sigmoid(y1)

        # The outcome is 1 when the simulated probability exceeds the
        # example's fixed uniform threshold, and 0 otherwise.
        cutoff = tf.gather(threshold_table, example_index)
        outcome = tf.cast(tf.greater(outcome_prob, cutoff), tf.int32)

        return {
            **data,
            'outcome': outcome,
            'y0': y0,
            'y1': y1,
        }

    return labeler
def symmetric_random_walk(size, seed=None, scale=1.0, normalize=True):
    """Generate uniform noise whose amplitude follows a symmetric random walk.

    A +/-1 random walk of length ``size[0]`` provides a noise level per time
    step; unit-variance uniform noise of shape ``size`` is then multiplied by
    that level along the first axis.

    Args:
        size: sequence of ints; ``size[0]`` is the number of time steps and
            the remaining entries are the per-step noise dimensions.
        seed: value passed to ``rnd.seed``; when None the generator is
            re-seeded without an argument.
        scale: factor applied to the noise levels (after normalization, when
            enabled).
        normalize: when true, the walk is min-max normalized to [0, 1] before
            scaling. A walk with no spread (a single time step) is mapped
            to 0.

    Returns:
        A pair ``(noise, random_walk)`` of float32 arrays: ``noise`` has shape
        ``size``; ``random_walk`` has shape ``(size[0], 1, ..., 1)`` with one
        singleton axis per noise dimension.
    """
    # 0. Preparation
    if seed is not None:
        rnd.seed(seed)
    else:
        rnd.seed()
    time_steps = size[0]
    num_dims = len(size) - 1

    # 1. Generate random walk noise levels from +/-1 steps.
    #    Work in float from here on: in-place scaling of an integer array by a
    #    float raises a casting error in NumPy.
    steps = 2 * rnd.randint(2, size=time_steps) - 1
    random_walk = np.cumsum(steps).astype(np.float64)

    # 2. Normalize random walk noise levels to the range [0,1]
    if normalize and random_walk.size:
        # Subtract the minimum (rather than adding its absolute value) so that
        # walks that never drop to zero or below are still shifted down to 0.
        random_walk -= random_walk.min()
        peak = random_walk.max()
        if peak > 0:  # a single-step walk has no spread; avoid 0/0
            random_walk /= peak

    # 3. Scale random walk noise levels to max_level
    random_walk = random_walk * scale

    # 4. Generate noise: uniform on [-sqrt(12)/2, sqrt(12)/2] has unit variance
    half_width = np.sqrt(12) / 2
    noise = rnd.uniform(low=-half_width, high=half_width, size=size)

    # 5. Scale noise to desired std over time_steps (broadcast along axis 0)
    random_walk = random_walk.reshape((-1, ) + (1, ) * num_dims)
    noise *= random_walk

    return noise.astype(np.float32), random_walk.astype(np.float32)
def test_uniform(self):
    # A seeded (3, 2) draw from rnd.uniform with bounds 1.23 and 10.54 must
    # reproduce the recorded reference values to 10 decimal places.
    rnd.seed(self.seed, self.brng)

    expected = np.array([
        [10.38982478047721, 1.408218254214153],
        [2.8756430713785814, 7.836974412682466],
        [6.057706432128325, 10.426505200380925],
    ])
    sample = rnd.uniform(low=1.23, high=10.54, size=(3, 2))

    np.testing.assert_array_almost_equal(sample, expected, decimal=10)