def thompson_sampling(T: int, mu, seed=None, prior_SF=None, **_kwargs):
    """Bernoulli Thompson Sampling against arms with known means ``mu``.

    In every round one value per arm is drawn with ``beta(S + 1, F + 1)``,
    the arm returned by ``rand_argmax`` over those draws is played, and the
    reward is 1 when that round's pre-drawn uniform number is ``<= mu[arm]``
    and 0 otherwise.

    Args:
        T: Number of rounds to play.
        mu: Per-arm success probabilities; its length defines the number
            of arms.
        seed: Forwarded to ``seeded``, which wraps all random draws.
        prior_SF: Optional pair ``(S, F)`` of per-arm success and failure
            counts to start from. The counts are copied, so the caller's
            objects are left untouched.
        **_kwargs: Accepted and ignored.

    Returns:
        Tuple ``(arms_selected, rewards)``: an integer array with the arm
        played in each round and a float array with the 0/1 reward of each
        round, both of length ``T``.
    """
    K_ = len(mu)
    if prior_SF is None:
        S, F = np.zeros((K_, )), np.zeros((K_, ))
    else:
        # The counts are updated in place below; copy them so that a prior
        # shared between several runs is not silently modified.
        prior_S, prior_F = prior_SF
        S = np.array(prior_S, dtype=float)
        F = np.array(prior_F, dtype=float)

    arms_selected = np.zeros((T, )).astype(int)
    rewards = np.zeros((T, ))
    with seeded(seed):
        # Reward noise is drawn up front, one uniform number per round.
        random_numbers = np.random.rand(T)
        for t in range(T):
            theta = [beta(S[i] + 1, F[i] + 1) for i in range(K_)]
            arm_x = rand_argmax(theta)
            reward_y = int(random_numbers[t] <= mu[arm_x])

            arms_selected[t] = arm_x
            rewards[t] = reward_y
            if reward_y == 1:
                S[arm_x] += 1
            else:
                F[arm_x] += 1

    return arms_selected, rewards
def kl_UCB(T: int, mu, f=None, seed=None, faster=True, prior_SF=None, **_kwargs):
    """Bernoulli kl-UCB against arms with known means ``mu``.

    Every arm is first played once in a random order (as far as ``T``
    allows). Afterwards each round plays the arm returned by ``rand_argmax``
    over the indices ``U``, where ``U[i] = sup_KL(mu_hat[i], f(t) / N[i])``.

    Args:
        T: Number of rounds to play.
        mu: Per-arm success probabilities; its length defines the number
            of arms.
        f: Exploration function of the round number; defaults to
            ``default_kl_UCB_func``.
        seed: Forwarded to ``seeded``, which wraps all random draws.
        faster: When true and there are more than four arms, indices are
            refreshed through ``U_keeper.update_U`` instead of being
            recomputed for every arm in every round.
        prior_SF: Optional pair ``(S, F)`` of per-arm success and failure
            counts used to initialise the pull counts and empirical means.
            It is only read, never modified.
        **_kwargs: Accepted and ignored.

    Returns:
        Tuple ``(arms_selected, rewards)``: an integer array with the arm
        played in each round and a float array with the 0/1 reward of each
        round, both of length ``T``.
    """
    if f is None:
        f = default_kl_UCB_func
    K_ = len(mu)
    faster = faster and K_ > 4

    N, mu_hat = np.zeros((K_, )), np.zeros((K_, ))
    if prior_SF is not None:
        S, F = prior_SF
        for arm in range(K_):
            total = S[arm] + F[arm]
            N[arm] = total
            # An arm without prior observations keeps mu_hat = 0. Dividing
            # by a zero count would raise or yield NaN, and a NaN mean would
            # never recover through the incremental updates below.
            if total != 0:
                mu_hat[arm] = S[arm] / total

    ukeeper = U_keeper(K_, T)

    arms_selected = np.zeros((T, )).astype(int)
    rewards = np.zeros((T, ))
    with seeded(seed):
        rands = np.random.rand(T)
        shuffled_arms = np.random.choice(K_, K_, replace=False)
        # Initial pass: one pull per arm in random order, truncated to T so
        # that a horizon shorter than the number of arms does not overrun
        # the output arrays.
        for t, arm_x in enumerate(shuffled_arms[:T]):
            reward_y = int(rands[t] <= mu[arm_x])
            N[arm_x] += 1
            mu_hat[arm_x] += (reward_y - mu_hat[arm_x]) / N[arm_x]

            arms_selected[t] = arm_x
            rewards[t] = reward_y

        if T < K_:
            # Horizon exhausted before every arm was tried; some counts may
            # still be zero, so the indices below must not be computed.
            return arms_selected, rewards

        U = np.array([sup_KL(mu_hat[i], f(K_) / N[i]) for i in range(K_)])
        for t in range(K_, T):
            # select
            arm_x = rand_argmax(U)
            reward_y = int(rands[t] <= mu[arm_x])

            arms_selected[t] = arm_x
            rewards[t] = reward_y

            # update statistics and indices for the next round
            N[arm_x] += 1
            mu_hat[arm_x] += (reward_y - mu_hat[arm_x]) / N[arm_x]

            if not faster:
                fval = f(t + 1)
                U = np.array(
                    [sup_KL(mu_hat[i], fval / N[i]) for i in range(K_)])
            else:
                # NOTE(review): update_U presumably refreshes U in place
                # for the next round -- confirm against U_keeper.
                ukeeper.update_U(t + 1, f, mu_hat, N, U, arm_x)

    return arms_selected, rewards
def XYZWST_SCM(devised=True, seed=None):
    """Build the XYZWST structural causal model with freshly drawn parameters.

    Args:
        devised: When true, each exogenous variable's parameter is drawn
            from its own hand-picked range; otherwise every parameter is
            drawn from ``[0.01, 0.99]``.
        seed: Forwarded to ``seeded``, which wraps the whole construction.

    Returns:
        Tuple ``(M, mu1)``: the ``StructuralCausalModel`` and the dict that
        maps each exogenous variable name to its value drawn by ``rand_bw``.
    """
    # (name, lower, upper) per exogenous variable. The order is significant:
    # parameters are drawn in this order, so it fixes both the random stream
    # consumption and the key order of ``mu1``.
    if devised:
        ranges = [
            ('U_WX', 0.4, 0.6),
            ('U_YZ', 0.4, 0.6),
            ('U_X', 0.01, 0.1),
            ('U_Y', 0.01, 0.1),
            ('U_Z', 0.01, 0.1),
            ('U_W', 0.01, 0.1),
            ('U_S', 0.1, 0.9),
            ('U_T', 0.1, 0.9),
        ]
    else:
        names = ('U_WX', 'U_YZ', 'U_X', 'U_Y', 'U_Z', 'U_W', 'U_S', 'U_T')
        ranges = [(name, 0.01, 0.99) for name in names]

    with seeded(seed):
        graph = XYZWST('U_WX', 'U_YZ')

        mu1 = {}
        for name, low, high in ranges:
            mu1[name] = rand_bw(low, high, precision=2)

        # Every variable defaults to the binary domain (0, 1).
        binary_domains = defaultdict(lambda: (0, 1))

        # Structural equations over binary values, combined with XOR.
        mechanisms = {
            'S': lambda v: v['U_S'],
            'T': lambda v: v['U_T'],
            'W': lambda v: v['U_W'] ^ v['U_WX'] ^ v['S'],
            'Z': lambda v: v['U_Z'] ^ v['U_YZ'],
            'X': lambda v: 1 ^ v['U_X'] ^ v['Z'] ^ v['U_WX'] ^ v['T'],
            'Y': lambda v: v['U_Y'] ^ v['U_YZ'] ^ v['X'] ^ v['W'] ^ v['T'],
        }
        exogenous_dist = default_P_U(mu1)
        extra_noise = {'U_W', 'U_X', 'U_Y', 'U_Z', 'U_S', 'U_T'}

        model = StructuralCausalModel(
            graph,
            F=mechanisms,
            P_U=exogenous_dist,
            D=binary_domains,
            more_U=extra_noise)
        return model, mu1
def simple_markovian_SCM(seed=None) -> [StructuralCausalModel, dict]:
    """Build the simple Markovian structural causal model with drawn parameters.

    One exogenous variable ``U_<name>`` is created for every variable of the
    graph returned by ``simple_markovian``; its parameter is drawn with
    ``rand_bw(0.1, 0.9, precision=2)``, visiting the variables in sorted
    order.

    Args:
        seed: Forwarded to ``seeded``, which wraps the whole construction.

    Returns:
        Tuple ``(M, mu1)``: the ``StructuralCausalModel`` and the dict that
        maps each exogenous variable name to its drawn value.
    """
    with seeded(seed):
        graph = simple_markovian()

        # Sorted iteration keeps the draw order independent of how the
        # graph stores its variables.
        mu1 = {}
        for var in sorted(graph.V):
            mu1['U_' + var] = rand_bw(0.1, 0.9, precision=2)

        # Every variable defaults to the binary domain (0, 1).
        binary_domains = defaultdict(lambda: (0, 1))

        # Structural equations over binary values.
        mechanisms = {
            'Z1': lambda v: v['U_Z1'],
            'Z2': lambda v: v['U_Z2'],
            'X1': lambda v: v['U_X1'] ^ v['Z1'] ^ v['Z2'],
            'X2': lambda v: 1 ^ v['U_X2'] ^ v['Z1'] ^ v['Z2'],
            'Y': lambda v: v['U_Y'] | (v['X1'] & v['X2']),
        }
        exogenous_dist = default_P_U(mu1)
        noise_names = {'U_' + var for var in graph.V}

        model = StructuralCausalModel(
            graph,
            F=mechanisms,
            P_U=exogenous_dist,
            D=binary_domains,
            more_U=noise_names)
        return model, mu1
def IV_SCM(devised=True, seed=None):
    """Build the instrumental-variable structural causal model (Z, X, Y).

    Args:
        devised: When true, each exogenous variable's parameter is drawn
            from its own hand-picked range; otherwise every parameter is
            drawn from ``[0.01, 0.99]``.
        seed: Forwarded to ``seeded``, which wraps the whole construction.

    Returns:
        Tuple ``(M, mu1)``: the ``StructuralCausalModel`` and the dict that
        maps each exogenous variable name to its value drawn by ``rand_bw``.
    """
    # (name, lower, upper) per exogenous variable. The order is significant:
    # parameters are drawn in this order, so it fixes both the random stream
    # consumption and the key order of ``mu1``.
    if devised:
        ranges = [
            ('U_X', 0.01, 0.2),
            ('U_Y', 0.01, 0.2),
            ('U_Z', 0.01, 0.99),
            ('U_XY', 0.4, 0.6),
        ]
    else:
        ranges = [(name, 0.01, 0.99)
                  for name in ('U_X', 'U_Y', 'U_Z', 'U_XY')]

    with seeded(seed):
        graph = IV_CD()

        mu1 = {}
        for name, low, high in ranges:
            mu1[name] = rand_bw(low, high, precision=2)

        exogenous_dist = default_P_U(mu1)

        # Every variable defaults to the binary domain (0, 1).
        binary_domains = defaultdict(lambda: (0, 1))

        # Structural equations over binary values, combined with XOR.
        mechanisms = {
            'Z': lambda v: v['U_Z'],
            'X': lambda v: v['U_X'] ^ v['U_XY'] ^ v['Z'],
            'Y': lambda v: 1 ^ v['U_Y'] ^ v['U_XY'] ^ v['X'],
        }

        model = StructuralCausalModel(
            graph,
            F=mechanisms,
            P_U=exogenous_dist,
            D=binary_domains,
            more_U={'U_X', 'U_Y', 'U_Z'})
        return model, mu1