Ejemplo n.º 1
0
def thompson_sampling(T: int, mu, seed=None, prior_SF=None, **_kwargs):
    """Simulate Bernoulli Thompson Sampling against arms with known means.

    In every round one value per arm is drawn with
    ``beta(S[i] + 1, F[i] + 1)``, the arm returned by ``rand_argmax`` over
    those draws is played, and a 0/1 reward is generated by comparing a
    pre-drawn uniform number with ``mu[arm]``.

    Args:
        T: Number of rounds to simulate.
        mu: Per-arm success probabilities; ``len(mu)`` is the number of arms.
        seed: Forwarded to ``seeded``, which wraps all random draws.
        prior_SF: Optional pair ``(S, F)`` of per-arm prior success and
            failure counts. The counts are copied, so the caller's objects
            are never modified and the same prior can be reused across runs.
        **_kwargs: Accepted and ignored.

    Returns:
        A pair ``(arms_selected, rewards)``: an int array and a float array,
        both of length ``T``, holding the arm played and the reward observed
        in each round.
    """
    K_ = len(mu)
    if prior_SF is None:
        S, F = np.zeros((K_, )), np.zeros((K_, ))
    else:
        # Copy the prior: the loop below increments S and F in place, and
        # aliasing the caller's arrays would leak this run's counts into
        # the prior (and fail outright for immutable sequences).
        prior_S, prior_F = prior_SF
        S = np.array(prior_S, dtype=float)
        F = np.array(prior_F, dtype=float)

    arms_selected = np.zeros((T, )).astype(int)
    rewards = np.zeros((T, ))
    with seeded(seed):
        # One uniform per round, drawn up front; round t's reward is
        # 1 exactly when this number is <= the chosen arm's mean.
        random_numbers = np.random.rand(T)

        for t in range(T):
            theta = [beta(S[i] + 1, F[i] + 1) for i in range(K_)]
            arm_x = rand_argmax(theta)
            reward_y = int(random_numbers[t] <= mu[arm_x])

            arms_selected[t] = arm_x
            rewards[t] = reward_y

            # Update the success/failure count of the arm just played.
            if reward_y == 1:
                S[arm_x] += 1
            else:
                F[arm_x] += 1

    return arms_selected, rewards
Ejemplo n.º 2
0
def kl_UCB(T: int,
           mu,
           f=None,
           seed=None,
           faster=True,
           prior_SF=None,
           **_kwargs):
    """Simulate Bernoulli kl-UCB against arms with known means.

    Every arm is first played once in a random order; afterwards the arm
    returned by ``rand_argmax`` over the per-arm indices ``U`` is played.
    Each index is ``sup_KL(mu_hat[i], f(t) / N[i])``.

    Args:
        T: Number of rounds to simulate.
        mu: Per-arm success probabilities; ``len(mu)`` is the number of arms.
        f: Exploration function of the round number; defaults to
            ``default_kl_UCB_func``.
        seed: Forwarded to ``seeded``, which wraps all random draws.
        faster: If true (and there are more than 4 arms), indices are
            refreshed through ``U_keeper.update_U`` instead of being
            recomputed for every arm each round.
        prior_SF: Optional pair ``(S, F)`` of per-arm prior success and
            failure counts used to initialise ``N`` and ``mu_hat``. Arms
            with no prior observations start from ``mu_hat = 0``.
        **_kwargs: Accepted and ignored.

    Returns:
        A pair ``(arms_selected, rewards)``: an int array and a float array,
        both of length ``T``. If ``T`` is smaller than the number of arms,
        only the first ``T`` arms of the random initial order are played.
    """
    if f is None:
        f = default_kl_UCB_func

    K_ = len(mu)
    faster = faster and K_ > 4
    N, mu_hat = np.zeros((K_, )), np.zeros((K_, ))
    if prior_SF is not None:
        S, F = prior_SF
        for arm in range(K_):
            N[arm] = S[arm] + F[arm]
            # Skip arms without prior observations: 0 / 0 would either raise
            # or yield NaN, and a NaN mean never recovers under the
            # incremental updates below.
            if N[arm] > 0:
                mu_hat[arm] = S[arm] / (S[arm] + F[arm])

    ukeeper = U_keeper(K_, T)

    arms_selected = np.zeros((T, )).astype(int)
    rewards = np.zeros((T, ))
    with seeded(seed):
        rands = np.random.rand(T)
        shuffled_arms = np.random.choice(K_, K_, replace=False)
        # Initialization: play each arm once in random order. The slice
        # keeps the loop inside the T available rounds when T < K_.
        for t, arm_x in enumerate(shuffled_arms[:T]):
            reward_y = int(rands[t] <= mu[arm_x])
            N[arm_x] += 1
            # Incremental update of the running mean.
            mu_hat[arm_x] += (reward_y - mu_hat[arm_x]) / N[arm_x]

            arms_selected[t] = arm_x
            rewards[t] = reward_y

        if T < K_:
            # Horizon exhausted during initialization; some arms may still
            # have N == 0, so no index can be computed for them.
            return arms_selected, rewards

        U = np.array([sup_KL(mu_hat[i], f(K_) / N[i]) for i in range(K_)])

        # compute
        for t in range(K_, T):
            arm_x = rand_argmax(U)
            # select
            reward_y = int(rands[t] <= mu[arm_x])

            arms_selected[t] = arm_x
            rewards[t] = reward_y

            # update for next
            N[arm_x] += 1
            mu_hat[arm_x] += (reward_y - mu_hat[arm_x]) / N[arm_x]

            if not faster:
                # Recompute every arm's index from scratch.
                fval = f(t + 1)
                U = np.array(
                    [sup_KL(mu_hat[i], fval / N[i]) for i in range(K_)])
            else:
                # U is not rebound on this path, so update_U is expected to
                # refresh it in place -- TODO confirm against U_keeper.
                ukeeper.update_U(t + 1, f, mu_hat, N, U, arm_x)

    return arms_selected, rewards
Ejemplo n.º 3
0
def XYZWST_SCM(devised=True, seed=None):
    """Build a randomly parametrized SCM over the ``XYZWST`` graph.

    Args:
        devised: If true, the noise parameters are drawn from hand-picked
            ranges (tight around 0.5 for the shared terms ``U_WX``/``U_YZ``,
            small for the per-variable terms of X, Y, Z, W, and broad for
            ``U_S``/``U_T``). Otherwise every parameter is drawn from
            ``[0.01, 0.99]``.
        seed: Forwarded to ``seeded``, which wraps graph construction and
            all parameter draws.

    Returns:
        A pair ``(M, mu1)``: the ``StructuralCausalModel`` and the dict of
        drawn parameters keyed by noise-variable name (presumably Bernoulli
        probabilities consumed by ``default_P_U`` -- confirm there).
    """
    if devised:
        shared, private, roots = (0.4, 0.6), (0.01, 0.1), (0.1, 0.9)
    else:
        shared = private = roots = (0.01, 0.99)

    # The order of this table fixes the order of the rand_bw calls, and
    # therefore which random draw each parameter receives under a seed.
    draw_plan = (
        ('U_WX', shared),
        ('U_YZ', shared),
        ('U_X', private),
        ('U_Y', private),
        ('U_Z', private),
        ('U_W', private),
        ('U_S', roots),
        ('U_T', roots),
    )

    with seeded(seed):
        G = XYZWST('U_WX', 'U_YZ')

        # parametrization for U
        mu1 = {}
        for u_name, (low, high) in draw_plan:
            mu1[u_name] = rand_bw(low, high, precision=2)

        # Every variable not listed explicitly gets the binary domain.
        domains = defaultdict(lambda: (0, 1))

        # Structural equations: each endogenous variable is an XOR of its
        # noise terms and parents (X additionally flipped by the leading 1).
        mechanisms = {
            'S': lambda v: v['U_S'],
            'T': lambda v: v['U_T'],
            'W': lambda v: v['U_W'] ^ v['U_WX'] ^ v['S'],
            'Z': lambda v: v['U_Z'] ^ v['U_YZ'],
            'X': lambda v: 1 ^ v['U_X'] ^ v['Z'] ^ v['U_WX'] ^ v['T'],
            'Y': lambda v: v['U_Y'] ^ v['U_YZ'] ^ v['X'] ^ v['W'] ^ v['T'],
        }
        extra_noise = {'U_W', 'U_X', 'U_Y', 'U_Z', 'U_S', 'U_T'}

        # SCM with parametrization
        M = StructuralCausalModel(G,
                                  F=mechanisms,
                                  P_U=default_P_U(mu1),
                                  D=domains,
                                  more_U=extra_noise)
        return M, mu1
Ejemplo n.º 4
0
def simple_markovian_SCM(seed=None) -> [StructuralCausalModel, dict]:
    """Build a randomly parametrized SCM over the ``simple_markovian`` graph.

    One noise variable ``U_<name>`` is created per graph variable, with its
    parameter drawn by ``rand_bw(0.1, 0.9, precision=2)``.

    Args:
        seed: Forwarded to ``seeded``, which wraps graph construction and
            all parameter draws.

    Returns:
        A pair ``(M, mu1)``: the ``StructuralCausalModel`` and the dict of
        drawn parameters keyed by noise-variable name.
    """
    with seeded(seed):
        G = simple_markovian()

        # Draw in sorted variable order so the result is reproducible under
        # a seed regardless of how G.V iterates.
        mu1 = {}
        for var_name in sorted(G.V):
            mu1['U_' + var_name] = rand_bw(0.1, 0.9, precision=2)

        # Every variable not listed explicitly gets the binary domain.
        domains = defaultdict(lambda: (0, 1))

        # Structural equations over binary values.
        mechanisms = {
            'Z1': lambda v: v['U_Z1'],
            'Z2': lambda v: v['U_Z2'],
            'X1': lambda v: v['U_X1'] ^ v['Z1'] ^ v['Z2'],
            'X2': lambda v: 1 ^ v['U_X2'] ^ v['Z1'] ^ v['Z2'],
            'Y': lambda v: v['U_Y'] | (v['X1'] & v['X2']),
        }
        noise_names = {'U_' + var_name for var_name in G.V}

        # SCM with parametrization
        M = StructuralCausalModel(G,
                                  F=mechanisms,
                                  P_U=default_P_U(mu1),
                                  D=domains,
                                  more_U=noise_names)
        return M, mu1
Ejemplo n.º 5
0
def IV_SCM(devised=True, seed=None):
    """Build a randomly parametrized SCM over the ``IV_CD`` graph.

    Args:
        devised: If true, the noise parameters are drawn from hand-picked
            ranges (small for ``U_X``/``U_Y``, broad for ``U_Z``, tight
            around 0.5 for the shared term ``U_XY``). Otherwise every
            parameter is drawn from ``[0.01, 0.99]``.
        seed: Forwarded to ``seeded``, which wraps graph construction and
            all parameter draws.

    Returns:
        A pair ``(M, mu1)``: the ``StructuralCausalModel`` and the dict of
        drawn parameters keyed by noise-variable name.
    """
    broad = (0.01, 0.99)
    if devised:
        private, shared = (0.01, 0.2), (0.4, 0.6)
    else:
        private = shared = broad

    # The order of this table fixes the order of the rand_bw calls, and
    # therefore which random draw each parameter receives under a seed.
    draw_plan = (
        ('U_X', private),
        ('U_Y', private),
        ('U_Z', broad),
        ('U_XY', shared),
    )

    with seeded(seed):
        G = IV_CD()

        # parametrization for U
        mu1 = {}
        for u_name, (low, high) in draw_plan:
            mu1[u_name] = rand_bw(low, high, precision=2)

        P_U = default_P_U(mu1)

        # Every variable not listed explicitly gets the binary domain.
        domains = defaultdict(lambda: (0, 1))

        # Structural equations: XOR of noise terms and parents, with Y
        # additionally flipped by the leading 1.
        mechanisms = {
            'Z': lambda v: v['U_Z'],
            'X': lambda v: v['U_X'] ^ v['U_XY'] ^ v['Z'],
            'Y': lambda v: 1 ^ v['U_Y'] ^ v['U_XY'] ^ v['X'],
        }

        # SCM with parametrization
        M = StructuralCausalModel(G,
                                  F=mechanisms,
                                  P_U=P_U,
                                  D=domains,
                                  more_U={'U_X', 'U_Y', 'U_Z'})
        return M, mu1