Example #1
    def __init__(self,
                 env,
                 t,
                 alphas=None,
                 betas=None,
                 thres=0.5,
                 iters_per_arm=100):

        self.env = snapshot(env, t)
        self.t = t
        self.arms = two_cycles(self.env, t)
        self.n_arms = len(self.arms)
        self.iters_per_arm = iters_per_arm
        self.thres = thres

        # Prior successes
        if alphas is None:
            self.alphas = np.ones(self.n_arms)
        else:
            self.alphas = alphas

        # Prior failures
        if betas is None:
            self.betas = np.ones(self.n_arms)
        else:
            self.betas = betas

        self.s = np.zeros(self.n_arms)  # Successes
        self.f = np.zeros(self.n_arms)  # Failures

        self.r = np.zeros(self.n_arms)  # Rewards
        self.n = np.zeros(self.n_arms)  # Visits
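
The fields above set up per-arm Beta posteriors. A minimal sketch of the Thompson-sampling step they support, assuming successes and failures update Beta(alpha + s, beta + f) (the helper names below are illustrative, not from the source):

import numpy as np

def thompson_select(alphas, betas, s, f):
    # Draw one posterior sample per arm from Beta(alpha + s, beta + f)
    # and play the arm with the largest draw.
    samples = np.random.beta(alphas + s, betas + f)
    return int(np.argmax(samples))

def thompson_update(s, f, arm, success):
    # A success increments s[arm]; a failure increments f[arm].
    s[arm] += success
    f[arm] += 1 - success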
Example #2
    def __init__(self,
                 env,
                 t,
                 n_iters=10,
                 method="cycles",
                 mix=0.5,
                 none_prob=None):

        self.env = snapshot(env, t)
        self.t = t
        self.n_iters = n_iters
        self.method = method

        if method == "cycles":
            self.arms = [tuple(sorted(c))
                         for c in two_cycles(self.env, t)] + [None]
        elif method == "pairs":
            self.arms = self.env.get_living(self.t) + [None]
        else:
            raise ValueError("Unknown optimal simulation method.")

        self.n_arms = len(self.arms)
        self.rnn = torch.load("results/policy_function_lstm")
        self.mix = mix
        self.none_prob = none_prob
Example #3
def test_two_cycles_all(env):
    cycles = two_cycles(env, 4)
    for i, j in cycles:
        assert env.has_edge(i, j)
        assert env.has_edge(j, i)
        assert env.node[i]["entry"] <= env.node[j]["death"]
        assert env.node[j]["entry"] <= env.node[i]["death"]
        assert env.node[i]["d_blood"] == 0 or \
               env.node[j]["p_blood"] == 3 or \
               env.node[i]["d_blood"] == env.node[j]["p_blood"]
Example #4
    def __init__(self, env, t, priors, prior_counts, algo="opt"):

        self.env = snapshot(env, t)
        self.t = t
        self.arms = two_cycles(self.env, t)
        self.n_arms = len(self.arms)
        self.solver = optimal
        self.horizons = [1, 5, 10, 20]
        # Pseudo-counts of prior successes per arm, kept strictly positive
        self.successes = prior_counts * get_cycle_probabilities(
            self.env.get_living(t), self.arms, priors) + 1e-8
Example #5
    def __init__(self, env, t, gamma=.1, iters_per_arm=100, thres=0.5):

        self.env = env
        self.t = t
        self.arms = two_cycles(self.env, t)
        self.n_arms = len(self.arms)
        self.w = np.ones(self.n_arms)
        self.p = np.full_like(self.w, fill_value=1 / self.n_arms)
        self.gamma = gamma
        self.r = np.zeros(self.n_arms)
        self.n = np.zeros(self.n_arms)
        self.iters_per_arm = iters_per_arm
        self.thres = thres
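
Example #5 initializes EXP3-style state: weights w, mixing rate gamma, and the induced distribution p. A sketch of the standard EXP3 update these fields suggest (a hypothetical helper, not from the source):

import numpy as np

def exp3_step(w, gamma, arm, reward):
    # Mix normalized weights with the uniform distribution, then apply
    # an importance-weighted exponential update to the played arm.
    k = len(w)
    p = (1 - gamma) * w / w.sum() + gamma / k
    w[arm] *= np.exp(gamma * (reward / p[arm]) / k)
    return p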
Example #6
    def __init__(self, env, t, c=2, iters_per_arm=100, thres=0.5):

        self.env = snapshot(env, t)
        self.t = t

        self.arms = two_cycles(self.env, t)
        self.n_arms = len(self.arms)

        self.c = c
        self.r = np.zeros(self.n_arms)  # Rewards
        self.n = np.zeros(self.n_arms)  # Visits
        self.iters_per_arm = iters_per_arm
        self.thres = thres
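
Here c plays the role of the UCB exploration constant over the mean rewards r / n. A sketch of the UCB1-style selection these fields support (illustrative, not from the source):

import numpy as np

def ucb_select(r, n, c):
    # Play every arm once, then maximize the empirical mean plus
    # the confidence bonus c * sqrt(log(total pulls) / pulls).
    if np.any(n == 0):
        return int(np.argmin(n))
    return int(np.argmax(r / n + c * np.sqrt(np.log(n.sum()) / n)))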
Example #7
    def __init__(self, env, t, method="cycles"):
        self.env = snapshot(env, t)
        self.t = t
        self.method = method

        if method == "cycles":
            self.arms = [tuple(sorted(c))
                         for c in two_cycles(self.env, t)] + [None]
        elif method == "pairs":
            self.arms = self.env.get_living(self.t) + [None]
        else:
            raise ValueError("Unknown optimal simulation method.")

        self.n_arms = len(self.arms)
Example #8
    def __init__(self,
                 env,
                 t: int,
                 gamma: float = .1,
                 iters_per_arm: int = 100,
                 max_match: int = 5):
        self.env = env
        self.t = t
        self.max_match = max_match
        self.cycles = two_cycles(env, t) + [()]
        self.arms = get_arm_matrix(self.cycles, max_match)
        self.n_arms = len(self.arms)

        # Horizon: 90th percentile of the geometric lifetime distribution, at least 2
        self.h = max(2, int(geom(env.death_rate).ppf(.9)))
        self.w = np.ones(self.n_arms)
        self.p = np.full_like(self.w, fill_value=1 / self.n_arms)
        self.q = np.full_like(self.w, fill_value=1 / self.n_arms)
        self.mu = np.full_like(self.w, fill_value=1 / self.n_arms)
        self.gamma = gamma
        self.iters_per_arm = iters_per_arm
Example #9
        opt = optimal(env)
        gre = greedy(env)

        # Per-period matched counts under the optimal and greedy baselines
        o = get_n_matched(opt["matched"], 0, env.time_length)
        g = get_n_matched(gre["matched"], 0, env.time_length)

        rewards = np.zeros(env.time_length)

        np.random.seed(clock_seed())
        for t in range(env.time_length):
            probs, count = evaluate_policy(net, env, t, dtype="numpy")

            for i in range(count):
                probs, _ = evaluate_policy(net, env, t, dtype="numpy")
                cycles = two_cycles(env, t)
                if len(cycles) == 0:
                    break
                elif len(cycles) == 1:
                    # Only one candidate cycle: take it directly
                    res = cycles.pop()
                else:
                    # Several candidates: choose via Monte Carlo simulation
                    sim = MonteCarlo(env, t, probs, n_prior)
                    res = sim.simulate(n_sims)

                env.removed_container[t].update(res)
                probs, count = evaluate_policy(net, env, t, dtype="numpy")

            rewards[t] = len(env.removed_container[t])

            # Running averages: policy reward vs greedy (g) and optimal (o)
            print(t, np.mean(rewards[:t + 1]), np.mean(g[:t + 1]),
                  np.mean(o[:t + 1]))