def __init__(self, model, k=2, discount_rate=0.9, u0=1, std0=0.2, sampling_interval=20):
    """Store the configuration and build the initial working state.

    Args:
        model: Model object; kept on the instance and also handed to
            ``Hypothesis.draw_init_hypothesis``.
        k: Number of back-ups per action.
        discount_rate: Discount rate, stored unchanged.
        u0: Default mean passed on when the initial hypothesis is drawn.
        std0: Default standard deviation passed on when the initial
            hypothesis is drawn.
        sampling_interval: Interval at which samples are drawn.
    """
    # Caller-supplied settings.
    self.model = model
    self.k = k
    self.discount_rate = discount_rate
    self.sampling_interval = sampling_interval
    self.u0, self.std0 = u0, std0

    # Fixed comparison constant.
    self.delta = 0.001

    # Book-keeping helper and the priority queue used for the
    # maximum-likelihood computation.
    self.keepr = Keeper()
    self.ML_queue = UniquePriorityQueue()

    # Maximum-likelihood V starts out empty.
    self.ML_V = {}

    # Initial hypothesis, drawn from the default mean / std set above.
    self.hypothesis = Hypothesis.draw_init_hypothesis(
        model, self.u0, self.std0)
def test_1(self):
    """Check the pop order after three plain ``push`` calls.

    The entries are pushed as (2, 3), (0, 5), (8, 1); the second element
    of each popped pair is expected to come back as 5, 3, 1, i.e. ordered
    by ascending first argument (presumably the priority).
    """
    pq = UniquePriorityQueue()
    for first, second in ((2, 3), (0, 5), (8, 1)):
        pq.push(first, second)

    for expected in (5, 3, 1):
        # pop() yields a pair; only its second element is checked.
        _, popped = pq.pop()
        self.assertEqual(expected, popped)
def test_2(self):
    """Check the pop order when ``push_or_update`` sees a value twice.

    Value 5 is submitted first with 10 and later with 0 as the first
    argument; the test expects it to be popped first, followed by 3, 6
    and 1. Afterwards 5 is submitted once more and must be the next
    value popped.
    """
    pq = UniquePriorityQueue()
    submissions = ((2, 3), (10, 5), (8, 1), (0, 5), (4, 6))
    for first, second in submissions:
        pq.push_or_update(first, second)

    for expected in (5, 3, 6, 1):
        # pop() yields a pair; only its second element is checked.
        _, popped = pq.pop()
        self.assertEqual(expected, popped)

    # Re-submit a value that has already been popped.
    pq.push_or_update(10, 5)
    _, popped = pq.pop()
    self.assertEqual(5, popped)
def __init__(self, model, k=2, epsilon=1, degrading_constant=0.99, discount_rate=0.9):
    """Store the algorithm parameters and create empty working state.

    Args:
        model: Model object, kept on the instance as ``self.model``.
        k: Algorithm parameter, stored unchanged.
        epsilon: Algorithm parameter, stored unchanged.
        degrading_constant: Algorithm parameter, stored unchanged.
        discount_rate: Algorithm parameter, stored unchanged.
    """
    self.model = model

    # Parameters for the algorithm.
    self.k = k
    self.epsilon = epsilon
    self.degrading_constant = degrading_constant
    self.discount_rate = discount_rate
    self.delta = 0.001

    # Value model, initially empty.
    self.V = {}

    # Book-keeping helper, then the priority queue.
    self.keepr = Keeper()
    self.queue = UniquePriorityQueue()