def take_action(self, env, a=None):
    s = env.current_state
    as_pairs = env.get_as_pairs()
    e = np.random.rand()
    if e < self.policy.eps:
        # explore: play a uniformly random (action, successor state) pair,
        # but evaluate the successor with the search so the recorded value
        # stays consistent with the value function
        rand = np.random.randint(len(as_pairs))
        a = as_pairs[rand][0]
        sn = as_pairs[rand][1]
        _, v = search.alphabeta_batch_hist(self.V, faster_featurize,
                                           Environment(sn), list(env.hist),
                                           self.depth - 1,
                                           -float('inf'), float('inf'))
    else:
        # exploit: take the move preferred by the search
        a, v = self.get_av_pairs(env)
    r, s_n = env.perform_action(a)
    # r_w: reward from white's perspective (invert value if white)
    r_w = (-1 if env.get_turn() else 1) * r
    self.data_thread.append((s, r_w, v))
    if Environment.terminal_state(s_n):
        self.data_thread.append((s_n, r_w, r_w))
        self.data_thread.set_update()
        if env.result() == '1-0' or env.result() == '0-1':
            self.data_thread.set_win()
    return s, a, r, s_n
def take_action(self, env, a=None):
    s = env.current_state
    as_pairs = env.get_as_pairs()
    e = np.random.rand()
    if e < self.policy.eps:
        # explore: play a uniformly random (action, successor state) pair,
        # but evaluate the successor with the search
        rand = np.random.randint(len(as_pairs))
        a = as_pairs[rand][0]
        sn = as_pairs[rand][1]
        _, v = search.alphabeta_batch_hist(self.V, faster_featurize,
                                           Environment(sn), list(env.hist),
                                           self.depth - 1,
                                           -float('inf'), float('inf'))
    else:
        # exploit: take the move preferred by the search
        a, v = self.get_av_pairs(env)
    if a is None:
        # debug output: the search returned no move
        for st in env.hist:
            print chess.Board.from_epd(st)
        print 'Value: {}, random e: {}, epsilon: {}'.format(v, e, self.policy.eps)
        print 'as pairs: {}'.format(as_pairs)
        env.draw()
    r, s_n = env.perform_action(a)
    # r_w: reward from white's perspective (invert value if white)
    r_w = (-1 if env.get_turn() else 1) * r
    self.data_thread.append((s, r, v))
    if Environment.terminal_state(s_n):
        self.data_thread.append((s_n, r, r))
        self.data_thread.set_outcome(env.outcome())
    return s, a, r, s_n
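Both take_action variants above follow the same epsilon-greedy pattern: with probability eps a random legal (action, successor) pair is played and only evaluated, otherwise the searched move is played. The following is a minimal, self-contained sketch of that selection step; the epsilon_greedy_pick name and the search_best callable standing in for alphabeta_batch_hist are illustrative assumptions, not the project's API.

import numpy as np

def epsilon_greedy_pick(state, as_pairs, eps, search_best):
    """Minimal epsilon-greedy move selection (illustrative sketch).

    state       -- current position
    as_pairs    -- list of (action, successor_state) tuples, as returned
                   by env.get_as_pairs() above
    eps         -- exploration probability
    search_best -- callable state -> (best_action, value), standing in
                   for the alpha-beta search used by the agents above
    """
    if np.random.rand() < eps:
        # explore: play a uniformly random legal move, but keep the
        # searched value of the successor so the recorded training
        # target still reflects the value function
        i = np.random.randint(len(as_pairs))
        action, successor = as_pairs[i]
        _, value = search_best(successor)
        return action, value
    # exploit: play the move the search prefers from the current position
    return search_best(state)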
def get_av_pairs(self, env):
    # outcome search: look for a forced result within self.depth plies
    ao, o = search.alphabeta_outcome(None, None, env.current_state, self.depth,
                                     -float('inf'), float('inf'))
    # value-function search over the game history
    a, v = search.alphabeta_batch_hist(self.approx.value, faster_featurize, env,
                                       list(env.hist), settings.params['OC_DEPTH'],
                                       -float('inf'), float('inf'))
    if o > 0:
        # prefer the move that forces a winning outcome
        a = ao
    return [(a, v)]
def take_action(self, env, a=None):
    # check for a forced outcome first; otherwise fall back to the value search
    ao, o = search.alphabeta_outcome(None, None, env.current_state, self.depth,
                                     -float('inf'), float('inf'))
    if o > 0:
        env.perform_action(ao)
    else:
        a, v = search.alphabeta_batch_hist(self.approx.value, faster_featurize, env,
                                           list(env.hist), self.depth,
                                           -float('inf'), float('inf'))
        env.perform_action(a)
def get_av_pairs(self, env):
    t = time.time()
    a, v = search.alphabeta_batch_hist(self.V, faster_featurize, env,
                                       list(env.hist), self.depth,
                                       -float('inf'), float('inf'))
    t2 = time.time()
    ao, o = search.alphabeta_outcome(None, None, env.current_state,
                                     settings.params['OC_DEPTH'],
                                     -float('inf'), float('inf'))
    t3 = time.time()
    if o > 0:
        # a forced winning outcome was found: take that move instead
        a = ao
    # debug output: board and relative cost of the outcome search
    env.draw()
    print a, o, (t3 - t2) / (t2 - t)
    return a, v
def get_av_pairs(self, env):
    return search.alphabeta_batch_hist(self.V, faster_featurize, env,
                                       list(env.hist), self.depth,
                                       -float('inf'), float('inf'))
def take_action(self, env, a=None):
    a, v = search.alphabeta_batch_hist(self.approx.value, faster_featurize, env,
                                       list(env.hist), self.depth,
                                       -float('inf'), float('inf'))
    env.perform_action(a)
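All of the take_action methods above advance the environment in place, so a self-play game reduces to alternating calls until a terminal state is reached. Below is a minimal driver sketch; it relies only on the Environment API visible above (current_state, get_turn, terminal_state, result), while the play_game name, the agent pairing, and the max_moves cap are illustrative assumptions.

def play_game(env, white_agent, black_agent, max_moves=512):
    """Alternate take_action calls until the game ends (illustrative sketch).

    The agents mutate env in place via perform_action, so this loop only
    decides whose turn it is and detects the end of the game.
    """
    for _ in range(max_moves):
        if Environment.terminal_state(env.current_state):
            break
        # following the comment in the first take_action above, get_turn()
        # is assumed to be truthy when it is white's turn (an assumption)
        agent = white_agent if env.get_turn() else black_agent
        agent.take_action(env)
    return env.result()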