Example #1
0
    def take_action(self, env, a=None):
        """Execute one epsilon-greedy step in ``env`` and record training data.

        With probability ``self.policy.eps`` a random legal action is taken
        and its successor state is evaluated by a depth-reduced alpha-beta
        search; otherwise the greedy (action, value) pair from
        ``get_av_pairs`` is used.

        Args:
            env: environment exposing ``current_state``, ``get_as_pairs``,
                ``perform_action``, ``get_turn``, ``result`` and ``hist``.
            a: ignored on entry; rebound to the action actually taken.

        Returns:
            (s, a, r, s_n): previous state, action taken, immediate reward,
            successor state.
        """
        s = env.current_state
        as_pairs = env.get_as_pairs()
        e = np.random.rand()
        if e < self.policy.eps:
            # Exploration: pick a uniformly random (action, successor) pair
            # and estimate its value with a depth-1-shallower search.
            rand = np.random.randint(len(as_pairs))
            a, sn = as_pairs[rand]
            _, v = search.alphabeta_batch_hist(self.V, faster_featurize,
                                               Environment(sn), list(env.hist),
                                               self.depth - 1, -float('inf'),
                                               float('inf'))
        else:
            a, v = self.get_av_pairs(env)

        r, s_n = env.perform_action(a)
        # r_w: reward from white's perspective -- invert when it was white's
        # turn (get_turn() truthy).
        r_w = (-1 if env.get_turn() else 1) * r

        self.data_thread.append((s, r_w, v))
        if Environment.terminal_state(s_n):
            # Terminal: store the final state with its definitive value.
            self.data_thread.append((s_n, r_w, r_w))
            self.data_thread.set_update()
            # Decisive game (not a draw); call result() once, not twice.
            if env.result() in ('1-0', '0-1'):
                self.data_thread.set_win()
        return s, a, r, s_n
Example #2
0
File: td_stem.py  Project: devdnhee/rookie
    def take_action(self, env, a=None):
        """Epsilon-greedy step: act in ``env`` and log (state, reward, value).

        With probability ``self.policy.eps`` a random legal action is chosen
        and evaluated by a shallower alpha-beta search; otherwise the greedy
        pair from ``get_av_pairs`` is used. Dumps diagnostics if no action
        could be selected.

        Returns:
            (s, a, r, s_n): previous state, action, reward, successor state.
        """
        s = env.current_state
        as_pairs = env.get_as_pairs()
        e = np.random.rand()
        if e < self.policy.eps:
            # Exploration branch: random (action, successor) pair.
            rand = np.random.randint(len(as_pairs))
            a = as_pairs[rand][0]
            sn = as_pairs[rand][1]
            _, v = search.alphabeta_batch_hist(self.V, faster_featurize,
                                               Environment(sn), list(env.hist),
                                               self.depth - 1, -float('inf'),
                                               float('inf'))
        else:
            a, v = self.get_av_pairs(env)

        if a is None:
            # Diagnostics: no action found -- dump history and search state.
            for st in env.hist:
                print(chess.Board.from_epd(st))
            # Bug fix: the original ``print('...').format(...)`` only worked
            # by accident as a Py2 print statement and raises AttributeError
            # on Python 3; format first, then print.
            print('Value: {}, random e: {}, epsilon: {}'.format(
                v, e, self.policy.eps))
            print('as pairs: {}'.format(as_pairs))
            env.draw()

        r, s_n = env.perform_action(a)
        # r_w: reward from white's perspective (invert when get_turn() truthy).
        # NOTE(review): r_w is computed but ``r`` is what gets appended below,
        # unlike the sibling implementation -- confirm this is intentional.
        r_w = (-1 if env.get_turn() else 1) * r

        self.data_thread.append((s, r, v))
        if Environment.terminal_state(s_n):
            self.data_thread.append((s_n, r, r))
            self.data_thread.set_outcome(env.outcome())
        return s, a, r, s_n
Example #3
0
File: td_stem.py  Project: devdnhee/rookie
 def get_av_pairs(self, env):
     """Return ``[(action, value)]`` for the current position.

     The value-network search provides the value estimate; when the pure
     outcome search proves a forced win (o > 0), its move overrides the
     network's choice.
     """
     neg_inf = -float('inf')
     pos_inf = float('inf')
     outcome_action, outcome = search.alphabeta_outcome(
         None, None, env.current_state, self.depth, neg_inf, pos_inf)
     action, value = search.alphabeta_batch_hist(
         self.approx.value, faster_featurize, env, list(env.hist),
         settings.params['OC_DEPTH'], neg_inf, pos_inf)
     if outcome > 0:
         # Proven win: prefer the outcome search's move.
         action = outcome_action
     return [(action, value)]
Example #4
0
File: td_stem.py  Project: devdnhee/rookie
 def take_action(self, env, a=None):
     """Play one move in ``env``: the proven-win move from the outcome
     search when one exists (o > 0), otherwise the best move from the
     value-network alpha-beta search."""
     ao, o = search.alphabeta_outcome(None, None, env.current_state,
                                      self.depth, -float('inf'),
                                      float('inf'))
     if o > 0:
         # Forced win proven -- play it directly, no network search needed.
         env.perform_action(ao)
         return
     a, v = search.alphabeta_batch_hist(
         self.approx.value, faster_featurize, env, list(env.hist),
         self.depth, -float('inf'), float('inf'))
     env.perform_action(a)
Example #5
0
File: td_stem.py  Project: devdnhee/rookie
 def get_av_pairs(self, env):
     """Return ``(action, value)`` for the current position.

     The value-network search supplies action and value; when the outcome
     search proves a forced win (o > 0) its move overrides the action, the
     board is drawn, and timing diagnostics are printed.
     """
     t = time.time()
     a, v = search.alphabeta_batch_hist(self.V, faster_featurize, env,
                                        list(env.hist), self.depth,
                                        -float('inf'), float('inf'))
     t2 = time.time()
     ao, o = search.alphabeta_outcome(None, None, env.current_state,
                                      settings.params['OC_DEPTH'],
                                      -float('inf'), float('inf'))
     t3 = time.time()
     if o > 0:
         a = ao
         env.draw()
         # Ratio of outcome-search time to value-search time. Guard against
         # a zero delta on coarse clocks (previously a ZeroDivisionError).
         dt = t2 - t
         ratio = (t3 - t2) / dt if dt else float('inf')
         print('{} {} {}'.format(a, o, ratio))
     return a, v
Example #6
0
 def get_av_pairs(self, env):
     """Delegate to the alpha-beta search over the value function ``self.V``
     and return its (action, value) result unchanged."""
     alpha, beta = -float('inf'), float('inf')
     history = list(env.hist)
     return search.alphabeta_batch_hist(self.V, faster_featurize, env,
                                        history, self.depth, alpha, beta)
Example #7
0
 def take_action(self, env, a=None):
     """Search the current position with alpha-beta over the approximator's
     value function and play the best move found in ``env``."""
     alpha, beta = -float('inf'), float('inf')
     a, v = search.alphabeta_batch_hist(self.approx.value, faster_featurize,
                                        env, list(env.hist), self.depth,
                                        alpha, beta)
     env.perform_action(a)