def take_action(self, env, a=None):
    s = env.current_state
    as_pairs = env.get_as_pairs()
    e = np.random.rand()
    if e < self.policy.eps:
        # explore: play a uniformly random (action, successor state) pair,
        # but evaluate the successor with the search so the recorded value
        # stays consistent with the value function
        rand = np.random.randint(len(as_pairs))
        a = as_pairs[rand][0]
        sn = as_pairs[rand][1]
        _, v = search.alphabeta_batch_hist(self.V, faster_featurize,
                                           Environment(sn), list(env.hist),
                                           self.depth - 1,
                                           -float('inf'), float('inf'))
    else:
        # exploit: take the move preferred by the search
        a, v = self.get_av_pairs(env)
    r, s_n = env.perform_action(a)
    # r_w: reward from white's perspective (invert value if white)
    r_w = (-1 if env.get_turn() else 1) * r
    self.data_thread.append((s, r_w, v))
    if Environment.terminal_state(s_n):
        self.data_thread.append((s_n, r_w, r_w))
        self.data_thread.set_update()
        if env.result() == '1-0' or env.result() == '0-1':
            self.data_thread.set_win()
    return s, a, r, s_n
def take_action(self, env, a=None):
    s = env.current_state
    as_pairs = env.get_as_pairs()
    e = np.random.rand()
    if e < self.policy.eps:
        # explore: play a uniformly random (action, successor state) pair,
        # but evaluate the successor with the search
        rand = np.random.randint(len(as_pairs))
        a = as_pairs[rand][0]
        sn = as_pairs[rand][1]
        _, v = search.alphabeta_batch_hist(self.V, faster_featurize,
                                           Environment(sn), list(env.hist),
                                           self.depth - 1,
                                           -float('inf'), float('inf'))
    else:
        # exploit: take the move preferred by the search
        a, v = self.get_av_pairs(env)
    if a is None:
        # debug output: the search returned no move
        for st in env.hist:
            print chess.Board.from_epd(st)
        print 'Value: {}, random e: {}, epsilon: {}'.format(v, e, self.policy.eps)
        print 'as pairs: {}'.format(as_pairs)
        env.draw()
    r, s_n = env.perform_action(a)
    # r_w: reward from white's perspective (invert value if white)
    r_w = (-1 if env.get_turn() else 1) * r
    self.data_thread.append((s, r, v))
    if Environment.terminal_state(s_n):
        self.data_thread.append((s_n, r, r))
        self.data_thread.set_outcome(env.outcome())
    return s, a, r, s_n
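Both take_action variants above follow the same epsilon-greedy pattern: with probability eps a random legal (action, successor) pair is played and only evaluated, otherwise the searched move is played. The following is a minimal, self-contained sketch of that selection step; the epsilon_greedy_pick name and the search_best callable standing in for alphabeta_batch_hist are illustrative assumptions, not the project's API.

import numpy as np

def epsilon_greedy_pick(state, as_pairs, eps, search_best):
    """Minimal epsilon-greedy move selection (illustrative sketch).

    state       -- current position
    as_pairs    -- list of (action, successor_state) tuples, as returned
                   by env.get_as_pairs() above
    eps         -- exploration probability
    search_best -- callable state -> (best_action, value), standing in
                   for the alpha-beta search used by the agents above
    """
    if np.random.rand() < eps:
        # explore: play a uniformly random legal move, but keep the
        # searched value of the successor so the recorded training
        # target still reflects the value function
        i = np.random.randint(len(as_pairs))
        action, successor = as_pairs[i]
        _, value = search_best(successor)
        return action, value
    # exploit: play the move the search prefers from the current position
    return search_best(state)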
def get_av_pairs(self, env):
    # outcome search: look for a forced result within self.depth plies
    ao, o = search.alphabeta_outcome(None, None, env.current_state, self.depth,
                                     -float('inf'), float('inf'))
    # value-function search over the game history
    a, v = search.alphabeta_batch_hist(self.approx.value, faster_featurize, env,
                                       list(env.hist), settings.params['OC_DEPTH'],
                                       -float('inf'), float('inf'))
    if o > 0:
        # prefer the move that forces a winning outcome
        a = ao
    return [(a, v)]
def take_action(self, env, a=None):
    # check for a forced outcome first; otherwise fall back to the value search
    ao, o = search.alphabeta_outcome(None, None, env.current_state, self.depth,
                                     -float('inf'), float('inf'))
    if o > 0:
        env.perform_action(ao)
    else:
        a, v = search.alphabeta_batch_hist(self.approx.value, faster_featurize, env,
                                           list(env.hist), self.depth,
                                           -float('inf'), float('inf'))
        env.perform_action(a)
def get_av_pairs(self, env):
    t = time.time()
    a, v = search.alphabeta_batch_hist(self.V, faster_featurize, env,
                                       list(env.hist), self.depth,
                                       -float('inf'), float('inf'))
    t2 = time.time()
    ao, o = search.alphabeta_outcome(None, None, env.current_state,
                                     settings.params['OC_DEPTH'],
                                     -float('inf'), float('inf'))
    t3 = time.time()
    if o > 0:
        # a forced winning outcome was found: take that move instead
        a = ao
    # debug output: board and relative cost of the outcome search
    env.draw()
    print a, o, (t3 - t2) / (t2 - t)
    return a, v
def get_av_pairs(self, env):
    return search.alphabeta_batch_hist(self.V, faster_featurize, env,
                                       list(env.hist), self.depth,
                                       -float('inf'), float('inf'))
def take_action(self, env, a=None):
    a, v = search.alphabeta_batch_hist(self.approx.value, faster_featurize, env,
                                       list(env.hist), self.depth,
                                       -float('inf'), float('inf'))
    env.perform_action(a)
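All of the take_action methods above advance the environment in place, so a self-play game reduces to alternating calls until a terminal state is reached. Below is a minimal driver sketch; it relies only on the Environment API visible above (current_state, get_turn, terminal_state, result), while the play_game name, the agent pairing, and the max_moves cap are illustrative assumptions.

def play_game(env, white_agent, black_agent, max_moves=512):
    """Alternate take_action calls until the game ends (illustrative sketch).

    The agents mutate env in place via perform_action, so this loop only
    decides whose turn it is and detects the end of the game.
    """
    for _ in range(max_moves):
        if Environment.terminal_state(env.current_state):
            break
        # following the comment in the first take_action above, get_turn()
        # is assumed to be truthy when it is white's turn (an assumption)
        agent = white_agent if env.get_turn() else black_agent
        agent.take_action(env)
    return env.result()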