Example #1
import gym
import numpy as np
from gym import spaces

# Game, WindowVisualizer and TerminalVisualizer are assumed to be importable
# from the project's own modules.


class VacuumEnv(gym.Env):
    def __init__(self, size=8, max_episodes=100, gen_proba=0.5, collide='sym'):
        self.viewer = None
        self.done = None
        self.game = Game(max_episodes, gen_proba, size, collide)
        # Four discrete actions; observations are 4-element binary vectors.
        self.action_space = spaces.Discrete(4)
        self.observation_space = spaces.Box(0, 1, shape=(4,), dtype=np.uint8)

    def step(self, action=None):
        obs = self.game.step(action)
        self.done = self.game.finished()
        # Reward is sparse: zero until the end, then a penalty equal to the garbage left behind.
        reward = -self.game.garbage_count if self.done else 0
        info = {
            'garbage_count': self.game.garbage_count,
            'gathered': self.game.agent.gathered,
            'episodes_left': self.game.episodes_left
        }
        return np.array(obs), reward, self.done, info

    def reset(self):
        obs = self.game.reset()
        self.done = False
        return np.array(obs)

    def render(self, mode='window', size='small'):
        # Lazily create a viewer on the first call, based on the requested mode.
        if self.viewer is None:
            if mode == 'window':
                self.viewer = WindowVisualizer(self.game, size)
            elif mode == 'terminal':
                self.viewer = TerminalVisualizer(self.game)

        self.viewer.render()

    def close(self):
        if self.viewer:
            self.viewer.close()
            self.viewer = None
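
A minimal usage sketch for the environment above, assuming VacuumEnv and its Game/visualizer dependencies are importable from the project: random actions are sampled until the episode ends, and the terminal reward (the negative count of garbage left behind) is accumulated.

env = VacuumEnv(size=8, max_episodes=100)
obs = env.reset()
done, total_reward = False, 0
while not done:
    action = env.action_space.sample()        # random action among the 4 moves
    obs, reward, done, info = env.step(action)
    total_reward += reward
    env.render(mode='terminal')               # or mode='window' for the graphical viewer
print('garbage left:', info['garbage_count'], 'total reward:', total_reward)
env.close()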
Example #2
# -*- coding:utf-8 -*-
from core import Game, Grid
from agents import RandomAgent, MinMaxAgent

if __name__ == '__main__':
    env = Game()
    # agent = RandomAgent(action_num=4, env=env)
    agent = MinMaxAgent(env=env, maxSearchTime=100)

    # Play until the game is over, printing the board after every move.
    while not env.isOver:
        act, move = agent.step()
        print("Current move:", move)
        env.step(act)
        print("Current board:\n{}".format(env.state))
        print("Current max value: {}\n".format(env.maxValue))
    print("Final score:", env.score)