Example #1
class ChessEnv_v0(gym.Env):
    reward_range = (-1.0, 1.0) #does this matter?
    
    action_space = box.Box(0, 1, shape=(2, 64))
    observation_space = box.Box(0, 1, shape=(8, 8, 12))
    
    def reset(self):
        self.board = chess.Board()
        self.oh_board = oneHotBoard(self.board)
        return self.oh_board 
    
    def step(self, action):
        move = numbersToMove(action[0], action[1])
        legal = move in self.board.legal_moves
        if legal:
            self.board.push(move)
        else:
            # Fall back to a random legal move when the proposed move is illegal.
            move = randomAgent(self.board)
            self.board.push(move)
        self.oh_board = oneHotBoard(self.board)
        
        obs = self.oh_board
        rew = int(legal)  # reward 1 for a legal move, 0 otherwise
        done = self.board.is_checkmate() or self.board.can_claim_draw()
        info = {}  # gym expects a dict here, not None
        
        return obs, rew, done, info
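
A minimal, self-contained sketch (not part of the snippet above) of how the two Box spaces declared in ChessEnv_v0 behave; it assumes only that gym and numpy are installed.

import numpy as np
from gym.spaces import Box

action_space = Box(0, 1, shape=(2, 64), dtype=np.float32)          # mirrors ChessEnv_v0.action_space
observation_space = Box(0, 1, shape=(8, 8, 12), dtype=np.float32)  # mirrors ChessEnv_v0.observation_space

a = action_space.sample()                 # float array in [0, 1] with shape (2, 64)
assert action_space.contains(a)
assert observation_space.shape == (8, 8, 12)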
Example #2
def get_descriptor():
    """Creates an EnvironmentDescriptor."""
    plane_size = FLAGS.n_history_frames if (
        FLAGS.encode_actions == 0) else int(2 * FLAGS.n_history_frames)
    if not FLAGS.grayscale:
        plane_size *= 3  # one plane per RGB channel
    reward_range, value_range = RANGES
    reward_range = tuple(map(mzcore.inverse_contractive_mapping, reward_range))
    value_range = tuple(map(mzcore.inverse_contractive_mapping, value_range))
    observation_space = box.Box(low=0.,
                                high=1.,
                                shape=(FLAGS.screen_size, FLAGS.screen_size,
                                       plane_size),
                                dtype=np.float32)
    return mzcore.EnvironmentDescriptor(
        observation_space=observation_space,
        action_space=gym.make(
            get_game_id(),
            full_action_space=(FLAGS.full_action_space == 1)).action_space,
        reward_range=reward_range,
        value_range=value_range,
        pretraining_space=gym.spaces.Tuple([
            observation_space,
            observation_space,
        ]),
    )
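
A standalone sketch of the observation Box built above, using hypothetical stand-in values for the FLAGS (4 history frames, 96x96 RGB frames, no action encoding); the mzcore- and reward-range-specific pieces are omitted.

import numpy as np
from gym.spaces import Box

# Hypothetical values standing in for FLAGS.n_history_frames etc.
n_history_frames, screen_size, grayscale, encode_actions = 4, 96, False, 0

plane_size = n_history_frames if encode_actions == 0 else 2 * n_history_frames
if not grayscale:
    plane_size *= 3                       # one plane per RGB channel

observation_space = Box(low=0., high=1.,
                        shape=(screen_size, screen_size, plane_size),
                        dtype=np.float32)
print(observation_space.shape)            # (96, 96, 12)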
Example #3
    def __init__(self,
                 env,
                 frame_skip=4,
                 terminal_on_life_loss=False,
                 screen_size=84):
        """Constructor for an Atari 2600 preprocessor.

    Args:
      env: Gym environment whose observations are preprocessed.
      frame_skip: int, the frequency at which the agent experiences the game.
      terminal_on_life_loss: bool, If True, the step() method returns
        is_terminal=True whenever a life is lost. See Mnih et al. 2015.
      screen_size: int, size of a resized Atari 2600 frame.

    Raises:
      ValueError: if frame_skip or screen_size are not strictly positive.
    """
        super(AtariPreprocessing, self).__init__(env)

        # Override the observation space to match the shape of the processed
        # (resized, grayscale) observations.
        self.observation_space = box.Box(low=0,
                                         high=255,
                                         shape=(screen_size, screen_size, 1),
                                         dtype=np.uint8)

        if frame_skip <= 0:
            raise ValueError(
                'Frame skip should be strictly positive, got {}'.format(
                    frame_skip))
        if screen_size <= 0:
            raise ValueError(
                'Target screen size should be strictly positive, got {}'.
                format(screen_size))

        self.terminal_on_life_loss = terminal_on_life_loss
        self.frame_skip = frame_skip
        self.screen_size = screen_size

        obs_dims = self.env.observation_space
        # Stores temporary observations used for pooling over two successive
        # frames.
        self.screen_buffer = [
            np.empty((obs_dims.shape[0], obs_dims.shape[1]), dtype=np.uint8),
            np.empty((obs_dims.shape[0], obs_dims.shape[1]), dtype=np.uint8)
        ]

        self.game_over = False
        self.lives = 0  # Will need to be set by reset().
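
A small, self-contained sketch of the spaces this wrapper deals with: a raw ALE-style screen Box (the (210, 160, 3) shape is the usual ALE default, assumed here for illustration) and the resized grayscale Box exposed above, together with the two pooling buffers; no emulator is needed to run it.

import numpy as np
from gym.spaces import Box

raw_space = Box(low=0, high=255, shape=(210, 160, 3), dtype=np.uint8)      # raw screen (assumed shape)
processed_space = Box(low=0, high=255, shape=(84, 84, 1), dtype=np.uint8)  # wrapper's observation_space

# The wrapper pools over two successive raw frames, so it keeps two
# single-channel buffers sized like the raw screen.
screen_buffer = [np.empty(raw_space.shape[:2], dtype=np.uint8) for _ in range(2)]
print(processed_space.shape, len(screen_buffer))   # (84, 84, 1) 2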
Example #4
    def __init__(self, env, obs_dimorder=None, add_action_to_obs=False):
        super(obsTupleWrap, self).__init__(env)
        self.obs_dimorder = obs_dimorder
        self.add_action_to_obs = add_action_to_obs
        if obs_dimorder is not None:
            # Transpose the Box bounds so the space matches the reordered observation.
            new_obsspace = gym_box.Box(
                np.transpose(self.env.observation_space.low, self.obs_dimorder),
                np.transpose(self.env.observation_space.high, self.obs_dimorder))
            if self.add_action_to_obs:
                self.observation_space = gym_tuple((new_obsspace, self.env.action_space))
            else:
                self.observation_space = gym_tuple((new_obsspace,))
        else:
            if self.add_action_to_obs:
                self.observation_space = gym_tuple((self.env.observation_space, self.env.action_space))
            else:
                self.observation_space = gym_tuple((self.env.observation_space,))
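
A minimal sketch of the transposed-Box idea used above, assuming gym_box and gym_tuple are aliases for gym.spaces.Box and gym.spaces.Tuple: an HWC image space is reordered to CHW by transposing its low/high arrays.

import numpy as np
from gym.spaces import Box, Tuple

hwc_space = Box(low=0, high=255, shape=(84, 84, 3), dtype=np.uint8)  # stand-in wrapped observation space
dimorder = (2, 0, 1)                                                 # move channels first

chw_space = Box(np.transpose(hwc_space.low, dimorder),
                np.transpose(hwc_space.high, dimorder),
                dtype=hwc_space.dtype)
obs_space = Tuple((chw_space,))
print(chw_space.shape)                                               # (3, 84, 84)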
Example #5
    def __init__(self):
        self.graphics = False
        self.s = 2  # scale of the graphics
        self.minShootDist = 5  # minimum distance away from the target
        self.maxShootDist = 10  # maximum distance away from the target
        self.a = self.reward_point()
        self.w = 5  # width of the robot
        self.t = 0.5  # round to the nearest 0.5 seconds when dealing with time
        self.x0 = 0  # robot's starting x-position
        self.y0 = 0  # robot's starting y-position
        self.x = self.x0  # robot's current x-position
        self.y = self.y0  # robot's current y-position
        self.facing = 0  # direction the robot is facing, in standard position
        self.l_speed = 0  # left wheel speed
        self.r_speed = 0  # right wheel speed
        self.is_over = False  # the game is not over yet
        self.reward = 0  # reward given to the robot during the simulation
        self.counter = 0
        # Observation space: the dimensions of the field (without obstacles for
        # now), downscaled by 10, with every value bounded in [0, 1].
        self.observation_space = b.Box(0,
                                       1.0,
                                       shape=(int(821 / 10), int(1598 / 10),
                                              24))
        # Action space: the range of speeds that the wheels can have.
        self.action_space = ActionSpace()
        self.path = []
        self.fast_mode = 0
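
For reference, a self-contained sketch of just the observation Box above (b is assumed to be an alias for gym.spaces): the 821x1598-unit field is downscaled by 10, with 24 feature planes per cell and every value bounded in [0, 1].

from gym.spaces import Box

observation_space = Box(0, 1.0, shape=(int(821 / 10), int(1598 / 10), 24))
print(observation_space.shape)        # (82, 159, 24)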