class ChessEnv_v0(gym.Env):
    """Gym environment wrapping a python-chess board.

    The agent outputs a (2, 64) action interpreted by `numbersToMove` as a
    from-square / to-square pair. Illegal moves are replaced by a random
    legal move (via `randomAgent`) and receive reward 0; legal moves
    receive reward 1. Observations are one-hot (8, 8, 12) board planes.
    """

    # Declared for Gym compatibility; reward per step is 0 or 1, so the
    # range below is conservative.
    reward_range = (-1.0, 1.0)
    action_space = box.Box(0, 1, shape=(2, 64))
    observation_space = box.Box(0, 1, shape=(8, 8, 12))

    def reset(self):
        """Start a new game and return the initial one-hot observation."""
        self.board = chess.Board()
        self.oh_board = oneHotBoard(self.board)
        return self.oh_board

    def step(self, action):
        """Apply one move and return (obs, reward, done, info).

        Args:
          action: indexable with action[0]/action[1] decoded by
            `numbersToMove` into a chess move.

        Returns:
          obs: one-hot (8, 8, 12) board after the move.
          reward: 1 if the proposed move was legal, else 0.
          done: True on checkmate or when a draw can be claimed.
          info: empty dict (Gym API).
        """
        move = numbersToMove(action[0], action[1])
        legal = move in self.board.legal_moves
        if legal:
            self.board.push(move)
        else:
            # Fall back to a random legal move so the game always advances.
            move = randomAgent(self.board)
            self.board.push(move)
        self.oh_board = oneHotBoard(self.board)
        obs = self.oh_board
        rew = int(legal)
        done = self.board.is_checkmate() or self.board.can_claim_draw()
        # BUGFIX: the Gym API requires `info` to be a dict; returning None
        # breaks standard wrappers (e.g. Monitor) that call info.get(...).
        info = {}
        return obs, rew, done, info
def get_descriptor():
    """Creates an EnvironmentDescriptor."""
    # Number of observation planes: history frames, optionally doubled when
    # actions are encoded as extra planes, and tripled for RGB input.
    encode_actions = FLAGS.encode_actions != 0
    plane_size = (int(2 * FLAGS.n_history_frames) if encode_actions
                  else FLAGS.n_history_frames)
    if not FLAGS.grayscale:
        plane_size *= 3

    # Map the raw ranges through the inverse contractive mapping used by
    # MuZero's value/reward transform.
    raw_reward_range, raw_value_range = RANGES
    reward_range = tuple(
        mzcore.inverse_contractive_mapping(x) for x in raw_reward_range)
    value_range = tuple(
        mzcore.inverse_contractive_mapping(x) for x in raw_value_range)

    observation_space = box.Box(
        low=0.,
        high=1.,
        shape=(FLAGS.screen_size, FLAGS.screen_size, plane_size),
        dtype=np.float32)

    # Instantiate the game once just to read off its action space.
    action_space = gym.make(
        get_game_id(),
        full_action_space=(FLAGS.full_action_space == 1)).action_space

    return mzcore.EnvironmentDescriptor(
        observation_space=observation_space,
        action_space=action_space,
        reward_range=reward_range,
        value_range=value_range,
        pretraining_space=gym.spaces.Tuple([
            observation_space,
            observation_space,
        ]),
    )
def __init__(self, env, frame_skip=4, terminal_on_life_loss=False, screen_size=84):
    """Constructor for an Atari 2600 preprocessor.

    Args:
      env: Gym environment whose observations are preprocessed.
      frame_skip: int, the frequency at which the agent experiences the game.
      terminal_on_life_loss: bool, If True, the step() method returns
        is_terminal=True whenever a life is lost. See Mnih et al. 2015.
      screen_size: int, size of a resized Atari 2600 frame.

    Raises:
      ValueError: if frame_skip or screen_size are not strictly positive.
    """
    # BUGFIX: validate arguments *before* any side effects. Previously the
    # super().__init__ call and observation_space assignment ran first, so
    # an invalid screen_size still constructed a (bogus) Box and partially
    # initialized the wrapper before raising.
    if frame_skip <= 0:
        raise ValueError(
            'Frame skip should be strictly positive, got {}'.format(
                frame_skip))
    if screen_size <= 0:
        raise ValueError(
            'Target screen size should be strictly positive, got {}'.
            format(screen_size))
    super(AtariPreprocessing, self).__init__(env)
    # Return the observation space adjusted to match the shape of the processed
    # observations.
    self.observation_space = box.Box(low=0, high=255,
                                     shape=(screen_size, screen_size, 1),
                                     dtype=np.uint8)
    self.terminal_on_life_loss = terminal_on_life_loss
    self.frame_skip = frame_skip
    self.screen_size = screen_size
    obs_dims = self.env.observation_space
    # Stores temporary observations used for pooling over two successive
    # frames (max-pooling removes Atari sprite flicker).
    self.screen_buffer = [
        np.empty((obs_dims.shape[0], obs_dims.shape[1]), dtype=np.uint8),
        np.empty((obs_dims.shape[0], obs_dims.shape[1]), dtype=np.uint8)
    ]
    self.game_over = False
    self.lives = 0  # Will need to be set by reset().
def __init__(self, env, obs_dimorder=None, add_action_to_obs=False):
    """Wraps observations in a tuple, optionally transposing dimensions and
    appending the action space as a second tuple element.

    Args:
      env: the environment being wrapped.
      obs_dimorder: optional axis permutation applied to the observation
        space bounds (as accepted by np.transpose); None leaves it as-is.
      add_action_to_obs: if True, the action space is included in the
        observation tuple.
    """
    super(obsTupleWrap, self).__init__(env)
    self.obs_dimorder = obs_dimorder
    self.add_action_to_obs = add_action_to_obs

    # Base observation space: either the wrapped env's space unchanged, or
    # a Box with its low/high bounds transposed to the requested dim order.
    if obs_dimorder is None:
        base_space = self.env.observation_space
    else:
        base_space = gym_box.Box(
            np.transpose(self.env.observation_space.low, self.obs_dimorder),
            np.transpose(self.env.observation_space.high, self.obs_dimorder))

    if self.add_action_to_obs:
        members = (base_space, self.env.action_space)
    else:
        members = (base_space,)
    self.observation_space = gym_tuple(members)
def __init__(self):
    """Initializes the robot simulation in its default (pre-reset) state."""
    # Rendering / scaling.
    self.graphics = False
    self.s = 2  # scale factor for the graphics

    # Shooting-distance bounds relative to the target.
    self.minShootDist = 5   # minimum distance away from the target
    self.maxShootDist = 10  # maximum distance away from the target

    self.a = self.reward_point()

    # Robot geometry and timing.
    self.w = 5    # width of the robot
    self.t = 0.5  # time is rounded to the nearest 0.5 seconds

    # Pose: start position, current position, and heading (standard position).
    self.x0 = 0
    self.y0 = 0
    self.x = self.x0
    self.y = self.y0
    self.facing = 0

    # Wheel speeds.
    self.l_speed = 0  # left wheel
    self.r_speed = 0  # right wheel

    # Episode bookkeeping.
    self.is_over = False  # the game is not over yet
    self.reward = 0       # reward accumulated during the simulation
    self.counter = 0

    # Observation: field dimensions (no obstacles yet), downscaled by 10.
    # Technically a 1x1x1-normalized box.
    self.observation_space = b.Box(0, 1.0,
                                   shape=(int(821 / 10), int(1598 / 10), 24))
    # Action: the range of speeds the wheels can take.
    self.action_space = ActionSpace()

    self.path = []
    self.fast_mode = 0