def step(self, action):
    """
    Advance the world by one time step.

    Parameters
    ----------
    action : array of floats
        The set of action commands to execute.

    Returns
    -------
    self.sensors : array of floats
        The values of each of the sensors.
    self.reward : float
        The amount of reward or punishment given by the world.
    """
    self.timestep += 1
    self.action = action.ravel()
    # Treat any nonzero command as a full-strength (binary) action.
    self.action[np.nonzero(self.action)] = 1.
    # Actions 0-3 move the field of view to a higher-numbered row
    # (downward in the image) with varying magnitudes, and
    # actions 4-7 do the opposite.
    column_step = np.round(self.action[0] * self.MAX_STEP_SIZE / 2 +
                           self.action[1] * self.MAX_STEP_SIZE / 4 +
                           self.action[2] * self.MAX_STEP_SIZE / 8 +
                           self.action[3] * self.MAX_STEP_SIZE / 16 -
                           self.action[4] * self.MAX_STEP_SIZE / 2 -
                           self.action[5] * self.MAX_STEP_SIZE / 4 -
                           self.action[6] * self.MAX_STEP_SIZE / 8 -
                           self.action[7] * self.MAX_STEP_SIZE / 16)
    # Apply multiplicative noise to the commanded step.
    column_step = np.round(column_step * (
        1 + self.NOISE_MAGNITUDE * np.random.random_sample() * 2.0 -
        self.NOISE_MAGNITUDE * np.random.random_sample() * 2.0))
    self.column_step = column_step
    # Take the step, then clip the position to the world's bounds.
    self.column_position = self.column_position + int(column_step)
    self.column_position = max(self.column_position, self.column_min)
    self.column_position = min(self.column_position, self.column_max)
    # At random intervals, jump to a random position in the world.
    if np.random.random_sample() < self.JUMP_FRACTION:
        # randint's upper bound is exclusive, so add 1 to keep
        # column_max reachable (replaces deprecated random_integers).
        self.column_position = np.random.randint(self.column_min,
                                                 self.column_max + 1)
    # Create the sensory input vector. Integer (floor) division keeps
    # the slice indices ints under Python 3.
    half_width = self.fov_width // 2
    fov = self.data[:, self.column_position - half_width:
                    self.column_position + half_width]
    center_surround_pixels = wtools.center_surround(fov, self.fov_span,
                                                    self.fov_span)
    unsplit_sensors = center_surround_pixels.ravel()
    # Split the signal into complementary non-negative channels.
    self.sensors = np.concatenate((np.maximum(unsplit_sensors, 0),
                                   np.abs(np.minimum(unsplit_sensors, 0))))
    # Calculate the reward: a bonus for being near the target column,
    # minus a cost proportional to the size of the step taken.
    self.reward = 0
    if (np.abs(self.column_position - self.TARGET_COLUMN) <
            self.REWARD_REGION_WIDTH / 2.0):
        self.reward += self.REWARD_MAGNITUDE
    self.reward -= np.abs(column_step) / self.MAX_STEP_SIZE * self.STEP_COST
    return self.sensors, self.reward
def step(self, action):
    """
    Advance the world by one time step.

    Parameters
    ----------
    action : array of floats
        The set of action commands to execute.

    Returns
    -------
    self.sensors : array of floats
        The values of each of the sensors.
    self.reward : float
        The amount of reward or punishment given by the world.
    """
    self.timestep += 1
    self.action = action.ravel()
    # NOTE(review): unlike the sibling step() variants in this file,
    # this one does not binarize the action commands -- confirm that
    # graded action magnitudes are intended here.
    # Actions 0-3 move the field of view to a higher-numbered row
    # (downward in the image) with varying magnitudes, and
    # actions 4-7 do the opposite.
    column_step = np.round(self.action[0] * self.MAX_STEP_SIZE / 2 +
                           self.action[1] * self.MAX_STEP_SIZE / 4 +
                           self.action[2] * self.MAX_STEP_SIZE / 8 +
                           self.action[3] * self.MAX_STEP_SIZE / 16 -
                           self.action[4] * self.MAX_STEP_SIZE / 2 -
                           self.action[5] * self.MAX_STEP_SIZE / 4 -
                           self.action[6] * self.MAX_STEP_SIZE / 8 -
                           self.action[7] * self.MAX_STEP_SIZE / 16)
    # Apply multiplicative noise to the commanded step.
    column_step = np.round(
        column_step *
        (1 + self.NOISE_MAGNITUDE * np.random.random_sample() * 2.0 -
         self.NOISE_MAGNITUDE * np.random.random_sample() * 2.0))
    self.column_step = column_step
    # Take the step, then clip the position to the world's bounds.
    self.column_position = self.column_position + int(column_step)
    self.column_position = max(self.column_position, self.column_min)
    self.column_position = min(self.column_position, self.column_max)
    # At random intervals, jump to a random position in the world.
    if np.random.random_sample() < self.JUMP_FRACTION:
        # randint's upper bound is exclusive, so add 1 to keep
        # column_max reachable (replaces deprecated random_integers).
        self.column_position = np.random.randint(
            self.column_min, self.column_max + 1)
    # Create the sensory input vector. Integer (floor) division keeps
    # the slice indices ints under Python 3.
    half_width = self.fov_width // 2
    fov = self.data[:, self.column_position - half_width:
                    self.column_position + half_width]
    center_surround_pixels = wtools.center_surround(
        fov, self.fov_span, self.fov_span)
    unsplit_sensors = center_surround_pixels.ravel()
    # Split the signal into complementary non-negative channels.
    self.sensors = np.concatenate(
        (np.maximum(unsplit_sensors, 0),
         np.abs(np.minimum(unsplit_sensors, 0))))
    # Calculate the reward: a bonus for being near the target column,
    # minus a cost proportional to the size of the step taken.
    self.reward = 0
    if (np.abs(self.column_position - self.TARGET_COLUMN) <
            self.REWARD_REGION_WIDTH / 2.0):
        self.reward += self.REWARD_MAGNITUDE
    self.reward -= np.abs(
        column_step) / self.MAX_STEP_SIZE * self.STEP_COST
    return self.sensors, self.reward
def step(self, action):
    """
    Advance the world by one time step.

    Parameters
    ----------
    action : array of floats
        The set of action commands to execute.

    Returns
    -------
    self.sensors : array of floats
        The values of each of the sensors.
    self.reward : float
        The amount of reward or punishment given by the world.
    """
    self.timestep += 1
    self.action = action.ravel()
    # Treat any nonzero command as a full-strength (binary) action.
    self.action[np.nonzero(self.action)] = 1.
    # Actions 0-3 move the field of view to a higher-numbered row
    # (downward in the image) with varying magnitudes, and
    # actions 4-7 do the opposite.
    column_step = np.round(self.action[0] * self.max_step_size / 2 +
                           self.action[1] * self.max_step_size / 4 +
                           self.action[2] * self.max_step_size / 8 +
                           self.action[3] * self.max_step_size / 16 -
                           self.action[4] * self.max_step_size / 2 -
                           self.action[5] * self.max_step_size / 4 -
                           self.action[6] * self.max_step_size / 8 -
                           self.action[7] * self.max_step_size / 16)
    # Apply multiplicative noise to the commanded step.
    column_step = np.round(column_step * (
        1 + self.noise_magnitude * np.random.random_sample() * 2.0 -
        self.noise_magnitude * np.random.random_sample() * 2.0))
    self.column_step = column_step
    # Take the step, then clip the position to the world's bounds.
    self.column_position = self.column_position + int(column_step)
    self.column_position = max(self.column_position, self.column_min)
    self.column_position = min(self.column_position, self.column_max)
    self.column_history.append(self.column_position)
    # At random intervals, jump to a random position in the world.
    if np.random.random_sample() < self.jump_fraction:
        # randint's upper bound is exclusive, so add 1 to keep
        # column_max reachable (replaces deprecated random_integers).
        self.column_position = np.random.randint(self.column_min,
                                                 self.column_max + 1)
    # Create the sensory input vector. Integer (floor) division keeps
    # the slice indices ints under Python 3.
    half_width = self.fov_width // 2
    fov = self.data[:, self.column_position - half_width:
                    self.column_position + half_width]
    center_surround_pixels = wtools.center_surround(fov, self.fov_span,
                                                    self.fov_span)
    unsplit_sensors = center_surround_pixels.ravel()
    # Split the signal into complementary non-negative channels.
    self.sensors = np.concatenate((np.maximum(unsplit_sensors, 0),
                                   np.abs(np.minimum(unsplit_sensors, 0))))
    # Calculate the reward: a bonus for being near the target column,
    # minus a cost proportional to the size of the step taken.
    self.reward = 0
    if (np.abs(self.column_position -
               self.target_column) < self.reward_region_width / 2.0):
        self.reward += self.reward_magnitude
    self.reward -= (np.abs(column_step) /
                    self.max_step_size * self.step_cost)
    return self.sensors, self.reward
def step(self, action):
    """
    Advance the world by one time step.

    Parameters
    ----------
    action : array of floats
        The set of action commands to execute.

    Returns
    -------
    self.sensors : array of floats
        The values of each of the sensors.
    self.reward : float
        The amount of reward or punishment given by the world.
    """
    self.timestep += 1
    self.action = action.ravel()
    # Treat any nonzero command as a full-strength (binary) action.
    self.action[np.nonzero(self.action)] = 1.
    # Actions 0-3 move the field of view to a higher-numbered
    # row (downward in the image_data) with varying magnitudes,
    # and actions 4-7 do the opposite.
    # Actions 8-11 move the field of view to a higher-numbered
    # column (rightward in the image_data) with varying magnitudes,
    # and actions 12-15 do the opposite.
    row_step = np.round(action[0] * self.max_step_size / 2 +
                        action[1] * self.max_step_size / 4 +
                        action[2] * self.max_step_size / 8 +
                        action[3] * self.max_step_size / 16 -
                        action[4] * self.max_step_size / 2 -
                        action[5] * self.max_step_size / 4 -
                        action[6] * self.max_step_size / 8 -
                        action[7] * self.max_step_size / 16)
    column_step = np.round(action[8] * self.max_step_size / 2 +
                           action[9] * self.max_step_size / 4 +
                           action[10] * self.max_step_size / 8 +
                           action[11] * self.max_step_size / 16 -
                           action[12] * self.max_step_size / 2 -
                           action[13] * self.max_step_size / 4 -
                           action[14] * self.max_step_size / 8 -
                           action[15] * self.max_step_size / 16)
    # Apply multiplicative Gaussian noise to each commanded step.
    row_step = np.round(row_step * (
        1 + np.random.normal(scale=self.noise_magnitude)))
    column_step = np.round(column_step * (
        1 + np.random.normal(scale=self.noise_magnitude)))
    self.row_position = self.row_position + int(row_step)
    self.column_position = self.column_position + int(column_step)
    # Respect the boundaries of the image_data.
    self.row_position = max(self.row_position, self.row_min)
    self.row_position = min(self.row_position, self.row_max)
    self.column_position = max(self.column_position, self.column_min)
    self.column_position = min(self.column_position, self.column_max)
    # At random intervals, jump to a random position in the world.
    if np.random.random_sample() < self.jump_fraction:
        # randint's upper bound is exclusive, so add 1 to keep the
        # max positions reachable (replaces deprecated random_integers).
        self.column_position = np.random.randint(self.column_min,
                                                 self.column_max + 1)
        self.row_position = np.random.randint(self.row_min,
                                              self.row_max + 1)
    self.row_history.append(self.row_position)
    self.column_history.append(self.column_position)
    # Create the sensory input vector. Integer (floor) division keeps
    # the slice indices ints under Python 3.
    half_height = self.fov_height // 2
    half_width = self.fov_width // 2
    fov = self.image_data[self.row_position - half_height:
                          self.row_position + half_height,
                          self.column_position - half_width:
                          self.column_position + half_width]
    center_surround_pixels = wtools.center_surround(fov, self.fov_span,
                                                    self.fov_span)
    unsplit_sensors = center_surround_pixels.ravel()
    # Split the signal into complementary non-negative channels.
    self.sensors = np.concatenate((np.maximum(unsplit_sensors, 0),
                                   np.abs(np.minimum(unsplit_sensors, 0))))
    # Reward the agent only when both coordinates are near the target.
    self.reward = 0
    if ((np.abs(self.column_position - self.target_column) <
         self.reward_region_width / 2) and
            (np.abs(self.row_position - self.target_row) <
             self.reward_region_width / 2)):
        self.reward += self.reward_magnitude
    return self.sensors, self.reward
def step(self, action):
    """
    Advance the world by one time step.

    Parameters
    ----------
    action : array of floats
        The set of action commands to execute.

    Returns
    -------
    self.sensors : array of floats
        The values of each of the sensors.
    self.reward : float
        The amount of reward or punishment given by the world.
    """
    self.timestep += 1
    self.action = action.ravel()
    # Treat any nonzero command as a full-strength (binary) action.
    self.action[np.nonzero(self.action)] = 1.
    # Actions 0-3 move the field of view to a higher-numbered
    # row (downward in the block_image_data) with varying magnitudes,
    # and actions 4-7 do the opposite.
    # Actions 8-11 move the field of view to a higher-numbered
    # column (rightward in the block_image_data) with varying magnitudes,
    # and actions 12-15 do the opposite.
    row_step = np.round(action[0] * self.MAX_STEP_SIZE / 2 +
                        action[1] * self.MAX_STEP_SIZE / 4 +
                        action[2] * self.MAX_STEP_SIZE / 8 +
                        action[3] * self.MAX_STEP_SIZE / 16 -
                        action[4] * self.MAX_STEP_SIZE / 2 -
                        action[5] * self.MAX_STEP_SIZE / 4 -
                        action[6] * self.MAX_STEP_SIZE / 8 -
                        action[7] * self.MAX_STEP_SIZE / 16)
    column_step = np.round(action[8] * self.MAX_STEP_SIZE / 2 +
                           action[9] * self.MAX_STEP_SIZE / 4 +
                           action[10] * self.MAX_STEP_SIZE / 8 +
                           action[11] * self.MAX_STEP_SIZE / 16 -
                           action[12] * self.MAX_STEP_SIZE / 2 -
                           action[13] * self.MAX_STEP_SIZE / 4 -
                           action[14] * self.MAX_STEP_SIZE / 8 -
                           action[15] * self.MAX_STEP_SIZE / 16)
    # Apply multiplicative Gaussian noise to each commanded step.
    row_step = np.round(row_step * (
        1 + np.random.normal(scale=self.NOISE_MAGNITUDE)))
    column_step = np.round(column_step * (
        1 + np.random.normal(scale=self.NOISE_MAGNITUDE)))
    self.row_position = self.row_position + int(row_step)
    self.column_position = self.column_position + int(column_step)
    # Respect the boundaries of the block_image_data.
    self.row_position = max(self.row_position, self.row_min)
    self.row_position = min(self.row_position, self.row_max)
    self.column_position = max(self.column_position, self.column_min)
    self.column_position = min(self.column_position, self.column_max)
    # At random intervals, jump to a random position in the world.
    if np.random.random_sample() < self.JUMP_FRACTION:
        # randint's upper bound is exclusive, so add 1 to keep the
        # max positions reachable (replaces deprecated random_integers).
        self.column_position = np.random.randint(self.column_min,
                                                 self.column_max + 1)
        self.row_position = np.random.randint(self.row_min,
                                              self.row_max + 1)
    # Create the sensory input vector. Integer (floor) division keeps
    # the slice indices ints under Python 3.
    half_height = self.fov_height // 2
    half_width = self.fov_width // 2
    fov = self.block_image_data[self.row_position - half_height:
                                self.row_position + half_height,
                                self.column_position - half_width:
                                self.column_position + half_width]
    center_surround_pixels = wtools.center_surround(fov, self.fov_span,
                                                    self.fov_span)
    unsplit_sensors = center_surround_pixels.ravel()
    # Split the signal into complementary non-negative channels.
    self.sensors = np.concatenate((np.maximum(unsplit_sensors, 0),
                                   np.abs(np.minimum(unsplit_sensors, 0))))
    # Reward the agent only when both coordinates are near the target.
    self.reward = 0
    if ((np.abs(self.column_position - self.TARGET_COLUMN) <
         self.REWARD_REGION_WIDTH / 2) and
            (np.abs(self.row_position - self.TARGET_ROW) <
             self.REWARD_REGION_WIDTH / 2)):
        self.reward += self.REWARD_MAGNITUDE
    return self.sensors, self.reward
def step(self, action):
    """
    Advance the world by one time step.

    Parameters
    ----------
    action : array of floats
        The set of action commands to execute.

    Returns
    -------
    self.sensors : array of floats
        The values of each of the sensors.
    self.reward : float
        The amount of reward or punishment given by the world.
    """
    self.timestep += 1
    self.action = action.ravel()
    # Treat any nonzero command as a full-strength (binary) action.
    self.action[np.nonzero(self.action)] = 1.
    # Actions 0-3 move the field of view to a higher-numbered row
    # (downward in the image) with varying magnitudes, and
    # actions 4-7 do the opposite.
    column_step = np.round(self.action[0] * self.max_step_size / 2 +
                           self.action[1] * self.max_step_size / 4 +
                           self.action[2] * self.max_step_size / 8 +
                           self.action[3] * self.max_step_size / 16 -
                           self.action[4] * self.max_step_size / 2 -
                           self.action[5] * self.max_step_size / 4 -
                           self.action[6] * self.max_step_size / 8 -
                           self.action[7] * self.max_step_size / 16)
    # Apply multiplicative noise to the commanded step.
    column_step = np.round(
        column_step *
        (1 + self.noise_magnitude * np.random.random_sample() * 2.0 -
         self.noise_magnitude * np.random.random_sample() * 2.0))
    self.column_step = column_step
    # Take the step, then clip the position to the world's bounds.
    self.column_position = self.column_position + int(column_step)
    self.column_position = max(self.column_position, self.column_min)
    self.column_position = min(self.column_position, self.column_max)
    self.column_history.append(self.column_position)
    # At random intervals, jump to a random position in the world.
    if np.random.random_sample() < self.jump_fraction:
        # randint's upper bound is exclusive, so add 1 to keep
        # column_max reachable (replaces deprecated random_integers).
        self.column_position = np.random.randint(
            self.column_min, self.column_max + 1)
    # Create the sensory input vector. Integer (floor) division keeps
    # the slice indices ints under Python 3.
    half_width = self.fov_width // 2
    fov = self.data[:, self.column_position - half_width:
                    self.column_position + half_width]
    center_surround_pixels = wtools.center_surround(
        fov, self.fov_span, self.fov_span)
    unsplit_sensors = center_surround_pixels.ravel()
    # Split the signal into complementary non-negative channels.
    self.sensors = np.concatenate(
        (np.maximum(unsplit_sensors, 0),
         np.abs(np.minimum(unsplit_sensors, 0))))
    # Calculate the reward: a bonus for being near the target column,
    # minus a cost proportional to the size of the step taken.
    self.reward = 0
    if (np.abs(self.column_position -
               self.target_column) < self.reward_region_width / 2.0):
        self.reward += self.reward_magnitude
    self.reward -= (np.abs(column_step) /
                    self.max_step_size * self.step_cost)
    return self.sensors, self.reward
def step(self, action):
    """
    Advance the world by one time step.

    Parameters
    ----------
    action : array of floats
        The set of action commands to execute.

    Returns
    -------
    self.sensors : array of floats
        The values of each of the sensors.
    self.reward : float
        The amount of reward or punishment given by the world.
    """
    self.timestep += 1
    self.action = action.ravel()
    # NOTE(review): unlike the sibling step() variants in this file,
    # this one does not binarize the action commands -- confirm that
    # graded action magnitudes are intended here.
    # Actions 0-3 move the field of view to a higher-numbered
    # row (downward in the block_image_data) with varying magnitudes,
    # and actions 4-7 do the opposite.
    # Actions 8-11 move the field of view to a higher-numbered
    # column (rightward in the block_image_data) with varying magnitudes,
    # and actions 12-15 do the opposite.
    row_step = np.round(action[0] * self.MAX_STEP_SIZE / 2 +
                        action[1] * self.MAX_STEP_SIZE / 4 +
                        action[2] * self.MAX_STEP_SIZE / 8 +
                        action[3] * self.MAX_STEP_SIZE / 16 -
                        action[4] * self.MAX_STEP_SIZE / 2 -
                        action[5] * self.MAX_STEP_SIZE / 4 -
                        action[6] * self.MAX_STEP_SIZE / 8 -
                        action[7] * self.MAX_STEP_SIZE / 16)
    column_step = np.round(action[8] * self.MAX_STEP_SIZE / 2 +
                           action[9] * self.MAX_STEP_SIZE / 4 +
                           action[10] * self.MAX_STEP_SIZE / 8 +
                           action[11] * self.MAX_STEP_SIZE / 16 -
                           action[12] * self.MAX_STEP_SIZE / 2 -
                           action[13] * self.MAX_STEP_SIZE / 4 -
                           action[14] * self.MAX_STEP_SIZE / 8 -
                           action[15] * self.MAX_STEP_SIZE / 16)
    # Apply multiplicative Gaussian noise to each commanded step.
    row_step = np.round(row_step *
                        (1 + np.random.normal(scale=self.NOISE_MAGNITUDE)))
    column_step = np.round(
        column_step * (1 + np.random.normal(scale=self.NOISE_MAGNITUDE)))
    self.row_position = self.row_position + int(row_step)
    self.column_position = self.column_position + int(column_step)
    # Respect the boundaries of the block_image_data.
    self.row_position = max(self.row_position, self.row_min)
    self.row_position = min(self.row_position, self.row_max)
    self.column_position = max(self.column_position, self.column_min)
    self.column_position = min(self.column_position, self.column_max)
    # At random intervals, jump to a random position in the world.
    if np.random.random_sample() < self.JUMP_FRACTION:
        # randint's upper bound is exclusive, so add 1 to keep the
        # max positions reachable (replaces deprecated random_integers).
        self.column_position = np.random.randint(
            self.column_min, self.column_max + 1)
        self.row_position = np.random.randint(
            self.row_min, self.row_max + 1)
    # Create the sensory input vector. Integer (floor) division keeps
    # the slice indices ints under Python 3.
    half_height = self.fov_height // 2
    half_width = self.fov_width // 2
    fov = self.block_image_data[self.row_position - half_height:
                                self.row_position + half_height,
                                self.column_position - half_width:
                                self.column_position + half_width]
    center_surround_pixels = wtools.center_surround(
        fov, self.fov_span, self.fov_span)
    unsplit_sensors = center_surround_pixels.ravel()
    # Split the signal into complementary non-negative channels.
    self.sensors = np.concatenate(
        (np.maximum(unsplit_sensors, 0),
         np.abs(np.minimum(unsplit_sensors, 0))))
    # Reward the agent only when both coordinates are near the target.
    self.reward = 0
    if ((np.abs(self.column_position - self.TARGET_COLUMN) <
         self.REWARD_REGION_WIDTH / 2) and
            (np.abs(self.row_position - self.TARGET_ROW) <
             self.REWARD_REGION_WIDTH / 2)):
        self.reward += self.REWARD_MAGNITUDE
    return self.sensors, self.reward