Example #1
0
 def step(self, action):
     """
     Advance the world by one time step.

     Parameters
     ----------
     action : array of floats
         The set of action commands to execute. It is flattened and
         every nonzero element is set to 1 before use. Elements 0-7
         are read.

     Returns
     -------
     self.sensors : array of floats
         The values of each of the sensors: the positive parts of the
         center-surround pixels followed by the magnitudes of the
         negative parts.
     self.reward : float
         The amount of reward or punishment given by the world.
     """
     self.timestep += 1
     # NOTE: ravel() returns a view whenever it can, so the binarization
     # below may also overwrite the caller's array.
     self.action = action.ravel()
     self.action[np.nonzero(self.action)] = 1.
     # Actions 0-3 move the field of view to a higher-numbered column
     # with successively smaller magnitudes, and
     # actions 4-7 do the opposite.
     column_step = np.round(self.action[0] * self.MAX_STEP_SIZE / 2 +
                            self.action[1] * self.MAX_STEP_SIZE / 4 +
                            self.action[2] * self.MAX_STEP_SIZE / 8 +
                            self.action[3] * self.MAX_STEP_SIZE / 16 -
                            self.action[4] * self.MAX_STEP_SIZE / 2 -
                            self.action[5] * self.MAX_STEP_SIZE / 4 -
                            self.action[6] * self.MAX_STEP_SIZE / 8 -
                            self.action[7] * self.MAX_STEP_SIZE / 16)
     # Scale the step by multiplicative noise built from two
     # independent uniform draws.
     column_step = np.round(column_step * (
             1 + self.NOISE_MAGNITUDE * np.random.random_sample() * 2.0 -
             self.NOISE_MAGNITUDE * np.random.random_sample() * 2.0))
     self.column_step = column_step
     self.column_position = self.column_position + int(column_step)
     # Keep the position within the allowed column range
     self.column_position = max(self.column_position, self.column_min)
     self.column_position = min(self.column_position, self.column_max)
     # At random intervals, jump to a random position in the world
     if np.random.random_sample() < self.JUMP_FRACTION:
         # randint excludes its upper bound, hence the + 1 to keep
         # column_max reachable. The bounds are cast because randint
         # expects integer limits.
         self.column_position = np.random.randint(
                 int(self.column_min), int(self.column_max) + 1)
     # Create the sensory input vector.
     # Slice bounds must be integers: use floor division and cast, so
     # the result does not depend on the interpreter's `/` semantics.
     half_width = int(self.fov_width // 2)
     center = int(self.column_position)
     fov = self.data[:, center - half_width:center + half_width]
     center_surround_pixels = wtools.center_surround(fov, self.fov_span,
                                                     self.fov_span)
     unsplit_sensors = center_surround_pixels.ravel()
     # Split the signed values into separate positive and negative
     # channels, both expressed as non-negative numbers.
     self.sensors = np.concatenate((np.maximum(unsplit_sensors, 0),
                                    np.abs(np.minimum(unsplit_sensors, 0))))

     # Calculate the reward: a bonus for being inside the target region,
     # minus a cost proportional to the size of the step taken.
     self.reward = 0
     if (np.abs(self.column_position - self.TARGET_COLUMN) <
             self.REWARD_REGION_WIDTH / 2.0):
         self.reward += self.REWARD_MAGNITUDE
     self.reward -= np.abs(column_step) / self.MAX_STEP_SIZE * self.STEP_COST
     return self.sensors, self.reward
Example #2
0
    def step(self, action):
        """
        Advance the world by one time step.

        Parameters
        ----------
        action : array of floats
            The set of action commands to execute. It is flattened
            before use and elements 0-7 are read as step weights.

        Returns
        -------
        self.sensors : array of floats
            The values of each of the sensors: the positive parts of the
            center-surround pixels followed by the magnitudes of the
            negative parts.
        self.reward : float
            The amount of reward or punishment given by the world.
        """
        self.timestep += 1
        self.action = action.ravel()
        # Actions 0-3 move the field of view to a higher-numbered column
        # with successively smaller magnitudes, and
        # actions 4-7 do the opposite.
        column_step = np.round(self.action[0] * self.MAX_STEP_SIZE / 2 +
                               self.action[1] * self.MAX_STEP_SIZE / 4 +
                               self.action[2] * self.MAX_STEP_SIZE / 8 +
                               self.action[3] * self.MAX_STEP_SIZE / 16 -
                               self.action[4] * self.MAX_STEP_SIZE / 2 -
                               self.action[5] * self.MAX_STEP_SIZE / 4 -
                               self.action[6] * self.MAX_STEP_SIZE / 8 -
                               self.action[7] * self.MAX_STEP_SIZE / 16)
        # Scale the step by multiplicative noise built from two
        # independent uniform draws.
        column_step = np.round(
            column_step *
            (1 + self.NOISE_MAGNITUDE * np.random.random_sample() * 2.0 -
             self.NOISE_MAGNITUDE * np.random.random_sample() * 2.0))
        self.column_step = column_step
        self.column_position = self.column_position + int(column_step)
        # Keep the position within the allowed column range
        self.column_position = max(self.column_position, self.column_min)
        self.column_position = min(self.column_position, self.column_max)
        # At random intervals, jump to a random position in the world
        if np.random.random_sample() < self.JUMP_FRACTION:
            # randint excludes its upper bound, hence the + 1 to keep
            # column_max reachable. The bounds are cast because randint
            # expects integer limits.
            self.column_position = np.random.randint(
                int(self.column_min), int(self.column_max) + 1)
        # Create the sensory input vector.
        # Slice bounds must be integers: use floor division and cast, so
        # the result does not depend on the interpreter's `/` semantics.
        half_width = int(self.fov_width // 2)
        center = int(self.column_position)
        fov = self.data[:, center - half_width:center + half_width]
        center_surround_pixels = wtools.center_surround(
            fov, self.fov_span, self.fov_span)
        unsplit_sensors = center_surround_pixels.ravel()
        # Split the signed values into separate positive and negative
        # channels, both expressed as non-negative numbers.
        self.sensors = np.concatenate(
            (np.maximum(unsplit_sensors, 0),
             np.abs(np.minimum(unsplit_sensors, 0))))

        # Calculate the reward: a bonus for being inside the target
        # region, minus a cost proportional to the size of the step taken.
        self.reward = 0
        if (np.abs(self.column_position - self.TARGET_COLUMN) <
                self.REWARD_REGION_WIDTH / 2.0):
            self.reward += self.REWARD_MAGNITUDE
        self.reward -= np.abs(
            column_step) / self.MAX_STEP_SIZE * self.STEP_COST
        return self.sensors, self.reward
Example #3
0
    def step(self, action):
        """
        Advance the world by one time step.

        Parameters
        ----------
        action : array of floats
            The set of action commands to execute. It is flattened and
            every nonzero element is set to 1 before use. Elements 0-7
            are read.

        Returns
        -------
        self.sensors : array of floats
            The values of each of the sensors: the positive parts of the
            center-surround pixels followed by the magnitudes of the
            negative parts.
        self.reward : float
            The amount of reward or punishment given by the world.
        """
        self.timestep += 1
        # NOTE: ravel() returns a view whenever it can, so the
        # binarization below may also overwrite the caller's array.
        self.action = action.ravel()
        self.action[np.nonzero(self.action)] = 1.

        # Actions 0-3 move the field of view to a higher-numbered column
        # with successively smaller magnitudes, and
        # actions 4-7 do the opposite.
        column_step = np.round(self.action[0] * self.max_step_size / 2 +
                               self.action[1] * self.max_step_size / 4 +
                               self.action[2] * self.max_step_size / 8 +
                               self.action[3] * self.max_step_size / 16 -
                               self.action[4] * self.max_step_size / 2 -
                               self.action[5] * self.max_step_size / 4 -
                               self.action[6] * self.max_step_size / 8 -
                               self.action[7] * self.max_step_size / 16)
        # Scale the step by multiplicative noise built from two
        # independent uniform draws.
        column_step = np.round(column_step * (
                1 + self.noise_magnitude * np.random.random_sample() * 2.0 -
                self.noise_magnitude * np.random.random_sample() * 2.0))
        self.column_step = column_step
        self.column_position = self.column_position + int(column_step)
        # Keep the position within the allowed column range
        self.column_position = max(self.column_position, self.column_min)
        self.column_position = min(self.column_position, self.column_max)
        # The history records the position before any random jump below.
        self.column_history.append(self.column_position)

        # At random intervals, jump to a random position in the world
        if np.random.random_sample() < self.jump_fraction:
            # randint excludes its upper bound, hence the + 1 to keep
            # column_max reachable. The bounds are cast because randint
            # expects integer limits.
            self.column_position = np.random.randint(
                    int(self.column_min), int(self.column_max) + 1)
        # Create the sensory input vector.
        # Slice bounds must be integers: use floor division and cast, so
        # the result does not depend on the interpreter's `/` semantics.
        half_width = int(self.fov_width // 2)
        center = int(self.column_position)
        fov = self.data[:, center - half_width:center + half_width]
        center_surround_pixels = wtools.center_surround(fov, self.fov_span,
                                                        self.fov_span)
        unsplit_sensors = center_surround_pixels.ravel()
        # Split the signed values into separate positive and negative
        # channels, both expressed as non-negative numbers.
        self.sensors = np.concatenate((np.maximum(unsplit_sensors, 0),
                                       np.abs(np.minimum(unsplit_sensors, 0))))

        # Calculate the reward: a bonus for being inside the target
        # region, minus a cost proportional to the size of the step taken.
        self.reward = 0
        if (np.abs(self.column_position - self.target_column) <
                self.reward_region_width / 2.0):
            self.reward += self.reward_magnitude
        self.reward -= (np.abs(column_step) /
                        self.max_step_size * self.step_cost)
        return self.sensors, self.reward
Example #4
0
    def step(self, action):
        """
        Advance the world by one time step.

        Parameters
        ----------
        action : array of floats
            The set of action commands to execute. It is flattened and
            every nonzero element is set to 1 before use. Elements 0-15
            are read.

        Returns
        -------
        self.sensors : array of floats
            The values of each of the sensors: the positive parts of the
            center-surround pixels followed by the magnitudes of the
            negative parts.
        self.reward : float
            The amount of reward given by the world.
        """
        self.timestep += 1
        # NOTE: ravel() returns a view whenever it can, so the
        # binarization below may also overwrite the caller's array.
        self.action = action.ravel()
        self.action[np.nonzero(self.action)] = 1.

        # Actions 0-3 move the field of view to a higher-numbered
        # row (downward in the image_data) with varying magnitudes,
        # and actions 4-7 do the opposite.
        # Actions 8-11 move the field of view to a higher-numbered
        # column (rightward in the image_data) with varying magnitudes,
        # and actions 12-15 do the opposite.
        # The flattened, binarized self.action is indexed (rather than
        # the raw argument) so that each term is a scalar regardless of
        # the shape the action was passed in with.
        commands = self.action
        row_step = np.round(commands[0] * self.max_step_size / 2 +
                            commands[1] * self.max_step_size / 4 +
                            commands[2] * self.max_step_size / 8 +
                            commands[3] * self.max_step_size / 16 -
                            commands[4] * self.max_step_size / 2 -
                            commands[5] * self.max_step_size / 4 -
                            commands[6] * self.max_step_size / 8 -
                            commands[7] * self.max_step_size / 16)
        column_step = np.round(commands[8] * self.max_step_size / 2 +
                               commands[9] * self.max_step_size / 4 +
                               commands[10] * self.max_step_size / 8 +
                               commands[11] * self.max_step_size / 16 -
                               commands[12] * self.max_step_size / 2 -
                               commands[13] * self.max_step_size / 4 -
                               commands[14] * self.max_step_size / 8 -
                               commands[15] * self.max_step_size / 16)

        # Scale each step by independent multiplicative Gaussian noise
        row_step = np.round(row_step * (
                1 + np.random.normal(scale=self.noise_magnitude)))
        column_step = np.round(column_step * (
                1 + np.random.normal(scale=self.noise_magnitude)))
        self.row_position = self.row_position + int(row_step)
        self.column_position = self.column_position + int(column_step)

        # Respect the boundaries of the image_data
        self.row_position = max(self.row_position, self.row_min)
        self.row_position = min(self.row_position, self.row_max)
        self.column_position = max(self.column_position, self.column_min)
        self.column_position = min(self.column_position, self.column_max)

        # At random intervals, jump to a random position in the world
        if np.random.random_sample() < self.jump_fraction:
            # randint excludes its upper bound, hence the + 1 to keep the
            # maxima reachable. The bounds are cast because randint
            # expects integer limits.
            self.column_position = np.random.randint(
                    int(self.column_min), int(self.column_max) + 1)
            self.row_position = np.random.randint(
                    int(self.row_min), int(self.row_max) + 1)
        self.row_history.append(self.row_position)
        self.column_history.append(self.column_position)

        # Create the sensory input vector.
        # Slice bounds must be integers: use floor division and cast, so
        # the result does not depend on the interpreter's `/` semantics.
        half_height = int(self.fov_height // 2)
        half_width = int(self.fov_width // 2)
        row = int(self.row_position)
        column = int(self.column_position)
        fov = self.image_data[row - half_height:row + half_height,
                              column - half_width:column + half_width]
        center_surround_pixels = wtools.center_surround(fov, self.fov_span,
                                                        self.fov_span)
        unsplit_sensors = center_surround_pixels.ravel()
        # Split the signed values into separate positive and negative
        # channels, both expressed as non-negative numbers.
        self.sensors = np.concatenate((np.maximum(unsplit_sensors, 0),
                                       np.abs(np.minimum(unsplit_sensors, 0))))

        # Reward only when both coordinates are inside the target region
        self.reward = 0
        if ((np.abs(self.column_position - self.target_column) <
             self.reward_region_width / 2) and
            (np.abs(self.row_position - self.target_row) <
             self.reward_region_width / 2)):
            self.reward += self.reward_magnitude

        return self.sensors, self.reward
Example #5
0
    def step(self, action):
        """
        Advance the world by one time step.

        Parameters
        ----------
        action : array of floats
            The set of action commands to execute. It is flattened and
            every nonzero element is set to 1 before use. Elements 0-15
            are read.

        Returns
        -------
        self.sensors : array of floats
            The values of each of the sensors: the positive parts of the
            center-surround pixels followed by the magnitudes of the
            negative parts.
        self.reward : float
            The amount of reward given by the world.
        """
        self.timestep += 1
        # NOTE: ravel() returns a view whenever it can, so the
        # binarization below may also overwrite the caller's array.
        self.action = action.ravel()
        self.action[np.nonzero(self.action)] = 1.
        # Actions 0-3 move the field of view to a higher-numbered
        # row (downward in the block_image_data) with varying magnitudes,
        # and actions 4-7 do the opposite.
        # Actions 8-11 move the field of view to a higher-numbered
        # column (rightward in the block_image_data) with varying magnitudes,
        # and actions 12-15 do the opposite.
        # The flattened, binarized self.action is indexed (rather than
        # the raw argument) so that each term is a scalar regardless of
        # the shape the action was passed in with.
        commands = self.action
        row_step = np.round(commands[0] * self.MAX_STEP_SIZE / 2 +
                            commands[1] * self.MAX_STEP_SIZE / 4 +
                            commands[2] * self.MAX_STEP_SIZE / 8 +
                            commands[3] * self.MAX_STEP_SIZE / 16 -
                            commands[4] * self.MAX_STEP_SIZE / 2 -
                            commands[5] * self.MAX_STEP_SIZE / 4 -
                            commands[6] * self.MAX_STEP_SIZE / 8 -
                            commands[7] * self.MAX_STEP_SIZE / 16)
        column_step = np.round(commands[8] * self.MAX_STEP_SIZE / 2 +
                               commands[9] * self.MAX_STEP_SIZE / 4 +
                               commands[10] * self.MAX_STEP_SIZE / 8 +
                               commands[11] * self.MAX_STEP_SIZE / 16 -
                               commands[12] * self.MAX_STEP_SIZE / 2 -
                               commands[13] * self.MAX_STEP_SIZE / 4 -
                               commands[14] * self.MAX_STEP_SIZE / 8 -
                               commands[15] * self.MAX_STEP_SIZE / 16)

        # Scale each step by independent multiplicative Gaussian noise
        row_step = np.round(row_step * (
                1 + np.random.normal(scale=self.NOISE_MAGNITUDE)))
        column_step = np.round(column_step * (
                1 + np.random.normal(scale=self.NOISE_MAGNITUDE)))
        self.row_position = self.row_position + int(row_step)
        self.column_position = self.column_position + int(column_step)
        # Respect the boundaries of the block_image_data
        self.row_position = max(self.row_position, self.row_min)
        self.row_position = min(self.row_position, self.row_max)
        self.column_position = max(self.column_position, self.column_min)
        self.column_position = min(self.column_position, self.column_max)

        # At random intervals, jump to a random position in the world
        if np.random.random_sample() < self.JUMP_FRACTION:
            # randint excludes its upper bound, hence the + 1 to keep the
            # maxima reachable. The bounds are cast because randint
            # expects integer limits.
            self.column_position = np.random.randint(
                    int(self.column_min), int(self.column_max) + 1)
            self.row_position = np.random.randint(
                    int(self.row_min), int(self.row_max) + 1)

        # Create the sensory input vector.
        # Slice bounds must be integers: use floor division and cast, so
        # the result does not depend on the interpreter's `/` semantics.
        half_height = int(self.fov_height // 2)
        half_width = int(self.fov_width // 2)
        row = int(self.row_position)
        column = int(self.column_position)
        fov = self.block_image_data[row - half_height:row + half_height,
                                    column - half_width:column + half_width]
        center_surround_pixels = wtools.center_surround(fov, self.fov_span,
                                                        self.fov_span)
        unsplit_sensors = center_surround_pixels.ravel()
        # Split the signed values into separate positive and negative
        # channels, both expressed as non-negative numbers.
        self.sensors = np.concatenate((np.maximum(unsplit_sensors, 0),
                                       np.abs(np.minimum(unsplit_sensors, 0))))

        # Reward only when both coordinates are inside the target region
        self.reward = 0
        if ((np.abs(self.column_position - self.TARGET_COLUMN) <
             self.REWARD_REGION_WIDTH / 2) and
            (np.abs(self.row_position - self.TARGET_ROW) <
             self.REWARD_REGION_WIDTH / 2)):
            self.reward += self.REWARD_MAGNITUDE
        return self.sensors, self.reward
Example #6
0
    def step(self, action):
        """
        Advance the world by one time step.

        Parameters
        ----------
        action : array of floats
            The set of action commands to execute. It is flattened and
            every nonzero element is set to 1 before use. Elements 0-7
            are read.

        Returns
        -------
        self.sensors : array of floats
            The values of each of the sensors: the positive parts of the
            center-surround pixels followed by the magnitudes of the
            negative parts.
        self.reward : float
            The amount of reward or punishment given by the world.
        """
        self.timestep += 1
        # NOTE: ravel() returns a view whenever it can, so the
        # binarization below may also overwrite the caller's array.
        self.action = action.ravel()
        self.action[np.nonzero(self.action)] = 1.

        # Actions 0-3 move the field of view to a higher-numbered column
        # with successively smaller magnitudes, and
        # actions 4-7 do the opposite.
        column_step = np.round(self.action[0] * self.max_step_size / 2 +
                               self.action[1] * self.max_step_size / 4 +
                               self.action[2] * self.max_step_size / 8 +
                               self.action[3] * self.max_step_size / 16 -
                               self.action[4] * self.max_step_size / 2 -
                               self.action[5] * self.max_step_size / 4 -
                               self.action[6] * self.max_step_size / 8 -
                               self.action[7] * self.max_step_size / 16)
        # Scale the step by multiplicative noise built from two
        # independent uniform draws.
        column_step = np.round(
            column_step *
            (1 + self.noise_magnitude * np.random.random_sample() * 2.0 -
             self.noise_magnitude * np.random.random_sample() * 2.0))
        self.column_step = column_step
        self.column_position = self.column_position + int(column_step)
        # Keep the position within the allowed column range
        self.column_position = max(self.column_position, self.column_min)
        self.column_position = min(self.column_position, self.column_max)
        # The history records the position before any random jump below.
        self.column_history.append(self.column_position)

        # At random intervals, jump to a random position in the world
        if np.random.random_sample() < self.jump_fraction:
            # randint excludes its upper bound, hence the + 1 to keep
            # column_max reachable. The bounds are cast because randint
            # expects integer limits.
            self.column_position = np.random.randint(
                int(self.column_min), int(self.column_max) + 1)
        # Create the sensory input vector.
        # Slice bounds must be integers: use floor division and cast, so
        # the result does not depend on the interpreter's `/` semantics.
        half_width = int(self.fov_width // 2)
        center = int(self.column_position)
        fov = self.data[:, center - half_width:center + half_width]
        center_surround_pixels = wtools.center_surround(
            fov, self.fov_span, self.fov_span)
        unsplit_sensors = center_surround_pixels.ravel()
        # Split the signed values into separate positive and negative
        # channels, both expressed as non-negative numbers.
        self.sensors = np.concatenate(
            (np.maximum(unsplit_sensors, 0),
             np.abs(np.minimum(unsplit_sensors, 0))))

        # Calculate the reward: a bonus for being inside the target
        # region, minus a cost proportional to the size of the step taken.
        self.reward = 0
        if (np.abs(self.column_position - self.target_column) <
                self.reward_region_width / 2.0):
            self.reward += self.reward_magnitude
        self.reward -= (np.abs(column_step) / self.max_step_size *
                        self.step_cost)
        return self.sensors, self.reward
Example #7
0
    def step(self, action):
        """
        Advance the world by one time step.

        Parameters
        ----------
        action : array of floats
            The set of action commands to execute. It is flattened
            before use and elements 0-15 are read as step weights.

        Returns
        -------
        self.sensors : array of floats
            The values of each of the sensors: the positive parts of the
            center-surround pixels followed by the magnitudes of the
            negative parts.
        self.reward : float
            The amount of reward given by the world.
        """
        self.timestep += 1
        self.action = action.ravel()
        # Actions 0-3 move the field of view to a higher-numbered
        # row (downward in the block_image_data) with varying magnitudes,
        # and actions 4-7 do the opposite.
        # Actions 8-11 move the field of view to a higher-numbered
        # column (rightward in the block_image_data) with varying magnitudes,
        # and actions 12-15 do the opposite.
        # The flattened self.action is indexed (rather than the raw
        # argument) so that each term is a scalar regardless of the
        # shape the action was passed in with.
        commands = self.action
        row_step = np.round(commands[0] * self.MAX_STEP_SIZE / 2 +
                            commands[1] * self.MAX_STEP_SIZE / 4 +
                            commands[2] * self.MAX_STEP_SIZE / 8 +
                            commands[3] * self.MAX_STEP_SIZE / 16 -
                            commands[4] * self.MAX_STEP_SIZE / 2 -
                            commands[5] * self.MAX_STEP_SIZE / 4 -
                            commands[6] * self.MAX_STEP_SIZE / 8 -
                            commands[7] * self.MAX_STEP_SIZE / 16)
        column_step = np.round(commands[8] * self.MAX_STEP_SIZE / 2 +
                               commands[9] * self.MAX_STEP_SIZE / 4 +
                               commands[10] * self.MAX_STEP_SIZE / 8 +
                               commands[11] * self.MAX_STEP_SIZE / 16 -
                               commands[12] * self.MAX_STEP_SIZE / 2 -
                               commands[13] * self.MAX_STEP_SIZE / 4 -
                               commands[14] * self.MAX_STEP_SIZE / 8 -
                               commands[15] * self.MAX_STEP_SIZE / 16)

        # Scale each step by independent multiplicative Gaussian noise
        row_step = np.round(row_step *
                            (1 + np.random.normal(scale=self.NOISE_MAGNITUDE)))
        column_step = np.round(
            column_step * (1 + np.random.normal(scale=self.NOISE_MAGNITUDE)))
        self.row_position = self.row_position + int(row_step)
        self.column_position = self.column_position + int(column_step)
        # Respect the boundaries of the block_image_data
        self.row_position = max(self.row_position, self.row_min)
        self.row_position = min(self.row_position, self.row_max)
        self.column_position = max(self.column_position, self.column_min)
        self.column_position = min(self.column_position, self.column_max)

        # At random intervals, jump to a random position in the world
        if np.random.random_sample() < self.JUMP_FRACTION:
            # randint excludes its upper bound, hence the + 1 to keep the
            # maxima reachable. The bounds are cast because randint
            # expects integer limits.
            self.column_position = np.random.randint(
                int(self.column_min), int(self.column_max) + 1)
            self.row_position = np.random.randint(
                int(self.row_min), int(self.row_max) + 1)

        # Create the sensory input vector.
        # Slice bounds must be integers: use floor division and cast, so
        # the result does not depend on the interpreter's `/` semantics.
        half_height = int(self.fov_height // 2)
        half_width = int(self.fov_width // 2)
        row = int(self.row_position)
        column = int(self.column_position)
        fov = self.block_image_data[row - half_height:row + half_height,
                                    column - half_width:column + half_width]
        center_surround_pixels = wtools.center_surround(
            fov, self.fov_span, self.fov_span)
        unsplit_sensors = center_surround_pixels.ravel()
        # Split the signed values into separate positive and negative
        # channels, both expressed as non-negative numbers.
        self.sensors = np.concatenate(
            (np.maximum(unsplit_sensors, 0),
             np.abs(np.minimum(unsplit_sensors, 0))))

        # Reward only when both coordinates are inside the target region
        self.reward = 0
        if ((np.abs(self.column_position - self.TARGET_COLUMN) <
             self.REWARD_REGION_WIDTH / 2)
                and (np.abs(self.row_position - self.TARGET_ROW) <
                     self.REWARD_REGION_WIDTH / 2)):
            self.reward += self.REWARD_MAGNITUDE
        return self.sensors, self.reward