Example #1
def evaluate_model_components(n_agents=5, n_episodes=35):
    all_rewards_hipp = np.zeros((n_agents, n_episodes))
    all_rewards_striat = np.zeros((n_agents, n_episodes))
    for ag in tqdm(range(n_agents), desc='Agent'):

        a = TDAgent(env=WaterMazeEnv(),
                    lesion_hippocampus=True,
                    lesion_striatum=False)
        a2 = TDAgent(env=WaterMazeEnv(),
                     lesion_hippocampus=False,
                     lesion_striatum=True)

        a2.hippocampus.learning_rate = .04
        a2.hippocampus.negative_learning_rate = .04
        a2.hippocampus.lamb = .7

        a.env.start_x, a.env.start_y = [.4, 1.4]
        a2.env.start_x, a2.env.start_y = [.4, 1.4]
        a2.env.curr_orientation = 0
        a.env.curr_orientation = 0

        for ep in tqdm(range(n_episodes), leave=False, desc='Trial'):
            # `a` is the hippocampus-lesioned agent, `a2` the striatum-lesioned one.
            t, _, _, _ = a.train_one_episode()
            t2, _, _, _ = a2.train_one_episode()

            all_rewards_hipp[ag, ep] = t      # escape time (in time bins) with the hippocampus lesioned
            all_rewards_striat[ag, ep] = t2   # escape time (in time bins) with the striatum lesioned
    return all_rewards_hipp, all_rewards_striat
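A minimal way to run this comparison and convert the returned step counts into seconds, assuming `WaterMazeEnv` exposes the `time_bin` attribute used in the plotting code further down:

hipp_lesion_times, striat_lesion_times = evaluate_model_components(n_agents=5, n_episodes=35)

env = WaterMazeEnv()
# Mean escape time in seconds per trial, averaged over agents.
mean_hipp_lesion = (hipp_lesion_times * env.time_bin).mean(axis=0)
mean_striat_lesion = (striat_lesion_times * env.time_bin).mean(axis=0)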
Example #2
    def __init__(self, env=WaterMazeEnv(), learning_rate=.1, gamma=.98):

        self.env = env
        self.learning_rate = learning_rate
        self.gamma = gamma

        # sensory system:
        self.rf_radius = 1
        self.max_viewing_angle = 135
        self.rf_x, self.rf_y = vs.make_receptive_fields_simple(
            n_angles=21,
            radius=self.rf_radius,
            max_angle=self.max_viewing_angle,
            n_radii=6)
        self.sens_neuron_activations = np.zeros(self.rf_x.flatten().shape)
        self.previous_sensory_activations = np.zeros(
            self.sens_neuron_activations.shape)
        self.max_firing_rate = 10
        self.rf_var = .05

        # RL system:
        self.striatum_activation = np.zeros(self.env.actions.shape)
        self.previous_striatum_activation = None
        self.weight_mat = np.zeros((self.striatum_activation.shape[0],
                                    self.sens_neuron_activations.shape[0]))
        self.delta = 0
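The readout from the sensory layer to the striatal action cells is not part of this excerpt; a minimal sketch of a linear readout that is dimensionally consistent with the attributes above (the function name and the exact rule are assumptions, not the project's implementation):

def compute_striatum_activation(striatum):
    """Each action cell sums its weighted sensory inputs
    (weight_mat is n_actions x n_sensory_neurons)."""
    striatum.previous_striatum_activation = striatum.striatum_activation
    striatum.striatum_activation = striatum.weight_mat @ striatum.sens_neuron_activations
    return striatum.striatum_activation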
Example #3
    def __init__(self, env=WaterMazeEnv(), learning_rate=.001, gamma=.98, lamb=.76):
        """

        :param env: Instance of environment class.
        :param learning_rate: Learning rate for Q learning.
        :param gamma: Future reward discount factor.
        :param lamb: Eligibility trace decay parameter.
        """
        self.env = env
        self.learning_rate = learning_rate
        self.gamma = gamma
        self.lamb = lamb

        # Make the landmark cells
        self.field_width = 5  # 27.5
        self.max_viewing_angle = 175  # 135
        self.n_landmark_cells = 100
        self.landmark_cell_centres = np.linspace(-self.max_viewing_angle, self.max_viewing_angle, self.n_landmark_cells)
        self.LC_activations = np.zeros(self.n_landmark_cells)
        self.previous_LC_activity = None

        # Make the action cells
        self.n_action_cells = len(self.env.actions)
        self.previous_striatum_activation = None
        self.striatum_activation = np.zeros(self.n_action_cells)
        # Small random initial weights from landmark cells to action cells.
        self.weight_mat = np.random.rand(self.n_action_cells, self.n_landmark_cells) * .004
        self.eligibility_trace = np.zeros(self.weight_mat.shape)
        self.generalisation_phase_activity = None
        self.generalisation_phase_var = 22.5
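Only the initialisation of the landmark cells is shown here; their activity presumably follows a tuning curve around each cell's preferred egocentric angle. A minimal Gaussian sketch under that assumption (the function name and the exact tuning shape are not taken from the project):

import numpy as np

def landmark_cell_activity(landmark_angle, centres, field_width):
    """Gaussian response of each landmark cell to a landmark seen at
    `landmark_angle` degrees relative to the agent's heading."""
    return np.exp(-(landmark_angle - centres) ** 2 / (2 * field_width ** 2))

# e.g. activations for a landmark 30 degrees to the right:
# lc_activations = landmark_cell_activity(-30, self.landmark_cell_centres, self.field_width)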
Example #4
def plot_model_performance(ax, colour_palette):
    env = WaterMazeEnv()
    tsplot_boot(ax,
                all_rewards_striat[:, :35] * env.time_bin,
                color=colour_palette[0])
    tsplot_boot(ax,
                all_rewards_hipp[:, :35] * env.time_bin,
                color=colour_palette[1])

    # Hide the right and top spines
    ax.spines['right'].set_visible(False)
    ax.spines['top'].set_visible(False)
    ax.legend(['Striatum', 'Hippocampus'], fontsize=10)
    ax.set_xlabel('Trials', fontsize=12)
    ax.set_ylabel('Escape time (s)', fontsize=12)
Example #5
def get_full_model_trajectories(n_episodes):
    all_choices = []
    all_locs = []
    a = TDAgent(env=WaterMazeEnv(),
                lesion_hippocampus=False,
                lesion_striatum=False)
    a.hippocampus.learning_rate = .04
    a.hippocampus.negative_learning_rate = .04
    a.hippocampus.lamb = .76
    a.env.start_x, a.env.start_y = [.4, 1.4]
    for ep in tqdm(range(n_episodes)):
        t, reward, locs, choices = a.train_one_episode()
        all_locs.append(locs)
        all_choices.append(choices)
    return np.array(all_choices), np.array(all_locs)
Example #6
    def __init__(self,
                 env=WaterMazeEnv(),
                 epsilon=.1,
                 lesion_striatum=False,
                 lesion_hippocampus=False):
        """Initialise the agent with a hippocampus and striatum. If both are lesioned, behaviour will be random.

        :param (float) epsilon: Randomness parameter.
        :param (bool) lesion_striatum: Inactivates the striatum model.
        :param (bool) lesion_hippocampus: Inactivates the hippocampus model.
        """
        self.epsilon = epsilon
        if lesion_hippocampus and lesion_striatum:
            self.epsilon = 1  # Output random behaviour if no striatum and no hippocampus are present.
        self.striatum_lesion = lesion_striatum
        self.hippocampus_lesion = lesion_hippocampus
        self.env = env
        self.hippocampus = Hippocampus(self.env)
        self.striatum = Striatum(self.env)
        self.reached_goal = False
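The action-selection code is not part of this excerpt (Example #14 below shows the hippocampus/striatum arbitration); a minimal sketch of how the `epsilon` fallback could gate a random choice, where the helper name and the greedy/random split are assumptions:

import numpy as np

def select_action(agent, greedy_action):
    """With probability agent.epsilon take a random action, otherwise the greedy one.
    With both structures lesioned, epsilon is 1 and behaviour is fully random."""
    if np.random.rand() < agent.epsilon:
        return np.random.choice(len(agent.env.actions))
    return greedy_action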
Example #7
    def __init__(self,
                 env=WaterMazeEnv(),
                 learning_rate=.001,
                 negative_learning_rate=.001,
                 gamma=.98,
                 lamb=.67):
        self.env = env
        self.learning_rate = learning_rate
        self.negative_learning_rate = negative_learning_rate
        self.gamma = gamma
        self.lamb = lamb
        self.max_turning_angle = 60

        # Create the place cells:
        self.field_width = .3
        self.field_centres = self.create_place_cells()
        if isinstance(self.env, PlusMaze):
            self.geodesic_field_centres = self.env.lookup_geodesic_coordinate(
                self.field_centres.T).T
        self.n_place_cells = self.field_centres.shape[1]
        self.max_response = self.get_max_response()
        self.previous_place_cell_responses = None
        self.place_cell_responses = None
        self.update_place_cell_response()

        # And the goal cell
        self.value = 0
        self.previous_value = 0

        # Small random initial weights from place cells to the goal cell.
        self.weights = np.random.rand(self.n_place_cells) * .04
        self.eligibility_trace = np.zeros(self.weights.shape)

        # Allocentric actions (0 is east)
        # Note that the only actions currently available to the agent will be -60 to 60 degrees away.
        self.allocentric_directions = [
            0, 30, 60, 90, 120, 150, 180, 210, 240, 270, 300, 330
        ]

        self.remembered_goal_location = [0, 0]
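The comment above notes that only headings within 60 degrees of the current direction are available, but how that subset is computed is not shown. A small sketch under the assumption that the environment's `curr_orientation` attribute (used in Example #1) holds the current allocentric heading:

def available_directions(hippocampus):
    """Allocentric headings within max_turning_angle of the agent's current orientation."""
    current = hippocampus.env.curr_orientation
    return [d for d in hippocampus.allocentric_directions
            if abs((d - current + 180) % 360 - 180) <= hippocampus.max_turning_angle]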
Example #8
    def __init__(self,
                 env=WaterMazeEnv(),
                 learning_rate=.5,
                 negative_learning_rate=.05,
                 goal_cell_decay_factor=.99):
        self.env = env
        self.learning_rate = learning_rate
        self.negative_learning_rate = negative_learning_rate
        self.goal_cell_decay_factor = goal_cell_decay_factor
        self.max_turning_angle = 60

        # Create the place cells:
        self.field_width = .3
        self.field_centres = self.create_place_cells()
        if isinstance(self.env, PlusMaze):
            self.geodesic_field_centres = self.env.lookup_geodesic_coordinate(
                self.field_centres.T).T
        self.n_place_cells = self.field_centres.shape[1]
        self.max_response = self.get_max_response()
        self.previous_place_cell_responses = None
        self.place_cell_responses = None
        self.update_place_cell_response()

        # And the goal cell
        self.goal_cell_rate = 0
        self.weights = np.zeros(self.n_place_cells)
        if isinstance(self.env, PlusMaze):
            self.max_goal_response = .05
        else:
            self.max_goal_response = 15  #5
            self.cur_max = self.max_goal_response

        # Allocentric actions (0 is east)
        # Note that the only actions currently available to the agent will be -60 to 60 degrees away.
        self.allocentric_directions = [
            0, 30, 60, 90, 120, 150, 180, 210, 240, 270, 300, 330
        ]

        self.remembered_goal_location = [0, 0]
Example #9
def get_agent_choices(n_agents=20, n_episodes=41):

    agent_choices = []

    for agent in tqdm(range(n_agents)):

        all_locs = []
        all_choices = []
        a = TDAgent(env=WaterMazeEnv(), lesion_hippocampus=False)

        a.hippocampus.learning_rate = .04
        a.hippocampus.negative_learning_rate = .04
        a.hippocampus.lamb = .8

        a.env.start_x, a.env.start_y = [.4, 1.4]
        for ep in range(n_episodes):
            t, reward, locs, choices = a.train_one_episode()
            all_locs.append(locs)
            all_choices.append(choices)

        agent_choices.append(all_choices)
    return np.array(agent_choices)
Example #10
    def __init__(self, n_trials=30, env=WaterMazeEnv()):

        WaterMazeAgent.__init__(self, n_trials=n_trials, env=env)
        self.env.trial = 0

        self.field_centres = self.create_place_cells()
        self.field_width = .09
        self.max_response = utils.gauss2d([0, 0], self.field_width**2, [0, 0])
        self.place_cell_responses = np.zeros(self.field_centres[0].shape)
        self.previous_place_cell_responses = np.zeros(
            self.field_centres[0].shape)
        self.update_place_cell_response()

        # Twelve allocentric directions in 30-degree steps; action 0 is stored as 360 rather than 0 degrees.
        self.actions = {idx: direction
                        for idx, direction in enumerate(range(0, 360, 30))}
        self.actions[0] = 360

        self.current_action = np.random.choice(len(self.actions))
        self.previous_action = None

        # initialise critic:
        self.critic_weights = np.zeros(self.place_cell_responses.shape)
        self.max_sum_weights = 1.
        self.critic_activation = np.dot(self.critic_weights,
                                        self.place_cell_responses)
        self.previous_critic_activation = None

        # initialise actor:
        self.action_weights = np.zeros(
            (len(self.actions), self.place_cell_responses.shape[0]))
        self.action_values = np.dot(self.action_weights,
                                    self.place_cell_responses)
        self.policy = self.softmax(self.action_values)

        self.policies = []
        self.policies.append(self.evaluate_policy_at_field_centres())
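`self.softmax` itself is not shown here; Example #11 below defines `self.beta = 2` as an exploration/exploitation index, so a standard temperature-scaled softmax along these lines seems likely (a sketch, not the verified implementation):

import numpy as np

def softmax(agent, action_values):
    """Turn action values into choice probabilities; larger beta means greedier choices."""
    scaled = agent.beta * (action_values - np.max(action_values))  # subtract max for numerical stability
    exp_values = np.exp(scaled)
    return exp_values / exp_values.sum()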
Example #11
    def __init__(self, n_trials, env=WaterMazeEnv()):

        self.env = env
        self.n_trials = n_trials

        # Initialise agent settings
        self.reward = 0
        self.total_reward = 0
        self.learning_rate = .1
        self.beta = 2  # exploration/exploitation index
        self.gamma = .9975  # future rewards discount factor
        self.delta = 0

        self.initial_position = [-.7, -.7]
        self.current_position = self.initial_position
        self.reached_platform = False
        self.current_action = None
        self.actions = None

        # Initialise position log:
        self.position_log = pd.DataFrame(columns=['X position', 'Y position', 'Trial'])
        self.position_log.index.name = 'Time bin'
        self.position_log.loc[0] = [self.current_position[0], self.current_position[1], self.env.trial]
Example #12
    os.makedirs(output_folder)

#from combined_agent import TDAgent


def get_platform_and_landmark_locations(env, number=9):
    angles = np.linspace(0, 2 * np.pi, number)
    r = env.maze_radius / 2
    platform_locations = [[r * np.cos(a), r * np.sin(a)] for a in angles]
    landmark_locations = [[r * np.cos(a), r * np.sin(a) + .1] for a in angles]
    platform_locations = env.maze_centre + platform_locations
    landmark_locations = env.maze_centre + landmark_locations
    return platform_locations, landmark_locations


envi = WaterMazeEnv()
platform_locations, landmark_locations = get_platform_and_landmark_locations(
    envi)
sessions = [0, 4, 1, 5, 2, 6, 3, 7, 4, 8, 3, 0]

n_sims = 1
n_trials = 4
n_sessions = 12

escape_times = np.zeros((n_sims, n_sessions, n_trials))
session_ids = list(range(len(platform_locations)))

sessions = [0, 4, 1, 5, 2, 6, 3, 7, 4, 8, 3, 0]

first_trials_control_trajectory = []
Example #13
    hipp_colour = current_palette[1]
    #plt.rcParams['font.family'] = 'sans-serif'  # maybe try helvetica on mac
    #plt.rcParams['font.sans-serif'] = 'Coolvetica'

    fig = plt.figure()
    gs = GridSpec(2, 4)

    ax1 = fig.add_subplot(gs[0, 0])
    ax2 = fig.add_subplot(gs[0, 1])
    ax3 = fig.add_subplot(gs[1, :2])
    ax4 = fig.add_subplot(gs[0:, 2:])

    early_trial = 3
    late_trial = 45

    plot_trajectories(ax1, WaterMazeEnv(), all_locs[early_trial],
                      all_choices[early_trial], current_palette)
    plot_trajectories(ax2, WaterMazeEnv(), all_locs[late_trial],
                      all_choices[late_trial], current_palette)
    plot_model_performance(ax3, colour_palette=current_palette)
    plot_choice_proportions(ax4, current_palette)

    ax1.text(.5,
             .8,
             'Trial {}'.format(early_trial),
             transform=ax1.transAxes,
             fontsize=12,
             ha='center',
             style='italic')
    ax2.text(.5,
             .8,
Example #14
            else:
                return str_action, 'striatum'
        elif hc_value > str_value:
            return hc_action, 'hippocampus'
        else:
            return str_action, 'striatum'


if __name__ == '__main__':
    n_agents = 23
    n_episodes = 272
    all_rewards = np.zeros((n_agents, n_episodes))
    all_escape_times = np.zeros((n_agents, n_episodes))

    df = pd.DataFrame(columns=[
        'Agent_nr', 'Trial', 'StartState', 'Action1', 'Action2', 'Terminus',
        'Reward'
    ])
    for agent in tqdm(range(n_agents)):
        #a = NonSpatialAgent(env=DeterministicTask(), lesion_hippocampus=True, lesion_striatum=False, epsilon=0.2)
        a = Agent(env=WaterMazeEnv(), lesion_hippocampus=True)
        for ep in range(n_episodes):
            data = a.train_one_episode()
            all_escape_times[agent, ep] = data[0]
            #all_rewards[agent, ep] = data[-1]
            #df.loc[len(df)] = [agent, ep] + data

    df.to_csv(
        os.path.join(a.env.output_folder,
                     'BehaviourDriftingRewardsStriatum23July.csv'))
Example #15
def load_data(filename):
    data = np.load(os.path.join(results_directory, filename))
    return data


def get_platform_and_landmark_locations(env, number=9):
    angles = np.linspace(0, 2 * np.pi, number)
    r = env.maze_radius / 2
    platform_locations = [[r * np.cos(a), r * np.sin(a)] for a in angles]
    landmark_locations = [[r * np.cos(a), r * np.sin(a) + .1] for a in angles]
    platform_locations = env.maze_centre + platform_locations
    landmark_locations = env.maze_centre + landmark_locations
    return platform_locations, landmark_locations


envi = WaterMazeEnv()
platform_locations, landmark_locations = get_platform_and_landmark_locations(
    envi)
sessions = [0, 4, 1, 5, 2, 6, 3, 7, 4, 8, 3, 0]


def plot_escape_times(ax, data, colour_palette):
    ax.plot(np.arange(1, 12),
            data['ctrl1'][:-1],
            'o-',
            color=colour_palette[1])
    ax.plot(np.arange(1, 12),
            data['ctrl4'][:-1],
            'o-',
            fillstyle='none',
            color=colour_palette[1])