def evaluate_model_components(n_agents=5, n_episodes=35):
    all_rewards_hipp = np.zeros((n_agents, n_episodes))
    all_rewards_striat = np.zeros((n_agents, n_episodes))
    for ag in tqdm(range(n_agents), desc='Agent'):
        # a: hippocampus lesioned (striatum intact); a2: striatum lesioned (hippocampus intact).
        a = TDAgent(env=WaterMazeEnv(), lesion_hippocampus=True, lesion_striatum=False)
        a2 = TDAgent(env=WaterMazeEnv(), lesion_hippocampus=False, lesion_striatum=True)
        a2.hippocampus.learning_rate = .04
        a2.hippocampus.negative_learning_rate = .04
        a2.hippocampus.lamb = .7
        a.env.start_x, a.env.start_y = [.4, 1.4]
        a2.env.start_x, a2.env.start_y = [.4, 1.4]
        a2.env.curr_orientation = 0
        a.env.curr_orientation = 0
        for ep in tqdm(range(n_episodes), leave=False, desc='Trial'):
            t, reward, locs, choices = a.train_one_episode()
            t2, reward2, locs, choices = a2.train_one_episode()
            all_rewards_hipp[ag, ep] = t
            all_rewards_striat[ag, ep] = t2
    return all_rewards_hipp, all_rewards_striat
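# Example usage (a minimal sketch; the figure handling is an assumption and not
# part of the original script). The two returned arrays populate the
# module-level names consumed by plot_model_performance below:
#
#     all_rewards_hipp, all_rewards_striat = evaluate_model_components()
#     fig, ax = plt.subplots()
#     plot_model_performance(ax, colour_palette=current_palette)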
def __init__(self, env=WaterMazeEnv(), learning_rate=.1, gamma=.98):
    self.env = env
    self.learning_rate = learning_rate
    self.gamma = gamma

    # Sensory system:
    self.rf_radius = 1
    self.max_viewing_angle = 135
    self.rf_x, self.rf_y = vs.make_receptive_fields_simple(
        n_angles=21, radius=self.rf_radius,
        max_angle=self.max_viewing_angle, n_radii=6)
    self.sens_neuron_activations = np.zeros(self.rf_x.flatten().shape)
    self.previous_sensory_activations = np.zeros(self.sens_neuron_activations.shape)
    self.max_firing_rate = 10
    self.rf_var = .05

    # RL system:
    self.striatum_activation = np.zeros(self.env.actions.shape)
    self.previous_striatum_activation = None
    self.weight_mat = np.zeros((self.striatum_activation.shape[0],
                                self.sens_neuron_activations.shape[0]))
    self.delta = 0
def __init__(self, env=WaterMazeEnv(), learning_rate=.001, gamma=.98, lamb=.76):
    """
    :param env: Instance of environment class.
    :param learning_rate: Learning rate for Q learning.
    :param gamma: Future reward discount factor.
    :param lamb: Eligibility trace decay parameter.
    """
    self.env = env
    self.learning_rate = learning_rate
    self.gamma = gamma
    self.lamb = lamb

    # Make the landmark cells
    self.field_width = 5  # 27.5
    self.max_viewing_angle = 175  # 135
    self.n_landmark_cells = 100
    self.landmark_cell_centres = np.linspace(-self.max_viewing_angle,
                                             self.max_viewing_angle,
                                             self.n_landmark_cells)
    self.LC_activations = np.zeros(self.n_landmark_cells)
    self.previous_LC_activity = None

    # Make the action cells
    self.n_action_cells = len(self.env.actions)
    self.previous_striatum_activation = None
    self.striatum_activation = np.zeros(self.n_action_cells)
    # Initialise the weights with small random values.
    self.weight_mat = np.random.rand(self.n_action_cells, self.n_landmark_cells) * .004
    self.eligibility_trace = np.zeros(self.weight_mat.shape)

    self.generalisation_phase_activity = None
    self.generalisation_phase_var = 22.5
def plot_model_performance(ax, colour_palette):
    env = WaterMazeEnv()
    tsplot_boot(ax, all_rewards_striat[:, :35] * env.time_bin, color=colour_palette[0])
    tsplot_boot(ax, all_rewards_hipp[:, :35] * env.time_bin, color=colour_palette[1])

    # Hide the right and top spines
    ax.spines['right'].set_visible(False)
    ax.spines['top'].set_visible(False)

    ax.legend(['Striatum', 'Hippocampus'], fontsize=10)
    ax.set_xlabel('Trials', fontsize=12)
    ax.set_ylabel('Escape time (s)', fontsize=12)
def get_full_model_trajectories(n_episodes):
    all_choices = []
    all_locs = []
    a = TDAgent(env=WaterMazeEnv(), lesion_hippocampus=False, lesion_striatum=False)
    a.hippocampus.learning_rate = .04
    a.hippocampus.negative_learning_rate = .04
    a.hippocampus.lamb = .76
    a.env.start_x, a.env.start_y = [.4, 1.4]
    for ep in tqdm(range(n_episodes)):
        t, reward, locs, choices = a.train_one_episode()
        all_locs.append(locs)
        all_choices.append(choices)
    return np.array(all_choices), np.array(all_locs)
def __init__(self, env=WaterMazeEnv(), epsilon=.1, lesion_striatum=False, lesion_hippocampus=False):
    """Initialise the agent with a hippocampus and striatum. If both are lesioned, behaviour will be random.

    :param (float) epsilon: Randomness parameter.
    :param (bool) lesion_striatum: Inactivates the striatum model.
    :param (bool) lesion_hippocampus: Inactivates the hippocampus model.
    """
    self.epsilon = epsilon
    if lesion_hippocampus and lesion_striatum:
        self.epsilon = 1  # Output random behaviour if neither striatum nor hippocampus is present.
    self.striatum_lesion = lesion_striatum
    self.hippocampus_lesion = lesion_hippocampus
    self.env = env
    self.hippocampus = Hippocampus(self.env)
    self.striatum = Striatum(self.env)
    self.reached_goal = False
def __init__(self, env=WaterMazeEnv(), learning_rate=.001, negative_learning_rate=.001, gamma=.98, lamb=.67):
    self.env = env
    self.learning_rate = learning_rate
    self.negative_learning_rate = negative_learning_rate
    self.gamma = gamma
    self.lamb = lamb
    self.max_turning_angle = 60

    # Create the place cells:
    self.field_width = .3
    self.field_centres = self.create_place_cells()
    if isinstance(self.env, PlusMaze):
        self.geodesic_field_centres = self.env.lookup_geodesic_coordinate(self.field_centres.T).T
    self.n_place_cells = self.field_centres.shape[1]
    self.max_response = self.get_max_response()
    self.previous_place_cell_responses = None
    self.place_cell_responses = None
    self.update_place_cell_response()

    # And the goal cell
    self.value = 0
    self.previous_value = 0
    # Initialise the weights with small random values.
    self.weights = np.random.rand(self.n_place_cells) * .04
    self.eligibility_trace = np.zeros(self.weights.shape)

    # Allocentric actions (0 is east)
    # Note that the only actions currently available to the agent will be -60 to 60 degrees away.
    self.allocentric_directions = [0, 30, 60, 90, 120, 150, 180, 210, 240, 270, 300, 330]
    self.remembered_goal_location = [0, 0]
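# A minimal sketch (an assumption, not this class's own update method, which is
# defined elsewhere) of how the attributes above would combine in a TD(lambda)
# update: a prediction error discounted by gamma, an eligibility trace decaying
# with lamb, and separate learning rates for positive and negative errors.
#
#     def _example_td_lambda_update(self, reward):
#         delta = reward + self.gamma * self.value - self.previous_value
#         self.eligibility_trace = (self.gamma * self.lamb * self.eligibility_trace
#                                   + self.previous_place_cell_responses)
#         lr = self.learning_rate if delta >= 0 else self.negative_learning_rate
#         self.weights += lr * delta * self.eligibility_trace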
def __init__(self, env=WaterMazeEnv(), learning_rate=.5, negative_learning_rate=.05, goal_cell_decay_factor=.99):
    self.env = env
    self.learning_rate = learning_rate
    self.negative_learning_rate = negative_learning_rate
    self.goal_cell_decay_factor = goal_cell_decay_factor
    self.max_turning_angle = 60

    # Create the place cells:
    self.field_width = .3
    self.field_centres = self.create_place_cells()
    if isinstance(self.env, PlusMaze):
        self.geodesic_field_centres = self.env.lookup_geodesic_coordinate(self.field_centres.T).T
    self.n_place_cells = self.field_centres.shape[1]
    self.max_response = self.get_max_response()
    self.previous_place_cell_responses = None
    self.place_cell_responses = None
    self.update_place_cell_response()

    # And the goal cell
    self.goal_cell_rate = 0
    self.weights = np.zeros(self.n_place_cells)
    if isinstance(self.env, PlusMaze):
        self.max_goal_response = .05
    else:
        self.max_goal_response = 15  # 5
    self.cur_max = self.max_goal_response

    # Allocentric actions (0 is east)
    # Note that the only actions currently available to the agent will be -60 to 60 degrees away.
    self.allocentric_directions = [0, 30, 60, 90, 120, 150, 180, 210, 240, 270, 300, 330]
    self.remembered_goal_location = [0, 0]
def get_agent_choices(n_agents=20, n_episodes=41):
    agent_choices = []
    for agent in tqdm(range(n_agents)):
        all_locs = []
        all_choices = []
        a = TDAgent(env=WaterMazeEnv(), lesion_hippocampus=False)
        a.hippocampus.learning_rate = .04
        a.hippocampus.negative_learning_rate = .04
        a.hippocampus.lamb = .8
        a.env.start_x, a.env.start_y = [.4, 1.4]
        for ep in range(n_episodes):
            t, reward, locs, choices = a.train_one_episode()
            all_locs.append(locs)
            all_choices.append(choices)
        agent_choices.append(all_choices)
    return np.array(agent_choices)
def __init__(self, n_trials=30, env=WaterMazeEnv()):
    WaterMazeAgent.__init__(self, n_trials=n_trials, env=env)
    self.env.trial = 0

    self.field_centres = self.create_place_cells()
    self.field_width = .09
    self.max_response = utils.gauss2d([0, 0], self.field_width**2, [0, 0])
    self.place_cell_responses = np.zeros(self.field_centres[0].shape)
    self.previous_place_cell_responses = np.zeros(self.field_centres[0].shape)
    self.update_place_cell_response()

    # Allocentric actions in steps of 30 degrees (index 0 is mapped to 360).
    self.actions = {idx: direction for (idx, direction) in zip(range(12), range(0, 360, 30))}
    self.actions[0] = 360
    self.current_action = np.random.choice(len(self.actions))
    self.previous_action = None

    # Initialise critic:
    self.critic_weights = np.zeros(self.place_cell_responses.shape)
    self.max_sum_weights = 1.
    self.critic_activation = np.dot(self.critic_weights, self.place_cell_responses)
    self.previous_critic_activation = None

    # Initialise actor:
    self.action_weights = np.zeros((len(self.actions), self.place_cell_responses.shape[0]))
    self.action_values = np.dot(self.action_weights, self.place_cell_responses)
    self.policy = self.softmax(self.action_values)
    self.policies = []
    self.policies.append(self.evaluate_policy_at_field_centres())
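# The softmax referenced above is not shown in this snippet; a minimal sketch,
# assuming the beta parameter inherited from WaterMazeAgent acts as an inverse
# temperature over the action values (stabilised by subtracting the maximum):
#
#     def softmax(self, action_values):
#         scaled = self.beta * (action_values - np.max(action_values))
#         exp_values = np.exp(scaled)
#         return exp_values / exp_values.sum()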
def __init__(self, n_trials, env=WaterMazeEnv()):
    self.env = env
    self.n_trials = n_trials

    # Initialise agent settings
    self.reward = 0
    self.total_reward = 0
    self.learning_rate = .1
    self.beta = 2        # exploration/exploitation index
    self.gamma = .9975   # future rewards discount factor
    self.delta = 0
    self.initial_position = [-.7, -.7]
    self.current_position = self.initial_position
    self.reached_platform = False
    self.current_action = None
    self.actions = None

    # Initialise position log:
    self.position_log = pd.DataFrame(columns=['X position', 'Y position', 'Trial'])
    self.position_log.index.name = 'Time bin'
    self.position_log.loc[0] = [self.current_position[0], self.current_position[1], self.env.trial]
os.makedirs(output_folder)

#from combined_agent import TDAgent


def get_platform_and_landmark_locations(env, number=9):
    angles = np.linspace(0, 2 * np.pi, number)
    r = env.maze_radius / 2
    platform_locations = [[r * np.cos(a), r * np.sin(a)] for a in angles]
    landmark_locations = [[r * np.cos(a), r * np.sin(a) + .1] for a in angles]
    platform_locations = env.maze_centre + platform_locations
    landmark_locations = env.maze_centre + landmark_locations
    return platform_locations, landmark_locations


envi = WaterMazeEnv()
platform_locations, landmark_locations = get_platform_and_landmark_locations(envi)

n_sims = 1
n_trials = 4
n_sessions = 12
escape_times = np.zeros((n_sims, n_sessions, n_trials))
session_ids = list(range(len(platform_locations)))
sessions = [0, 4, 1, 5, 2, 6, 3, 7, 4, 8, 3, 0]
first_trials_control_trajectory = []
hipp_colour = current_palette[1]

#plt.rcParams['font.family'] = 'sans-serif'  # maybe try helvetica on mac
#plt.rcParams['font.sans-serif'] = 'Coolvetica'

fig = plt.figure()
gs = GridSpec(2, 4)
ax1 = fig.add_subplot(gs[0, 0])
ax2 = fig.add_subplot(gs[0, 1])
ax3 = fig.add_subplot(gs[1, :2])
ax4 = fig.add_subplot(gs[0:, 2:])

early_trial = 3
late_trial = 45

plot_trajectories(ax1, WaterMazeEnv(), all_locs[early_trial], all_choices[early_trial], current_palette)
plot_trajectories(ax2, WaterMazeEnv(), all_locs[late_trial], all_choices[late_trial], current_palette)
plot_model_performance(ax3, colour_palette=current_palette)
plot_choice_proportions(ax4, current_palette)

ax1.text(.5, .8, 'Trial {}'.format(early_trial), transform=ax1.transAxes,
         fontsize=12, ha='center', style='italic')
ax2.text(.5, .8, 'Trial {}'.format(late_trial), transform=ax2.transAxes,
         fontsize=12, ha='center', style='italic')  # Mirrors the early-trial label above.
        else:
            return str_action, 'striatum'
    elif hc_value > str_value:
        return hc_action, 'hippocampus'
    else:
        return str_action, 'striatum'


if __name__ == '__main__':
    n_agents = 23
    n_episodes = 272

    all_rewards = np.zeros((n_agents, n_episodes))
    all_escape_times = np.zeros((n_agents, n_episodes))
    df = pd.DataFrame(columns=['Agent_nr', 'Trial', 'StartState', 'Action1', 'Action2',
                               'Terminus', 'Reward'])

    for agent in tqdm(range(n_agents)):
        #a = NonSpatialAgent(env=DeterministicTask(), lesion_hippocampus=True, lesion_striatum=False, epsilon=0.2)
        a = Agent(env=WaterMazeEnv(), lesion_hippocampus=True)
        for ep in range(n_episodes):
            data = a.train_one_episode()
            all_escape_times[agent, ep] = data[0]
            #all_rewards[agent, ep] = data[-1]
            #df.loc[len(df)] = [agent, ep] + data

    df.to_csv(os.path.join(a.env.output_folder, 'BehaviourDriftingRewardsStriatum23July.csv'))
def load_data(filename):
    data = np.load(os.path.join(results_directory, filename))
    return data


def get_platform_and_landmark_locations(env, number=9):
    angles = np.linspace(0, 2 * np.pi, number)
    r = env.maze_radius / 2
    platform_locations = [[r * np.cos(a), r * np.sin(a)] for a in angles]
    landmark_locations = [[r * np.cos(a), r * np.sin(a) + .1] for a in angles]
    platform_locations = env.maze_centre + platform_locations
    landmark_locations = env.maze_centre + landmark_locations
    return platform_locations, landmark_locations


envi = WaterMazeEnv()
platform_locations, landmark_locations = get_platform_and_landmark_locations(envi)
sessions = [0, 4, 1, 5, 2, 6, 3, 7, 4, 8, 3, 0]


def plot_escape_times(ax, data, colour_palette):
    ax.plot(np.arange(1, 12), data['ctrl1'][:-1], 'o-', color=colour_palette[1])
    ax.plot(np.arange(1, 12), data['ctrl4'][:-1], 'o-', fillstyle='none', color=colour_palette[1])