import matplotlib.pyplot as plt
import numpy as np
import torch
from scipy.stats import spearmanr
from sklearn.linear_model import LinearRegression, LogisticRegression

# PrefrontalLSTM, TaskOne, TwoStep, TwoStepsGridWorld and run_episode are
# assumed to be provided by this repo's own modules.


def figure_2_a(model_path):
    """Scatter of log choice ratio against log reward ratio, with a fitted
    regression line (figure 2a)."""
    model = PrefrontalLSTM(0, 2)
    model.load_state_dict(torch.load(model_path))
    env = TaskOne(mode='monkey')
    env.set_test()
    C_r, C_l, R_r, R_l, held_out = [], [], [], [], []
    for _ in range(4000):
        actions, rewards = run_episode(env, model)
        p_l, _ = env.initial_probability
        held_out.append((0.1 < p_l < 0.2) or (0.3 < p_l < 0.4))
        C_r.append(sum(actions))  # action 1 = right choice
        C_l.append(len(actions) - C_r[-1])
        # The +1 keeps the log ratio finite when one side earns no reward.
        R_r.append(1 + sum(r for a, r in zip(actions, rewards) if a == 1))
        R_l.append(1 + sum(r for a, r in zip(actions, rewards) if a == 0))
    C_r, C_l = np.array(C_r), np.array(C_l)
    R_r, R_l = np.array(R_r), np.array(R_l)
    held_out = np.array(held_out)
    y = np.log2(C_r / C_l)
    x = np.log2(R_r / R_l)
    m = LinearRegression()
    m.fit(x.reshape(-1, 1), y.reshape(-1, 1))
    plt.scatter(x[~held_out], y[~held_out], label="Not held out", c='blue', s=5)
    plt.scatter(x[held_out], y[held_out], label="Held out parameters", c='red', s=5)
    lims = [
        np.min([plt.xlim(), plt.ylim()]),  # min of both axes
        np.max([plt.xlim(), plt.ylim()]),  # max of both axes
    ]
    plt.plot(lims, lims, 'k-', zorder=0)  # identity line: perfect matching
    test_X = np.arange(*plt.xlim(), 0.25)
    plt.plot(test_X, m.predict(test_X.reshape(-1, 1)),
             linestyle='--', zorder=0, color='gray')
    plt.ylabel(r'$\log_2(\frac{C_R}{C_L})$', fontsize=20)
    plt.xlabel(r'$\log_2(\frac{R_R}{R_L})$', fontsize=20)
    plt.legend()
    plt.show()
def figure_2_b(model_path):
    """Correlation of LSTM unit activations with behavioural variables (figure 2b)."""
    model = PrefrontalLSTM(0, 2)
    model.load_state_dict(torch.load(model_path))
    env = TaskOne(mode='monkey')
    env.set_test()
    # Capture every layer's forward output; for the LSTM each captured entry
    # is the (output, (h_n, c_n)) tuple, so element [0] holds the activations.
    activations = {}
    layer2name = {}
    hook_function = lambda m, i, o: activations[layer2name[m]].append(o)
    for name, layer in model._modules.items():
        layer2name[layer] = name
        activations[name] = []
        layer.register_forward_hook(hook_function)
    action_mat, reward_mat, value_mat = [], [], []
    for _ in range(2000):
        actions, rewards, values = run_episode(env, model, return_values=True)
        reward_mat.append(rewards)
        action_mat.append(actions)
        value_mat.append(values)
    action_mat = np.array(action_mat)
    value_mat = np.array(value_mat)
    reward_mat = np.array(reward_mat)
    # Skip the first two steps of each 100-step episode so activations line
    # up with the lagged action/reward regressors below.
    activation_mat = torch.cat([
        x[0] for i, x in enumerate(activations['lstm'])
        if i % 100 != 0 and i % 100 != 1
    ]).squeeze().numpy()
    value_corr, _ = spearmanr(activation_mat, value_mat[:, 2:].reshape(-1), axis=0)
    action_corr, _ = spearmanr(activation_mat, action_mat[:, 2:].reshape(-1), axis=0)
    reward_corr, _ = spearmanr(activation_mat, reward_mat[:, :-2].reshape(-1), axis=0)
    rewardxaction_corr, _ = spearmanr(
        activation_mat,
        reward_mat[:, :-2].reshape(-1) * action_mat[:, 2:].reshape(-1),
        axis=0)
    # The behavioural variable is the last column of each correlation matrix;
    # keep each unit's absolute correlation with it.
    value_corr = np.abs(value_corr[:-1, -1])
    action_corr = np.abs(action_corr[:-1, -1])
    reward_corr = np.abs(reward_corr[:-1, -1])
    rewardxaction_corr = np.abs(rewardxaction_corr[:-1, -1])
    plt.bar(range(4),
            [np.mean(x) for x in [action_corr, reward_corr, rewardxaction_corr, value_corr]],
            tick_label=[r'$a_{t-1}$', r'$r_{t-1}$', r'$a_{t-1}\times r_{t-1}$', r'$v_t$'],
            zorder=0)
    plt.ylabel("Correlation")
    plt.scatter([0] * 48, action_corr, c='red')  # one point per LSTM unit
    plt.scatter([1] * 48, reward_corr, c='red')
    plt.scatter([2] * 48, rewardxaction_corr, c='red')
    plt.scatter([3] * 48, value_corr, c='red')
    plt.show()
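# A standalone sketch (added illustration, not part of the original file) of
# the hook pattern used in figure_2_b, on a bare nn.LSTM: PyTorch passes the
# module's return value to a forward hook, and for nn.LSTM that is the
# (output, (h_n, c_n)) tuple, which is why figure_2_b concatenates element
# [0] of each captured entry.
def _hook_demo():
    import torch.nn as nn
    lstm = nn.LSTM(input_size=3, hidden_size=48)
    captured = []
    lstm.register_forward_hook(lambda module, inputs, output: captured.append(output))
    with torch.no_grad():
        lstm(torch.randn(1, 1, 3))  # one timestep, batch size 1
    print(captured[0][0].shape)  # torch.Size([1, 1, 48]): per-step outputs, not (h_n, c_n)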
def figure_2_c(model_path):
    """Logistic-regression weights on the past 15 rewards and actions (figure 2c)."""
    model = PrefrontalLSTM(0, 2)
    model.load_state_dict(torch.load(model_path))
    env = TaskOne(mode='monkey')
    env.set_test()
    X_actions, X_rewards, y = [], [], []
    for _ in range(4000):
        actions, rewards = run_episode(env, model)
        for i, a in enumerate(actions):
            if i <= 33 + 15:  # only use data from the last two thirds of each episode
                continue
            X_rewards.append(rewards[i - 15:i])
            X_actions.append(actions[i - 15:i])
            y.append(a)
    X_actions = np.array(X_actions)
    X_rewards = np.array(X_rewards)
    y = np.array(y)
    clf = LogisticRegression()  # separate fit per regressor set
    clf.fit(X_actions, y)
    action_coefficients = clf.coef_[0]
    clf = LogisticRegression()
    clf.fit(X_rewards, y)
    reward_coefficients = clf.coef_[0]
    fig, axes = plt.subplots(2, 1)
    axes[0].plot(reward_coefficients)
    axes[1].plot(action_coefficients)
    axes[0].set_title("Rewards")
    axes[1].set_title("Actions")
    for ax in axes:
        ax.set_ylabel('Coefficient')
        ax.set_xlabel('Trial lag')
        ax.plot(ax.get_xlim(), (0, 0), linestyle='--', color='black')
        ax.yaxis.set_label_position("right")
        ax.yaxis.tick_right()
        ax.set_xticks(range(15))
        ax.set_xticklabels(np.arange(15, 0, -1))
    plt.show()
def figure_5_d(model_path, N=8, episodes=500):
    """Regression of stay/switch outcomes on the past five trial types (figure 5d)."""
    model = PrefrontalLSTM(2, 2)
    model.load_state_dict(torch.load(model_path))
    coefs = np.zeros((N, 5, 4))
    type_to_idx = {
        (0, "common"): [1, 0, 0, 0],
        (1, "common"): [0, 1, 0, 0],
        (0, "uncommon"): [0, 0, 1, 0],
        (1, "uncommon"): [0, 0, 0, 1]
    }
    idx_to_type = {
        0: (0, "common"),
        1: (1, "common"),
        2: (0, "uncommon"),
        3: (1, "uncommon")
    }
    for seed in range(N):
        env = TwoStep(seed=seed)
        X = []
        y = []
        for _ in range(episodes):
            actions, rewards, infos = run_episode(env, model, return_infos=True)
            trial_types = []
            first_actions = []
            outcomes = []
            for action, reward, info in zip(actions, rewards, infos):
                if info['state_transition'] is None:
                    # End of a trial: record stay/switch and the trial's type.
                    if len(trial_types) > 0:
                        outcomes.append(first_actions[-1] == first_actions[-2])
                    trial_types.append(type_to_idx[(reward, state_transition)])
                else:
                    first_actions.append(action)
                    state_transition = info['state_transition']
            # Predict each stay/switch outcome from the preceding 5 trial types.
            for i, outcome in enumerate(outcomes[4:]):
                idx = i + 5
                X.append(trial_types[idx - 5:idx])
            y += outcomes[4:]
        X = np.array(X)  # (n_samples, 5 lags, 4 trial types)
        y = np.array(y)
        l_model = LogisticRegression()
        l_model.fit(X.reshape(-1, 20), y)
        coefs[seed, :, :] = l_model.coef_.reshape(5, 4)
    color = ['red', 'blue']
    linestyle = {"common": '-', "uncommon": ':'}
    for idx, (reward, transition) in idx_to_type.items():
        plt.plot(coefs[:, ::-1, idx].mean(axis=0),
                 c=color[reward], linestyle=linestyle[transition])
        for i in range(N):
            if transition == "common":
                plt.scatter(np.arange(5), coefs[i, ::-1, idx], c=color[reward])
            else:
                # Open markers for uncommon transitions.
                plt.scatter(np.arange(5), coefs[i, ::-1, idx],
                            facecolors='none', edgecolors=color[reward])
    plt.ylabel('Regression Weights')
    plt.xticks(range(5), labels=range(1, 6))
    plt.xlabel('Trials ago')
    plt.show()
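# A small sanity check (added illustration, not from the original) of the
# row-major flattening figure_5_d relies on: a (5 lags, 4 types) one-hot
# block reshapes so that feature index 4*j + k means "lag j, trial type k",
# and l_model.coef_.reshape(5, 4) inverts that layout.
def _reshape_demo():
    block = np.eye(4)[[0, 1, 2, 3, 0]]      # 5 lags, one-hot over 4 trial types
    flat = block.reshape(-1)                # 20 features, row-major
    assert flat[4 * 2 + 2] == 1.0           # lag 2 carries type 2 in this demo
    assert (flat.reshape(5, 4) == block).all()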
def figure_5_b(model_path, N=8, episodes=500):
    """Stay probability split by reward and transition type (figure 5b)."""
    model = PrefrontalLSTM(2, 2)
    model.load_state_dict(torch.load(model_path))
    commons = []
    uncommons = []
    for seed in range(N):
        env = TwoStep(seed=seed)
        n = {1: {"common": 0, "uncommon": 0}, 0: {"common": 0, "uncommon": 0}}
        stay = {1: {"common": 0, "uncommon": 0}, 0: {"common": 0, "uncommon": 0}}
        for _ in range(episodes):
            actions, rewards, infos = run_episode(env, model, return_infos=True)
            for i, (action, reward, info) in enumerate(zip(actions, rewards, infos)):
                if info['state_transition'] is None:
                    # End of a trial: tally stay/switch against the previous
                    # trial's reward and transition type.
                    if i >= 2:  # the first trial has no predecessor to compare with
                        n[prev_reward][prev_state_transition] += 1
                        if prev_first_action == first_action:
                            stay[prev_reward][prev_state_transition] += 1
                    prev_first_action = first_action
                    prev_state_transition = state_transition
                    prev_reward = reward
                else:
                    first_action = action
                    state_transition = info['state_transition']
        commons.append([stay[1]["common"] / n[1]["common"],
                        stay[0]["common"] / n[0]["common"]])
        uncommons.append([stay[1]["uncommon"] / n[1]["uncommon"],
                          stay[0]["uncommon"] / n[0]["uncommon"]])
    commons = np.array(commons)
    uncommons = np.array(uncommons)
    width = 0.35
    gap = 0.05
    plt.bar(np.arange(2) - width / 2 - gap, commons.mean(axis=0), width,
            label='Common', color='blue', zorder=0)
    plt.bar(np.arange(2) + width / 2 + gap, uncommons.mean(axis=0), width,
            label='Uncommon', color='red', zorder=0)
    plt.scatter(np.tile(np.arange(2) - width / 2 - gap, N),
                commons.reshape(-1), c='black')
    plt.scatter(np.tile(np.arange(2) + width / 2 + gap, N),
                uncommons.reshape(-1), c='black')
    plt.xticks((0, 1), labels=["Rewarded", "Unrewarded"])
    plt.ylim(0.5, 1)
    plt.legend()
    plt.show()
def figure_5_b_gridworld(model_path, N=3, episodes=100):
    """Grid-world variant of the figure 5b stay-probability analysis."""
    model = PrefrontalLSTM(126, 4, hidden_size=192)
    model.load_state_dict(torch.load(model_path))
    commons = []
    uncommons = []
    env = TwoStepsGridWorld(seed=42)
    for _ in range(N):
        n = {1: {"common": 0, "uncommon": 0}, 0: {"common": 0, "uncommon": 0}}
        stay = {1: {"common": 0, "uncommon": 0}, 0: {"common": 0, "uncommon": 0}}
        for _ in range(episodes):
            actions, rewards, infos = run_episode(env, model, return_infos=True)
            last_first_position = None
            for info in infos:
                if info != {}:  # non-empty info marks a completed trial
                    n[info["reward"]][info["state_transition"]] += 1
                    if (last_first_position is not None
                            and last_first_position == info["first_position"]):
                        stay[info["reward"]][info["state_transition"]] += 1
                    last_first_position = info["first_position"]
        commons.append([stay[1]["common"] / n[1]["common"],
                        stay[0]["common"] / n[0]["common"]])
        uncommons.append([stay[1]["uncommon"] / n[1]["uncommon"],
                          stay[0]["uncommon"] / n[0]["uncommon"]])
    commons = np.array(commons)
    uncommons = np.array(uncommons)
    width = 0.35
    gap = 0.05
    plt.bar(np.arange(2) - width / 2 - gap, commons.mean(axis=0), width,
            label='Common', color='blue', zorder=0)
    plt.bar(np.arange(2) + width / 2 + gap, uncommons.mean(axis=0), width,
            label='Uncommon', color='red', zorder=0)
    plt.scatter(np.tile(np.arange(2) - width / 2 - gap, N),
                commons.reshape(-1), c='black')
    plt.scatter(np.tile(np.arange(2) + width / 2 + gap, N),
                uncommons.reshape(-1), c='black')
    plt.xticks((0, 1), labels=["Rewarded", "Unrewarded"])
    plt.ylim(0.5, 1)
    plt.legend()
    plt.show()
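# A minimal usage sketch, assuming checkpoints were saved with
# torch.save(model.state_dict(), path); the file names below are
# hypothetical placeholders, not paths shipped with the repo.
if __name__ == "__main__":
    figure_2_a("task_one_monkey.pt")
    figure_2_b("task_one_monkey.pt")
    figure_2_c("task_one_monkey.pt")
    figure_5_b("two_step.pt")
    figure_5_d("two_step.pt")
    figure_5_b_gridworld("two_step_gridworld.pt")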