Example #1
def render_episode(model_path):
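    """Load a trained PrefrontalLSTM and render one episode of the two-step
    grid world, sampling an action from the policy at every step."""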
    model = PrefrontalLSTM(126, 4, hidden_size=192)
    model.load_state_dict(torch.load(model_path))
    env = TwoStepsGridWorld(seed=42)
    state = env.reset()
    env.env.render()
    time.sleep(20)
    done = False
    reward = 0.0
    action = torch.tensor(0)
    hidden = None
    cell = None
    with torch.no_grad():
        while not done:
            time.sleep(0.10)
            value, action_space, (hidden, cell) = model(state,
                                                        reward,
                                                        action,
                                                        hidden=hidden,
                                                        cell=cell)
            action_distribution = distributions.Categorical(action_space)
            action = action_distribution.sample()
            state, reward, done, info = env.step(action.item())
            env.env.render()
def figure_2_a(model_path):
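    """Matching-law analysis: regress the log choice ratio log2(C_R/C_L) on the
    log reward ratio log2(R_R/R_L) across test episodes, marking episodes whose
    initial reward probability falls in the held-out ranges."""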
    model = PrefrontalLSTM(0, 2)
    model.load_state_dict(torch.load(model_path))
    env = TaskOne(mode='monkey')
    env.set_test()

    y = []
    C_r = []
    C_l = []
    R_r = []
    R_l = []
    held_out = []
    for _ in range(4000):
        actions, rewards = run_episode(env, model)
        p_l, _ = env.initial_probability
        held_out.append((0.1 < p_l < 0.2) or (0.3 < p_l < 0.4))
        C_r.append(sum(actions))
        C_l.append(len(actions) - C_r[-1])
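        # +1 on each side to avoid division by zero / log of zero in the reward ratio below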
        R_r.append(1 + sum([r for a, r in zip(actions, rewards) if a == 1]))
        R_l.append(1 + sum([r for a, r in zip(actions, rewards) if a == 0]))

    C_r = np.array(C_r)
    C_l = np.array(C_l)
    R_r = np.array(R_r)
    R_l = np.array(R_l)
    held_out = np.array(held_out)

    y = np.log2(C_r / C_l)
    x = np.log2(R_r / R_l)
    m = LinearRegression()
    m.fit(x.reshape(-1, 1), y.reshape(-1, 1))

    plt.scatter(x[~held_out],
                y[~held_out],
                label="Not held out",
                c='blue',
                s=5)
    plt.scatter(x[held_out],
                y[held_out],
                label="Held out parameters",
                c='red',
                s=5)

    lims = [
        np.min([plt.xlim(), plt.ylim()]),  # min of both axes
        np.max([plt.xlim(), plt.ylim()]),  # max of both axes
    ]

    plt.plot(lims, lims, 'k-', zorder=0)
    test_X = np.arange(*plt.xlim(), 0.25)
    plt.plot(test_X,
             m.predict(test_X.reshape(-1, 1)),
             linestyle='--',
             zorder=0,
             color='gray')
    plt.ylabel(r'$\log_2(\frac{C_R}{C_L})$', fontsize=20)
    plt.xlabel(r'$\log_2(\frac{R_R}{R_L})$', fontsize=20)
    plt.legend()
    plt.show()
def figure_2_b(model_path):
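    """Correlate LSTM unit activations with the previous action, previous
    reward, their interaction, and the current value estimate, and plot the
    mean absolute Spearman correlation for each regressor."""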
    model = PrefrontalLSTM(0, 2)
    model.load_state_dict(torch.load(model_path))
    env = TaskOne(mode='monkey')
    env.set_test()

    activations = {}
    layer2name = {}
    hook_function = lambda m, i, o: activations[layer2name[m]].append(o)
    for name, layer in model._modules.items():
        layer2name[layer] = name
        activations[name] = []
        layer.register_forward_hook(hook_function)

    action_mat = []
    reward_mat = []
    value_mat = []
    for _ in range(2000):
        actions, rewards, values = run_episode(env, model, return_values=True)
        reward_mat.append(rewards)
        action_mat.append(actions)
        value_mat.append(values)
    action_mat = np.array(action_mat)
    value_mat = np.array(value_mat)
    reward_mat = np.array(reward_mat)
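    # Drop the first two timesteps of each 100-trial episode so the activations
    # line up with value_mat[:, 2:], action_mat[:, 2:] and reward_mat[:, :-2].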
    activation_mat = torch.cat([
        x[0] for i, x in enumerate(activations['lstm'])
        if i % 100 != 0 and i % 100 != 1
    ]).squeeze().numpy()
    value_corr, _ = spearmanr(activation_mat,
                              value_mat[:, 2:].reshape(-1),
                              axis=0)
    action_corr, _ = spearmanr(activation_mat,
                               action_mat[:, 2:].reshape(-1),
                               axis=0)
    reward_corr, _ = spearmanr(activation_mat,
                               reward_mat[:, :-2].reshape(-1),
                               axis=0)
    rewardxaction_corr, _ = spearmanr(activation_mat,
                                      reward_mat[:, :-2].reshape(-1) *
                                      action_mat[:, 2:].reshape(-1),
                                      axis=0)

    value_corr = np.abs(value_corr[:-1, -1])
    action_corr = np.abs(action_corr[:-1, -1])
    reward_corr = np.abs(reward_corr[:-1, -1])
    rewardxaction_corr = np.abs(rewardxaction_corr[:-1, -1])

    plt.bar(range(4),
            [np.mean(x) for x in
             [action_corr, reward_corr, rewardxaction_corr, value_corr]],
            tick_label=[
                r'$a_{t-1}$', r'$r_{t-1}$',
                r'$a_{t-1}\times r_{t-1}$', r'$v_t$'
            ],
            zorder=0)
    plt.ylabel("Correlation")
    n_units = len(action_corr)
    plt.scatter([0] * n_units, action_corr, c='red')
    plt.scatter([1] * n_units, reward_corr, c='red')
    plt.scatter([2] * n_units, rewardxaction_corr, c='red')
    plt.scatter([3] * n_units, value_corr, c='red')
    plt.show()
def figure_2_c(model_path):
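    """Fit logistic regressions predicting the current choice from the previous
    15 actions and, separately, the previous 15 rewards, and plot the
    coefficients by trial lag."""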
    model = PrefrontalLSTM(0, 2)
    model.load_state_dict(torch.load(model_path))
    env = TaskOne(mode='monkey')
    env.set_test()
    X_actions = []
    X_rewards = []
    y = []
    for _ in range(4000):
        actions, rewards = run_episode(env, model)
        for i, a in enumerate(actions):
            if i <= 33 + 15:
                # Only use data from the last two thirds of the episode,
                # leaving room for the 15-trial history window.
                continue
            X_rewards.append(rewards[i - 15:i])
            X_actions.append(actions[i - 15:i])
            y.append(a)
    X_actions = np.array(X_actions)
    X_rewards = np.array(X_rewards)
    y = np.array(y)

    model = LogisticRegression()
    model.fit(X_actions, y)
    action_coefficients = model.coef_[0]

    model = LogisticRegression()
    model.fit(X_rewards, y)
    reward_coefficients = model.coef_[0]

    fig, axes = plt.subplots(2, 1)
    axes[0].plot(reward_coefficients)
    axes[1].plot(action_coefficients)
    axes[0].set_title("Rewards")
    axes[1].set_title("Actions")
    for ax in axes:
        ax.set_ylabel('Coefficient')
        ax.set_xlabel('Trial lag')
        ax.plot(ax.get_xlim(), (0, 0), linestyle='--', color='black')
        ax.yaxis.set_label_position("right")
        ax.yaxis.tick_right()
        ax.set_xticks(range(15))
        ax.set_xticklabels(np.arange(15, 0, -1))
    plt.show()
def pca_plot(model_path, probabilities=[0.99, 0.01]):
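    """Project LSTM activations from one bandit episode onto the first two
    principal components (fitted on ten warm-up episodes), colouring points by
    trial index and splitting the marker style by chosen arm."""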
    model = PrefrontalLSTM(0, 2)
    model.load_state_dict(torch.load(model_path))
    model.eval()

    activations = {}
    layer2name = {}
    hook_function = lambda m, i, o: activations[layer2name[m]].append(o)
    for name, layer in model._modules.items():
        layer2name[layer] = name
        activations[name] = []
        layer.register_forward_hook(hook_function)

    env = TaskOne(mode='bandit')

    # Because of the seed, we don't need to worry about different PCA projections
    for _ in range(10):
        run_episode(env, model)

    activation_mat = torch.cat([x[0] for x in activations['lstm']
                                ]).squeeze().numpy()
    pca = PCA(n_components=2)
    pca.fit(activation_mat)

    activations['lstm'] = []
    actions, _ = run_episode(env, model, probabilities)
    actions = np.array(actions)

    activation_mat = torch.cat([x[0] for x in activations['lstm']
                                ]).squeeze().numpy()
    activation_x_y = pca.transform(activation_mat)

    fig, ax = plt.subplots()
    ax.set_xlabel("PCA 1")
    ax.set_ylabel("PCA 2")
    trial_idx = np.arange(len(actions))
    l_idx = actions == 0
    ax.scatter(activation_x_y[l_idx, 0],
               activation_x_y[l_idx, 1],
               c=trial_idx[l_idx],
               cmap='copper')
    ax.scatter(activation_x_y[~l_idx, 0],
               activation_x_y[~l_idx, 1],
               c=trial_idx[~l_idx],
               marker='x',
               cmap='copper')
    ax.set_title(f"P_l={probabilities[0]}, P_r={probabilities[1]}")
    plt.show()
def get_regrets(model_path, probabilities=[0.75, 0.25], n_samples=500):
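    """Run the trained model on the bandit task and collect cumulative regret,
    actions, and rewards for each episode."""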
    model = PrefrontalLSTM(0, 2)
    model.load_state_dict(torch.load(model_path))
    model.eval()

    env = TaskOne(mode='bandit')

    all_regrets = []
    all_actions = []
    all_rewards = []

    for _ in range(n_samples):
        actions, rewards = run_episode(env, model, probs=probabilities.copy())
        all_regrets.append(cumulative_regret(probabilities, rewards))
        all_rewards.append(rewards)
        all_actions.append(actions)

    all_regrets = np.array(all_regrets)
    all_actions = np.array(all_actions)
    all_rewards = np.array(all_rewards)
    return all_regrets, all_actions, all_rewards
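# Note: `cumulative_regret` and `run_episode` are helpers defined elsewhere in
# the repo. As a rough, illustrative sketch only (an assumption, not the actual
# implementation), cumulative regret against the best arm's expected reward
# could look like:
#
#     def cumulative_regret(probabilities, rewards):
#         best = max(probabilities)
#         return np.cumsum(best - np.array(rewards))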
def figure_5_d(model_path, N=8, episodes=500):
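    """Logistic regression of stay behaviour on the trial types
    (rewarded/unrewarded x common/uncommon transition) of the previous five
    trials of the two-step task, with coefficients plotted per trial lag."""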
    model = PrefrontalLSTM(2, 2)
    model.load_state_dict(torch.load(model_path))
    coefs = np.zeros((N, 5, 4))
    for seed in range(N):
        env = TwoStep(seed=seed)
        type_to_idx = {
            (0, "common"): [1, 0, 0, 0],
            (1, "common"): [0, 1, 0, 0],
            (0, "uncommon"): [0, 0, 1, 0],
            (1, "uncommon"): [0, 0, 0, 1]
        }
        idx_to_type = {
            0: (0, "common"),
            1: (1, "common"),
            2: (0, "uncommon"),
            3: (1, "uncommon")
        }
        X = []
        y = []
        for _ in range(episodes):
            actions, rewards, infos = run_episode(env,
                                                  model,
                                                  return_infos=True)

            trial_types = []
            first_actions = []
            outcomes = []

            for action, reward, info in zip(actions, rewards, infos):
                if info['state_transition'] is None:
                    if len(trial_types) > 0:
                        outcomes.append(first_actions[-1] == first_actions[-2])
                    trial_types.append(type_to_idx[(reward, state_transition)])
                else:
                    first_actions.append(action)
                    state_transition = info['state_transition']
            for i, outcome in enumerate(outcomes[4:]):
                idx = i + 5
                X.append(trial_types[idx - 5:idx])
            y += outcomes[4:]
        X = np.array(X)  #(N_samples, t, reward_type)
        y = np.array(y)
        l_model = LogisticRegression()
        l_model.fit(X.reshape(-1, 20), y)
        coefs[seed, :, :] = l_model.coef_.reshape(5, 4)

    color = ['red', 'blue']
    linestyle = {"common": '-', "uncommon": ':'}
    for idx, (reward, transition) in idx_to_type.items():
        plt.plot(coefs[:, ::-1, idx].mean(axis=0),
                 c=color[reward],
                 linestyle=linestyle[transition])
        for i in range(N):
            if transition == "common":
                plt.scatter(np.arange(5), coefs[i, ::-1, idx], c=color[reward])
            else:
                # Hollow markers for uncommon transitions
                plt.scatter(np.arange(5),
                            coefs[i, ::-1, idx],
                            edgecolors=color[reward],
                            facecolors='none')
    plt.ylabel('Regression Weights')
    plt.xticks(range(5), labels=range(1, 6))
    plt.xlabel('Trials ago')
    plt.show()
def figure_5_b(model_path, N=8, episodes=500):
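    """Classic two-step analysis: stay probability after rewarded vs.
    unrewarded trials, split by common vs. uncommon transitions, averaged over
    N seeds."""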
    model = PrefrontalLSTM(2, 2)
    model.load_state_dict(torch.load(model_path))
    commons = []
    uncommons = []
    for seed in range(N):
        env = TwoStep(seed=seed)
        n = {1: {"common": 0, "uncommon": 0}, 0: {"common": 0, "uncommon": 0}}
        stay = {
            1: {
                "common": 0,
                "uncommon": 0
            },
            0: {
                "common": 0,
                "uncommon": 0
            }
        }
        for _ in range(episodes):
            actions, rewards, infos = run_episode(env,
                                                  model,
                                                  return_infos=True)

            for i, (action, reward,
                    info) in enumerate(zip(actions, rewards, infos)):
                if info['state_transition'] is None:
                if i >= 2:  # Skip the first trial; there is no previous trial to compare against
                        n[prev_reward][prev_state_transition] += 1
                        if prev_first_action == first_action:
                            stay[prev_reward][prev_state_transition] += 1

                    prev_first_action = first_action
                    prev_state_transition = state_transition
                    prev_reward = reward
                else:
                    first_action = action
                    state_transition = info['state_transition']

        commons.append([
            stay[1]["common"] / n[1]["common"],
            stay[0]["common"] / n[0]["common"]
        ])
        uncommons.append([
            stay[1]["uncommon"] / n[1]["uncommon"],
            stay[0]["uncommon"] / n[0]["uncommon"]
        ])

    commons = np.array(commons)
    uncommons = np.array(uncommons)
    width = 0.35
    gap = 0.05
    plt.bar(np.arange(2) - width / 2 - gap,
            commons.mean(axis=0),
            width,
            label='Common',
            color='blue',
            zorder=0)
    plt.bar(np.arange(2) + width / 2 + gap,
            uncommons.mean(axis=0),
            width,
            label='Uncommon',
            color='red',
            zorder=0)
    plt.scatter(np.tile(np.arange(2) - width / 2 - gap, N),
                commons.reshape(-1),
                c='black')
    plt.scatter(np.tile(np.arange(2) + width / 2 + gap, N),
                uncommons.reshape(-1),
                c='black')
    plt.xticks((0, 1), labels=["Rewarded", "Unrewarded"])
    plt.ylim(0.5, 1)
    plt.legend()
    plt.show()
Example #9
def figure_5_b(model_path, N=3, episodes=100):
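    """Stay-probability analysis (rewarded/unrewarded x common/uncommon) for
    the grid-world version of the two-step task, using the info dicts returned
    by TwoStepsGridWorld."""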
    model = PrefrontalLSTM(126, 4, hidden_size=192)
    model.load_state_dict(torch.load(model_path))
    commons = []
    uncommons = []
    env = TwoStepsGridWorld(seed=42)
    for seed in range(N):
        n = {1: {"common": 0, "uncommon": 0}, 0: {"common": 0, "uncommon": 0}}
        stay = {
            1: {
                "common": 0,
                "uncommon": 0
            },
            0: {
                "common": 0,
                "uncommon": 0
            }
        }
        for _ in range(episodes):
            actions, rewards, infos = run_episode(env,
                                                  model,
                                                  return_infos=True)
            last_first_position = None
            for info in infos:
                if info != {}:
                    n[info["reward"]][info["state_transition"]] += 1
                    if (last_first_position is not None
                            and last_first_position == info["first_position"]):
                        stay[info["reward"]][info["state_transition"]] += 1
                    last_first_position = info["first_position"]

        commons.append([
            stay[1]["common"] / n[1]["common"],
            stay[0]["common"] / n[0]["common"]
        ])
        uncommons.append([
            stay[1]["uncommon"] / n[1]["uncommon"],
            stay[0]["uncommon"] / n[0]["uncommon"]
        ])

    commons = np.array(commons)
    uncommons = np.array(uncommons)
    width = 0.35
    gap = 0.05
    plt.bar(np.arange(2) - width / 2 - gap,
            commons.mean(axis=0),
            width,
            label='Common',
            color='blue',
            zorder=0)
    plt.bar(np.arange(2) + width / 2 + gap,
            uncommons.mean(axis=0),
            width,
            label='Uncommon',
            color='red',
            zorder=0)
    plt.scatter(np.tile(np.arange(2) - width / 2 - gap, N),
                commons.reshape(-1),
                c='black')
    plt.scatter(np.tile(np.arange(2) + width / 2 + gap, N),
                uncommons.reshape(-1),
                c='black')
    plt.xticks((0, 1), labels=["Rewarded", "Unrewarded"])
    plt.ylim(0.5, 1)
    plt.legend()
    plt.show()
Example #10
from meta_rl.models import PrefrontalLSTM
from meta_rl.training import train
from meta_rl.tasks import TaskOne, TwoStepsGridWorld
import torch
import matplotlib.pyplot as plt
from tqdm import tqdm
import numpy as np
from scipy.interpolate import make_interp_spline, BSpline
import gym

model = PrefrontalLSTM(126, 4, hidden_size=192)
model.train()
optimizer = torch.optim.RMSprop(model.parameters(), lr=0.0007)
env = TwoStepsGridWorld()
state = env.reset()
loss = []
rewards = []
t_range = tqdm(range(10000))
for i in t_range:
    l, r, a = train(env, model, optimizer, discount_factor=0.90)
    t_range.set_description("Current r: {:10.2f}".format(sum(r)))
    loss.append(l)
    rewards.append(sum(r))

torch.save(model.state_dict(), 'grid_world_2.pt')
fig, axs = plt.subplots(2)


def smooth(X):
    X = np.array(X)
    T = np.arange(len(X))
    return np.poly1d(np.polyfit(T, X, 5))(T)
Example #11
from meta_rl.models import PrefrontalLSTM
from meta_rl.training import train
from meta_rl.tasks import TaskOne, TwoStep, HumanTwoStep
import torch
import matplotlib.pyplot as plt
from tqdm import tqdm
import numpy as np
from scipy.interpolate import make_interp_spline, BSpline
import gym

model = PrefrontalLSTM(2, 2, hidden_size=192)
model.train()
optimizer = torch.optim.RMSprop(model.parameters(), lr=0.0007)
env = HumanTwoStep()
loss = []
rewards = []
t_range = tqdm(range(30000))
for i in t_range:
    l, r, a = train(env, model, optimizer, discount_factor=0.90)
    t_range.set_description("Current loss: {:10.2f}".format(l))
    loss.append(l)
    rewards.append(sum(r) / len(r))

torch.save(model.state_dict(), 'human_task_two_30k_192.pt')
fig, axs = plt.subplots(2)

def smooth(X):
    X = np.array(X)
    T = np.arange(len(X))
    return np.poly1d(np.polyfit(T, X, 5))(T)
Example #12
from meta_rl.models import PrefrontalLSTM
from meta_rl.training import train
from meta_rl.tasks import TaskOne
import torch
import matplotlib.pyplot as plt
from tqdm import tqdm
import numpy as np
from scipy.interpolate import make_interp_spline, BSpline
import gym

model = PrefrontalLSTM(0, 2)
model.train()
optimizer = torch.optim.RMSprop(model.parameters(), lr=0.0007)
env = TaskOne(mode='monkey')
loss = []
rewards = []
t_range = tqdm(range(30000))
for i in t_range:
    l, r, a = train(env, model, optimizer, discount_factor=0.90)
    t_range.set_description("Current loss: {}".format(l))
    loss.append(l)
    rewards.append(sum(r) / len(r))

torch.save(model.state_dict(), 'monkey_action_item.pt')
fig, axs = plt.subplots(2)


def smooth(X):
    X = np.array(X)
    T = np.arange(len(X))
    return np.poly1d(np.polyfit(T, X, 5))(T)
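

# The per-run plotting is not shown in this listing. A plausible use of
# `smooth` with the two axes (an assumption, not the original code):
#
#     axs[0].plot(smooth(loss))
#     axs[0].set_ylabel("Loss")
#     axs[1].plot(smooth(rewards))
#     axs[1].set_ylabel("Mean reward")
#     plt.show()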