Python RL_utils Examples, RL_utils Python Examples

Example #1

0

Show file

File: RL_plotting.py Project: dydcfg/Two_Step

def _plot_dist(mean_U, SD_U, rnge, col = 'r', ls = '-', normalize = False):
    #Transform population distribution from true to unconstrained space.
    if rnge == 'unc':
        T = np.arange(mean_U - 3 * SD_U, mean_U + 3 * SD_U, 6 * SD_U / 100.)
        dUdT = 1.
        U = T
        x_ticks = np.linspace(np.ceil(T[0]),np.floor(T[-1]),3)
        if x_ticks[0] == x_ticks[2]:
            x_ticks = np.round([T[0],T[-1]],1)
    elif rnge == 'unit':
        T = np.arange(0.001,0.999,0.001)
        U = ru.inverse_sigmoid(T)
        dUdT = ru.inv_sigmoid_grad(T)
        x_ticks = [0,0.5,1]
    elif rnge == 'pos':
        T_range = np.exp([mean_U - 3 * SD_U, mean_U + 3 * SD_U])
        T = np.linspace(T_range[0], T_range[1],100)
        U = np.log(T)
        dUdT = 1./T
        x_ticks = np.linspace(np.ceil(T[0]),np.floor(T[-1]),3)
    spacing = T[1] - T[0]
    dist = sp.stats.norm(mean_U,SD_U).pdf(U) * dUdT
    if normalize:
        dist = dist / np.max(dist)
    else:
        dist = dist / (dist.sum() * spacing)
    if col:
        p.plot(T, dist, color = col, linestyle = ls, linewidth = 1.5)
    else:
        p.plot(T, dist, linestyle = ls, linewidth = 1.5)

Example #2

0

Show file

File: RL_agents.py Project: dydcfg/Two_Step

    def session_likelihood(self, session, params_T):
        """Return the log likelihood of the session's choices given params_T.

        Model free agent whose first step action values are stored separately
        for each second step state reached on the previous trial.

        Arguments:
        session  : object whose CTSO attribute is unpacked by ut.CTSO_unpack
                   into choices, second steps and outcomes.
        params_T : parameter sequence.  The first four entries are
                   alpha  - weight on the new target in the value updates,
                   iTemp  - passed to ru.array_softmax,
                   lambd  - weight of the outcome relative to the second step
                            value in the first step update target,
                   D      - Q value decay parameter.
                   If self.use_kernels is set, the last three entries are
                   bias, CK and SSK, which are passed to kernel_Qs.

        Returns the summed log probability of the observed choices.
        """
        # Unpack trial events.
        choices, second_steps, outcomes = ut.CTSO_unpack(session.CTSO, 'CSO')

        n_trials = len(choices)
        # prev_second_steps[i] is the second step reached on trial i - 1
        # (0 for the first trial, which has no predecessor).
        prev_second_steps = np.zeros(n_trials + 1, int)
        prev_second_steps[1:] = second_steps

        # Unpack parameters.
        alpha, iTemp, lambd, D = params_T[:4]
        if self.use_kernels:
            bias, CK, SSK = params_T[-3:]

        # Variables.
        Q_td_f = np.zeros([n_trials + 1, 2, 2])  # Model free action values at first step. indices: trial, first step, previous second step.
        Q_td_s = np.zeros([n_trials + 1, 2])     # Model free action values at second step.

        for i, (c, s, v, o) in enumerate(zip(choices, second_steps, prev_second_steps, outcomes)): # loop over trials.

            ns = 1 - s  # State not reached at second step. (0 or 1)

            # Forgetting: carry the values of trial i forward, decayed by D.
            # The right hand side must read trial i; reading row i + 1 (still
            # all zeros) would discard everything learned so far.  The entries
            # for the chosen action / reached state are overwritten below.
            Q_td_f[i+1] = Q_td_f[i] * (1. - D)              # First step forgetting.
            Q_td_s[i+1, ns] = Q_td_s[i, ns] * (1. - D)      # Second step forgetting.

            # Update model free action values.
            Q_td_f[i+1, c, v] = (1. - alpha) * Q_td_f[i, c, v] + \
                          alpha * (Q_td_s[i,s] + lambd * (o - Q_td_s[i,s])) # First step TD update.

            Q_td_s[i+1, s] = (1. - alpha) * Q_td_s[i,s] + alpha * o         # Second step TD update.

        # Evaluate choice probabilities and likelihood.

        # On each trial use the first step values associated with the second
        # step state reached on the previous trial.  Fancy indexing returns a
        # new array, so the in-place kernel addition does not touch Q_td_f.
        Q_net = Q_td_f[np.arange(n_trials + 1), :, prev_second_steps]

        if self.use_kernels:  # bias, CK and SSK only exist when kernels are in use.
            Q_net[:,0] += kernel_Qs(session, bias, CK, SSK)

        choice_probs = ru.array_softmax(Q_net, iTemp)
        trial_log_likelihood = ru.protected_log(choice_probs[np.arange(n_trials), choices])
        session_log_likelihood = np.sum(trial_log_likelihood)

        return session_log_likelihood

Example #3

0

Show file

File: RL_agents.py Project: dydcfg/Two_Step

    def session_likelihood(self, session, params_T):
        """Return the log likelihood of the session's choices given params_T.

        Model free agent with forgetting: values of the action not chosen and
        the second step state not reached decay by a factor (1 - D) per trial.

        Arguments:
        session  : object whose CTSO attribute is unpacked by ut.CTSO_unpack
                   into choices, second steps and outcomes.
        params_T : parameter sequence.  The first four entries are
                   alpha  - weight on the new target in the value updates,
                   iTemp  - passed to ru.array_softmax,
                   lambd  - weight of the outcome relative to the second step
                            value in the first step update target,
                   D      - Q value decay parameter.
                   If self.use_kernels is set, the last three entries are
                   bias, CK and SSK, which are passed to kernel_Qs.

        Returns the summed log probability of the observed choices.
        """
        # Unpack trial events.
        choices, second_steps, outcomes = ut.CTSO_unpack(session.CTSO, 'CSO')

        # Unpack parameters.
        alpha, iTemp, lambd, D = params_T[:4]
        if self.use_kernels:
            bias, CK, SSK = params_T[-3:]

        # Variables.
        n_trials = len(choices)
        Q_td_f = np.zeros([n_trials + 1, 2])       # Model free first step action values (low, high).
        Q_td_s = np.zeros([n_trials + 1, 2])       # Model free second step action values (right, left).

        for i, (c, s, o) in enumerate(zip(choices, second_steps, outcomes)): # loop over trials.

            nc = 1 - c  # Action not chosen at first step.
            ns = 1 - s  # State not reached at second step.

            # Update model free action values.

            Q_td_f[i+1, nc] = Q_td_f[i, nc] * (1. - D)   # First step forgetting.
            Q_td_s[i+1, ns] = Q_td_s[i, ns] * (1. - D)   # Second step forgetting.

            Q_td_f[i+1, c] = (1. - alpha) * Q_td_f[i, c] + \
                             alpha * (Q_td_s[i,s] + lambd * (o - Q_td_s[i,s])) # First step TD update.

            Q_td_s[i+1, s] = (1. - alpha) * Q_td_s[i,s] + alpha * o            # Second step TD update.

        # Evaluate choice probabilities and likelihood.

        # Work on a copy so the kernel offsets are not written into Q_td_f.
        Q_net = Q_td_f.copy()

        if self.use_kernels:
            Q_net[:,0] += kernel_Qs(session, bias, CK, SSK)

        choice_probs = ru.array_softmax(Q_net, iTemp)
        trial_log_likelihood = ru.protected_log(choice_probs[np.arange(n_trials), choices])
        session_log_likelihood = np.sum(trial_log_likelihood)

        return session_log_likelihood

Example #4

0

Show file

File: logistic_regression_agents.py Project: dydcfg/Two_Step

    def session_likelihood(self, session, params_T, eval_grad = False):
        """Return the log likelihood of the session's choices under a logistic
        regression model, optionally with its gradient.

        Arguments:
        session   : object providing CTSO['choices'] and n_trials.  If it has
                    a `predictors` attribute that array is used, otherwise
                    the predictors come from self._get_session_predictors.
        params_T  : parameter sequence; params_T[0] is the bias and
                    params_T[1:] are the predictor weights.
        eval_grad : if True also return the gradient of the log likelihood
                    with respect to params_T.

        Returns the session log likelihood, or the tuple
        (log likelihood, gradient) when eval_grad is True.  The gradient is
        ordered like params_T: bias first, then one entry per weight.
        """
        bias = params_T[0]
        weights = params_T[1:]

        choices = session.CTSO['choices']

        if hasattr(session, 'predictors'):
            predictors = session.predictors
        else:
            predictors = self._get_session_predictors(session) # Get array of predictors

        assert predictors.shape[0] == session.n_trials,  'predictor array does not match number of trials.'
        assert predictors.shape[1] == len(weights), 'predictor array does not match number of weights.'

        if self.trial_select: # Only use subset of trials.
            trials_to_use = self._select_trials(session)
            choices = choices[trials_to_use]
            predictors = predictors[trials_to_use,:]

        # Evaluate session log likelihood.

        Q = np.dot(predictors, weights) + bias
        P = ru.logistic(Q)  # Probability of making choice 1
        # Probability of the choice actually made: P where choice is 1,
        # 1 - P where choice is 0.
        Pc = 1 - P - choices + 2. * choices * P

        # np.sum rather than the built-in sum: vectorised, and well defined
        # when no trials are selected.
        session_log_likelihood = np.sum(ru.protected_log(Pc))

        if not eval_grad:
            return session_log_likelihood

        # Evaluate session log likelihood gradient.

        # d log(Pc) / dQ: (1 - Pc) where choice is 1, (Pc - 1) where choice is 0.
        dLdQ = - 1 + 2 * choices + Pc - 2 * choices * Pc
        dLdB = np.sum(dLdQ)              # Likelihood gradient w.r.t. bias parameter.
        # Sum over trials of dLdQ * predictor, for each predictor column.
        # Always has one entry per weight, even with zero trials.
        dLdW = np.dot(dLdQ, predictors)  # Likelihood gradient w.r.t weights.
        session_log_likelihood_gradient = np.append(dLdB, dLdW)
        return (session_log_likelihood, session_log_likelihood_gradient)

Example #5

0

Show file

File: RL_agents.py Project: dydcfg/Two_Step

    def session_likelihood(self, session, params_T):
        """Return the log likelihood of the session's choices under a
        win-stay / lose-shift rule.

        Arguments:
        session  : object whose CTSO attribute is unpacked by ut.CTSO_unpack
                   into choices, second steps and outcomes.
        params_T : parameter sequence; params_T[0] is the strength of the
                   win-stay lose-shift effect.  If self.use_kernels is set,
                   the last three entries are bias, CK and SSK, which are
                   passed to kernel_Qs.

        Returns the summed log probability of the observed choices.
        """
        # Trial events (second steps are unpacked but not used by this agent).
        choices, second_steps, outcomes = ut.CTSO_unpack(session.CTSO, 'CSO')
        n_trials = len(choices)

        # Parameters.
        winstay = params_T[0]   # Strength of win-stay lose-shift effect.
        if self.use_kernels:
            bias, CK, SSK = params_T[-3:]

        # On trials 1 .. n_trials - 1, shift the value of the action chosen on
        # the preceding trial by +/- winstay / 2 depending on its outcome.
        Q_net = np.zeros([n_trials + 1, 2])
        following_trials = np.arange(1, n_trials)
        wsls_effect = (outcomes[:-1] - 0.5) * winstay
        Q_net[following_trials, choices[:-1]] += wsls_effect

        if self.use_kernels:
            Q_net[:,0] += kernel_Qs(session, bias, CK, SSK)

        # Softmax with unit inverse temperature, then score the choices made.
        choice_probs = ru.array_softmax(Q_net, 1.)
        chosen_log_probs = ru.protected_log(choice_probs[np.arange(n_trials), choices])
        return np.sum(chosen_log_probs)

Example #6

0

Show file

File: model_comparison.py Project: dydcfg/Two_Step

def eval_calibration(sessions, agent, population_fit, use_MAP=True, n_bins=10, fixed_widths=False, to_plot=False):
    """Calculate real choice probabilities as a function of model choice probabilities.

    Trials from all sessions are pooled and binned by the model's probability
    of choice 1.  For each bin the mean model probability and the observed
    fraction of trials on which choice 1 was made are computed.

    Arguments:
    sessions       : sequence of sessions, each providing CTSO['choices'].
    agent          : object with n_params, name and a session_likelihood
                     method that, when called with return_trial_data=True,
                     returns a dict with a 'choice_probs' array.
                     Assumes that array has one row per trial and columns
                     (choice 0, choice 1) - TODO confirm against the agent.
    population_fit : dict with 'MAP_fits' (one fit per session, each with
                     'params_T'), 'agent_name' and, if use_MAP is False,
                     'pop_params'.
    use_MAP        : if True use each session's fitted params_T, otherwise
                     sample parameters from the population distribution via
                     ru.sample_params_T_from_pop_params.
    n_bins         : number of bins.
    fixed_widths   : if True bins have equal width in model choice
                     probability, otherwise equal numbers of trials.
    to_plot        : if True pass the result to rp.calibration_plot.

    Returns a dict with arrays 'true_probs' and 'model_probs', one entry per
    bin.  A bin containing no trials gets NaN in both arrays.  Also prints the
    fraction of trials on which the more probable choice was made and the
    geometric mean probability of the choices made.
    """

    session_fits = population_fit["MAP_fits"]

    assert len(session_fits[0]["params_T"]) == agent.n_params, "agent n_params does not match population_fit."
    assert len(sessions) == len(session_fits), "Number of fits does not match number of sessions."
    assert population_fit["agent_name"] == agent.name, "Agent name different from that used for fits."

    # Create arrays containing model choice probabilities and true choices for each trial.
    session_choices, session_choice_probs = ([], [])
    for fit, session in zip(session_fits, sessions):
        if use_MAP:
            params_T = fit["params_T"]
        else:
            params_T = ru.sample_params_T_from_pop_params(population_fit["pop_params"], agent)
        session_choices.append(session.CTSO["choices"].tolist())
        session_choice_probs.append(agent.session_likelihood(session, params_T, return_trial_data=True)["choice_probs"])
    choices = np.hstack(session_choices)
    choice_probs = np.vstack(session_choice_probs)[:, 1]  # Model probability of choice 1.

    # Calculate true vs model choice probs.
    if fixed_widths:  # Bins of equal width in model choice probability.
        bin_edges = np.linspace(0, 1, n_bins + 1)
    else:  # Bins of equal trial number.
        order = np.argsort(choice_probs)
        choices = choices[order]
        choice_probs = choice_probs[order]
        bin_edges = choice_probs[np.round(np.linspace(0, len(choice_probs) - 1, n_bins + 1)).astype(int)]
        bin_edges[0] = 0.0  # Bins are open on the left, so lower the first edge to include the smallest probability.

    true_probs = np.zeros(n_bins)
    model_probs = np.zeros(n_bins)
    for b in range(n_bins):
        in_bin = np.logical_and(bin_edges[b] < choice_probs, choice_probs <= bin_edges[b + 1])
        true_probs[b] = np.mean(choices[in_bin])
        model_probs[b] = np.mean(choice_probs[in_bin])
    # Built after the loop so the result exists even when there are no bins.
    calibration = {"true_probs": true_probs, "model_probs": model_probs}

    if to_plot:
        rp.calibration_plot(calibration)

    predicted_choice_1 = choice_probs > 0.5
    print("Fraction correct: {}".format(np.mean(predicted_choice_1 == choices.astype(bool))))
    # Probability the model assigned to the choice actually made on each trial.
    chosen_probs = np.hstack([choice_probs[choices == 1], 1.0 - choice_probs[choices == 0]])
    print("Geometric mean choice prob: {}".format(np.exp(np.mean(np.log(chosen_probs)))))
    return calibration

Example #7

0

Show file

File: RL_agents.py Project: dydcfg/Two_Step

    def session_likelihood(self, session, params_T):
        """Return the log likelihood of the session's choices given params_T.

        Mixture of model based and model free values in which the weight on
        the model based values varies from trial to trial with an arbitration
        variable that tracks recent absolute state prediction errors.

        Arguments:
        session  : object whose CTSO attribute is unpacked by ut.CTSO_unpack
                   into choices, second steps and outcomes.
        params_T : parameter sequence.  The first nine entries are
                   alpha  - weight on the new target in the value updates,
                   iTemp  - passed to ru.array_softmax,
                   lambd  - weight of the outcome relative to the second step
                            value in the first step update target,
                   W      - offset inside the sigmoid that sets the model
                            based weight,
                   tlr    - transition probability learning rate,
                   D      - Q value decay parameter,
                   tdec   - rate at which the non-chosen action's transition
                            probability relaxes towards 0.5,
                   A      - gain of the arbitration variable inside the
                            sigmoid,
                   alr    - learning rate for arbitration.
                   If self.use_kernels is set, the last three entries are
                   bias, CK and SSK, which are passed to kernel_Qs.

        Returns the summed log probability of the observed choices.
        """
        # Unpack trial events.
        choices, second_steps, outcomes = ut.CTSO_unpack(session.CTSO, 'CSO')

        # Unpack parameters.
        alpha, iTemp, lambd, W, tlr, D, tdec, A, alr = params_T[:9]
        if self.use_kernels:
            bias, CK, SSK = params_T[-3:]

        # Variables.
        n_trials = len(choices)
        Q_td_f = np.zeros([n_trials + 1, 2])       # Model free first step action values (low, high).
        Q_td_s = np.zeros([n_trials + 1, 2])       # Model free second step action values (right, left).
        arb    = np.zeros(n_trials + 1)            # Arbitration variable; larger values reduce the model based weight.
        trans_probs = np.zeros([n_trials + 1, 2])  # Transition probabilities for low and high pokes.
        trans_probs[0,:] = 0.5  # Initialize first trial transition probabilities.

        for i, (c, s, o) in enumerate(zip(choices, second_steps, outcomes)): # loop over trials.

            nc = 1 - c  # Action not chosen at first step.
            ns = 1 - s  # State not reached at second step.

            # Update model free action values.

            Q_td_f[i+1, nc] = Q_td_f[i, nc] * (1. - D)   # First step forgetting.
            Q_td_s[i+1, ns] = Q_td_s[i, ns] * (1. - D)   # Second step forgetting.

            Q_td_f[i+1, c] = (1. - alpha) * Q_td_f[i, c] + \
                             alpha * (Q_td_s[i,s] + lambd * (o - Q_td_s[i,s])) # First step TD update.

            Q_td_s[i+1, s] = (1. - alpha) * Q_td_s[i,s] + alpha * o            # Second step TD update.

            # Update transition probabilities.

            trans_probs[i+1, nc] = trans_probs[i, nc] - tdec * (trans_probs[i, nc] - 0.5)  # Transition prob. forgetting.
            state_prediction_error = (s == 0) - trans_probs[i, c]
            trans_probs[i+1, c] = trans_probs[i, c] + tlr * state_prediction_error          # Transition prob. update.

            # Update arbitration: running average of absolute state prediction error.

            arb[i + 1] = arb[i] + alr * (abs(state_prediction_error) - arb[i])

        # Evaluate choice probabilities and likelihood.

        # Model based action values: expected second step value under the
        # learned transition probabilities.  Column slices of shape
        # (n_trials + 1, 1) broadcast across the two first step actions.
        Q_mb = trans_probs * Q_td_s[:, [0]] + (1. - trans_probs) * Q_td_s[:, [1]]

        # Trial by trial model basedness, as a column for broadcasting.
        W_arb = ru.sigmoid(W - A * arb)[:, np.newaxis]

        Q_net = W_arb * Q_mb + (1. - W_arb) * Q_td_f # Mixture of model based and model free values.

        if self.use_kernels:
            Q_net[:,0] += kernel_Qs(session, bias, CK, SSK)

        choice_probs = ru.array_softmax(Q_net, iTemp)
        trial_log_likelihood = ru.protected_log(choice_probs[np.arange(n_trials), choices])
        session_log_likelihood = np.sum(trial_log_likelihood)

        return session_log_likelihood

Example #8

0

Show file

File: RL_agents.py Project: dydcfg/Two_Step

    def session_likelihood(self, session, params_T):
        """Return the log likelihood of the session's choices given params_T.

        Fixed-weight mixture of model based and model free action values,
        with forgetting of values and of learned transition probabilities.

        Arguments:
        session  : object whose CTSO attribute is unpacked by ut.CTSO_unpack
                   into choices, transitions, second steps and outcomes.
        params_T : parameter sequence.  The first seven entries are
                   alpha  - weight on the new target in the value updates,
                   iTemp  - passed to ru.array_softmax,
                   lambd  - weight of the outcome relative to the second step
                            value in the first step update target,
                   W      - weight on the model based values,
                   tlr    - transition probability learning rate,
                   D      - Q value decay parameter,
                   tdec   - transition decay rate.
                   If self.use_kernels is set, the last three entries are
                   bias, CK and SSK, which are passed to kernel_Qs.

        Returns the summed log probability of the observed choices.
        """
        # Unpack trial events.  Transitions are not used by this agent.
        choices, _transitions, second_steps, outcomes = ut.CTSO_unpack(session.CTSO, 'CTSO')

        # Unpack parameters.
        alpha, iTemp, lambd, W, tlr, D, tdec = params_T[:7]
        if self.use_kernels:
            bias, CK, SSK = params_T[-3:]

        # Variables.
        n_trials = len(choices)
        Q_td_f = np.zeros([n_trials + 1, 2])       # Model free first step action values (low, high).
        Q_td_s = np.zeros([n_trials + 1, 2])       # Model free second step action values (right, left).
        trans_probs = np.zeros([n_trials + 1, 2])  # Transition probabilities for low and high pokes.
        trans_probs[0,:] = 0.5  # Initialize first trial transition probabilities.

        # Only the per-trial arrays are zipped.  zip stops at its shortest
        # argument, so including an unrelated sequence here could silently
        # cut the loop short of n_trials.
        # NOTE(review): session.blocks['session_start_trials'] was previously
        # zipped in but its values were never used - confirm whether a reset
        # of values at session starts was intended.
        for i, (c, s, o) in enumerate(zip(choices, second_steps, outcomes)): # loop over trials.

            nc = 1 - c  # Action not chosen at first step.
            ns = 1 - s  # State not reached at second step.

            # Update model free action values.

            Q_td_f[i+1, nc] = Q_td_f[i, nc] * (1. - D)   # First step forgetting.
            Q_td_s[i+1, ns] = Q_td_s[i, ns] * (1. - D)   # Second step forgetting.

            Q_td_f[i+1, c] = (1. - alpha) * Q_td_f[i, c] + \
                             alpha * (Q_td_s[i,s] + lambd * (o - Q_td_s[i,s])) # First step TD update.

            Q_td_s[i+1, s] = (1. - alpha) * Q_td_s[i,s] + alpha * o            # Second step TD update.

            # Update transition probabilities.

            trans_probs[i+1, nc] = trans_probs[i, nc] - tdec * (trans_probs[i, nc] - 0.5)  # Transition prob. forgetting.

            trans_probs[i+1, c] = (1. - tlr) * trans_probs[i, c] + tlr * (s == 0)           # Transition prob. update.

        # Evaluate choice probabilities and likelihood.

        # Model based action values: expected second step value under the
        # learned transition probabilities.  Column slices of shape
        # (n_trials + 1, 1) broadcast across the two first step actions.
        Q_mb = trans_probs * Q_td_s[:, [0]] + (1. - trans_probs) * Q_td_s[:, [1]]

        Q_net = W * Q_mb + (1. - W) * Q_td_f # Mixture of model based and model free values.

        if self.use_kernels:
            Q_net[:,0] += kernel_Qs(session, bias, CK, SSK)

        choice_probs = ru.array_softmax(Q_net, iTemp)
        trial_log_likelihood = ru.protected_log(choice_probs[np.arange(n_trials), choices])
        session_log_likelihood = np.sum(trial_log_likelihood)

        return session_log_likelihood