def run_action_selection(post, prior, like, trials=100, crit_factor=0.5):

    ac_sel = asl.DirichletSelector(trials, 2, npi, factor=crit_factor)
    samples = []
    for t in range(trials):
        ac_sel.select_desired_action(t, 0, post, list(range(npi)), like, prior)
        RT = int(ac_sel.RT[t, 0])
        samples.append(list(ac_sel.accepted_pis[:RT]))

    return ac_sel.RT.squeeze().astype(int), samples
def run_action_selection(post, prior, like, trials=100, crit_factor=0.5, calc_dkl=False):

    ac_sel = asl.DirichletSelector(trials, 2, npi, factor=crit_factor, calc_dkl=calc_dkl)
    for t in range(trials):
        ac_sel.select_desired_action(t, 0, post, list(range(npi)), like, prior)

    if calc_dkl:
        return ac_sel.RT.squeeze(), ac_sel.DKL_post.squeeze(), ac_sel.DKL_prior.squeeze()
    else:
        return ac_sel.RT.squeeze()
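
# Hypothetical usage sketch (not part of the original script). It exercises the
# run_action_selection variant defined directly above with uniform policy
# distributions. `npi` is assumed to be the same module-level number of policies
# that run_action_selection itself relies on; the trial count is illustrative.
def _demo_run_action_selection(n_trials=10):
    post = np.ones(npi) / npi    # posterior over policies
    prior = np.ones(npi) / npi   # prior over policies
    like = np.ones(npi) / npi    # likelihood over policies
    RT = run_action_selection(post, prior, like, trials=n_trials)
    print("mean reaction time:", RT.mean())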
def run_agent(par_list, trials, T, ns, na, nr, nc, deval=False, ESS=None):

    #set parameters:
    #learn_pol: initial concentration parameter for the policy prior
    #trans_prob: reward probability, used here as the stay probability of the context transition matrix
    #avg: True for averaged action selection, False for maximum selection
    #Rho: environment's reward generation probabilities as a function of time
    #utility: goal prior, preference p(o)
    learn_pol, trans_prob, avg, Rho, utility = par_list

    """
    create matrices
    """

    #generating probability of observations in each state
    A = np.eye(ns)

    #state transition generative probability (matrix)
    B = np.zeros((ns, ns, na))
    for i in range(0, na):
        B[i + 1, :, i] += 1

    # agent's beliefs about reward generation
    # concentration parameters
    C_alphas = np.ones((nr, ns, nc))
    # initialize state in front of levers so that agent knows it yields no reward
    C_alphas[0, 0, :] = 100
    for i in range(1, nr):
        C_alphas[i, 0, :] = 1

    # agent's initial estimate of reward generation probability
    C_agent = np.zeros((nr, ns, nc))
    for c in range(nc):
        C_agent[:, :, c] = np.array([(C_alphas[:, i, c]) / (C_alphas[:, i, c]).sum()
                                     for i in range(ns)]).T

    # context transition matrix
    p = trans_prob
    q = 1. - p
    transition_matrix_context = np.zeros((nc, nc))
    transition_matrix_context += q / (nc - 1)
    for i in range(nc):
        transition_matrix_context[i, i] = p

    """
    create environment (multi-armed bandit)
    """
    environment = env.MultiArmedBandid(A, B, Rho, trials=trials, T=T)

    """
    create policies
    """
    pol = np.array(list(itertools.product(list(range(na)), repeat=T - 1)))
    npi = pol.shape[0]

    # concentration parameters
    alphas = np.zeros((npi, nc)) + learn_pol
    prior_pi = alphas / alphas.sum(axis=0)

    """
    set state prior (where agent thinks it starts)
    """
    state_prior = np.zeros((ns))
    state_prior[0] = 1.

    """
    set action selection method
    """
    if ESS is not None:
        ac_sel = asl.DirichletSelector(trials=trials, T=T, number_of_actions=na)
    elif avg:
        ac_sel = asl.AveragedSelector(trials=trials, T=T, number_of_actions=na)
    else:
        ac_sel = asl.MaxSelector(trials=trials, T=T, number_of_actions=na)

    """
    set context prior
    """
    prior_context = np.zeros((nc)) + 0.1 / (nc - 1)
    prior_context[0] = 0.9

    """
    set up agent
    """
    # perception
    bayes_prc = prc.HierarchicalPerception(A, B, C_agent, transition_matrix_context,
                                           state_prior, utility, prior_pi, alphas,
                                           C_alphas, T=T)

    # agent
    bayes_pln = agt.BayesianPlanner(bayes_prc, ac_sel, pol,
                                    trials=trials, T=T,
                                    prior_states=state_prior,
                                    prior_policies=prior_pi,
                                    number_of_states=ns,
                                    prior_context=prior_context,
                                    learn_habit=True,
                                    learn_rew=True,
                                    #save_everything = True,
                                    number_of_policies=npi,
                                    number_of_rewards=nr)

    """
    create world
    """
    w = world.World(environment, bayes_pln, trials=trials, T=T)

    """
    simulate experiment
    """
    if not deval:
        w.simulate_experiment(range(trials))
    else:
        w.simulate_experiment(range(trials // 2))
        # reset utility to implement devaluation
        ut = utility[1:].sum()
        bayes_prc.prior_rewards[2:] = ut / (nr - 2)
        bayes_prc.prior_rewards[:2] = (1 - ut) / 2
        w.simulate_experiment(range(trials // 2, trials))

    return w
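
# Hypothetical usage sketch (not part of the original script) for the bandit
# version of run_agent defined directly above. The reward schedule shape
# (trials, nr, ns) and all concrete numbers are assumptions for illustration;
# the target function is passed in explicitly because this file defines several
# run_agent variants that would otherwise shadow one another.
def _demo_run_agent_bandit(run_agent_fn, trials=100, T=2, ns=3, na=2, nr=2, nc=2):
    Rho = np.zeros((trials, nr, ns))
    Rho[:, 0, 0] = 1.             # starting state yields no reward
    Rho[:, 1, 1] = 0.9            # lever 1 (state 1) is mostly rewarded
    Rho[:, 0, 1] = 0.1
    Rho[:, 1, 2] = 0.1            # lever 2 (state 2) is rarely rewarded
    Rho[:, 0, 2] = 0.9
    utility = np.array([0.01, 0.99])          # preference for the rewarding outcome
    par_list = [1., 0.9, True, Rho, utility]  # learn_pol, trans_prob, avg, Rho, utility
    return run_agent_fn(par_list, trials, T, ns, na, nr, nc)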
def run_agent(par_list, trials, T, ns, na, nr, nc, f, contexts, states,
              state_trans=None, correct_choice=None, congruent=None,
              num_in_run=None, random_draw=False, pol_lambda=0, r_lambda=0,
              one_context=False):

    #set parameters:
    #learn_pol: initial concentration parameter for the policy prior
    #trans_prob: stay probability of the context transition matrix
    #Rho: environment's reward generation probabilities as a function of time
    #utility: goal prior, preference p(o)
    #unc: context observation uncertainty (off-diagonal mass of the context observation matrix)
    learn_pol, trans_prob, Rho, utility, unc = par_list

    """
    create matrices
    """

    #generating probability of observations in each state
    A = np.eye(ns)

    #state transition generative probability (matrix)
    if state_trans is None:
        B = np.zeros((ns, ns, na))
        for i in range(0, na):
            B[i + 1, :, i] += 1
    else:
        B = state_trans.copy()

    # agent's beliefs about reward generation
    # concentration parameters
    C_alphas = np.ones((nr, ns, nc))
    # initialize the four starting states so that the agent knows they yield no reward
    C_alphas[:, :4, :] = np.array([100, 1])[:, None, None]
    # C_alphas[:,4:,0] = np.array([[1, 2],
    #                              [2, 1]])
    # C_alphas[:,4:,1] = np.array([[2, 1],
    #                              [1, 2]])

    # agent's initial estimate of reward generation probability
    C_agent = np.zeros((nr, ns, nc))
    for c in range(nc):
        C_agent[:, :, c] = np.array([(C_alphas[:, i, c]) / (C_alphas[:, i, c]).sum()
                                     for i in range(ns)]).T

    # context transition matrix
    if nc > 1:
        p = trans_prob
        q = 1. - p
        transition_matrix_context = np.zeros((nc, nc))
        transition_matrix_context += q / (nc - 1)
        for i in range(nc):
            transition_matrix_context[i, i] = p
    else:
        transition_matrix_context = np.array([[1]])

    # context observation matrix
    if nc > 1:
        D = np.zeros((nc, nc)) + unc
        for c in range(nc):
            D[c, c] = 1 - (unc * (nc - 1))
    else:
        D = np.array([[1]])

    """
    create environment (task switching)
    """
    if not one_context:
        environment = env.TaskSwitching(A, B, Rho, D, states, contexts,
                                        trials=trials, T=T,
                                        correct_choice=correct_choice,
                                        congruent=congruent,
                                        num_in_run=num_in_run)
    else:
        environment = env.TaskSwitchingOneConext(A, B, Rho, D, states, contexts,
                                                 trials=trials, T=T,
                                                 correct_choice=correct_choice,
                                                 congruent=congruent,
                                                 num_in_run=num_in_run)

    """
    create policies
    """
    pol = np.array(list(itertools.product(list(range(na)), repeat=T - 1)))
    npi = pol.shape[0]

    # concentration parameters
    alphas = np.zeros((npi, nc)) + learn_pol
    prior_pi = alphas / alphas.sum(axis=0)

    """
    set state prior (where agent thinks it starts)
    """
    state_prior = np.zeros((ns))
    state_prior[:4] = 1. / 4

    """
    set action selection method
    """
    ac_sel = asl.DirichletSelector(trials=trials, T=T, number_of_actions=na,
                                   factor=f, calc_dkl=False, calc_entropy=False,
                                   draw_true_post=random_draw)

    """
    set context prior
    """
    if nc > 1:
        prior_context = np.zeros((nc)) + 0.1 / (nc - 1)
        prior_context[0] = 0.9
    else:
        prior_context = np.array([1])

    """
    set up agent
    """
    # perception
    bayes_prc = prc.HierarchicalPerception(A, B, C_agent, transition_matrix_context,
                                           state_prior, utility, prior_pi, alphas,
                                           C_alphas, T=T,
                                           generative_model_context=D,
                                           pol_lambda=pol_lambda,
                                           r_lambda=r_lambda,
                                           non_decaying=4)

    # agent
    bayes_pln = agt.BayesianPlanner(bayes_prc, ac_sel, pol,
                                    trials=trials, T=T,
                                    prior_states=state_prior,
                                    prior_policies=prior_pi,
                                    number_of_states=ns,
                                    prior_context=prior_context,
                                    learn_habit=True,
                                    learn_rew=True,
                                    #save_everything = True,
                                    number_of_policies=npi,
                                    number_of_rewards=nr)

    """
    create world
    """
    w = world.World(environment, bayes_pln, trials=trials, T=T)

    """
    simulate experiment
    """
    w.simulate_experiment(range(trials))

    return w
def run_agent(par_list, trials=trials, T=T, Lx=Lx, Ly=Ly, ns=ns, na=na):

    #set parameters:
    #obs_unc: observation uncertainty condition
    #state_unc: state transition uncertainty condition
    #goal_pol: evaluate only policies that lead to the goal
    #avg: if True use the Dirichlet ('avg') selector, otherwise the maximum ('max') selector
    #context: if True, the reward is moved to goal g2 halfway through the experiment
    #utility: goal prior, preference p(o)
    #h: initial concentration parameter for the policy prior
    #q: stay probability of the context transition matrix
    obs_unc, state_unc, goal_pol, avg, context, utility, h, q = par_list

    """
    create matrices
    """

    vals = np.array([1., 2 / 3., 1 / 2., 1. / 2.])

    #generating probability of observations in each state
    A = np.eye(ns) + const
    np.fill_diagonal(A, 1 - (ns - 1) * const)

    #generate horizontal gradient for observation uncertainty condition
    # if obs_unc:
    #     condition = 'obs'
    #     for s in range(ns):
    #         x = s//Ly
    #         y = s%Ly
    #         c = 1#vals[L - y - 1]
    #         # look for neighbors
    #         neighbors = []
    #         if (s-4)>=0 and (s-4)!=g1:
    #             neighbors.append(s-4)
    #         if (s%4)!=0 and (s-1)!=g1:
    #             neighbors.append(s-1)
    #         if (s+4)<=(ns-1) and (s+4)!=g1:
    #             neighbors.append(s+4)
    #         if ((s+1)%4)!=0 and (s+1)!=g1:
    #             neighbors.append(s+1)
    #         A[s,s] = c
    #         for n in neighbors:
    #             A[n,s] = (1-c)/len(neighbors)

    #state transition generative probability (matrix)
    B = np.zeros((ns, ns, na)) + const
    cert_arr = np.zeros(ns)
    for s in range(ns):
        x = s // Ly
        y = s % Ly

        #state uncertainty condition
        if state_unc:
            if (x == 0) or (y == 3):
                c = vals[0]
            elif (x == 1) or (y == 2):
                c = vals[1]
            elif (x == 2) or (y == 1):
                c = vals[2]
            else:
                c = vals[3]
            condition = 'state'
        else:
            c = 1.

        cert_arr[s] = c
        for u in range(na):
            x = s // Ly + actions[u][0]
            y = s % Ly + actions[u][1]

            #check if state goes over boundary
            if x < 0:
                x = 0
            elif x == Lx:
                x = Lx - 1
            if y < 0:
                y = 0
            elif y == Ly:
                y = Ly - 1

            s_new = Ly * x + y
            if s_new == s:
                B[s, s, u] = 1 - (ns - 1) * const
            else:
                B[s, s, u] = 1 - c + const
                B[s_new, s, u] = c - (ns - 1) * const

    B_c = np.broadcast_to(B[:, :, :, np.newaxis], (ns, ns, na, nc))
    print(B.shape)

    """
    create environment (grid world)
    """
    Rho = np.zeros((nr, ns)) + const
    Rho[0, :] = 1 - (nr - 1) * const
    Rho[:, np.argmax(utility)] = [0 + const, 1 - (nr - 1) * const]
    print(Rho)
    util = np.array([1 - np.amax(utility), np.amax(utility)])

    environment = env.GridWorld(A, B, Rho, trials=trials, T=T, initial_state=start)

    Rho_agent = np.ones((nr, ns, nc)) / nr

    if True:
        templates = np.ones_like(Rho_agent)
        templates[0] *= 100
        assert ns == nc
        for s in range(ns):
            templates[0, s, s] = 1
            templates[1, s, s] = 100
        dirichlet_rew_params = templates
    else:
        dirichlet_rew_params = np.ones_like(Rho_agent)

    """
    create policies
    """
    if goal_pol:
        pol = []
        su = 3
        for p in itertools.product([0, 1], repeat=T - 1):
            if (np.array(p)[0:6].sum() == su) and (np.array(p)[-1] != 1):
                pol.append(list(p))
        pol = np.array(pol) + 2
    else:
        pol = np.array(list(itertools.product(list(range(na)), repeat=T - 1)))

    #pol = pol[np.where(pol[:,0]>1)]
    npi = pol.shape[0]

    prior_policies = np.ones((npi, nc)) / npi
    dirichlet_pol_param = np.zeros_like(prior_policies) + h

    """
    set state prior (where agent thinks it starts)
    """
    state_prior = np.zeros((ns))
    # state_prior[0] = 1./4.
    # state_prior[1] = 1./4.
    # state_prior[4] = 1./4.
    # state_prior[5] = 1./4.
    state_prior[start] = 1

    """
    set context prior and matrix
    """
    context_prior = np.ones(nc)
    trans_matrix_context = np.ones((nc, nc))
    if nc > 1:
        # context_prior[0] = 0.9
        # context_prior[1:] = 0.1 / (nc-1)
        context_prior /= nc
        trans_matrix_context[:] = (1 - q) / (nc - 1)
        np.fill_diagonal(trans_matrix_context, q)

    """
    set action selection method
    """
    if avg:
        sel = 'avg'
        ac_sel = asl.DirichletSelector(trials=trials, T=T, factor=0.5,
                                       number_of_actions=na, calc_entropy=False,
                                       calc_dkl=False, draw_true_post=True)
    else:
        sel = 'max'
        ac_sel = asl.MaxSelector(trials=trials, T=T, number_of_actions=na)

    # ac_sel = asl.AveragedPolicySelector(trials = trials, T = T,
    #                                     number_of_policies = npi,
    #                                     number_of_actions = na)

    """
    set up agent
    """
    #bethe agent
    if agent == 'bethe':
        agnt = 'bethe'

        # perception and planning
        bayes_prc = prc.HierarchicalPerception(A, B_c, Rho_agent,
                                               trans_matrix_context, state_prior,
                                               util, prior_policies,
                                               dirichlet_pol_params=dirichlet_pol_param,
                                               dirichlet_rew_params=dirichlet_rew_params)

        bayes_pln = agt.BayesianPlanner(bayes_prc, ac_sel, pol,
                                        trials=trials, T=T,
                                        prior_states=state_prior,
                                        prior_policies=prior_policies,
                                        prior_context=context_prior,
                                        number_of_states=ns,
                                        learn_habit=True,
                                        learn_rew=True,
                                        #save_everything = True,
                                        number_of_policies=npi,
                                        number_of_rewards=nr)
    #MF agent
    else:
        agnt = 'mf'

        # perception and planning
        bayes_prc = prc.MFPerception(A, B, state_prior, utility, T=T)

        bayes_pln = agt.BayesianMFPlanner(bayes_prc, [], ac_sel,
                                          trials=trials, T=T,
                                          prior_states=state_prior,
                                          policies=pol,
                                          number_of_states=ns,
                                          number_of_policies=npi)

    """
    create world
    """
    w = world.World(environment, bayes_pln, trials=trials, T=T)

    """
    simulate experiment
    """
    if not context:
        w.simulate_experiment()
    else:
        w.simulate_experiment(curr_trials=range(0, trials // 2))
        Rho_new = np.zeros((nr, ns)) + const
        Rho_new[0, :] = 1 - (nr - 1) * const
        Rho_new[:, g2] = [0 + const, 1 - (nr - 1) * const]
        print(Rho_new)
        w.environment.Rho[:] = Rho_new
        #w.agent.perception.generative_model_rewards = Rho_new
        w.simulate_experiment(curr_trials=range(trials // 2, trials))

    """
    plot and evaluate results
    """
    #find successful and unsuccessful runs
    #goal = np.argmax(utility)
    successfull_g1 = np.where(environment.hidden_states[:, -1] == g1)[0]
    if context:
        successfull_g2 = np.where(environment.hidden_states[:, -1] == g2)[0]
        unsuccessfull1 = np.where(environment.hidden_states[:, -1] != g1)[0]
        unsuccessfull2 = np.where(environment.hidden_states[:, -1] != g2)[0]
        unsuccessfull = np.intersect1d(unsuccessfull1, unsuccessfull2)
    else:
        unsuccessfull = np.where(environment.hidden_states[:, -1] != g1)[0]

    #total = len(successfull)

    #plot start and goal state
    start_goal = np.zeros((Lx, Ly))
    x_y_start = (start // Ly, start % Ly)
    start_goal[x_y_start] = 1.
    x_y_g1 = (g1 // Ly, g1 % Ly)
    start_goal[x_y_g1] = -1.
    x_y_g2 = (g2 // Ly, g2 % Ly)
    start_goal[x_y_g2] = -2.
    palette = [(159 / 255, 188 / 255, 147 / 255), (135 / 255, 170 / 255, 222 / 255),
               (242 / 255, 241 / 255, 241 / 255), (242 / 255, 241 / 255, 241 / 255),
               (199 / 255, 174 / 255, 147 / 255), (199 / 255, 174 / 255, 147 / 255)]

    #set up figure params
    # ~ factor = 3
    # ~ grid_plot_kwargs = {'vmin': -2, 'vmax': 2, 'center': 0, 'linecolor': '#D3D3D3',
    # ~                     'linewidths': 7, 'alpha': 1, 'xticklabels': False,
    # ~                     'yticklabels': False, 'cbar': False,
    # ~                     'cmap': palette}#sns.diverging_palette(120, 45, as_cmap=True)} #"RdBu_r",

    # ~ # plot grid
    # ~ fig = plt.figure(figsize=[factor*5,factor*4])
    # ~ ax = fig.gca()
    # ~ annot = np.zeros((Lx,Ly))
    # ~ for i in range(Lx):
    # ~     for j in range(Ly):
    # ~         annot[i,j] = i*Ly+j
    # ~ u = sns.heatmap(start_goal, ax = ax, **grid_plot_kwargs, annot=annot, annot_kws={"fontsize": 40})
    # ~ ax.invert_yaxis()
    # ~ plt.savefig('grid.svg', dpi=600)
    # ~ #plt.show()

    # ~ # set up paths figure
    # ~ fig = plt.figure(figsize=[factor*5,factor*4])
    # ~ ax = fig.gca()
    # ~ u = sns.heatmap(start_goal, zorder=2, ax = ax, **grid_plot_kwargs)
    # ~ ax.invert_yaxis()

    # ~ #find paths and count them
    # ~ n1 = np.zeros((ns, na))
    # ~ for i in successfull_g1:
    # ~     for j in range(T-1):
    # ~         d = environment.hidden_states[i, j+1] - environment.hidden_states[i, j]
    # ~         if d not in [1,-1,Ly,-Ly,0]:
    # ~             print("ERROR: beaming")
    # ~         if d == 1:
    # ~             n1[environment.hidden_states[i, j],0] +=1
    # ~         if d == -1:
    # ~             n1[environment.hidden_states[i, j]-1,0] +=1
    # ~         if d == Ly:
    # ~             n1[environment.hidden_states[i, j],1] +=1
    # ~         if d == -Ly:
    # ~             n1[environment.hidden_states[i, j]-Ly,1] +=1

    # ~ n2 = np.zeros((ns, na))
    # ~ if context:
    # ~     for i in successfull_g2:
    # ~         for j in range(T-1):
    # ~             d = environment.hidden_states[i, j+1] - environment.hidden_states[i, j]
    # ~             if d not in [1,-1,Ly,-Ly,0]:
    # ~                 print("ERROR: beaming")
    # ~             if d == 1:
    # ~                 n2[environment.hidden_states[i, j],0] +=1
    # ~             if d == -1:
    # ~                 n2[environment.hidden_states[i, j]-1,0] +=1
    # ~             if d == Ly:
    # ~                 n2[environment.hidden_states[i, j],1] +=1
    # ~             if d == -Ly:
    # ~                 n2[environment.hidden_states[i, j]-Ly,1] +=1

    # ~ un = np.zeros((ns, na))
    # ~ for i in unsuccessfull:
    # ~     for j in range(T-1):
    # ~         d = environment.hidden_states[i, j+1] - environment.hidden_states[i, j]
    # ~         if d not in [1,-1,Ly,-Ly,0]:
    # ~             print("ERROR: beaming")
    # ~         if d == 1:
    # ~             un[environment.hidden_states[i, j],0] +=1
    # ~         if d == -1:
    # ~             un[environment.hidden_states[i, j]-1,0] +=1
    # ~         if d == Ly:
    # ~             un[environment.hidden_states[i, j],1] +=1
    # ~         if d == -Ly:
    # ~             un[environment.hidden_states[i, j]-4,1] +=1

    # ~ total_num = n1.sum() + n2.sum() + un.sum()
    # ~ if np.any(n1 > 0):
    # ~     n1 /= total_num
    # ~ if np.any(n2 > 0):
    # ~     n2 /= total_num
    # ~ if np.any(un > 0):
    # ~     un /= total_num

    # ~ #plotting
    # ~ for i in range(ns):
    # ~     x = [i%Ly + .5]
    # ~     y = [i//Ly + .5]

    # ~     #plot uncertainties
    # ~     if obs_unc:
    # ~         plt.plot(x,y, 'o', color=(219/256,122/256,147/256), markersize=factor*12/(A[i,i])**2, alpha=1.)
    # ~     if state_unc:
    # ~         plt.plot(x,y, 'o', color=(100/256,149/256,237/256), markersize=factor*12/(cert_arr[i])**2, alpha=1.)
    # ~     #plot unsuccessful paths
    # ~     for j in range(2):
    # ~         if un[i,j]>0.0:
    # ~             if j == 0:
    # ~                 xp = x + [x[0] + 1]
    # ~                 yp = y + [y[0] + 0]
    # ~             if j == 1:
    # ~                 xp = x + [x[0] + 0]
    # ~                 yp = y + [y[0] + 1]
    # ~             plt.plot(xp,yp, '-', color='#D5647C', linewidth=factor*75*un[i,j],
    # ~                      zorder = 9, alpha=1)

    # ~ #set plot title
    # ~ #plt.title("Planning: successful "+str(round(100*total/trials))+"%", fontsize=factor*9)

    # ~ #plot successful paths on top
    # ~ for i in range(ns):
    # ~     x = [i%Ly + .5]
    # ~     y = [i//Ly + .5]
    # ~     for j in range(2):
    # ~         if n1[i,j]>0.0:
    # ~             if j == 0:
    # ~                 xp = x + [x[0] + 1]
    # ~                 yp = y + [y[0]]
    # ~             if j == 1:
    # ~                 xp = x + [x[0] + 0]
    # ~                 yp = y + [y[0] + 1]
    # ~             plt.plot(xp,yp, '-', color='#4682B4', linewidth=factor*75*n1[i,j],
    # ~                      zorder = 10, alpha=1)

    # ~ #plot successful paths on top
    # ~ if context:
    # ~     for i in range(ns):
    # ~         x = [i%Ly + .5]
    # ~         y = [i//Ly + .5]
    # ~         for j in range(2):
    # ~             if n2[i,j]>0.0:
    # ~                 if j == 0:
    # ~                     xp = x + [x[0] + 1]
    # ~                     yp = y + [y[0]]
    # ~                 if j == 1:
    # ~                     xp = x + [x[0] + 0]
    # ~                     yp = y + [y[0] + 1]
    # ~                 plt.plot(xp,yp, '-', color='#55ab75', linewidth=factor*75*n2[i,j],
    # ~                          zorder = 10, alpha=1)

    # ~ #print("percent won", total/trials, "state prior", np.amax(utility))

    # ~ plt.savefig('chosen_paths_'+name_str+'h'+str(h)+'.svg')
    #plt.show()

    # max_RT = np.amax(w.agent.action_selection.RT[:,0])
    # plt.figure()
    # plt.plot(w.agent.action_selection.RT[:,0], '.')
    # plt.ylim([0,1.05*max_RT])
    # plt.xlim([0,trials])
    # plt.savefig("Gridworld_Dir_h"+str(h)+".svg")
    # plt.show()

    """
    save data
    """
    if save_data:
        jsonpickle_numpy.register_handlers()

        ut = np.amax(utility)
        p_o = '{:02d}'.format(int(round(ut * 10)))
        fname = agnt + '_' + condition + '_' + sel + '_initUnc_' + p_o + '.json'
        fname = os.path.join(data_folder, fname)
        pickled = pickle.encode(w)
        with open(fname, 'w') as outfile:
            json.dump(pickled, outfile)

    return w
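
# Hypothetical helper (not part of the original script): reload a world object
# written by the save-data branch above. It assumes the same jsonpickle alias
# (`pickle`) and the jsonpickle_numpy handlers that the saving code relies on.
def load_world(fname):
    jsonpickle_numpy.register_handlers()
    with open(fname, 'r') as infile:
        pickled = json.load(infile)
    return pickle.decode(pickled)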