def I_gate_model(state_size, nepisodes, stat_window, ep_0):
    """Train an input/output-gated working-memory model on the bottleneck grid.

    Three TD(lambda) Q-learners share one HRR long-term memory (`hrr.LTM`):
    a motor `agent` choosing among up/down/left/right, an input gate
    `i_gate` controlling whether the color cue is written into working
    memory, and an output gate `o_gate` controlling whether the stored item
    is read out into the agent's input.  Each episode starts at a random
    location in rows >= 2 (the shared area) with a random color cue; the
    cue is turned off after the first step, so reaching the correct goal
    requires gating the cue into working memory.

    Args:
        state_size: (nrows, ncols) grid dimensions.
        nepisodes: number of training episodes before the test phase.
        stat_window: episodes between statistics printouts; the greedy test
            phase runs for 5 * stat_window episodes.
        ep_0: episode at which epsilon is set to 0 and testing begins.

    Returns:
        (LTM_obj, agent, i_gate, o_gate, WM): the trained components.
    """
    #n = 512
    beginTime = time.time()
    num_agents = 100
    n = 1024
    hrr_size = n
    nagent_actions = 4
    gate_actions = 2
    #ncolors = 3
    nslots = 1
    nroles = 1
    nrolestates = 2
    Closed, Open = 0, 1
    agent_actions = ['up', 'down', 'left', 'right']
    colors = ['purple', '']  # external colors
    #colors = ['red','green','purple',''] # external colors
    ncolors = len(colors)
    nothing = len(colors) - 1
    #wm_colors = ['wm_red','wm_green',''] # internal colors
    roles = ['role_avail', 'role_unavail']
    gate_actions = ['closed', 'open']
    gateclosed = 'gateclosed'
    gateopen = 'gateopen'

    nrows, ncols = state_size
    state_table = state_machine.state_machine.generate_state_table_bottle(
        nrows, ncols)
    #state_table = state_machine.state_machine.generate_state_table_bottle_v2(nrows,ncols) # additional top row

    LTM_obj = hrr.LTM(n, True)
    #agent = gate.Q_learning(n,LTM_obj,epsilon=0.05,bias=1)
    agent = gate.Q_learning(n, LTM_obj, epsilon=0.1, bias=1)
    #i_gate = ma.micro_agents(num_agents,hrr_size,LTM_obj,bias=1,lrate=0.1,td_lambda=0.9,epsilon=0.01)
    i_gate = gate.Q_learning(n, LTM_obj, epsilon=0.1, bias=1)
    o_gate = gate.Q_learning(n, LTM_obj, epsilon=0.1, bias=1)
    #o_gate = ma.micro_agents(num_agents,hrr_size,LTM_obj,bias=1,lrate=0.1,td_lambda=0.9,epsilon=0.01)
    WM = gate.wm_content(colors, nslots, LTM_obj)
    #print('number of items',len(WM.wm_items))
    #print('items',WM.wm_items[0],WM.wm_items[1],WM.wm_items[2])
    s_mac = state_machine.state_machine(state_table)
    nsteps = 100
    goal = [[0, ncols // 2]]
    #goal = [ [0,0],[0,ncols-1],[0,ncols//2] ]
    #goal = [ [2,0],[2,ncols-1] ]
    #goal = [ [2,0],[2,ncols-1],[0,ncols//2] ] # make goals equal distance
    value_function = np.zeros((nrows, ncols))  # store Q values for statitics
    # track performance
    opt_array = []
    diff_array = []
    t1 = ""

    #handhold = 400000
    anneal = ep_0

    o_gate_correct = 0  # stats for output gate
    o_gate_total = 0  # stats for output gate
    o_gate_correct_shared = 0
    o_gate_correct_non = 0
    shared_total = 0
    non_total = 0
    o_gate_stat = []
    mean_o_gate = []
    accuracy = []
    acc_array = []
    i_gate_init_correct = []
    mean_acc = 0
    testing = False
    check = True
    r = 0
    episode = 1
    #for episode in range(nepisodes):
    memory_states = []
    memory_states_testing = []
    memory = ''
    correct_mem_use = 'n/a'
    diff_save = []
    diff_save_red, diff_red = [], []
    diff_save_green, diff_green = [], []
    diff_save_purple, diff_purple = [], []
    test_flag = False
    # Episode loop. Training runs until episode `anneal` (== ep_0); then
    # epsilon is zeroed and a 5 * stat_window greedy test phase is run.
    while episode < nepisodes + 1:
        never_opened_flag = True
        ATR = ''
        WM.flush_all_wm_maint()  # flushes contents of working memory
        #print('episode:',episode)
        o_gate_correct_shared_local = 0
        shared_total_local = 0
        count = 0
        #t1 = str(count)
        t1 = ''
        row = random.randrange(
            2, nrows)  # get random row for state (in shared area)
        col = random.randrange(0, ncols)  # get random col for state
        cur_loc = [row, col]  # current location of agent
        color_signal = random.randrange(0, ncolors - 1)  # get random color
        color = color_signal  # used for reward because color signal may change
        '''
        if color_signal == nothing:
            color = random.randrange(0,ncolors-1)
        '''
        #r = reward(cur_loc,colors[color],state_size) if color!=nothing else 0
        '''
        if episode == anneal:
            agent.epsilon,i_gate.epsilon,o_gate.epsilon = 0,0,0
        '''
        if check and episode == anneal:
            agent.epsilon, i_gate.epsilon, o_gate.epsilon = 0, 0, 0
            testing = True
            check = False

        if testing:
            nepisodes = 5 * stat_window
            episode = 0
            testing = False
            test_flag = True

        #optimal_steps = optimal_path_length(cur_loc,color_signal) # get optimal steps to goal
        optimal_steps = optimal_path_length_bottle(
            cur_loc, colors[color_signal])  # get optimal steps to goal
        #optimal_steps = optimal_path_length_bottle_v2(cur_loc,color_signal) # get optimal steps to goal
        state = (state_table[row][col])["state"]  # current state

        role_i = 0 if color_signal != nothing else 1
        slot = 0
        igate_restrict = [0, 1]  # restricted set
        ogate_restrict = [1]  # restricted set, ogate alway open
        '''
        if episode < handhold: # hand holding
            ogate_restrict = [0] if row >= 2 else [1]
        else:
            ogate_restrict = [0,1]
        '''
        #print("cue:",colors[color_signal])
        #print("role_i:",roles[role_i])
        i_gate_input = roles[role_i] + '*' + state + '*' + t1
        LTM_obj.encode(i_gate_input)
        igate_restrict = [0] if color_signal == nothing else [
            0, 1
        ]  # gate can only open if something is present
        i_gate_state, i_value, i_max, i_input, i_values = i_gate.action(
            i_gate_input, gate_actions, igate_restrict)
        WM.wm_in_flow(i_gate_state, slot,
                      color_signal)  # control flow of wm contents
        #print("i_gate:",i_gate_state)
        #print("wm_in:",WM.get_one_wm_maint(slot))
        role_o = 1 if WM.wm_maint_slot_is_empty(slot) else 0
        #print("role_o:",roles[role_o])
        o_gate_input = roles[role_o] + '*' + state + '*' + t1
        LTM_obj.encode(o_gate_input)
        #ogate_restrict = [0] if WM.wm_maint_slot_is_empty(slot) else [0,1] # gate can only open if memory is present in slot
        o_gate_state, o_value, o_max, o_input, o_values = o_gate.action(
            o_gate_input, gate_actions, ogate_restrict)
        WM.wm_out_flow(o_gate_state, slot)  # wm out contents for given slots
        wm_out = WM.get_one_wm_output(slot)
        #print("o_gate:",o_gate_state)
        #print("wm_out:",WM.get_one_wm_output(slot))
        #print("\n\n")
        '''
        if o_gate_state == Closed and not WM.wm_maint_slot_is_empty(slot):
            agent_input = gateclosed+'*'+colors[color_signal]+'*'+state+'*'+wm_out+'*'+t1
            LTM_obj.encode(agent_input)
        else:
            agent_input = colors[color_signal]+'*'+state+'*'+wm_out+'*'+t1
            LTM_obj.encode(agent_input)
            print("before:",LTM_obj.encode(WM.get_one_wm_maint(slot)))
            WM.update_wm_maint(slot,nothing)
            print("after:",LTM_obj.encode(WM.get_one_wm_maint(slot)))
        '''
        # context for it agent is remembering initial color que or not
        #print(colors[color_signal])
        #print(gate_actions[i_gate_state])
        #print(gate_actions[o_gate_state])
        '''
        if o_gate_state == Closed and never_opened_flag:
            if not WM.wm_maint_slot_is_empty(slot):
                ATR = 'remember'
            else:
                ATR = 'not_remember'
        
        if o_gate_state == Open:
            #pass
            WM.update_wm_maint(slot,nothing)
            memory_states.append(state)
            if test_flag:
                memory_states_testing.append(state)
            memory = state
            ATR = 'used'
            never_opened_flag = False
        '''
        #print(ATR)
        #print()
        # NOTE(review): ATR is '' for the whole episode — the code that sets
        # it ('remember'/'not_remember'/'used') is commented out above.
        agent_input = colors[
            color_signal] + '*' + state + '*' + wm_out + '*' + ATR
        LTM_obj.encode(agent_input)
        action_restrict = (state_table[row][col]
                           )["action_restrict"]  # get restricted action set
        action, a_value, a_max, a_input, a_values = agent.action(
            agent_input, agent_actions, action_restrict)  # agent action
        #print("agent_value: ",a_value)
        #print("input_value: ",i_value)
        #print("output_value:",o_value)

        o_gate_correct_shared += 1 if o_gate_state == Closed else 0
        o_gate_correct_shared_local += 1 if o_gate_state == Closed else 0
        shared_total += 1
        shared_total_local += 1
        if (color_signal != nothing
                and i_gate_state == Open) or (color_signal == nothing
                                              and i_gate_state == Closed):
            i_gate_init_correct.append(1)
        else:
            i_gate_init_correct.append(0)

        i_gate.set_eligibility_zero()
        o_gate.set_eligibility_zero()
        agent.set_eligibility_zero()  # set eligibility trace to zero
        WM.flush_all_wm_output()
        count += 1
        # Step loop: reward check, TD updates, move, re-gate, re-choose action.
        for step in range(nsteps):
            #t1 = str(count)
            t1 = ''
            r = reward(cur_loc, colors[color],
                       state_size) if color != nothing else 0
            #r = reward_v2(cur_loc,colors[color],state_size) if color!=nothing else 0
            #r = punishment(cur_loc,colors[color],state_size) if color!=nothing else -1
            #print(state)
            if color != nothing and (cur_loc == goal[color]
                                     or cur_loc in goal):
                i_gate.eligibility_trace_update(i_input)
                o_gate.eligibility_trace_update(o_input)
                agent.eligibility_trace_update(a_input)

                # if you get to a goal and it's 0, dont apply the delta
                i_gate.td_update_goal(r, i_value)
                o_gate.td_update_goal(r, o_value)
                agent.td_update_goal(r, a_value)
                #print('goal!!!')
                o_gate_correct_shared_local = 0
                shared_total_local = 0
                break

            # Cache previous-step inputs/Q-values for the TD updates below.
            p_a_input = agent_input
            p_i_input = i_gate_input
            p_o_input = o_gate_input
            p_a_action = action
            p_i_action = i_gate_state
            p_o_action = o_gate_state
            p_action_restrict = action_restrict
            p_igate_restrict = igate_restrict
            p_ogate_restrict = ogate_restrict

            p_loc = cur_loc
            p_i_value = i_value  # Q val for input gate
            p_i_max = i_max
            p_o_value = o_value  # Q val for output gate
            p_o_max = o_max
            p_a_value = a_value  # Q val for agent
            p_a_max = a_max

            # update eligibility traces
            i_gate.eligibility_trace_update(i_input)
            o_gate.eligibility_trace_update(o_input)
            agent.eligibility_trace_update(a_input)

            # move agent
            state, cur_loc = s_mac.move(p_loc, action)
            row, col = cur_loc[0], cur_loc[1]
            color_signal = nothing  # turn off cue

            role_i = 0 if color_signal != nothing else 1
            igate_restrict = [0]  # restricted set
            ogate_restrict = [1]  # restricted set, ogate always open
            #ogate_restrict = [1] if episode < handhold else [0,1] # hand holding
            '''
            if episode < handhold: # hand holding
                ogate_restrict = [0] if row >= 2 else [1]
            else:
                ogate_restrict = [0,1]
            '''
            #
            #print("cue:",colors[color_signal])
            #print("role_i:",roles[role_i])
            i_gate_input = roles[role_i] + '*' + state + '*' + t1
            LTM_obj.encode(i_gate_input)
            igate_restrict = [0] if color_signal == nothing else [
                0, 1
            ]  # gate can only open if something is present
            i_gate_state, i_value, i_max, i_input, i_values = i_gate.action(
                i_gate_input, gate_actions, igate_restrict)
            #print("i_gate:",i_gate_state)
            WM.wm_in_flow(i_gate_state, slot,
                          color_signal)  # control flow of wm contents
            #print("wm_in:",WM.get_one_wm_maint(slot))
            role_o = 1 if WM.wm_maint_slot_is_empty(slot) else 0
            o_gate_input = roles[role_o] + '*' + state + '*' + t1
            LTM_obj.encode(o_gate_input)
            #ogate_restrict = [0] if WM.wm_maint_slot_is_empty(slot) else [0,1] # gate can only open if memory is present in slot
            o_gate_state, o_value, o_max, o_input, o_values = o_gate.action(
                o_gate_input, gate_actions, ogate_restrict)
            #print("role_o:",roles[role_o])
            #print("o_gate:",o_gate_state)
            WM.wm_out_flow(o_gate_state,
                           slot)  # wm out contents for given slots
            wm_out = WM.get_one_wm_output(slot)
            #print("wm_out:",wm_out)
            #print("\n\n")
            '''
            if o_gate_state == Closed and not WM.wm_maint_slot_is_empty(slot):
                agent_input = gateclosed+'*'+colors[color_signal]+'*'+state+'*'+wm_out+'*'+t1
                LTM_obj.encode(agent_input)
            else:
                agent_input = colors[color_signal]+'*'+state+'*'+wm_out+'*'+t1
                LTM_obj.encode(agent_input)
                WM.update_wm_maint(slot,nothing)
            '''

            # context for it agent is remembering initial color que or not
            '''
            if o_gate_state == Closed and never_opened_flag:
                if not WM.wm_maint_slot_is_empty(slot):
                    ATR = 'remember'
                else:
                    ATR = 'not_remember'
            
            if o_gate_state == Open:
                #pass
                WM.update_wm_maint(slot,nothing)
                memory_states.append(state)
                if test_flag:
                    memory_states_testing.append(state)
                memory = state
                ATR = 'used'
                never_opened_flag = False
            '''
            agent_input = colors[
                color_signal] + '*' + state + '*' + wm_out + '*' + ATR
            LTM_obj.encode(agent_input)
            action_restrict = (state_table[row][col])[
                "action_restrict"]  # get restricted action set
            action, a_value, a_max, a_input, a_values = agent.action(
                agent_input, agent_actions, action_restrict)  # agent action

            if row >= 2:
                o_gate_correct_shared += 1 if o_gate_state == Closed else 0
                o_gate_correct_shared_local += 1 if o_gate_state == Closed else 0
                shared_total_local += 1
                shared_total += 1
            else:
                o_gate_correct_non += 1 if o_gate_state == Open else 0
                non_total += 1
            '''
            error1 = agent.td_update(r,a_value,p_a_value)
            error2 = o_gate.td_update(r,a_value,p_o_value)
            error3 = i_gate.td_update(r,a_value,p_i_value)
            '''
            '''
            if error1 < -.5 or error2 < -.5 or error3 < -.5:
                if error1 < -.5:
                    #print('agent error triggered')
                    agent.set_eligibility_zero()
                    #agent.eligibility_trace_update(a_input)
                    agent.td_update_goal(r,a_value)
                if error2 < -.5:
                    #print('ogate error triggered')
                    o_gate.set_eligibility_zero()
                    #o_gate.eligibility_trace_update(a_input)
                    o_gate.td_update_goal(r,o_value)
                if error3 < -5:
                    #print('igate error triggered')
                    i_gate.set_eligibility_zero()
                    #i_gate.eligibility_trace_update(a_input)
                    i_gate.td_update_goal(r,i_value)
                
                break
            '''

            # Off-policy-style updates toward a_max; returned errors unused.
            error1 = agent.td_update(r, a_max, p_a_value)
            error2 = o_gate.td_update(r, a_max, p_o_value)
            error3 = i_gate.td_update(r, a_max, p_i_value)

            WM.flush_all_wm_output()
            count += 1
        if test_flag:
            if color != nothing:
                diff_save.append(abs(step - optimal_steps))
                if colors[color] == 'red':
                    diff_save_red.append(abs(step - optimal_steps))
                elif colors[color] == 'green':
                    diff_save_green.append(abs(step - optimal_steps))
                elif colors[color] == 'purple':
                    diff_save_purple.append(abs(step - optimal_steps))

        if color != nothing:
            step_diff = abs(step - optimal_steps)
            #print(step_diff)
            opt_array.append(step_diff)
            if step_diff == 0 and cur_loc == goal[color]:
                acc_array.append(1)
            else:
                acc_array.append(0)

            if colors[color] == 'red':
                diff_red.append(abs(step - optimal_steps))
            elif colors[color] == 'green':
                diff_green.append(abs(step - optimal_steps))
            elif colors[color] == 'purple':
                diff_purple.append(abs(step - optimal_steps))

        # Window statistics: decay epsilon and report per-window averages.
        if (episode) % stat_window == 0:
            # print performance
            agent.epsilon *= 0.99
            i_gate.epsilon *= 0.99
            o_gate.epsilon *= 0.99
            print('episode:', episode)
            mean_diff = sum(opt_array) / len(opt_array)
            diff_array.append(mean_diff)
            mean_acc = sum(acc_array) / len(acc_array)
            accuracy.append(mean_acc)

            mean_diff_red = sum(diff_red) / len(diff_red) if len(
                diff_red) > 0 else 'N/A'
            mean_diff_green = sum(diff_green) / len(diff_green) if len(
                diff_green) > 0 else 'N/A'
            mean_diff_purple = sum(diff_purple) / len(diff_purple) if len(
                diff_purple) > 0 else 'N/A'
            #plt.plot(diff_array)
            #plt.show()
            #plt.plot(accuracy)
            #plt.show()
            opt_array = []
            acc_array = []
            diff_red, diff_green, diff_purple = [], [], []
            #print("agent_value:",a_value)
            #print("input_value:",i_value)
            #print("output_value:",o_value)
            #print('episode:',episode)
            print("mean diff:", mean_diff)
            print("mean accuracy:", mean_acc)
            igate_acc = sum(i_gate_init_correct) / len(i_gate_init_correct)
            print("mean igate:", igate_acc)
            print("mean diff red", mean_diff_red)
            print("mean diff green", mean_diff_green)
            print("mean diff purple", mean_diff_purple)
            i_gate_init_correct = []
            ###################
            if shared_total > 0:
                shared = o_gate_correct_shared / shared_total
                #print("mean o_gate_shared:",o_gate_correct_shared/shared_total)
            if non_total > 0:
                pass
                #print("mean o_gate_non:",o_gate_correct_non/non_total)
            if len(memory_states) > 0:
                correct_mem_use = (
                    memory_states.count('S12')) / len(memory_states)
                #print('correct memory_state use (S12):',correct_mem_use)
                #print('S10 memory:',(memory_states.count('S10'))/len(memory_states))
                #print('S11 memory:',(memory_states.count('S11'))/len(memory_states))
                #print('correct memory_state use (S12):',correct_mem_use)
                #print('S13 memory:',(memory_states.count('S13'))/len(memory_states))
                #print('S14 memory:',(memory_states.count('S14'))/len(memory_states))
                for x in range(nrows):
                    for y in range(ncols):
                        mem = 'S' + str(x) + str(y)
                        print(mem, 'memory:',
                              (memory_states.count(mem)) / len(memory_states))
            memory_states = []
            print()
            o_gate_correct_shared = 0
            o_gate_correct_non = 0
            shared_total = 0
            non_total = 0
            o_gate_total = 0
        episode += 1
    # NOTE(review): mean_diff is unbound here if the stat window never
    # triggered (nepisodes < stat_window); mean_acc falls back to its 0 init.
    print(mean_acc, mean_diff, sep=',')

    #WA,fname1 = agent.get_weights(), "IO_Model_agent_weights.txt" # agent weights
    #WO,fname2 = o_gate.get_weights(), "IO_Model_output_weights.txt"# output gate weights
    #WI,fname3 = i_gate.get_weights(), "IO_Model_input_weights.txt"# input gate weights
    #store_weights(fname1,WA) # store agent weights
    #store_weights(fname2,WO) # store output gate weights
    #store_weights(fname3,WI) # store input gate weights
    #fname = 'step_diff_RGP.dat'
    #fname2 = 'step_diff_red_RGP.dat'
    #fname3 = 'step_diff_green_RGP.dat'
    #fname4 = 'step_diff_purple_RGP.dat'
    #fname5 = 'memory_states_RGP.dat'
    #store_weights(fname,diff_save)
    #store_weights(fname2,diff_save_red)
    #store_weights(fname3,diff_save_green)
    #store_weights(fname4,diff_save_purple)
    #store_weights(fname5,memory_states_testing)

    endTime = time.time()
    #print(endTime-beginTime,"seconds wall time")
    return LTM_obj, agent, i_gate, o_gate, WM
# Ejemplo n.º 2
def IO_gate_model(state_size, nepisodes, stat_window):
    """Train the two-cue (red/green) input/output-gated working-memory model.

    Same architecture as I_gate_model: a motor `agent` plus input and output
    working-memory gates, each a TD(lambda) Q-learner over a shared HRR
    long-term memory.  Episodes start at a random location in rows >= 2 with
    a red or green cue; the cue is turned off after the first step
    (color_signal set to 2 == nothing), and the goal depends on the cue
    ([0, 0] for red, [0, ncols - 1] for green).  When the output gate is
    closed, the 'gateclosed' token is prepended to the agent's input string.
    Epsilon is set to 0 at the hard-coded episode `anneal`; the final print
    is the last window's rate of closed output-gate decisions in the shared
    rows.

    Args:
        state_size: (nrows, ncols) grid dimensions.
        nepisodes: total number of episodes to run.
        stat_window: episodes between statistics windows.

    Returns:
        (LTM_obj, agent, i_gate, o_gate, WM): the trained components.
    """
    n = 1024
    #n = 1024
    nagent_actions = 4
    gate_actions = 2
    ncolors = 2
    nslots = 1
    nroles = 1
    nrolestates = 2
    Closed, Open = 0, 1
    agent_actions = ['up', 'down', 'left', 'right']
    colors = ['red', 'green', '']  # external colors
    nothing = len(colors) - 1
    #wm_colors = ['wm_red','wm_green',''] # internal colors
    roles = ['role_avail', 'role_unavail']
    gate_actions = ['closed', 'open']
    gateclosed = 'gateclosed'
    gateopen = 'gateopen'

    nrows, ncols = state_size
    state_table = state_machine.state_machine.generate_state_table_bottle(
        nrows, ncols)

    LTM_obj = hrr.LTM(n, True)
    #agent = gate.Q_learning(n,LTM_obj,epsilon=0.05,bias=1)
    agent = gate.Q_learning(n, LTM_obj, epsilon=0.01, bias=1)
    i_gate = gate.Q_learning(n, LTM_obj, epsilon=0.01, bias=1)
    o_gate = gate.Q_learning(n, LTM_obj, epsilon=0.01, bias=1)
    WM = gate.wm_content(colors, nslots, LTM_obj)
    s_mac = state_machine.state_machine(state_table)
    nsteps = 100
    goal = [[0, 0], [0, ncols - 1]]
    value_function = np.zeros((nrows, ncols))  # store Q values for statitics
    # track performance
    opt_array = []
    diff_array = []
    t1 = ""

    handhold = 400000
    anneal = 490000

    o_gate_correct = 0  # stats for output gate
    o_gate_total = 0  # stats for output gate
    o_gate_correct_shared = 0
    o_gate_correct_non = 0
    shared_total = 0
    non_total = 0
    o_gate_stat = []
    mean_o_gate = []
    accuracy = []
    acc_array = []
    shared = 0
    # Episode loop: epsilon is annealed to 0 once episode == anneal.
    for episode in range(nepisodes):

        count = 0
        #t1 = str(count)
        t1 = "t1"
        row = random.randrange(
            2, nrows)  # get random row for state (in shared area)
        col = random.randrange(0, ncols)  # get random col for state
        cur_loc = [row, col]  # current location of agent
        color_signal = random.randrange(0, ncolors)  # get random color
        color = color_signal  # used for reward because color signal may change
        #r = reward(cur_loc,colors[color],state_size) if color!=nothing else 0

        if episode == anneal:
            agent.epsilon, i_gate.epsilon, o_gate.epsilon = 0, 0, 0
        #optimal_steps = optimal_path_length(cur_loc,color_signal) # get optimal steps to goal
        optimal_steps = optimal_path_length_bottle(
            cur_loc, color_signal)  # get optimal steps to goal
        state = (state_table[row][col])["state"]  # current state

        role_i = 0 if color_signal != nothing else 1
        slot = 0
        igate_restrict = [0, 1]  # restricted set
        ogate_restrict = [0, 1]  # restricted set
        '''
        if episode < handhold: # hand holding
            ogate_restrict = [0] if row >= 2 else [1]
        else:
            ogate_restrict = [0,1]
        '''

        WM.flush_all_wm_maint()
        #print("cue:",colors[color_signal])
        #print("role_i:",roles[role_i])
        i_gate_input = roles[role_i] + '*' + state + '*' + t1
        LTM_obj.encode(i_gate_input)
        i_gate_state, i_value, i_max, i_input, i_values = i_gate.action(
            i_gate_input, gate_actions, igate_restrict)
        WM.wm_in_flow(i_gate_state, slot,
                      color_signal)  # control flow of wm contents
        #print("i_gate:",i_gate_state)
        #print("wm_in:",WM.get_one_wm_maint(slot))
        role_o = 1 if WM.wm_maint_slot_is_empty(slot) else 0
        #print("role_o:",roles[role_o])
        o_gate_input = roles[role_o] + '*' + state + '*' + t1
        LTM_obj.encode(o_gate_input)
        o_gate_state, o_value, o_max, o_input, o_values = o_gate.action(
            o_gate_input, gate_actions, ogate_restrict)
        WM.wm_out_flow(o_gate_state, slot)  # wm out contents for given slots
        wm_out = WM.get_one_wm_output(slot)
        #print("o_gate:",o_gate_state)
        #print("wm_out:",WM.get_one_wm_output(slot))
        #print("\n\n")

        if o_gate_state == Closed:
            agent_input = gateclosed + '*' + colors[
                color_signal] + '*' + state + '*' + wm_out + '*' + t1
            LTM_obj.encode(agent_input)
        else:
            agent_input = colors[
                color_signal] + '*' + state + '*' + wm_out + '*' + t1
            LTM_obj.encode(agent_input)

        #agent_input = colors[color_signal]+'*'+state+'*'+wm_out+'*'+t1
        action_restrict = (state_table[row][col]
                           )["action_restrict"]  # get restricted action set
        action, a_value, a_max, a_input, a_values = agent.action(
            agent_input, agent_actions, action_restrict)  # agent action
        #print("agent_value: ",a_value)
        #print("input_value: ",i_value)
        #print("output_value:",o_value)

        o_gate_correct_shared += 1 if o_gate_state == Closed else 0
        shared_total += 1

        i_gate.set_eligibility_zero()
        o_gate.set_eligibility_zero()
        agent.set_eligibility_zero()  # set eligibility trace to zero
        WM.flush_all_wm_output()
        count += 1
        # Step loop: reward check, TD updates, move, re-gate, re-choose action.
        for step in range(nsteps):

            #t1 = str(count)
            t1 = ''
            r = reward(cur_loc, colors[color],
                       state_size) if color != nothing else 0
            #r = punishment(cur_loc,colors[color],state_size) if color!=nothing else -1
            #print(state)
            if color != nothing and cur_loc == goal[color]:
                i_gate.eligibility_trace_update(i_input)
                o_gate.eligibility_trace_update(o_input)
                agent.eligibility_trace_update(a_input)
                i_gate.td_update_goal(r, i_value)
                o_gate.td_update_goal(r, o_value)
                agent.td_update_goal(r, a_value)
                #print('goal!!!')
                break

            # Cache previous-step inputs/Q-values for the TD updates below.
            p_a_input = agent_input
            p_i_input = i_gate_input
            p_o_input = o_gate_input
            p_a_action = action
            p_i_action = i_gate_state
            p_o_action = o_gate_state
            p_action_restrict = action_restrict
            p_igate_restrict = igate_restrict
            p_ogate_restrict = ogate_restrict

            p_loc = cur_loc
            p_i_value = i_value  # Q val for input gate
            p_i_max = i_max
            p_o_value = o_value  # Q val for output gate
            p_o_max = o_max
            p_a_value = a_value  # Q val for agent
            p_a_max = a_max

            # update eligibility traces
            i_gate.eligibility_trace_update(i_input)
            o_gate.eligibility_trace_update(o_input)
            agent.eligibility_trace_update(a_input)
            # move agent
            state, cur_loc = s_mac.move(p_loc, action)
            row, col = cur_loc[0], cur_loc[1]
            color_signal = 2  # turn off cue

            role_i = 0 if color_signal != nothing else 1
            igate_restrict = [0]  # restricted set
            ogate_restrict = [0, 1]  # restricted set
            #ogate_restrict = [1] if episode < handhold else [0,1] # hand holding
            '''
            if episode < handhold: # hand holding
                ogate_restrict = [0] if row >= 2 else [1]
            else:
                ogate_restrict = [0,1]
            '''
            #
            #print("cue:",colors[color_signal])
            #print("role_i:",roles[role_i])
            i_gate_input = roles[role_i] + '*' + state + '*' + t1
            LTM_obj.encode(i_gate_input)
            i_gate_state, i_value, i_max, i_input, i_values = i_gate.action(
                i_gate_input, gate_actions, igate_restrict)
            #print("i_gate:",i_gate_state)
            WM.wm_in_flow(i_gate_state, slot,
                          color_signal)  # control flow of wm contents
            #print("wm_in:",WM.get_one_wm_maint(slot))
            role_o = 1 if WM.wm_maint_slot_is_empty(slot) else 0
            o_gate_input = roles[role_o] + '*' + state + '*' + t1
            LTM_obj.encode(o_gate_input)
            o_gate_state, o_value, o_max, o_input, o_values = o_gate.action(
                o_gate_input, gate_actions, ogate_restrict)
            #print("role_o:",roles[role_o])
            #print("o_gate:",o_gate_state)
            WM.wm_out_flow(o_gate_state,
                           slot)  # wm out contents for given slots
            wm_out = WM.get_one_wm_output(slot)
            #print("wm_out:",wm_out)
            #print("\n\n")

            if o_gate_state == Closed:
                agent_input = gateclosed + '*' + colors[
                    color_signal] + '*' + state + '*' + wm_out + '*' + t1
                LTM_obj.encode(agent_input)
            else:
                agent_input = colors[
                    color_signal] + '*' + state + '*' + wm_out + '*' + t1
                LTM_obj.encode(agent_input)

            #agent_input = colors[color_signal]+'*'+state+'*'+wm_out
            action_restrict = (state_table[row][col])[
                "action_restrict"]  # get restricted action set
            action, a_value, a_max, a_input, a_values = agent.action(
                agent_input, agent_actions, action_restrict)  # agent action
            # ----------------------------------
            # Bias gates to stay in a state for long periods of time

            a_beta = 0
            '''
            if a_value > 0:
                if p_a_action == action:
                    a_beta = 0
                else:
                    a_beta = 1
            else:
                if p_a_action == action:
                    a_beta = 1
                else:
                    a_beta = 0
            '''
            o_beta = 0
            '''
            if a_value > 0:
                if p_o_action == o_gate_state:
                    o_beta = 0
                else:
                    o_beta = 1
            else:
                if p_o_action == o_gate_state:
                    o_beta = 1
                else:
                    o_beta = 0
            '''
            i_beta = 0
            '''
            if a_value > 0:
                if p_i_action == i_gate_state:
                    i_beta = 0
                else:
                    i_beta = 1
            else:
                if p_i_action == i_gate_state:
                    i_beta = 1
                else:
                    i_beta = 0
            '''
            # -------------------------------------
            if row >= 2:
                o_gate_correct_shared += 1 if o_gate_state == Closed else 0
                shared_total += 1
            else:
                o_gate_correct_non += 1 if o_gate_state == Open else 0
                non_total += 1
            '''
            agent.td_update(r,a_value,p_a_value)
            o_gate.td_update(r,a_value,p_o_value)
            i_gate.td_update(r,a_value,p_i_value)
            '''

            # All betas are 0 here — the bias logic above is commented out.
            agent.td_update_transfer(r, a_value, p_a_value, a_beta)
            o_gate.td_update_transfer(r, a_value, p_o_value, o_beta)
            i_gate.td_update_transfer(r, a_value, p_i_value, i_beta)

            WM.flush_all_wm_output()
            count += 1
        if color != nothing:
            step_diff = abs(step - optimal_steps)
            #print(step_diff)
            opt_array.append(step_diff)
            if step_diff == 0:
                acc_array.append(1)
            else:
                acc_array.append(0)

        # Window statistics: compute shared o-gate closure rate, reset counters.
        if (episode + 1) % stat_window == 0:
            # print performance
            mean_diff = sum(opt_array) / len(opt_array)
            diff_array.append(mean_diff)
            mean_acc = sum(acc_array) / len(acc_array)
            accuracy.append(mean_acc)
            #plt.plot(diff_array)
            #plt.show()
            #plt.plot(accuracy)
            #plt.show()
            opt_array = []
            acc_array = []
            #print("agent_value:",a_value)
            #print("input_value:",i_value)
            #print("output_value:",o_value)
            #print("mean diff:",mean_diff)
            #print("mean accuracy:",mean_acc)

            ###################
            if shared_total > 0:
                shared = o_gate_correct_shared / shared_total
                #print("mean o_gate_shared:",o_gate_correct_shared/shared_total)

            if non_total > 0:
                pass
                #print("mean o_gate_non:",o_gate_correct_non/non_total)
            o_gate_correct_shared = 0
            o_gate_correct_non = 0
            shared_total = 0
            non_total = 0
            o_gate_total = 0
    # NOTE(review): `shared` keeps its initial 0 if no stat window completed.
    print(shared)
    #WA,fname1 = agent.get_weights(), "IO_Model_agent_weights.txt" # agent weights
    #WO,fname2 = o_gate.get_weights(), "IO_Model_output_weights.txt"# output gate weights
    #WI,fname3 = i_gate.get_weights(), "IO_Model_input_weights.txt"# input gate weights
    #store_weights(fname1,WA) # store agent weights
    #store_weights(fname2,WO) # store output gate weights
    #store_weights(fname3,WI) # store input gate weights
    return LTM_obj, agent, i_gate, o_gate, WM