Example #1

All six examples assume the surrounding module's imports and helpers: import random, import numpy as np, the project's hrr module, and classes/functions such as RL_Obj, Gate, wm_content, identity_vector, and optimal_path defined elsewhere in the repository. Examples #4-#6 additionally use plotly (with Scatter and Layout from plotly.graph_objs) and matplotlib.pyplot as plt. Note that np.row_stack is deprecated in recent NumPy releases; np.vstack is the drop-in equivalent.
def flow_control_test(nstates, nepisodes):
    n = 8
    agent_actions = 2
    gate_actions = 2
    ncolors = 2
    nslots = 1
    nroles = 1

    states = hrr.hrrs(n, nstates)
    colors = hrr.hrrs(n, ncolors)  # external cue
    colors = np.row_stack((colors, identity_vector(n)))
    roles = hrr.hrrs(n, nroles)
    roles = np.row_stack((roles, identity_vector(n)))

    # preconvolve states
    role_state = hrr.oconvolve(roles, states)
    role_state = np.reshape(role_state, (nroles + 1, nstates, n))
    cue_state = hrr.oconvolve(colors, states)
    cue_state = np.reshape(cue_state, (ncolors + 1, nstates, n))
    #print(role_state.shape)
    #print(cue_state.shape)
    # create objects
    agent = RL_Obj(n, agent_actions)
    i_gate = RL_Obj(n, gate_actions)
    o_gate = RL_Obj(n, gate_actions)
    WM = wm_content(n, ncolors, nslots)

    for episode in range(nepisodes):
        state = random.randrange(0, nstates)
        color_signal = random.randrange(0, ncolors)

        role_i = 0  # role is available
        slot = 0  # slot number in use
        i_gate_input = role_state[role_i, state, :]  # input for in_gate
        i_gate_state, i_value, i_input = i_gate.action(i_gate_input)
        print('color_cue:', color_signal)
        print('i_gate_state:', i_gate_state)
        WM.wm_in_flow(i_gate_state, slot,
                      color_signal)  # control flow of wm maint contents
        print('wm_maint:', WM.get_wm_maint_statistics()[slot])
        print('wm_maint_contents:', WM.get_one_wm_maint(slot))
        role_o = 1 if WM.wm_maint_slot_is_empty(slot) else 0
        print('role_o:', role_o)
        o_gate_input = role_state[role_o, state, :]  # input for out_gate
        o_gate_state, o_value, o_input = o_gate.action(o_gate_input)
        print('o_gate_state:', o_gate_state)
        WM.wm_out_flow(o_gate_state, slot)  # control flow of wm out contents
        print('wm_output:', WM.get_wm_output_statistics()[slot])
        wm_out = WM.get_one_wm_output(slot)  # wm out contents for given slot
        print(wm_out)
        agent_input = hrr.convolve(cue_state[color_signal, state], wm_out)
        action, a_value, a_input = agent.action(agent_input)
        print('action:', action)
        print()
        print()
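
A minimal driver for this smoke test, assuming the snippet's module-level dependencies (hrr, RL_Obj, wm_content, identity_vector) are importable:

if __name__ == '__main__':
    # Hypothetical invocation: a small state space and a few episodes
    # are enough to watch the gate/WM printout above.
    flow_control_test(nstates=4, nepisodes=3)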
Example #2
def __init__(self, n, nitems, n_wm_slots):
    self.n = n
    self.nitems = nitems
    self.n_wm_slots = n_wm_slots
    # Init wm maint/output slots to the identity vector ("empty").
    # Note: [x] * k aliases the same array k times, which is safe here
    # only because slots are reassigned rather than mutated in place.
    self.wm_maint = [identity_vector(n)] * n_wm_slots
    self.wm_output = [identity_vector(n)] * n_wm_slots
    self.wm_maint_statistics = [-1] * n_wm_slots  # view of what's in wm_maint
    self.wm_output_statistics = [-1] * n_wm_slots  # view of what's in wm_output
    # Encode available internal representations; the appended identity
    # row (index nitems) stands for the empty / "cue off" item.
    self.wm_items = hrr.hrrs(n, nitems)
    self.wm_items = np.row_stack((self.wm_items, identity_vector(n)))
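
The identity_vector helper is external to these snippets, but Example #4 builds the same vector inline (hrr_i = np.zeros(n); hrr_i[0] = 1), so a matching sketch would be:

import numpy as np

def identity_vector(n):
    # The identity element of circular convolution: convolving any HRR
    # with this vector leaves it unchanged, which is why it doubles as
    # the "empty slot" / "cue off" representation in these examples.
    v = np.zeros(n)
    v[0] = 1
    return v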
Example #3
def __init__(self, n, nactions, bias=1, lrate=0.1, gamma=0.9,
             td_lambda=0.9, epsilon=0.01):
    self.n = n  # vector length
    self.nactions = nactions  # number of discrete actions
    self.W = hrr.hrr(n)  # linear value-function weights
    self.actions = hrr.hrrs(n, nactions)  # one HRR per action
    self.eligibility = hrr.hrr(n)  # eligibility trace (reset via set_eligibility_zero at episode start)
    self.bias = bias  # additive bias on computed values
    self.lrate = lrate  # learning rate
    self.gamma = gamma  # discount factor
    self.td_lambda = td_lambda  # trace decay rate
    self.epsilon = epsilon  # epsilon-greedy exploration rate
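
RL_Obj's action method is not shown in this listing. Judging from its call sites and from the Q-value readout np.dot(hrr.convolve(state, action), W) + bias in Example #5's plotting code, an epsilon-greedy sketch might look like this (an assumption, not the repository's exact code):

def action(self, my_input):
    # Convolve the state input with each action's HRR and score each
    # candidate against the weight vector.
    inputs = [hrr.convolve(my_input, a) for a in self.actions]
    values = [np.dot(x, self.W) + self.bias for x in inputs]
    a = int(np.argmax(values))
    if random.random() < self.epsilon:  # epsilon-greedy exploration
        a = random.randrange(self.nactions)
    # Return the chosen action index, its value estimate, and the
    # convolved input (later fed to eligibility_trace_update).
    return a, values[a], inputs[a]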
Example #4
def maze_task(nstates, nepisodes, stat_window):
    n = 128
    nactions = 2  # left right
    ncolors = 2  # red/blue
    #ngates = 1
    ngate_states = 2  # open/close
    nmaint_states = 2  # empty/full
    nroles = 1  # number of roles

    # reward matrix for each context: the goal for color 0 is state 0,
    # for color 1 it is the middle state; the cue-off row has no goal
    goal = [0, nstates // 2, None]
    reward = np.zeros((ncolors + 1, nstates))
    #reward = np.ones((ncolors+1,nstates)) # punishment based
    #reward = reward*-10

    for x in range(ncolors):
        reward[x, goal[x]] = 1

    # punishment based reward
    '''
    for x in range(ncolors):
        reward[x,goal[x]] = 0
    '''
    # basic actions are left and right
    states = hrr.hrrs(n, nstates)
    actions = hrr.hrrs(n, nactions)

    # identity vector
    hrr_i = np.zeros(n)
    hrr_i[0] = 1

    # external color
    external = hrr.hrrs(n, ncolors)
    external = np.row_stack((external, hrr_i))

    # Internal Representations
    wm_slots = hrr.hrrs(n, ncolors)
    wm_slots = np.row_stack((wm_slots, hrr_i))

    # working memory contents
    wm_maint = [hrr_i]
    wm_output = [hrr_i]

    # wm maint state (empty,full)
    #wm_maint_state = hrr.hrrs(n,nmaint_states)

    # Gate
    #gates = hrr.hrrs(n,ngates)
    roles = hrr.hrrs(n, nroles)
    roles = np.row_stack((roles, hrr_i))

    # Gate state (open/closed)
    gate_states = hrr.hrrs(n, ngate_states)

    # Weight vectors
    IGate_W = hrr.hrr(n)  # Input gate
    OGate_W = hrr.hrr(n)  # Output gate
    AGate_W = hrr.hrr(n)  # Agent

    IBias, OBias, ABias = 1, 1, 1  # Bias for gates and agent
    eligibility = np.zeros(n)
    epsilon = 0.1
    nsteps = 100

    # NB: the same eligibility array object is handed to all three
    # learners; each resets its trace via set_eligibility_zero(n).
    i_gate = input_gate(IGate_W, gate_states, eligibility)
    o_gate = output_gate(OGate_W, gate_states, eligibility)
    myagent = agent(AGate_W, actions, eligibility)
    wm_cont = wm_content(wm_maint, wm_output, wm_slots)
    opt_array = []
    diff_sum = 0
    mycount = 0
    for episode in range(nepisodes):
        print('episode:', episode)
        mycount += 1  # used for mean diff
        state = random.randrange(0, nstates)
        color_signal = random.randrange(0, ncolors)

        optimal_steps = optimal_path(state, goal[color_signal], nstates)
        #print(optimal_steps)
        # set external cue
        cue = color_signal
        color = cue  # remember the true context; cue is switched off mid-episode
        close_gate = False  # gate-forcing flags; named to avoid shadowing the builtin open()
        open_gate = False
        wm, wm_num = wm_cont.get_wm_maint()
        i_gate_state, i_value, i_state = i_gate.gate_action(
            roles[0], states[state], close_gate)
        wm_cont.update_wm_maint(i_gate_state, cue)
        wm_state = wm_cont.get_wm_maint_state()
        o_gate_state, o_value, o_state = o_gate.gate_action(
            roles[wm_state], states[state], open_gate)
        wm_cont.update_wm_output(o_gate_state)
        wm_o, wm_out_num = wm_cont.get_wm_output()
        action, a_value, a_state = myagent.agent_action(
            external[cue], states[state], wm_o)
        #print(i_gate_state,o_gate_state)
        i_gate.set_eligibility_zero(n)
        o_gate.set_eligibility_zero(n)
        myagent.set_eligibility_zero(n)

        #testing purpose
        wm, wm_num = wm_cont.get_wm_maint()

        #print(wm_num,wm_out_num)
        # set output wm to identity vector
        wm_cont.set_wm_output(2)

        # turn off exploration after 10000 episodes
        if episode == 10000:
            i_gate.epsilon, o_gate.epsilon, myagent.epsilon = 0, 0, 0

        for step in range(nsteps):
            r = reward[color, state]

            if state == goal[color]:
                i_gate.eligibility_trace_update(i_state)
                o_gate.eligibility_trace_update(o_state)
                myagent.eligibility_trace_update(a_state)

                i_gate.td_update_goal(r, i_value)
                o_gate.td_update_goal(r, o_value)
                myagent.td_update_goal(r, a_value)
                #print('Made it to goal')
                break

            pstate = state  # maze state
            p_i_value = i_value  # Q val for input gate
            p_o_value = o_value  # Q val for output gate
            p_a_value = a_value  # Q val for agent

            # update eligibility traces
            i_gate.eligibility_trace_update(i_state)
            o_gate.eligibility_trace_update(o_state)
            myagent.eligibility_trace_update(a_state)

            # change state in maze by taking action
            state = ((state + np.array([-1, 1])) % nstates)[action]

            # turn off cue (index ncolors selects the identity row)
            cue = 2

            close_gate = True
            open_gate = False
            wm, wm_num = wm_cont.get_wm_maint()
            i_gate_state, i_value, i_state = i_gate.gate_action(
                roles[1], states[state], close_gate)
            wm_cont.update_wm_maint(i_gate_state, cue)
            wm_state = wm_cont.get_wm_maint_state()
            o_gate_state, o_value, o_state = o_gate.gate_action(
                roles[wm_state], states[state], open_gate)
            wm_cont.update_wm_output(o_gate_state)
            wm_o, wm_out_num = wm_cont.get_wm_output()
            action, a_value, a_state = myagent.agent_action(
                external[cue], states[state], wm_o)

            #testing
            #print(wm_num,wm_out_num)
            # compute errors and update weights
            i_gate.td_update(r, i_value, p_i_value)
            o_gate.td_update(r, o_value, p_o_value)
            myagent.td_update(r, a_value, p_a_value)

            # set output wm to identity vector
            wm_cont.set_wm_output(2)
        #print('step:',step)
        # check for optimal steps being learned
        step_diff = abs(step - optimal_steps)
        diff_sum += step_diff
        if episode % stat_window == 0:
            mean_diff = diff_sum / mycount
            opt_array.append(mean_diff)
            mycount = 0
            diff_sum = 0
            # iGate
            '''
            one = hrr.convolve(states[:],hrr.convolve(roles[0],gate_states[0]))
            two = hrr.convolve(states[:],hrr.convolve(roles[0],gate_states[1]))
            three = hrr.convolve(states[:],hrr.convolve(roles[1],gate_states[0]))
            four = hrr.convolve(states[:],hrr.convolve(roles[1],gate_states[1]))
            V1 = list(map(lambda x: np.dot(x,i_gate.W)+IBias, one))
            V2 = list(map(lambda x: np.dot(x,i_gate.W)+IBias, two))
            V3 = list(map(lambda x: np.dot(x,i_gate.W)+IBias, three))
            V4 = list(map(lambda x: np.dot(x,i_gate.W)+IBias, four))
            '''
            # oGate
            '''
            one = hrr.convolve(states[:],hrr.convolve(roles[0],gate_states[0]))
            two = hrr.convolve(states[:],hrr.convolve(roles[0],gate_states[1]))
            three = hrr.convolve(states[:],hrr.convolve(roles[1],gate_states[0]))
            four = hrr.convolve(states[:],hrr.convolve(roles[1],gate_states[1]))
            V1 = list(map(lambda x: np.dot(x,o_gate.W)+OBias, one))
            V2 = list(map(lambda x: np.dot(x,o_gate.W)+OBias, two))
            V3 = list(map(lambda x: np.dot(x,o_gate.W)+OBias, three))
            V4 = list(map(lambda x: np.dot(x,o_gate.W)+OBias, four))
            '''
            # agent
            '''
            one = hrr.convolve(states[:],hrr.convolve(wm_slots[0],actions[0]))
            two = hrr.convolve(states[:],hrr.convolve(wm_slots[0],actions[1]))
            three = hrr.convolve(states[:],hrr.convolve(wm_slots[1],actions[0]))
            four = hrr.convolve(states[:],hrr.convolve(wm_slots[1],actions[1]))
            V1 = list(map(lambda x: np.dot(x,myagent.W)+ABias, one))
            V2 = list(map(lambda x: np.dot(x,myagent.W)+ABias, two))
            V3 = list(map(lambda x: np.dot(x,myagent.W)+ABias, three))
            V4 = list(map(lambda x: np.dot(x,myagent.W)+ABias, four))
            #V5 = list(map(lambda x: np.dot(x,W)+bias, s_s_a_wm[0,2,:,0,:]))
            #V6 = list(map(lambda x: np.dot(x,W)+bias, s_s_a_wm[0,2,:,1,:]))
            #V7 = list(map(lambda x: np.dot(x,W)+bias, s_s_a_wm[1,2,:,0,:]))
            #V8 = list(map(lambda x: np.dot(x,W)+bias, s_s_a_wm[1,2,:,1,:]))
            '''
            #plotly.offline.iplot
            '''
            plotly.offline.plot({
            "data": [Scatter(x=[x for x in range(len(V1))] , y=V1),
                    Scatter(x=[x for x in range(len(V2))] , y=V2),
                    Scatter(x=[x for x in range(len(V2))] , y=V3),
                    Scatter(x=[x for x in range(len(V2))] , y=V4)],
                    #Scatter(x=[x for x in range(len(V2))] , y=V5),
                    #Scatter(x=[x for x in range(len(V2))] , y=V6),
                    #Scatter(x=[x for x in range(len(V2))] , y=V7),
                    #Scatter(x=[x for x in range(len(V2))] , y=V8)],
            "layout": Layout(title="",xaxis=dict(title="state"),yaxis=dict(title="V(s)"))
            })
            '''
            # optimal steps
            plotly.offline.plot({
                "data":
                [Scatter(x=[x for x in range(len(opt_array))], y=opt_array)]
            })
    plt.plot(opt_array)
    plt.show()
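
For reference, the TD(lambda) bookkeeping behind eligibility_trace_update, td_update, and td_update_goal would conventionally be the following (a sketch consistent with the call signatures above, not necessarily the repository's code):

def eligibility_trace_update(self, my_input):
    # Decay the trace and accumulate the latest state-action input.
    self.eligibility = self.td_lambda * self.eligibility + my_input

def td_update(self, r, value, pvalue):
    # One-step TD error for a non-terminal transition.
    error = r + self.gamma * value - pvalue
    self.W += self.lrate * error * self.eligibility

def td_update_goal(self, r, value):
    # At the goal there is no successor, so the target is just r.
    error = r - value
    self.W += self.lrate * error * self.eligibility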
Example #5
def testing_maze(nstates, nepisodes, stat_window):
    n = 128
    nactions = 2
    goal = 0
    reward = np.zeros(nstates)
    reward[goal] = 1
    states = hrr.hrrs(n, nstates)

    agent = RL_Obj(n, nactions)
    nsteps = 100
    opt_array = []
    diff_sum = 0
    mycount = 0
    for episode in range(nepisodes):
        mycount += 1
        print('episode:', episode)
        state = random.randrange(0, nstates)
        #print('state:',state)
        action, value, my_input = agent.action(states[state])
        agent.set_eligibility_zero()
        optimal_steps = optimal_path(state, goal, nstates)
        #print('optimal steps',optimal_steps)
        for step in range(nsteps):
            r = reward[state]
            if state == goal:
                agent.eligibility_trace_update(my_input)
                agent.td_update_goal(r, value)
                break

            pstate = state
            pvalue = value
            #paction = action
            agent.eligibility_trace_update(my_input)
            state = ((state + np.array([-1, 1])) % nstates)[action]
            action, value, my_input = agent.action(states[state])
            agent.td_update(r, value, pvalue)

        step_diff = abs(step - optimal_steps)
        print('step_dif:', step_diff)
        diff_sum += step_diff
        if episode % stat_window == 0:
            mean_diff = diff_sum / mycount
            opt_array.append(mean_diff)
            mycount = 0
            diff_sum = 0

            V1 = list(
                map(lambda x: np.dot(x, agent.W) + agent.bias,
                    hrr.convolve(states, agent.actions[0])))
            V2 = list(
                map(lambda x: np.dot(x, agent.W) + agent.bias,
                    hrr.convolve(states, agent.actions[1])))

            plotly.offline.plot({
                "data": [
                    Scatter(x=[x for x in range(len(V1))], y=V1, name='left'),
                    Scatter(x=[x for x in range(len(V2))], y=V2, name='right')
                ],
                "layout":
                Layout(title="",
                       xaxis=dict(title="state"),
                       yaxis=dict(title="Q(s,a)"))
            })

    plt.plot(opt_array)
    plt.show()
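
optimal_path is also defined outside these snippets. Since the transition ((state + np.array([-1, 1])) % nstates)[action] makes the maze a ring, the optimal step count is presumably the circular distance to the goal:

def optimal_path(state, goal, nstates):
    # Fewest left/right moves between two cells on a ring of nstates.
    d = abs(state - goal)
    return min(d, nstates - d)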
Example #6
def color_maze_task(nstates, nepisodes, stat_window):
    n = 128
    agent_actions = 2
    gate_actions = 2
    ncolors = 2
    nslots = 1
    nroles = 1

    # goals and rewards
    goal = [0, nstates // 2, None]

    reward = np.zeros((ncolors + 1, nstates))
    for x in range(ncolors):
        reward[x, goal[x]] = 1

    #####
    # punishment based reward
    '''
    reward = np.ones((ncolors+1,nstates))
    reward *= -1
    for x in range(ncolors):
        reward[x,goal[x]] = 0
    '''

    states = hrr.hrrs(n, nstates)
    colors = hrr.hrrs(n, ncolors)  # external cue
    colors = np.row_stack((colors, identity_vector(n)))
    roles = hrr.hrrs(n, nroles)
    roles = np.row_stack((roles, identity_vector(n)))

    # preconvolve states
    role_state = hrr.oconvolve(roles, states)
    role_state = np.reshape(role_state, (nroles + 1, nstates, n))
    cue_state = hrr.oconvolve(colors, states)
    cue_state = np.reshape(cue_state, (ncolors + 1, nstates, n))

    agent = RL_Obj(n, agent_actions)
    i_gate = Gate(n, gate_actions)
    o_gate = Gate(n, gate_actions)
    WM = wm_content(n, ncolors, nslots)

    nsteps = 100
    opt_array = []
    diff_sum = 0
    mycount = 0
    for episode in range(nepisodes):
        print('episode:', episode)
        mycount += 1
        state = random.randrange(0, nstates)
        color_signal = random.randrange(0, ncolors)
        color = color_signal  # remember the true context; color_signal is switched off mid-episode
        optimal_steps = optimal_path(state, goal[color_signal],
                                     nstates)  # tracks number of optimal steps
        role_i = 0  # role is available
        slot = 0  # slot number in use
        forced_igate_state = None
        forced_ogate_state = None
        WM.flush_all_wm_maint()
        i_gate_input = role_state[role_i, state, :]  # input for in_gate
        i_gate_state, i_value, i_input = i_gate.action(i_gate_input,
                                                       forced_igate_state)

        WM.wm_in_flow(i_gate_state, slot,
                      color_signal)  # control flow of wm maint contents

        role_o = 1 if WM.wm_maint_slot_is_empty(slot) else 0
        o_gate_input = role_state[role_o, state, :]  # input for out_gate
        o_gate_state, o_value, o_input = o_gate.action(o_gate_input,
                                                       forced_ogate_state)

        WM.wm_out_flow(o_gate_state, slot)  # control flow of wm out contents

        wm_out = WM.get_one_wm_output(slot)  # wm out contents for given slot
        agent_input = hrr.convolve(cue_state[color_signal, state], wm_out)
        action, a_value, a_input = agent.action(agent_input)

        i_gate.set_eligibility_zero()
        o_gate.set_eligibility_zero()
        agent.set_eligibility_zero()

        # clear wm output
        WM.flush_all_wm_output()

        for step in range(nsteps):
            r = reward[color, state]

            if state == goal[color]:
                i_gate.eligibility_trace_update(i_input)
                o_gate.eligibility_trace_update(o_input)
                agent.eligibility_trace_update(a_input)

                i_gate.td_update_goal(r, i_value)
                o_gate.td_update_goal(r, o_value)
                agent.td_update_goal(r, a_value)
                break

            pstate = state  # maze state
            p_i_value = i_value  # Q val for input gate
            p_o_value = o_value  # Q val for output gate
            p_a_value = a_value  # Q val for agent

            # update eligibility traces
            i_gate.eligibility_trace_update(i_input)
            o_gate.eligibility_trace_update(o_input)
            agent.eligibility_trace_update(a_input)

            # change state in maze by taking action
            state = ((state + np.array([-1, 1])) % nstates)[action]

            # turn off cue (index ncolors selects the identity row)
            color_signal = 2
            role_i = 1  # role is unavailable
            forced_igate_state = 'Closed'
            forced_ogate_state = 'Open'

            i_gate_input = role_state[role_i, state, :]  # input for in_gate
            i_gate_state, i_value, i_input = i_gate.action(
                i_gate_input, forced_igate_state)
            #
            WM.wm_in_flow(i_gate_state, slot,
                          color_signal)  # control flow of wm maint contents

            role_o = 1 if WM.wm_maint_slot_is_empty(
                slot) else 0  # checks if role is available in wm_maint
            o_gate_input = role_state[role_o, state, :]  # input for out_gate
            o_gate_state, o_value, o_input = o_gate.action(
                o_gate_input, forced_ogate_state)

            WM.wm_out_flow(o_gate_state,
                           slot)  # control flow of wm out contents

            wm_out = WM.get_one_wm_output(
                slot)  # wm out contents for given slot
            agent_input = hrr.convolve(cue_state[color_signal, state], wm_out)
            action, a_value, a_input = agent.action(agent_input)

            # td update
            i_gate.td_update(r, i_value, p_i_value)
            o_gate.td_update(r, o_value, p_o_value)
            agent.td_update(r, a_value, p_a_value)

            # clear wm output
            WM.flush_all_wm_output()

        step_diff = abs(step - optimal_steps)
        diff_sum += step_diff
        if episode % stat_window == 0:
            mean_diff = diff_sum / mycount
            opt_array.append(mean_diff)
            mycount = 0
            diff_sum = 0
            # optimal steps
            plotly.offline.plot({
                "data":
                [Scatter(x=[x for x in range(len(opt_array))], y=opt_array)]
            })
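
The wm_in_flow and wm_out_flow methods used above are likewise not shown. From the constructor in Example #2 and the 'Open'/'Closed' gate labels forced in this example, a consistent sketch (assumed, not the repository's exact code) is:

def wm_in_flow(self, gate_state, slot, item):
    # An open input gate latches the cued item into the maintenance
    # slot; item == nitems selects the identity row, i.e. "cue off".
    if gate_state == 'Open':
        self.wm_maint[slot] = self.wm_items[item]
        self.wm_maint_statistics[slot] = item

def wm_out_flow(self, gate_state, slot):
    # An open output gate exposes the maintained content; otherwise
    # the output slot keeps whatever it holds (the identity vector
    # after flush_all_wm_output).
    if gate_state == 'Open':
        self.wm_output[slot] = self.wm_maint[slot]
        self.wm_output_statistics[slot] = self.wm_maint_statistics[slot]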