Code example #1
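The examples below are excerpts from a working-memory/reinforcement-learning codebase. They rely on the standard imports sketched here, plus project-local components (the hrr module, RL_Obj, Gate, wm_content, identity_vector, optimal_path) that are assumed to be defined elsewhere in the same project:

import random
import numpy as np
import matplotlib.pyplot as plt
import plotly.offline
from plotly.graph_objs import Scatter, Layout

import hrr  # project-local HRR utilities (hrrs, convolve, oconvolve)
# RL_Obj, Gate, wm_content, identity_vector and optimal_path are assumed to be
# importable from elsewhere in the same project.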
def flow_control_test(nstates, nepisodes):
    n = 8
    agent_actions = 2
    gate_actions = 2
    ncolors = 2
    nslots = 1
    nroles = 1

    states = hrr.hrrs(n, nstates)
    colors = hrr.hrrs(n, ncolors)  # external cue
    colors = np.row_stack((colors, identity_vector(n)))
    roles = hrr.hrrs(n, nroles)
    roles = np.row_stack((roles, identity_vector(n)))

    # preconvolve states
    role_state = hrr.oconvolve(roles, states)
    role_state = np.reshape(role_state, (nroles + 1, nstates, n))
    cue_state = hrr.oconvolve(colors, states)
    cue_state = np.reshape(cue_state, (ncolors + 1, nstates, n))
    #print(role_state.shape)
    #print(cue_state.shape)
    # create objects
    agent = RL_Obj(n, agent_actions)
    i_gate = RL_Obj(n, gate_actions)
    o_gate = RL_Obj(n, gate_actions)
    WM = wm_content(n, ncolors, nslots)

    for episode in range(nepisodes):
        state = random.randrange(0, nstates)
        color_signal = random.randrange(0, ncolors)

        role_i = 0  # role is available
        slot = 0  # slot number in use
        i_gate_input = role_state[role_i, state, :]  # input for in_gate
        i_gate_state, i_value, i_input = i_gate.action(i_gate_input)
        print('color_cue:', color_signal)
        print('i_gate_state:', i_gate_state)
        WM.wm_in_flow(i_gate_state, slot,
                      color_signal)  # control flow of wm maint contents
        print('wm_maint:', WM.get_wm_maint_statistics()[slot])
        print('wm_maint_contents:', WM.get_one_wm_maint(slot))
        role_o = 1 if WM.wm_maint_slot_is_empty(slot) else 0
        print('role_o:', role_o)
        o_gate_input = role_state[role_o, state, :]  # input for out_gate
        o_gate_state, o_value, o_input = o_gate.action(o_gate_input)
        print('o_gate_state:', o_gate_state)
        WM.wm_out_flow(o_gate_state, slot)  # control flow of wm out contents
        print('wm_output:', WM.get_wm_output_statistics()[slot])
        wm_out = WM.get_one_wm_output(slot)  # wm out contents for given slot
        print(wm_out)
        agent_input = hrr.convolve(cue_state[color_signal, state], wm_out)
        action, a_value, a_input = agent.action(agent_input)
        print('action:', action)
        print()
        print()
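flow_control_test() (and the maze tasks below) stack an identity_vector(n) row onto the colors and roles matrices so that the "no cue" / "no role" case leaves the state unchanged under convolution. The helper itself is not shown in these excerpts; a minimal sketch, assuming it returns the identity element of circular convolution:

def identity_vector(n):
    # Identity element of circular convolution: convolving any HRR with this
    # vector returns that HRR unchanged.
    v = np.zeros(n)
    v[0] = 1.0
    return v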
Code example #2
# Action-selection method (presumably from the RL_Obj class).
def action(self, state_space):
    # Bind the state HRR to every action HRR and compute their values.
    mystate = hrr.convolve(state_space, self.actions)
    values = np.dot(mystate, self.W) + self.bias
    # Greedy choice (argmax of the softmax equals argmax of the raw values),
    # with epsilon-greedy exploration.
    sm = softmax(values)
    action = np.argmax(sm)
    if random.random() < self.epsilon:
        action = random.randrange(0, self.nactions)
    x = mystate[action]  # state-action representation, later used for the eligibility trace
    return action, values[action], x
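action() relies on a softmax() helper that is not included in the excerpts; a minimal, numerically stable sketch:

def softmax(values):
    # Shift by the maximum before exponentiating to avoid overflow.
    e = np.exp(values - np.max(values))
    return e / np.sum(e)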
Code example #3
def testing_maze(nstates, nepisodes, stat_window):
    n = 128
    nactions = 2
    goal = 0
    reward = np.zeros(nstates)
    reward[goal] = 1
    states = hrr.hrrs(n, nstates)

    agent = RL_Obj(n, nactions)
    nsteps = 100
    opt_array = []
    diff_sum = 0
    mycount = 0
    for episode in range(nepisodes):
        mycount += 1
        print('episode:', episode)
        state = random.randrange(0, nstates)
        #print('state:',state)
        action, value, my_input = agent.action(states[state])
        agent.set_eligibility_zero()
        optimal_steps = optimal_path(state, goal, nstates)
        #print('optimal steps',optimal_steps)
        for step in range(nsteps):
            r = reward[state]
            if state == goal:
                agent.eligibility_trace_update(my_input)
                agent.td_update_goal(r, value)
                break

            pstate = state
            pvalue = value
            #paction = action
            agent.eligibility_trace_update(my_input)
            # Move left (-1) or right (+1) around the circular track of nstates cells.
            state = ((state + np.array([-1, 1])) % nstates)[action]
            action, value, my_input = agent.action(states[state])
            agent.td_update(r, value, pvalue)

        step_diff = abs(step - optimal_steps)
        print('step_dif:', step_diff)
        diff_sum += step_diff
        if episode % stat_window == 0:
            mean_diff = diff_sum / mycount
            opt_array.append(mean_diff)
            mycount = 0
            diff_sum = 0

            V1 = list(
                map(lambda x: np.dot(x, agent.W) + agent.bias,
                    hrr.convolve(states, agent.actions[0])))
            V2 = list(
                map(lambda x: np.dot(x, agent.W) + agent.bias,
                    hrr.convolve(states, agent.actions[1])))

            plotly.offline.plot({
                "data": [
                    Scatter(x=[x for x in range(len(V1))], y=V1, name='left'),
                    Scatter(x=[x for x in range(len(V2))], y=V2, name='right')
                ],
                "layout":
                Layout(title="",
                       xaxis=dict(title="state"),
                       yaxis=dict(title="Q(s,a)"))
            })

    plt.plot(opt_array)
    plt.show()
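Both maze tasks compare the steps taken per episode against an optimal_path() helper that is not shown. Since the maze is a ring of nstates cells traversed by single left/right moves, the optimum is presumably the shorter of the two arcs; a minimal sketch under that assumption:

def optimal_path(state, goal, nstates):
    # Shortest number of +/-1 moves between two positions on a ring of nstates cells.
    return min((state - goal) % nstates, (goal - state) % nstates)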
Code example #4
def color_maze_task(nstates, nepisodes, stat_window):
    n = 128
    agent_actions = 2
    gate_actions = 2
    ncolors = 2
    nslots = 1
    nroles = 1

    # goals and rewards
    goal = [0, nstates // 2, None]  # one goal location per color cue; index 2 (no cue) has no goal

    reward = np.zeros((ncolors + 1, nstates))
    for x in range(ncolors):
        reward[x, goal[x]] = 1

    #####
    # punishment based reward
    '''
    reward = np.ones((ncolors+1,nstates))
    reward *= -1
    for x in range(ncolors):
        reward[x,goal[x]] = 0
    '''

    states = hrr.hrrs(n, nstates)
    colors = hrr.hrrs(n, ncolors)  # external cue
    colors = np.row_stack((colors, identity_vector(n)))
    roles = hrr.hrrs(n, nroles)
    roles = np.row_stack((roles, identity_vector(n)))

    # preconvolve states
    role_state = hrr.oconvolve(roles, states)
    role_state = np.reshape(role_state, (nroles + 1, nstates, n))
    cue_state = hrr.oconvolve(colors, states)
    cue_state = np.reshape(cue_state, (ncolors + 1, nstates, n))

    agent = RL_Obj(n, agent_actions)
    i_gate = Gate(n, gate_actions)
    o_gate = Gate(n, gate_actions)
    WM = wm_content(n, ncolors, nslots)

    nsteps = 100
    opt_array = []
    diff_sum = 0
    mycount = 0
    for episode in range(nepisodes):
        print('episode:', episode)
        mycount += 1
        state = random.randrange(0, nstates)
        color_signal = random.randrange(0, ncolors)
        color = color_signal
        optimal_steps = optimal_path(state, goal[color_signal],
                                     nstates)  # tracks number of optimal steps
        role_i = 0  # role is available
        slot = 0  # slot number in use
        forced_igate_state = None
        forced_ogate_state = None
        WM.flush_all_wm_maint()
        i_gate_input = role_state[role_i, state, :]  # input for in_gate
        i_gate_state, i_value, i_input = i_gate.action(i_gate_input,
                                                       forced_igate_state)

        WM.wm_in_flow(i_gate_state, slot,
                      color_signal)  # control flow of wm maint contents

        role_o = 1 if WM.wm_maint_slot_is_empty(slot) else 0
        o_gate_input = role_state[role_o, state, :]  # input for out_gate
        o_gate_state, o_value, o_input = o_gate.action(o_gate_input,
                                                       forced_ogate_state)

        WM.wm_out_flow(o_gate_state, slot)  # control flow of wm out contents

        wm_out = WM.get_one_wm_output(slot)  # wm out contents for given slot
        agent_input = hrr.convolve(cue_state[color_signal, state], wm_out)
        action, a_value, a_input = agent.action(agent_input)

        i_gate.set_eligibility_zero()
        o_gate.set_eligibility_zero()
        agent.set_eligibility_zero()

        # clear wm output
        WM.flush_all_wm_output()

        for step in range(nsteps):
            r = reward[color, state]

            if state == goal[color]:
                i_gate.eligibility_trace_update(i_input)
                o_gate.eligibility_trace_update(o_input)
                agent.eligibility_trace_update(a_input)

                i_gate.td_update_goal(r, i_value)
                o_gate.td_update_goal(r, o_value)
                agent.td_update_goal(r, a_value)
                break

            pstate = state  # maze state
            p_i_value = i_value  # Q val for input gate
            p_o_value = o_value  # Q val for output gate
            p_a_value = a_value  # Q val for agent

            # update eligibility traces
            i_gate.eligibility_trace_update(i_input)
            o_gate.eligibility_trace_update(o_input)
            agent.eligibility_trace_update(a_input)

            # change state in maze by taking action
            state = ((state + np.array([-1, 1])) % nstates)[action]

            # turn off cue
            color_signal = 2
            role_i = 1  # role is unavailable
            forced_igate_state = 'Closed'
            forced_ogate_state = 'Open'

            i_gate_input = role_state[role_i, state, :]  # input for in_gate
            i_gate_state, i_value, i_input = i_gate.action(
                i_gate_input, forced_igate_state)
            #
            WM.wm_in_flow(i_gate_state, slot,
                          color_signal)  # control flow of wm maint contents

            role_o = 1 if WM.wm_maint_slot_is_empty(
                slot) else 0  # checks if role is available in wm_maint
            o_gate_input = role_state[role_o, state, :]  # input for out_gate
            o_gate_state, o_value, o_input = o_gate.action(
                o_gate_input, forced_ogate_state)

            WM.wm_out_flow(o_gate_state,
                           slot)  # control flow of wm out contents

            wm_out = WM.get_one_wm_output(
                slot)  # wm out contents for given slot
            agent_input = hrr.convolve(cue_state[color_signal, state], wm_out)
            action, a_value, a_input = agent.action(agent_input)

            # td update
            i_gate.td_update(r, i_value, p_i_value)
            o_gate.td_update(r, o_value, p_o_value)
            agent.td_update(r, a_value, p_a_value)

            # clear wm output
            WM.flush_all_wm_output()

        step_diff = abs(step - optimal_steps)
        diff_sum += step_diff
        if episode % stat_window == 0:
            mean_diff = diff_sum / mycount
            opt_array.append(mean_diff)
            mycount = 0
            diff_sum = 0
            # optimal steps
            plotly.offline.plot({
                "data":
                [Scatter(x=[x for x in range(len(opt_array))], y=opt_array)]
            })
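A hypothetical driver showing how these entry points might be invoked; the parameter values are illustrative only and are not taken from the original experiments:

if __name__ == '__main__':
    flow_control_test(nstates=10, nepisodes=3)
    testing_maze(nstates=20, nepisodes=5000, stat_window=100)
    color_maze_task(nstates=20, nepisodes=10000, stat_window=100)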
Code example #5
    print("  ")

    eligibility = np.zeros(lengthHRR)

    currentLocation = randrange(0, worldSize)

    workingMemory = 0

    currentTask = randint(1, 2)
    currentSignal = currentTask

    for timestep in range(1, 100):

        currentReward = reward[currentSignal, currentLocation]
        currentState = hrr.convolve(
            hrr.convolve(world[currentLocation, :], signals[currentTask, :]),
            memory[workingMemory, :])
        currentValue = np.dot(currentState, weights) + bias

        # store previous information
        previousLocation = currentLocation
        previousState = currentState
        previousWM = workingMemory
        previousTask = currentTask
        previousValue = currentValue
        eligibility = td_lambda * eligibility  # decay the eligibility trace

        # -----------------------------------------Working Memory update process----------------------------------------------

        # Threshold determines possible candidates for working memory mechanism
        if stateDimension < candidateThreshold:
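Code example #5 presumes that the HRR matrices and learning parameters were created earlier in the script, roughly along the lines of the other examples. A rough sketch of that assumed setup, with illustrative values only (the original initialization may differ):

from random import randrange, randint

lengthHRR = 128                          # HRR dimensionality
worldSize = 20                           # number of maze locations
td_lambda = 0.9                          # eligibility-trace decay rate
bias = 1.0
weights = np.zeros(lengthHRR)            # value-function weights (zero-initialized here)
world = hrr.hrrs(lengthHRR, worldSize)   # one HRR per location
signals = hrr.hrrs(lengthHRR, 3)         # task/cue-signal HRRs
memory = hrr.hrrs(lengthHRR, 3)          # working-memory content HRRs
reward = np.zeros((3, worldSize))        # reward indexed by (signal, location)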