Ejemplo n.º 1
0
    else:
        from urllib.request import urlretrieve
    urlretrieve(snapshot_url, snapshot_path)

# NOTE(review): `load` is defined outside this excerpt; presumably it restores
# saved parameters from `snapshot_path` into `resolver` -- confirm.
load(resolver, snapshot_path)

# NOTE(review): presumably switches off epsilon-greedy exploration -- confirm.
epsilon.set_value(0)

## one-turn response function
# Symbolic inputs for a single step, each supplied as a flat vector.
prev_memory_state = T.vector("prev_mem_state_input", dtype=floatX)
observation = T.vector("observation_input", dtype=floatX)

# Give both inputs a leading axis of length 1 before handing them to the agent.
prev_memory_tensor = prev_memory_state.reshape([1, -1])
observation_tensor = observation.reshape([1, -1])

# The fourth value returned by the agent is not used in this section.
(
    new_state_tensor,
    Qvalues_tensor,
    action_tensor,
    _,
) = agent.get_agent_reaction(prev_memory_tensor, observation_tensor)

# Drop the leading axis again by taking index 0 of every output.
new_state = new_state_tensor[0]
Qvalues = Qvalues_tensor[0]
action = action_tensor[0]

# Compile the step into a callable. This binds the module-level name
# `get_agent_reaction` (distinct from the `agent.get_agent_reaction` method).
get_agent_reaction = theano.function(
    inputs=[prev_memory_state, observation],
    outputs=[new_state, Qvalues, action],
)


##auxiliary transformer function
def response_to_observation(response, prev_action, is_alive=True):
    """creates a wikicat-format observation from"""

    observation = np.zeros(env.observation_size, dtype=floatX)

    observation[0] = response
Ejemplo n.º 2
0
## one-turn response function
# Symbolic inputs for a single step, each supplied as a flat vector.
prev_memory_state = T.vector("prev_mem_state_input", dtype=floatX)
observation = T.vector("observation_input", dtype=floatX)

# Give both inputs a leading axis of length 1.
prev_memory_tensor = prev_memory_state.reshape([1, -1])
observation_tensor = observation.reshape([1, -1])

# Split the flat memory by column: the first `n_hid_1` columns are passed as
# the previous state of `gru1`, the remaining columns as that of `gru2`.
prev_gru1 = prev_memory_tensor[:, :n_hid_1]
prev_gru2 = prev_memory_tensor[:, n_hid_1:]

# The agent takes the previous memory as a {layer: state} mapping here.
# The second returned value is not used in this section.
action_channels, _, outputs = agent.get_agent_reaction(
    {gru1: prev_gru1, gru2: prev_gru2},
    observation_tensor,
)

action_tensor = action_channels[0]
Qvalues_tensor = outputs[0]
new_state_tensor = outputs[1]  # concatenated memory

# Drop the leading axis again by taking index 0 of every output.
new_state = new_state_tensor[0]
Qvalues = Qvalues_tensor[0]
action = action_tensor[0]

# Compile the step into a callable. This binds the module-level name
# `get_agent_reaction` (distinct from the `agent.get_agent_reaction` method).
get_agent_reaction = theano.function(
    inputs=[prev_memory_state, observation],
    outputs=[new_state, Qvalues, action],
)


Ejemplo n.º 3
0
# NOTE(review): presumably switches off epsilon-greedy exploration -- confirm.
epsilon.set_value(0)

## one-turn response function
# Symbolic inputs for a single step, each supplied as a flat vector.
prev_memory_state = T.vector("prev_mem_state_input", dtype=floatX)
observation = T.vector("observation_input", dtype=floatX)

# Give both inputs a leading axis of length 1 before handing them to the agent.
prev_memory_tensor = prev_memory_state.reshape([1, -1])
observation_tensor = observation.reshape([1, -1])

# The fourth value returned by the agent is not used in this section.
(
    new_state_tensor,
    Qvalues_tensor,
    action_tensor,
    _,
) = agent.get_agent_reaction(prev_memory_tensor, observation_tensor)

# Drop the leading axis again by taking index 0 of every output.
new_state = new_state_tensor[0]
Qvalues = Qvalues_tensor[0]
action = action_tensor[0]

# Compile the step into a callable. This binds the module-level name
# `get_agent_reaction` (distinct from the `agent.get_agent_reaction` method).
get_agent_reaction = theano.function(
    inputs=[prev_memory_state, observation],
    outputs=[new_state, Qvalues, action],
)



##auxiliary transformer function
def response_to_observation(response,prev_action,is_alive=True):
    """creates a wikicat-format observation from"""
    
    observation = np.zeros(env.observation_size,dtype=floatX)
    
Ejemplo n.º 4
0
# NOTE(review): presumably switches off epsilon-greedy exploration -- confirm.
epsilon.set_value(0)

## one-turn response function
# Symbolic inputs for a single step, each supplied as a flat vector.
prev_memory_state = T.vector("prev_mem_state_input", dtype=floatX)
observation = T.vector("observation_input", dtype=floatX)

# Give both inputs a leading axis of length 1.
prev_memory_tensor = prev_memory_state.reshape([1, -1])
observation_tensor = observation.reshape([1, -1])

# Split the flat memory by column: the first `n_hid_1` columns are passed as
# the previous state of `gru1`, the remaining columns as that of `gru2`.
prev_gru1 = prev_memory_tensor[:, :n_hid_1]
prev_gru2 = prev_memory_tensor[:, n_hid_1:]

# The agent takes the previous memory as a {layer: state} mapping here.
# The second returned value is not used in this section.
action_channels, _, outputs = agent.get_agent_reaction(
    {gru1: prev_gru1, gru2: prev_gru2},
    observation_tensor,
)

action_tensor = action_channels[0]
Qvalues_tensor = outputs[0]
new_state_tensor = outputs[1]  # concatenated memory

# Drop the leading axis again by taking index 0 of every output.
new_state = new_state_tensor[0]
Qvalues = Qvalues_tensor[0]
action = action_tensor[0]

# Compile the step into a callable. This binds the module-level name
# `get_agent_reaction` (distinct from the `agent.get_agent_reaction` method).
get_agent_reaction = theano.function(
    inputs=[prev_memory_state, observation],
    outputs=[new_state, Qvalues, action],
)