Beispiel #1
0
 def step(self, action_e):
     '''
     Step the wrapped environment once and gather reward, state and done data per (a, b) index.
     If the previous call marked the env as done, this resets instead and returns whatever self.reset() returns.
     @param action_e: actions for this env; passed through util.flatten_nonan before being handed to self.u_env.step
     @returns reward_e, state_e, done_e as initialized by aeb_space.init_data_s and filled in from the env info
     '''
     # TODO implement clock_speed: step only if self.clock.to_step()
     if self.done:
         return self.reset()
     flat_action_e = util.flatten_nonan(action_e)
     env_info_dict = self.u_env.step(flat_action_e)
     data_e = self.env_space.aeb_space.init_data_s(ENV_DATA_NAMES, e=self.e)
     reward_e, state_e, done_e = data_e
     for (a, b), _body in util.ndenumerate_nonan(self.body_e):
         ab_idx = (a, b)
         env_info_a = self.get_env_info(env_info_dict, a)
         reward_e[ab_idx] = env_info_a.rewards[b]
         state_e[ab_idx] = env_info_a.states[b]
         done_e[ab_idx] = env_info_a.local_done[b]
     # done when every entry reports done, or once the clock's 't' exceeds max_timestep
     all_done = util.nonan_all(done_e)
     self.done = all_done or self.clock.get('t') > self.max_timestep
     return reward_e, state_e, done_e
Beispiel #2
0
def multi_head_act_with_boltzmann(flat_nonan_body_a, state_a, net, tau):
    '''
    Sample one action per body from a net that takes a list of states and returns a list of outputs.
    Each output is divided by its matching tau entry, pushed through softmax, and an action index is drawn from the resulting probabilities.
    @param flat_nonan_body_a: iterable of bodies; each is read for `action_dim` and `aeb`
    @param state_a: states, passed through util.flatten_nonan; each state is fed to the net with a leading axis of size 1
    @param net: object whose wrap_eval is called once with the list of state inputs
    @param tau: iterable of temperatures, zipped with the net outputs
    @returns list of sampled actions, one per (body, output) pair
    '''
    torch_states = [
        Variable(torch.from_numpy(s.astype('float')).float().unsqueeze_(dim=0))
        for s in util.flatten_nonan(state_a)
    ]
    outs = net.wrap_eval(torch_states)
    scaled_outs = [torch.div(head_out, temp) for head_out, temp in zip(outs, tau)]
    logger.debug(f'taus: {tau}, outs: {outs}, out_with_temp: {scaled_outs}')
    flat_nonan_action_a = []
    for body, output in zip(flat_nonan_body_a, scaled_outs):
        head_probs = F.softmax(output)
        # drop the leading size-1 axis to get a flat probability vector
        probs = head_probs.data.numpy()[0]
        candidates = list(range(body.action_dim))
        action = np.random.choice(candidates, p=probs)
        logger.debug(f'''
        body: {body.aeb}, output: {output},
        probs: {probs}, action: {action}''')
        flat_nonan_action_a.append(action)
    return flat_nonan_action_a
Beispiel #3
0
def multi_head_act_with_epsilon_greedy(flat_nonan_body_a, state_a, net, epsilon):
    '''
    Multi-headed body flat_nonan_action_a on a single-pass from net. Uses epsilon-greedy but in a batch manner.
    @param flat_nonan_body_a: iterable of bodies; each is read for `action_dim` and `aeb`
    @param state_a: states, passed through util.flatten_nonan; each state is fed to the net with a leading axis of size 1
    @param net: object whose wrap_eval is called once with the list of state inputs and whose outputs are zipped with the bodies
    @param epsilon: iterable of exploration probabilities, zipped with the bodies
    @returns list with one plain int action per body
    '''
    flat_nonan_state_a = util.flatten_nonan(state_a)
    torch_states = [
        Variable(torch.from_numpy(state.astype('float')).float().unsqueeze_(dim=0))
        for state in flat_nonan_state_a
    ]
    # one forward pass for all heads, regardless of how many bodies end up acting greedily
    outs = net.wrap_eval(torch_states)
    flat_nonan_action_a = []
    for body, e, output in zip(flat_nonan_body_a, epsilon, outs):
        logger.debug(f'body: {body.aeb}, epsilon: {e}')
        if e > np.random.rand():
            logger.debug('Random action')
            action = np.random.randint(body.action_dim)
        else:
            logger.debug('Greedy action')
            # argmax along the action axis; int() keeps the greedy branch's return type
            # consistent with the random branch (and with multi_act_with_epsilon_greedy)
            # instead of leaking a tensor into the action list
            action = int(torch.max(output, dim=1)[1][0])
        flat_nonan_action_a.append(action)
        logger.debug(f'epsilon: {e}, outputs: {output}, action: {action}')
    return flat_nonan_action_a
Beispiel #4
0
def multi_act_with_boltzmann(flat_nonan_body_a, state_a, net, tau):
    '''
    Sample one action per body from a single net output computed on the concatenated states.
    Each body owns a consecutive slice of the output, `action_dim` wide; the slice is divided by the body's tau, pushed through softmax, and an action index is drawn from the resulting probabilities.
    @param flat_nonan_body_a: iterable of bodies; each is read for `action_dim` and `aeb`
    @param state_a: states, passed through util.flatten_nonan and concatenated into one input
    @param net: object whose wrap_eval is called once with the concatenated state
    @param tau: iterable of temperatures, zipped with the bodies
    @returns list of sampled actions, one per body
    '''
    cat_state_a = np.concatenate(util.flatten_nonan(state_a)).astype(float)
    out = net.wrap_eval(Variable(torch.from_numpy(cat_state_a).float()))
    logger.debug(f'taus: {tau}')
    flat_nonan_action_a = []
    lo = 0
    for body, t in zip(flat_nonan_body_a, tau):
        hi = lo + body.action_dim
        out_select = out[lo: hi]
        out_with_temp = torch.div(out_select, t)
        logger.debug(f'''
        tau: {t}, out: {out},
        out select: {out_select},
        out with temp: {out_with_temp}''')
        probs = F.softmax(out_with_temp).data.numpy()
        candidates = list(range(body.action_dim))
        action = np.random.choice(candidates, p=probs)
        logger.debug(f'''
        body: {body.aeb}, net idx: {lo}-{hi}
        probs: {probs}, action: {action}''')
        flat_nonan_action_a.append(action)
        # the next body's slice starts where this one ended
        lo = hi
    return flat_nonan_action_a
Beispiel #5
0
def multi_act_with_epsilon_greedy(flat_nonan_body_a, state_a, net, epsilon):
    '''
    Multi-body flat_nonan_action_a on a single-pass from net. Uses epsilon-greedy but in a batch manner.
    Each body owns a consecutive slice of the net output, `action_dim` wide; a greedy pick is the argmax within that slice.
    @param flat_nonan_body_a: iterable of bodies; each is read for `action_dim` and `aeb`
    @param state_a: states, passed through util.flatten_nonan and concatenated into one net input
    @param net: object whose wrap_eval is called with the concatenated state (at most once per call, and only if some body acts greedily)
    @param epsilon: iterable of exploration probabilities, zipped with the bodies
    @returns list with one int action per body
    '''
    flat_nonan_state_a = util.flatten_nonan(state_a)
    cat_state_a = np.concatenate(flat_nonan_state_a)
    flat_nonan_action_a = []
    # net output is loop-invariant: compute lazily on the first greedy pick and reuse,
    # rather than re-running the forward pass for every greedy body
    # NOTE(review): assumes wrap_eval is deterministic for a fixed input - confirm
    out = None
    start_idx = 0
    for body, e in zip(flat_nonan_body_a, epsilon):
        logger.debug(f'body: {body.aeb}, epsilon: {e}')
        end_idx = start_idx + body.action_dim
        if e > np.random.rand():
            logger.debug('Random action')
            action = np.random.randint(body.action_dim)
        else:
            logger.debug('Greedy action')
            if out is None:
                torch_state = Variable(
                    torch.from_numpy(cat_state_a.astype('float')).float())
                out = net.wrap_eval(torch_state)
            action = int(torch.max(out[start_idx: end_idx], dim=0)[1][0])
        flat_nonan_action_a.append(action)
        # log before advancing start_idx so the reported range is this body's actual slice
        logger.debug(f'''
        body: {body.aeb}, net idx: {start_idx}-{end_idx}
        action: {action}''')
        start_idx = end_idx
    return flat_nonan_action_a
Beispiel #6
0
 def post_body_init(self):
     '''
     Second-stage init, to be called once the bodies exist.
     Caches util.flatten_nonan(self.body_e) on self.flat_nonan_body_e, runs self.check_u_brain_to_agent(), then logs util.self_desc(self) at info level.
     '''
     flat_bodies = util.flatten_nonan(self.body_e)
     self.flat_nonan_body_e = flat_bodies
     self.check_u_brain_to_agent()
     description = util.self_desc(self)
     logger.info(description)
Beispiel #7
0
def test_flatten_nonan(arr, res):
    '''Check that util.flatten_nonan applied to `arr` (as a numpy array) equals `res` (as a numpy array).'''
    flattened = util.flatten_nonan(np.array(arr))
    expected = np.array(res)
    assert np.array_equal(flattened, expected)
Beispiel #8
0
 def post_body_init(self):
     '''
     Second-stage init, to be called once the bodies exist.
     Caches util.flatten_nonan(self.body_a) on self.flat_nonan_body_a, forwards to self.algorithm.post_body_init(), then logs util.self_desc(self) at info level.
     '''
     flat_bodies = util.flatten_nonan(self.body_a)
     self.flat_nonan_body_a = flat_bodies
     self.algorithm.post_body_init()
     description = util.self_desc(self)
     logger.info(description)