def sample_action(self, current_state, explore=True):
        """Sample a 2-component action from the collision-avoidance actor.

        Args:
            current_state: state object providing ``laserScan`` plus the
                fields ``utils.target_transform`` reads (target pose).
            explore: when True, Gaussian noise from ``self.noise`` is added
                with 30% probability (skipped entirely for train_type 3).

        Returns:
            tuple(float, float): the two action components, each clamped
            by ``utils.constrain_actions`` (presumably to [-1, 1] — confirm
            against utils).
        """
        # generate sensor data; reshape to a (1, sensor_dim) batch row
        array_laser = utils.remapping_laser_data(current_state.laserScan)
        sensor = Variable(
            torch.FloatTensor(np.reshape(array_laser, (1, self.sensor_dim))))

        # generate target data in polar form, shaped (1, target_dim)
        target_polar = utils.target_transform(current_state)
        target = Variable(
            torch.FloatTensor(np.reshape(target_polar, (1, self.target_dim))))

        # forward pass through the collision-avoidance actor network
        action = self.actor_ca(sensor=sensor, target=target).cpu().data.numpy()

        # Exploration noise with 30% probability.
        # BUG FIX: was `self.train_type is not 3` — identity comparison with
        # an int literal only works by CPython small-int caching; use `!=`.
        if explore and random.uniform(0, 1) > 0.7 and self.train_type != 3:
            action = action + self.noise.sample()

        # constrain both action components
        action[0][0] = utils.constrain_actions(action[0][0], 1)
        action[0][1] = utils.constrain_actions(action[0][1], 1)

        return action[0][0], action[0][1]
# Beispiel #2
    def navigation(self, current_state):
        """Blend the target-driven controller with the collision-avoidance
        actor, weighting toward avoidance as obstacles get closer.

        Returns:
            tuple(float, float): the two blended, clamped action components.
        """
        # laser scan -> (1, sensor_dim) network input
        laser = utils.remapping_laser_data(current_state.laserScan)
        sensor = Variable(
            torch.FloatTensor(np.reshape(laser, (1, self.sensor_dim))))

        # target pose in polar form -> (1, target_dim) network input
        polar = utils.target_transform(current_state)
        target = Variable(
            torch.FloatTensor(np.reshape(polar, (1, self.target_dim))))

        # candidate actions from both controllers
        td_action = self.differential_driver.run(
            x=current_state.desired_x, y=current_state.desired_y)
        ca_action = self.actor_ca(sensor=sensor, target=target).cpu().data.numpy()
        # evaluation-net call kept for its (possible) side effects
        predict_state = self.evaluation_net.predict_state(laser.reshape(1, -1))

        # Collision-avoidance weight from the smallest remapped laser value.
        # NOTE(review): remapped readings are presumably non-positive, so
        # dividing by -3.5 gives a ratio in [0, 1] capped at 1 — confirm
        # against utils.remapping_laser_data.
        nearest = float(torch.kthvalue(sensor, 1)[0])
        ratio = min(nearest / (-3.5), 1)

        blended = [
            (1.0 - ratio) * td_action[0][idx] + ratio * ca_action[0][idx]
            for idx in range(2)
        ]

        # constrain both components
        blended[0] = utils.constrain_actions(blended[0], 1)
        blended[1] = utils.constrain_actions(blended[1], 1)

        return blended[0], blended[1]
    def sample_action(self, current_state, laser_data, explore=True):
        """Sample a 2-component action, choosing the controller by train_type.

        Args:
            current_state: state providing ``desired_x``/``desired_y`` and the
                newest ``laserScan``.
            laser_data: sequence of (at least) two earlier laser scans; they
                are stacked with the current scan into one sensor input.
            explore: when True, noise from ``self.noise`` is added with 30%
                probability (skipped entirely for train_type 3).

        Returns:
            tuple(float, float): the two action components, each clamped by
            ``utils.constrain_actions``.
        """
        # reshape is needed here, because single sample and batch sample
        # should both be 2-dim
        state = Variable(
            torch.FloatTensor(
                np.transpose(
                    np.reshape(
                        [current_state.desired_x, current_state.desired_y],
                        (self.state_dim, 1)))))

        # stack three consecutive laser frames into one (1, 3*sensor_dim) row
        array_laser_1 = utils.remapping_laser_data(laser_data[0])
        array_laser_2 = utils.remapping_laser_data(laser_data[1])
        array_laser_3 = utils.remapping_laser_data(current_state.laserScan)

        sensor_1 = Variable(
            torch.FloatTensor(np.reshape(array_laser_1, (1, self.sensor_dim))))
        sensor_2 = Variable(
            torch.FloatTensor(np.reshape(array_laser_2, (1, self.sensor_dim))))
        sensor_3 = Variable(
            torch.FloatTensor(np.reshape(array_laser_3, (1, self.sensor_dim))))
        sensor = torch.cat([sensor_1, sensor_2, sensor_3], 1)

        # pick the action source according to the training mode
        if self.train_type == 1:
            action = self.actor_td(state=state).cpu().data.numpy()
        elif self.train_type == 2:
            action = self.actor_ca(sensor=sensor).cpu().data.numpy()
        else:
            action = self.differential_driver.run(x=current_state.desired_x,
                                                  y=current_state.desired_y)

        # Exploration noise with 30% probability.
        # BUG FIX: was `self.train_type is not 3` — identity comparison with
        # an int literal only works by CPython small-int caching; use `!=`.
        if explore and random.uniform(0, 1) > 0.7 and self.train_type != 3:
            action = action + self.noise.sample()

        # constrain both action components
        action[0][0] = utils.constrain_actions(action[0][0], 1)
        action[0][1] = utils.constrain_actions(action[0][1], 1)

        return action[0][0], action[0][1]
# Beispiel #4
                        crash_time += 1
            time.sleep(0.1)
        resp_ = pytorch_io_service(all_controls) # make sure reset operation has been done

        # check if one agent start a new loop
        for i_flag in range(AGENT_NUMBER):
            if terminate_flag[i_flag] == 1:
                terminate_flag[i_flag] = 0 # reset flag
                addition_experience += 1
                experimence_mapping[i_flag] = addition_experience

        # temporary save experience, each loop of each agent should be saved separately
        for i_agents in range(AGENT_NUMBER):
            episode_experience[experimence_mapping[i_agents]].append(utils.combine_states(all_current_states, all_next_states, all_controls, i_agents))
            if SAVE_LIDAR is True:
                show_lidar_agents[i_agents].append(utils.remapping_laser_data(all_current_states.group_state[i_agents].laserScan))


    #####################################
    ##### For experience generating #####
    #####################################
    for i_experiment in episode_experience.keys():
        step_number = len(episode_experience[i_experiment])

        # forget too short experimence
        if step_number < MIN_EXPERIMENCE_NUMBER:
            continue

        # original HER experience
        new_goals = utils.sample_new_targets(episode_experience[i_experiment], HER_K)
        for i_step in range(step_number):