def test_multiple_resets(self):
     time.sleep(rospy.get_param('/world/delay_evaluation') + 2)
     for _ in range(2):
         experience, observation = self._environment.reset(
         )  # includes take off of two agents by fsm
         self.assertIsNone(experience.action)
         self.assertEqual(experience.done, TerminationType.NotDone)
         count = 0
         while experience.done == TerminationType.NotDone:
             fake_adversarial_action = Action(
                 actor_name='tracking_fleeing_agent',
                 value=np.asarray([
                     0, 0, 5,  # tracking velocity
                     0, 3, 0,  # fleeing velocity
                     0,  # tracking angular z
                     0,  # fleeing angular z
                 ]))
             print(f'{count} {fake_adversarial_action}')
             experience, observation = self._environment.step(
                 action=fake_adversarial_action)
             count += 1
             self.assertIsNotNone(experience.observation)
             self.assertIsNotNone(experience.action)
             self.assertIn('frame', experience.info)
             self.assertEqual(experience.info['/tracking/cmd_vel'].value[2],
                              5)
             self.assertEqual(experience.info['/fleeing/cmd_vel'].value[1],
                              3)
         print(f'{count} vs {MAXSTEPS}')
         self.assertEqual(count, MAXSTEPS)
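The 8-element action fed to 'tracking_fleeing_agent' above appears to pack both agents' commands into a single vector. A minimal sketch of that assumed layout, consistent with the assertions on /tracking/cmd_vel and /fleeing/cmd_vel; the slice helpers are hypothetical and not part of the original code:

import numpy as np

# Assumed layout: [tracking vx, vy, vz, fleeing vx, vy, vz, tracking yaw rate, fleeing yaw rate]
TRACKING_LINEAR = slice(0, 3)  # hypothetical helper
FLEEING_LINEAR = slice(3, 6)   # hypothetical helper

value = np.asarray([0, 0, 5, 0, 3, 0, 0, 0])
assert value[TRACKING_LINEAR][2] == 5  # matches experience.info['/tracking/cmd_vel'].value[2]
assert value[FLEEING_LINEAR][1] == 3   # matches experience.info['/fleeing/cmd_vel'].value[1]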
 def get_action(self, inputs, train: bool = False) -> Action:
     inputs = self.process_inputs(inputs)  # preprocess the raw observation
     output = self.sample(inputs, train=train)
     output = output.clamp(min=self.action_min, max=self.action_max)  # keep the sampled action within bounds
     return Action(actor_name=get_filename_without_extension(__file__),
                   value=output)
Example #3
 def _set_field(self, msg: Union[String, Twist, RosReward,
                                 CommonCommonStateBatteryStateChanged,
                                 Odometry], args: Tuple) -> None:
     field_name, _ = args
     if field_name == 'fsm_state':
         self._fsm_state = FsmState[msg.data]
     elif field_name == 'action':
         self._action = Action(actor_name='applied_action',
                               value=process_twist(msg).value)
     elif field_name == 'reward':
         self._reward = msg.reward
         self._terminal_state = TerminationType[msg.termination]
     elif field_name == 'reference_pose':
         self._reference_pose = np.asarray(
             [msg.point.x, msg.point.y, msg.point.z])
         self._calculate_ref_in_img()
     elif field_name == 'battery':
         self._battery = msg.percent
     elif field_name == 'trajectory':
         global_pose = process_odometry(msg)
         self._trajectory.append(global_pose)
     elif field_name == 'camera_orientation':
         self._camera_orientation = float(msg.angular.y)
     else:
         raise NotImplementedError
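The _set_field callback above receives (field_name, sensor_stats) through the subscriber's callback_args. A minimal sketch, as an assumption rather than code from the original source, of how such subscriptions might be registered in the class' constructor; the topic names are placeholders:

import rospy
from std_msgs.msg import String
from geometry_msgs.msg import Twist
from nav_msgs.msg import Odometry

# Inside __init__; '/fsm/state', '/cmd_vel' and '/odometry' are placeholder topics.
rospy.Subscriber('/fsm/state', String, self._set_field, callback_args=('fsm_state', {}))
rospy.Subscriber('/cmd_vel', Twist, self._set_field, callback_args=('action', {}))
rospy.Subscriber('/odometry', Odometry, self._set_field, callback_args=('trajectory', {}))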
    def get_action(self,
                   inputs,
                   train: bool = False,
                   agent_id: int = -1) -> Action:
        positions = np.squeeze(self.process_inputs(inputs))
        try:
            bb = calculate_bounding_box(inputs)
            inputs_bb = (bb[3][0], bb[3][1], bb[4], bb[5])
            inputs_ = (bb[3][0] - 200) / 40
            inputs = torch.Tensor([inputs_])
            self.previous_input = inputs_
        except TypeError:
            inputs_bb = (200, 200, 40, 40)
            inputs = torch.Tensor([self.previous_input])
        if agent_id == 0:  # tracking agent ==> tracking_linear_y
            output = self.sample(inputs,
                                 train=train).clamp(min=self.action_min,
                                                    max=self.action_max)
            actions = np.stack(
                [0, output.data.cpu().numpy().squeeze(), 0, 0, 0, 0, 0, 0])
        elif agent_id == 1:  # fleeing agent ==> fleeing_linear_y
            output = self.sample(inputs, train=train,
                                 adversarial=True).clamp(min=self.action_min,
                                                         max=self.action_max)
            actions = np.stack(
                [0, 0, 0, 0,
                 output.data.cpu().numpy().squeeze(), 0, 0, 0])
        else:
            output = self.sample(inputs, train=train,
                                 adversarial=False).clamp(min=self.action_min,
                                                          max=self.action_max)
            adversarial_output = self.sample(inputs,
                                             train=train,
                                             adversarial=True).clamp(
                                                 min=self.action_min,
                                                 max=self.action_max)

            # rand_run = get_rand_run_ros(self.waypoint, np.asarray(positions[3:6]).squeeze(), self._playfield_size)
            # run_action = np.squeeze(rand_run[1])
            # self.waypoint = np.squeeze(rand_run[0])
            # hunt_action = np.squeeze(get_slow_hunt_ros(np.asarray(positions), self._playfield_size))
            # run_action = np.squeeze(get_slow_run_ros(inputs_bb, self._playfield_size))

            actions = np.stack([
                0,
                output.data.cpu().numpy().squeeze().item(), 0, 0,
                adversarial_output.data.cpu().numpy().squeeze().item(), 0, 0, 0
            ],
                               axis=-1)

            # actions = np.stack([0, output.data.cpu().numpy().squeeze().item(), 0, *run_action, 0, 0], axis=-1)

        # actions = self.adjust_height(positions, actions)  Not necessary, controller keeps altitude fixed
        actions = clip_action_according_to_playfield_size_flipped(
            positions.detach().numpy().squeeze(), actions,
            self._playfield_size)
        return Action(
            actor_name="tracking_fleeing_agent",  # assume output [1, 8] so no batch!
            value=actions)
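In this and the later multi-agent get_action variants, agent_id selects which agent's part of the action vector is driven by the network: 0 drives only the tracking agent, 1 only the fleeing agent, and the default (-1) fills in both. A hedged usage sketch; the agent and observation variables are hypothetical:

tracking_only = agent.get_action(observation, train=False, agent_id=0)  # only tracking entries non-zero
fleeing_only = agent.get_action(observation, train=False, agent_id=1)   # only fleeing entries non-zero
both = agent.get_action(observation, train=False)                       # both policies sampled into one vector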
Example #5
def process_twist(msg, sensor_stats: dict = None) -> Action:
    return Action(actor_name=sensor_stats['name'] if sensor_stats is not None
                  and 'name' in sensor_stats.keys() else '',
                  value=np.asarray([
                      msg.linear.x, msg.linear.y, msg.linear.z, msg.angular.x,
                      msg.angular.y, msg.angular.z
                  ]))
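process_twist flattens a geometry_msgs Twist into a 6-element Action value in the order (linear x, y, z, angular x, y, z), taking the actor name from sensor_stats when one is given. A minimal usage sketch:

from geometry_msgs.msg import Twist

msg = Twist()
msg.linear.z = 5.0
msg.angular.z = 0.5
action = process_twist(msg, sensor_stats={'name': 'tracking_cmd_vel'})
# action.value -> array([0. , 0. , 5. , 0. , 0. , 0.5]); action.actor_name -> 'tracking_cmd_vel'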
 def get_action(self, inputs, train: bool = False) -> Action:
     inputs = self.process_inputs(inputs)
     output = self.sample(inputs, train=train)
     output = output.clamp(min=self.action_min, max=self.action_max)
     return Action(
         actor_name=get_filename_without_extension(__file__),  # assume output [1, 2] so no batch!
         value=np.stack([
             *output.data.cpu().numpy().squeeze(),
             *get_slow_run(inputs.squeeze())
         ],
                        axis=-1))
Example #7
    def get_action(self,
                   inputs,
                   train: bool = False,
                   agent_id: int = -1) -> Action:
        positions = np.squeeze(self.process_inputs(inputs))
        try:
            bb = calculate_bounding_box(inputs, orientation=(0, 0, 1))
            inputs_bb = (bb[3][0], bb[3][1], bb[4], bb[5])
            inputs = ((bb[3][0] - 200) / 40, (bb[3][1] - 200) / 40)
            inputs = np.squeeze(self.process_inputs(inputs))
            self.previous_input = inputs
        except TypeError:
            inputs = self.previous_input
            inputs_bb = (200, 200, 40, 40)
        if agent_id == 0:  # tracking agent ==> tracking_linear_y
            output = self.sample(inputs,
                                 train=train).clamp(min=self.action_min,
                                                    max=self.action_max)
            actions = np.stack(
                [output.data.cpu().numpy().squeeze(), 0, 0, 0, 0, 0, 0, 0])
        elif agent_id == 1:  # fleeing agent ==> fleeing_linear_y
            output = self.sample(inputs, train=train,
                                 adversarial=True).clamp(min=self.action_min,
                                                         max=self.action_max)
            actions = np.stack(
                [0, 0, 0,
                 output.data.cpu().numpy().squeeze(), 0, 0, 0, 0])
        else:
            output = self.action_max * self.sample(
                inputs, train=train, adversarial=False)
            adversarial_output = self.action_max * self.sample(
                inputs, train=train, adversarial=True)

            run_action = np.squeeze(
                get_slow_run_ros(inputs_bb, self._playfield_size))

            #actions = np.stack([*output.data.cpu().numpy().squeeze(), 0,
            #                    *adversarial_output.data.cpu().numpy().squeeze(), 0,
            #                    0, 0], axis=-1)
            print(inputs_bb)
            actions = np.stack(
                [*output.data.cpu().numpy().squeeze(), 0, *run_action, 0, 0],
                axis=-1)
            actions = np.stack([0, 0, 0, 1, 0, 0, 0, 0], axis=-1)  # hard-coded override of the stacked actions above
        # actions = self.adjust_height(positions, actions)  Not necessary, controller keeps altitude fixed
        actions = clip_action_according_to_playfield_size_flipped(
            positions.detach().numpy().squeeze(), actions,
            self._playfield_size)
        return Action(
            actor_name="tracking_fleeing_agent",  # assume output [1, 8] so no batch!
            value=actions)
Example #8
 def _process_image(self, msg: Image, args: tuple) -> None:
     # cprint(f'image received {msg.data}', self._logger)
     sensor_topic, sensor_stats = args
     processed_image = process_image(msg, sensor_stats=sensor_stats)
     processed_image = torch.Tensor(processed_image).permute(2, 0, 1).unsqueeze(0)
     assert processed_image.size()[0] == 1 and processed_image.size()[1] == 3
     output = self._model.forward([processed_image])[0].detach().cpu().numpy()
     cprint(f'output predicted {output}', self._logger, msg_type=MessageType.debug)
     action = Action(
         actor_name='dnn_actor',
         value=output  # assuming control is first output
     )
     self._publisher.publish(adapt_action_to_twist(action))
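The callback publishes the predicted Action as a Twist through self._publisher. A minimal sketch of the publisher it relies on, as an assumption about the surrounding node rather than code shown in the original; the topic name is a placeholder:

import rospy
from geometry_msgs.msg import Twist

# Typically created once in the node's __init__.
self._publisher = rospy.Publisher('/cmd_vel', Twist, queue_size=10)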
 def _set_field(self, msg: Union[String, Twist, RosReward,
                                 CommonCommonStateBatteryStateChanged,
                                 Odometry], args: Tuple) -> None:
     field_name, _ = args
     if field_name == 'fsm_state':
         self._fsm_state = FsmState[msg.data]
     elif field_name == 'action':
         self._action = Action(actor_name='applied_action',
                               value=process_twist(msg).value)
     elif field_name == 'reference_pose':
         self._reference_pose = np.asarray(
             [msg.point.x, msg.point.y, msg.point.z])
     else:
         raise NotImplementedError
Example #10
 def get_action(self,
                input_img: np.ndarray,
                train: bool = False,
                agent_id: int = -1) -> Action:
     try:
         img = self.process_inputs(resize(input_img, (self.sz, self.sz)))
         boxes = self.yolov3_tiny.predict_img(img, conf_thresh=0.7)[0]
         inputs = find_person(boxes, self.previous_input)
         inputs = np.squeeze(self.process_inputs(inputs))
         self.previous_input = inputs
     except TypeError:
         inputs = self.previous_input
     output = self.action_max * self.forward(inputs)
     actions = np.stack([*output.data.cpu().numpy().squeeze(), 0, 0, 0, 0],
                        axis=-1)
     return Action(actor_name="drone_tracking_agent", value=actions)
 def get_action(self,
                inputs,
                train: bool = False,
                agent_id: int = -1) -> Action:
     inputs = self.process_inputs(inputs)
     if agent_id == 0:
         output = self.sample(inputs,
                              train=train).clamp(min=self.action_min,
                                                 max=self.action_max)
         actions = np.stack([
             *output.data.cpu().numpy().squeeze(),
             *get_slow_run(inputs.squeeze())
         ],
                            axis=-1)
     elif agent_id == 1:
         output = self.sample(inputs, train=train,
                              adversarial=True).clamp(min=self.action_min,
                                                      max=self.action_max)
         actions = np.stack([
             *get_slow_hunt(inputs.squeeze()),
             *output.data.cpu().numpy().squeeze()
         ],
                            axis=-1)
     else:
         output = self.sample(inputs, train=train,
                              adversarial=False).clamp(min=self.action_min,
                                                       max=self.action_max)
         adversarial_output = self.sample(inputs,
                                          train=train,
                                          adversarial=True).clamp(
                                              min=self.action_min,
                                              max=self.action_max)
         actions = np.stack([
             *output.data.cpu().numpy().squeeze(),
             *adversarial_output.data.cpu().numpy().squeeze()
         ],
                            axis=-1)
     return Action(
         actor_name=get_filename_without_extension(__file__),  # assume output [1, 2] so no batch!
         value=actions)
 def _set_field(self, msg: Union[String, Twist, RosReward,
                                 CommonCommonStateBatteryStateChanged],
                args: Tuple) -> None:
     field_name, sensor_stats = args
     if field_name == 'fsm_state':
         self._fsm_state = FsmState[msg.data]
     elif field_name == 'action':
         self._action = Action(actor_name='applied_action',
                               value=process_twist(msg).value)
     elif field_name == 'reward':
         self._reward = msg.reward
         self._terminal_state = TerminationType[msg.termination]
     elif field_name == 'waypoint':
         self._waypoint = np.asarray(msg.data)
     elif field_name == 'battery':
         self._battery = msg.percent
     else:
         raise NotImplementedError
     cprint(f'set field {field_name}',
            self._logger,
            msg_type=MessageType.debug)
Example #13
 def _set_field(self, msg, args: Tuple) -> None:
     field_name, sensor_stats = args
     if field_name == "fsm_state":
         self.fsm_state = FsmState[msg.data]
     elif field_name == "observation":
         msg_type = camelcase_to_snake_format(ros_message_to_type_str(msg))
         self.observation = eval(f"process_{msg_type}(msg, sensor_stats)")
     elif field_name == "action":
         self.action = Action(
             actor_name=self._config.ros_config.action_topic,
             value=process_twist(msg).value,
         )
     elif field_name == "reward":
         self.reward = msg.reward
         self.terminal_state = TerminationType[msg.termination]
     elif field_name.startswith("info:"):
         info_key = field_name[5:]
         msg_type = camelcase_to_snake_format(ros_message_to_type_str(msg))
         self.info[info_key] = eval(f"process_{msg_type}(msg, sensor_stats)")
     else:
         raise NotImplementedError(f"{field_name}: {msg}")
     cprint(f"set field {field_name}", self._logger, msg_type=MessageType.debug)
Example #14
    def get_action(self,
                   inputs,
                   train: bool = False,
                   agent_id: int = -1) -> Action:
        inputs = self.process_inputs(inputs)
        if agent_id == 0:  # tracking agent ==> tracking_linear_y
            output = self.sample(inputs,
                                 train=train).clamp(min=self.action_min,
                                                    max=self.action_max)
            actions = np.stack(
                [0, *output.data.cpu().numpy().squeeze(), 0, 0, 0, 0, 0, 0])
        elif agent_id == 1:  # fleeing agent ==> fleeing_linear_y
            output = self.sample(inputs, train=train,
                                 adversarial=True).clamp(min=self.action_min,
                                                         max=self.action_max)
            actions = np.stack(
                [0, 0, 0, 0, *output.data.cpu().numpy().squeeze(), 0, 0, 0])
        else:
            output = self.sample(inputs, train=train,
                                 adversarial=False).clamp(min=self.action_min,
                                                          max=self.action_max)
            adversarial_output = self.sample(inputs,
                                             train=train,
                                             adversarial=True).clamp(
                                                 min=self.action_min,
                                                 max=self.action_max)
            # actions = np.stack([0, output.data.cpu().numpy().squeeze().item(), 0,
            #                     0, adversarial_output.data.cpu().numpy().squeeze().item(), 0,
            #                     0, 0], axis=-1)
            actions = np.stack([0, 1, 0, 0, 1, 0, 0, 0], axis=-1)  # fixed action instead of the sampled outputs above

        # actions = self.adjust_height(inputs, actions)  Not necessary, hector quadrotor controller keeps altitude fixed
        actions = clip_action_according_to_playfield_size(
            inputs.detach().numpy().squeeze(), actions, self._playfield_size)
        return Action(
            actor_name="tracking_fleeing_agent",  # assume output [1, 8] so no batch!
            value=actions)
 def get_action(self, inputs, train: bool = False) -> Action:
     output = self._policy_distribution(inputs, train).sample().item()
     return Action(
         actor_name=get_filename_without_extension(__file__),
         value=self.discrete_action_mapper.index_to_tensor(output))
Example #16
            reference_pos = set_random_cone_location(WORLD)
        elif TARGET == "gate":
            reference_pos = set_random_gate_location()
        elif TARGET == "line":
            reference_pos = spawn_line(WORLD)
        else:
            raise NotImplementedError

        step_index = 0
        experience, observation = environment.reset()
        while experience.done == TerminationType.NotDone:
            tensor = torch.from_numpy(observation).permute(
                2, 0, 1).float().unsqueeze(0)
            output = model(tensor).detach().cpu().numpy().squeeze()
            experience, observation = environment.step(
                action=Action(value=np.asarray(
                    [output[0], output[1], output[2], 0, 0, output[3]]))
                if DS_TASK == "velocities" else None,
                waypoint=output if DS_TASK == "waypoints" else None,
            )
            loss = save(reference_pos,
                        experience,
                        json_data,
                        hdf5_data,
                        prediction=output)
            if loss is not None:
                losses.append(loss)
            step_index += 1

        successes.append(experience.done == TerminationType.Success)
        if TARGET == "line":
            remove_line()
Example #17
 def get_action(self, inputs, train: bool = False) -> Action:
     inputs = self.process_inputs(inputs=inputs)
     output = self.forward(inputs)
     return Action(actor_name=get_filename_without_extension(__file__),
                   value=output.data)
 def get_random_action(self) -> Action:
     return Action(actor_name='random',
                   value=np.asarray(self._gym.action_space.sample()))
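get_random_action simply wraps a sample from the wrapped gym environment's action space. A minimal sketch of the same call against a plain gym environment; the environment name is arbitrary:

import gym
import numpy as np

env = gym.make('CartPole-v1')  # any gym environment works; only its action space is used
random_action = Action(actor_name='random',
                       value=np.asarray(env.action_space.sample()))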
 def get_action(self, inputs, train: bool = False) -> Action:
     output = self._policy_distribution(inputs, train).sample()
     return Action(actor_name=get_filename_without_extension(__file__),
                   value=output.item())