Example #1
import numpy as np
import pandas as pd
from tqdm import tqdm

# Trajectory and TrajectoryVideoStreaming are project-local classes and are
# assumed to be importable from the surrounding package.


def score(self, expert_evaluation, expert_trajectory: Trajectory, streaming_enviroment, trace_list,
          video_csv_list, add_data=False):
    """
    Wrapper for the base scoring function.

    :param expert_evaluation: evaluation results of the expert policy
    :param expert_trajectory: recorded (state, action) trajectory of the expert
    :param streaming_enviroment: streaming environment in which the cloned policy is rolled out
    :param trace_list: which network traces we evaluated
    :param video_csv_list: which videos we evaluated
    :param add_data: whether to attach the underlying data to the comparison result
    :return: result of score_comparison between the expert and the cloned policy
    """
    expert_trajectory.convert_list()
    behavioural_cloning_trace_generator_testing = TrajectoryVideoStreaming(self, streaming_enviroment,
                                                                           trace_list=trace_list,
                                                                           video_csv_list=video_csv_list)
    # Transform every raw observation of the expert trajectory into a feature vector.
    state_t = np.array([self.classifier.extract_features_observation(obs) for obs, _, _ in
                        tqdm(expert_trajectory.trajectory_list, desc='transforming')])
    state_t = pd.DataFrame(state_t, columns=self.classifier.extract_features_names())
    self.impute_NaN_inplace(state_t)
    expert_action = expert_trajectory.trajectory_action_t_arr.ravel()
    approx_action = self.classifier.predict(state_t)
    # Roll out the cloned policy deterministically on the same traces and videos.
    behavioural_cloning_evaluation, behavioural_cloning_evaluation_trajectory = \
        behavioural_cloning_trace_generator_testing.create_trajectories(
            random_action_probability=0, cores_avail=1)
    return self.score_comparison(expert_evaluation=expert_evaluation,
                                 expert_trajectory=expert_trajectory,
                                 expert_action=expert_action,
                                 approx_evaluation=behavioural_cloning_evaluation,
                                 approx_trajectory=behavioural_cloning_evaluation_trajectory,
                                 approx_action=approx_action, add_data=add_data)
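The helper `impute_NaN_inplace` is not shown in the listing above. A minimal sketch of what such a helper could look like, assuming mean imputation with a zero fallback; both the strategy and the standalone signature are assumptions, not the project's actual implementation:

import numpy as np
import pandas as pd

def impute_nan_inplace(frame: pd.DataFrame) -> None:
    # Assumed strategy: replace NaNs with the column mean, then fall back to
    # 0.0 for columns that are entirely NaN. Mutates the frame in place.
    frame.fillna(frame.mean(), inplace=True)
    frame.fillna(0.0, inplace=True)

frame = pd.DataFrame({'f0': [1.0, np.nan, 3.0], 'f1': [np.nan, np.nan, np.nan]})
impute_nan_inplace(frame)
print(frame)   # f0's NaN becomes 2.0, f1 becomes all 0.0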
Example #2
def score(self, expert_evaluation, expert_trajectory: Trajectory, streaming_enviroment, trace_list,
          video_csv_list, add_data=False):
    """
    Wrapper for the base scoring function; here the cloned policy is queried
    as a two-input neural network over (current state, future state) pairs.
    """
    expert_trajectory.convert_list()
    behavioural_cloning_trace_generator_testing = TrajectoryVideoStreaming(self, streaming_enviroment,
                                                                           trace_list=trace_list,
                                                                           video_csv_list=video_csv_list)
    state_t_testing = expert_trajectory.trajectory_state_t_arr
    state_t_future_testing = expert_trajectory.trajectory_state_t_future
    expert_action = expert_trajectory.trajectory_action_t_arr.ravel()
    # Greedy action of the cloned policy: the network returns a score per
    # action, argmax picks the highest-scoring one.
    approx_action = self.policy_network.model.predict([state_t_testing, state_t_future_testing]).argmax(-1)
    # Roll out the cloned policy deterministically on the same traces and videos.
    behavioural_cloning_evaluation, behavioural_cloning_evaluation_trajectory = \
        behavioural_cloning_trace_generator_testing.create_trajectories(
            random_action_probability=0, cores_avail=1)
    return self.score_comparison(expert_evaluation=expert_evaluation,
                                 expert_trajectory=expert_trajectory,
                                 expert_action=expert_action,
                                 approx_evaluation=behavioural_cloning_evaluation,
                                 approx_trajectory=behavioural_cloning_evaluation_trajectory,
                                 approx_action=approx_action, add_data=add_data)
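Example #2 relies on the pattern `model.predict([state_t, state_t_future]).argmax(-1)`, i.e. a model with two input tensors. A minimal Keras sketch of a network with that call signature; the layer sizes, architecture, and feature dimensions below are illustrative assumptions, not the project's actual policy network:

import numpy as np
from tensorflow import keras

# Hypothetical shapes: STATE_DIM features for the current state,
# FUTURE_DIM features for the lookahead, N_ACTIONS bitrate levels.
STATE_DIM, FUTURE_DIM, N_ACTIONS = 20, 10, 6

state_in = keras.Input(shape=(STATE_DIM,), name='state_t')
future_in = keras.Input(shape=(FUTURE_DIM,), name='state_t_future')
hidden = keras.layers.Dense(64, activation='relu')(
    keras.layers.Concatenate()([state_in, future_in]))
action_probs = keras.layers.Dense(N_ACTIONS, activation='softmax')(hidden)
model = keras.Model(inputs=[state_in, future_in], outputs=action_probs)

# predict() on a two-input model takes a list of arrays; argmax recovers the
# greedy action index, exactly as in Example #2.
batch = [np.random.rand(4, STATE_DIM).astype('float32'),
         np.random.rand(4, FUTURE_DIM).astype('float32')]
greedy_actions = model.predict(batch).argmax(-1)   # shape (4,)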
Example #3

def score(self, expert_evaluation, expert_trajectory: Trajectory, streaming_enviroment, trace_list,
          video_csv_list, add_data=False):
    """
    Wrapper for the base scoring function; the learned policy is wrapped in
    self.abr_policy_learner.
    """
    expert_trajectory.convert_list()
    behavioural_cloning_trace_generator_testing = TrajectoryVideoStreaming(self, streaming_enviroment,
                                                                           trace_list=trace_list,
                                                                           video_csv_list=video_csv_list)
    # Transform every raw observation of the expert trajectory into a feature vector.
    state_t = np.array([self.abr_policy_learner.extract_features_observation(obs) for obs, _, _ in
                        tqdm(expert_trajectory.trajectory_list, desc='transforming')])
    state_t = pd.DataFrame(state_t, columns=self.abr_policy_learner.extract_features_names())
    expert_action = expert_trajectory.trajectory_action_t_arr
    approx_action = self.abr_policy_learner.predict(state_t)
    # Roll out the cloned policy deterministically on the same traces and videos.
    behavioural_cloning_evaluation, behavioural_cloning_evaluation_trajectory = \
        behavioural_cloning_trace_generator_testing.create_trajectories(
            random_action_probability=0, cores_avail=1)
    return self.score_comparison(expert_evaluation=expert_evaluation,
                                 expert_trajectory=expert_trajectory,
                                 expert_action=expert_action,
                                 approx_evaluation=behavioural_cloning_evaluation,
                                 approx_trajectory=behavioural_cloning_evaluation_trajectory,
                                 approx_action=approx_action, add_data=add_data)
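Examples #1 and #3 share the same preprocessing step: map each raw observation of the expert trajectory to a feature vector, then wrap the stacked result in a column-labelled DataFrame. A self-contained sketch of that pattern with toy stand-ins; all names, shapes, and the identity feature extractor below are assumptions:

import numpy as np
import pandas as pd
from tqdm import tqdm

# Toy stand-in for expert_trajectory.trajectory_list: (state, action, reward) triples.
trajectory_list = [(np.random.rand(5), 0, 0.0) for _ in range(100)]
feature_names = [f'f{i}' for i in range(5)]   # stand-in for extract_features_names()

def extract_features_observation(obs):
    # Assumed to map one raw observation to a flat feature vector.
    return np.asarray(obs, dtype=float)

state_t = np.array([extract_features_observation(obs) for obs, _, _ in
                    tqdm(trajectory_list, desc='transforming')])
state_t = pd.DataFrame(state_t, columns=feature_names)
print(state_t.shape)   # (100, 5); ready for classifier.predict(state_t)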