Code example #1
def test_store_and_get(buffer: SingleEpisodeBuffer):
    # store single non terminal transition
    transition = Transition(state={"observation": np.array([1, 2, 3])},
                            action=1,
                            reward=1,
                            game_over=False)
    buffer.store(transition)
    assert buffer.length() == 1
    assert buffer.num_complete_episodes() == 0
    assert buffer.num_transitions_in_complete_episodes() == 0
    assert buffer.num_transitions() == 1

    # get the single stored transition
    episode = buffer.get(0)
    assert episode.length() == 1
    assert episode.get_first_transition() is transition  # check addresses are the same
    assert episode.get_last_transition() is transition  # check addresses are the same

    # store single terminal transition
    transition = Transition(state={"observation": np.array([1, 2, 3])},
                            action=1,
                            reward=1,
                            game_over=True)
    buffer.store(transition)
    assert buffer.length() == 1
    assert buffer.num_complete_episodes() == 1
    assert buffer.num_transitions_in_complete_episodes() == 2

    # check that the episode is valid
    episode = buffer.get(0)
    assert episode.length() == 2
    assert episode.get_transition(0).total_return == 1 + 0.99
    assert episode.get_transition(1).total_return == 1
    assert buffer.mean_reward() == 1

    # only one episode in the replay buffer
    episode = buffer.get(1)
    assert episode is None

    # adding transitions after the first episode was closed
    transition = Transition(state={"observation": np.array([1, 2, 3])},
                            action=1,
                            reward=0,
                            game_over=False)
    buffer.store(transition)
    assert buffer.length() == 1
    assert buffer.num_complete_episodes() == 0
    assert buffer.num_transitions_in_complete_episodes() == 0

    # still only one episode
    assert buffer.get(1) is None
    assert buffer.mean_reward() == 0
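
(The buffer argument above is a pytest fixture. Below is a minimal sketch of such a fixture, assuming SingleEpisodeBuffer can be constructed without arguments; the import path is a guess and may differ between coach versions.)

import pytest

from rl_coach.memories.episodic.single_episode_buffer import SingleEpisodeBuffer  # assumed path


@pytest.fixture
def buffer():
    # Assumption: SingleEpisodeBuffer needs no constructor arguments.
    return SingleEpisodeBuffer()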
Code example #2
def test_update_episode(her):
    episode = Episode()
    for i in range(10):
        episode.insert(
            Transition(
                state={
                    'observation': np.array([i]),
                    'desired_goal': np.array([i + 1]),
                    'achieved_goal': np.array([i + 1])
                },
                action=i,
                game_over=i == 9,
                reward=0 if i == 9 else -1,
            ))

    her.store_episode(episode)
    # print('her._num_transitions', her._num_transitions)

    # 10 original transitions, plus 9 non-final transitions * 4 hindsight transitions each
    assert her.num_transitions() == 10 + (4 * 9)

    # make sure that the goal state was never sampled from the past
    for transition in her.transitions:
        assert transition.state['desired_goal'] > transition.state['observation']
        assert transition.next_state['desired_goal'] >= transition.next_state['observation']

        if transition.reward == 0:
            assert transition.game_over
        else:
            assert not transition.game_over
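
The counts and assertions above are consistent with "future" hindsight relabeling: every non-final transition gains 4 copies whose desired_goal comes from the achieved_goal of a later step in the same episode (9 non-final transitions * 4 = 36 extra transitions). The sketch below only illustrates that idea in plain Python; the library may organize the relabeled transitions differently (e.g. as whole hindsight episodes), and it would also recompute reward and game_over against the new goal.

import copy
import random


def relabel_with_future_goals(transitions, k=4):
    # Illustrative sketch, not the library implementation.
    hindsight = []
    for i, t in enumerate(transitions[:-1]):
        for _ in range(k):
            # pick the achieved goal of a random later step as the new desired goal
            future = random.choice(transitions[i + 1:])
            relabeled = copy.deepcopy(t)
            relabeled.state['desired_goal'] = future.state['achieved_goal']
            hindsight.append(relabeled)
    return hindsight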
Code example #3
    def load_csv(self, csv_dataset: CsvDataset) -> None:
        """
        Restore the replay buffer contents from a csv file.
        The csv file is assumed to include a list of transitions.
        :param csv_dataset: A construct which holds the dataset parameters
        """
        self.assert_not_frozen()

        df = pd.read_csv(csv_dataset.filepath)
        if len(df) > self.max_size[1]:
            screen.warning(
                "Warning! The number of transitions to load into the replay buffer ({}) is "
                "larger than the max size of the replay buffer ({}). The excess transitions will "
                "not be stored.".format(len(df), self.max_size[1]))

        episode_ids = df['episode_id'].unique()
        progress_bar = ProgressBar(len(episode_ids))
        state_columns = [
            col for col in df.columns if col.startswith('state_feature')
        ]

        for e_id in episode_ids:
            progress_bar.update(e_id)
            df_episode_transitions = df[df['episode_id'] == e_id]
            episode = Episode()
            for (_, current_transition), (_, next_transition) in zip(
                    df_episode_transitions[:-1].iterrows(),
                    df_episode_transitions[1:].iterrows()):
                state = np.array(
                    [current_transition[col] for col in state_columns])
                next_state = np.array(
                    [next_transition[col] for col in state_columns])

                episode.insert(
                    Transition(
                        state={'observation': state},
                        action=current_transition['action'],
                        reward=current_transition['reward'],
                        next_state={'observation': next_state},
                        game_over=False,
                        info={
                            'all_action_probabilities':
                            ast.literal_eval(
                                current_transition['all_action_probabilities'])
                        }))

            # Set the last transition to end the episode
            if csv_dataset.is_episodic:
                episode.get_last_transition().game_over = True

            self.store_episode(episode)

        # close the progress bar
        progress_bar.update(len(episode_ids))
        progress_bar.close()

        self.shuffle_episodes()
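
A CSV file this loader can consume needs at least the columns read above: episode_id, the state_feature_* columns, action, reward and all_action_probabilities (stored as a string that ast.literal_eval can parse). A purely illustrative way to write such a file with pandas:

import pandas as pd

# Three rows of a single episode; consecutive rows are paired into transitions,
# so these three rows yield two transitions.
df = pd.DataFrame({
    'episode_id': [0, 0, 0],
    'state_feature_0': [0.1, 0.2, 0.3],
    'state_feature_1': [1.0, 1.1, 1.2],
    'action': [0, 1, 0],
    'reward': [0.0, 1.0, 0.0],
    'all_action_probabilities': ['[0.5, 0.5]', '[0.4, 0.6]', '[0.5, 0.5]'],
})
df.to_csv('transitions.csv', index=False)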
Code example #4
    def update_transition_before_adding_to_replay_buffer(self, transition: Transition) -> Transition:
        """
        Allows agents to update the transition just before adding it to the replay buffer.
        Can be useful for agents that want to tweak the reward, termination signal, etc.

        :param transition: the transition to update
        :return: the updated transition
        """
        # If the episode was cut short by the environment's step limit, the terminal flag does not
        # reflect a true terminal state, so don't mark the transition as game over.
        if self.current_episode_steps_counter == self.parent_level_manager.environment.env._max_episode_steps:
            transition.game_over = False

        return transition
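
A hypothetical agent subclass could use this hook, for example, to clip rewards before they reach the replay buffer. The class and base-class names below are illustrative only:

import numpy as np


class ClippedRewardAgent(Agent):  # 'Agent' stands in for the project's actual base class
    def update_transition_before_adding_to_replay_buffer(self, transition: Transition) -> Transition:
        # Hypothetical tweak: clip the reward to [-1, 1] before storing.
        transition.reward = float(np.clip(transition.reward, -1, 1))
        return transition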
Code example #5
def episode():
    # A simple episode of 10 transitions where each step's desired and achieved goals are equal.
    episode = []
    for i in range(10):
        episode.append(
            Transition(
                state={
                    'observation': np.array([i]),
                    'desired_goal': np.array([i]),
                    'achieved_goal': np.array([i])
                },
                action=i,
            ))
    return episode
Code example #6
def test_clean(buffer: SingleEpisodeBuffer):
    # add several transitions and then clean the buffer
    transition = Transition(state={"observation": np.array([1, 2, 3])},
                            action=1,
                            reward=1,
                            game_over=False)
    for i in range(10):
        buffer.store(transition)
    assert buffer.num_transitions() == 10
    buffer.clean()
    assert buffer.num_transitions() == 0

    # add more transitions after the clean and make sure they were really cleaned
    transition = Transition(state={"observation": np.array([1, 2, 3])},
                            action=1,
                            reward=1,
                            game_over=True)
    buffer.store(transition)
    assert buffer.num_transitions() == 1
    assert buffer.num_transitions_in_complete_episodes() == 1
    assert buffer.num_complete_episodes() == 1
    for i in range(10):
        assert buffer.sample(1)[0] is transition
Code example #7
def create_dataset(dataset_root, output_path):
    maybe_download(dataset_root)

    dataset_root = os.path.join(dataset_root, 'AgentHuman')
    train_set_root = os.path.join(dataset_root, 'SeqTrain')
    validation_set_root = os.path.join(dataset_root, 'SeqVal')

    # training set extraction
    memory = ExperienceReplay(max_size=(MemoryGranularity.Transitions,
                                        sys.maxsize))
    train_set_files = sorted(os.listdir(train_set_root))
    print("found {} files".format(len(train_set_files)))
    progress_bar = ProgressBar(len(train_set_files))
    for file_idx, file in enumerate(train_set_files[:3000]):
        progress_bar.update(file_idx, "extracting file {}".format(file))
        train_set = h5py.File(os.path.join(train_set_root, file), 'r')
        observations = train_set['rgb'][:]  # forward camera
        measurements = np.expand_dims(train_set['targets'][:, 10],
                                      -1)  # forward speed
        actions = train_set['targets'][:, :3]  # steer, gas, brake
        # actions[:, :2] = actions[:, 1:3]
        # actions[:, 2] = train_set['targets'][:, 0]                           # gas, brake, steer
        # actions[:, 1] -= actions[:, 2]
        # actions = actions[:, :2][:, ::-1]

        high_level_commands = train_set['targets'][:, 24].astype(
            'int') - 2  # follow lane, left, right, straight

        file_length = train_set['rgb'].len()
        assert train_set['rgb'].len() == train_set['targets'].len()

        for transition_idx in range(file_length):
            transition = Transition(state={
                'CameraRGB':
                observations[transition_idx],
                'measurements':
                measurements[transition_idx],
                'high_level_command':
                high_level_commands[transition_idx]
            },
                                    action=actions[transition_idx],
                                    reward=0)
            memory.store(transition)
    progress_bar.close()
    print("Saving pickle file to {}".format(output_path))
    memory.save(output_path)
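
For reference, the 'targets' column indices the extraction above relies on, restated as named constants (this only restates the comments in the code; the layout itself is a property of the CARLA imitation-learning dataset):

# Column layout of train_set['targets'] assumed by create_dataset above.
STEER, GAS, BRAKE = 0, 1, 2    # targets[:, :3]
FORWARD_SPEED = 10             # targets[:, 10]
HIGH_LEVEL_COMMAND = 24        # targets[:, 24]; values 2..5 map to
                               # follow lane / left / right / straight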
Code example #8
File: agent.py  Project: mdavala/coach
    def observe(self, env_response: EnvResponse) -> bool:
        """
        Given a response from the environment, distill the observation from it and store it for later use.
        The response is expected to contain the new observation and measurements, the reward, a game over
        flag and any additional information necessary.
        :param env_response: result of call from environment.step(action)
        :return: the game over flag (taken from the environment on the first step of an episode, otherwise
                 from the stored transition)
        """

        # filter the env_response
        filtered_env_response = self.input_filter.filter(env_response)[0]

        # inject agent collected statistics, if required
        if self.ap.algorithm.use_accumulated_reward_as_measurement:
            if 'measurements' in filtered_env_response.next_state:
                filtered_env_response.next_state['measurements'] = np.append(
                    filtered_env_response.next_state['measurements'],
                    self.total_shaped_reward_in_current_episode)
            else:
                filtered_env_response.next_state['measurements'] = np.array(
                    [self.total_shaped_reward_in_current_episode])

        # if this is the first step in the episode, we don't have a next state or a reward yet, and thus no
        # transition to store in the memory. We also haven't reached the goal yet.
        if self.current_episode_steps_counter == 0:
            # initialize the current state
            self.curr_state = filtered_env_response.next_state
            return env_response.game_over
        else:
            transition = Transition(
                state=copy.copy(self.curr_state),
                action=self.last_action_info.action,
                reward=filtered_env_response.reward,
                next_state=filtered_env_response.next_state,
                game_over=filtered_env_response.game_over,
                info=filtered_env_response.info)

            # now that we have formed a basic transition - the next state progresses to be the current state
            self.curr_state = filtered_env_response.next_state

            # make agent specific changes to the transition if needed
            transition = self.update_transition_before_adding_to_replay_buffer(
                transition)

            # merge the intrinsic reward in
            if self.ap.algorithm.scale_external_reward_by_intrinsic_reward_value:
                transition.reward = transition.reward * (
                    1 + self.last_action_info.action_intrinsic_reward)
            else:
                transition.reward = transition.reward + self.last_action_info.action_intrinsic_reward

            # sum up the total shaped reward
            self.total_shaped_reward_in_current_episode += transition.reward
            self.total_reward_in_current_episode += env_response.reward
            self.shaped_reward.add_sample(transition.reward)
            self.reward.add_sample(env_response.reward)

            # add action info to transition
            if type(self.parent).__name__ == 'CompositeAgent':
                transition.add_info(self.parent.last_action_info.__dict__)
            else:
                transition.add_info(self.last_action_info.__dict__)

            # create and store the transition
            if self.phase in [RunPhase.TRAIN, RunPhase.HEATUP]:
                # for episodic memories we keep the transitions in a local buffer until the episode is ended.
                # for regular memories we insert the transitions directly to the memory
                if isinstance(self.memory, EpisodicExperienceReplay):
                    self.current_episode_buffer.insert(transition)
                else:
                    self.call_memory('store', transition)

            if self.ap.visualization.dump_in_episode_signals:
                self.update_step_in_episode_log()

            return transition.game_over
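
observe() only reads a handful of attributes from the response object: next_state, reward, game_over and info. A minimal stand-in exposing just that interface can be handy for unit-testing the method in isolation; the class below is a sketch, not the project's EnvResponse:

from dataclasses import dataclass, field
from typing import Any, Dict


@dataclass
class FakeEnvResponse:
    # Bare-bones stand-in with only the attributes observe() reads.
    next_state: Dict[str, Any]
    reward: float
    game_over: bool
    info: Dict[str, Any] = field(default_factory=dict)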
Code example #9
    def load_csv(self, csv_dataset: CsvDataset,
                 input_filter: InputFilter) -> None:
        """
        Restore the replay buffer contents from a csv file.
        The csv file is assumed to include a list of transitions.
        :param csv_dataset: A construct which holds the dataset parameters
        :param input_filter: A filter used to filter the CSV data before feeding it to the memory.
        """
        self.assert_not_frozen()

        df = pd.read_csv(csv_dataset.filepath)
        if len(df) > self.max_size[1]:
            screen.warning(
                "Warning! The number of transitions to load into the replay buffer ({}) is "
                "larger than the max size of the replay buffer ({}). The excess transitions will "
                "not be stored.".format(len(df), self.max_size[1]))

        episode_ids = df["episode_id"].unique()
        progress_bar = ProgressBar(len(episode_ids))
        state_columns = [
            col for col in df.columns if col.startswith("state_feature")
        ]

        for e_id in episode_ids:
            progress_bar.update(e_id)
            df_episode_transitions = df[df["episode_id"] == e_id]
            input_filter.reset()

            if len(df_episode_transitions) < 2:
                # we have to have at least 2 rows in each episode for creating a transition
                continue

            episode = Episode()
            transitions = []
            for (_, current_transition), (_, next_transition) in zip(
                    df_episode_transitions[:-1].iterrows(),
                    df_episode_transitions[1:].iterrows()):
                state = np.array(
                    [current_transition[col] for col in state_columns])
                next_state = np.array(
                    [next_transition[col] for col in state_columns])

                transitions.append(
                    Transition(
                        state={"observation": state},
                        action=int(current_transition["action"]),
                        reward=current_transition["reward"],
                        next_state={"observation": next_state},
                        game_over=False,
                        info={
                            "all_action_probabilities":
                            ast.literal_eval(
                                current_transition["all_action_probabilities"])
                        },
                    ))

            transitions = input_filter.filter(transitions, deep_copy=False)
            for t in transitions:
                episode.insert(t)

            # Set the last transition to end the episode
            if csv_dataset.is_episodic:
                episode.get_last_transition().game_over = True

            self.store_episode(episode)

        # close the progress bar
        progress_bar.update(len(episode_ids))
        progress_bar.close()
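
A hypothetical call site, assuming CsvDataset exposes the filepath and is_episodic attributes used above, that an InputFilter constructed without arguments passes transitions through unchanged, and that the import paths below match the installed coach version:

from rl_coach.core_types import CsvDataset        # assumed path
from rl_coach.filters.filter import InputFilter   # assumed path

dataset = CsvDataset(filepath='transitions.csv', is_episodic=True)
# 'memory' would be an instance of the episodic replay buffer that defines load_csv above.
memory.load_csv(dataset, InputFilter())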
Code example #10
        progress_bar.update(file_idx, "extracting file {}".format(file))
        train_set = h5py.File(os.path.join(train_set_root, file), 'r')
        observations = train_set['rgb'][:]  # forward camera
        measurements = np.expand_dims(train_set['targets'][:, 10],
                                      -1)  # forward speed
        actions = train_set['targets'][:, :3]  # steer, gas, brake
        actions[:, 1] -= actions[:, 2]
        actions = actions[:, :2][:, ::-1]

        high_level_commands = train_set['targets'][:, 24].astype(
            'int') - 2  # follow lane, left, right, straight

        file_length = train_set['rgb'].len()
        assert train_set['rgb'].len() == train_set['targets'].len()

        for transition_idx in range(file_length):
            transition = Transition(state={
                'forward_camera':
                observations[transition_idx],
                'measurements':
                measurements[transition_idx],
                'high_level_command':
                high_level_commands[transition_idx]
            },
                                    action=actions[transition_idx],
                                    reward=0)
            memory.store(transition)
    progress_bar.close()
    print("Saving pickle file.")
    memory.save('carla_train_set_replay_buffer.p')