Ejemplo n.º 1
0
    def load_csv(self, csv_dataset: CsvDataset) -> None:
        """
        Restore the replay buffer contents from a csv file.
        The csv file is assumed to include a list of transitions.
        :param csv_dataset: A construct which holds the dataset parameters
        """
        self.assert_not_frozen()

        df = pd.read_csv(csv_dataset.filepath)
        if len(df) > self.max_size[1]:
            screen.warning(
                "Warning! The number of transitions to load into the replay buffer ({}) is "
                "bigger than the max size of the replay buffer ({}). The excessive transitions will "
                "not be stored.".format(len(df), self.max_size[1]))

        episode_ids = df['episode_id'].unique()
        progress_bar = ProgressBar(len(episode_ids))
        state_columns = [
            col for col in df.columns if col.startswith('state_feature')
        ]

        for e_id in episode_ids:
            progress_bar.update(e_id)
            df_episode_transitions = df[df['episode_id'] == e_id]
            episode = Episode()
            for (_, current_transition), (_, next_transition) in zip(
                    df_episode_transitions[:-1].iterrows(),
                    df_episode_transitions[1:].iterrows()):
                state = np.array(
                    [current_transition[col] for col in state_columns])
                next_state = np.array(
                    [next_transition[col] for col in state_columns])

                episode.insert(
                    Transition(
                        state={'observation': state},
                        action=current_transition['action'],
                        reward=current_transition['reward'],
                        next_state={'observation': next_state},
                        game_over=False,
                        info={
                            'all_action_probabilities':
                            ast.literal_eval(
                                current_transition['all_action_probabilities'])
                        }))

            # Set the last transition to end the episode
            if csv_dataset.is_episodic:
                episode.get_last_transition().game_over = True

            self.store_episode(episode)

        # close the progress bar
        progress_bar.update(len(episode_ids))
        progress_bar.close()

        self.shuffle_episodes()
Ejemplo n.º 2
0
    def load_csv(self, csv_dataset: CsvDataset,
                 input_filter: InputFilter) -> None:
        """
        Restore the replay buffer contents from a csv file.
        The csv file is assumed to include a list of transitions.
        :param csv_dataset: A construct which holds the dataset parameters
        :param input_filter: A filter used to filter the CSV data before feeding it to the memory.
        """
        self.assert_not_frozen()

        df = pd.read_csv(csv_dataset.filepath)
        if len(df) > self.max_size[1]:
            screen.warning(
                "Warning! The number of transitions to load into the replay buffer ({}) is "
                "bigger than the max size of the replay buffer ({}). The excessive transitions will "
                "not be stored.".format(len(df), self.max_size[1]))

        episode_ids = df["episode_id"].unique()
        progress_bar = ProgressBar(len(episode_ids))
        state_columns = [
            col for col in df.columns if col.startswith("state_feature")
        ]

        for e_id in episode_ids:
            progress_bar.update(e_id)
            df_episode_transitions = df[df["episode_id"] == e_id]
            input_filter.reset()

            if len(df_episode_transitions) < 2:
                # we have to have at least 2 rows in each episode for creating a transition
                continue

            episode = Episode()
            transitions = []
            for (_, current_transition), (_, next_transition) in zip(
                    df_episode_transitions[:-1].iterrows(),
                    df_episode_transitions[1:].iterrows()):
                state = np.array(
                    [current_transition[col] for col in state_columns])
                next_state = np.array(
                    [next_transition[col] for col in state_columns])

                transitions.append(
                    Transition(
                        state={"observation": state},
                        action=int(current_transition["action"]),
                        reward=current_transition["reward"],
                        next_state={"observation": next_state},
                        game_over=False,
                        info={
                            "all_action_probabilities":
                            ast.literal_eval(
                                current_transition["all_action_probabilities"])
                        },
                    ), )

            transitions = input_filter.filter(transitions, deep_copy=False)
            for t in transitions:
                episode.insert(t)

            # Set the last transition to end the episode
            if csv_dataset.is_episodic:
                episode.get_last_transition().game_over = True

            self.store_episode(episode)

        # close the progress bar
        progress_bar.update(len(episode_ids))
        progress_bar.close()