Exemple #1
0
class TraditionalRLRewardPredictor():
    """Reward predictor that passes the environment's own reward through.

    No reward model is involved: each path is reported to an ``AgentLogger``
    and the rewards already stored on the path are handed back unchanged.
    """

    def __init__(self, summary_writer):
        """Create the ``AgentLogger`` used for episode logging.

        Args:
            summary_writer: Forwarded as the sole argument to ``AgentLogger``.
        """
        logger = AgentLogger(summary_writer)
        self.agent_logger = logger

    def predict_reward(self, path):
        """Log ``path`` and return its ``"original_rewards"`` entry.

        Args:
            path: Mapping that must contain the key ``"original_rewards"``.

        Returns:
            The object stored under ``path["original_rewards"]``, unmodified.
        """
        # Logging happens before the lookup, so the path is logged even when
        # the key turns out to be missing.
        self.agent_logger.log_episode(path)
        true_rewards = path["original_rewards"]
        return true_rewards

    def path_callback(self, path):
        """Accept a path and do nothing with it."""
        return None
Exemple #2
0
class TraditionalRLRewardPredictor(object):
    """Reward predictor that passes the environment's own reward through.

    Each path is reported to an ``AgentLogger`` and the rewards already
    stored on the path are returned unchanged.
    """

    def __init__(self, summary_writer):
        """Create the ``AgentLogger`` used for episode logging.

        Args:
            summary_writer: Forwarded as the sole argument to ``AgentLogger``.
        """
        logger = AgentLogger(summary_writer)
        self.agent_logger = logger

    def predict_reward(self, path):
        """Log ``path`` and return its ``"original_rewards"`` entry.

        Args:
            path: Mapping that must contain the key ``"original_rewards"``.

        Returns:
            The object stored under ``path["original_rewards"]``, unmodified.
        """
        # NOTE: the original author flagged this logging call as something
        # that may cause problems in future versions of Teacher.
        self.agent_logger.log_episode(path)
        true_rewards = path["original_rewards"]
        return true_rewards

    def path_callback(self, path):
        """Accept a path and do nothing with it."""
        return None
Exemple #3
0
class TraditionalRLRewardPredictor(object):
    """Reward predictor that passes the environment's own reward through.

    Each path is reported to an agent logger and the rewards already stored
    on the path are returned unchanged.
    """

    def __init__(self, summary_writer, agent_logger=None):
        """Set up the logger and the placeholder attributes.

        Args:
            summary_writer: Passed to ``AgentLogger`` when no logger is given.
            agent_logger: Optional ready-made logger. When it is ``None`` a
                new ``AgentLogger(summary_writer)`` is created instead.
        """
        if agent_logger is None:
            self.agent_logger = AgentLogger(summary_writer)
        else:
            self.agent_logger = agent_logger
        # Both attributes start out unset; nothing in this class assigns
        # them anything else.
        self.sess = None
        self.comparison_collector = None

    def predict_reward(self, path):
        """Log ``path`` and return its ``"original_rewards"`` entry.

        Args:
            path: Mapping that must contain the key ``"original_rewards"``.

        Returns:
            The object stored under ``path["original_rewards"]``, unmodified.
        """
        # NOTE: the original author flagged this logging call as something
        # that may cause problems in future versions of Teacher.
        self.agent_logger.log_episode(path)
        true_rewards = path["original_rewards"]
        return true_rewards

    def path_callback(self, path):
        """Accept a path and do nothing with it."""
        return None

    def save_session(self, path, global_step):
        """Save the logger and pickle ``global_step`` under ``path``.

        Args:
            path: Directory handed to ``agent_logger.save`` and used as the
                parent of ``iteration.pkl``.
            global_step: Value passed to the logger and pickled to disk.
        """
        self.agent_logger.save(path, global_step)
        iteration_file = os.path.join(path, 'iteration.pkl')
        with open(iteration_file, 'wb') as handle:
            pickle.dump(global_step, handle)

    def load_session(self, path):
        """Do nothing; data written by ``save_session`` is not read back."""
        return None