def callback(info):
    """Record the outcome of one episode in the enclosing scope's containers.

    Reads the ``episode``, ``optimizer``, ``tracking_params``, ``reward`` and
    ``elapsed`` entries of ``info``. The optimizer's ``value`` and the
    tracking params are written into slot 0 of ``params`` and
    ``tracking_params``; the policy is scored with ``run_ddpg.eval_policy``;
    and the reward, policy value and elapsed entry are appended to their
    per-episode lists.

    Args:
        info: Subscriptable per-episode record supplying the keys listed
            above (presumably a dict built by the training loop -- confirm
            against the caller).
    """
    episode_idx = info['episode']
    optimizer = info["optimizer"]

    # Slot 0 of each outer container is overwritten on every call, so it
    # always holds the most recently reported values.
    params[0] = optimizer.value
    tracking_params[0] = info["tracking_params"]

    # NOTE(review): element 0 of the optimizer value is presumably the
    # policy/actor parameters, and callback_rngs presumably holds one RNG
    # per episode -- confirm against run_ddpg.eval_policy and the caller.
    score = run_ddpg.eval_policy(
        callback_rngs[episode_idx],
        optimizer.value[0],
    )

    train_reward_per_episode.append(info["reward"])
    policy_value_per_episode.append(score)
    elapsed_per_episode.append(info["elapsed"])
def callback(info):
    """Evaluate the policy for one episode, print a summary, and log it.

    Reads the ``episode``, ``reward``, ``optimizer``, ``episode_length`` and
    ``elapsed`` entries of ``info``. The policy is scored with
    ``run_ddpg.eval_policy`` using element 0 of the optimizer's ``value``;
    a one-line progress message is printed; and the reward, policy value
    and episode length are appended to their per-episode lists.

    Args:
        info: Subscriptable per-episode record supplying the keys listed
            above (presumably a dict built by the training loop -- confirm
            against the caller).
    """
    episode_idx = info['episode']
    episode_reward = info['reward']
    actor_params = info["optimizer"].value[0]

    # NOTE(review): callback_rngs presumably holds one RNG per episode so
    # that evaluation is reproducible -- confirm against the caller.
    score = run_ddpg.eval_policy(callback_rngs[episode_idx], actor_params)

    length = info['episode_length']
    elapsed = info['elapsed']
    print(
        f"Episode {episode_idx}, "
        f"episode_length = {length}, "
        f"reward = {episode_reward}, "
        f"policy_value = {score}, "
        f"elapsed = {elapsed}"
    )

    train_reward_per_episode.append(episode_reward)
    policy_value_per_episode.append(score)
    episode_lengths.append(length)