예제 #1
0
                    'replay_buffer': replay_buffer,
                    'num_iters': num_iters,
                    'monitor_state': monitored_env.get_state()
                })

            # Leave the enclosing loop once the recorded step count exceeds the
            # configured step budget (args.num_steps).
            if info["steps"] > args.num_steps:
                break

            if done:
                # Episode finished: record progress once, log an ETA, and write
                # the per-episode summary. This reporting block used to appear
                # twice in a row, so the table was dumped twice and the second
                # summary was written after ep_length had been reset to 0.
                steps_left = args.num_steps - info["steps"]
                # NOTE(review): this is a fraction rounded to one decimal, even
                # though the column label says "%"; kept as-is so logged values
                # do not change.
                completion = np.round(info["steps"] / args.num_steps, 1)
                # Mean over (at most) the last 100 entries of info["rewards"].
                mean_ep_reward = np.mean(info["rewards"][-100:])

                logger.record_tabular("% completion", completion)
                logger.record_tabular("steps", info["steps"])
                logger.record_tabular("iters", num_iters)
                logger.record_tabular("episodes", len(info["rewards"]))
                logger.record_tabular("reward (100 epi mean)", mean_ep_reward)
                if not args.noisy:
                    logger.record_tabular("exploration",
                                          exploration.value(num_iters))
                if args.prioritized:
                    logger.record_tabular("max priority",
                                          replay_buffer._max_priority)
                logger.dump_tabular()
                logger.log()

                # The throughput estimate does not exist until steps_per_iter
                # holds a value. Dividing steps_left by a placeholder string
                # would raise TypeError, so only compute the ETA when a
                # positive numeric estimate is available.
                fps_estimate = None
                if steps_per_iter._value is not None:
                    fps_estimate = (float(steps_per_iter) /
                                    (float(iteration_time_est) + 1e-6))
                if fps_estimate is not None and fps_estimate > 0:
                    eta_text = pretty_eta(int(steps_left / fps_estimate))
                else:
                    eta_text = "calculating..."
                logger.log("ETA: " + eta_text)
                logger.log()

                # add summary for one episode
                ep_stats.add_all_summary(writer, [mean_ep_reward, ep_length],
                                         num_iters)
                ep_length = 0
예제 #3
0
                break

            if done:
                # Episode finished: record progress, log an ETA, and write the
                # per-episode summary.
                steps_left = args.num_steps - info["steps"]
                # NOTE(review): this is a fraction rounded to one decimal, even
                # though the column label says "%"; kept as-is so logged values
                # do not change.
                completion = np.round(info["steps"] / args.num_steps, 1)
                # Mean over (at most) the last 100 entries of info["rewards"].
                mean_ep_reward = np.mean(info["rewards"][-100:])

                logger.record_tabular("% completion", completion)
                logger.record_tabular("steps", info["steps"])
                logger.record_tabular("iters", num_iters)
                logger.record_tabular("episodes", len(info["rewards"]))
                logger.record_tabular("reward (100 epi mean)", mean_ep_reward)
                if not args.noisy:
                    logger.record_tabular("exploration",
                                          exploration.value(num_iters))
                if args.prioritized:
                    logger.record_tabular("max priority",
                                          replay_buffer._max_priority)
                logger.dump_tabular()
                logger.log()

                # The throughput estimate does not exist until steps_per_iter
                # holds a value. Dividing steps_left by a placeholder string
                # would raise TypeError, so only compute the ETA when a
                # positive numeric estimate is available.
                fps_estimate = None
                if steps_per_iter._value is not None:
                    fps_estimate = (float(steps_per_iter) /
                                    (float(iteration_time_est) + 1e-6))
                if fps_estimate is not None and fps_estimate > 0:
                    eta_text = pretty_eta(int(steps_left / fps_estimate))
                else:
                    eta_text = "calculating:"
                logger.log("ETA: " + eta_text)
                logger.log()

                # add summary for one episode
                ep_stats.add_all_summary(writer, [mean_ep_reward, ep_length],
                                         num_iters)
                ep_length = 0