'replay_buffer': replay_buffer, 'num_iters': num_iters, 'monitor_state': monitored_env.get_state() }) if info["steps"] > args.num_steps: break if done: steps_left = args.num_steps - info["steps"] completion = np.round(info["steps"] / args.num_steps, 1) mean_ep_reward = np.mean(info["rewards"][-100:]) logger.record_tabular("% completion", completion) logger.record_tabular("steps", info["steps"]) logger.record_tabular("iters", num_iters) logger.record_tabular("episodes", len(info["rewards"])) logger.record_tabular("reward (100 epi mean)", np.mean(info["rewards"][-100:])) if not args.noisy: logger.record_tabular("exploration", exploration.value(num_iters)) if args.prioritized: logger.record_tabular("max priority", replay_buffer._max_priority) fps_estimate = (float(steps_per_iter) / (float(iteration_time_est) + 1e-6) if steps_per_iter._value is not None else "calculating...") logger.dump_tabular() logger.log() logger.log("ETA: " + pretty_eta(int(steps_left / fps_estimate))) logger.log() # add summary for one episode ep_stats.add_all_summary(writer, [mean_ep_reward, ep_length], num_iters) ep_length = 0
if done:
    # Episode finished: log progress and per-episode statistics.
    steps_left = args.num_steps - info["steps"]
    completion = np.round(info["steps"] / args.num_steps, 1)
    # Mean reward over the last 100 episodes (standard Atari metric).
    mean_ep_reward = np.mean(info["rewards"][-100:])

    logger.record_tabular("% completion", completion)
    logger.record_tabular("steps", info["steps"])
    logger.record_tabular("iters", num_iters)
    logger.record_tabular("episodes", len(info["rewards"]))
    # Reuse the mean computed above instead of recomputing it inline.
    logger.record_tabular("reward (100 epi mean)", mean_ep_reward)
    if not args.noisy:
        # NoisyNet replaces epsilon-greedy, so the exploration schedule is
        # only meaningful when noisy networks are disabled.
        logger.record_tabular("exploration", exploration.value(num_iters))
    if args.prioritized:
        logger.record_tabular("max priority", replay_buffer._max_priority)

    # steps_per_iter / iteration_time_est are running averages; before the
    # first sample their value is None and no FPS estimate exists yet.
    fps_estimate = (float(steps_per_iter) / (float(iteration_time_est) + 1e-6)
                    if steps_per_iter._value is not None else None)
    logger.dump_tabular()
    logger.log()
    # Bug fix: the original set fps_estimate to the string "calculating:"
    # (itself a typo for "calculating...") and then divided by it, raising
    # TypeError on the first episode before any timing sample exists.
    if fps_estimate is not None:
        logger.log("ETA: " + pretty_eta(int(steps_left / fps_estimate)))
    else:
        logger.log("ETA: calculating...")
    logger.log()

    # add summary for one episode
    ep_stats.add_all_summary(writer, [mean_ep_reward, ep_length], num_iters)
    ep_length = 0
break if done: steps_left = args.num_steps - info["steps"] completion = np.round(info["steps"] / args.num_steps, 1) mean_ep_reward = np.mean(info["rewards"][-100:]) logger.record_tabular("% completion", completion) logger.record_tabular("steps", info["steps"]) logger.record_tabular("iters", num_iters) logger.record_tabular("episodes", len(info["rewards"])) logger.record_tabular("reward (100 epi mean)", np.mean(info["rewards"][-100:])) if not args.noisy: logger.record_tabular("exploration", exploration.value(num_iters)) if args.prioritized: logger.record_tabular("max priority", replay_buffer._max_priority) fps_estimate = ( float(steps_per_iter) / (float(iteration_time_est) + 1e-6) if steps_per_iter._value is not None else "calculating:") logger.dump_tabular() logger.log() logger.log("ETA: " + pretty_eta(int(steps_left / fps_estimate))) logger.log() # add summary for one episode ep_stats.add_all_summary(writer, [mean_ep_reward, ep_length], num_iters) ep_length = 0