else: replay_buffer = ReplayBuffer(args.replay_buffer_size) U.initialize() update_target() num_iters = 0 # Load the model state = maybe_load_model(savedir, container) if state is not None: num_iters, replay_buffer = state["num_iters"], state[ "replay_buffer"], monitored_env.set_state(state["monitor_state"]) start_time, start_steps = None, None steps_per_iter = RunningAvg(0.999) iteration_time_est = RunningAvg(0.999) obs = env.reset() # Record the mean of the \sigma sigma_name_list = [] sigma_list = [] for param in tf.trainable_variables(): # only record the \sigma in the action network if 'sigma' in param.name \ and 'deepq/q_func/action_value' in param.name: summary_name = \ param.name.replace( 'deepq/q_func/action_value/', '').replace( '/', '.').split(':')[0] sigma_name_list.append(summary_name) sigma_list.append(tf.reduce_mean(tf.abs(param)))