else:
            replay_buffer = ReplayBuffer(args.replay_buffer_size)

        # Run the U helper's initialisation, then update_target(), in that
        # order, before any saved state is looked up.
        U.initialize()
        update_target()

        # Start counting iterations from zero unless a saved state says
        # otherwise.
        num_iters = 0

        # Load the model: maybe_load_model() may return None, in which case
        # the iteration counter above and the existing replay buffer are kept.
        state = maybe_load_model(savedir, container)
        if state is not None:
            # Both entries are looked up before either name is rebound.
            num_iters, replay_buffer = (
                state["num_iters"],
                state["replay_buffer"],
            )
            monitored_env.set_state(state["monitor_state"])

        # Timing bookkeeping: both markers start out unset.
        start_time = start_steps = None
        # Two separate RunningAvg trackers, each constructed with 0.999.
        steps_per_iter = RunningAvg(0.999)
        iteration_time_est = RunningAvg(0.999)
        # Keep whatever env.reset() returns as the current observation.
        obs = env.reset()
        # Record the mean of the \sigma: for every matching trainable
        # variable, store a short summary name and a tensor holding the mean
        # of its absolute values.  The two lists are filled in lockstep, so
        # index i of one corresponds to index i of the other.
        sigma_name_list = []
        sigma_list = []
        for param in tf.trainable_variables():
            # Only record the \sigma in the action network; skip all others.
            if 'sigma' not in param.name:
                continue
            if 'deepq/q_func/action_value' not in param.name:
                continue
            # Drop the scope prefix, turn the remaining path separators into
            # dots, and cut everything from the first ':' onwards,
            # e.g. 'deepq/q_func/action_value/a/b:0' -> 'a.b'.
            relative_name = param.name.replace(
                'deepq/q_func/action_value/', '')
            dotted_name = relative_name.replace('/', '.')
            summary_name = dotted_name.split(':')[0]
            sigma_name_list.append(summary_name)
            sigma_list.append(tf.reduce_mean(tf.abs(param)))
Esempio n. 2
0
        else:
            replay_buffer = ReplayBuffer(args.replay_buffer_size)

        # Run the U helper's initialisation, then update_target(), in that
        # order, before any saved state is looked up.
        U.initialize()
        update_target()

        # Start counting iterations from zero unless a saved state says
        # otherwise.
        num_iters = 0

        # Load the model: maybe_load_model() may return None, in which case
        # the iteration counter above and the existing replay buffer are kept.
        state = maybe_load_model(savedir, container)
        if state is not None:
            # Both entries are looked up before either name is rebound.
            num_iters, replay_buffer = (
                state["num_iters"],
                state["replay_buffer"],
            )
            monitored_env.set_state(state["monitor_state"])

        # Timing bookkeeping: both markers start out unset.
        start_time = start_steps = None
        # Two separate RunningAvg trackers, each constructed with 0.999.
        steps_per_iter = RunningAvg(0.999)
        iteration_time_est = RunningAvg(0.999)
        # Keep whatever env.reset() returns as the current observation.
        obs = env.reset()
        # Record the mean of the \sigma: for every matching trainable
        # variable, store a short summary name and a tensor holding the mean
        # of its absolute values.  The two lists are filled in lockstep, so
        # index i of one corresponds to index i of the other.
        sigma_name_list = []
        sigma_list = []
        for param in tf.trainable_variables():
            # Only record the \sigma in the action network; skip all others.
            if 'sigma' not in param.name:
                continue
            if 'deepq/q_func/action_value' not in param.name:
                continue
            # Drop the scope prefix, turn the remaining path separators into
            # dots, and cut everything from the first ':' onwards,
            # e.g. 'deepq/q_func/action_value/a/b:0' -> 'a.b'.
            relative_name = param.name.replace(
                'deepq/q_func/action_value/', '')
            dotted_name = relative_name.replace('/', '.')
            summary_name = dotted_name.split(':')[0]
            sigma_name_list.append(summary_name)
            sigma_list.append(tf.reduce_mean(tf.abs(param)))