Code example #1
File: main.py — Project: N1kYan/ReinforcementLearning
    # Restore path: re-bind the actor/critic tensor handles by name from a
    # previously saved graph instead of rebuilding the networks.
    # (The `if` header is above this chunk; `graph` is presumably the
    # restored tf.Graph obtained by the loading code — TODO confirm.)

    # Actor (policy network): input placeholders, action probabilities,
    # and the trainable weights tensor.
    a_state_input = graph.get_tensor_by_name("actor/state_input:0")
    a_actions_input = graph.get_tensor_by_name("actor/actions_input:0")
    a_advantages_input = \
        graph.get_tensor_by_name("actor/advantages_input:0")
    a_probabilities = graph.get_tensor_by_name("actor/probabilities:0")
    a_weights = graph.get_tensor_by_name("actor/weights:0")

    # Critic (value network): input placeholders, value prediction, and loss.
    c_state_input = graph.get_tensor_by_name("critic/state_input:0")
    c_true_vf_input = graph.get_tensor_by_name("critic/true_vf_input:0")
    c_output = graph.get_tensor_by_name("critic/output:0")
    # NOTE(review): tf.get_collection returns a *list* of collection entries,
    # while the `else` branch presumably unpacks a single optimizer op from
    # Critic.create_value_net — confirm Critic handles both, or index [0].
    c_optimizer = tf.get_collection("optimizer")
    c_loss = graph.get_tensor_by_name("critic/loss:0")

else:
    # Fresh start: construct the policy and value networks from scratch.
    # The helpers return the same tensor handles that the restore branch
    # above looks up by name — presumably in matching order; verify against
    # Actor.create_policy_net / Critic.create_value_net definitions.
    a_state_input, a_actions_input, a_advantages_input, \
        a_probabilities, a_weights = Actor.create_policy_net(env)

    c_state_input, c_true_vf_input, c_output, c_optimizer, c_loss = \
        Critic.create_value_net(env)

# Wrap the raw tensor handles in the Actor/Critic helper objects and combine
# them into the natural-actor-critic trainer.
actor = Actor(
    env,
    a_state_input,
    a_actions_input,
    a_advantages_input,
    a_probabilities,
    a_weights,
)
critic = Critic(
    env,
    c_state_input,
    c_true_vf_input,
    c_output,
    c_optimizer,
    c_loss,
)
nac = NAC(env, actor, critic)

# Only initialize variables when building a fresh model; a restored
# session already carries trained values.
if LOAD_WEIGHTS is None:
    sess.run(tf.global_variables_initializer())

# Record how long network generation/restoration took, in whole seconds.
elapsed_seconds = int(time.time() - start_time)
env.network_generation_time = elapsed_seconds
print(f"Done! (Time: {elapsed_seconds} seconds)")