# Network setup: obtain the actor/critic graph handles, wrap them, and report timing.
#
# First branch (its governing `if` is above this chunk -- presumably a
# LOAD_WEIGHTS check; confirm): fetch the actor and critic tensors from
# `graph` by name ("actor/..." and "critic/..."), and read the critic
# optimizer from the "optimizer" collection.
# NOTE(review): tf.get_collection returns a list, while the else-branch takes
# c_optimizer from Critic.create_value_net(env) -- confirm Critic accepts both.
#
# else-branch: build the handles from scratch with Actor.create_policy_net(env)
# and Critic.create_value_net(env).
#
# After either branch: construct Actor, Critic and NAC from the handles, run
# tf.global_variables_initializer() only when LOAD_WEIGHTS is None, store the
# elapsed whole seconds since start_time in env.network_generation_time, and
# print it.
a_state_input = graph.get_tensor_by_name("actor/state_input:0") a_actions_input = graph.get_tensor_by_name("actor/actions_input:0") a_advantages_input = \ graph.get_tensor_by_name("actor/advantages_input:0") a_probabilities = graph.get_tensor_by_name("actor/probabilities:0") a_weights = graph.get_tensor_by_name("actor/weights:0") c_state_input = graph.get_tensor_by_name("critic/state_input:0") c_true_vf_input = graph.get_tensor_by_name("critic/true_vf_input:0") c_output = graph.get_tensor_by_name("critic/output:0") c_optimizer = tf.get_collection("optimizer") c_loss = graph.get_tensor_by_name("critic/loss:0") else: a_state_input, a_actions_input, a_advantages_input, \ a_probabilities, a_weights = Actor.create_policy_net(env) c_state_input, c_true_vf_input, c_output, c_optimizer, c_loss = \ Critic.create_value_net(env) actor = Actor(env, a_state_input, a_actions_input, a_advantages_input, a_probabilities, a_weights) critic = Critic(env, c_state_input, c_true_vf_input, c_output, c_optimizer, c_loss) nac = NAC(env, actor, critic) if LOAD_WEIGHTS is None: sess.run(tf.global_variables_initializer()) env.network_generation_time = int(time.time() - start_time) print("Done! (Time: " + str(env.network_generation_time) + " seconds)")