# Episode bookkeeping for the policy-gradient agent.
EP_reward_sums, episodes = [], []
#agent.save_model("./model_init/PG1")
agent.load_model("./model_init/PG1")

# Session settings: cap TensorFlow's GPU memory so the card can be shared.
GPU_mem_ratio = 0.2
gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=GPU_mem_ratio)
sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))

# Create recomposed transition critic.
# NOTE(review): hidden_dim / critic_hidden_dim are defined but not passed to
# TUC here, unlike the sibling script that calls
# TUC(sess, name, state_dim, hidden_dim, critic_hidden_dim, action_dim, lr)
# -- confirm which constructor signature is current.
state_dim = 22
hidden_dim = 3
critic_hidden_dim = 2
action_dim = 5
tuc = TUC(sess, "TUC", state_dim, action_dim, 0.003)
#tuc.save_model("./model_init/TUC1")
tuc.load_model("./model_init/TUC1")


# KL divergence
def KL_divergence(mean_1, log_std_1, mean_2, log_std_2):
    """Return KL(N1 || N2) for two diagonal Gaussians given means and log-stds.

    Closed form:
        0.5 * ( tr(S2^-1 S1) + log(|S2|/|S1|)
                + (m2 - m1)^T S2^-1 (m2 - m1) - k )
    where k is the dimensionality. The result is clamped at 0 to guard
    against tiny negative values from floating-point error.
    """
    # Trace term: sum of (sigma_1 / sigma_2)^2.
    term_1 = np.sum(np.square(np.divide(np.exp(log_std_1), np.exp(log_std_2))))
    # Log-determinant ratio: sum of 2*log(sigma_2) - 2*log(sigma_1).
    term_2 = np.sum(2 * log_std_2 - 2 * log_std_1)
    # Mahalanobis term: squared mean difference scaled by 1/sigma_2^2.
    term_3 = np.sum(
        np.divide(np.square(mean_1 - mean_2), np.square(np.exp(log_std_2))))
    # Fixed: the original summed term_2 twice, never used term_3, and
    # subtracted the constant 1 instead of the dimensionality k.
    # np.size(mean_1) == 1 for scalar inputs, so scalar behavior is unchanged.
    return np.maximum(0, 0.5 * (term_1 + term_2 + term_3 - np.size(mean_1)))
# Session settings gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=GPU_mem_ratio) sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) # Create agents and networks PG_agent = PG(sess, "PG", 4, 2, 0.02, 0.95) PG_agent.load_model("./model_final/PG1_KL") # Create recomposed transition critic state_dim = 4 hidden_dim = 3 critic_hidden_dim = 2 action_dim = 2 tuc = TUC(sess, "TUC", state_dim, hidden_dim, critic_hidden_dim, action_dim, 0.003) tuc.load_model("./model_final/TUC1_KL") EPs_total_reward = [] states = [] next_states = [] actions = [] values = [] #EP = EPISODE for EP in range(1, 2): state = env.reset() done = 0 t = 0
# Policy-gradient agent restored from its initial checkpoint.
agent = PG()
EP_reward_sums, episodes = [], []
agent.load_model("./model_init/PG1")

# Session settings: cap TensorFlow's GPU memory so the card can be shared.
GPU_mem_ratio = 0.2
gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=GPU_mem_ratio)
sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))

# Create recomposed transition critic.
# NOTE(review): hidden_dim / critic_hidden_dim are defined but not passed to
# TUC here, unlike the sibling script that passes them to the constructor --
# confirm which TUC signature is current.
state_dim = 22
hidden_dim = 3
critic_hidden_dim = 2
action_dim = 5
tuc = TUC(sess, "TUC", state_dim, action_dim, 0.003)
tuc.load_model("./model_final/TUC1")


# KL divergence
def KL_divergence(mean_1, log_std_1, mean_2, log_std_2):
    """Return KL(N1 || N2) for two diagonal Gaussians given means and log-stds.

    Closed form:
        0.5 * ( tr(S2^-1 S1) + log(|S2|/|S1|)
                + (m2 - m1)^T S2^-1 (m2 - m1) - k )
    where k is the dimensionality. The result is clamped at 0 to guard
    against tiny negative values from floating-point error.
    """
    # Trace term: sum of (sigma_1 / sigma_2)^2.
    term_1 = np.sum(np.square(np.divide(np.exp(log_std_1), np.exp(log_std_2))))
    # Log-determinant ratio: sum of 2*log(sigma_2) - 2*log(sigma_1).
    term_2 = np.sum(2 * log_std_2 - 2 * log_std_1)
    # Mahalanobis term: squared mean difference scaled by 1/sigma_2^2.
    term_3 = np.sum(
        np.divide(np.square(mean_1 - mean_2), np.square(np.exp(log_std_2))))
    # Fixed: the original summed term_2 twice, never used term_3, and
    # subtracted the constant 1 instead of the dimensionality k.
    # np.size(mean_1) == 1 for scalar inputs, so scalar behavior is unchanged.
    return np.maximum(0, 0.5 * (term_1 + term_2 + term_3 - np.size(mean_1)))


# File stem under which experiment results are written.
exp_results_filename = "PG1_TUC"
# Fixed: `os` and `torch` were used below without being imported anywhere in
# this chunk's own import block; imports regrouped stdlib / third-party / local.
import os
import random

import numpy as np
import torch

from Agent import PG
from Agent import TUC

# Device settings for tensorflow
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

path_voc = "../datas/VOCdevkit/VOC2007"

# get models
# NOTE(review): getVGG_16bn and load_image_data are not defined or imported in
# this chunk -- presumably provided elsewhere in the project; confirm.
print("load models")
model_vgg = getVGG_16bn("../models")
model_vgg = model_vgg.cuda()
agent = PG(0.0002, 0.90)
TUC_dynamic = TUC(0.001)

# get image datas
path_voc_1 = "../datas/VOCdevkit/VOC2007"
class_object = '1'
image_names_1, images_1 = load_image_data(path_voc_1, class_object)
image_names = image_names_1
images = images_1
print("aeroplane_trainval image:%d" % len(image_names))

# define the Pytorch Tensor
# Tensor aliases resolve to CUDA variants when a GPU is available, CPU otherwise.
use_cuda = torch.cuda.is_available()
FloatTensor = torch.cuda.FloatTensor if use_cuda else torch.FloatTensor
LongTensor = torch.cuda.LongTensor if use_cuda else torch.LongTensor
ByteTensor = torch.cuda.ByteTensor if use_cuda else torch.ByteTensor