from collections import namedtuple
import os
import random
import time

import numpy as np

from Agent import PG

# Expose only CUDA device 0 to this process.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

path_voc = "../datas/VOCdevkit/VOC2007"

# Load the VGG-16 (batch-norm) model onto the GPU and create the PG agent.
print("load models")
model_vgg = getVGG_16bn("../models").cuda()
agent = PG(0.0002, 0.90)

# Load the image data for the selected class label.
path_voc_1 = "../datas/VOCdevkit/VOC2007"
class_object = '1'
image_names_1, images_1 = load_image_data(path_voc_1, class_object)
image_names, images = image_names_1, images_1
print("aeroplane_trainval image:%d" % len(image_names))

# Pick the tensor constructors matching the available device.
use_cuda = torch.cuda.is_available()
if use_cuda:
    FloatTensor = torch.cuda.FloatTensor
    LongTensor = torch.cuda.LongTensor
    ByteTensor = torch.cuda.ByteTensor
else:
    FloatTensor = torch.FloatTensor
    LongTensor = torch.LongTensor
    ByteTensor = torch.ByteTensor
import copy
import pickle

import pylab
import numpy as np
import tensorflow as tf

from Environment import Env
from Agent import PG, TUC

np.random.seed(0)

EPISODES = 50
env = Env()
agent = PG()
EP_reward_sums = []
episodes = []

# The initial checkpoint is loaded rather than regenerated.
#agent.save_model("./model_init/PG1")
agent.load_model("./model_init/PG1")

# Session settings: cap the fraction of GPU memory this process may use.
GPU_mem_ratio = 0.2
gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=GPU_mem_ratio)
sess = tf.Session(
    config=tf.ConfigProto(gpu_options=gpu_options),
)

# Create recomposed transition critic
state_dim = 22
hidden_dim = 3
critic_hidden_dim = 2
action_dim = 5
tuc = TUC(sess, "TUC", state_dim, action_dim, 0.003)
#tuc.save_model("./model_init/TUC1")
import copy
import pickle

import pylab
import numpy as np
import tensorflow as tf

from Environment import Env
from Agent import PG, VAE

np.random.seed(0)

EPISODES = 50
env = Env()
agent = PG()
EP_reward_sums = []
episodes = []
agent.load_model("./model_init/PG1")

# Session settings: cap the fraction of GPU memory this process may use.
GPU_mem_ratio = 0.2
gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=GPU_mem_ratio)
sess = tf.Session(
    config=tf.ConfigProto(gpu_options=gpu_options),
)

# Create the VAE and save its freshly initialised weights.
vae = VAE(sess, "VAE", 22, 5, 0.001)
vae.save_model("./model_init/VAE")

EPISODE = 50
action_dim = 5
mem_size = 5
path_voc = "../datas/VOCdevkit/VOC2007"

# Load the 'aeroplane_val' image names and labels, then keep only the
# images whose label is '1'.
image_names = np.array(load_images_names_in_data_set('aeroplane_val', path_voc))
labels = load_images_labels_in_data_set('aeroplane_val', path_voc)
image_names_aero = [
    name for idx, name in enumerate(image_names) if labels[idx] == '1'
]
image_names = image_names_aero
images = get_all_images(image_names, path_voc)
print("aeroplane_val image:%d" % len(image_names))

# Load the VGG-16 (batch-norm) model onto the GPU and restore the agent.
model_vgg = getVGG_16bn("../models").cuda()
agent = PG(0.0002, 0.90)
agent.load_model("./model_final/pg_VIME_agent_2")

exp_results_filename = "PG_VIME_2"
class_object = 1
steps = 5

# Result accumulators filled in by the loop below.
res = []
res_step = []
res_annotations = []

for i, image_name in enumerate(image_names):
    image = images[i]
    # Ground-truth annotation, used for the IoU calculation.
    gt_annotation = get_bb_of_gt_from_pascal_xml_annotation(
        image_name, path_voc)
WINDOW_SIZE = 40
TUC_EPOCHS = 3
CRITIC_EPOCHS = 3
EP_WINDOW_SIZE = 5

# Environment settings: seeded NumPy RNG and the unwrapped CartPole env.
np.random.seed(1)
env = gym.make('CartPole-v0').unwrapped

# Session settings: cap the fraction of GPU memory this process may use.
gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=GPU_mem_ratio)
sess = tf.Session(
    config=tf.ConfigProto(gpu_options=gpu_options),
)

# Create agents and networks: restore the PG agent, then build the VAE and
# save its freshly initialised weights.
PG_agent = PG(sess, "PG", 4, 2, 0.02, 0.95)
PG_agent.load_model("./model_init/PG1")
vae = VAE(sess, "VAE", 4, 3, 2, 0.001)
vae.save_model("./model_init/VAE")

EPISODE = 100
mem_size = 5

#'''
# Train PG and DNN
for EP in range(1, EPISODE + 1):
    # Per-episode bookkeeping, reset at the start of every episode.
    state = env.reset()
    done = 0
    t = 0
    EP_reward_sum = 0.
# KL divergence
def KL_divergence(mean_1, log_std_1, mean_2, log_std_2):
    """Return KL(N1 || N2) between two diagonal Gaussians.

    N1 has mean ``mean_1`` and standard deviation ``exp(log_std_1)``;
    N2 has mean ``mean_2`` and standard deviation ``exp(log_std_2)``.
    For each dimension the divergence is

        0.5 * (s1^2 / s2^2 + 2*log(s2) - 2*log(s1) + (m1 - m2)^2 / s2^2 - 1)

    and the result is the sum over all dimensions.

    Args:
        mean_1, mean_2: Means, as scalars or NumPy arrays.
        log_std_1, log_std_2: Log standard deviations, as scalars or arrays
            broadcastable against the means.

    Returns:
        A non-negative NumPy scalar.
    """
    mean_1 = np.asarray(mean_1, dtype=float)
    mean_2 = np.asarray(mean_2, dtype=float)
    log_std_1 = np.asarray(log_std_1, dtype=float)
    log_std_2 = np.asarray(log_std_2, dtype=float)

    # log(s1 / s2); the variance ratio is exp(2 * log_ratio).
    log_ratio = log_std_1 - log_std_2
    variance_ratio = np.exp(2.0 * log_ratio)
    # Squared mean difference scaled by the variance of N2.
    scaled_mean_gap = np.square(mean_1 - mean_2) / np.exp(2.0 * log_std_2)

    # Subtracting 1 per dimension (not once overall) makes the divergence of
    # two identical distributions exactly zero for any dimensionality.
    per_dim = 0.5 * (variance_ratio - 2.0 * log_ratio + scaled_mean_gap - 1.0)

    # Clamp tiny negative values caused by floating-point rounding.
    return np.maximum(0, np.sum(per_dim))


# Session settings: cap the fraction of GPU memory this process may use.
gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=GPU_mem_ratio)
sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))

# Create agents and networks
PG_agent = PG(sess, "PG", 4, 2, 0.02, 0.95)
#PG_agent.save_model("./model_init/PG1")
PG_agent.load_model("./model_init/PG1")

# Create transition uncertainty critic
state_dim = 4
hidden_dim = 3
critic_hidden_dim = 2
action_dim = 2
tuc = TUC(sess, "TUC", state_dim, hidden_dim, critic_hidden_dim, action_dim, 0.003)
#tuc.save_model("./model_init/TUC1")
tuc.load_model("./model_init/TUC1")

# Run W/O TUC
result[i] = sum / (i+1) for i in range( N, len(l) ): sum = sum - l[i-N] + l[i] result[i] = sum / N return result # Session settings gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction = GPU_mem_ratio) sess = tf.Session(config = tf.ConfigProto(gpu_options = gpu_options)) # Create agents and networks PG_agent = PG(sess,"PG",4,2,0.02,0.95) PG_agent.load_model("./model_final/PG1_KL") state = env.reset() done = 0 t = 0 EP_reward_sum = 0. while not done: env.render()
import copy
import pickle

import pylab
import numpy as np
import tensorflow as tf

from Environment import Env
from Agent import PG, TUC

np.random.seed(0)

EPISODES = 50
env = Env()
agent = PG()
EP_reward_sums = []
episodes = []
agent.load_model("./model_final/PG1_TUC")

# Session settings: cap the fraction of GPU memory this process may use.
GPU_mem_ratio = 0.2
gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=GPU_mem_ratio)
sess = tf.Session(
    config=tf.ConfigProto(gpu_options=gpu_options),
)

exp_results_filename = "PG1_TUC"
EPs_total_reward = []
ratio_init_1 = 0.9
ratio_init_2 = 100
import random

from Agent import PG
from Agent import BBN_module
from Agent import BBN

# Expose only CUDA device 0 to this process.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

path_voc = "../datas/VOCdevkit/VOC2007"

# Load the VGG-16 (batch-norm) model onto the GPU, then create the PG agent
# and the BBN built on top of its module.
print("load models")
model_vgg = getVGG_16bn("../models").cuda()
agent = PG(0.0002, 0.90)
module = BBN_module()
BBN_dynamic = BBN(module, 0.001)

# Load the image data for the selected class label.
path_voc_1 = "../datas/VOCdevkit/VOC2007"
class_object = '1'
image_names_1, images_1 = load_image_data(path_voc_1, class_object)
image_names, images = image_names_1, images_1
print("aeroplane_trainval image:%d" % len(image_names))

# define the Pytorch Tensor
import copy
import pickle

import pylab
import numpy as np

from Environment import Env
from Agent import PG

np.random.seed(0)

EPISODES = 50
env = Env()
agent = PG()
EP_reward_sums = []
episodes = []

# Save the freshly initialised agent; loading it back is disabled here.
agent.save_model("./model_init/PG1")
#agent.load_model("./model_init/PG1")

exp_results_filename = "PG1"
EPs_total_reward = []

for EP in range(EPISODES):
    # Per-episode bookkeeping, reset at the start of every episode.
    done = False
    EP_reward_sum = 0
    # Reshape the initial observation to a single row of 22 values.
    state = np.reshape(env.reset(), [1, 22])
    t = 0
    while not done:
        t += 1
import os
import random

import numpy as np

from Agent import PG
from Agent import TUC

# Expose only CUDA device 0 to this process.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

path_voc = "../datas/VOCdevkit/VOC2007"

# Load the VGG-16 (batch-norm) model onto the GPU, then create the PG agent
# and the TUC.
print("load models")
model_vgg = getVGG_16bn("../models").cuda()
agent = PG(0.0002, 0.90)
TUC_dynamic = TUC(0.001)

# Load the image data for the selected class label.
path_voc_1 = "../datas/VOCdevkit/VOC2007"
class_object = '1'
image_names_1, images_1 = load_image_data(path_voc_1, class_object)
image_names, images = image_names_1, images_1
print("aeroplane_trainval image:%d" % len(image_names))

# Pick the tensor constructors matching the available device.
use_cuda = torch.cuda.is_available()
if use_cuda:
    FloatTensor = torch.cuda.FloatTensor
    LongTensor = torch.cuda.LongTensor
else:
    FloatTensor = torch.FloatTensor
    LongTensor = torch.LongTensor