예제 #1
0
from collections import namedtuple
import time
import os
import numpy as np
import random
from Agent import PG

# Device settings: expose only CUDA device "0" to this process.
# (The original comment mentioned tensorflow; the code below uses torch.)
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
path_voc = "../datas/VOCdevkit/VOC2007"

# Load the models.
# NOTE(review): getVGG_16bn, load_image_data and torch are not imported in the
# visible part of this file -- confirm they are in scope.
print("load models")
model_vgg = getVGG_16bn("../models")
# .cuda() is called unconditionally here, whereas the tensor aliases at the
# bottom of this section fall back to CPU types when CUDA is unavailable.
model_vgg = model_vgg.cuda()
# Presumably (learning rate, discount factor) -- TODO confirm against Agent.PG.
agent = PG(0.0002, 0.90)

# Load the image data.
# path_voc_1 repeats the same literal as path_voc above.
path_voc_1 = "../datas/VOCdevkit/VOC2007"
# String label passed to load_image_data; the print below suggests it selects
# the aeroplane trainval images -- verify against load_image_data.
class_object = '1'
image_names_1, images_1 = load_image_data(path_voc_1, class_object)
image_names = image_names_1
images = images_1

print("aeroplane_trainval image:%d" % len(image_names))

# Tensor type aliases: CUDA tensor types when a GPU is available, otherwise
# the CPU equivalents.
use_cuda = torch.cuda.is_available()
FloatTensor = torch.cuda.FloatTensor if use_cuda else torch.FloatTensor
LongTensor = torch.cuda.LongTensor if use_cuda else torch.LongTensor
ByteTensor = torch.cuda.ByteTensor if use_cuda else torch.ByteTensor
import copy
import pylab
import numpy as np
import tensorflow as tf
from Environment import Env
from Agent import PG
from Agent import TUC
import pickle

# Fix NumPy's global RNG seed so runs are repeatable.
np.random.seed(0)
EPISODES = 50

# Environment and policy-gradient agent, both built with default arguments.
env = Env()
agent = PG()

EP_reward_sums, episodes = [], []
# Start from the stored initial weights; the save call is left disabled.
#agent.save_model("./model_init/PG1")
agent.load_model("./model_init/PG1")

# Session settings (TensorFlow 1.x-style session API): limit this process to
# GPU_mem_ratio of the GPU memory.
GPU_mem_ratio = 0.2
gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=GPU_mem_ratio)
sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))

# Create recomposed transition critic
state_dim = 22
# NOTE(review): hidden_dim and critic_hidden_dim are assigned but not passed
# to TUC in this call -- confirm whether they are used further down.
hidden_dim = 3
critic_hidden_dim = 2
action_dim = 5
# Last argument is presumably the learning rate -- TODO confirm against Agent.TUC.
tuc = TUC(sess, "TUC", state_dim, action_dim, 0.003)
#tuc.save_model("./model_init/TUC1")
예제 #3
0
import copy
import pylab
import numpy as np
import tensorflow as tf
from Environment import Env
from Agent import PG
from Agent import VAE
import pickle

# Fix NumPy's global RNG seed so runs are repeatable.
np.random.seed(0)
EPISODES = 50

# Environment and policy-gradient agent, both built with default arguments.
env = Env()
agent = PG()


EP_reward_sums, episodes = [], []
# Start from the stored initial PG weights.
agent.load_model("./model_init/PG1")

# Session settings (TensorFlow 1.x-style session API): limit this process to
# GPU_mem_ratio of the GPU memory.
GPU_mem_ratio = 0.2
gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction = GPU_mem_ratio)
sess = tf.Session(config = tf.ConfigProto(gpu_options = gpu_options))

# Create the VAE model and store its freshly initialised weights.
# (The original comment here read "Create recomposed transition critic".)
# Arguments 22 and 5 presumably are state and action dimensions and 0.001 the
# learning rate -- TODO confirm against Agent.VAE.
vae = VAE(sess,"VAE",22,5,0.001)
vae.save_model("./model_init/VAE")

EPISODE = 50
action_dim = 5
mem_size = 5
# Load the 'aeroplane_val' split and keep only the images labelled '1'.
path_voc = "../datas/VOCdevkit/VOC2007"
image_names = np.array(
    load_images_names_in_data_set('aeroplane_val', path_voc))
labels = load_images_labels_in_data_set('aeroplane_val', path_voc)

# labels is indexed in parallel with image_names.
image_names_aero = []
for i, name in enumerate(image_names):
    if labels[i] != '1':
        continue
    image_names_aero.append(name)

image_names = image_names_aero
images = get_all_images(image_names, path_voc)
print("aeroplane_val image:%d" % len(image_names))

# Load the VGG model and move it to the GPU (unconditionally).
# NOTE(review): getVGG_16bn is not imported in the visible part of this file.
model_vgg = getVGG_16bn("../models")
model_vgg = model_vgg.cuda()

# Agent restored from the final saved weights; results are stored under
# exp_results_filename.
# PG arguments presumably (learning rate, discount factor) -- TODO confirm.
agent = PG(0.0002, 0.90)
agent.load_model("./model_final/pg_VIME_agent_2")
exp_results_filename = "PG_VIME_2"

# NOTE(review): class_object is the int 1 here, while the label filter earlier
# in this script compares against the string '1' -- confirm which type the
# downstream code expects.
class_object = 1
steps = 5
# Result accumulators filled by the per-image loop that follows.
res = []
res_step = []
res_annotations = []
for i in range(len(image_names)):
    image_name = image_names[i]
    image = images[i]

    # get use for iou calculation
    gt_annotation = get_bb_of_gt_from_pascal_xml_annotation(
        image_name, path_voc)
# Hyper-parameters for this experiment.
WINDOW_SIZE = 40
TUC_EPOCHS = 3
CRITIC_EPOCHS = 3
EP_WINDOW_SIZE = 5

# Environment settings
# NOTE(review): gym is not imported in the visible part of this file.
np.random.seed(1)
env = gym.make('CartPole-v0')
# Use the underlying environment object rather than the one gym.make returns.
env = env.unwrapped

# Session settings (TensorFlow 1.x-style session API).
# GPU_mem_ratio must already be defined before this point.
gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=GPU_mem_ratio)
sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))

# Create agents and networks
# 4 and 2 presumably are CartPole's state and action dimensions, followed by
# learning rate and discount factor -- TODO confirm against Agent.PG.
PG_agent = PG(sess, "PG", 4, 2, 0.02, 0.95)
PG_agent.load_model("./model_init/PG1")

# Build the VAE and store its freshly initialised weights.
vae = VAE(sess, "VAE", 4, 3, 2, 0.001)
vae.save_model("./model_init/VAE")

EPISODE = 100
mem_size = 5
#'''
# Train PG and DNN
for EP in range(1, EPISODE + 1):

    state = env.reset()
    done = 0
    t = 0
    EP_reward_sum = 0.
# KL divergence
def KL_divergence(mean_1,log_std_1,mean_2,log_std_2):
    """Return KL(N_1 || N_2) between two diagonal Gaussian distributions.

    Each distribution is described by its mean and the natural log of its
    standard deviation. Summed over all components i:

        KL = 0.5 * sum_i [ (s1_i / s2_i)^2
                           + 2 * (log s2_i - log s1_i)
                           + (m1_i - m2_i)^2 / s2_i^2
                           - 1 ]

    The previous implementation computed the mean-difference term but never
    added it (the log-variance term was added twice instead), and subtracted
    a constant 1 regardless of the number of components, so two identical
    multi-dimensional Gaussians did not yield 0.

    Args:
        mean_1: Mean of the first distribution (scalar or array-like).
        log_std_1: Log standard deviation of the first distribution.
        mean_2: Mean of the second distribution.
        log_std_2: Log standard deviation of the second distribution.
            All four arguments must be broadcastable against each other.

    Returns:
        The divergence as a non-negative NumPy scalar.
    """
    mean_1 = np.asarray(mean_1, dtype=float)
    log_std_1 = np.asarray(log_std_1, dtype=float)
    mean_2 = np.asarray(mean_2, dtype=float)
    log_std_2 = np.asarray(log_std_2, dtype=float)

    # (s1 / s2)^2, computed in log space.
    variance_ratio = np.exp(2.0 * (log_std_1 - log_std_2))
    log_variance_gap = 2.0 * (log_std_2 - log_std_1)
    # (m1 - m2)^2 / s2^2
    scaled_mean_gap = np.square(mean_1 - mean_2) * np.exp(-2.0 * log_std_2)

    # Subtracting 1 per component yields the "- k" term of the k-dim formula.
    per_component = variance_ratio + log_variance_gap + scaled_mean_gap - 1.0

    # KL is non-negative in theory; clamp to absorb floating-point round-off.
    return np.maximum(0, 0.5 * np.sum(per_component))
      
      
# Session settings (TensorFlow 1.x-style session API).
# GPU_mem_ratio must already be defined before this point.
gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction = GPU_mem_ratio)
sess = tf.Session(config = tf.ConfigProto(gpu_options = gpu_options))

# Create agents and networks
# Arguments after the name are presumably state dim, action dim, learning
# rate and discount factor -- TODO confirm against Agent.PG.
PG_agent = PG(sess,"PG",4,2,0.02,0.95)
# Start from the stored initial weights; the save call is left disabled.
#PG_agent.save_model("./model_init/PG1")
PG_agent.load_model("./model_init/PG1")


# Create transition uncertainty critic
state_dim = 4
hidden_dim = 3
critic_hidden_dim = 2
action_dim = 2
# Last argument is presumably the learning rate -- TODO confirm against Agent.TUC.
tuc = TUC(sess,"TUC",state_dim,hidden_dim,critic_hidden_dim,action_dim,0.003) 
# Start from the stored initial TUC weights; the save call is left disabled.
#tuc.save_model("./model_init/TUC1")
tuc.load_model("./model_init/TUC1")


# Run W/O TUC 
예제 #7
0
        result[i] = sum / (i+1)
 
    for i in range( N, len(l) ):
        sum = sum - l[i-N] + l[i]
        result[i] = sum / N
 
    return result    
      
      
      
# Session settings (TensorFlow 1.x-style session API).
# GPU_mem_ratio must already be defined before this point.
gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction = GPU_mem_ratio)
sess = tf.Session(config = tf.ConfigProto(gpu_options = gpu_options))

# Create agents and networks
# Restore the final weights saved under "PG1_KL".
PG_agent = PG(sess,"PG",4,2,0.02,0.95)
PG_agent.load_model("./model_final/PG1_KL")




# Per-episode bookkeeping for the single rollout that follows.
# NOTE(review): env is not created in the visible part of this fragment.
state = env.reset()
done = 0
t = 0
EP_reward_sum = 0.
   
    
while not done: 

      env.render()
    
예제 #8
0
import copy
import pylab
import numpy as np
import tensorflow as tf
from Environment import Env
from Agent import PG
from Agent import TUC
import pickle

# Fix NumPy's global RNG seed so runs are repeatable.
np.random.seed(0)
EPISODES = 50

# Environment and policy-gradient agent, both built with default arguments.
env = Env()
agent = PG()


EP_reward_sums, episodes = [], []

# Restore the final weights saved under "PG1_TUC".
agent.load_model("./model_final/PG1_TUC")

# Session settings (TensorFlow 1.x-style session API): limit this process to
# GPU_mem_ratio of the GPU memory.
GPU_mem_ratio = 0.2
gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction = GPU_mem_ratio)
sess = tf.Session(config = tf.ConfigProto(gpu_options = gpu_options))


# Name under which this experiment's results are stored.
exp_results_filename = "PG1_TUC"
EPs_total_reward = []
# NOTE(review): the meaning of these two ratios is not visible in this
# fragment -- document them where they are consumed.
ratio_init_1 = 0.9
ratio_init_2 = 100
예제 #9
0
import random
from Agent import PG
from Agent import BBN_module
from Agent import BBN


# Device settings: expose only CUDA device "0" to this process.
# (The original comment mentioned tensorflow; the model below is moved to the
# GPU with .cuda().)
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
path_voc = "../datas/VOCdevkit/VOC2007"

# Load the models.
# NOTE(review): getVGG_16bn and load_image_data are not imported in the
# visible part of this file -- confirm they are in scope.
print("load models")
model_vgg = getVGG_16bn("../models")
model_vgg = model_vgg.cuda()

# Presumably (learning rate, discount factor) -- TODO confirm against Agent.PG.
agent = PG(0.0002,0.90)
# BBN wraps the BBN_module instance; 0.001 is presumably its learning rate --
# TODO confirm against Agent.BBN.
module = BBN_module()
BBN_dynamic = BBN(module, 0.001)


# Load the image data.
# path_voc_1 repeats the same literal as path_voc above.
path_voc_1 = "../datas/VOCdevkit/VOC2007"
# String label passed to load_image_data; the print below suggests it selects
# the aeroplane trainval images -- verify against load_image_data.
class_object = '1'
image_names_1, images_1 = load_image_data(path_voc_1, class_object)
image_names = image_names_1 
images = images_1

print("aeroplane_trainval image:%d" % len(image_names))


# define the Pytorch Tensor
예제 #10
0
import copy
import pylab
import numpy as np
from Environment import Env
from Agent import PG
import pickle

# Fix NumPy's global RNG seed so runs are repeatable.
np.random.seed(0)
EPISODES = 50

# Environment and policy-gradient agent, both built with default arguments.
env = Env()
agent = PG()

EP_reward_sums, episodes = [], []
# This script writes the agent's initial weights to "./model_init/PG1"
# instead of loading them; the load call is left disabled.
agent.save_model("./model_init/PG1")
#agent.load_model("./model_init/PG1")

# Name under which this experiment's results are stored.
exp_results_filename = "PG1"
EPs_total_reward = []

for EP in range(EPISODES):

    done = False
    EP_reward_sum = 0
    state = env.reset()
    state = np.reshape(state, [1, 22])
    t = 0

    while not done:

        t += 1
예제 #11
0
import os
import numpy as np
import random
from Agent import PG
from Agent import TUC

# Device settings: expose only CUDA device "0" to this process.
# (The original comment mentioned tensorflow; the code below uses torch.)
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
path_voc = "../datas/VOCdevkit/VOC2007"

# Load the models.
# NOTE(review): getVGG_16bn, load_image_data and torch are not imported in the
# visible part of this file -- confirm they are in scope.
print("load models")
model_vgg = getVGG_16bn("../models")
# .cuda() is called unconditionally here, whereas the tensor aliases at the
# bottom of this section fall back to CPU types when CUDA is unavailable.
model_vgg = model_vgg.cuda()

# Presumably (learning rate, discount factor) -- TODO confirm against Agent.PG.
agent = PG(0.0002, 0.90)
# Single argument is presumably the learning rate -- TODO confirm against Agent.TUC.
TUC_dynamic = TUC(0.001)

# Load the image data.
# path_voc_1 repeats the same literal as path_voc above.
path_voc_1 = "../datas/VOCdevkit/VOC2007"
# String label passed to load_image_data; the print below suggests it selects
# the aeroplane trainval images -- verify against load_image_data.
class_object = '1'
image_names_1, images_1 = load_image_data(path_voc_1, class_object)
image_names = image_names_1
images = images_1

print("aeroplane_trainval image:%d" % len(image_names))

# Tensor type aliases: CUDA tensor types when a GPU is available, otherwise
# the CPU equivalents.
use_cuda = torch.cuda.is_available()
FloatTensor = torch.cuda.FloatTensor if use_cuda else torch.FloatTensor
LongTensor = torch.cuda.LongTensor if use_cuda else torch.LongTensor