Code Example #1
EP_reward_sums, episodes = [], []
#agent.save_model("./model_init/PG1")
agent.load_model("./model_init/PG1")  # `agent` is a PG instance created earlier in the script

# Session settings
GPU_mem_ratio = 0.2
gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=GPU_mem_ratio)
sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))

# Create recomposed transition critic
state_dim = 22
hidden_dim = 3
critic_hidden_dim = 2
action_dim = 5
tuc = TUC(sess, "TUC", state_dim, hidden_dim, critic_hidden_dim, action_dim,
          0.003)
#tuc.save_model("./model_init/TUC1")
tuc.load_model("./model_init/TUC1")


# KL divergence
def KL_divergence(mean_1, log_std_1, mean_2, log_std_2):

    # Closed-form KL(N1 || N2) for diagonal Gaussians:
    # 0.5 * (sum(var1/var2) + sum(log var2 - log var1)
    #        + sum((mean1 - mean2)^2 / var2) - k)
    term_1 = np.sum(np.square(np.divide(np.exp(log_std_1), np.exp(log_std_2))))
    term_2 = np.sum(2 * log_std_2 - 2 * log_std_1)
    term_3 = np.sum(
        np.divide(np.square(mean_1 - mean_2), np.square(np.exp(log_std_2))))
    k = np.asarray(mean_1).size  # dimensionality of the Gaussians

    return np.maximum(0, 0.5 * (term_1 + term_2 + term_3 - k))
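
As a quick sanity check (illustrative, not part of the original script), the closed-form KL above can be exercised on known inputs: the KL of a Gaussian against itself should be zero, and against a wider Gaussian it should be strictly positive.

# Illustrative check of KL_divergence: KL(p, p) is 0; widening the second
# Gaussian's standard deviation makes the divergence positive.
mean = np.zeros(3)
log_std = np.zeros(3)                     # unit variance
wide_log_std = np.log(2.0) * np.ones(3)   # std = 2

print(KL_divergence(mean, log_std, mean, log_std))       # -> 0.0
print(KL_divergence(mean, log_std, mean, wide_log_std))  # -> ~0.954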

Code Example #2

# Session settings
GPU_mem_ratio = 0.2  # same fraction used in the companion scripts
gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=GPU_mem_ratio)
sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))

# Create agents and networks
PG_agent = PG(sess, "PG", 4, 2, 0.02, 0.95)
PG_agent.load_model("./model_final/PG1_KL")

# Create recomposed transition critic
state_dim = 4
hidden_dim = 3
critic_hidden_dim = 2
action_dim = 2
tuc = TUC(sess, "TUC", state_dim, hidden_dim, critic_hidden_dim, action_dim,
          0.003)
tuc.load_model("./model_final/TUC1_KL")

EPs_total_reward = []

states = []
next_states = []
actions = []
values = []

#EP = EPISODE
for EP in range(1, 2):

    state = env.reset()
    done = 0
    t = 0
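
The episode loop is truncated in this excerpt. A minimal sketch of how such a rollout body typically continues, assuming a Gym-style env.step API and that PG_agent exposes a get_action method (both are assumptions, not confirmed by the source):

    # Hypothetical continuation of the truncated loop above.
    total_reward = 0
    while not done:
        action = PG_agent.get_action(state)          # assumed agent API
        next_state, reward, done, _ = env.step(action)
        states.append(state)
        actions.append(action)
        next_states.append(next_state)
        total_reward += reward
        state = next_state
        t += 1
    EPs_total_reward.append(total_reward)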
Code Example #3
agent = PG()

EP_reward_sums, episodes = [], []
agent.load_model("./model_init/PG1")

# Session settings
GPU_mem_ratio = 0.2
gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=GPU_mem_ratio)
sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))

# Create recomposed transition critic
state_dim = 22
hidden_dim = 3
critic_hidden_dim = 2
action_dim = 5
tuc = TUC(sess, "TUC", state_dim, hidden_dim, critic_hidden_dim, action_dim,
          0.003)
tuc.load_model("./model_final/TUC1")


# KL divergence
def KL_divergence(mean_1, log_std_1, mean_2, log_std_2):

    # Closed-form KL(N1 || N2) for diagonal Gaussians:
    # 0.5 * (sum(var1/var2) + sum(log var2 - log var1)
    #        + sum((mean1 - mean2)^2 / var2) - k)
    term_1 = np.sum(np.square(np.divide(np.exp(log_std_1), np.exp(log_std_2))))
    term_2 = np.sum(2 * log_std_2 - 2 * log_std_1)
    term_3 = np.sum(
        np.divide(np.square(mean_1 - mean_2), np.square(np.exp(log_std_2))))
    k = np.asarray(mean_1).size  # dimensionality of the Gaussians

    return np.maximum(0, 0.5 * (term_1 + term_2 + term_3 - k))


exp_results_filename = "PG1_TUC"
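
The excerpt ends after choosing a results filename. A minimal sketch of how the collected rewards might be persisted under that name (the np.save call and the "_rewards.npy" suffix are illustrative assumptions, not from the source):

# Illustrative only: save the per-episode reward sums for later plotting.
np.save(exp_results_filename + "_rewards.npy", np.asarray(EP_reward_sums))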
Code Example #4
File: Run_PG_TUC.py Project: johanesn/Wei-Lin-Liao
import os
import random

import numpy as np
import torch
from Agent import PG
from Agent import TUC
# getVGG_16bn and load_image_data come from local helper modules
# whose imports are omitted in this excerpt.

# Device settings: expose only GPU 0 to CUDA
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
path_voc = "../datas/VOCdevkit/VOC2007"

# get models
print("load models")
model_vgg = getVGG_16bn("../models")
model_vgg = model_vgg.cuda()

agent = PG(0.0002, 0.90)
TUC_dynamic = TUC(0.001)

# get image data
path_voc_1 = "../datas/VOCdevkit/VOC2007"
class_object = '1'  # VOC class index as a string; '1' is aeroplane (see print below)
image_names_1, images_1 = load_image_data(path_voc_1, class_object)
image_names = image_names_1
images = images_1

print("aeroplane_trainval image:%d" % len(image_names))

# Define PyTorch tensor type aliases (CUDA versions if a GPU is available)
use_cuda = torch.cuda.is_available()
FloatTensor = torch.cuda.FloatTensor if use_cuda else torch.FloatTensor
LongTensor = torch.cuda.LongTensor if use_cuda else torch.LongTensor
ByteTensor = torch.cuda.ByteTensor if use_cuda else torch.ByteTensor
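
These aliases let later code construct tensors without branching on CUDA availability at every call site. An illustrative use (the values are made up for the example):

# Illustrative: the same lines work on CPU and GPU thanks to the aliases.
state = FloatTensor([[0.0, 1.0, 2.0, 3.0]])  # batch of one 4-dim state
action = LongTensor([1])                      # integer action index

In current PyTorch the same effect is usually achieved with a torch.device object and Tensor.to(device), but this alias pattern was common in older code like this script.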