Example 1
def mytest(env_name,
           eval_episode=10,
           num_init_traj=1,
           max_horizon=15,
           ensemble=1,
           gt=0,
           finetune=False,
           finetune_iter=41,
           finetune_proc=10,
           cem_iter=20):
    NUMS = {
        'HalfCheetahPT-v2': 6,
        'HopperPT-v2': 5,
        'Walker2dPT-v2': 8,
    }

    num = NUMS[env_name]

    if not finetune:
        policy_net = get_awr_network(env_name, num)
    else:
        policy_net = get_finetune_network(env_name,
                                          num,
                                          num_iter=finetune_iter,
                                          num_proc=finetune_proc)

    model = make_parallel(10, env_name, num=num, stochastic=False)
    env = make(env_name, num=num, resample_MP=True, stochastic=False)

    params = get_params(env)
    mean_params = np.array([0.5] * len(params))
    osi = CEMOSI(model,
                 mean_params,
                 iter_num=cem_iter,
                 num_mutation=100,
                 num_elite=10,
                 std=0.3)

    rewards, dist = online_osi(env,
                               osi,
                               policy_net,
                               num_init_traj=num_init_traj,
                               max_horizon=max_horizon,
                               eval_episodes=eval_episode,
                               use_state=False,
                               print_timestep=10000,
                               resample_MP=True,
                               ensemble=ensemble,
                               online=0,
                               gt=gt)
    rewards = np.array(rewards)
    print('l2 distance', dist)
    print('rewards', rewards)
    return {
        'mean': rewards.mean(),
        'std': rewards.std(),
        'min': rewards.min(),
        'max': rewards.max(),
        'dist': dist.mean(),
    }
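A hypothetical invocation of the function above (not part of the original snippet); any of the three environments listed in NUMS would work the same way:

# Hypothetical usage: evaluate CEM-based system identification on Hopper.
stats = mytest('HopperPT-v2', eval_episode=10, ensemble=1)
print('mean reward', stats['mean'], '+/-', stats['std'])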
Example 2
def test_up_diff():
    env_name = 'HopperPT-v2'
    num = 5

    policy_net = get_awr_network(env_name, num)

    model = make_parallel(30, env_name, num=num, stochastic=False)
    env = make(env_name, num=num, resample_MP=True, stochastic=False)

    params = get_params(env)
    #set_params(env, [0.55111654,0.55281674,0.46355396,0.84531834,0.58944066])
    set_params(env,
               [0.31851129, 0.93941556, 0.02147825, 0.43523052, 1.02611646])
    set_params(env,
               [0.94107358, 0.77519005, 0.44055224, 0.9369426, -0.03846457])
    set_params(env,
               [0.05039606, 0.14680257, 0.56502066, 0.25723492, 0.73810709])

    mean_params = np.array([0.5] * len(params))
    osi = DiffOSI(model, mean_params, 0.001, iter=100, momentum=0.9, eps=1e-3)
    policy_net.set_params(mean_params)

    # This configuration was the last one I ran;
    # the online variant turned out to be very useful.
    online_osi(env,
               osi,
               policy_net,
               num_init_traj=5,
               max_horizon=15,
               eval_episodes=20,
               use_state=False,
               print_timestep=10000,
               resample_MP=True,
               online=0)
Example 3
def test_UP():
    env_name = 'DartHopperPT-v1'
    num = 5

    policy_net = get_up_network(env_name, num)

    env = make(env_name, num=num, resample_MP=True)
    eval_policy(policy_net,
                env,
                10,
                0,
                None,
                timestep=1000,
                use_state=False,
                set_gt_params=True)
Example 4
def test_up_osi():
    #env_name = 'DartHopperPT-v1'
    env_name = 'HopperPT-v2'
    num = 5

    #policy_net = get_up_network(env_name, num)
    policy_net = get_awr_network(env_name, num)

    model = make_parallel(10, env_name, num=num, stochastic=False)
    env = make(env_name, num=num, resample_MP=True, stochastic=False)

    params = get_params(env)
    #set_params(env, [0.55111654,0.55281674,0.46355396,0.84531834,0.58944066])
    set_params(env,
               [0.31851129, 0.93941556, 0.02147825, 0.43523052, 1.02611646])
    set_params(env,
               [0.94107358, 0.77519005, 0.44055224, 0.9369426, -0.03846457])
    set_params(env,
               [0.05039606, 0.14680257, 0.56502066, 0.25723492, 0.73810709])

    mean_params = np.array([0.5] * len(params))
    osi = CEMOSI(model,
                 mean_params,
                 iter_num=20,
                 num_mutation=100,
                 num_elite=10,
                 std=0.3)
    policy_net.set_params(mean_params)

    online_osi(env,
               osi,
               policy_net,
               num_init_traj=5,
               max_horizon=15,
               eval_episodes=30,
               use_state=False,
               print_timestep=10000,
               resample_MP=True,
               ensemble=1,
               online=0,
               gt=0)
Example 5
def test_POLO():
    #env_name = 'DartWalker2dPT-v1'
    #num = 8
    env_name = 'DartHopperPT-v1'
    num = 5

    value_net = get_td3_value(env_name)
    #value_net = None

    parser = argparse.ArgumentParser()
    add_parser(parser)
    args = parser.parse_args()

    model = make_parallel(args.num_proc, env_name, num=num, stochastic=True)
    env = make(env_name, num=num, resample_MP=False)

    controller = POLO(value_net,
                      model,
                      action_space=env.action_space,
                      add_actions=args.add_actions,
                      horizon=args.horizon,
                      std=args.std,
                      iter_num=args.iter_num,
                      initial_iter=args.initial_iter,
                      num_mutation=args.num_mutation,
                      num_elite=args.num_elite,
                      alpha=0.1,
                      trunc_norm=True,
                      lower_bound=env.action_space.low,
                      upper_bound=env.action_space.high,
                      replan_period=5)

    trajectories = eval_policy(controller,
                               env,
                               10,
                               args.video_num,
                               args.video_path,
                               timestep=args.timestep,
                               set_gt_params=True,
                               print_timestep=100)
Example 6
import tensorflow as tf
import model
import data

# Object responsible for creating the training data
g = data.Data()
# Option so TensorFlow does not grab all of the GPU memory
config = tf.ConfigProto()
config.gpu_options.allow_growth = True
sess = tf.Session(config=config)
tf.keras.backend.set_session(sess)
# Build the model
model = model.make(tflite=False)
# Define the optimizer
optimizer = tf.keras.optimizers.Adam(lr=0.001)
model.compile(optimizer=optimizer,
              loss="categorical_crossentropy",
              metrics=["categorical_accuracy"])


# Callback
class Callback(tf.keras.callbacks.Callback):
    def on_epoch_end(self, epoch, logs=None):
        "各エポック終了時に重みを保存する"
        model.save("weight.hdf5")


cb = Callback()
# When resuming training from a previous run
initial_epoch = 0
if initial_epoch >= 1:
    # Assumed continuation (the original snippet is truncated here):
    # reload the weights saved by the epoch callback before resuming.
    model.load_weights("weight.hdf5")
Example 7
def test_cem_osi():
    env_name = 'HopperPT-v3'
    num = 5

    from networks import get_td3_value
    #value_net = get_td3_value(env_name)
    value_net = None

    from policy import POLO, add_parser
    import argparse

    parser = argparse.ArgumentParser()
    add_parser(parser)
    args = parser.parse_args()
    args.num_proc = 20

    model = make_parallel(args.num_proc, env_name, num=num, stochastic=True)
    env = make(env_name, num=num, resample_MP=True)

    #args.iter_num = 2
    args.num_mutation = 500
    #args.num_mutation = 100
    args.iter_num = 5
    args.num_elite = 10

    policy_net = POLO(value_net,
                      model,
                      action_space=env.action_space,
                      add_actions=args.add_actions,
                      horizon=args.horizon,
                      std=args.std,
                      iter_num=args.iter_num,
                      initial_iter=args.initial_iter,
                      num_mutation=args.num_mutation,
                      num_elite=args.num_elite,
                      alpha=0.1,
                      trunc_norm=True,
                      lower_bound=env.action_space.low,
                      upper_bound=env.action_space.high)

    resample_MP = True
    env = make(env_name, num=num, resample_MP=resample_MP, stochastic=False)

    params = get_params(env)

    print("FIXXXXXXXXXXXXXXXXXXXXXXPARAMETERS")
    set_params(
        env,
        np.array([0.58093299, 0.05418986, 0.93399553, 0.1678795, 1.04150952]))
    set_params(env,
               [0.55111654, 0.55281674, 0.46355396, 0.84531834, 0.58944066])
    set_params(env,
               [0.31851129, 0.93941556, 0.02147825, 0.43523052, 1.02611646])
    set_params(env, [0.58589476, 0.11078934, 0.348238, 0.68130195, 0.98376274])

    mean_params = np.array([0.5] * len(params))
    osi = CEMOSI(model,
                 mean_params,
                 iter_num=20,
                 num_mutation=100,
                 num_elite=10,
                 std=0.3,
                 ensemble_num=5)
    policy_net.set_params(mean_params)
    print(get_params(env))

    online_osi(env,
               osi,
               policy_net,
               num_init_traj=1,
               max_horizon=15,
               eval_episodes=10,
               use_state=True,
               print_timestep=10,
               resample_MP=resample_MP,
               online=0,
               ensemble=5)
Example 8
from model import make
import numpy as np
import sys
import os

# os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

Dp = 10
Hp = 175
Wp = 175
T = 35

model = make(Dp, Hp, Wp, T)

model.summary()

model.compile('sgd', ['mse', 'mse'])

#Dp = 10
#Hp = 400
#Wp = 352
#T = 35

# TODO: data preparation, i.e. grouping and sampling (this is done outside the network)
x = np.random.random((1, Dp, Hp, Wp, T, 7))
y1 = np.random.random((1, Hp // 2, Wp // 2, 2))
y2 = np.random.random((1, Hp // 2, Wp // 2, 14))


def gen():
    yield x, [y1, y2]
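A minimal smoke test of the generator above, assuming a TensorFlow/Keras version whose Model.fit accepts Python generators (on older versions, model.fit_generator(gen(), steps_per_epoch=1, epochs=1) is the equivalent call); this is a sketch, not part of the original script:

# gen() yields exactly one (x, [y1, y2]) batch, so run a single step per epoch.
model.fit(gen(), steps_per_epoch=1, epochs=1)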
Example 9
import tensorflow as tf
import model
import shutil

# Create the Keras model
model = model.make(tflite=True)
# Load the network weights
model.load_weights("weight.hdf5")
# Get the TensorFlow session
sess = tf.keras.backend.get_session()
# Export a SavedModel
shutil.rmtree("saved_model/", True)
tf.saved_model.simple_save(sess,
                           "saved_model/",
                           inputs={'input': model.inputs[0]},
                           outputs={'output': model.outputs[0]})
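Since the model is built with tflite=True, a natural follow-up, sketched here as an assumption rather than taken from the original script, is converting the exported SavedModel to a TFLite flatbuffer:

# Hypothetical follow-up: convert the SavedModel exported above to .tflite.
converter = tf.lite.TFLiteConverter.from_saved_model("saved_model/")
tflite_model = converter.convert()
with open("model.tflite", "wb") as f:
    f.write(tflite_model)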