Example #1
    def __init__(self,
                 policy,
                 teacher_env=None,
                 agent_env=None,
                 datasource='simulation',
                 phi=None,
                 theta=None):

        # store the data source and the policy used to generate actions
        self.datasource = datasource
        self.policy = policy

        if self.datasource == 'simulation':
            # simulation mode: create both environments; their parameters
            # (phi for the teacher, theta for the agent) are assigned later
            self.teacher_env = ffenv.FireflyEnv()
            self.agent_env = ffenv.FireflyEnv()

        elif self.datasource == 'behavior':
            # behavior mode: trajectories come from recorded data, so only
            # the agent environment is needed
            self.agent_env = ffenv.FireflyEnv()
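
A minimal usage sketch under stated assumptions: the class name InverseTrainer is hypothetical (only the __init__ signature above is from the source), and the policy is assumed to be a torch policy converted from a trained stable-baselines DDPG agent, as in the later examples.

from stable_baselines import DDPG
import policy_torch

# hypothetical wrapper class name; only the constructor arguments come from the source
baselines_model = DDPG.load("DDPG_theta")
policy = policy_torch.copy_mlp_weights(baselines_model)

trainer = InverseTrainer(policy,
                         datasource='simulation',  # builds both teacher and agent envs
                         phi=None,                 # teacher parameters, assigned later
                         theta=None)               # agent parameters, assigned later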
Example #2
import matplotlib.pyplot as plt
import numpy as np
from numpy import pi
from torch import nn

from Config import Config
from FireflyEnv import ffenv
from stable_baselines import DDPG
import policy_torch
agent_name = "DDPG_selu_skip_96reward1000000_9 26 16 43"
num_episode = 20
arg = Config()
# arg.gains_range[0:2]=[0.9,0.91]
# arg.std_range=[0.02,0.03,0.02,0.03]

# arg.std_range=[0.0001,0.001,0.0001,0.001]
# arg.gains_range=[0.99,1.,0.99,1.]

env = ffenv.FireflyEnv(arg)

# load the trained stable-baselines DDPG agent and copy its MLP weights
# into an equivalent torch policy with selu activations
baselines_selu = DDPG.load(agent_name)
torch_model_selu = policy_torch.copy_mlp_weights(baselines_selu,
                                                 layers=[256, 256, 64, 32],
                                                 act_fn=nn.functional.selu)
torch_model_selu.name = 'selu'

# baselines_relu = DDPG.load("DDPG_theta")
# torch_model_relu = policy_torch.copy_mlp_weights(baselines_relu,layers=[32,64])
# torch_model_relu.name='relu'

agent = torch_model_selu

# container for per-episode results
all_ep = []
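
A sketch of how the episode collection might continue from here, assuming FireflyEnv follows the standard gym reset/step interface and that the converted torch policy maps an observation tensor to an action; the episode-dictionary keys are illustrative, not from the source.

import torch

for ep_idx in range(num_episode):
    obs = env.reset()
    done = False
    ep = {'obs': [], 'actions': [], 'rewards': []}
    while not done:
        # the torch policy expects a float tensor; the env is assumed to take a numpy action
        action = agent(torch.as_tensor(obs, dtype=torch.float32))
        obs, reward, done, info = env.step(action.detach().numpy())
        ep['obs'].append(obs)
        ep['actions'].append(action)
        ep['rewards'].append(reward)
    all_ep.append(ep)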
Example #3

from Config import Config
from FireflyEnv import ffenv
from stable_baselines import DDPG
# Inverse_Config and reset_theta are project-local; their import paths are assumed here
from Inverse_Config import Inverse_Config
from InverseFuncs import reset_theta

inverse_arg = Inverse_Config()
env_arg = Config()
# policy=DDPG.load("DDPG_ff")

# print(os.getcwd())

# phi: the "true" parameters assigned to the teacher environment;
# theta: the candidate parameters assigned to the agent environment
phi = reset_theta(inverse_arg.gains_range, inverse_arg.std_range,
                  inverse_arg.goal_radius_range)
# phi=torch.tensor([1.0537, 0.7328, 0.7053, 1.2038, 0.9661, 0.8689, 0.2930, 1.9330, 0.2000])
theta = reset_theta(inverse_arg.gains_range, inverse_arg.std_range,
                    inverse_arg.goal_radius_range)

# testing env
teacher_env = ffenv.FireflyEnv(env_arg)
agent_env = ffenv.FireflyEnv(env_arg)

teacher_env.assign_presist_phi(phi)
teacher_env.reset_theta = False
# print(teacher_env.theta)
agent_env.assign_presist_phi(theta)
agent_env.reset_theta = False
# print(agent_env.theta)
# print('state',agent_env.state)

# testing torch agent
import policy_torch
baselines_mlp_model = DDPG.load("DDPG_theta")
policy = policy_torch.copy_mlp_weights(baselines_mlp_model)
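
A short sketch of what the paired environments allow: rolling the same action sequence through the teacher environment (true parameters phi) and the agent environment (candidate theta) and comparing outcomes. This assumes FireflyEnv exposes a gym-style reset/step interface; the printed comparison is only illustrative, not the project's actual inverse-learning objective.

import torch

obs_teacher = teacher_env.reset()  # environment governed by the true phi
obs_agent = agent_env.reset()      # environment governed by the candidate theta

for t in range(10):
    # act on the teacher observation and reuse the same action in both envs
    action = policy(torch.as_tensor(obs_teacher, dtype=torch.float32)).detach().numpy()
    obs_teacher, r_teacher, done_teacher, _ = teacher_env.step(action)
    obs_agent, r_agent, done_agent, _ = agent_env.step(action)
    print(t, float(r_teacher), float(r_agent))  # rewards diverge where theta differs from phi
    if done_teacher or done_agent:
        break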
Example #4
import numpy as np
import time
import torch
import tensorflow as tf

from Config import Config
from DDPGv2Agent.rewards import *  # reward functions
from FireflyEnv import ffenv
from ff_policy.policy_selu import SoftPolicy
from stable_baselines import DDPG
from stable_baselines.ddpg.policies import MlpPolicy
from stable_baselines.common.noise import NormalActionNoise, OrnsteinUhlenbeckActionNoise

arg = Config()

# exploration noise added to the DDPG actor's actions
action_noise = OrnsteinUhlenbeckActionNoise(mean=np.zeros(2),
                                            sigma=float(0.5) * np.ones(2))
# env=ffenv.FireflyEnv(arg,kwargs={'reward_function':return_reward_location})
env = ffenv.FireflyEnv(arg, kwargs={})

# build the stable-baselines DDPG learner on the firefly environment
model = DDPG(
    MlpPolicy,
    env,
    verbose=1,
    tensorboard_log="./DDPG_tb/",
    full_tensorboard_log=False,
    action_noise=action_noise,
    gamma=0.99,
    memory_policy=None,
    eval_env=None,
    nb_train_steps=50,
    nb_rollout_steps=100,
    nb_eval_steps=100,
    param_noise=None,