#arg.noise_range = learning_arg['noise_range'] #ln(noise_var)

    # high SNR only : 10~100
    arg.noise_range = [
        np.log(0.01),
        np.log(0.1),
        np.log(np.pi / 4 / 100),
        np.log(np.pi / 4 / 10)
    ]  # ln(noise_var), ordered [vel min, vel max, ang min, ang max]; SNR 100 = easy, SNR 10 = middle
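    # Quick check (a sketch; assumes noise_range holds ln(variance)): the implied
    # noise stds are exp(lnvar / 2), i.e.
    #   np.exp(np.array(arg.noise_range) / 2)  # ~[0.10, 0.32, 0.089, 0.28]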
    arg.goal_radius_range = learning_arg['goal_radius_range']
    arg.WORLD_SIZE = learning_arg['WORLD_SIZE']
    arg.DELTA_T = learning_arg['DELTA_T']
    arg.EPISODE_TIME = learning_arg['EPISODE_TIME']
    arg.EPISODE_LEN = learning_arg['EPISODE_LEN']

    env = Model(arg)  # build an environment
    env.max_goal_radius = arg.goal_radius_range[1]  # use the largest goal radius in the range
    env.box = arg.WORLD_SIZE
    agent = Agent(env.state_dim,
                  env.action_dim,
                  arg,
                  filename,
                  hidden_dim=128,
                  gamma=DISCOUNT_FACTOR,
                  tau=0.001,
                  device="cpu")
    agent.load(filename)
    """
    
    final_theta_log = []
filename = '20191016-205855-221407' # agent information
df = pd.read_csv('../firefly-inverse-data/data/' + filename + '_log.csv',
                 usecols=['discount_factor','process gain forward', 'process gain angular', 'process noise std forward',
                          'process noise std angular', 'obs gain forward', 'obs gain angular', 'obs noise std forward',
                          'obs noise std angular', 'goal radius'])

DISCOUNT_FACTOR = df['discount_factor'][0]
arg.gains_range = [np.floor(df['process gain forward'].min()), np.ceil(df['process gain forward'].max()),
                   np.floor(df['process gain angular'].min()), np.ceil(df['process gain angular'].max())]

arg.std_range = [df['process noise std forward'].min(), df['process noise std forward'].max(),
                 df['process noise std angular'].min(), df['process noise std angular'].max()]
arg.goal_radius_range = [df['goal radius'].min(), df['goal radius'].max()]
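
# Optional sanity check (a sketch, not part of the original pipeline): confirm that
# each [min, max] pair recovered from the log is well-ordered before reuse.
assert arg.gains_range[0] <= arg.gains_range[1] and arg.gains_range[2] <= arg.gains_range[3]
assert arg.std_range[0] <= arg.std_range[1] and arg.std_range[2] <= arg.std_range[3]
assert arg.goal_radius_range[0] <= arg.goal_radius_range[1]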


env = Model(arg) # build an environment
agent = Agent(env.state_dim, env.action_dim, arg, filename, hidden_dim=128, gamma=DISCOUNT_FACTOR, tau=0.001, device="cpu")
agent.load(filename)


true_theta_log = []
final_theta_log = []
stderr_log = []
result_log = []

for num_thetas in range(10):

    true_theta = reset_theta(arg.gains_range, arg.std_range, arg.goal_radius_range)
    true_theta_log.append(true_theta.data.clone())
    x_traj, obs_traj, a_traj, _ = trajectory(agent, true_theta, arg.INVERSE_BATCH_SIZE, env, arg, arg.gains_range, arg.std_range, arg.goal_radius_range) # generate true trajectory
    true_loss = getLoss(agent, x_traj, obs_traj, a_traj, true_theta, env, arg.gains_range, arg.std_range)  # loss evaluated at the true theta (presumably a lower bound on the loss)
Example #3
File: visualize.py  Project: svd3/Firefly
import time
import torch
import numpy as np
from numpy import pi
from DDPGv2 import Agent, Noise

from FireflyEnv import Model
from FireflyEnv.gym_input import true_params
from FireflyEnv.env_utils import inverseCholesky, ellipse, pos_init

import matplotlib.pyplot as plt

env = Model(*true_params)
env2 = Model(*true_params)
#env.Bstep.gains.data.copy_(torch.ones(2))
state_dim = env.state_dim
action_dim = env.action_dim
num_steps = int(env.episode_len)

agent = Agent(state_dim, action_dim, hidden_dim=64, tau=0.001)
#agent.load('pretrained/ddpg/best_ddpg_model_6.pth.tar')
agent.load('pretrained/stop_model_2.pth.tar')
noise = Noise(action_dim, mean=0., std=0.01)

def R(x):
    # 2D rotation matrix for angle x (counter-clockwise)
    rot = np.eye(2)
    rot[0, 0], rot[0, 1] = np.cos(x), -np.sin(x)
    rot[1, 0], rot[1, 1] = np.sin(x), np.cos(x)
    return rot
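
# Minimal usage sketch for R (illustrative only, not from the original file):
# rotating the unit x-vector by pi/2 should give approximately the unit y-vector.
_example_point = np.array([1., 0.])
_example_rotated = R(pi / 2) @ _example_point  # ~[0., 1.]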

def qvalue(state, action):
Example #4
    COLUMNS = [
        'total time', 'ep', 'std', 'time step', 'Policy NW loss',
        'value NW loss', 'reward', 'avg_reward', 'goal', 'a_vel', 'a_ang',
        'true_r', 'r', 'rel_ang', 'vel', 'ang_vel', 'vecL1', 'vecL2', 'vecL3',
        'vecL4', 'vecL5', 'vecL6', 'vecL7', 'vecL8', 'vecL9', 'vecL10',
        'vecL11', 'vecL12', 'vecL13', 'vecL14', 'vecL15',
        'process gain forward', 'process gain angular',
        'process noise lnvar fwd', 'process noise lnvar ang',
        'obs gain forward', 'obs gain angular', 'obs noise lnvar fwd',
        'obs noise lnvar ang', 'goal radius', 'batch size', 'box_size',
        'std_step_size', 'discount_factor', 'num_epochs'
    ]

    ep_time_log = pd.DataFrame(columns=COLUMNS)
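    # A sketch of the assumed logging pattern (the row-append call is not shown in
    # this snippet): one row of values per logging step, aligned with COLUMNS, e.g.
    #   row = dict(zip(COLUMNS, values))
    #   ep_time_log = pd.concat([ep_time_log, pd.DataFrame([row])], ignore_index=True)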

    env = Model(arg)  # build an environment
    x, pro_gains, pro_noise_ln_vars, goal_radius = env.reset(
        arg.gains_range, arg.noise_range, arg.goal_radius_range)

    tot_t = 0.  # total number of time steps so far
    episode = 0.  # number of fireflies (episodes) completed
    int_t = 1  # counter used to change the world setting every EPISODE_LEN time steps

    state_dim = env.state_dim
    action_dim = env.action_dim
    filename = arg.filename

    argument = arg.__dict__
    torch.save(argument, arg.data_path + 'data/' + filename + '_arg.pkl')
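    # The saved dict can later be restored with torch.load (a sketch; the path just
    # mirrors the save call above):
    #   saved_arg = torch.load(arg.data_path + 'data/' + filename + '_arg.pkl')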

    agent = Agent(state_dim,
Example #5
#from DDPGv2.utils import shrink
#from NAF import Agent, Noise

from FireflyEnv import Model
from FireflyEnv.gym_input import true_params

from shutil import copyfile
from collections import deque
rewards = deque(maxlen=100)
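# A sketch of how the reward deque is typically consumed (the actual usage is not
# shown in this snippet): a 100-episode running average for monitoring, e.g.
#   avg_reward = sum(rewards) / len(rewards) if rewards else 0.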

batch_size = 64
num_episodes = 2000

true_params = [p.data.clone() for p in true_params]
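# (The clones above are detached copies of the true parameter tensors, so the
# model built below does not share storage with the original true_params.)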
#env = Model(n1, n2, gains, obs_gains, log_rew_width)
env = Model(*true_params)
#env.Bstep.gains.data.copy_(torch.ones(2))
state_dim = env.state_dim
action_dim = env.action_dim
num_steps = int(env.episode_len)

std = 0.4
noise = Noise(action_dim, mean=0., std=std)
agent = Agent(state_dim, action_dim, hidden_dim=128, tau=0.001)
#agent.load('pretrained/ddpg/best_ddpg_model_7.pth.tar')
agent.load('pretrained/ddpg_new/ddpg_model_10.pth.tar')
#agent.load('pretrained/ddpg_circle/best_circle_model_2.pth.tar')
"""
best_avg = -90.
best_file = agent.file.split('/')
best_file[-1] = 'best_' + best_file[-1]
Example #6
import torch
import numpy as np
from numpy import pi

from DDPGv2 import Agent, Noise
from DDPGv2.utils import shrink

from FireflyEnv import Model, pos_init
from FireflyEnv.gym_input import true_params

true_params = [p.data.clone() for p in true_params]

env = Model(*true_params)
state_dim = env.state_dim
action_dim = env.action_dim
num_steps = int(env.episode_len)

noise = Noise(action_dim, mean=0., std=0.05)
agent = Agent(state_dim, action_dim, hidden_dim=64, tau=0.001)
agentc = Agent(state_dim, action_dim, hidden_dim=64, tau=0.001)

agent.load('pretrained/ddpg/best_ddpg_model_7.pth.tar')
agentc.load('pretrained/ddpg_circle/best_circle_model_1.pth.tar')

for i in range(10):
    coord = list(pos_init(3.))
    coord[1] = -(pi / 2) * torch.ones(1)
    r, ang, rel_ang = coord
    pos0 = r * torch.cat([torch.cos(ang), torch.sin(ang)])
    x = -rel_ang
    R = torch.stack([
Example #7
"""
df = pd.read_csv('../firefly-inverse-data/data/' + filename + '_log.csv',
                 usecols=['discount_factor','process gain forward', 'process gain angular', 'process noise std forward',
                          'process noise std angular', 'obs gain forward', 'obs gain angular', 'obs noise std forward',
                          'obs noise std angular', 'goal radius'])

DISCOUNT_FACTOR = df['discount_factor'][0]
arg.gains_range = [np.floor(df['process gain forward'].min()), np.ceil(df['process gain forward'].max()),
               np.floor(df['process gain angular'].min()), np.ceil(df['process gain angular'].max())]

arg.std_range = [df['process noise std forward'].min(), df['process noise std forward'].max(),
               df['process noise std angular'].min(), df['process noise std angular'].max()]
arg.goal_radius_range = [df['goal radius'].min(), df['goal radius'].max()]
"""

env = Model(arg)  # build an environment
env.box = arg.WORLD_SIZE
env.min_goal_radius = arg.goal_radius_range[0]
agent = Agent(env.state_dim,
              env.action_dim,
              arg,
              filename,
              hidden_dim=128,
              gamma=DISCOUNT_FACTOR,
              tau=0.001)  #, device = "cpu")
agent.load(filename)

# true theta
true_theta = reset_theta(arg.gains_range, arg.std_range, arg.goal_radius_range)
x_traj, obs_traj, a_traj, _ = trajectory(
    agent, true_theta, arg.INVERSE_BATCH_SIZE, env, arg, arg.gains_range,