#arg.noise_range = learning_arg['noise_range']  # ln(noise_var), high SNR only: 10~100
arg.noise_range = [np.log(0.01), np.log(0.1),
                   np.log(np.pi / 4 / 100), np.log(np.pi / 4 / 10)]  # ln(noise_var): SNR = [100 easy, 10 middle]; order: [vel min, vel max, ang min, ang max]
arg.goal_radius_range = learning_arg['goal_radius_range']
arg.WORLD_SIZE = learning_arg['WORLD_SIZE']
arg.DELTA_T = learning_arg['DELTA_T']
arg.EPISODE_TIME = learning_arg['EPISODE_TIME']
arg.EPISODE_LEN = learning_arg['EPISODE_LEN']

env = Model(arg)  # build an environment
env.max_goal_radius = arg.goal_radius_range[1]  # use the largest goal radius
env.box = arg.WORLD_SIZE

agent = Agent(env.state_dim, env.action_dim, arg, filename, hidden_dim=128,
              gamma=DISCOUNT_FACTOR, tau=0.001, device="cpu")
agent.load(filename)
"""
final_theta_log = []
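# The noise_range entries above store ln(noise variance), while the logs use noise standard
# deviations (e.g. the 'process noise std forward' column read below). A minimal, hypothetical
# pair of converters for moving between the two parameterizations; these helpers are not part of
# the original project code and are shown only to make the relationship explicit:
def lnvar_to_std(ln_var):
    # std = sqrt(var) = exp(ln_var / 2)
    return np.exp(ln_var / 2.0)

def std_to_lnvar(std):
    # ln(var) = ln(std ** 2) = 2 * ln(std)
    return 2.0 * np.log(std)

# e.g. lnvar_to_std(np.log(0.01)) returns 0.1, the 'easy' forward-velocity noise level above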
filename = '20191016-205855-221407'  # agent information

df = pd.read_csv('../firefly-inverse-data/data/' + filename + '_log.csv',
                 usecols=['discount_factor', 'process gain forward', 'process gain angular',
                          'process noise std forward', 'process noise std angular',
                          'obs gain forward', 'obs gain angular',
                          'obs noise std forward', 'obs noise std angular',
                          'goal radius'])

DISCOUNT_FACTOR = df['discount_factor'][0]

arg.gains_range = [np.floor(df['process gain forward'].min()),
                   np.ceil(df['process gain forward'].max()),
                   np.floor(df['process gain angular'].min()),
                   np.ceil(df['process gain angular'].max())]
arg.std_range = [df['process noise std forward'].min(),
                 df['process noise std forward'].max(),
                 df['process noise std angular'].min(),
                 df['process noise std angular'].max()]
arg.goal_radius_range = [df['goal radius'].min(), df['goal radius'].max()]

env = Model(arg)  # build an environment
agent = Agent(env.state_dim, env.action_dim, arg, filename, hidden_dim=128,
              gamma=DISCOUNT_FACTOR, tau=0.001, device="cpu")
agent.load(filename)

true_theta_log = []
final_theta_log = []
stderr_log = []
result_log = []

for num_thetas in range(10):
    # sample a ground-truth theta and roll out trajectories under it
    true_theta = reset_theta(arg.gains_range, arg.std_range, arg.goal_radius_range)
    true_theta_log.append(true_theta.data.clone())
    x_traj, obs_traj, a_traj, _ = trajectory(agent, true_theta, arg.INVERSE_BATCH_SIZE, env, arg,
                                             arg.gains_range, arg.std_range,
                                             arg.goal_radius_range)  # generate true trajectories
    true_loss = getLoss(agent, x_traj, obs_traj, a_traj, true_theta, env,
                        arg.gains_range, arg.std_range)  # loss at the true theta (a lower bound on the fitted loss?)
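    # Hypothetical continuation of this loop, for illustration only: fit an estimate of theta by
    # gradient descent on getLoss() and record it. The real project code may use a different
    # optimizer, iteration count, or a dedicated routine for the standard-error estimate; the only
    # parts taken from the source are the reset_theta()/getLoss() call signatures and the log
    # lists initialized above. This sketch also assumes getLoss() returns a differentiable torch
    # scalar and that reset_theta() returns a leaf tensor.
    theta = reset_theta(arg.gains_range, arg.std_range, arg.goal_radius_range)  # random initial guess
    theta.requires_grad_(True)
    optimizer = torch.optim.Adam([theta], lr=1e-3)
    num_iterations = 100  # placeholder value
    for _ in range(num_iterations):
        loss = getLoss(agent, x_traj, obs_traj, a_traj, theta, env,
                       arg.gains_range, arg.std_range)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    final_theta_log.append(theta.data.clone())
    result_log.append([true_theta.data.clone(), theta.data.clone(), true_loss, loss.data.clone()])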
import time
import torch
import numpy as np
from numpy import pi
from DDPGv2 import Agent, Noise
from FireflyEnv import Model
from FireflyEnv.gym_input import true_params
from FireflyEnv.env_utils import inverseCholesky, ellipse, pos_init
import matplotlib.pyplot as plt

env = Model(*true_params)
env2 = Model(*true_params)
#env.Bstep.gains.data.copy_(torch.ones(2))
state_dim = env.state_dim
action_dim = env.action_dim
num_steps = int(env.episode_len)

agent = Agent(state_dim, action_dim, hidden_dim=64, tau=0.001)
#agent.load('pretrained/ddpg/best_ddpg_model_6.pth.tar')
agent.load('pretrained/stop_model_2.pth.tar')
noise = Noise(action_dim, mean=0., std=0.01)


def R(x):
    """Counter-clockwise 2D rotation matrix for angle x (radians)."""
    R = np.eye(2)
    R[0, 0], R[0, 1] = np.cos(x), -np.sin(x)
    R[1, 0], R[1, 1] = np.sin(x), np.cos(x)
    return R


def qvalue(state, action):
COLUMNS = ['total time', 'ep', 'std', 'time step', 'Policy NW loss', 'value NW loss',
           'reward', 'avg_reward', 'goal', 'a_vel', 'a_ang', 'true_r', 'r', 'rel_ang',
           'vel', 'ang_vel',
           'vecL1', 'vecL2', 'vecL3', 'vecL4', 'vecL5', 'vecL6', 'vecL7', 'vecL8',
           'vecL9', 'vecL10', 'vecL11', 'vecL12', 'vecL13', 'vecL14', 'vecL15',
           'process gain forward', 'process gain angular',
           'process noise lnvar fwd', 'process noise lnvar ang',
           'obs gain forward', 'obs gain angular',
           'obs noise lnvar fwd', 'obs noise lnvar ang',
           'goal radius', 'batch size', 'box_size', 'std_step_size',
           'discount_factor', 'num_epochs']
ep_time_log = pd.DataFrame(columns=COLUMNS)

env = Model(arg)  # build an environment
x, pro_gains, pro_noise_ln_vars, goal_radius = env.reset(arg.gains_range, arg.noise_range,
                                                         arg.goal_radius_range)

tot_t = 0.    # number of total time steps
episode = 0.  # number of fireflies
int_t = 1     # variable for changing the world setting every EPISODE_LEN time steps

state_dim = env.state_dim
action_dim = env.action_dim
filename = arg.filename

argument = arg.__dict__
torch.save(argument, arg.data_path + 'data/' + filename + '_arg.pkl')

agent = Agent(state_dim,
#from DDPGv2.utils import shrink
#from NAF import Agent, Noise
from FireflyEnv import Model
from FireflyEnv.gym_input import true_params
from shutil import copyfile
from collections import deque

rewards = deque(maxlen=100)
batch_size = 64
num_episodes = 2000
true_params = [p.data.clone() for p in true_params]

#env = Model(n1, n2, gains, obs_gains, log_rew_width)
env = Model(*true_params)
#env.Bstep.gains.data.copy_(torch.ones(2))
state_dim = env.state_dim
action_dim = env.action_dim
num_steps = int(env.episode_len)

std = 0.4
noise = Noise(action_dim, mean=0., std=std)
agent = Agent(state_dim, action_dim, hidden_dim=128, tau=0.001)
#agent.load('pretrained/ddpg/best_ddpg_model_7.pth.tar')
agent.load('pretrained/ddpg_new/ddpg_model_10.pth.tar')
#agent.load('pretrained/ddpg_circle/best_circle_model_2.pth.tar')
"""
best_avg = -90.
best_file = agent.file.split('/')
best_file[-1] = 'best_' + best_file[-1]
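# Sketch of the rolling-average / best-checkpoint pattern that the bookkeeping above (the rewards
# deque, best_avg, and best_file) appears to set up. This is hypothetical: it assumes the Agent
# class exposes a save(path) counterpart to the load() call used above, and it would sit inside
# the training loop after each episode's reward has been appended to the deque.
#
#     rewards.append(episode_reward)
#     avg_reward = sum(rewards) / len(rewards)
#     if avg_reward > best_avg:
#         best_avg = avg_reward
#         agent.save('/'.join(best_file))  # e.g. write the 'best_...' checkpoint alongside the original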
import torch
import numpy as np
from numpy import pi
from DDPGv2 import Agent, Noise
from DDPGv2.utils import shrink
from FireflyEnv import Model, pos_init
from FireflyEnv.gym_input import true_params

true_params = [p.data.clone() for p in true_params]
env = Model(*true_params)
state_dim = env.state_dim
action_dim = env.action_dim
num_steps = int(env.episode_len)

noise = Noise(action_dim, mean=0., std=0.05)
agent = Agent(state_dim, action_dim, hidden_dim=64, tau=0.001)
agentc = Agent(state_dim, action_dim, hidden_dim=64, tau=0.001)
agent.load('pretrained/ddpg/best_ddpg_model_7.pth.tar')
agentc.load('pretrained/ddpg_circle/best_circle_model_1.pth.tar')

for i in range(10):
    coord = list(pos_init(3.))
    coord[1] = -(pi / 2) * torch.ones(1)
    r, ang, rel_ang = coord
    pos0 = r * torch.cat([torch.cos(ang), torch.sin(ang)])
    x = -rel_ang
    R = torch.stack([
""" df = pd.read_csv('../firefly-inverse-data/data/' + filename + '_log.csv', usecols=['discount_factor','process gain forward', 'process gain angular', 'process noise std forward', 'process noise std angular', 'obs gain forward', 'obs gain angular', 'obs noise std forward', 'obs noise std angular', 'goal radius']) DISCOUNT_FACTOR = df['discount_factor'][0] arg.gains_range = [np.floor(df['process gain forward'].min()), np.ceil(df['process gain forward'].max()), np.floor(df['process gain angular'].min()), np.ceil(df['process gain angular'].max())] arg.std_range = [df['process noise std forward'].min(), df['process noise std forward'].max(), df['process noise std angular'].min(), df['process noise std angular'].max()] arg.goal_radius_range = [df['goal radius'].min(), df['goal radius'].max()] """ env = Model(arg) # build an environment env.box = arg.WORLD_SIZE env.min_goal_radius = arg.goal_radius_range[0] agent = Agent(env.state_dim, env.action_dim, arg, filename, hidden_dim=128, gamma=DISCOUNT_FACTOR, tau=0.001) #, device = "cpu") agent.load(filename) # true theta true_theta = reset_theta(arg.gains_range, arg.std_range, arg.goal_radius_range) x_traj, obs_traj, a_traj, _ = trajectory( agent, true_theta, arg.INVERSE_BATCH_SIZE, env, arg, arg.gains_range,