def compute_loss(env, agent, theta_estimation, true_theta, phi,
                 trajectory_data=None, num_episodes=100, is1d=False):
    # Roll out fresh trajectories under the true theta unless precomputed
    # trajectory data is supplied.
    if trajectory_data is None:
        states, actions, tasks = trajectory(
            agent, torch.Tensor(phi), torch.Tensor(true_theta),
            env, num_episodes, is1d=is1d)
    else:
        states = trajectory_data['states']
        actions = trajectory_data['actions']
        tasks = trajectory_data['tasks']
    theta_estimation = torch.nn.Parameter(torch.Tensor(theta_estimation))
    loss = getLoss(agent, actions, tasks, torch.Tensor(phi),
                   theta_estimation, env, states=states)
    return loss
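# getLoss is imported from InverseFuncs and its body is not shown here. As a
# rough sketch of the kind of objective it returns (an assumption, not the
# actual implementation), a Gaussian negative log-likelihood of the observed
# actions under a candidate theta could look like this, with pi_std playing
# the role of arg.PI_STD configured below:
def action_nll_sketch(pred_actions, actions, pi_std=1.0):
    import torch
    dist = torch.distributions.Normal(pred_actions, pi_std)
    return -dist.log_prob(actions).sum()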
def compute_H(env, agent, theta_estimation, true_theta, phi,
              trajectory_data=None, H_dim=9, num_episodes=100, is1d=False):
    # Note: trajectory_data is accepted for API symmetry with compute_loss,
    # but trajectories are always regenerated here.
    states, actions, tasks = trajectory(
        agent, torch.Tensor(phi), torch.Tensor(true_theta),
        env, num_episodes, is1d=is1d)
    theta_estimation = torch.nn.Parameter(torch.Tensor(theta_estimation))
    phi = torch.nn.Parameter(torch.Tensor(phi))
    phi.requires_grad = False
    loss = getLoss(agent, actions, tasks, phi, theta_estimation, env,
                   states=states, gpu=False)
    # First-order gradient w.r.t. the estimated theta, keeping the graph so
    # each component can be differentiated a second time.
    grads = torch.autograd.grad(loss, theta_estimation,
                                create_graph=True, allow_unused=True)[0]
    # Build the Hessian row by row.
    H = torch.zeros(H_dim, H_dim)
    for i in range(H_dim):
        print(i)  # progress indicator
        H[i] = torch.autograd.grad(grads[i], theta_estimation,
                                   retain_graph=True,
                                   allow_unused=True)[0].view(-1)
    return H
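# Hypothetical usage sketch: turn the Hessian into per-parameter standard
# errors via the asymptotic covariance H^-1 (consistent with the stderr_log
# kept below, but not part of the original functions). Assumes H is positive
# definite at the estimate.
H = compute_H(env, agent, theta_estimation, true_theta, phi, H_dim=9)
cov = torch.inverse(H)
stderr = torch.sqrt(torch.diag(cov))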
env = Model(arg)  # build an environment
agent = Agent(env.state_dim, env.action_dim, arg, filename,
              hidden_dim=128, gamma=DISCOUNT_FACTOR, tau=0.001, device="cpu")
agent.load(filename)

true_theta_log = []
final_theta_log = []
stderr_log = []
result_log = []

for num_thetas in range(10):
    # Sample a ground-truth theta and generate trajectories under it.
    true_theta = reset_theta(arg.gains_range, arg.std_range, arg.goal_radius_range)
    true_theta_log.append(true_theta.data.clone())
    x_traj, obs_traj, a_traj, _ = trajectory(
        agent, true_theta, arg.INVERSE_BATCH_SIZE, env, arg,
        arg.gains_range, arg.std_range, arg.goal_radius_range)  # generate true trajectory
    true_loss = getLoss(agent, x_traj, obs_traj, a_traj, true_theta, env,
                        arg.gains_range, arg.std_range)  # this is the lower bound of loss?

    # theta = nn.Parameter(true_theta.data.clone() + 0.5 * true_theta.data.clone())
    theta = nn.Parameter(reset_theta(arg.gains_range, arg.std_range, arg.goal_radius_range))
    ini_theta = theta.data.clone()

    loss_log = deque(maxlen=5000)
    theta_log = deque(maxlen=5000)
    optT = torch.optim.Adam([theta], lr=1e-3)
    prev_loss = 100000
    loss_diff = deque(maxlen=5)
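    # A minimal sketch of the inner update loop this setup implies; the
    # plateau-based stopping rule and its threshold are assumptions, not the
    # original code.
    for it in range(arg.NUM_IT):
        loss = getLoss(agent, x_traj, obs_traj, a_traj, theta, env,
                       arg.gains_range, arg.std_range)
        optT.zero_grad()
        loss.backward()
        optT.step()
        loss_log.append(loss.item())
        theta_log.append(theta.data.clone())
        loss_diff.append(abs(prev_loss - loss.item()))
        prev_loss = loss.item()
        # stop once the loss change has stayed tiny for 5 consecutive steps
        if len(loss_diff) == loss_diff.maxlen and sum(loss_diff) < 1e-3:
            break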
def run_inverse(data=None, theta=None, filename=None):
    import os
    import warnings
    warnings.filterwarnings('ignore')
    from copy import copy
    import time
    import random
    # seed = time.time().as_integer_ratio()[0]  # time-based seed (unused)
    seed = 0  # fixed seed for reproducibility
    random.seed(seed)
    import torch
    torch.manual_seed(seed)
    import numpy as np
    np.random.seed(int(seed))
    from numpy import pi
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

    # ----------- inverse functions -------------
    from InverseFuncs import trajectory, getLoss, reset_theta, theta_range, \
        reset_theta_log, single_theta_inverse

    # --------- loading env and agent ----------
    from stable_baselines import DDPG, TD3
    from FireflyEnv import ffenv_new_cord
    from Config import Config
    arg = Config()
    DISCOUNT_FACTOR = 0.99
    arg.NUM_SAMPLES = 2
    arg.NUM_EP = 1000
    arg.NUM_IT = 2  # number of iterations for gradient descent
    arg.NUM_thetas = 1
    arg.ADAM_LR = 0.007
    arg.LR_STEP = 2
    arg.LR_STOP = 50
    arg.lr_gamma = 0.95
    arg.PI_STD = 1
    arg.goal_radius_range = [0.05, 0.2]

    # convert the agent to a torch model
    import policy_torch
    baselines_mlp_model = TD3.load('trained_agent//TD_95gamma_mc_smallgoal_500000_9_24_1_6.zip')
    agent = policy_torch.copy_mlp_weights(baselines_mlp_model, layers=[128, 128])

    # load the environment, same as in training
    env = ffenv_new_cord.FireflyAgentCenter(arg)
    env.agent_knows_phi = False

    true_theta_log = []
    true_loss_log = []
    true_loss_act_log = []
    true_loss_obs_log = []
    final_theta_log = []
    stderr_log = []
    result_log = []
    number_update = 100
    if data is None:
        save_dict = {'theta_estimations': []}
    else:
        save_dict = data

    # run the inverse for several thetas; data must provide 'true_theta',
    # 'theta_estimations', and 'phi'
    for num_thetas in range(arg.NUM_thetas):
        # make sure phi and true theta stay the same
        true_theta = torch.Tensor(data['true_theta'])
        env.presist_phi = True
        env.reset(phi=true_theta, theta=true_theta)  # first test the teacher case, where true theta = phi
        theta = torch.Tensor(data['theta_estimations'][0])
        phi = torch.Tensor(data['phi'])

        save_dict['true_theta'] = true_theta.data.clone().tolist()
        save_dict['phi'] = phi.data.clone().tolist()
        save_dict['initial_theta'] = theta.data.clone().tolist()

        for num_update in range(number_update):
            states, actions, tasks = trajectory(
                agent, phi, true_theta, env, arg.NUM_EP)
            result = single_theta_inverse(
                true_theta, phi, arg, env, agent, states, actions, tasks,
                filename, num_thetas, initial_theta=theta)
            save_dict['theta_estimations'].append(result.tolist())

        if filename is None:
            savename = ('inverse_data/' + "EP" + str(arg.NUM_EP)
                        + "updates" + str(number_update)
                        + "sample" + str(arg.NUM_SAMPLES)
                        + "IT" + str(arg.NUM_IT) + '.pkl')
            torch.save(save_dict, savename)
        elif filename[-4:] == '.pkl':
            torch.save(save_dict, filename)
        else:
            torch.save(save_dict, filename + '.pkl')
        print(result)

    print('done')
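# Hypothetical usage: resume an inverse fit from a previously saved result.
# The paths here are illustrative, not files shipped with the repo.
data = torch.load('inverse_data/previous_run.pkl')
run_inverse(data=data, filename='inverse_data/resumed_run.pkl')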
arg.DELTA_T = 0.2
arg.EPISODE_LEN = 35

# Load a previous inverse-fitting result
a = load_inverse_data('17_21_34')
theta_trajectory = a['theta_estimations']
true_theta = a['true_theta']
theta_estimation = theta_trajectory[-1]
phi = np.array(a['phi'])

# no background process, faster
env = ffac_1d.FireflyTrue1d_cpu(arg)
baselines_mlp_model = TD3_torch.TD3.load('trained_agent/1d_1000000_9_16_22_20.zip')
agent = baselines_mlp_model.actor
agent.cpu()
agent.requires_grad_(False)  # freeze agent weights; only theta gets gradients

is1d = True
H_dim = 7
num_episodes = 100

states, actions, tasks = trajectory(
    agent, torch.Tensor(phi), torch.Tensor(true_theta),
    env, num_episodes, is1d=is1d)
theta_estimation = torch.nn.Parameter(torch.Tensor(theta_estimation))
phi = torch.nn.Parameter(torch.Tensor(phi))
phi.requires_grad = False

loss = getLoss(agent, actions, tasks, phi, theta_estimation, env,
               states=states, gpu=False)
grads = torch.autograd.grad(loss, theta_estimation,
                            create_graph=True, allow_unused=True)[0]
print(grads)

# Build the Hessian row by row, as in compute_H above.
H = torch.zeros(H_dim, H_dim)
for i in range(H_dim):
    print(i)  # progress indicator
    H[i] = torch.autograd.grad(grads[i], theta_estimation,
                               retain_graph=True,
                               allow_unused=True)[0].view(-1)
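# Sketch: inspect the curvature around the estimate (an addition, not part of
# the original script). Symmetrizing H guards against small numerical
# asymmetry from autograd; nonpositive eigenvalues flag a flat or saddle
# direction, and standard errors via H^-1 are only meaningful when every
# eigenvalue is positive. torch.linalg.eigh assumes a recent PyTorch.
H_sym = 0.5 * (H + H.t())
evals, evecs = torch.linalg.eigh(H_sym)
print(evals)
if (evals > 0).all():
    stderr = torch.sqrt(torch.diag(torch.inverse(H_sym)))
    print(stderr)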