Code example #1
File: atest.py Project: yizhoucc/ffsb
def compute_loss(env, agent, theta_estimation, true_theta, phi, trajectory_data=None, num_episodes=100, is1d=False):
  # roll out fresh trajectories unless precomputed trajectory data is supplied
  if trajectory_data is None:
    states, actions, tasks = trajectory(agent, torch.Tensor(phi), torch.Tensor(true_theta), env, num_episodes, is1d=is1d)
  else:
    states = trajectory_data['states']
    actions = trajectory_data['actions']
    tasks = trajectory_data['tasks']
  theta_estimation = torch.nn.Parameter(torch.Tensor(theta_estimation))
  loss = getLoss(agent, actions, tasks, torch.Tensor(phi), theta_estimation, env, states=states)
  return loss
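A possible call, assuming env, agent, and the parameter vectors are already in scope (the variable names and values here are placeholders, not part of the original listing):

loss = compute_loss(env, agent, theta_estimation, true_theta, phi, num_episodes=100, is1d=True)
print(loss)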
Code example #2
File: atest.py Project: yizhoucc/ffsb
def compute_H(env, agent, theta_estimation, true_theta, phi, trajectory_data=None, H_dim=9, num_episodes=100, is1d=False):
  # roll out trajectories under the true parameters
  states, actions, tasks = trajectory(agent, torch.Tensor(phi), torch.Tensor(true_theta), env, num_episodes, is1d=is1d)
  theta_estimation = torch.nn.Parameter(torch.Tensor(theta_estimation))
  phi = torch.nn.Parameter(torch.Tensor(phi))
  phi.requires_grad = False
  loss = getLoss(agent, actions, tasks, phi, theta_estimation, env, states=states, gpu=False)
  # first-order gradient of the loss w.r.t. the estimated theta
  grads = torch.autograd.grad(loss, theta_estimation, create_graph=True, allow_unused=True)[0]
  # assemble the Hessian row by row by differentiating each gradient component
  H = torch.zeros(H_dim, H_dim)
  for i in range(H_dim):
      print(i)
      H[i] = torch.autograd.grad(grads[i], theta_estimation, retain_graph=True, allow_unused=True)[0].view(-1)
  return H
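H_dim has to match the number of entries in theta_estimation (9 in this setup, 7 in the 1d script of example #5); a hypothetical call could look like:

H = compute_H(env, agent, theta_estimation, true_theta, phi, H_dim=9, num_episodes=100)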
Code example #3
env = Model(arg) # build an environment
agent = Agent(env.state_dim, env.action_dim, arg, filename, hidden_dim=128, gamma=DISCOUNT_FACTOR, tau=0.001, device="cpu")
agent.load(filename)


true_theta_log = []
final_theta_log = []
stderr_log = []
result_log = []

for num_thetas in range(10):

    true_theta = reset_theta(arg.gains_range, arg.std_range, arg.goal_radius_range)
    true_theta_log.append(true_theta.data.clone())
    x_traj, obs_traj, a_traj, _ = trajectory(agent, true_theta, arg.INVERSE_BATCH_SIZE, env, arg, arg.gains_range, arg.std_range, arg.goal_radius_range) # generate true trajectory
    true_loss = getLoss(agent, x_traj, obs_traj, a_traj, true_theta, env, arg.gains_range, arg.std_range) # this is the lower bound of loss?


    #theta = nn.Parameter(true_theta.data.clone()+0.5*true_theta.data.clone())
    theta = nn.Parameter(reset_theta(arg.gains_range, arg.std_range, arg.goal_radius_range))
    ini_theta = theta.data.clone()


    loss_log = deque(maxlen=5000)
    theta_log = deque(maxlen=5000)
    optT = torch.optim.Adam([theta], lr=1e-3)
    prev_loss = 100000
    loss_diff = deque(maxlen=5)
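The listing is cut off before the update loop itself. A minimal sketch of one plausible inner loop, reusing only names already defined above (the iteration budget, the convergence threshold, and passing theta into getLoss are assumptions, not the project's actual code):

for it in range(1000):  # assumed iteration budget
    loss = getLoss(agent, x_traj, obs_traj, a_traj, theta, env, arg.gains_range, arg.std_range)
    optT.zero_grad()
    loss.backward()
    optT.step()
    loss_log.append(loss.item())
    theta_log.append(theta.data.clone())
    loss_diff.append(abs(prev_loss - loss.item()))
    prev_loss = loss.item()
    # stop once the last few loss changes are all small (assumed threshold)
    if len(loss_diff) == loss_diff.maxlen and max(loss_diff) < 1e-3:
        break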

Code example #4
File: atest.py Project: yizhoucc/ffsb
def run_inverse(data=None,theta=None,filename=None):
  import os
  import warnings
  warnings.filterwarnings('ignore')
  from copy import copy
  import time
  import random
  seed=time.time().as_integer_ratio()[0]
  seed=0
  random.seed(seed)
  import torch
  torch.manual_seed(seed)
  import numpy as np
  np.random.seed(int(seed))
  from numpy import pi
  torch.backends.cudnn.deterministic = True
  torch.backends.cudnn.benchmark = False
  # ----------- inverse functions -------------
  from InverseFuncs import trajectory, getLoss, reset_theta, theta_range, reset_theta_log, single_inverse, single_theta_inverse
  # ---------loading env and agent----------
  from stable_baselines import DDPG,TD3
  from FireflyEnv import ffenv_new_cord
  from Config import Config
  arg = Config()
  DISCOUNT_FACTOR = 0.99
  arg.NUM_SAMPLES=2
  arg.NUM_EP = 1000
  arg.NUM_IT = 2 # number of iteration for gradient descent
  arg.NUM_thetas = 1
  arg.ADAM_LR = 0.007
  arg.LR_STEP = 2
  arg.LR_STOP = 50
  arg.lr_gamma = 0.95
  arg.PI_STD=1
  arg.goal_radius_range=[0.05,0.2]


  # convert the stable-baselines agent to a torch policy model
  import policy_torch
  baselines_mlp_model = TD3.load('trained_agent//TD_95gamma_mc_smallgoal_500000_9_24_1_6.zip')
  agent = policy_torch.copy_mlp_weights(baselines_mlp_model,layers=[128,128])

  # load the environment, same settings as during training
  env=ffenv_new_cord.FireflyAgentCenter(arg)
  env.agent_knows_phi=False

  true_theta_log = []
  true_loss_log = []
  true_loss_act_log = []
  true_loss_obs_log = []
  final_theta_log = []
  stderr_log = []
  result_log = []
  number_update=100
  if data is None:
    save_dict={'theta_estimations':[]}
  else:
    save_dict=data


  # run the inverse for several thetas
  for num_thetas in range(arg.NUM_thetas):

      # make sure phi and true theta stay the same 
      true_theta = torch.Tensor(data['true_theta'])
      env.presist_phi=True
      env.reset(phi=true_theta, theta=true_theta)  # first test the teacher case where true theta equals phi
      theta=torch.Tensor(data['theta_estimations'][0])
      phi=torch.Tensor(data['phi'])
  

      save_dict['true_theta']=true_theta.data.clone().tolist()
      save_dict['phi']=true_theta.data.clone().tolist()
      save_dict['inital_theta']=theta.data.clone().tolist()


      for num_update in range(number_update):
          states, actions, tasks = trajectory(
              agent, phi, true_theta, env, arg.NUM_EP)
              
          result = single_theta_inverse(true_theta, phi, arg, env, agent, states, actions, tasks, filename, num_thetas, initial_theta=theta)
          
          save_dict['theta_estimations'].append(result.tolist())
          if filename is None:
            # no filename given: build a default name from the run settings
            savename = ('inverse_data/' + "EP" + str(arg.NUM_EP) + "updates" + str(number_update) + "sample" + str(arg.NUM_SAMPLES) + "IT" + str(arg.NUM_IT) + '.pkl')
            torch.save(save_dict, savename)
          elif filename[-4:] == '.pkl':
            torch.save(save_dict, filename)
          else:
            torch.save(save_dict, filename + '.pkl')

          print(result)

  print('done')
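run_inverse reads 'true_theta', 'phi', and the first entry of 'theta_estimations' from the data dict, so it expects previously saved inverse data; a hypothetical call (the path is a placeholder):

saved = torch.load('inverse_data/some_previous_run.pkl')  # placeholder path to earlier results
run_inverse(data=saved, filename='continued_run')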
Code example #5
File: atest.py Project: yizhoucc/ffsb
arg.DELTA_T=0.2
arg.EPISODE_LEN=35
a=load_inverse_data('17_21_34')
theta_trajectory=a['theta_estimations']
true_theta=a['true_theta']
theta_estimation=theta_trajectory[-1]
phi=np.array(a['phi'])
# no bg, faster
env=ffac_1d.FireflyTrue1d_cpu(arg)
baselines_mlp_model =TD3_torch.TD3.load('trained_agent/1d_1000000_9_16_22_20.zip')
agent=baselines_mlp_model.actor

agent.cpu()
agent.requires_grad_(False)  # freeze the policy parameters; only theta_estimation needs gradients

is1d=True
H_dim=7
num_episodes=100
states, actions, tasks=trajectory(agent, torch.Tensor(phi), torch.Tensor(true_theta), env, num_episodes,is1d=is1d)
theta_estimation=torch.nn.Parameter(torch.Tensor(theta_estimation))
phi=torch.nn.Parameter(torch.Tensor(phi))
phi.requires_grad=False
loss = getLoss(agent, actions, tasks, phi, theta_estimation, env,states=states, gpu=False)
grads = torch.autograd.grad(loss, theta_estimation, create_graph=True,allow_unused=True)[0]
print(grads)
H = torch.zeros(H_dim,H_dim)
for i in range(H_dim):
    print(i)
    H[i] = torch.autograd.grad(grads[i], theta_estimation, retain_graph=True,allow_unused=True)[0].view(-1)
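
If the loss above is treated as a negative log-likelihood, inverting H gives an approximate covariance for theta_estimation, whose diagonal yields the kind of standard errors logged in stderr_log in example #3; a minimal sketch under that assumption:

cov = torch.inverse(H)                  # approximate covariance of the estimate
stderr = torch.sqrt(torch.diag(cov))    # per-parameter standard errors
print(stderr)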