Beispiel #1
0
# Settings written onto the shared `arg` config object before running the
# inverse. goal_radius_range and TERMINAL_VEL were previously assigned twice
# in a row; only the values that actually took effect are kept here.
arg.lr_gamma = 0.95
arg.PI_STD = 1
arg.goal_radius_range = [0.15, 0.3]
arg.std_range = [0.02, 0.3, 0.02, 0.3]
# Terminal velocity: norm(action) that is taken as a signal to stop.
arg.TERMINAL_VEL = 0.025
arg.DELTA_T = 0.2
arg.EPISODE_LEN = 35

# Forwarded to single_theta_inverse as `number_updates` in the loop below.
number_updates = 100

# Load the trained TD3 agent and copy its MLP weights into a torch model.
import policy_torch
baselines_mlp_model = TD3.load('trained_agent/accac_final_1000000_9_11_20_25.zip')
agent = policy_torch.copy_mlp_weights(baselines_mlp_model, layers=[512, 512], n_inputs=32)

# Load the environment, same as training.
env = firefly_accac.FireflyAccAc(arg)
# --- setting the env for inverse ---
# TODO: move this into a function of env
env.agent_knows_phi = False


for i in range(10):
    filename=(str(time.localtime().tm_mday)+'_'+str(time.localtime().tm_hour)+'_'+str(time.localtime().tm_min))
    single_theta_inverse(arg, env, agent, filename, 
                    number_updates=number_updates,
                    true_theta=None, 
                    phi=None,
                    init_theta=None,
Beispiel #2
0
# Inverse-run settings written onto the shared `arg` config object
# (`arg` itself is created outside this excerpt).
arg.NUM_thetas = 1
arg.ADAM_LR = 0.1  # presumably the Adam learning rate — confirm where it is consumed
arg.LR_STEP = 2
arg.LR_STOP = 0.003
arg.lr_gamma = 0.95
arg.PI_STD = 1
arg.goal_radius_range = [0.1, 0.3]
arg.TERMINAL_VEL = 0.025
# Embedded in the output filename built in the loop below.
number_updates = 100

# Load the trained TD3 agent and copy its MLP weights into a torch model.
import policy_torch
baselines_mlp_model = TD3.load(
    'trained_agent//acc_retrain_1000000_2_18_21_4.zip')
# layers / n_inputs presumably have to match the architecture the agent was
# trained with — verify against the training script.
agent = policy_torch.copy_mlp_weights(baselines_mlp_model,
                                      layers=[128, 128],
                                      n_inputs=30)

# Load the environment, same as training.
env = firefly_acc.FireflyAcc(arg)
# --- setting the env for inverse ---
# TODO: move this into a function of env
env.agent_knows_phi = False

for i in range(10):
    filename = ("test_acc_EP" + str(arg.NUM_EP) + "updates" +
                str(number_updates) + "lr" + str(arg.ADAM_LR) + 'step' +
                str(arg.LR_STEP) + str(time.localtime().tm_mday) + '_' +
                str(time.localtime().tm_hour) + '_' +
                str(time.localtime().tm_min))
    single_theta_inverse(arg,
Beispiel #3
0
import numpy as np
from numpy import pi
import matplotlib.pyplot as plt
# Name of the saved agent handed to DDPG.load below.
agent_name = "DDPG_selu_skip_96reward1000000_9 26 16 43"
# Number of iterations of the per-episode loop below.
num_episode = 20
arg = Config()
# Optional overrides of the default config ranges, currently disabled:
# arg.gains_range[0:2]=[0.9,0.91]
# arg.std_range=[0.02,0.03,0.02,0.03]

# arg.std_range=[0.0001,0.001,0.0001,0.001]
# arg.gains_range=[0.99,1.,0.99,1.]

env = ffenv.FireflyEnv(arg)
# Load the saved DDPG agent and copy its MLP weights into a torch model that
# uses SELU activations.
baselines_selu = DDPG.load(agent_name)
torch_model_selu = policy_torch.copy_mlp_weights(baselines_selu,
                                                 layers=[256, 256, 64, 32],
                                                 act_fn=nn.functional.selu)
torch_model_selu.name = 'selu'

# Alternative agent (default activation), currently disabled:
# baselines_relu = DDPG.load("DDPG_theta")
# torch_model_relu = policy_torch.copy_mlp_weights(baselines_relu,layers=[32,64])
# torch_model_relu.name='relu'

# The agent used for the rest of the script.
agent = torch_model_selu

# Container for the per-episode data (each episode builds an `ep_data` dict
# in the loop below).
all_ep = []

# for each episode,
for i in range(num_episode):
    ep_data = {}
Beispiel #4
0
 def load_policy(self):
     """Load the saved "DDPG_theta" agent and return it as a torch policy."""
     # "DDPG_theta" was trained for 100k steps, with std noise.
     return policy_torch.copy_mlp_weights(DDPG.load("DDPG_theta"))
Beispiel #5
0
def _inverse_save_path(filename, arg, number_update):
    """Return the path the accumulated inverse results are saved to.

    With no filename, a default name under 'inverse_data/' is built from the
    run settings. A filename that already ends in '.pkl' is used unchanged;
    any other filename gets '.pkl' appended.
    """
    if filename is None:
        return ('inverse_data/' + "EP" + str(arg.NUM_EP)
                + "updates" + str(number_update)
                + "sample" + str(arg.NUM_SAMPLES)
                + "IT" + str(arg.NUM_IT) + '.pkl')
    if filename.endswith('.pkl'):
        return filename
    return filename + '.pkl'


def run_inverse(data=None, theta=None, filename=None):
    """Run repeated single-theta inverse updates and save every estimate.

    For each update a fresh batch of trajectories is generated with the true
    theta, the inverse is run from the initial theta, and the resulting
    estimate is appended to ``data['theta_estimations']``. The whole dict is
    re-saved with ``torch.save`` after every update.

    Args:
        data: dict with keys 'true_theta', 'phi' and 'theta_estimations'.
            The first entry of 'theta_estimations' is used as the initial
            theta. The dict is modified in place ('true_theta', 'phi',
            'inital_theta' are rewritten, estimates are appended).
        theta: accepted for backward compatibility but not used; the initial
            theta is always taken from ``data``.
        filename: output path. '.pkl' is appended unless already present.
            When None, a default name under 'inverse_data/' is generated.

    Raises:
        ValueError: if ``data`` is None, since the true theta, phi and the
            initial estimate can only come from ``data``.
    """
    # Fail fast, before loading the agent, instead of crashing later on
    # data['true_theta'].
    if data is None:
        raise ValueError(
            "run_inverse requires `data` with keys 'true_theta', 'phi' "
            "and 'theta_estimations'")

    import warnings
    warnings.filterwarnings('ignore')
    import random
    # Fixed seed for reproducible runs (a time-based seed used to be computed
    # here but was immediately overwritten with 0).
    seed = 0
    random.seed(seed)
    import torch
    torch.manual_seed(seed)
    import numpy as np
    np.random.seed(int(seed))
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    # ----------- inverse functions -------------
    from InverseFuncs import trajectory, getLoss, reset_theta, theta_range, reset_theta_log, single_inverse
    # --------- loading env and agent ----------
    from stable_baselines import DDPG, TD3
    from FireflyEnv import ffenv_new_cord
    from Config import Config

    arg = Config()
    arg.NUM_SAMPLES = 2
    arg.NUM_EP = 1000
    arg.NUM_IT = 2  # number of iterations for gradient descent
    arg.NUM_thetas = 1
    arg.ADAM_LR = 0.007
    arg.LR_STEP = 2
    arg.LR_STOP = 50
    arg.lr_gamma = 0.95
    arg.PI_STD = 1
    arg.goal_radius_range = [0.05, 0.2]

    # Load the trained TD3 agent and copy its MLP weights into a torch model.
    import policy_torch
    baselines_mlp_model = TD3.load('trained_agent//TD_95gamma_mc_smallgoal_500000_9_24_1_6.zip')
    agent = policy_torch.copy_mlp_weights(baselines_mlp_model, layers=[128, 128])

    # Load the environment, same as training.
    env = ffenv_new_cord.FireflyAgentCenter(arg)
    env.agent_knows_phi = False

    number_update = 100
    # Results are accumulated directly in the caller's dict.
    save_dict = data
    savename = _inverse_save_path(filename, arg, number_update)

    # Use several thetas for the inverse (arg.NUM_thetas is 1 above).
    for num_thetas in range(arg.NUM_thetas):
        # Make sure phi and true theta stay the same.
        true_theta = torch.Tensor(data['true_theta'])
        env.presist_phi = True
        # First test the teacher case where true theta equals phi.
        env.reset(phi=true_theta, theta=true_theta)
        theta = torch.Tensor(data['theta_estimations'][0])
        phi = torch.Tensor(data['phi'])

        save_dict['true_theta'] = true_theta.data.clone().tolist()
        # Store the phi that is actually used for the trajectories; writing
        # true_theta here would overwrite the loaded phi in the saved file.
        save_dict['phi'] = phi.data.clone().tolist()
        save_dict['inital_theta'] = theta.data.clone().tolist()

        for _ in range(number_update):
            states, actions, tasks = trajectory(
                agent, phi, true_theta, env, arg.NUM_EP)

            # NOTE(review): single_theta_inverse is not among the names
            # imported above; it must be available at module scope.
            result = single_theta_inverse(true_theta, phi, arg, env, agent, states, actions, tasks, filename, num_thetas, initial_theta=theta)

            save_dict['theta_estimations'].append(result.tolist())
            # Re-save after every update so partial progress is kept.
            torch.save(save_dict, savename)

            print(result)

    print('done')
Beispiel #6
0
# Timing and optimisation settings written onto the shared `arg` config
# object (`arg` itself is created outside this excerpt).
arg.DELTA_T = 0.1
arg.EPISODE_TIME = 1  # maximum length of time for one episode; if the monkey can't reach the firefly within this period, a new firefly comes
# Episode length as EPISODE_TIME / DELTA_T, truncated to an int.
arg.EPISODE_LEN = int(arg.EPISODE_TIME / arg.DELTA_T)
arg.NUM_SAMPLES = 2
arg.NUM_EP = 50
arg.NUM_IT = 200  # number of iterations for gradient descent
arg.NUM_thetas = 10
arg.ADAM_LR = 0.2  # presumably the Adam learning rate — confirm where it is consumed
arg.LR_STEP = 2
arg.LR_STOP = 50
arg.lr_gamma = 0.95

# agent: load the saved DDPG agent and copy its MLP weights into a torch model
import policy_torch
baselines_mlp_model = DDPG.load("DDPG_theta")
agent = policy_torch.copy_mlp_weights(baselines_mlp_model)
# agent=baselines_mlp_model
env = ffenv_sigmoid.FireflyEnv(arg)
env.max_goal_radius = (arg.goal_radius_range[1]
                       )  # upper bound of arg.goal_radius_range
env.box = arg.WORLD_SIZE
env.reset_theta = False

# Empty result accumulators; whatever fills them is outside this excerpt.
true_theta_log = []
true_loss_log = []
true_loss_act_log = []
true_loss_obs_log = []
final_theta_log = []
stderr_log = []
result_log = []