Example #1
arg.NUM_SAMPLES=2
arg.NUM_EP = 200
arg.NUM_IT = 2  # number of iterations for gradient descent
arg.NUM_thetas = 1
arg.ADAM_LR = 0.25
arg.LR_STEP = 50
arg.LR_STOP = 0.1
arg.lr_gamma = 0.95
arg.PI_STD=1
arg.goal_radius_range=[0.15,0.3]
arg.std_range = [0.02,0.3,0.02,0.3]
arg.TERMINAL_VEL = 0.025  # norm(action) below this value is treated as the stop signal
arg.DELTA_T=0.2
arg.EPISODE_LEN=35

number_updates=100

# convert the trained agent into a torch policy model
import policy_torch
from stable_baselines3 import TD3  # assumed import; the checkpoint below is a stable-baselines zip
baselines_mlp_model = TD3.load('trained_agent/accac_final_1000000_9_11_20_25.zip')
agent = policy_torch.copy_mlp_weights(baselines_mlp_model, layers=[512, 512], n_inputs=32)

# load the environment, same as in training
import firefly_accac
env = firefly_accac.FireflyAccAc(arg)
# --- set up the env for the inverse problem ---
# TODO: move this into a method of env
env.agent_knows_phi = False
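
The ADAM_LR, LR_STEP, lr_gamma, and LR_STOP fields above describe a step-decayed Adam schedule for the inverse gradient descent. A minimal sketch of how such a schedule could be wired up with torch.optim; the theta tensor and compute_loss function are hypothetical placeholders, not part of this repo:

import torch

theta = torch.zeros(9, requires_grad=True)  # hypothetical parameter vector to be inferred
optimizer = torch.optim.Adam([theta], lr=arg.ADAM_LR)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=arg.LR_STEP, gamma=arg.lr_gamma)

for it in range(arg.NUM_IT):
    loss = compute_loss(theta)  # hypothetical negative log-likelihood of the data under theta
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    scheduler.step()
    if optimizer.param_groups[0]['lr'] < arg.LR_STOP:  # stop once the learning rate has decayed past LR_STOP
        break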

Example #2
import torch

learning_arg = torch.load(
    '../firefly-monkey-data/data/20191111-151539_arg.pkl')
DISCOUNT_FACTOR = learning_arg['argument']['DISCOUNT_FACTOR']

gains_range = [1, 1, 1, 1]  #learning_arg['argument']['gains_range']
std_range = [0.05, 0.05, 0.05, 0.05]  #learning_arg['argument']['std_range']
goal_radius_range = [0.9, 0.9]  #learning_arg['argument']['goal_radius_range']

#gains_range = learning_arg['argument']['gains_range']
#std_range = learning_arg['argument']['std_range']
#goal_radius_range = learning_arg['argument']['goal_radius_range']
arg.WORLD_SIZE = learning_arg['argument']['WORLD_SIZE']
arg.DELTA_T = learning_arg['argument']['DELTA_T']
arg.EPISODE_TIME = learning_arg['argument']['EPISODE_TIME']
arg.EPISODE_LEN = learning_arg['argument']['EPISODE_LEN']

# df = pd.read_csv('../firefly-inverse-data/data/' + filename + '_log.csv',
#                  usecols=['discount_factor','process gain forward', 'process gain angular', 'process noise std forward',
#                           'process noise std angular', 'obs gain forward', 'obs gain angular', 'obs noise std forward',
#                           'obs noise std angular', 'goal radius'])
#
# DISCOUNT_FACTOR = df['discount_factor'][0]
# gains_range = [np.floor(df['process gain forward'].min()), np.ceil(df['process gain forward'].max()),
#                np.floor(df['process gain angular'].min()), np.ceil(df['process gain angular'].max())]
#
# std_range = [df['process noise std forward'].min(), df['process noise std forward'].max(),
#                df['process noise std angular'].min(), df['process noise std angular'].max()]
# goal_radius_range = [df['goal radius'].min(), df['goal radius'].max()]

import gym
env = gym.make('FireflyTorch-v0')  # ,PROC_NOISE_STD,OBS_NOISE_STD)
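
Once the environment is registered and created, a single rollout could look like the following (a minimal sketch assuming the classic pre-0.26 Gym step/reset API and a random policy standing in for the trained agent):

ob = env.reset()
done = False
while not done:
    action = env.action_space.sample()  # random action; a trained agent would act on ob instead
    ob, reward, done, info = env.step(action)
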
Example #3
from numpy import pi  # assumed source of pi used in the ranges below
from monkey_functions import *
from Config import Config
arg = Config()
arg.presist_phi=False
arg.agent_knows_phi=False
arg.goal_distance_range=[0.4,1]
arg.gains_range =[0.05,1.5,pi/4,pi/1]
arg.goal_radius_range=[0.05,0.3]
arg.std_range = [0.08,0.3,pi/80,pi/80*5]
arg.mag_action_cost_range= [0.0001,0.001]
arg.dev_action_cost_range= [0.0001,0.005]
arg.dev_v_cost_range= [0.1,0.5]
arg.dev_w_cost_range= [0.1,0.5]
arg.TERMINAL_VEL = 0.1
arg.DELTA_T=0.1
arg.EPISODE_LEN=100
DISCOUNT_FACTOR = 0.99
arg.sample=100
arg.batch = 70
# arg.NUM_SAMPLES=1
# arg.NUM_EP=1
arg.NUM_IT = 1 
arg.NUM_thetas = 1
arg.ADAM_LR = 0.0002
arg.LR_STEP = 20
arg.LR_STOP = 0.5
arg.lr_gamma = 0.95
arg.PI_STD=1
arg.cost_scale=1
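
Each *_range field above holds [low, high] bounds (gains_range and std_range pack the forward and angular bounds back to back). A minimal sketch of drawing one random parameter set from these bounds; the ordering of the resulting vector is an illustrative assumption, not the repo's actual convention:

import numpy as np

phi = [
    np.random.uniform(*arg.gains_range[0:2]),   # forward gain
    np.random.uniform(*arg.gains_range[2:4]),   # angular gain
    np.random.uniform(*arg.std_range[0:2]),     # forward process-noise std
    np.random.uniform(*arg.std_range[2:4]),     # angular process-noise std
    np.random.uniform(*arg.goal_radius_range),  # goal radius
]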
Example #4
# acceleration MDP model
import firefly_mdp
import ffacc_real
from stable_baselines3 import TD3  # assumed import; adjust if the repo uses a different TD3 implementation
env = firefly_mdp.FireflyMDP(arg)
model = TD3.load('trained_agent/mdp_noise_1000000_2_9_18_8')
model.set_env(env)

# 1d real model
# easy
arg.gains_range = [0.99, 1]
arg.goal_radius_range = [25, 25.3]
arg.std_range = [0.5, 0.51, 49.5, 50]
arg.mag_action_cost_range = [0.00001, 0.000011]
arg.dev_action_cost_range = [0.00001, 0.000012]
arg.TERMINAL_VEL = 20
arg.DELTA_T = 0.2
arg.EPISODE_LEN = 50
arg.training = True
arg.presist_phi = False
arg.agent_knows_phi = False
env = ffacc_real.FireflyTrue1d_real(arg)
# hard
arg.gains_range = [0.1, 5]
arg.goal_radius_range = [1, 50]
arg.std_range = [0.01, 2, 0.01, 100]
arg.mag_action_cost_range = [0.00001, 0.0001]
arg.dev_action_cost_range = [0.00001, 0.00005]
arg.TERMINAL_VEL = 20
arg.DELTA_T = 0.2
arg.EPISODE_LEN = 50
arg.training = True
arg.presist_phi = False
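
After the hard ranges are set, the environment would typically be rebuilt and handed back to the loaded model before further training (a minimal sketch; the timestep budget and save name are placeholders):

env = ffacc_real.FireflyTrue1d_real(arg)  # rebuild the env with the hard ranges
model.set_env(env)
model.learn(total_timesteps=1_000_000)    # placeholder training budget
model.save('trained_agent/1d_real_hard')  # hypothetical save path
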
Example #5
from numpy import pi  # assumed source of pi used in gains_range below
import ffacc_real

arg.init_action_noise = 0.5
arg.goal_distance_range = [0.2, 1]
arg.mag_action_cost_range = [0.1, 1.]
arg.dev_action_cost_range = [0.1, 1.]
arg.dev_v_cost_range = [0.1, 1.]
arg.dev_w_cost_range = [0.1, 1.]
# arg.goal_distance_range=[0.01,0.99]
arg.gains_range = [0.35, 0.45, pi / 2 - 0.1, pi / 2 + 0.1]
# arg.goal_radius_range=[0.07,0.2]
arg.std_range = [0.1, 0.7, 0.1, 0.7]
# arg.mag_action_cost_range= [0.0001,0.0005]
# arg.dev_action_cost_range= [0.0001,0.0005]
arg.REWARD = 100
arg.TERMINAL_VEL = 0.1
arg.DELTA_T = 0.1
arg.EPISODE_LEN = 40
arg.training = True
arg.presist_phi = False
arg.agent_knows_phi = True
arg.cost_scale = 1
env = ffacc_real.FireflyFinal2(arg)
env.no_skip = True
modelname = None
# modelname='re_re_skipcostscale_200000_4_15_13_16_17_19'
note = 're'
from stable_baselines3 import SAC

# # 1d test
# arg.initial_uncertainty_range=[0,1]
# env=ffacc_real.Simple1d(arg)
# env.no_skip=False
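
With modelname left as None, a fresh SAC agent would be trained from scratch on FireflyFinal2, while a non-None modelname would be loaded and fine-tuned. A minimal sketch of both branches using the stable_baselines3 API; the policy architecture, timestep budget, and save name are placeholders:

if modelname is None:
    model = SAC('MlpPolicy', env, verbose=1)  # fresh agent with default hyperparameters
else:
    model = SAC.load('trained_agent/' + modelname, env=env)  # resume from the named checkpoint
model.learn(total_timesteps=200_000)  # placeholder training budget
model.save('trained_agent/' + note + '_final')  # hypothetical save name built from `note`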