Example #1
DISCOUNT_FACTOR = 0.99
arg.NUM_SAMPLES = 2
arg.NUM_EP = 200
arg.NUM_IT = 2  # number of gradient-descent iterations
arg.NUM_thetas = 1
arg.ADAM_LR = 0.25
arg.LR_STEP = 50
arg.LR_STOP = 0.1
arg.lr_gamma = 0.95
arg.PI_STD = 1
arg.goal_radius_range = [0.15, 0.3]
arg.std_range = [0.02, 0.3, 0.02, 0.3]
arg.TERMINAL_VEL = 0.025  # norm(action) threshold below which the agent is treated as stopped
arg.DELTA_T = 0.2
arg.EPISODE_LEN = 35

number_updates = 100

# convert the trained baselines agent to a torch model
import policy_torch
from stable_baselines import TD3  # assumed source of TD3; use the baselines package the agent was trained with

baselines_mlp_model = TD3.load('trained_agent/accac_final_1000000_9_11_20_25.zip')
agent = policy_torch.copy_mlp_weights(baselines_mlp_model, layers=[512, 512], n_inputs=32)
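A quick sanity check, not in the original snippet: assuming policy_torch.copy_mlp_weights returns a callable torch module that maps a 32-dimensional observation to an action, a dummy forward pass verifies the copied weights load and run.

# Sketch only: the call signature of the converted policy is an assumption.
import torch
with torch.no_grad():
    dummy_obs = torch.zeros(1, 32)   # batch of one 32-dim observation
    dummy_action = agent(dummy_obs)  # assumed: converted policy is callable on an observation tensor
    print(dummy_action.shape)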

# load the environment with the same settings used during training
import firefly_accac
env = firefly_accac.FireflyAccAc(arg)
# --- set up the env for the inverse problem ---
# TODO: move this into a method of env
env.agent_knows_phi = False
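A minimal sketch, not from the original example, of how the configured env and agent could be rolled out to collect trajectories for the inverse problem; it assumes a gym-style reset()/step() interface on FireflyAccAc and a tensor-in/tensor-out policy, both of which are assumptions about this repo's API.

# Assumed interfaces: env.reset(), env.step(action), and agent(obs_tensor).
episodes = []
for ep in range(arg.NUM_EP):
    obs = env.reset()
    trajectory = []
    for t in range(arg.EPISODE_LEN):
        with torch.no_grad():
            action = agent(torch.as_tensor(obs, dtype=torch.float32))
        obs, reward, done, info = env.step(action)
        trajectory.append((obs, action))
        if done:
            break
    episodes.append(trajectory)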
Example #2
filename = '20191111-151539-12011329'  # agent information

import torch  # torch.load restores the saved training arguments

learning_arg = torch.load(
    '../firefly-monkey-data/data/20191111-151539_arg.pkl')
DISCOUNT_FACTOR = learning_arg['argument']['DISCOUNT_FACTOR']

gains_range = [1, 1, 1, 1]  #learning_arg['argument']['gains_range']
std_range = [0.05, 0.05, 0.05, 0.05]  #learning_arg['argument']['std_range']
goal_radius_range = [0.9, 0.9]  #learning_arg['argument']['goal_radius_range']

#gains_range = learning_arg['argument']['gains_range']
#std_range = learning_arg['argument']['std_range']
#goal_radius_range = learning_arg['argument']['goal_radius_range']
arg.WORLD_SIZE = learning_arg['argument']['WORLD_SIZE']
arg.DELTA_T = learning_arg['argument']['DELTA_T']
arg.EPISODE_TIME = learning_arg['argument']['EPISODE_TIME']
arg.EPISODE_LEN = learning_arg['argument']['EPISODE_LEN']
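An optional check, using only names defined above, to confirm which settings were restored from the saved training arguments and which were overridden with fixed values here.

print('DISCOUNT_FACTOR:', DISCOUNT_FACTOR)
print('WORLD_SIZE:', arg.WORLD_SIZE, 'DELTA_T:', arg.DELTA_T, 'EPISODE_LEN:', arg.EPISODE_LEN)
print('gains_range:', gains_range)
print('std_range:', std_range)
print('goal_radius_range:', goal_radius_range)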

# df = pd.read_csv('../firefly-inverse-data/data/' + filename + '_log.csv',
#                  usecols=['discount_factor','process gain forward', 'process gain angular', 'process noise std forward',
#                           'process noise std angular', 'obs gain forward', 'obs gain angular', 'obs noise std forward',
#                           'obs noise std angular', 'goal radius'])
#
# DISCOUNT_FACTOR = df['discount_factor'][0]
# gains_range = [np.floor(df['process gain forward'].min()), np.ceil(df['process gain forward'].max()),
#                np.floor(df['process gain angular'].min()), np.ceil(df['process gain angular'].max())]
#
# std_range = [df['process noise std forward'].min(), df['process noise std forward'].max(),
#                df['process noise std angular'].min(), df['process noise std angular'].max()]
# goal_radius_range = [df['goal radius'].min(), df['goal radius'].max()]