df = pd.read_csv('../firefly-inverse-data/data/' + filename + '_log.csv',
                 usecols=['discount_factor',
                          'process gain forward', 'process gain angular',
                          'process noise std forward', 'process noise std angular',
                          'obs gain forward', 'obs gain angular',
                          'obs noise std forward', 'obs noise std angular',
                          'goal radius'])

DISCOUNT_FACTOR = df['discount_factor'][0]
arg.gains_range = [np.floor(df['process gain forward'].min()),
                   np.ceil(df['process gain forward'].max()),
                   np.floor(df['process gain angular'].min()),
                   np.ceil(df['process gain angular'].max())]
arg.std_range = [df['process noise std forward'].min(),
                 df['process noise std forward'].max(),
                 df['process noise std angular'].min(),
                 df['process noise std angular'].max()]
arg.goal_radius_range = [df['goal radius'].min(), df['goal radius'].max()]

env = Model(arg)  # build an environment
agent = Agent(env.state_dim, env.action_dim, arg, filename, hidden_dim=128,
              gamma=DISCOUNT_FACTOR, tau=0.001, device="cpu")
agent.load(filename)

true_theta_log = []
final_theta_log = []
stderr_log = []
result_log = []

for num_thetas in range(10):
    true_theta = reset_theta(arg.gains_range, arg.std_range, arg.goal_radius_range)
    true_theta_log.append(true_theta.data.clone())
    # generate true trajectory
    x_traj, obs_traj, a_traj, _ = trajectory(agent, true_theta, arg.INVERSE_BATCH_SIZE, env, arg,
                                             arg.gains_range, arg.std_range, arg.goal_radius_range)
    # this is the lower bound of loss?
    true_loss = getLoss(agent, x_traj, obs_traj, a_traj, true_theta, env,
                        arg.gains_range, arg.std_range)
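    # A minimal sketch (an assumption, not the original script) of the fitting step that would
    # populate final_theta_log and result_log for each true_theta: start from a random theta and
    # minimize getLoss with a gradient-based optimizer, assuming getLoss is differentiable in
    # theta and reset_theta returns a leaf tensor. Optimizer choice, learning rate and the number
    # of iterations are illustrative only; stderr_log would presumably be derived from the
    # curvature of the loss around the optimum (not shown).
    theta = reset_theta(arg.gains_range, arg.std_range, arg.goal_radius_range)
    theta.requires_grad = True
    optimizer = torch.optim.Adam([theta], lr=1e-3)
    for it in range(500):  # hypothetical number of optimization steps
        loss = getLoss(agent, x_traj, obs_traj, a_traj, theta, env,
                       arg.gains_range, arg.std_range)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    final_theta_log.append(theta.data.clone())
    result_log.append({'true_loss': true_loss.item(), 'final_loss': loss.item()})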
rewards = deque(maxlen=100)
video_path = './pretrained/ddpg_minhae/video.mp4'
TOT_T = 500

env = gym.make('FireflyTorch-v0')
# rec = VideoRecorder(env, video_path, enabled=video_path is not None)  # for video
state_dim = env.state_dim
action_dim = env.action_dim

std = 0.05
noise = Noise(action_dim, mean=0., std=std)

agent = Agent(PROC_NOISE_STD, OBS_NOISE_STD, gains, obs_gains, rew_std,
              state_dim, action_dim, hidden_dim=128, tau=0.001)
agent.load('pretrained/ddpg_minhae/ddpg_model_EE.pth.tar')

tot_t = 0.
episode = 0.

while tot_t <= TOT_T:
    episode += 1  # every episode starts a new firefly
    t, x, P, ox, b, state = env.reset()
    episode_reward = 0.

    while t < EPISODE_LEN:
        action = agent.select_action(state, noise)
env.setup(arg)
env.model.box = arg.WORLD_SIZE
env.model.min_goal_radius = goal_radius_range[0]

x, b, state, pro_gains, pro_noise_stds, obs_gains, obs_noise_stds, goal_radius = env.reset(
    gains_range, std_range, goal_radius_range)

state_dim = env.model.state_dim
action_dim = env.model.action_dim

MAX_EPISODE = 1000
std = 0.00001  # 0.05
noise = Noise(action_dim, mean=0., std=std)

agent = Agent(state_dim, action_dim, arg, filename, hidden_dim=128,
              gamma=DISCOUNT_FACTOR, tau=0.001)
agent.load(filename)

tot_t = 0.
episode = 0.

COLUMNS = ['total time', 'ep', 'time step', 'reward', 'goal',
           'a_vel', 'a_ang', 'true_r', 'true_rel_ang', 'r', 'rel_ang',
           'vel', 'ang_vel',
           'vecL1', 'vecL2', 'vecL3', 'vecL4', 'vecL5', 'vecL6', 'vecL7', 'vecL8',
           'vecL9', 'vecL10', 'vecL11', 'vecL12', 'vecL13', 'vecL14', 'vecL15',
           'process gain forward', 'process gain angular',
           'process noise std forward', 'process noise std angular',
tot_t = 0.    # number of total time steps
episode = 0.  # number of fireflies
int_t = 1     # variable for changing the world setting every EPISODE_LEN time steps

state_dim = env.state_dim
action_dim = env.action_dim

filename = arg.filename
argument = arg.__dict__
torch.save(argument, arg.data_path + 'data/' + filename + '_arg.pkl')

agent = Agent(state_dim, action_dim, arg, filename, hidden_dim=128,
              gamma=arg.DISCOUNT_FACTOR, tau=0.001)
# If you want to use a pretrained agent, load it as below; otherwise leave this commented out.
# agent.load('20191004-160540')

# reset monkey's internal model
b, state, obs_gains, obs_noise_ln_vars = agent.Bstep.reset(
    x, torch.zeros(1), pro_gains, pro_noise_ln_vars, goal_radius,
    arg.gains_range, arg.noise_range)

# action space noise
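# Assumption about the truncated continuation: the action-space exploration noise is presumably
# constructed the same way as in the evaluation scripts above; the std value here is illustrative.
std = 0.05
noise = Noise(action_dim, mean=0., std=std)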