'obs gain forward', 'obs gain angular', 'obs noise std forward', 'obs noise std angular', 'goal radius', 'box_size', 'discount_factor' ] history = pd.DataFrame(columns=COLUMNS) while episode <= MAX_EPISODE: episode += 1 # every episode starts a new firefly t = torch.zeros(1) # to track the amount of time steps to catch a firefly theta = (pro_gains, pro_noise_stds, obs_gains, obs_noise_stds, goal_radius) env.Brender(b, x, arg.WORLD_SIZE, goal_radius ) # display pop-up window (for every new action in each step) while t < arg.EPISODE_LEN: # for a single FF action = agent.select_action(state, action_noise=noise, param=None) # with action noise next_x, reached_target, next_b, reward, info, next_state = env.step( episode, x, b, action, t, theta, arg.REWARD) env.Brender( next_b, next_x, arg.WORLD_SIZE, goal_radius ) # display pop-up window (for every new action in each step) #time.sleep(0.1) # delay for 0.005 sec if info['stop']: time.sleep(1) # check time limit TimeEnd = ( t + 1 == arg.EPISODE_LEN ) # if the monkey can't catch the firefly in EPISODE_LEN, reset the game. mask = torch.tensor([1 - float(TimeEnd)]) # mask = 0: episode is over
rew_std, state_dim, action_dim, hidden_dim=128, tau=0.001) agent.load('pretrained/ddpg_minhae/ddpg_model_EE.pth.tar') tot_t = 0. episode = 0. while tot_t <= TOT_T: episode += 1 # every episode starts a new firefly t, x, P, ox, b, state = env.reset() episode_reward = 0. while t < EPISODE_LEN: action = agent.select_action(state, noise) next_x, reached_target, next_b, reward, info, next_state = env.step( episode, x, b, action, t) env.Brender( next_b, next_x ) # display pop-up window (for every new action in each step) #rec.capture_frame() # for video time.sleep(0.1) # delay for 0.005 sec if info['stop']: time.sleep(2) # check time limit TimeEnd = ( t + 1 == EPISODE_LEN ) # if the monkey can't catch the firefly in EPISODE_LEN, reset the game.