import os

import numpy as np
import torch

import utils  # project-local helpers assumed to provide state_1d_flat, VideoSaver, RGB2BGR

# Assumed module-level device; the original snippet references `device` without defining it.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


def evaluate(actor_main, env, control_stepsize, state_dim, action_dim,
             actions_per_control, video_info=None):
    """Run one evaluation episode in which the actor emits a block of
    `actions_per_control` actions per control step, each held for
    `control_stepsize / actions_per_control` simulation steps. The actor input
    is the flattened observation concatenated with the previous action block."""
    action_stepsize = int(control_stepsize / actions_per_control)
    timestep = env.reset()
    # dm_control TimeStep: (step_type, reward, discount, observation)
    _, _, _, s = timestep
    prev_action = np.zeros([actions_per_control, action_dim])
    s = utils.state_1d_flat(s)
    s_a = np.append(s, prev_action.reshape([-1]))
    s_a = torch.FloatTensor(s_a).to(device)
    step_i = 0
    ep_reward = 0

    if video_info is not None:
        video_dir = video_info[0]
        epi_i = video_info[1]
        video_filepath = os.path.join(video_dir, "training_" + str(epi_i) + ".avi")
        video_saver = utils.VideoSaver(video_filepath,
                                       int(1.0 / env.control_timestep()), 30,
                                       width=320, height=240)
        frame = env.physics.render(camera_id=0, width=320, height=240)
        video_saver.write(utils.RGB2BGR(frame))

    while step_i < 1000:
        with torch.no_grad():
            a = actor_main.forward(s_a.view(-1, state_dim)).cpu().numpy()[0]
        actions = a.reshape([actions_per_control, action_dim])
        # This control step executes the previously selected block (prev_action);
        # the freshly predicted block is stored in the next state and applied
        # on the following iteration.
        for action_iter in range(actions_per_control):
            for _ in range(action_stepsize):
                timestep = env.step(prev_action[action_iter])
                step_i += 1
                if video_info is not None:
                    frame = env.physics.render(camera_id=0, width=320, height=240)
                    video_saver.write(utils.RGB2BGR(frame))
                if step_i > 1000:
                    break
            if step_i > 1000:
                break
        if step_i > 1000:
            break
        t, r, _, s2 = timestep
        s2 = utils.state_1d_flat(s2)
        s2_a = np.append(s2, actions.reshape([-1]))
        s2_a = torch.FloatTensor(s2_a).to(device)
        s_a = s2_a
        ep_reward += r
        prev_action = actions

    if video_info is not None:
        video_saver.release()
    return ep_reward
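# --- Illustrative dimension bookkeeping (assumed example values, not from the source) ---
# The multi-action evaluate() above feeds the actor the flattened observation
# concatenated with the previous action block, so the `state_dim` it receives
# must already include those action entries. A minimal sketch of that bookkeeping:
obs_dim_example = 17                              # e.g. len(utils.state_1d_flat(obs))
actions_per_control_example = 4                   # actions emitted per control step
action_dim_example = 6                            # per-action dimensionality
state_dim_example = obs_dim_example + actions_per_control_example * action_dim_example
actor_output_dim_example = actions_per_control_example * action_dim_example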
# Single-action variant: one action is predicted per control step and repeated
# for `control_stepsize` simulation steps.
def evaluate(actor_main, env, control_stepsize, state_dim, action_dim, video_info=None):
    timestep = env.reset()
    _, _, _, s = timestep
    s = torch.FloatTensor(utils.state_1d_flat(s)).to(device)
    step_i = 0
    ep_reward = 0

    if video_info is not None:
        video_dir = video_info[0]
        epi_i = video_info[1]
        video_filepath = os.path.join(video_dir, "training_" + str(epi_i) + ".avi")
        video_saver = utils.VideoSaver(video_filepath,
                                       int(1.0 / env.control_timestep()), 30,
                                       width=320, height=240)
        frame = env.physics.render(camera_id=0, width=320, height=240)
        video_saver.write(utils.RGB2BGR(frame))

    while step_i < 1000:
        with torch.no_grad():
            a = actor_main.forward(s.view(-1, state_dim)).cpu().numpy()[0]
        # Hold the predicted action for `control_stepsize` simulation steps.
        for _ in range(control_stepsize):
            timestep = env.step(np.reshape(a, (action_dim,)))
            step_i += 1
            if video_info is not None:
                frame = env.physics.render(camera_id=0, width=320, height=240)
                video_saver.write(utils.RGB2BGR(frame))
            if step_i > 1000:
                break
        if step_i > 1000:
            break
        t, r, _, s2 = timestep
        s2 = torch.FloatTensor(utils.state_1d_flat(s2)).to(device)
        s = s2
        ep_reward += r

    if video_info is not None:
        video_saver.release()
    return ep_reward
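# Hypothetical usage sketch for the single-action evaluate() above, assuming a
# dm_control suite task and an already-trained torch actor. The task name,
# `actor_main`, and `control_stepsize=4` are illustrative assumptions, not
# values from the source.
from dm_control import suite

eval_env = suite.load(domain_name="cheetah", task_name="run")
obs_dim_eval = int(sum(np.prod(spec.shape) for spec in eval_env.observation_spec().values()))
action_dim_eval = eval_env.action_spec().shape[0]

eval_return = evaluate(actor_main, eval_env, control_stepsize=4,
                       state_dim=obs_dim_eval, action_dim=action_dim_eval)
print("evaluation return:", eval_return)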
# Excerpt from the training rollout for one episode: build the observation /
# previous-action input, optionally record video, then act with exploration noise.
prev_action = np.zeros([action_dim])
# dm_control TimeStep: (step_type, reward, discount, observation)
_, _, _, s = timestep
s = utils.state_1d_flat(s)
s_a = np.append(s, prev_action)
s_a = torch.FloatTensor(s_a).to(device)

# for recording
if epi_i % video_save_period == 1:
    video_filepath = os.path.join(
        record_dir, "training_noise_" + str(epi_i) + ".avi")
    video_saver = utils.VideoSaver(video_filepath,
                                   int(1.0 / env.control_timestep()), 30,
                                   width=320, height=240)
    frame = env.physics.render(camera_id=0, width=320, height=240)
    video_saver.write(utils.RGB2BGR(frame))

step_i = 0
while step_i < 1000:
    with torch.no_grad():
        a = actor_main.forward(s_a.view(
            -1, state_action_dim)).cpu().numpy()
    # Ramp up exploration: scale the policy output over the first
    # `action_gradation` episodes and add exploration noise.
    if epi_i < action_gradation + 1:
        a = a * float(epi_i) / float(action_gradation) + noise()
    else: