solved_episodes = []
for step in range(args.num_steps):
    # render
    if args.render:
        envs.render()

    # select action
    value, action, action_log_probs = agent.select_action(step)

    # take a step in the environment
    obs, reward, done, infos = envs.step(action)

    # calculate intrinsic reward
    if args.add_intrinsic_reward:
        intrinsic_reward = args.intrinsic_coef * agent.compute_intrinsic_reward(step)
        if args.max_intrinsic_reward is not None:
            # clamp the already-scaled bonus rather than recomputing the raw
            # one, so the intrinsic_coef scaling is not silently discarded
            intrinsic_reward = torch.clamp(
                intrinsic_reward, 0.0, args.max_intrinsic_reward)
    else:
        # no bonus; use a float zero to match the dtype of the computed case
        intrinsic_reward = torch.zeros(1, 1)
    intrinsic_rewards.extend(list(intrinsic_reward.numpy().reshape(-1)))

    # store experience
    agent.store_rollout(obs[1], action, action_log_probs, value, reward,
                        intrinsic_reward, done)

    # get final episode rewards
    for info in infos:
        if 'episode' in info.keys():
            extrinsic_rewards.append(info['episode']['r'])
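# The rollout loop above treats agent.compute_intrinsic_reward(step) as a
# black box. Below is a minimal sketch of one common choice, random network
# distillation (RND); the class name RNDIntrinsicReward, its architecture,
# and the obs_dim/feature_dim parameters are hypothetical illustrations,
# not the implementation this agent actually uses.
import torch
import torch.nn as nn


class RNDIntrinsicReward(nn.Module):
    """RND-style novelty bonus: the prediction error of a trainable
    predictor network against a fixed, randomly initialized target
    network, evaluated on the current observation."""

    def __init__(self, obs_dim, feature_dim=64):
        super().__init__()
        self.target = nn.Sequential(
            nn.Linear(obs_dim, feature_dim), nn.ReLU(),
            nn.Linear(feature_dim, feature_dim))
        self.predictor = nn.Sequential(
            nn.Linear(obs_dim, feature_dim), nn.ReLU(),
            nn.Linear(feature_dim, feature_dim))
        # The target network stays frozen; only the predictor is trained.
        for p in self.target.parameters():
            p.requires_grad = False

    def forward(self, obs):
        # obs: (num_envs, obs_dim) -> bonus: (num_envs, 1).
        # Rarely visited observations give a large prediction error and
        # therefore a large exploration bonus.
        with torch.no_grad():
            target_features = self.target(obs)
        pred_features = self.predictor(obs)
        return (pred_features - target_features).pow(2).mean(dim=1, keepdim=True)


# Example: an 8-env batch of 4-dimensional observations yields an (8, 1)
# bonus tensor, matching the shape store_rollout consumes above.
# bonus = RNDIntrinsicReward(obs_dim=4)(torch.randn(8, 4))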