예제 #1
0
 def _load_task(self, task_dict, states_dir):
     """Rebuild a ``Task`` object from its serialized dictionary form.

     The whole dictionary is forwarded to ``Task`` as keyword arguments
     (with ``resume_utg=False``). The recorded state/action history and the
     current state are then restored from ``states_dir``, and the scalar
     fields ``reward``, ``total_reward`` and ``done`` are copied over.

     Args:
         task_dict: Mapping with at least the keys ``state_history``,
             ``action_history``, ``state``, ``reward``, ``total_reward``
             and ``done``.
         states_dir: Directory handed to ``State.load`` for every state.

     Returns:
         The restored task.

     Raises:
         IndexError: If ``action_history`` has fewer entries than
             ``state_history``.
     """
     restored = Task(resume_utg=False, **task_dict)

     # Replay the recorded steps in order; each action is decoded against
     # the state it was taken in, after that state is bound to the task.
     for step, past_state_str in enumerate(task_dict["state_history"]):
         past_action_str = task_dict["action_history"][step]
         past_state = State.load(state_dir=states_dir,
                                 state_str=past_state_str)
         past_state.setup(restored)
         past_action = self._load_action(past_state, past_action_str)
         restored.state_history.append(past_state)
         restored.action_history.append(past_action)

     # The current state is attached to the task before it is set up.
     restored.state = State.load(state_dir=states_dir,
                                 state_str=task_dict["state"])
     restored.state.setup(restored)

     for field in ("reward", "total_reward", "done"):
         setattr(restored, field, task_dict[field])
     return restored
예제 #2
0
 def compute_reward(task, trace_lines):
     """Replay a recorded trace on ``task`` and report the best rewards seen.

     Each trace line has the form ``"<state_str>: <action>"``. The first
     line supplies the state the task is reset to; every later line supplies
     the state passed to ``task.update`` together with the previous line's
     action. Replay stops after the first later line whose action is
     ``"END"``. A line without the ``": "`` separator is treated as a state
     string with an empty action.

     ``State``, ``load_action`` and ``states_dir`` are resolved from the
     enclosing scope.

     Args:
         task: Object exposing ``reset``, ``update``, ``target_achieved``,
             ``total_reward`` and ``name``. It is mutated by the replay.
         trace_lines: Non-empty iterable of trace line strings.

     Returns:
         A ``(max_correct_reward, max_incorrect_reward)`` tuple. The first
         is the highest ``task.total_reward`` observed after an update that
         left ``task.target_achieved`` true (0 if there was none); the
         second is the highest observed otherwise, seeded with the total
         reward right after the reset.

     Raises:
         IndexError: If ``trace_lines`` is empty.
     """
     # partition() splits on the first ": " like the find()-based slicing
     # did, but does not chop characters off a line lacking the separator.
     # [::2] keeps (state_str, action) and drops the separator itself.
     state_action_lines = [line.partition(": ")[::2] for line in trace_lines]

     first_state_str, first_action_line = state_action_lines[0]
     current_state = State.load(states_dir, first_state_str)
     task.reset(current_state, update_utg=False)
     last_action = load_action(current_state, first_action_line)

     correct_rewards = [0]
     incorrect_rewards = [task.total_reward]
     for state_str, action_line in state_action_lines[1:]:
         current_state = State.load(states_dir, state_str)
         task.update(last_action, current_state, update_utg=False)
         if task.target_achieved:
             correct_rewards.append(task.total_reward)
         else:
             incorrect_rewards.append(task.total_reward)
         if action_line == "END":
             break
         last_action = load_action(current_state, action_line)

     max_correct_reward = max(correct_rewards)
     max_incorrect_reward = max(incorrect_rewards)
     logging.info(
         f"  task got correct reward {max_correct_reward:6.3f}"
         f" and incorrect reward {max_incorrect_reward:3.3f}: {task.name}"
     )
     return max_correct_reward, max_incorrect_reward