Example #1
    def consistent_programs(self, action, state, timestep):
        """Returns a list of WeightedPrograms that are consistent with the
        action at (state, timestep).

        Args:
            action (MiniWoBAction):
            state (MiniWoBState)
            timestep (int)

        Returns:
            a_given_p ({WeightedProgram: float}): the keys are all the
                consistent programs. the probability of the action given the
                consistent program
        """
        assert timestep < len(self._demo)
        w_programs = self._demo.programs(timestep)
        env = ExecutionEnvironment(state)
        a_given_p = {}
        for w_program in w_programs:
            program = w_program.program
            if program is None:  # Skip over skip actions (no program)
                continue
            # Non-executable programs are consistent with MiniWoBTerminate
            if isinstance(action, MiniWoBTerminate):
                try:
                    num_execution_paths = program.execution_paths(env)
                    if num_execution_paths == 0:
                        a_given_p[w_program] = 1.
                except ProgramExecutionException as e:
                    a_given_p[w_program] = 1.
            else:  # Regular action was played
                try:  # Sometimes programs cannot execute
                    if program.consistent(env, action):
                        num_execution_paths = program.execution_paths(env)
                        a_given_p[w_program] = 1. / num_execution_paths
                except ProgramExecutionException as e:
                    logging.info(
                        "consistent_programs({}, {}, {}, {}): {}".format(
                            self, action, state, timestep, e))
        return a_given_p
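
The dictionary built above assigns each consistent program the probability 1 / execution_paths, i.e. the chance that a program which matches several elements would have produced this exact action. Below is a minimal, self-contained sketch of that normalization; ToyProgram and toy_consistent_programs are made-up stand-ins rather than the real Program and WeightedProgram classes.

# Toy stand-in for a program: it "matches" a fixed set of elements and can
# execute to a click on any one of them.
class ToyProgram(object):
    def __init__(self, name, matching_elements):
        self.name = name
        self.matching_elements = matching_elements

    def consistent(self, action_element):
        return action_element in self.matching_elements

    def execution_paths(self):
        return len(self.matching_elements)


def toy_consistent_programs(action_element, programs):
    """Maps each consistent toy program to P(action | program)."""
    a_given_p = {}
    for program in programs:
        if program.consistent(action_element):
            # A program matching k elements picks each with probability 1/k.
            a_given_p[program] = 1. / program.execution_paths()
    return a_given_p


programs = [
    ToyProgram("click-submit", ["submit"]),                # precise: 1 match
    ToyProgram("click-any-button", ["submit", "cancel"]),  # ambiguous: 2 matches
]
result = toy_consistent_programs("submit", programs)
print({p.name: prob for p, prob in result.items()})
# {'click-submit': 1.0, 'click-any-button': 0.5}
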
Example #2
    def _init_weights(self, labeled_demo):
        weight = float(self._config.weight_init)
        for i in range(len(labeled_demo)):
            weighted_programs = labeled_demo.programs(i)
            if not weighted_programs:
                continue
            if weight == 0.:
                for weighted_program in weighted_programs:
                    weighted_program.set_weight(0.)
            else:
                # Clipping-based: give high weight to precise programs
                env = ExecutionEnvironment(labeled_demo.state(i))
                num_results = []
                for weighted_program in weighted_programs:
                    program = weighted_program.program
                    if program is None:
                        # A skip counts as one path, so it is favored
                        num_result = 1
                    else:
                        try:
                            num_result = program.execution_paths(env)
                            assert num_result >= 0
                        except ProgramExecutionException as e:
                            # Penalize programs that fail to execute
                            num_result = 999
                    num_results.append(num_result)
                # Find programs with minimal number of matches
                min_result = min(num_results)
                assert len(weighted_programs) == len(num_results)
                for weighted_program, num_result in zip(
                        weighted_programs, num_results):
                    if num_result == min_result:
                        weighted_program.set_weight(weight)
                    else:
                        weighted_program.set_weight(0.)

            # TODO: This prunes programs randomly after choosing the most
            # restrictive
            pruned_programs = sorted(
                weighted_programs, reverse=True,
                key=lambda x: x.weight)[:self._config.max_programs]
            labeled_demo.set_programs(i, pruned_programs)
        labeled_demo.initialize_critics(float(self._config.init_v))
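
The initialization above is clipping-based: only the candidates with the minimal number of execution paths (the most precise ones) receive the initial weight, programs that fail to execute are pushed out of contention with the 999 fallback, and the rest are zeroed before pruning to max_programs. A small self-contained sketch of that rule, with made-up path counts and a hypothetical init_weights helper:

# Hypothetical helper mirroring the min-paths rule from _init_weights.
PENALTY = 999  # stand-in for programs that raise ProgramExecutionException

def init_weights(num_paths_per_program, weight_init):
    """Returns one weight per candidate given its number of execution paths."""
    if weight_init == 0.:
        return [0.] * len(num_paths_per_program)
    min_paths = min(num_paths_per_program)
    return [weight_init if n == min_paths else 0.
            for n in num_paths_per_program]

# Three candidates: one precise (1 path), one ambiguous (4), one failing (999).
print(init_weights([1, 4, PENALTY], weight_init=2.0))  # [2.0, 0.0, 0.0]
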
Example #3
    @staticmethod
    def _edges_to_programs(vertex):
        """Collect ActionEdges originating from the given StateVertex, and list
        all WeightedPrograms that could execute to the actions in those edges.

        Args:
            vertex (StateVertex)

        Returns:
            list[WeightedProgram]
        """
        weighted_programs = []
        env = ExecutionEnvironment(vertex.state)

        for action_edge in vertex.action_edges:
            action = action_edge.action
            state_incr = action_edge.end - action_edge.start
            if action is None:
                weighted_programs.append(WeightedProgram(None, 1., state_incr))
                continue

            # All string tokens
            strings = [StringToken(s) for s in env.valid_strings]

            # All fields tokens
            fields = env.fields
            fields_tokens = [
                FieldsValueSelectorToken(i) for i in range(len(fields.keys))
            ]
            strings += fields_tokens

            # TODO: Support last. Hard because it depends on the actual exec
            # env.
            element_sets = [TagToken(tag) for tag in env.tags]
            # All of the Like and ExactMatch tokens over the string tokens
            element_sets += [
                LikeToken(string_token) for string_token in strings
            ]
            element_sets += [
                ExactMatchToken(string_token) for string_token in strings
            ]

            # Max one-level of Near, SameRow, SameCol
            classes = action.element.classes
            distance_programs = [
                NearToken(elem_token, classes) for elem_token in element_sets
            ]
            distance_programs += [
                SameRowToken(elem_token, classes)
                for elem_token in element_sets
            ]
            distance_programs += [
                SameColToken(elem_token, classes)
                for elem_token in element_sets
            ]
            element_sets += distance_programs

            click_actions = [
                ClickToken(element_token) for element_token in element_sets
            ]
            type_actions = [
                FocusAndTypeToken(element_token, string_token)
                for element_token, string_token in itertools.product(
                    element_sets, fields_tokens)
            ]
            # Random typing actions
            type_actions += [
                FocusAndRandomFieldTypeToken(element_token)
                for element_token in element_sets
            ]

            if isinstance(action, MiniWoBElementClick):
                consistent_clicks = [
                    WeightedProgram(click, 1., state_incr)
                    for click in click_actions
                    if click.consistent(env, action)
                ]
                weighted_programs.extend(consistent_clicks)
            elif isinstance(action, MiniWoBFocusAndType):
                consistent_types = [
                    WeightedProgram(type_action, 1., state_incr)
                    for type_action in type_actions
                    if type_action.consistent(env, action)
                ]
                weighted_programs.extend(consistent_types)
            else:
                raise ValueError("Action: {} not supported.".format(action))

        return weighted_programs
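
Most of the work above is enumeration: element-set tokens and string/field tokens are combined into a large pool of candidate click and type programs (typing candidates come from itertools.product over element sets and field tokens), and the pool is then filtered with consistent(env, action). A toy sketch of the enumeration step, using plain strings in place of the real token classes:

import itertools

# Made-up element selectors and field sources standing in for the token objects.
element_sets = ["tag:input", "like:'First name'", "near:'Last name'"]
fields_tokens = ["field[0]", "field[1]"]

# One typing candidate per (element set, field) pair, as in FocusAndTypeToken.
type_candidates = [
    "Type({}, {})".format(element, field)
    for element, field in itertools.product(element_sets, fields_tokens)
]
print(len(type_candidates))  # 6 candidates: 3 element sets x 2 fields
print(type_candidates[0])    # Type(tag:input, field[0])
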
Example #4
    def next_action(self, state):
        """Returns a next sampled action from following this demonstration.
        If demonstration is already played through, returns FAIL.

        Args:
            state (MiniWoBState): the current state

        Returns:
            action (MiniWoBAction)
        """
        # Update environment
        if self._env is None:
            self._env = ExecutionEnvironment(state)
        else:
            self._env.observe(state)

        # Greedy: choose the best action that executes
        if self._test:
            # NOTE: selected_programs and candidate_programs are not updated
            # because you should not be taking gradient steps on test.
            action, new_cursor = self._get_best_action(state, self._cursor)
            self._cursor = new_cursor
            return action
        else:
            # Sample until you get a concrete action
            justifications = []
            while True:
                selected_w_program = self._sample_program(state, self._cursor)

                # Update book-keeping
                weighted_programs, probs = self._programs_and_probs(
                    self._cursor)

                if len(weighted_programs) > 0:
                    self._trajectory_cursors.append(self._cursor)
                    self._selected_programs.append(selected_w_program)
                    self._candidate_programs.append(weighted_programs)
                    state_value = self._demo.critics[self._cursor]
                else:  # Sampled action is a terminate
                    state_value = None
                self._cursor += selected_w_program.state_incr

                program = selected_w_program.program
                if program is None:  # Skip action
                    justifications.append(
                        DemoJustification(weighted_programs, probs,
                                          selected_w_program, ElementSet.EMPTY,
                                          state_value))
                else:  # Regular weighted program
                    elem_set = ElementSet.EMPTY
                    try:
                        action = program.execute(self._env)
                        elem_set = program.possible_elements(self._env)
                    except ProgramExecutionException as e:
                        logging.info("DemoPlayer: %s", e)
                        action = MiniWoBTerminate()
                    justifications.append(
                        DemoJustification(weighted_programs, probs,
                                          selected_w_program, elem_set,
                                          state_value))
                    action.justification = DemoJustificationList(
                        justifications)
                    return action
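
In the non-test branch, _sample_program draws one candidate with np.random.choice using the probabilities the policy computes for the current cursor. A minimal sketch of that sampling step with made-up candidates and probabilities:

import numpy as np

candidates = ["click-submit", "click-any-button", "skip"]
probs = np.array([0.7, 0.2, 0.1])  # would come from the policy's probabilities
probs = probs / probs.sum()        # np.random.choice needs p summing to 1

np.random.seed(0)
print(np.random.choice(candidates, p=probs))  # e.g. 'click-submit'
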
Example #5
class DemoPlayer(object):
    """Wraps a demo, execution env, and cursor inside of a demonstration.

    Args:
        demo (LabeledDemonstration)
        policy (ProgramPolicy)
        test (bool)
    """
    def __init__(self, demo, policy, test=False):
        self._demo = demo
        self._policy = policy
        self._test = test
        self._env = None
        self._cursor = 0
        # list[int]
        self._trajectory_cursors = []
        # list[WeightedProgram]
        self._selected_programs = []
        # list[list[WeightedProgram]]
        self._candidate_programs = []

    def next_action(self, state):
        """Returns a next sampled action from following this demonstration.
        If demonstration is already played through, returns FAIL.

        Args:
            state (MiniWoBState): the current state

        Returns:
            action (MiniWoBAction)
        """
        # Update environment
        if self._env is None:
            self._env = ExecutionEnvironment(state)
        else:
            self._env.observe(state)

        # Greedy: choose the best action that executes
        if self._test:
            # NOTE: selected_programs and candidate_programs are not updated
            # because you should not be taking gradient steps on test.
            action, new_cursor = self._get_best_action(state, self._cursor)
            self._cursor = new_cursor
            return action
        else:
            # Sample until you get a concrete action
            justifications = []
            while True:
                selected_w_program = self._sample_program(state, self._cursor)

                # Update book-keeping
                weighted_programs, probs = self._programs_and_probs(
                    self._cursor)

                if len(weighted_programs) > 0:
                    self._trajectory_cursors.append(self._cursor)
                    self._selected_programs.append(selected_w_program)
                    self._candidate_programs.append(weighted_programs)
                    state_value = self._demo.critics[self._cursor]
                else:  # Sampled action is a terminate
                    state_value = None
                self._cursor += selected_w_program.state_incr

                program = selected_w_program.program
                if program is None:  # Skip action
                    justifications.append(
                        DemoJustification(weighted_programs, probs,
                                          selected_w_program, ElementSet.EMPTY,
                                          state_value))
                else:  # Regular weighted program
                    elem_set = ElementSet.EMPTY
                    try:
                        action = program.execute(self._env)
                        elem_set = program.possible_elements(self._env)
                    except ProgramExecutionException as e:
                        logging.info("DemoPlayer: %s", e)
                        action = MiniWoBTerminate()
                    justifications.append(
                        DemoJustification(weighted_programs, probs,
                                          selected_w_program, elem_set,
                                          state_value))
                    action.justification = DemoJustificationList(
                        justifications)
                    return action

    # TODO: Define a SkipToken?
    def _sample_program(self, state, cursor):
        """Returns a WeightedProgram sampled at the current cursor. The program
        in the WeightedProgram may be None, indicating a skip action.

        Args:
            state (MiniWoBState): concrete state
            cursor (int): index of the current demo state
        Returns:
            WeightedProgram: wraps a TerminateToken if no programs were
            available for sampling.
        """
        weighted_programs, probs = self._programs_and_probs(cursor)
        if not weighted_programs:  # No programs available for sampling.
            return WeightedProgram(TerminateToken(), 0.)

        weighted_program = np.random.choice(weighted_programs, p=probs)
        return weighted_program

    def _get_best_action(self, state, cursor):
        """Execute the highest scoring program that executes to produce an
        action.

        The justification for the action includes zero or more justifications
        for skip actions, which just advance the cursor.

        Args:
            state (MiniWoBState): concrete state
            cursor (int): index of the current demo state
        Returns:
            action (ProgramAction)
            new_cursor (int): the new cursor position
        """
        def helper(state, cursor, justifications):
            """Returns action, new cursor position keeping track of
            justifications in a list.
            """
            weighted_programs, probs = self._programs_and_probs(cursor)
            assert len(weighted_programs) == len(probs)
            ranked = sorted(zip(weighted_programs, probs),
                            key=lambda x: x[1],
                            reverse=True)

            state_value = self._demo.critics[cursor] \
                if len(weighted_programs) > 0 else None
            for weighted_program, prob in ranked:
                program = weighted_program.program
                if program is not None:  # Regular program
                    # See if the program executes
                    try:
                        action = weighted_program.program.execute(self._env)
                    except ProgramExecutionException as e:
                        logging.info("DemoPlayer: %s", e)
                        continue

                    new_cursor = cursor + weighted_program.state_incr
                    # Compute justification
                    element_set = program.possible_elements(self._env)
                    justifications.append(
                        DemoJustification(weighted_programs, probs,
                                          weighted_program, element_set,
                                          state_value))
                    action.justification = DemoJustificationList(
                        justifications)
                    return action, new_cursor
                else:  # Skip edge
                    new_cursor = cursor + weighted_program.state_incr
                    # Compute justification
                    justifications.append(
                        DemoJustification(weighted_programs, probs,
                                          weighted_program, ElementSet.EMPTY,
                                          state_value))
                    return helper(state, new_cursor, justifications)
            action = MiniWoBTerminate()
            justifications.append(
                DemoJustification(weighted_programs, probs, None,
                                  ElementSet.EMPTY, state_value))
            action.justification = DemoJustificationList(justifications)
            return action, cursor

        return helper(state, cursor, [])

    def _programs_and_probs(self, cursor):
        """Returns three parallel lists of weighted programs and their
        probabilities at the current cursor.

        Args:
            cursor (int)

        Returns:
            list[WeightedProgram]
            list[float]
        """
        # Past the end of the demo
        if cursor >= len(self._demo):
            return [], []

        weighted_programs = self._demo.programs(cursor)
        if not weighted_programs:
            return [], []
        probs = self._policy.compute_program_probs(weighted_programs)
        return weighted_programs, probs

    def consistent_programs(self, action, state, timestep):
        """Returns a list of WeightedPrograms that are consistent with the
        action at (state, timestep).

        Args:
            action (MiniWoBAction):
            state (MiniWoBState)
            timestep (int)

        Returns:
            a_given_p ({WeightedProgram: float}): the keys are all the
                consistent programs. the probability of the action given the
                consistent program
        """
        assert timestep < len(self._demo)
        w_programs = self._demo.programs(timestep)
        env = ExecutionEnvironment(state)
        a_given_p = {}
        for w_program in w_programs:
            program = w_program.program
            if program is None:  # Skip over skip actions (no program)
                continue
            # Non-executable programs are consistent with MiniWoBTerminate
            if isinstance(action, MiniWoBTerminate):
                try:
                    num_execution_paths = program.execution_paths(env)
                    if num_execution_paths == 0:
                        a_given_p[w_program] = 1.
                except ProgramExecutionException as e:
                    a_given_p[w_program] = 1.
            else:  # Regular action was played
                try:  # Sometimes programs cannot execute
                    if program.consistent(env, action):
                        num_execution_paths = program.execution_paths(env)
                        a_given_p[w_program] = 1. / num_execution_paths
                except ProgramExecutionException as e:
                    logging.info(
                        "consistent_programs({}, {}, {}, {}): {}".format(
                            self, action, state, timestep, e))
        return a_given_p

    @property
    def demo(self):
        """Returns the LabeledDemonstration object."""
        return self._demo

    @property
    def trajectory_cursors(self):
        """Returns the list[int] of cursors at each selected program."""
        return self._trajectory_cursors

    @property
    def selected_programs(self):
        """Returns the list[WeightedPrograms] that were played in order."""
        return self._selected_programs

    @property
    def candidate_programs(self):
        """Returns the list[list[WeightedPrograms]] of candidate programs."""
        return self._candidate_programs

    @property
    def fields(self):
        """Returns the Fields associated with this demo."""
        return self._demo.fields
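
At test time, _get_best_action ranks the candidates at the cursor by probability and plays the first one that executes without raising ProgramExecutionException, falling back to MiniWoBTerminate when none do. A toy sketch of that greedy selection with made-up (name, prob, executes) triples:

candidates = [
    ("click-hidden-button", 0.6, False),  # highest prob, but fails to execute
    ("click-submit", 0.3, True),
    ("click-any-button", 0.1, True),
]

best_action = None  # stands in for the MiniWoBTerminate fallback
for name, prob, executes in sorted(candidates, key=lambda c: c[1], reverse=True):
    if executes:
        best_action = name
        break
print(best_action)  # 'click-submit': the best-ranked candidate that executes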