Beispiel #1
0
    def _find_equivalent_action(self, dom_diff, state_before, state_after):
        """Return a single action that could produce the dom diff.

        Args:
            dom_diff (list[DOMElement])
            state_before (MiniWoBState)
            state_after (MiniWoBState)
        Returns:
            MiniWoBAction or None
        """
        if len(dom_diff) > 1:
            return
        ref = dom_diff[0].ref
        elt_before = self._element_with_ref(state_before, ref)
        elt_after = self._element_with_ref(state_after, ref)
        if not elt_before or not elt_after:
            return
        # Click
        if (elt_before.value == elt_after.value and not elt_before.tampered
                and elt_after.tampered):
            return MiniWoBElementClick(elt_before)
        if elt_before.value != elt_after.value:
            if elt_before.tag in ('input_checkbox', 'input_radio'):
                return MiniWoBElementClick(elt_before)
            else:
                return MiniWoBFocusAndType(elt_before, elt_after.value)
Beispiel #2
0
    def execute(self, env):
        """Build a click action on an element sampled from the parameter.

        The wrapped parameter is executed against ``env``; one element is drawn
        from its result via ``sample_non_text()``, recorded on the environment
        with ``env.set_last`` and wrapped in a click action.

        Args:
            env: execution environment passed through to the parameter; must
                provide ``set_last``.
        Returns:
            MiniWoBElementClick: click on the sampled element.
        """
        chosen = self._parameter.execute(env).sample_non_text()
        # Remember the sampled element on the environment before returning
        env.set_last(chosen)
        return MiniWoBElementClick(chosen)
Beispiel #3
0
 def get_element_click(self, state):
     """Get the action that clicks the button.

     Args:
         state: object whose ``dom_elements`` is an iterable of elements
             exposing a ``tag`` attribute.
     Returns:
         MiniWoBElementClick: click (created with ``fail_hard=True``) on the
             first element whose tag is ``'button'``.
     Raises:
         ValueError: if no element has the tag ``'button'``.
     """
     for element in state.dom_elements:
         if element.tag == 'button':
             action = MiniWoBElementClick(element, fail_hard=True)
             # Call form of print: valid on Python 2 and Python 3 alike
             print('Clicking with {}'.format(action))
             return action
     raise ValueError('Cannot find button: {}'.format(str(state.dom_elements)))
Beispiel #4
0
 def get_input_click(self, state):
     """Build the action that clicks the first text input of the state.

     Args:
         state: object whose ``dom_elements`` is an iterable of elements
             exposing a ``tag`` attribute.
     Returns:
         MiniWoBElementClick: click (created with ``fail_hard=True``) on the
             first element whose tag is ``'input_text'``.
     Raises:
         ValueError: if no element has the tag ``'input_text'``.
     """
     text_inputs = (
         candidate for candidate in state.dom_elements
         if candidate.tag == 'input_text'
     )
     target = next(text_inputs, None)
     if target is None:
         raise ValueError('Cannot find input: {}'.format(str(
             state.dom_elements)))
     click = MiniWoBElementClick(target, fail_hard=True)
     print('Clicking with {}'.format(click))
     return click
Beispiel #5
0
    def _find_shortcuts(self, state_vertices):
        """Takes the list of StateVertex and finds ActionEdges between
        non-consecutive state vertices and adds these edges to the passed StateVertexs

        Modifies edges in state_vertices directly

        Args:
            state_vertices (list[StateVertex])
        """
        # Single step:
        for i, vi in enumerate(state_vertices):
            is_last = (i == len(state_vertices) - 1)
            for action_edge in vi.action_edges[:]:
                dom_diff = True if is_last else self._dom_diff(
                        vi.state.dom, state_vertices[i + 1].state.dom)
                if self._logfile and (dom_diff is True or len(dom_diff) <= 5):
                    print >> self._logfile, 'DIFF', i, ':', dom_diff
                if not dom_diff:
                    vi.action_edges.append(ActionEdge(None, i, i + 1))
                elif isinstance(action_edge, DummyActionEdge):
                    chunk = action_edge.chunk
                    if action_edge.reason == 'nonleaf':
                        # Action on non-leaf: Try all leaves instead
                        leaves = self._get_leaves(chunk.target)
                        if len(leaves) <= self.MAX_LEAVES:
                            for leaf in self._get_leaves(chunk.target):
                                if chunk.action == 'click':
                                    action = MiniWoBElementClick(leaf)
                                elif chunk.action == 'type':
                                    action = MiniWoBFocusAndType(leaf, chunk.args)
                                else:
                                    raise ValueError('Invalid nonleaf DummyActionEdge')
                                vi.action_edges.append(ActionEdge(action, i, i + 1))
                    elif not is_last:
                        action = self._find_equivalent_action(
                                dom_diff, vi.state, state_vertices[i + 1].state)
                        if action:
                            vi.action_edges.append(ActionEdge(action, i, i + 1))
        # Multiple steps:
        for i in xrange(len(state_vertices)):
            vi = state_vertices[i]
            for j in xrange(i + 2, min(i + 1 + self.MAX_SHORTCUT_LENGTH, len(state_vertices))):
                vj = state_vertices[j]
                dom_diff = self._dom_diff(vi.state.dom, vj.state.dom)
                if self._logfile and len(dom_diff) <= 5:
                    print >> self._logfile, 'DIFF', i, '->', j, ':', dom_diff
                if not dom_diff:
                    vi.action_edges.append(ActionEdge(None, i, j))
                else:
                    action = self._find_equivalent_action(dom_diff, vi.state, vj.state)
                    if action:
                        vi.action_edges.append(ActionEdge(action, i, j))
Beispiel #6
0
    def _compute_action_scores(elem_indices, dom_elems, dom_probs,
                               click_or_type_probs, type_values, type_value_probs,
                               state_values):
        """Compute action scores (log probabilities).

        Args:
            dom_elems (list[list[DOMElement]]): a batch of DOMElement lists
                (padded to the same length)
            dom_probs (Variable): of shape (batch_size, max_dom_elems)
            click_or_type_probs (Variable): of shape (batch_size, 2). 0 index
                = click, 1 index = type
            type_values (list[list[unicode]]): a batch of query values (NOT
                padded!)
            type_value_probs (Variable): of shape (batch_size, max_values)
            state_values (Variable): of shape (batch_size,)

        Returns:
            action_scores_batch (list[ActionScores])
        """
        # shape checks
        batch_size = len(dom_elems)
        dim1, max_dom_elems = dom_probs.size()
        dim2, max_type_values = type_value_probs.size()
        dim3, click_or_type_classes = click_or_type_probs.size()
        assert dim1 == batch_size
        assert dim2 == batch_size
        assert dim3 == batch_size
        assert click_or_type_classes == 2
        assert len(type_values) == batch_size

        # check that dom_elems has been appropriately padded
        for elems in dom_elems:
            assert len(elems) == max_dom_elems

        # NOTE: type_values are NOT padded

        action_scores_batch = []
        for batch_idx in range(batch_size):
            action_scores = {}

            click_prob, type_prob = click_or_type_probs[batch_idx]
            # these are scalar Variables

            dom_elems_b, dom_probs_b = dom_elems[batch_idx], dom_probs[batch_idx]
            type_values_b, type_value_probs_b = type_values[batch_idx], type_value_probs[batch_idx]
            assert len(dom_elems_b) == len(dom_probs_b)
            assert len(type_values_b) <= len(type_value_probs_b)

            # TODO: HACK :'(
            chosen_index = elem_indices[batch_idx]
            elem = dom_elems_b[chosen_index]
            elem_prob = dom_probs_b[chosen_index]
            # elem_prob is a scalar Variable: it has size == (1,)

            assert not isinstance(elem, DOMElementPAD)
            assert elem.tag != 't'

            # generate click action
            click_action = MiniWoBElementClick(elem)

            action_scores[click_action] = torch.log(click_prob) + \
                torch.log(elem_prob)

            # generate focus-and-type actions
            for type_value, value_prob in zip(type_values_b, type_value_probs_b):
                # note that zip truncates to the shorter of its two arguments
                type_action = MiniWoBFocusAndType(elem, type_value)
                action_scores[type_action] = torch.log(type_prob) + \
                        torch.log(elem_prob) + torch.log(value_prob)

            state_value = state_values[batch_idx]  # scalar Variable
            action_scores_batch.append(ActionScores(action_scores, state_value))

        return action_scores_batch
Beispiel #7
0
    def _parse_raw_demo_chunk(self,
                              raw_demo,
                              field_extractor,
                              find_shortcuts=False):
        """Takes the raw demo and spits out the relevant states.

        Algorithm: Consider each chunk of events that express a single action.
        Possible chunks are:
        - click (mousedown mouseup click)
            - double-click is ignored for now
        - drag (mousedown mouseup click, with different coordinates)
        - type (keydown* keypress keyup)
        - hotkey (keydown* keyup, where keyup is not a modifier key)

        Args:
            raw_demo (dict): json contents of demo file
            field_extractor (FieldsExtractor): the fields extractor for this task
            find_shortcuts (bool): whether to also find possible shortcuts
                in the graph. If false, the graph will be a sequence.

        Returns:
            state_vertices (list[StateVertex])
        """
        utterance = raw_demo['utterance']
        if 'fields' in raw_demo:
            fields = Fields(raw_demo['fields'])
        else:
            fields = field_extractor(utterance)
        raw_states = raw_demo['states']

        # Group BEFORE and AFTER
        # Some AFTER are missing due to event propagation being stopped,
        #   in which case we also use BEFORE for AFTER
        raw_state_pairs = []
        current_before = None
        for i, raw_state in enumerate(raw_states[1:]):
            if raw_state['action']['type'] == 'scroll':
                # Skip all scroll actions
                continue
            if raw_state['action']['timing'] == EpisodeGraph.BEFORE:
                if current_before:
                    # Two consecutive BEFOREs
                    logging.warning('state %d is BEFORE without AFTER', i - 1)
                    raw_state_pairs.append((current_before, current_before))
                current_before = raw_state
            elif raw_state['action']['timing'] == EpisodeGraph.AFTER:
                if not current_before:
                    # Two consecutive AFTERs
                    logging.warning('state %d is AFTER without BEFORE', i)
                    current_before = raw_state
                raw_state_pairs.append((current_before, raw_state))
                current_before = None
        if current_before:
            # Lingering BEFORE at the end
            logging.warning('state %d is BEFORE without AFTER', i - 1)
            raw_state_pairs.append((current_before, current_before))

        if self._logfile:
            # print >> self._logfile, 'Utterance:', utterance
            # print >> self._logfile, 'Fields:', fields
            # print >> self._logfile, '#' * 10, 'PAIRS'
            self._logfile.write('Utterance: {}'.format(utterance))
            self._logfile.write('Fields: {}'.format(fields))
            self._logfile.write('#' * 10 + 'PAIRS')
            for i, (s1, s2) in enumerate(raw_state_pairs):
                self._logfile.write('@' + str(i) + ':' + str(s1['action']) +
                                    str(s2['action']))

        chunks = self._chunk_events(raw_state_pairs, utterance, fields)
        chunks = self._collapse_type_actions(chunks)

        if self._logfile:
            # print >> self._logfile, 'Utterance:', utterance
            # print >> self._logfile, 'Fields:', fields
            # print >> self._logfile, '#' * 10, 'CHUNKS'
            self._logfile.write('Utterance: {}'.format(utterance))
            self._logfile.write('Fields:{}'.format(fields))
            self._logfile.write('#' * 10 + 'CHUNKS')
            for i, chunk in enumerate(chunks):
                # print >> self._logfile, '@', i, ':', chunk
                self._logfile.write('@' + str(i) + ':' + str(chunk))

        # Create base vertices
        state_vertices = []
        for chunk in chunks:
            start, end = len(state_vertices), len(state_vertices) + 1
            if not chunk.target:
                # Probably clicking/dragging on the instruction box
                continue
            if chunk.action == 'click':
                action = MiniWoBElementClick(chunk.target)
                if chunk.target.is_leaf:
                    action_edge = ActionEdge(action, start, end)
                else:
                    action_edge = DummyActionEdge(chunk, start, end, 'nonleaf')
            elif chunk.action == 'type':
                action = MiniWoBFocusAndType(chunk.target, chunk.args)
                if chunk.target.is_leaf:
                    action_edge = ActionEdge(action, start, end)
                else:
                    action_edge = DummyActionEdge(chunk, start, end, 'nonleaf')
            else:
                action_edge = DummyActionEdge(chunk, start, end, 'unknown')
            # If we don't plan to find shortcuts, we cannot have dummy edges
            if not find_shortcuts and isinstance(action_edge, DummyActionEdge):
                continue
            state_vertex = StateVertex(chunk.state, [action_edge])
            state_vertices.append(state_vertex)

        if self._logfile:
            # print >> self._logfile, '#' * 10, 'GRAPH'
            self._logfile.write('#' * 10 + 'GRAPH')
            for i, v in enumerate(state_vertices):
                self._logfile.write('@' + str(i) + ':' + str(v.action_edges))
                self._logfile.write(v.state.dom.visualize())

        if find_shortcuts:
            if self._logfile:
                # print >> self._logfile, '#' * 10, 'SHORTCUTS'
                self._logfile.write('#' * 10 + 'SHORTCUTS')
            self._find_shortcuts(state_vertices)

        # Remove dummy edges
        for i, state_vertex in enumerate(state_vertices):
            state_vertex.action_edges[:] = [
                e for e in state_vertex.action_edges
                if not isinstance(e, DummyActionEdge)
            ]
            # To prevent empty states, add skip edges to the next state
            if not state_vertex.action_edges:
                state_vertex.action_edges.append(ActionEdge(None, i, i + 1))

        if self._logfile:
            # print >> self._logfile, '#' * 10, 'FINAL'
            # print >> self._logfile, 'Utterance:', utterance
            # print >> self._logfile, 'Fields:', fields
            self._logfile.write('#' * 10 + 'FINAL')
            self._logfile.write('Utterance:{}'.format(utterance))
            self._logfile.write('Fields:{}'.format(fields))
            for i, v in enumerate(state_vertices):
                # print >> self._logfile, '@', i, ':', v.action_edges
                self._logfile.write('@' + str(i) + ':' + str(v.action_edges))

        return state_vertices
Beispiel #8
0
    def _parse_raw_demo_original(self, raw_demo, field_extractor):
        """Takes the raw demo and spits out the relevant states.

        Algorithm: Look at mousedown / keypress events

        Args:
            raw_demo (dict): json contents of demo file
            field_extractor (FieldsExtractor): the fields extractor for this task

        Returns:
            state_vertices (list[StateVertex])
        """
        # Filter out only for keypresses and mousedowns (BEFORE)
        utterance = raw_demo['utterance']
        if 'fields' in raw_demo:
            fields = Fields(raw_demo['fields'])
        else:
            fields = field_extractor(utterance)
        raw_states = raw_demo["states"]
        state_vertices = []
        actions = []
        vertex_number = 0
        for i, raw_state in enumerate(raw_states[1:]):
            raw_action = raw_state["action"]
            if raw_action["timing"] == EpisodeGraph.BEFORE:
                if raw_action["type"] == "mousedown":
                    miniwob_state = MiniWoBState(utterance, fields,
                                                 raw_state['dom'])
                    target = self._target(miniwob_state.dom_elements)
                    if not target:  # target = yellow instruction box
                        continue
                    click = MiniWoBElementClick(target)
                    state_vertex = StateVertex(
                        miniwob_state,
                        [ActionEdge(click, vertex_number, vertex_number + 1)])
                    state_vertices.append(state_vertex)
                    vertex_number += 1
                elif raw_action["type"] == "keypress":
                    miniwob_state = MiniWoBState(utterance, fields,
                                                 raw_state['dom'])
                    char = chr(raw_action["keyCode"])
                    target = self._target(miniwob_state.dom_elements)
                    if not target:  # target = yellow instruction box
                        continue
                    type_action = MiniWoBFocusAndType(target, char)
                    state_vertex = StateVertex(miniwob_state, [
                        ActionEdge(type_action, vertex_number,
                                   vertex_number + 1)
                    ])
                    state_vertices.append(state_vertex)
                    vertex_number += 1

        # Collapse consecutive FocusAndTypes into one
        for i, vertex in enumerate(state_vertices):
            curr_action = vertex.action_edges[0].action
            if not isinstance(curr_action, MiniWoBFocusAndType):
                continue

            aggregated_text = curr_action.text
            while i + 1 < len(state_vertices):
                next_action = state_vertices[i + 1].action_edges[0].action
                if not isinstance(next_action, MiniWoBFocusAndType) or \
                    curr_action.element != next_action.element:
                    break
                aggregated_text += next_action.text
                del next_action
                del state_vertices[i + 1]
            vertex.action_edges[0] = ActionEdge(
                MiniWoBFocusAndType(curr_action.element, aggregated_text), i,
                i + 1)

        # Collapse Click then FocusAndType into just FocusAndType
        collapsed_state_vertices = []
        for index in range(len(state_vertices) - 1):
            curr_action = state_vertices[index].action_edges[0].action
            next_action = state_vertices[index + 1].action_edges[0].action
            if not(isinstance(curr_action, MiniWoBElementClick) and \
                   isinstance(next_action, MiniWoBFocusAndType) and \
                   curr_action.element == next_action.element):
                collapsed_state_vertices.append(state_vertices[index])
        collapsed_state_vertices.append(state_vertices[-1])

        # Correct the edge indices
        for i, state_vertex in enumerate(collapsed_state_vertices):
            state_vertex.action_edges[0] = ActionEdge(
                state_vertex.action_edges[0].action, i, i + 1)

        return collapsed_state_vertices
Beispiel #9
0
 def create_element_click_action(self, element):
     """Create a fail-hard click action on ``element`` and print it.

     Args:
         element: the element to click; passed straight to
             MiniWoBElementClick.
     Returns:
         MiniWoBElementClick: the click action, created with
             ``fail_hard=True``.
     """
     action = MiniWoBElementClick(element, fail_hard=True)
     # Call form of print: valid on Python 2 and Python 3 alike
     print('Clicking with {}'.format(action))
     return action