# Example 1
def _task_to_features_dict(task, do_stats):
    """Converts a single annotation task into a dict of numpy feature arrays.

    Args:
      task: dict-like task with keys 'instruction', 'actions' and
        'agreement-count'.
      do_stats: if True, also update the module-level stats collectors
        (distributions / counters / chosen_examples). Set to False for
        debug or test runs.

    Returns:
      Dict of (string, np.array) feature columns:
        'instruction_str': lower-cased answer, shape (1,).
        'instruction_word_id_seq': token ids of the answer, shape (word_num,).
        'verb_id_seq': verb id per action, shape (action_num,).
        'verb_str_position_seq': token span per verb, shape (action_num * 2,).
        'obj_desc_position_seq': token span per object description,
          shape (action_num * 2,).
        'input_str_position_seq': token span per input string, shape
          (action_num * 2,); filled with LABEL_DEFAULT_VALUE_INT pairs for
          actions without input content.
        'agreement_count': rater agreement count, shape (1,).

    Raises:
      ValueError: when an action's verb type cannot be recognized.
    """
    answer = task['instruction'].lower()
    word_ids, _ = string_utils.tokenize_to_ids(answer)

    features = {
        'instruction_str': np.array([answer], dtype=np.string_),
        'instruction_word_id_seq': np.array(word_ids, dtype=np.int64),
    }

    # Length distributions are only collected for well-agreed tasks.
    track_stats = do_stats and task['agreement-count'] >= 2

    verb_ids = []
    verb_positions = []
    obj_positions = []
    input_positions = []

    for action in task['actions']:
        try:
            verb = common.ActionTypes[action.verb_type.upper().strip()]
        except KeyError:
            raise ValueError('Verb "%s" cannot be recognized.' %
                             action.verb_type)
        if verb == common.ActionTypes.OTHERS:
            # Fall back to the literal verb text when the type is OTHERS.
            raw_verb = answer[action.verb_start_pos:action.verb_end_pos]
            verb = common.VERB_ID_MAP.get(raw_verb.strip().lower(),
                                          common.ActionTypes.OTHERS)
        verb_ids.append(verb.value)

        span = list(
            string_utils.get_token_pos_from_char_pos(answer,
                                                     action.verb_start_pos,
                                                     action.verb_end_pos))
        verb_positions.extend(span)
        if track_stats:
            distributions['longest_verb_str'][span[-1] - span[-2]] += 1

        span = list(
            string_utils.get_token_pos_from_char_pos(
                answer, action.object_desc_start_pos,
                action.object_desc_end_pos))
        obj_positions.extend(span)
        if track_stats:
            distributions['longest_obj_desc'][span[-1] - span[-2]] += 1

        if (action.input_content_start_pos == 0
                and action.input_content_end_pos == 0):
            # No input content annotated for this action: pad with defaults.
            input_positions.extend([config.LABEL_DEFAULT_VALUE_INT] * 2)
        else:
            span = list(
                string_utils.get_token_pos_from_char_pos(
                    answer, action.input_content_start_pos,
                    action.input_content_end_pos))
            input_positions.extend(span)
            if track_stats:
                distributions['longest_input_str'][span[-1] - span[-2]] += 1

    features['verb_id_seq'] = np.array(verb_ids, dtype=np.int64)
    features['verb_str_position_seq'] = np.array(verb_positions,
                                                 dtype=np.int64)
    features['obj_desc_position_seq'] = np.array(obj_positions,
                                                 dtype=np.int64)
    features['input_str_position_seq'] = np.array(input_positions,
                                                  dtype=np.int64)
    features['agreement_count'] = np.array([task['agreement-count']],
                                           dtype=np.int64)

    if do_stats:
        distributions['step_num'][len(task['actions'])] += 1
        distributions['longest_instruction'][len(word_ids)] += 1
        counters['total_verb_refs'] += len(verb_ids)
        counters['total_obj_refs'] += len(obj_positions) / 2
        counters['total_input_refs'] += (
            (len(input_positions) -
             input_positions.count(config.LABEL_DEFAULT_VALUE_INT)) / 2)
        seen_verb_ids = set(verb_ids)
        for verb_type in common.ActionTypes:
            if verb_type.value in seen_verb_ids:
                counters['Instructions contain %s in verbs' %
                         verb_type.name] += 1
        if input_positions.count(
                config.LABEL_DEFAULT_VALUE_INT) != len(input_positions):
            counters['Instructions contain INPUT Content'] += 1
        if ' and then ' in answer:
            chosen_examples['instruction_contains_and-then'].append(answer)
        if ' after ' in answer:
            chosen_examples['instruction_contains_after'].append(answer)
        if '. ' in answer:
            counters['instruction_contains_dot'] += 1
        if ', ' in answer:
            counters['instruction_contains_comma'] += 1

    return features
# Example 2
def _get_ui_object_attributes(view_hierarchy_leaf_nodes, lower_case=False):
    """Parses ui object informationn from a view hierachy leaf node list.

  Args:
    view_hierarchy_leaf_nodes: a list of view hierachy leaf nodes.
    lower_case: lower case all the ui texts.

  Returns:
    An un-padded attribute dictionary as follow:
      'type_id_seq': numpy array of ui object types from view hierarchy.
      'word_id_seq': numpy array of encoding for words in ui object.
      'char_id_seq': numpy array of encoding for words in ui object.
      'clickable_seq': numpy array of ui object clickable status.
      'cord_x_seq': numpy array of ui object x coordination.
      'cord_y_seq': numpy array of ui object y coordination.
      'dom_location_seq': numpy array of ui object depth, pre-order-traversal
      index, post-order-traversal index.
      'word_str_sequence': numpy array of ui object name strings.
  """
    type_sequence = []
    word_id_sequence = []
    char_id_sequence = []
    clickable_sequence = []
    cord_x_sequence = []
    cord_y_sequence = []
    dom_location_sequence = []
    obj_str_sequence = []

    def _is_ascii(s):
        return all(ord(c) < 128 for c in s)

    for vh_node in view_hierarchy_leaf_nodes:
        ui_obj = vh_node.uiobject
        type_sequence.append(ui_obj.obj_type.value)
        cord_x_sequence.append(ui_obj.bounding_box.x1)
        cord_x_sequence.append(ui_obj.bounding_box.x2)
        cord_y_sequence.append(ui_obj.bounding_box.y1)
        cord_y_sequence.append(ui_obj.bounding_box.y2)
        clickable_sequence.append(ui_obj.clickable)
        dom_location_sequence.extend(ui_obj.dom_location)

        valid_words = [w for w in ui_obj.word_sequence if _is_ascii(w)]
        word_sequence = ' '.join(valid_words)

        if lower_case:
            word_sequence = word_sequence.lower()
        obj_str_sequence.append(word_sequence)

        word_ids, char_ids = string_utils.tokenize_to_ids(word_sequence)
        word_id_sequence.append(word_ids)
        char_id_sequence.append(char_ids)
    ui_feature = {
        'type_id_seq': np.array(type_sequence),
        'word_id_seq': np.array(word_id_sequence),
        'clickable_seq': np.array(clickable_sequence),
        'cord_x_seq': np.array(cord_x_sequence),
        'cord_y_seq': np.array(cord_y_sequence),
        'dom_location_seq': np.array(dom_location_sequence),
        'obj_str_seq': np.array(obj_str_sequence, dtype=np.str),
    }
    return ui_feature
def get_synthetic_feature_dict(synthetic_action_list,
                               max_word_num,
                               unused_max_word_length,
                               parse_consumed=False):
  """Builds the padded feature dictionary for synthetic instructions.

  This dictionary contains all features related to the synthetic instructions.

  Args:
    synthetic_action_list: List of common.Action() instances.
    max_word_num: max word number for padding.
    unused_max_word_length: max word length for padding (unused).
    parse_consumed: whether to parse the consumed tag.

  Returns:
    A padded feature dictionary.
  """
  feature_keys = [
      'instruction_str',
      'instruction_rule_id',
      'instruction_word_id_seq',
      'verb_id_seq',
      'ui_target_id_seq',
      'verb_str_position_seq',
      'input_str_position_seq',
      'obj_desc_position_seq',
  ]
  if parse_consumed:
    feature_keys += ['consumed_tag', 'step_str_position_seq']
  feature = {key: [] for key in feature_keys}

  for action in synthetic_action_list:
    if not action.is_valid():
      continue
    action.convert_to_lower_case()
    word_id_seq, char_id_seq = string_utils.tokenize_to_ids(
        action.instruction_str)
    if len(word_id_seq) > max_word_num:
      # Synthetic actions longer than max_word_num tokens are dropped.
      tf.logging.info('[Dropped Long Synthetic Action]:%s',
                      action.instruction_str)
      continue
    feature['instruction_str'].append(action.instruction_str)
    feature['instruction_rule_id'].append(action.action_rule.value)
    feature['instruction_word_id_seq'].append(word_id_seq)
    # Only populated when char-level tokens are enabled upstream.
    if 'instruction_char_id_seq' in feature:
      feature['instruction_char_id_seq'].append(char_id_seq)
    feature['verb_id_seq'].append(action.action_type.value)
    feature['ui_target_id_seq'].append(action.target_obj_idx)
    feature['verb_str_position_seq'].extend(
        string_utils.get_token_pos_from_char_pos(action.instruction_str,
                                                 action.verb_str_pos[0],
                                                 action.verb_str_pos[1]))
    feature['obj_desc_position_seq'].extend(
        string_utils.get_token_pos_from_char_pos(action.instruction_str,
                                                 action.obj_str_pos[0],
                                                 action.obj_str_pos[1]))
    if action.has_valid_input():
      feature['input_str_position_seq'].extend(
          string_utils.get_token_pos_from_char_pos(action.instruction_str,
                                                   action.input_str_pos[0],
                                                   action.input_str_pos[1]))
    else:
      # No valid input content: keep the raw (default) position pair.
      feature['input_str_position_seq'].extend(action.input_str_pos)
    if parse_consumed:
      feature['consumed_tag'].append(int(action.is_consumed))
      feature['step_str_position_seq'].extend(
          string_utils.get_token_pos_from_char_pos(action.instruction_str,
                                                   action.step_str_pos[0],
                                                   action.step_str_pos[1]))

  feature = {key: np.array(value) for key, value in feature.items()}

  phrase_count = feature['instruction_str'].shape[0]
  # Each entry is (target_shape, dtype, fill_value) for padding.
  pad_spec = {
      'instruction_str': (phrase_count, np.string_, ''),
      'instruction_rule_id': (phrase_count, np.int64, 0),
      'instruction_word_id_seq': ((phrase_count, max_word_num), np.int64, 0),
      'verb_id_seq': (phrase_count, np.int64, 0),
      'ui_target_id_seq': (phrase_count, np.int64, 0),
      'verb_str_position_seq': (phrase_count * 2, np.int64, 0),
      'input_str_position_seq': (phrase_count * 2, np.int64, 0),
      'obj_desc_position_seq': (phrase_count * 2, np.int64, 0),
  }
  if parse_consumed:
    pad_spec['consumed_tag'] = (phrase_count, np.int64, 0)
    pad_spec['step_str_position_seq'] = (phrase_count * 2, np.int64, 0)

  padding_shape = {key: spec[0] for key, spec in pad_spec.items()}
  padding_type = {key: spec[1] for key, spec in pad_spec.items()}
  padding_value = {key: spec[2] for key, spec in pad_spec.items()}

  return proto_utils.padding_dictionary(feature, padding_shape, padding_type,
                                        padding_value)