Code example #1
  def _compute_inner_update_scinol(self, var, grad, state):
    update_ops = []

    betting_domain = tf.cast(
        state.get_hyper(BETTING_DOMAIN), var.dtype.base_dtype)

    reward = state.get_slot(var, INNER_REWARD)
    betting_fraction = state.get_slot(var, OUTER_BETTING_FRACTION)
    sum_grad_squared = state.get_slot(var, INNER_SUM_GRAD_SQUARED)
    sum_grad = state.get_slot(var, INNER_SUM_GRAD)
    inner_maximum_gradient = state.get_slot(var, INNER_MAXIMUM_GRADIENT)

    # clip inner gradient to respect previous inner_maximum_gradient value
    # This introduces at most an additive constant overhead in the regret
    # since the inner betting fraction lies in a bounded domain.
    clipped_grad = tf.clip_by_value(grad, -inner_maximum_gradient,
                                    inner_maximum_gradient)

    with tf.control_dependencies([clipped_grad]):
      inner_maximum_gradient_updated = self._assign(
          inner_maximum_gradient,
          tf.maximum(inner_maximum_gradient, tf.abs(grad)))
      update_ops.append(inner_maximum_gradient_updated)

    clipped_old_betting_fraction = tf.clip_by_value(betting_fraction,
                                                    -betting_domain,
                                                    betting_domain)

    # Process grad to respect truncation to [-betting_domain, betting_domain]
    truncated_grad = tf.where(
        tf.greater_equal(
            clipped_grad * (betting_fraction - clipped_old_betting_fraction),
            0.0), clipped_grad, tf.zeros(tf.shape(clipped_grad)))

    reward_delta = -betting_fraction * truncated_grad
    reward_updated = self._assign_add(reward, reward_delta)
    update_ops.append(reward_updated)

    sum_grad_squared_updated = self._assign_add(sum_grad_squared,
                                                tf.square(truncated_grad))
    update_ops.append(sum_grad_squared_updated)

    sum_grad_updated = self._assign_add(sum_grad, truncated_grad)
    update_ops.append(sum_grad_updated)

    # The second term in this minimum, self.eta / inner_maximum_gradient_updated,
    # is a hack to force the betting fraction to not be too big at first.
    scaling = tf.minimum(
        tf.rsqrt(sum_grad_squared_updated +
                 tf.square(inner_maximum_gradient_updated)),
        self.eta / inner_maximum_gradient_updated)
    theta = -sum_grad_updated * scaling

    # The rescale_inner flag is a hack that rescales epsilon_v by the
    # maximum inner gradient.
    if self.rescale_inner:
      epsilon_scaling = inner_maximum_gradient_updated
    else:
      epsilon_scaling = 1.0

    inner_betting_fraction = tf.sign(theta) * tf.minimum(tf.abs(theta),
                                                         1.0) * scaling / 2.0
    new_betting_fraction = inner_betting_fraction * (
        reward_updated + epsilon_scaling * self.epsilon_v)

    betting_fraction_updated = self._assign(betting_fraction,
                                            new_betting_fraction)
    update_ops.append(betting_fraction_updated)

    clipped_betting_fraction = tf.clip_by_value(betting_fraction_updated,
                                                -betting_domain, betting_domain)

    if self.output_summaries:
      mean_unclipped_betting_fraction_summary = tf.reduce_mean(
          tf.abs(betting_fraction_updated))
      max_unclipped_betting_fraction_summary = tf.reduce_max(
          tf.abs(betting_fraction_updated))

      mean_clipped_betting_fraction_summary = tf.reduce_mean(
          tf.abs(clipped_betting_fraction))
      max_clipped_betting_fraction_summary = tf.reduce_max(
          tf.abs(clipped_betting_fraction))

      max_abs_gradient = tf.reduce_max(tf.abs(grad))
      max_truncated_grad = tf.reduce_max(tf.abs(truncated_grad))

      tf.summary.scalar(self._name + "/mean_unclipped_bet/" + var.name,
                        mean_unclipped_betting_fraction_summary)
      tf.summary.scalar(self._name + "/max_unclipped_bet/" + var.name,
                        max_unclipped_betting_fraction_summary)
      tf.summary.scalar(self._name + "/mean_clipped_bet/" + var.name,
                        mean_clipped_betting_fraction_summary)
      tf.summary.scalar(self._name + "/max_clipped_bet/" + var.name,
                        max_clipped_betting_fraction_summary)

      tf.summary.scalar(self._name + "/max_abs_inner_grad/" + var.name,
                        max_abs_gradient)
      tf.summary.scalar(
          self._name + "/max_abs_truncated_inner_grad/" + var.name,
          max_truncated_grad)
    return clipped_betting_fraction, tf.group(*update_ops)
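For reference, the adaptive scaling term above can be reproduced in isolation. A minimal sketch (TF2 eager for brevity; the accumulator values and eta are made up):

import tensorflow as tf

sum_grad_squared = tf.constant([9.0])  # running sum of squared inner gradients
max_grad = tf.constant([2.0])          # running maximum absolute inner gradient
eta = 1.0                              # hypothetical value of self.eta

# Same two-term minimum as in the optimizer: an AdaGrad-style inverse root,
# capped by eta / max_grad so that early betting fractions stay small.
scaling = tf.minimum(tf.math.rsqrt(sum_grad_squared + tf.square(max_grad)),
                     eta / max_grad)
print(scaling.numpy())  # min(1/sqrt(13), 0.5) ~= 0.277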
Code example #2
def _process_rico_sca(feature_dict, max_range, max_dom_pos,
                      load_dom_dist=False, load_extra=False, load_screen=True):
  """Processes one_shot feature dictionary.

  Args:
    feature_dict: feature dictionary
    max_range: the maximum range allowed for a reference span (passed to
      _bound_refs).
    max_dom_pos: the maximum DOM position; obj_dom_pos entries are clipped to
      max_dom_pos - 1.
    load_dom_dist: whether to load the dom distance feature.
    load_extra: whether to load the extra data for debugging.
    load_screen: whether to load the screen features.
  Returns:
    A processed feature dictionary.
  """
  phrase_count = tf.size(feature_dict['obj_desc_position_seq']) // 2
  feature = {
      'task':
          tf.reshape(feature_dict['instruction_word_id_seq'],
                     [phrase_count, NUM_TOKENS_PER_SYN]),
      'input_refs':
          tf.reshape(feature_dict['input_str_position_seq'],
                     [phrase_count, 1, 2]),
      'obj_refs':
          tf.reshape(feature_dict['obj_desc_position_seq'],
                     [phrase_count, 1, 2]),
      'verb_refs':
          tf.reshape(feature_dict['verb_str_position_seq'],
                     [phrase_count, 1, 2]),
      'rule':
          tf.reshape(feature_dict['instruction_rule_id'], [phrase_count]),
  }
  selected_synthetic_action_idx = tf.random_uniform(
      shape=(), minval=0, maxval=phrase_count, dtype=tf.int32)
  for key in feature:
    feature[key] = feature[key][selected_synthetic_action_idx]
  if load_extra:
    feature['raw_task'] = tf.reshape(
        feature_dict['instruction_str'],
        [phrase_count])[selected_synthetic_action_idx]
    feature['task_id'] = tf.constant('empty_task_id', dtype=tf.string)
  if load_screen:
    feature['verbs'] = tf.reshape(
        feature_dict['verb_id_seq'],
        [phrase_count, 1])[selected_synthetic_action_idx]
    feature['objects'] = tf.reshape(
        feature_dict['ui_target_id_seq'],
        [phrase_count, 1])[selected_synthetic_action_idx]
    feature['obj_text'] = tf.reshape(feature_dict['ui_obj_word_id_seq'],
                                     [1, -1, NUM_TOKENS_PER_OBJ])
    feature['obj_type'] = tf.reshape(
        feature_dict['ui_obj_type_id_seq'], [1, -1])
    feature['obj_clickable'] = tf.reshape(feature_dict['ui_obj_clickable_seq'],
                                          [1, -1])
    def _make_obj_screen_pos():
      return tf.concat([
          tf.reshape(feature_dict['ui_obj_cord_x_seq'], [1, -1, 2]),
          tf.reshape(feature_dict['ui_obj_cord_y_seq'], [1, -1, 2])
      ], 2)

    feature['obj_screen_pos'] = tf.cond(
        tf.equal(
            tf.size(feature_dict['ui_obj_cord_x_seq']),
            0), lambda: tf.fill([1, tf.shape(feature['obj_type'])[1], 4], 0.),
        _make_obj_screen_pos)
    feature['obj_dom_pos'] = tf.reshape(feature_dict['ui_obj_dom_location_seq'],
                                        [1, -1, 3])
    feature['obj_dom_pos'] = tf.minimum(feature['obj_dom_pos'], max_dom_pos - 1)
    if load_dom_dist:
      num_ui_obj = tf.to_int32(
          tf.sqrt(tf.to_float(tf.size(feature_dict['ui_obj_dom_distance']))))
      feature['obj_dom_dist'] = tf.reshape(feature_dict['ui_obj_dom_distance'],
                                           [1, num_ui_obj, num_ui_obj])
    if load_extra:
      feature['obj_raw_text'] = tf.reshape(feature_dict['ui_obj_str_seq'],
                                           [1, -1])
  else:
    _load_fake_screen(feature, load_extra, load_dom_dist)
  _bound_refs(feature, max_range)
  feature['data_source'] = tf.constant(0, dtype=tf.int32)
  feature['agreement_count'] = tf.constant(100, dtype=tf.int32)

  return feature
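The per-example sampling above reduces to drawing one phrase index and gathering every feature at that index. A standalone sketch with toy shapes (TF2 eager; the names here are hypothetical):

import tensorflow as tf

phrase_count = 4
feature = {
    'task': tf.reshape(tf.range(phrase_count * 3), [phrase_count, 3]),
    'rule': tf.range(phrase_count),
}
idx = tf.random.uniform(shape=(), minval=0, maxval=phrase_count, dtype=tf.int32)
selected = {k: v[idx] for k, v in feature.items()}  # one synthetic action
print(selected['task'].shape)  # (3,)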
Code example #3
def _iou_per_anchor(pred_boxes: FloatType,
                    target_boxes: FloatType,
                    iou_type: Text = 'iou') -> tf.Tensor:
    """Computing the IoU for a single anchor.

  Args:
    pred_boxes: predicted boxes, with coordinate [y_min, x_min, y_max, x_max].
    target_boxes: target boxes, with coordinate [y_min, x_min, y_max, x_max].
    iou_type: one of ['iou', 'ciou', 'diou', 'giou'].

  Returns:
    IoU loss float `Tensor`.
  """
    # t_ denotes target boxes and p_ denotes predicted boxes.
    t_ymin, t_xmin, t_ymax, t_xmax = target_boxes
    p_ymin, p_xmin, p_ymax, p_xmax = pred_boxes

    zero = tf.convert_to_tensor(0.0, t_ymin.dtype)
    p_width = tf.maximum(zero, p_xmax - p_xmin)
    p_height = tf.maximum(zero, p_ymax - p_ymin)
    t_width = tf.maximum(zero, t_xmax - t_xmin)
    t_height = tf.maximum(zero, t_ymax - t_ymin)
    p_area = p_width * p_height
    t_area = t_width * t_height

    intersect_ymin = tf.maximum(p_ymin, t_ymin)
    intersect_xmin = tf.maximum(p_xmin, t_xmin)
    intersect_ymax = tf.minimum(p_ymax, t_ymax)
    intersect_xmax = tf.minimum(p_xmax, t_xmax)
    intersect_width = tf.maximum(zero, intersect_xmax - intersect_xmin)
    intersect_height = tf.maximum(zero, intersect_ymax - intersect_ymin)
    intersect_area = intersect_width * intersect_height

    union_area = p_area + t_area - intersect_area
    iou_v = tf.math.divide_no_nan(intersect_area, union_area)
    if iou_type == 'iou':
        return iou_v  # iou is the simplest form.

    enclose_ymin = tf.minimum(p_ymin, t_ymin)
    enclose_xmin = tf.minimum(p_xmin, t_xmin)
    enclose_ymax = tf.maximum(p_ymax, t_ymax)
    enclose_xmax = tf.maximum(p_xmax, t_xmax)

    assert iou_type in ('giou', 'diou', 'ciou')
    if iou_type == 'giou':  # giou is the generalized iou.
        enclose_width = tf.maximum(zero, enclose_xmax - enclose_xmin)
        enclose_height = tf.maximum(zero, enclose_ymax - enclose_ymin)
        enclose_area = enclose_width * enclose_height
        giou_v = iou_v - tf.math.divide_no_nan(
            (enclose_area - union_area), enclose_area)
        return giou_v

    assert iou_type in ('diou', 'ciou')
    p_center = tf.stack([(p_ymin + p_ymax) / 2, (p_xmin + p_xmax) / 2])
    t_center = tf.stack([(t_ymin + t_ymax) / 2, (t_xmin + t_xmax) / 2])
    euclidean = tf.linalg.norm(t_center - p_center)
    diag_length = tf.linalg.norm(
        [enclose_ymax - enclose_ymin, enclose_xmax - enclose_xmin])
    diou_v = iou_v - tf.math.divide_no_nan(euclidean**2, diag_length**2)
    if iou_type == 'diou':  # diou is the distance iou.
        return diou_v

    assert iou_type == 'ciou'
    v = _get_v(p_height, p_width, t_height, t_width)
    alpha = tf.math.divide_no_nan(v, ((1 - iou_v) + v))
    return diou_v - alpha * v  # the last one is ciou.
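A quick numeric check of the plain-IoU path, assuming _iou_per_anchor as defined above and TF2 eager execution. Each box is a sequence of four scalar tensors, matching how the function unpacks its arguments:

import tensorflow as tf

pred_boxes = [tf.constant(v) for v in (0.0, 0.0, 2.0, 2.0)]    # 2x2 box
target_boxes = [tf.constant(v) for v in (1.0, 1.0, 3.0, 3.0)]  # shifted 2x2 box
iou = _iou_per_anchor(pred_boxes, target_boxes, 'iou')
print(iou.numpy())  # intersection 1, union 4 + 4 - 1 = 7 -> ~0.1429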
Code example #4
    def generate_trips(self, min_gap=1, max_gap=5):
        """Generate a tf Dataset of training triplets with an offset between three frames.

    Args:
      min_gap: (int) the minimum offset between two frames of a sampled triplet.
      max_gap: (int) the maximum offset between two frames of a sampled triplet.

    Returns:
      A tf.data.Dataset of ViewTrips without images, consisting of
      triplets from the input sequence separated by offsets in the given range.
    """
        def mapper(timestamp_trips, rgb_trips, pano_trips, depth_trips,
                   normal_trips, pose_trips):
            """A function mapping a data tuple to ViewTrip."""
            return ViewTrip(self.scene_id, self.sequence_id, timestamp_trips,
                            rgb_trips, pano_trips, depth_trips, normal_trips,
                            tf.zeros([1]), pose_trips, self.intrinsics[0],
                            self.resolution[0])

        with tf.control_dependencies([
                tf.Assert(tf.less(max_gap, self.length()),
                          [max_gap, self.length()])
        ]):
            timestamp_trips = []
            rgb_trips = []
            pano_trips = []
            depth_trips = []
            normal_trips = []
            pose_trips = []
            # generate triplets with an offset that ranges
            # from 'min_gap' to 'max_gap'.
            for stride in range(min_gap, max_gap + 1):
                inds = tf.range(stride, self.length() - stride)
                inds_jitter = tf.random.uniform(
                    minval=-40,
                    maxval=40,
                    shape=[self.length() - 2 * stride],
                    dtype=tf.int32)
                rand_inds = tf.minimum(tf.maximum(inds + inds_jitter, 0),
                                       self.length() - 1)
                timestamp = tf.stack([
                    self.timestamp[:-2 * stride], self.timestamp[2 * stride:],
                    self.timestamp[stride:-stride],
                    tf.gather(self.timestamp, rand_inds)
                ],
                                     axis=1)
                rgb = tf.stack([
                    self.rgb[:-2 * stride], self.rgb[2 * stride:],
                    self.rgb[stride:-stride],
                    tf.gather(self.rgb, rand_inds)
                ],
                               axis=1)
                pano = tf.stack([
                    self.pano[:-2 * stride], self.pano[2 * stride:],
                    self.pano[stride:-stride],
                    tf.gather(self.pano, rand_inds)
                ],
                                axis=1)
                depth = tf.stack([
                    self.depth[:-2 * stride], self.depth[2 * stride:],
                    self.depth[stride:-stride],
                    tf.gather(self.depth, rand_inds)
                ],
                                 axis=1)
                normal = tf.stack([
                    self.normal[:-2 * stride], self.normal[2 * stride:],
                    self.normal[stride:-stride],
                    tf.gather(self.normal, rand_inds)
                ],
                                  axis=1)
                pose = tf.stack([
                    self.pose[:-2 * stride], self.pose[2 * stride:],
                    self.pose[stride:-stride],
                    tf.gather(self.pose, rand_inds)
                ],
                                axis=1)
                timestamp_trips.append(timestamp)
                rgb_trips.append(rgb)
                pano_trips.append(pano)
                depth_trips.append(depth)
                normal_trips.append(normal)
                pose_trips.append(pose)

            timestamp_trips = tf.concat(timestamp_trips, 0)
            rgb_trips = tf.concat(rgb_trips, 0)
            pano_trips = tf.concat(pano_trips, 0)
            depth_trips = tf.concat(depth_trips, 0)
            normal_trips = tf.concat(normal_trips, 0)
            pose_trips = tf.concat(pose_trips, 0)
            dataset = tf.data.Dataset.from_tensor_slices(
                (timestamp_trips, rgb_trips, pano_trips, depth_trips,
                 normal_trips, pose_trips))
            return dataset.map(mapper)
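The slicing-and-stacking pattern used above pairs each frame with the frames stride and 2*stride ahead of it. Isolated on a toy sequence (TF2 eager):

import tensorflow as tf

seq = tf.range(8)
stride = 2
trips = tf.stack([seq[:-2 * stride],      # first frame
                  seq[2 * stride:],       # last frame
                  seq[stride:-stride]],   # middle frame
                 axis=1)
print(trips.numpy())  # rows like [0 4 2], [1 5 3], ...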
Code example #5
    def _compute_model_loss(self, input_sequence, output_sequence,
                            sequence_length, control_sequence):
        """Builds a model with loss for train/eval."""
        hparams = self.hparams
        batch_size = hparams.batch_size

        input_sequence = tf.to_float(input_sequence)
        output_sequence = tf.to_float(output_sequence)

        max_seq_len = tf.minimum(
            tf.shape(output_sequence)[1], hparams.max_seq_len)

        input_sequence = input_sequence[:, :max_seq_len]

        if control_sequence is not None:
            control_depth = control_sequence.shape[-1]
            control_sequence = tf.to_float(control_sequence)
            control_sequence = control_sequence[:, :max_seq_len]
            # Shouldn't be necessary, but the slice loses shape information when
            # control depth is zero.
            control_sequence.set_shape([batch_size, None, control_depth])

        # The target/expected outputs.
        x_target = output_sequence[:, :max_seq_len]
        # Inputs to be fed to decoder, including zero padding for the initial input.
        x_input = tf.pad(output_sequence[:, :max_seq_len - 1], [(0, 0), (1, 0),
                                                                (0, 0)])
        x_length = tf.minimum(sequence_length, max_seq_len)

        # Either encode to get `z`, or do unconditional, decoder-only.
        if hparams.z_size:  # vae mode:
            q_z = self.encode(input_sequence, x_length, control_sequence)
            z = q_z.sample()

            # Prior distribution.
            p_z = ds.MultivariateNormalDiag(loc=[0.] * hparams.z_size,
                                            scale_diag=[1.] * hparams.z_size)

            # KL Divergence (nats)
            kl_div = ds.kl_divergence(q_z, p_z)

            # Concatenate the Z vectors to the inputs at each time step.
        else:  # unconditional, decoder-only generation
            kl_div = tf.zeros([batch_size, 1], dtype=tf.float32)
            z = None

        r_loss, metric_map = self.decoder.reconstruction_loss(
            x_input, x_target, x_length, z, control_sequence)[0:2]

        free_nats = hparams.free_bits * tf.math.log(2.0)
        kl_cost = tf.maximum(kl_div - free_nats, 0)

        beta = (
            (1.0 - tf.pow(hparams.beta_rate, tf.to_float(self.global_step))) *
            hparams.max_beta)
        self.loss = tf.reduce_mean(r_loss) + beta * tf.reduce_mean(kl_cost)

        scalars_to_summarize = {
            'loss': self.loss,
            'losses/r_loss': r_loss,
            'losses/kl_loss': kl_cost,
            'losses/kl_bits': kl_div / tf.math.log(2.0),
            'losses/kl_beta': beta,
        }
        return metric_map, scalars_to_summarize
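The free-bits computation above converts a bit allowance into nats and only penalizes KL above it. A standalone sketch with made-up values (TF2 eager; free_bits is hypothetical):

import tensorflow as tf

kl_div = tf.constant([0.2, 1.5])            # per-example KL in nats
free_bits = 1.0                             # hypothetical hparams.free_bits
free_nats = free_bits * tf.math.log(2.0)    # ~0.693 nats
kl_cost = tf.maximum(kl_div - free_nats, 0.0)
print(kl_cost.numpy())  # [0.0, ~0.807]: KL below the allowance is free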
Code example #6
File: BetLearn.py Project: CriMenghini/DrBC
    def BuildNet(self):
        # [node_feat_dim, embed_dim]
        w_n2l = tf.Variable(
            tf.truncated_normal([node_feat_dim, self.embedding_size],
                                stddev=initialization_stddev),
            tf.float32,
            name="w_n2l")
        # [embed_dim, embed_dim]
        p_node_conv = tf.Variable(tf.truncated_normal(
            [self.embedding_size, self.embedding_size],
            stddev=initialization_stddev),
                                  tf.float32,
                                  name="p_node_conv")

        if combineID == 1:  # 'graphsage'
            # [embed_dim, embed_dim]
            p_node_conv2 = tf.Variable(tf.truncated_normal(
                [self.embedding_size, self.embedding_size],
                stddev=initialization_stddev),
                                       tf.float32,
                                       name="p_node_conv2")
            # [2*embed_dim, embed_dim]
            p_node_conv3 = tf.Variable(tf.truncated_normal(
                [2 * self.embedding_size, self.embedding_size],
                stddev=initialization_stddev),
                                       tf.float32,
                                       name="p_node_conv3")
        elif combineID == 2:  #GRU
            w_r = tf.Variable(tf.truncated_normal(
                [self.embedding_size, self.embedding_size],
                stddev=initialization_stddev),
                              tf.float32,
                              name="w_r")
            u_r = tf.Variable(tf.truncated_normal(
                [self.embedding_size, self.embedding_size],
                stddev=initialization_stddev),
                              tf.float32,
                              name="u_r")
            w_z = tf.Variable(tf.truncated_normal(
                [self.embedding_size, self.embedding_size],
                stddev=initialization_stddev),
                              tf.float32,
                              name="w_z")
            u_z = tf.Variable(tf.truncated_normal(
                [self.embedding_size, self.embedding_size],
                stddev=initialization_stddev),
                              tf.float32,
                              name="u_z")
            w = tf.Variable(tf.truncated_normal(
                [self.embedding_size, self.embedding_size],
                stddev=initialization_stddev),
                            tf.float32,
                            name="w")
            u = tf.Variable(tf.truncated_normal(
                [self.embedding_size, self.embedding_size],
                stddev=initialization_stddev),
                            tf.float32,
                            name="u")

        # [embed_dim, reg_hidden]
        h1_weight = tf.Variable(tf.truncated_normal(
            [self.embedding_size, self.reg_hidden],
            stddev=initialization_stddev),
                                tf.float32,
                                name="h1_weight")
        # [reg_hidden+aux_feat_dim, 1]
        h2_weight = tf.Variable(tf.truncated_normal(
            [self.reg_hidden + aux_feat_dim, 1], stddev=initialization_stddev),
                                tf.float32,
                                name="h2_weight")
        # [reg_hidden, 1]
        last_w = h2_weight

        # [node_cnt, node_feat_dim]
        node_size = tf.shape(self.n2nsum_param)[0]
        node_input = self.node_feat

        #[node_cnt, embed_dim]
        input_message = tf.matmul(tf.cast(node_input, tf.float32), w_n2l)

        lv = 0
        # [node_cnt, embed_dim], no sparse
        cur_message_layer = self.activation(input_message)
        cur_message_layer = tf.nn.l2_normalize(cur_message_layer, axis=1)

        if JK:  # 1: max_pooling; 2: min_pooling; 3: mean_pooling; 4: LSTM with attention
            cur_message_layer_JK = cur_message_layer
        if JK == 4:  #LSTM init hidden layer
            w_r_JK = tf.Variable(tf.truncated_normal(
                [self.embedding_size, self.embedding_size],
                stddev=initialization_stddev),
                                 tf.float32,
                                 name="w_r_JK")
            u_r_JK = tf.Variable(tf.truncated_normal(
                [self.embedding_size, self.embedding_size],
                stddev=initialization_stddev),
                                 tf.float32,
                                 name="u_r_JK")
            w_z_JK = tf.Variable(tf.truncated_normal(
                [self.embedding_size, self.embedding_size],
                stddev=initialization_stddev),
                                 tf.float32,
                                 name="w_z_JK")
            u_z_JK = tf.Variable(tf.truncated_normal(
                [self.embedding_size, self.embedding_size],
                stddev=initialization_stddev),
                                 tf.float32,
                                 name="u_z_JK")
            w_JK = tf.Variable(tf.truncated_normal(
                [self.embedding_size, self.embedding_size],
                stddev=initialization_stddev),
                               tf.float32,
                               name="w_JK")
            u_JK = tf.Variable(tf.truncated_normal(
                [self.embedding_size, self.embedding_size],
                stddev=initialization_stddev),
                               tf.float32,
                               name="u_JK")
            #attention matrix
            JK_attention = tf.Variable(tf.truncated_normal(
                [self.embedding_size, 1], stddev=initialization_stddev),
                                       tf.float32,
                                       name="JK_attention")
            #attention list
            JK_attention_list = []
            JK_Hidden_list = []
            cur_message_layer_list = []
            cur_message_layer_list.append(cur_message_layer)
            JK_Hidden = tf.truncated_normal(tf.shape(cur_message_layer),
                                            stddev=initialization_stddev)

        # max_bp_iter steps of neighbor propagation
        while lv < max_bp_iter:
            lv = lv + 1
            # [node_cnt, node_cnt]*[node_cnt, embed_dim] = [node_cnt, embed_dim]
            n2npool = tf.sparse_tensor_dense_matmul(
                tf.cast(self.n2nsum_param, tf.float64),
                tf.cast(cur_message_layer, tf.float64))
            n2npool = tf.cast(n2npool, tf.float32)

            # [node_cnt, embed_dim] * [embedding, embedding] = [node_cnt, embed_dim], dense
            node_linear = tf.matmul(n2npool, p_node_conv)

            if combineID == 0:  # 'structure2vec'
                # [node_cnt, embed_dim] + [node_cnt, embed_dim] = [node_cnt, embed_dim], return tensed matrix
                merged_linear = tf.add(node_linear, input_message)
                # [node_cnt, embed_dim]
                cur_message_layer = self.activation(merged_linear)
                if JK == 1:
                    cur_message_layer_JK = tf.maximum(cur_message_layer_JK,
                                                      cur_message_layer)
                elif JK == 2:
                    cur_message_layer_JK = tf.minimum(cur_message_layer_JK,
                                                      cur_message_layer)
                elif JK == 3:
                    cur_message_layer_JK = tf.add(cur_message_layer_JK,
                                                  cur_message_layer)
                elif JK == 4:
                    cur_message_layer_list.append(cur_message_layer)

            elif combineID == 1:  # 'graphsage'
                # [node_cnt, embed_dim] * [embed_dim, embed_dim] = [node_cnt, embed_dim], dense
                cur_message_layer_linear = tf.matmul(
                    tf.cast(cur_message_layer, tf.float32), p_node_conv2)
                # [[node_cnt, embed_dim] [node_cnt, embed_dim]] = [node_cnt, 2*embed_dim], return tensed matrix
                merged_linear = tf.concat(
                    [node_linear, cur_message_layer_linear], 1)
                # [node_cnt, 2*embed_dim]*[2*embed_dim, embed_dim] = [node_cnt, embed_dim]
                cur_message_layer = self.activation(
                    tf.matmul(merged_linear, p_node_conv3))

                if JK == 1:
                    cur_message_layer_JK = tf.maximum(cur_message_layer_JK,
                                                      cur_message_layer)
                elif JK == 2:
                    cur_message_layer_JK = tf.minimum(cur_message_layer_JK,
                                                      cur_message_layer)
                elif JK == 3:
                    cur_message_layer_JK = tf.add(cur_message_layer_JK,
                                                  cur_message_layer)
                elif JK == 4:
                    cur_message_layer_list.append(cur_message_layer)

            elif combineID == 2:  #gru
                r_t = tf.nn.relu(
                    tf.add(tf.matmul(node_linear, w_r),
                           tf.matmul(cur_message_layer, u_r)))
                z_t = tf.nn.relu(
                    tf.add(tf.matmul(node_linear, w_z),
                           tf.matmul(cur_message_layer, u_z)))
                h_t = tf.nn.tanh(
                    tf.add(tf.matmul(node_linear, w),
                           tf.matmul(r_t * cur_message_layer, u)))
                cur_message_layer = (1 - z_t) * cur_message_layer + z_t * h_t
                cur_message_layer = tf.nn.l2_normalize(cur_message_layer,
                                                       axis=1)

                if JK == 1:
                    cur_message_layer_JK = tf.maximum(cur_message_layer_JK,
                                                      cur_message_layer)
                elif JK == 2:
                    cur_message_layer_JK = tf.minimum(cur_message_layer_JK,
                                                      cur_message_layer)
                elif JK == 3:
                    cur_message_layer_JK = tf.add(cur_message_layer_JK,
                                                  cur_message_layer)
                elif JK == 4:
                    cur_message_layer_list.append(cur_message_layer)

            cur_message_layer = tf.nn.l2_normalize(cur_message_layer, axis=1)

        if JK == 1 or JK == 2:
            cur_message_layer = cur_message_layer_JK
        elif JK == 3:
            cur_message_layer = cur_message_layer_JK / (max_bp_iter + 1)
        elif JK == 4:
            for X_value in cur_message_layer_list:
                #[node_cnt,embed_size]
                r_t_JK = tf.nn.relu(
                    tf.add(tf.matmul(X_value, w_r_JK),
                           tf.matmul(JK_Hidden, u_r_JK)))
                z_t_JK = tf.nn.relu(
                    tf.add(tf.matmul(X_value, w_z_JK),
                           tf.matmul(JK_Hidden, u_z_JK)))
                h_t_JK = tf.nn.tanh(
                    tf.add(tf.matmul(X_value, w_JK),
                           tf.matmul(r_t_JK * JK_Hidden, u_JK)))
                JK_Hidden = (1 - z_t_JK) * h_t_JK + z_t_JK * JK_Hidden
                JK_Hidden = tf.nn.l2_normalize(JK_Hidden, axis=1)
                #[max_bp_iter+1,node_cnt,embed_size]
                JK_Hidden_list.append(JK_Hidden)
                # [max_bp_iter+1,node_cnt,1] =  [node_cnt,embed_size]*[embed_size,1]=[node_cnt,1]
                attention = tf.nn.tanh(tf.matmul(JK_Hidden, JK_attention))
                JK_attention_list.append(attention)
                cur_message_layer = JK_Hidden

            # [max_bp_iter+1,node_cnt,1]
            JK_attentions = tf.reshape(JK_attention_list,
                                       [max_bp_iter + 1, node_size, 1])
            coefficient = tf.nn.softmax(JK_attentions, axis=0)
            JK_Hidden_list = tf.reshape(
                JK_Hidden_list,
                [max_bp_iter + 1, node_size, self.embedding_size])
            # [max_bp_iter+1, node_cnt, 1] * [max_bp_iter+1, node_cnt, embed_size]
            # = [max_bp_iter+1, node_cnt, embed_size]
            result = coefficient * JK_Hidden_list
            cur_message_layer = tf.reduce_sum(result, 0)
            cur_message_layer = tf.reshape(cur_message_layer,
                                           [node_size, self.embedding_size])

        cur_message_layer = tf.nn.l2_normalize(cur_message_layer, axis=1)

        # node embedding, [node_cnt, embed_dim]
        embed_s_a = cur_message_layer

        # decoder, two-layer MLP
        hidden = tf.matmul(embed_s_a, h1_weight)
        last_output = self.activation(hidden)
        last_output = tf.concat([last_output, self.aux_feat], axis=1)
        betw_pred = tf.matmul(last_output, last_w)

        # [pair_size, 1]
        labels = tf.nn.embedding_lookup(
            self.label, self.pair_ids_src) - tf.nn.embedding_lookup(
                self.label, self.pair_ids_tgt)
        preds = tf.nn.embedding_lookup(
            betw_pred, self.pair_ids_src) - tf.nn.embedding_lookup(
                betw_pred, self.pair_ids_tgt)

        loss = self.pairwise_ranking_loss(preds, labels)
        trainStep = tf.train.AdamOptimizer(self.learning_rate).minimize(loss)

        return loss, trainStep, betw_pred, embed_s_a, tf.trainable_variables()
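The training signal at the end is pairwise: only differences of predictions and labels over sampled node pairs enter the loss. A stripped-down sketch of that pairing (TF2 eager; the logistic surrogate here is hypothetical, not necessarily DrBC's pairwise_ranking_loss):

import tensorflow as tf

betw_pred = tf.constant([[0.9], [0.1], [0.5]])  # per-node predictions
label = tf.constant([[1.0], [0.0], [0.4]])      # per-node ground truth
src, tgt = tf.constant([0, 2]), tf.constant([1, 1])

pred_diff = tf.gather(betw_pred, src) - tf.gather(betw_pred, tgt)
label_diff = tf.gather(label, src) - tf.gather(label, tgt)
loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(
    labels=tf.sigmoid(label_diff), logits=pred_diff))
print(loss.numpy())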
Code example #7
def mask(config: configure_pretraining.PretrainingConfig,
         inputs: pretrain_data.Inputs,
         mask_prob,
         proposal_distribution=1.0,
         disallow_from_mask=None,
         already_masked=None):
    """Implementation of dynamic masking. The optional arguments aren't needed for
  BERT/ELECTRA and are from early experiments in "strategically" masking out
  tokens instead of uniformly at random.

  Args:
    config: configure_pretraining.PretrainingConfig
    inputs: pretrain_data.Inputs containing input input_ids/input_mask
    mask_prob: percent of tokens to mask
    proposal_distribution: for non-uniform masking can be a [B, L] tensor
                           of scores for masking each position.
    disallow_from_mask: a boolean tensor of [B, L] of positions that should
                        not be masked out
    already_masked: a boolean tensor of [B, N] of already masked-out tokens
                    for multiple rounds of masking
  Returns: a pretrain_data.Inputs with masking added
  """
    # Get the batch size, sequence length, and max masked-out tokens
    N = config.max_predictions_per_seq
    B, L = modeling.get_shape_list(inputs.input_ids)

    # Find indices where masking out a token is allowed
    vocab = tokenization.FullTokenizer(
        config.vocab_file, do_lower_case=config.do_lower_case).vocab
    candidates_mask = _get_candidates_mask(inputs, vocab, disallow_from_mask)

    # Set the number of tokens to mask out per example
    num_tokens = tf.cast(tf.reduce_sum(inputs.input_mask, -1), tf.float32)
    num_to_predict = tf.maximum(
        1, tf.minimum(N, tf.cast(tf.round(num_tokens * mask_prob), tf.int32)))
    masked_lm_weights = tf.cast(tf.sequence_mask(num_to_predict, N),
                                tf.float32)
    if already_masked is not None:
        masked_lm_weights *= (1 - already_masked)

    # Get a probability of masking each position in the sequence
    candidate_mask_float = tf.cast(candidates_mask, tf.float32)
    sample_prob = (proposal_distribution * candidate_mask_float)
    sample_prob /= tf.reduce_sum(sample_prob, axis=-1, keepdims=True)

    # Sample the positions to mask out
    sample_prob = tf.stop_gradient(sample_prob)
    sample_logits = tf.log(sample_prob)
    masked_lm_positions = tf.random.categorical(sample_logits,
                                                N,
                                                dtype=tf.int32)
    masked_lm_positions *= tf.cast(masked_lm_weights, tf.int32)

    # Get the ids of the masked-out tokens
    shift = tf.expand_dims(L * tf.range(B), -1)
    flat_positions = tf.reshape(masked_lm_positions + shift, [-1, 1])
    masked_lm_ids = tf.gather_nd(tf.reshape(inputs.input_ids, [-1]),
                                 flat_positions)
    masked_lm_ids = tf.reshape(masked_lm_ids, [B, -1])
    masked_lm_ids *= tf.cast(masked_lm_weights, tf.int32)

    masked_synonym_ids = tf.gather_nd(
        tf.reshape(inputs.synonym_ids, [B * L, -1]), flat_positions)
    masked_synonym_ids = tf.reshape(masked_synonym_ids, [B, N, -1])
    masked_synonym_ids *= tf.expand_dims(tf.cast(masked_lm_weights, tf.int32),
                                         -1)

    # Update the input ids
    replace_with_mask_positions = masked_lm_positions * tf.cast(
        tf.less(tf.random.uniform([B, N]), 0.85), tf.int32)
    inputs_ids, _ = scatter_update(inputs.input_ids,
                                   tf.fill([B, N], vocab["[MASK]"]),
                                   replace_with_mask_positions)

    return pretrain_data.get_updated_inputs(
        inputs,
        input_ids=tf.stop_gradient(inputs_ids),
        masked_lm_positions=masked_lm_positions,
        masked_lm_ids=masked_lm_ids,
        masked_synonym_ids=masked_synonym_ids,
        masked_lm_weights=masked_lm_weights)
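Position sampling via tf.random.categorical, isolated with toy probabilities (TF2 eager). Zero-probability positions, like the special tokens excluded by candidates_mask above, get log(0) = -inf logits and are never drawn:

import tensorflow as tf

sample_prob = tf.constant([[0.0, 0.4, 0.3, 0.3]])  # one sequence of length 4
positions = tf.random.categorical(tf.math.log(sample_prob), num_samples=2,
                                  dtype=tf.int32)
print(positions.numpy())  # e.g. [[1 3]]; position 0 can never be sampled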
Code example #8
File: mlv.py Project: LONG-9621/Stackedcapsule
  def build_train_graph(self,
                        inputs,
                        min_depth,
                        max_depth,
                        cube_res,
                        theta_res,
                        phi_res,
                        r_res,
                        scale_factors,
                        num_mpi_planes,
                        learning_rate=0.0001,
                        vgg_model_weights=None,
                        global_step=0,
                        depth_clip=20.0):
    """Construct the training computation graph.

    Args:
      inputs: dictionary of tensors (see 'input_data' below) needed for training
      min_depth: minimum depth for the PSV and MPI planes
      max_depth: maximum depth for the PSV and MPI planes
      cube_res: per-side cube resolution
      theta_res: environment map width
      phi_res: environment map height
      r_res: number of radii to use when sampling spheres for rendering
      scale_factors: downsampling factors of cubes relative to the coarsest
      num_mpi_planes: number of MPI planes to infer
      learning_rate: learning rate
      vgg_model_weights: vgg weights (needed when vgg loss is used)
      global_step: training iteration
      depth_clip: maximum depth for coarsest resampled volumes

    Returns:
      A train_op to be used for training.
    """
    with tf.name_scope('setup'):
      psv_planes = pj.inv_depths(min_depth, max_depth, num_mpi_planes)
      mpi_planes = pj.inv_depths(min_depth, max_depth, num_mpi_planes)

    with tf.name_scope('input_data'):

      tgt_image = inputs['tgt_image']
      ref_image = inputs['ref_image']
      src_images = inputs['src_images']
      env_image = inputs['env_image']

      ref_depth = inputs['ref_depth']

      tgt_pose = inputs['tgt_pose']
      ref_pose = inputs['ref_pose']
      src_poses = inputs['src_poses']
      env_pose = inputs['env_pose']

      intrinsics = inputs['intrinsics']

      _, _, _, num_source = src_poses.get_shape().as_list()

    with tf.name_scope('inference'):
      num_mpi_planes = tf.shape(mpi_planes)[0]
      pred = self.infer_mpi(src_images, ref_image, ref_pose, src_poses,
                            intrinsics, psv_planes)
      rgba_layers = pred['rgba_layers']
      psv = pred['psv']

    with tf.name_scope('synthesis'):
      output_image, output_alpha_acc, _ = self.mpi_render_view(
          rgba_layers, ref_pose, tgt_pose, mpi_planes, intrinsics)
    with tf.name_scope('environment_rendering'):
      mpi_gt = self.img2mpi(ref_image, ref_depth, mpi_planes)
      output_image_gt, _, _ = self.mpi_render_view(mpi_gt, ref_pose, tgt_pose,
                                                   mpi_planes, intrinsics)

      lightvols_gt, _, _, _, _ = self.predict_lighting_vol(
          mpi_gt,
          mpi_planes,
          intrinsics,
          cube_res,
          scale_factors,
          depth_clip=depth_clip)

      lightvols, lightvol_centers, \
      lightvol_side_lengths, \
      cube_rel_shapes, \
      cube_nest_inds = self.predict_lighting_vol(rgba_layers, mpi_planes,
                                                 intrinsics, cube_res,
                                                 scale_factors,
                                                 depth_clip=depth_clip)

      lightvols_out = nets.cube_net_multires(lightvols, cube_rel_shapes,
                                             cube_nest_inds)

      gt_envmap, gt_shells = self.render_envmap(lightvols_gt, lightvol_centers,
                                                lightvol_side_lengths,
                                                cube_rel_shapes, cube_nest_inds,
                                                ref_pose, env_pose, theta_res,
                                                phi_res, r_res)

      prenet_envmap, prenet_shells = self.render_envmap(
          lightvols, lightvol_centers, lightvol_side_lengths, cube_rel_shapes,
          cube_nest_inds, ref_pose, env_pose, theta_res, phi_res, r_res)

      output_envmap, output_shells = self.render_envmap(
          lightvols_out, lightvol_centers, lightvol_side_lengths,
          cube_rel_shapes, cube_nest_inds, ref_pose, env_pose, theta_res,
          phi_res, r_res)

    with tf.name_scope('loss'):
      # mask loss for pixels outside reference frustum
      loss_mask = tf.where(
          tf.equal(output_alpha_acc[Ellipsis, tf.newaxis], 0.0),
          tf.zeros_like(output_image[:, :, :, 0:1]),
          tf.ones_like(output_image[:, :, :, 0:1]))
      loss_mask = tf.stop_gradient(loss_mask)
      tf.summary.image('loss_mask', loss_mask)

      # helper functions for loss
      def compute_error(real, fake, mask):
        mask = tf.ones_like(real) * mask
        return tf.reduce_sum(mask * tf.abs(fake - real)) / (
            tf.reduce_sum(mask) + 1.0e-8)

      # Normalized VGG loss
      def downsample(tensor, ds):
        return tf.nn.avg_pool(tensor, [1, ds, ds, 1], [1, ds, ds, 1], 'SAME')

      def vgg_loss(tgt_image, output_image, loss_mask, vgg_weights):
        """VGG activation loss definition."""

        vgg_real = nets.build_vgg19(tgt_image * 255.0, vgg_weights)
        rescaled_output_image = output_image * 255.0
        vgg_fake = nets.build_vgg19(rescaled_output_image, vgg_weights)
        p0 = compute_error(vgg_real['input'], vgg_fake['input'], loss_mask)
        p1 = compute_error(vgg_real['conv1_2'], vgg_fake['conv1_2'],
                           loss_mask) / 2.6
        p2 = compute_error(vgg_real['conv2_2'], vgg_fake['conv2_2'],
                           downsample(loss_mask, 2)) / 4.8
        p3 = compute_error(vgg_real['conv3_2'], vgg_fake['conv3_2'],
                           downsample(loss_mask, 4)) / 3.7
        p4 = compute_error(vgg_real['conv4_2'], vgg_fake['conv4_2'],
                           downsample(loss_mask, 8)) / 5.6
        p5 = compute_error(vgg_real['conv5_2'], vgg_fake['conv5_2'],
                           downsample(loss_mask, 16)) * 10 / 1.5
        total_loss = p0 + p1 + p2 + p3 + p4 + p5
        return total_loss

      # rendered image loss
      render_loss = vgg_loss(tgt_image, output_image, loss_mask,
                             vgg_model_weights) / 100.0
      total_loss = render_loss

      # rendered envmap loss
      envmap_loss = vgg_loss(env_image, output_envmap[Ellipsis, :3],
                             tf.ones_like(env_image[Ellipsis, 0:1]),
                             vgg_model_weights) / 100.0

      # set envmap loss to 0 when only training mpi network (see paper)
      envmap_loss = tf.where(tf.greater(global_step, 240000), envmap_loss, 0.0)

      total_loss += envmap_loss

      # adversarial loss for envmap
      real_logit = nets.discriminator(env_image, scope='discriminator')
      fake_logit = nets.discriminator(
          output_envmap[Ellipsis, :3], scope='discriminator')
      adv_loss_list = []
      for i in range(len(fake_logit)):
        adv_loss_list.append(0.1 * -1.0 * tf.reduce_mean(fake_logit[i][-1]))
      adv_loss = tf.reduce_mean(adv_loss_list)
      real_loss_list = []
      fake_loss_list = []
      for i in range(len(fake_logit)):
        real_loss_list.append(
            -1.0 * tf.reduce_mean(tf.minimum(real_logit[i][-1] - 1, 0.0)))
        fake_loss_list.append(
            -1.0 *
            tf.reduce_mean(tf.minimum(-1.0 * fake_logit[i][-1] - 1, 0.0)))
      real_loss = tf.reduce_mean(real_loss_list)
      fake_loss = tf.reduce_mean(fake_loss_list)
      disc_loss = real_loss + fake_loss

      # set adv/disc losses to 0 until end of training
      adv_loss = tf.where(tf.greater(global_step, 690000), adv_loss, 0.0)
      disc_loss = tf.where(tf.greater(global_step, 690000), disc_loss, 0.0)

      tf.summary.scalar('loss_disc', disc_loss)
      tf.summary.scalar('loss_disc_real', real_loss)
      tf.summary.scalar('loss_disc_fake', fake_loss)
      tf.summary.scalar('loss_adv', adv_loss)

      total_loss += adv_loss

    with tf.name_scope('train_op'):
      train_variables = [
          var for var in tf.trainable_variables()
          if 'discriminator' not in var.name
      ]
      optim = tf.train.AdamOptimizer(learning_rate, epsilon=1e-4)
      grads_and_variables = optim.compute_gradients(
          total_loss, var_list=train_variables)
      grads = [gv[0] for gv in grads_and_variables]
      variables = [gv[1] for gv in grads_and_variables]

      def denan(x):
        return tf.where(tf.is_nan(x), tf.zeros_like(x), x)

      grads_clipped = [denan(g) for g in grads]
      grads_clipped, _ = tf.clip_by_global_norm(grads_clipped, 100.0)
      train_op = [optim.apply_gradients(zip(grads_clipped, variables))]
      tf.summary.scalar('gradient global norm', tf.linalg.global_norm(grads))
      tf.summary.scalar('clipped gradient global norm',
                        tf.linalg.global_norm(grads_clipped))

      d_variables = [
          var for var in tf.trainable_variables() if 'discriminator' in var.name
      ]
      optim_d = tf.train.AdamOptimizer(learning_rate, beta1=0.0)
      train_op.append(optim_d.minimize(disc_loss, var_list=d_variables))

    with tf.name_scope('envmap_gt'):
      tf.summary.image('envmap', gt_envmap)
      tf.summary.image('envmap_alpha', gt_envmap[Ellipsis, -1:])
      for i in range(len(gt_shells)):
        i_envmap = pj.over_composite(gt_shells[i])
        tf.summary.image('envmap_level_' + str(i), i_envmap)
    with tf.name_scope('envmap_prenet'):
      tf.summary.image('envmap', prenet_envmap)
      tf.summary.image('envmap_alpha', prenet_envmap[Ellipsis, -1:])
      for i in range(len(prenet_shells)):
        i_envmap = pj.over_composite(prenet_shells[i])
        tf.summary.image('envmap_level_' + str(i), i_envmap)
    with tf.name_scope('envmap_output'):
      tf.summary.image('envmap', output_envmap)
      tf.summary.image('envmap_alpha', output_envmap[Ellipsis, -1:])
      for i in range(len(output_shells)):
        i_envmap = pj.over_composite(output_shells[i])
        tf.summary.image('envmap_level_' + str(i), i_envmap)

    tf.summary.scalar('loss_total', total_loss)
    tf.summary.scalar('loss_render', render_loss)
    tf.summary.scalar('loss_envmap', envmap_loss)
    tf.summary.scalar('min_depth', min_depth)
    tf.summary.scalar('max_depth', max_depth)

    with tf.name_scope('level_stats'):
      for i in range(len(lightvols)):
        tf.summary.scalar('cube_side_length_' + str(i),
                          lightvol_side_lengths[i])
        tf.summary.scalar('cube_center_' + str(i), lightvol_centers[i][0, -1])

    # Source images
    for i in range(num_source):
      src_image = src_images[:, :, :, i * 3:(i + 1) * 3]
      tf.summary.image('image_src_%d' % i, src_image)
    # Output image
    tf.summary.image('image_output', output_image)
    tf.summary.image('image_output_Gt', output_image_gt)
    # Target image
    tf.summary.image('image_tgt', tgt_image)
    tf.summary.image('envmap_tgt', env_image)
    # Ref image
    tf.summary.image('image_ref', ref_image)
    # Predicted color and alpha layers, and PSV
    num_summ = 8  # number of plane summaries to show in tensorboard
    for i in range(num_summ):
      ind = tf.to_int32(i * num_mpi_planes / num_summ)
      rgb = rgba_layers[:, :, :, ind, :3]
      alpha = rgba_layers[:, :, :, ind, -1:]
      ref_plane = psv[:, :, :, ind, :3]
      source_plane = psv[:, :, :, ind, 3:6]
      tf.summary.image('layer_rgb_%d' % i, rgb)
      tf.summary.image('layer_alpha_%d' % i, alpha)
      tf.summary.image('layer_rgba_%d' % i, rgba_layers[:, :, :, ind, :])
      tf.summary.image('psv_avg_%d' % i, 0.5 * ref_plane + 0.5 * source_plane)
      tf.summary.image('psv_ref_%d' % i, ref_plane)
      tf.summary.image('psv_source_%d' % i, source_plane)

    return train_op
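The gradient post-processing at the end (zero out NaNs, then clip by global norm) works standalone. A small sketch using TF2 names (tf.math.is_nan rather than the tf.is_nan used above):

import tensorflow as tf

def denan(x):
  return tf.where(tf.math.is_nan(x), tf.zeros_like(x), x)

grads = [tf.constant([3.0, float('nan')]), tf.constant([4.0])]
grads = [denan(g) for g in grads]
clipped, norm = tf.clip_by_global_norm(grads, 100.0)
print(norm.numpy())  # 5.0: the NaN entry was zeroed before the norm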
Code example #9
File: input_utils.py Project: zymale/tpu
def resize_and_crop_image_v2(image,
                             short_side,
                             long_side,
                             padded_size,
                             aug_scale_min=1.0,
                             aug_scale_max=1.0,
                             seed=1,
                             method=tf.image.ResizeMethod.BILINEAR):
    """Resizes the input image to output size (Faster R-CNN style).

  Resize and pad images given the specified short / long side length and the
  stride size.

  Here are the preprocessing steps.
  1. For a given image, keep its aspect ratio and first try to rescale the short
     side of the original image to `short_side`.
  2. If the scaled image after 1 has a long side that exceeds `long_side`, keep
     the aspect ratio and rescale the long side of the image to `long_side`.
  3. Pad the rescaled image to the padded_size.

  Args:
    image: a `Tensor` of shape [height, width, 3] representing an image.
    short_side: a scalar `Tensor` or `int` representing the desired short side
      to be rescaled to.
    long_side: a scalar `Tensor` or `int` representing the desired long side to
      be rescaled to.
    padded_size: a `Tensor` or `int` list/tuple of two elements representing
      [height, width] of the padded output image size. Padding will be applied
      after scaling the image to the desired_size.
    aug_scale_min: a `float` with range between [0, 1.0] representing minimum
      random scale applied to desired_size for training scale jittering.
    aug_scale_max: a `float` with range between [1.0, inf] representing maximum
      random scale applied to desired_size for training scale jittering.
    seed: seed for random scale jittering.
    method: function to resize input image to scaled image.

  Returns:
    output_image: `Tensor` of shape [height, width, 3] where [height, width]
      equals `padded_size`.
    image_info: a 2D `Tensor` that encodes the information of the image and the
      applied preprocessing. It is in the format of
      [[original_height, original_width], [desired_height, desired_width],
       [y_scale, x_scale], [y_offset, x_offset]], where [desired_height,
      desired_width] is the actual scaled image size, and [y_scale, x_scale] is
      the scaling factor, which is the ratio of
      scaled dimension / original dimension.
  """
    with tf.name_scope('resize_and_crop_image_v2'):
        image_size = tf.cast(tf.shape(image)[0:2], tf.float32)

        scale_using_short_side = (short_side /
                                  tf.minimum(image_size[0], image_size[1]))
        scale_using_long_side = (long_side /
                                 tf.maximum(image_size[0], image_size[1]))

        scaled_size = tf.round(image_size * scale_using_short_side)
        scaled_size = tf.where(
            tf.greater(tf.maximum(scaled_size[0], scaled_size[1]), long_side),
            tf.round(image_size * scale_using_long_side), scaled_size)
        desired_size = scaled_size

        random_jittering = (aug_scale_min != 1.0 or aug_scale_max != 1.0)

        if random_jittering:
            random_scale = tf.random_uniform([],
                                             aug_scale_min,
                                             aug_scale_max,
                                             seed=seed)
            scaled_size = tf.round(random_scale * scaled_size)

        # Computes 2D image_scale.
        image_scale = scaled_size / image_size

        # Selects non-zero random offset (x, y) if scaled image is larger than
        # desired_size.
        if random_jittering:
            max_offset = scaled_size - desired_size
            max_offset = tf.where(tf.less(max_offset, 0),
                                  tf.zeros_like(max_offset), max_offset)
            offset = max_offset * tf.random_uniform([
                2,
            ], 0, 1, seed=seed)
            offset = tf.cast(offset, tf.int32)
        else:
            offset = tf.zeros((2, ), tf.int32)

        scaled_image = tf.image.resize_images(image,
                                              tf.cast(scaled_size, tf.int32),
                                              method=method)

        if random_jittering:
            # desired_size comes from tf.round and is float32; cast it so the
            # slice bounds match the int32 offset.
            desired_size_int = tf.cast(desired_size, tf.int32)
            scaled_image = scaled_image[offset[0]:offset[0] + desired_size_int[0],
                                        offset[1]:offset[1] +
                                        desired_size_int[1], :]

        output_image = tf.image.pad_to_bounding_box(scaled_image, 0, 0,
                                                    padded_size[0],
                                                    padded_size[1])

        image_info = tf.stack([
            image_size,
            tf.cast(desired_size, dtype=tf.float32), image_scale,
            tf.cast(offset, tf.float32)
        ])
        return output_image, image_info
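A usage sketch, assuming a TF1 runtime (the function relies on tf.random_uniform and tf.image.resize_images) and hypothetical sizes. A 480x640 input with short_side=800 scales by 800/480 to 800x1067; the long side stays under 1333, so the short-side scale wins, and the result is padded to padded_size:

import tensorflow.compat.v1 as tf
tf.disable_eager_execution()

image = tf.zeros([480, 640, 3])
output_image, image_info = resize_and_crop_image_v2(
    image, short_side=800, long_side=1333, padded_size=[1344, 1344])
with tf.Session() as sess:
  out, info = sess.run([output_image, image_info])
print(out.shape)  # (1344, 1344, 3)
print(info[1])    # desired (scaled) size: [ 800. 1067.]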
Code example #10
def _clip_by_global_norm(t_list, clip_norm, use_norm, name=None):
    """Clips values of multiple tensors by the ratio of the sum of their norms.
  Given a tuple or list of tensors `t_list`, and a clipping ratio `clip_norm`,
  this operation returns a list of clipped tensors `list_clipped`
  and the global norm (`global_norm`) of all tensors in `t_list`. The global
  norm is expected to be pre-computed and passed as use_norm.
  To perform the clipping, the values `t_list[i]` are set to:
      t_list[i] * clip_norm / max(global_norm, clip_norm)
  where:
      global_norm = sqrt(sum([l2norm(t)**2 for t in t_list]))
  If `clip_norm > global_norm` then the entries in `t_list` remain as they are,
  otherwise they're all shrunk by the global ratio.
  Any of the entries of `t_list` that are of type `None` are ignored.
  This is the correct way to perform gradient clipping (for example, see
  [Pascanu et al., 2012](http://arxiv.org/abs/1211.5063)
  ([pdf](http://arxiv.org/pdf/1211.5063.pdf))).
  However, it is slower than `clip_by_norm()` because all the parameters must be
  ready before the clipping operation can be performed.
  Args:
    t_list: A tuple or list of mixed `Tensors`, `IndexedSlices`, or None.
    clip_norm: A 0-D (scalar) `Tensor` > 0. The clipping ratio.
    use_norm: A 0-D (scalar) `Tensor` of type `float` (optional). The global
      norm to use. If not provided, `global_norm()` is used to compute the norm.
    name: A name for the operation (optional).
  Returns:
    list_clipped: A list of `Tensors` of the same type as `t_list`.
    global_norm: A 0-D (scalar) `Tensor` representing the global norm.
  Raises:
    TypeError: If `t_list` is not a sequence.
  """
    if not isinstance(t_list, collections.abc.Sequence) or isinstance(
            t_list, six.string_types):
        raise TypeError('t_list should be a sequence')
    t_list = list(t_list)

    # Removed as use_norm should always be passed
    # if use_norm is None:
    #   use_norm = global_norm(t_list, name)

    with tf.name_scope(name, 'clip_by_global_norm',
                       t_list + [clip_norm]) as name:
        # Calculate L2-norm, clip elements by ratio of clip_norm to L2-norm
        scale = clip_norm * tf.minimum(
            1.0 / use_norm,
            tf.ones([1], dtype=use_norm.dtype) / clip_norm)

        values = [
            tf.cast(
                tf.convert_to_tensor(
                    t.values if isinstance(t, tf.IndexedSlices) else t,
                    name='t_%d' % i,
                ),
                dtype=tf.float32,
            ) if t is not None else t for i, t in enumerate(t_list)
        ]

        values_clipped = []
        for i, v in enumerate(values):
            if v is None:
                values_clipped.append(None)
            else:
                with tf.colocate_with(v):
                    values_clipped.append(
                        tf.identity(v * scale, name='%s_%d' % (name, i)))

        list_clipped = [
            tf.IndexedSlices(c_v, t.indices, t.dense_shape) if isinstance(
                t, tf.IndexedSlices) else c_v
            for (c_v, t) in zip(values_clipped, t_list)
        ]

    return list_clipped, use_norm
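A quick numeric check, with the pre-computed norm passed in as the docstring requires (TF1-style session; the module-level imports collections, six, and tf are assumed):

import tensorflow.compat.v1 as tf
tf.disable_eager_execution()

t_list = [tf.constant([3.0, 4.0])]  # global norm = 5
use_norm = tf.linalg.global_norm(t_list)
clipped, _ = _clip_by_global_norm(t_list, clip_norm=1.0, use_norm=use_norm)
with tf.Session() as sess:
  print(sess.run(clipped[0]))  # [0.6 0.8]: scaled by clip_norm / global_norm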
Code example #11
File: mlv.py Project: LONG-9621/Stackedcapsule
  def predict_lighting_vol(self,
                           mpi,
                           planes,
                           intrinsics,
                           cube_res,
                           scale_factors,
                           depth_clip=20.0):
    """Predict lighting volumes from MPI.

    Args:
      mpi: input mpi
      planes: input mpi plane depths
      intrinsics: ref camera intrinsics
      cube_res: resolution of cube volume for lighting prediction
      scale_factors: scales for multiresolution cube sampling
      depth_clip: farthest depth (sets limits of coarsest cube)

    Returns:
      lists of resampled lighting volumes, their centers and side lengths, each
      cube's footprint shape within the next coarser cube, and nesting indices.
    """

    batchsize = tf.shape(mpi)[0]

    max_depth = tf.minimum(planes[0], depth_clip)

    cube_side_lengths = [2.0 * max_depth]
    for i in range(len(scale_factors)):
      cube_side_lengths.append(2.0 * max_depth / scale_factors[i])

    # shape of each cube's footprint within the next coarser volume
    cube_rel_shapes = []
    for i in range(len(scale_factors)):
      if i == 0:
        i_rel_shape = cube_res // scale_factors[0]
      else:
        i_rel_shape = (cube_res * scale_factors[i - 1]) // scale_factors[i]
      cube_rel_shapes.append(i_rel_shape)

    cube_centers = [tf.zeros([batchsize, 3])]
    for i in range(len(scale_factors)):
      i_center_depth = (cube_side_lengths[i] / (cube_res - 1)) * (
          cube_rel_shapes[i] // 2)
      cube_centers.append(
          tf.concat([
              tf.zeros([batchsize, 2]), i_center_depth * tf.ones([batchsize, 1])
          ],
                    axis=1))

    cube_nest_inds = []
    for i in range(len(scale_factors)):
      if i == 0:
        i_nest_inds = [(cube_res - cube_rel_shapes[i]) // 2,
                       (cube_res - cube_rel_shapes[i]) // 2,
                       cube_res // 2 - cube_rel_shapes[i]]
      else:
        i_nest_inds = [(cube_res - cube_rel_shapes[i]) // 2,
                       (cube_res - cube_rel_shapes[i]) // 2,
                       cube_res - cube_rel_shapes[i]]
      cube_nest_inds.append(i_nest_inds)

    cube_list = []
    for i in range(len(cube_centers)):
      i_cube, _ = pj.mpi_resample_cube(mpi, cube_centers[i], intrinsics, planes,
                                       cube_side_lengths[i], cube_res)

      cube_list.append(i_cube)
    return cube_list, cube_centers, cube_side_lengths, cube_rel_shapes, cube_nest_inds
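The coarse-to-fine geometry above is determined entirely by `max_depth` and `scale_factors`. A small plain-Python sketch of the side-length computation (`cube_side_lengths` is a hypothetical helper; the values are illustrative):

def cube_side_lengths(max_depth, scale_factors):
    # the coarsest cube spans [-max_depth, max_depth]; each finer cube
    # shrinks by its scale factor
    return [2.0 * max_depth] + [2.0 * max_depth / s for s in scale_factors]

# cube_side_lengths(20.0, [2, 4, 8]) -> [40.0, 20.0, 10.0, 5.0]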
Code example #12
def post_process_gradients(grads_and_vars, summaries, lr, clip_gradients,
                           larc_params):
    """Applies post processing to gradients, i.e. clipping, LARC, summaries."""
    if 'global_gradient_norm' in summaries:
        tf.summary.scalar('global_gradient_norm',
                          _global_norm_with_cast(grads_and_vars))

    # Optionally clip gradients by global norm.
    if clip_gradients is not None:
        grads_and_vars = _clip_gradients_by_norm(grads_and_vars,
                                                 clip_gradients)

    # Add histograms for variables, gradients and gradient norms.

    if 'global_gradient_norm' in summaries:
        for gradient, variable in grads_and_vars:
            if isinstance(gradient, tf.IndexedSlices):
                grad_values = gradient.values
            else:
                grad_values = gradient

            if isinstance(variable, tf.IndexedSlices):
                var_values = variable.values
            else:
                var_values = variable

            if grad_values is not None:
                var_name = variable.name.replace(':', '_')
                if 'gradients' in summaries:
                    # need to mask nans for automatic loss scaling
                    tf.summary.histogram('gradients/%s' % var_name,
                                         mask_nans(grad_values))
                if 'gradient_norm' in summaries:
                    tf.summary.scalar('gradient_norm/%s' % var_name,
                                      tf.norm(grad_values))
                if 'variables' in summaries:
                    tf.summary.histogram('variables/%s' % var_name, var_values)
                if 'variable_norm' in summaries:
                    tf.summary.scalar('variable_norm/%s' % var_name,
                                      tf.norm(var_values))

    if clip_gradients is not None and 'global_gradient_norm' in summaries:
        tf.summary.scalar(
            'global_clipped_gradient_norm',
            _global_norm_with_cast(grads_and_vars),
        )

    # LARC gradient re-scaling
    if larc_params is not None:
        check_params(
            config=larc_params,
            required_dict={'larc_eta': float},
            optional_dict={
                'larc_mode': ['clip', 'scale'],
                'min_update': float,
                'epsilon': float,
            },
        )
        larc_eta = larc_params['larc_eta']
        larc_mode = larc_params.get('larc_mode', 'clip')
        min_update = larc_params.get('min_update', 1e-7)
        eps = larc_params.get('epsilon', 1e-7)

        grads_and_vars_larc = [None] * len(grads_and_vars)
        for idx, (g, v) in enumerate(grads_and_vars):
            var_dtype = v.dtype
            v_norm = tf.norm(tensor=tf.cast(v, tf.float32), ord=2)
            g_norm = tf.norm(tensor=tf.cast(g, tf.float32), ord=2)

            if larc_mode == 'clip':
                larc_grad_update = tf.maximum(
                    larc_eta * v_norm / (lr * (g_norm + eps)), min_update)
                if 'larc_summaries' in summaries:
                    tf.summary.scalar(
                        'larc_clip_on/{}'.format(v.name),
                        tf.cast(tf.less(larc_grad_update, 1.0), tf.int32),
                    )
                larc_grad_update = tf.minimum(larc_grad_update, 1.0)
            else:
                larc_grad_update = tf.maximum(
                    larc_eta * v_norm / (g_norm + eps), min_update)
            larc_grad_update = tf.saturate_cast(larc_grad_update, var_dtype)
            grads_and_vars_larc[idx] = (larc_grad_update * g, v)

            # adding additional summary
            if 'larc_summaries' in summaries:
                tf.summary.scalar('larc_grad_update/{}'.format(v.name),
                                  larc_grad_update)
                tf.summary.scalar(
                    'larc_final_lr/{}'.format(v.name),
                    tf.cast(lr, var_dtype) * larc_grad_update,
                )
        grads_and_vars = grads_and_vars_larc
    return grads_and_vars
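For reference, the LARC re-scaling above reduces to one scalar per variable. A plain-Python sketch of the two modes under the same parameter names (no TF, illustrative only):

def larc_scale(v_norm, g_norm, lr, larc_eta, larc_mode='clip',
               min_update=1e-7, eps=1e-7):
    # 'clip' bounds the effective per-variable learning rate by the trust
    # ratio larc_eta * ||v|| / ||g||; 'scale' rescales the gradient by that
    # trust ratio directly, independent of lr
    if larc_mode == 'clip':
        return min(max(larc_eta * v_norm / (lr * (g_norm + eps)), min_update), 1.0)
    return max(larc_eta * v_norm / (g_norm + eps), min_update)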
Code example #13
File: yolo.py  Project: videetparekh/model-zoo-models
    def call(self, x):
        input_image, y_pred, y_true, true_boxes = x

        # adjust the shape of y_pred to [batch, grid_h, grid_w, 3, 4+1+nb_class]
        y_pred = tf.reshape(
            y_pred,
            tf.concat([tf.shape(y_pred)[:3],
                       tf.constant([3, -1])], axis=0))

        # initialize the masks
        object_mask = tf.expand_dims(y_true[..., 4], 4)

        # the variable to keep track of number of batches processed
        batch_seen = tf.Variable(0.)

        # compute grid factor and net factor
        grid_h = tf.shape(y_true)[1]
        grid_w = tf.shape(y_true)[2]
        grid_factor = tf.reshape(tf.cast([grid_w, grid_h], tf.float32),
                                 [1, 1, 1, 1, 2])

        net_h = tf.shape(input_image)[1]
        net_w = tf.shape(input_image)[2]
        net_factor = tf.reshape(tf.cast([net_w, net_h], tf.float32),
                                [1, 1, 1, 1, 2])
        """
        Adjust prediction
        """
        pred_box_xy = (self.cell_grid[:, :grid_h, :grid_w, :, :] +
                       tf.sigmoid(y_pred[..., :2]))  # sigma(t_xy) + c_xy
        pred_box_wh = y_pred[..., 2:4]  # t_wh
        pred_box_conf = tf.expand_dims(tf.sigmoid(y_pred[..., 4]),
                                       4)  # adjust confidence
        pred_box_class = y_pred[..., 5:]  # adjust class probabilities
        """
        Adjust ground truth
        """
        true_box_xy = y_true[..., 0:2]  # (sigma(t_xy) + c_xy)
        true_box_wh = y_true[..., 2:4]  # t_wh
        true_box_conf = tf.expand_dims(y_true[..., 4], 4)
        true_box_class = tf.argmax(y_true[..., 5:], -1)
        """
        Compare each predicted box to all true boxes
        """
        # initially, drag the objectness of all boxes towards 0
        conf_delta = pred_box_conf - 0

        # then, ignore the boxes which have good overlap with some true box
        true_xy = true_boxes[..., 0:2] / grid_factor
        true_wh = true_boxes[..., 2:4] / net_factor

        true_wh_half = true_wh / 2.
        true_mins = true_xy - true_wh_half
        true_maxes = true_xy + true_wh_half

        pred_xy = tf.expand_dims(pred_box_xy / grid_factor, 4)
        pred_wh = tf.expand_dims(
            tf.exp(pred_box_wh) * self.anchors / net_factor, 4)

        pred_wh_half = pred_wh / 2.
        pred_mins = pred_xy - pred_wh_half
        pred_maxes = pred_xy + pred_wh_half

        intersect_mins = tf.maximum(pred_mins, true_mins)
        intersect_maxes = tf.minimum(pred_maxes, true_maxes)

        intersect_wh = tf.maximum(intersect_maxes - intersect_mins, 0.)
        intersect_areas = intersect_wh[..., 0] * intersect_wh[..., 1]

        true_areas = true_wh[..., 0] * true_wh[..., 1]
        pred_areas = pred_wh[..., 0] * pred_wh[..., 1]

        union_areas = pred_areas + true_areas - intersect_areas
        iou_scores = tf.truediv(intersect_areas, union_areas)

        best_ious = tf.reduce_max(iou_scores, axis=4)
        conf_delta *= tf.expand_dims(
            tf.to_float(best_ious < self.ignore_thresh), 4)
        """
        Compute some online statistics
        """
        true_xy = true_box_xy / grid_factor
        true_wh = tf.exp(true_box_wh) * self.anchors / net_factor

        true_wh_half = true_wh / 2.
        true_mins = true_xy - true_wh_half
        true_maxes = true_xy + true_wh_half

        pred_xy = pred_box_xy / grid_factor
        pred_wh = tf.exp(pred_box_wh) * self.anchors / net_factor

        pred_wh_half = pred_wh / 2.
        pred_mins = pred_xy - pred_wh_half
        pred_maxes = pred_xy + pred_wh_half

        intersect_mins = tf.maximum(pred_mins, true_mins)
        intersect_maxes = tf.minimum(pred_maxes, true_maxes)
        intersect_wh = tf.maximum(intersect_maxes - intersect_mins, 0.)
        intersect_areas = intersect_wh[..., 0] * intersect_wh[..., 1]

        true_areas = true_wh[..., 0] * true_wh[..., 1]
        pred_areas = pred_wh[..., 0] * pred_wh[..., 1]

        union_areas = pred_areas + true_areas - intersect_areas
        iou_scores = tf.truediv(intersect_areas, union_areas)
        iou_scores = object_mask * tf.expand_dims(iou_scores, 4)

        count = tf.reduce_sum(object_mask)
        count_noobj = tf.reduce_sum(1 - object_mask)
        detect_mask = tf.to_float((pred_box_conf * object_mask) >= 0.5)
        class_mask = tf.expand_dims(
            tf.to_float(tf.equal(tf.argmax(pred_box_class, -1),
                                 true_box_class)), 4)
        recall50 = tf.reduce_sum(
            tf.to_float(iou_scores >= 0.5) * detect_mask *
            class_mask) / (count + 1e-3)
        recall75 = tf.reduce_sum(
            tf.to_float(iou_scores >= 0.75) * detect_mask *
            class_mask) / (count + 1e-3)
        avg_iou = tf.reduce_sum(iou_scores) / (count + 1e-3)
        avg_obj = tf.reduce_sum(pred_box_conf * object_mask) / (count + 1e-3)
        avg_noobj = tf.reduce_sum(pred_box_conf *
                                  (1 - object_mask)) / (count_noobj + 1e-3)
        avg_cat = tf.reduce_sum(object_mask * class_mask) / (count + 1e-3)
        """
        Warm-up training
        """
        batch_seen = tf.assign_add(batch_seen, 1.)

        true_box_xy, true_box_wh, xywh_mask = tf.cond(
            tf.less(batch_seen, self.warmup_batches + 1), lambda: [
                true_box_xy +
                (0.5 + self.cell_grid[:, :grid_h, :grid_w, :, :]) *
                (1 - object_mask), true_box_wh + tf.zeros_like(true_box_wh) *
                (1 - object_mask),
                tf.ones_like(object_mask)
            ], lambda: [true_box_xy, true_box_wh, object_mask])
        """
        Compare each true box to all anchor boxes
        """
        wh_scale = tf.exp(true_box_wh) * self.anchors / net_factor
        wh_scale = tf.expand_dims(
            2 - wh_scale[..., 0] * wh_scale[..., 1],
            axis=4)  # the smaller the box, the bigger the scale

        xy_delta = xywh_mask * (pred_box_xy -
                                true_box_xy) * wh_scale * self.xywh_scale
        wh_delta = xywh_mask * (pred_box_wh -
                                true_box_wh) * wh_scale * self.xywh_scale
        conf_delta = object_mask * (
            pred_box_conf - true_box_conf) * self.obj_scale + (
                1 - object_mask) * conf_delta * self.noobj_scale
        class_delta = object_mask * \
                      tf.expand_dims(tf.nn.sparse_softmax_cross_entropy_with_logits(labels=true_box_class, logits=pred_box_class), 4) * \
                      self.class_scale

        loss_xy = tf.reduce_sum(tf.square(xy_delta), list(range(1, 5)))
        loss_wh = tf.reduce_sum(tf.square(wh_delta), list(range(1, 5)))
        loss_conf = tf.reduce_sum(tf.square(conf_delta), list(range(1, 5)))
        loss_class = tf.reduce_sum(class_delta, list(range(1, 5)))

        loss = loss_xy + loss_wh + loss_conf + loss_class

        if debug:
            loss = tf.Print(loss, [grid_h, avg_obj],
                            message='avg_obj \t\t',
                            summarize=1000)
            loss = tf.Print(loss, [grid_h, avg_noobj],
                            message='avg_noobj \t\t',
                            summarize=1000)
            loss = tf.Print(loss, [grid_h, avg_iou],
                            message='avg_iou \t\t',
                            summarize=1000)
            loss = tf.Print(loss, [grid_h, avg_cat],
                            message='avg_cat \t\t',
                            summarize=1000)
            loss = tf.Print(loss, [grid_h, recall50],
                            message='recall50 \t',
                            summarize=1000)
            loss = tf.Print(loss, [grid_h, recall75],
                            message='recall75 \t',
                            summarize=1000)
            loss = tf.Print(loss, [grid_h, count],
                            message='count \t',
                            summarize=1000)
            loss = tf.Print(loss, [
                grid_h,
                tf.reduce_sum(loss_xy),
                tf.reduce_sum(loss_wh),
                tf.reduce_sum(loss_conf),
                tf.reduce_sum(loss_class)
            ],
                            message='loss xy, wh, conf, class: \t',
                            summarize=1000)

        return loss * self.grid_scale
Code example #14
File: tensorflow_fn.py  Project: shishouyuan/xingtian
def minimum(arg1, arg2):
    """Get min item."""
    return tf.minimum(arg1, arg2)
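Note that `tf.minimum` is elementwise and broadcasts, so the wrapper behaves as in this short sketch (hypothetical values):

a = tf.constant([1.0, 5.0, 3.0])
b = tf.constant(2.0)   # a scalar broadcasts against the vector
m = minimum(a, b)      # evaluates to [1.0, 2.0, 2.0]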
Code example #15
File: tflite_test.py  Project: nibnus/NNEF-Tools
    def test_min(self):
        input1 = tf.placeholder(shape=(4, 32, 32, 3), dtype=tf.float32)
        input2 = tf.placeholder(shape=(4, 32, 32, 3), dtype=tf.float32)
        output = tf.minimum(input1, input2)

        self._test_conversion('min', [input1, input2], [output])
Code example #16
    def call(self, inputs, prev_state):
        """Evaluates one timestep of the current neural stack cell.

    See section 3.4 of Grefenstette et al., 2015.

    Args:
      inputs: The inputs to the neural stack cell should be a tf.float32 tensor
        with shape [batch_size, embedding_size]
      prev_state: The NeuralStackState from the previous timestep.

    Returns:
      A tuple of the output of the stack as well as the new NeuralStackState.
    """
        batch_size = tf.shape(inputs)[0]

        # Call the controller and get controller interface values.
        with tf.control_dependencies([prev_state.read_strengths]):
            controller_output = self.call_controller(
                inputs, prev_state.read_values, prev_state.controller_state,
                batch_size)

        # Always write input values to memory regardless of push strength.
        # See Equation-1 in Grefenstette et al., 2015.
        new_memory_values = prev_state.memory_values + tf.reduce_sum(
            tf.expand_dims(controller_output.write_values, axis=2) *
            prev_state.write_strengths,
            axis=1)

        # Attenuate the read strengths of existing memory values depending on the
        # current pop strength.
        # See Equation-2 in Grefenstette et al., 2015.
        new_read_strengths = prev_state.read_strengths
        for h in range(self._num_read_heads - 1, -1, -1):
            new_read_strengths = tf.nn.relu(new_read_strengths - tf.nn.relu(
                tf.slice(controller_output.pop_strengths, [0, h, 0, 0],
                         [-1, 1, -1, -1]) -
                tf.expand_dims(tf.reduce_sum(
                    new_read_strengths * self.get_read_mask(h), axis=2),
                               axis=3)))

        # Combine all write heads and their associated push values into a single set
        # of read weights.
        new_read_strengths += tf.reduce_sum(controller_output.push_strengths *
                                            prev_state.write_strengths,
                                            axis=1,
                                            keep_dims=True)

        # Calculate the "top" value of the stack by looking at read strengths.
        # See Equation-3 in Grefenstette et al., 2015.
        new_read_values = tf.reduce_sum(
            tf.minimum(
                new_read_strengths,
                tf.nn.relu(1 - tf.expand_dims(tf.reduce_sum(
                    new_read_strengths * tf.concat([
                        self.get_read_mask(h)
                        for h in range(self._num_read_heads)
                    ],
                                                   axis=1),
                    axis=2),
                                              axis=3))) *
            tf.expand_dims(new_memory_values, axis=1),
            axis=2)

        # Temporarily split write strengths apart so they can be shifted in
        # different directions.
        write_strengths_by_head = tf.split(prev_state.write_strengths,
                                           self._num_write_heads,
                                           axis=1)
        # Shift the write strengths for each write head in the direction indicated
        # by get_write_head_offset().
        new_write_strengths = tf.concat([
            tf.roll(
                write_strength, shift=self.get_write_head_offset(h), axis=2)
            for h, write_strength in enumerate(write_strengths_by_head)
        ],
                                        axis=1)

        return (controller_output.outputs,
                NeuralStackState(controller_state=controller_output.state,
                                 read_values=new_read_values,
                                 memory_values=new_memory_values,
                                 read_strengths=new_read_strengths,
                                 write_strengths=new_write_strengths))
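The read-strength attenuation above implements Equation-2 of Grefenstette et al., 2015. A NumPy sketch for a single read head and a single batch element (illustrative only, not the batched TF version):

import numpy as np

def pop_attenuate(strengths, pop):
    # strengths: 1-D array of read strengths, index 0 = bottom of the stack
    # pop: scalar pop strength u_t
    new = np.empty_like(strengths)
    for i in range(len(strengths)):
        above = strengths[i + 1:].sum()  # strength sitting above entry i
        new[i] = max(0.0, strengths[i] - max(0.0, pop - above))
    return new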
Code example #17
def features_to_nonpadding(features, inputs_or_targets="inputs"):
  """See transformer.features_to_nonpadding."""
  key = inputs_or_targets + "_segmentation"
  if features and key in features:
    return tf.minimum(tf.to_float(features[key]), 1.0)
  return None
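The `tf.minimum(..., 1.0)` turns segmentation ids (0 on padding, 1..k on packed sequences) into a binary non-padding mask, e.g. (hypothetical values):

seg = tf.constant([0., 1., 1., 2., 2., 0.])  # 0 marks padding positions
mask = tf.minimum(seg, 1.0)                  # -> [0., 1., 1., 1., 1., 0.]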
Code example #18
    def __init__(self,
                 session,
                 state_spec,
                 action_spec,
                 hidden_layers,
                 learning_rate,
                 learning_rate_action,
                 learning_rate_ga,
                 batch_size,
                 action_maximization_iterations,
                 name,
                 l2_loss_flag=False,
                 simple_lambda_flag=True,
                 solver=None,
                 sufficient_ascent_flag=False,
                 initial_lambda=10.0,
                 lambda_max=5e3):
        """Creates CAQL networks.

    Args:
      session: TF session.
      state_spec: tf_agents.specs.array_spec.ArraySpec. Specification for state.
      action_spec: tf_agents.specs.array_spec.ArraySpec. Specification for
        action.
      hidden_layers: list of integers. Number of hidden units for each hidden
        layer.
      learning_rate: float on Q function learning rate.
      learning_rate_action: float on action function learning rate.
      learning_rate_ga: float. Learning rate for gradient ascent optimizer.
      batch_size: int on batch size for training.
      action_maximization_iterations: int on CEM/gradient ascent iterations.
      name: string on name of network.
      l2_loss_flag: bool on using l2 loss.
      simple_lambda_flag: bool on using lambda hinge loss.
      solver: string on inner max optimizer. Supported optimizers are
        "gradient_ascent", "cross_entropy", "ails", "mip".
      sufficient_ascent_flag: bool on using sufficient ascent.
      initial_lambda: float on initial lambda (only for simple_lambda_flag).
      lambda_max: float on lambda upper-bound.
    """
        self._session = session
        self.state_spec = state_spec
        self.action_spec = action_spec
        self.state_dim = state_spec.shape[0]
        self.action_dim = action_spec.shape[0]
        self.action_max = action_spec.maximum
        self.action_min = action_spec.minimum
        self.hidden_layers = hidden_layers
        self.learning_rate = learning_rate
        self.learning_rate_action = learning_rate_action
        self.learning_rate_ga = learning_rate_ga
        self.batch_size = batch_size
        self.action_maximization_iterations = action_maximization_iterations

        self.name = name
        self.lambda_max = lambda_max
        if solver == "ails" or solver == "mip":
            raise ValueError("AILS and MIP solvers are not supported yet.")

        # define placeholders
        self._state_tensor = tf.placeholder(dtype=tf.float32,
                                            name="state_tensor",
                                            shape=(None, self.state_dim))
        self._state_deviation_tensor = tf.placeholder(
            dtype=tf.float32,
            name="state_deviation_tensor",
            shape=(None, self.state_dim))
        self._action_tensor = tf.placeholder(dtype=tf.float32,
                                             name="action_tensor",
                                             shape=(None, self.action_dim))
        self._next_state_tensor = tf.placeholder(dtype=tf.float32,
                                                 name="next_state_tensor",
                                                 shape=(None, self.state_dim))
        self._reward_tensor = tf.placeholder(dtype=tf.float32,
                                             name="reward_tensor",
                                             shape=(None, 1))
        self._done_tensor = tf.placeholder(dtype=tf.bool,
                                           name="done_tensor",
                                           shape=(None, 1))
        self._discount_factor = tf.placeholder(dtype=tf.float32,
                                               name="discounting_factor",
                                               shape=())
        self._maxq_label = tf.placeholder(dtype=tf.float32,
                                          shape=(None, 1),
                                          name="maxq_label")

        self._backup_tensor = self._reward_tensor + (1.0 - tf.to_float(
            self._done_tensor)) * self._discount_factor * self._maxq_label

        self._true_label = tf.placeholder(dtype=tf.float32,
                                          shape=(None, 1),
                                          name="true_label")

        self.q_function_network = self._build_q_function_net(
            self._state_tensor, self._action_tensor)
        self.state_perturbed_q_function_network = self.q_function_network \
            + tf.expand_dims(tf.einsum("ij,ij->i",
                                       tf.gradients(self.q_function_network,
                                                    self._state_tensor)[0],
                                       self._state_deviation_tensor),
                             axis=-1)

        self._td_rmse = tf.sqrt(
            tf.losses.mean_squared_error(
                self._reward_tensor + (1.0 - tf.to_float(self._done_tensor)) *
                self._discount_factor * self._maxq_label,
                self.q_function_network))

        if simple_lambda_flag:
            with tf.variable_scope("{}_{}".format(self.name,
                                                  "lambda_function")):
                lambda_var = tf.Variable(initial_value=initial_lambda,
                                         trainable=True,
                                         name="lambda_var")
                self.lambda_function_network = tf.tile(
                    tf.reshape(
                        tf.minimum(lambda_max,
                                   tf.maximum(0.0, lambda_var),
                                   name="lambda_proj"), (-1, 1)),
                    (self.batch_size, 1))
        else:
            self.lambda_function_network = self._build_lambda_function_net(
                self._state_tensor, self._action_tensor)

        # define loss
        if l2_loss_flag:
            self._q_function_loss = tf.losses.mean_squared_error(
                self._true_label, self.q_function_network)
        else:
            self._q_function_loss = tf.reduce_mean(
                self.q_function_network + self.lambda_function_network *
                tf.maximum(0.0, self._true_label - self.q_function_network))

        self._lambda_function_loss = tf.reduce_mean(
            -self.lambda_function_network *
            (self._true_label - self.q_function_network))

        # Action network to learn argmax of Q
        self._best_q_label = tf.placeholder(dtype=tf.float32,
                                            shape=(None, 1),
                                            name="best_q_label")

        # create network placeholders
        self._create_network_var_ph()

        self.action_function_network = self._build_action_function_net(
            self._state_tensor)
        self.dummy_q_function_network = self._build_q_function_net(
            self._state_tensor, self.action_function_network)

        self._action_function_loss = tf.losses.mean_squared_error(
            self._best_q_label, self.dummy_q_function_network)

        # optimizer
        # NOTE: global_step is incremented only by the main Q-function
        # trainer, so it is passed solely to the q_function optimizer below.
        global_step = tf.Variable(0,
                                  name="{}_global_step".format(self.name),
                                  trainable=False)
        with tf.variable_scope("{}_{}".format(self.name, "optimizer")):
            self._action_function_optimizer = tf.train.AdamOptimizer(
                learning_rate=self.learning_rate_action).minimize(
                    self._action_function_loss,
                    var_list=tf.trainable_variables("{}_{}".format(
                        self.name, "action_function")))
            self._q_function_optimizer = tf.train.AdamOptimizer(
                learning_rate=self.learning_rate).minimize(
                    self._q_function_loss,
                    global_step=global_step,
                    var_list=tf.trainable_variables("{}_{}".format(
                        self.name, "q_function")))
            self._lambda_function_optimizer = tf.train.AdamOptimizer(
                learning_rate=self.learning_rate).minimize(
                    self._lambda_function_loss,
                    var_list=tf.trainable_variables("{}_{}".format(
                        self.name, "lambda_function")))

        # Tensors for dual solvers
        self._create_dual_maxq_label_tensor()
        self._create_dual_active_constraint_condition_tensor()

        self.solver = solver
        self.sufficient_ascent_flag = sufficient_ascent_flag
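The `lambda_proj` op above is simply a clamp of the scalar multiplier into [0, lambda_max] before it is tiled across the batch; in plain Python (hypothetical helper):

def lambda_proj(lam, lambda_max=5e3):
    # keep the Lagrange multiplier non-negative and bounded
    return min(lambda_max, max(0.0, lam))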
Code example #19
def resize_crop_pad(image,
                    desired_output_size,
                    stride,
                    aug_scale_min=1.0,
                    aug_scale_max=1.0,
                    boxes=None,
                    classes=None,
                    attributes=None,
                    masks=None,
                    crop_mask_size=112):
    """Resize, crop and pad images, boxes and masks (RetinaNet style).

  Resize, crop and pad images, (optionally boxes and masks) given the desired
  output size of the image and the stride size.

  Here are the preprocessing steps.
  1. For a given image, keep its aspect ratio and rescale the image to make it
     the largest rectangle to be bounded by the rectangle specified by the
     `desired_output_size`.
  2. Pad the rescaled image such that its height and width become the smallest
     multiple of the stride that is larger than or equal to the desired output
     dimension.

  Args:
    image: an image tensor of shape [original_height, original_width, 3].
    desired_output_size: a tuple of two integers indicating the desired output
      image size. Note that the actual output size could be different from this.
    stride: the stride of the backbone network. Each of the output image sides
      must be the multiple of this.
    aug_scale_min: a `float` in the range [0, 1.0] representing the minimum
      random scale applied to desired_size for training scale jittering.
    aug_scale_max: a `float` in the range [1.0, inf) representing the maximum
      random scale applied to desired_size for training scale jittering.
    boxes: (Optional) a tensor of shape [num_boxes, 4] representing the box
      corners in normalized coordinates.
    classes: (Optional) a tensor of shape [num_boxes] representing the box
      classes.
    attributes: (Optional) a tensor of per-box attributes; gathered with the
      same indices as `classes` when out-of-crop boxes are dropped.
    masks: (Optional) a tensor of shape [num_boxes, image_height, image_width]
      representing the instance masks which have the same shape as the input
      image.
    crop_mask_size: an integer indicating the size of the cropped mask.

  Returns:
    image: the processed image tensor after being resized and padded.
    image_info: a tensor of shape [5] which encodes the height, width before
      and after resizing and the scaling factor.
    boxes: None or the processed box tensor after being resized and padded.
      After the processing, boxes will be in the absolute coordinates w.r.t.
      the scaled image.
    classes: None or the processed class tensor after boxes being resized and
      filtered.
    attributes: None or the processed attributes tensor after boxes being
      resized and filtered.
    masks: None or the processed mask tensor after being resized.
  """
    if boxes is not None:
        assert classes is not None

    input_shape = tf.shape(image)
    input_height = tf.cast(input_shape[0], dtype=tf.float32)
    input_width = tf.cast(input_shape[1], dtype=tf.float32)
    desired_height, desired_width = desired_output_size

    # Find the scale factor such that the scaled image is surrounded by the
    # rectangle of shape of desired_output_size.
    scale_if_resize_height = desired_height / input_height
    scale_if_resize_width = desired_width / input_width
    scale = tf.minimum(scale_if_resize_height, scale_if_resize_width)
    desired_scaled_height = scale * input_height
    desired_scaled_width = scale * input_width
    desired_scaled_size = tf.stack(
        [desired_scaled_height, desired_scaled_width], axis=0)

    random_jittering = aug_scale_min != 1.0 or aug_scale_max != 1.0

    if random_jittering:
        random_scale = tf.random_uniform([], aug_scale_min, aug_scale_max)
        scale = random_scale * scale
        scaled_size = tf.round(random_scale * desired_scaled_size)
    else:
        scaled_size = desired_scaled_size
    scaled_size_int = tf.cast(scaled_size, dtype=tf.int32)
    desired_scaled_size_int = tf.cast(desired_scaled_size, dtype=tf.int32)

    image = tf.image.resize_images(image,
                                   scaled_size_int,
                                   method=tf.image.ResizeMethod.BILINEAR)

    if boxes is not None:
        normalized_boxes = boxes
        # Convert the normalized coordinates to the coordinates w.r.t.
        # the scaled image.
        boxes = boxes * tf.tile(tf.expand_dims(scaled_size, axis=0), [1, 2])

        if masks is not None and not random_jittering:
            num_instances = tf.shape(boxes)[0]
            masks = tf.image.crop_and_resize(
                image=tf.expand_dims(masks, axis=-1),
                boxes=normalized_boxes,
                box_indices=tf.range(num_instances, dtype=tf.int32),
                crop_size=[crop_mask_size, crop_mask_size],
                method='bilinear')
            masks = tf.squeeze(masks, axis=-1)

    if random_jittering:
        max_offset = scaled_size - desired_scaled_size
        max_offset = tf.where(tf.less(max_offset, 0),
                              tf.zeros_like(max_offset), max_offset)
        offset = tf.cast(max_offset * tf.random_uniform((2, ), 0, 1),
                         dtype=tf.int32)

        image = image[offset[0]:offset[0] + desired_scaled_size_int[0],
                      offset[1]:offset[1] + desired_scaled_size_int[1], :]

        if boxes is not None:
            box_offsets = tf.cast(tf.tile(tf.expand_dims(offset, axis=0),
                                          [1, 2]),
                                  dtype=tf.float32)
            boxes -= box_offsets
            boxes = box_utils.clip_boxes(boxes, desired_scaled_size_int[0],
                                         desired_scaled_size_int[1])
            indices = tf.where(
                tf.logical_and(tf.greater(boxes[:, 2] - boxes[:, 0], 0),
                               tf.greater(boxes[:, 3] - boxes[:, 1], 0)))[:, 0]
            boxes = tf.gather(boxes, indices)
            classes = tf.gather(classes, indices)
            if attributes is not None:
                attributes = tf.gather(attributes, indices)
            if masks is not None:
                masks = tf.gather(masks, indices)

                # Convert the processed boxes back to the normalized coordinates w.r.t.
                # the original image in order to crop and resize the instance masks.
                cropped_boxes = boxes + box_offsets
                cropped_boxes /= tf.tile(tf.expand_dims(scaled_size, axis=0),
                                         [1, 2])

                num_instances = tf.shape(boxes)[0]
                masks = tf.image.crop_and_resize(
                    image=tf.expand_dims(masks, axis=-1),
                    boxes=cropped_boxes,
                    box_indices=tf.range(num_instances, dtype=tf.int32),
                    crop_size=[crop_mask_size, crop_mask_size],
                    method='bilinear')
                masks = tf.squeeze(masks, axis=-1)

    # Pad image such that its height and width are the closest multiple of stride.
    padded_height = int(math.ceil(desired_height * 1.0 / stride) * stride)
    padded_width = int(math.ceil(desired_width * 1.0 / stride) * stride)
    image = tf.image.pad_to_bounding_box(image, 0, 0, padded_height,
                                         padded_width)
    image.set_shape([padded_height, padded_width, 3])

    # desired_scaled_size is the actual image size. Pixels beyond this are from
    # padding.
    image_info = tf.stack([
        desired_scaled_size[0], desired_scaled_size[1], 1.0 / scale,
        input_height, input_width
    ])

    return image, image_info, boxes, classes, attributes, masks
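The scale selection at the top of the function preserves aspect ratio by taking the more restrictive of the two per-side resize ratios; in plain Python (`keep_aspect_scale` is a hypothetical helper):

def keep_aspect_scale(in_h, in_w, out_h, out_w):
    # the scaled image must fit inside (out_h, out_w), so take the smaller
    # of the two per-side ratios
    return min(out_h / in_h, out_w / in_w)

# keep_aspect_scale(480.0, 640.0, 512.0, 512.0) == 0.8 -> a 384 x 512 image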
Code example #20
    def _create_dual_maxq_label_tensor(self, method="duality_based"):
        """Approximate the maxq label with dual."""
        w_transpose_list = []
        b_transpose_list = []
        num_layers = 1

        for itr, var in enumerate(self._vars_tf):
            if itr % 2 == 0:
                # even itr, multiplicative weights
                if itr == 0:
                    wx_transpose = self._dummy_network_var_ph["{}_ph".format(
                        var.name)][:self.state_dim, :]
                    w_transpose_list.append(
                        self._dummy_network_var_ph["{}_ph".format(
                            var.name)][self.state_dim:, :])

                else:
                    w_transpose_list.append(
                        self._dummy_network_var_ph["{}_ph".format(var.name)])
                num_layers += 1

            else:
                # odd itr, additive weights
                if itr == 1:
                    b_transpose_list.append(
                        tf.tile(
                            tf.expand_dims(self._dummy_network_var_ph[
                                "{}_ph".format(var.name)],
                                           axis=0), [self.batch_size, 1]) +
                        tf.matmul(self._next_state_tensor, wx_transpose))
                else:
                    b_transpose_list.append(
                        tf.tile(
                            tf.expand_dims(
                                self._dummy_network_var_ph["{}_ph".format(
                                    var.name)],
                                axis=0), [self.batch_size, 1]))

        action_tensor_center = tf.zeros(
            shape=[self.batch_size, self.action_dim])

        l_infty_norm_bound = np.max(self.action_max)
        if method == "duality_based":
            self.dual_maxq_tensor = dual_method.create_dual_approx(
                num_layers, self.batch_size, l_infty_norm_bound,
                w_transpose_list, b_transpose_list, action_tensor_center)
        elif method == "ibp":
            # ibp dual solver
            self.dual_maxq_tensor = dual_ibp_method.create_dual_ibp_approx(
                num_layers, self.batch_size, l_infty_norm_bound,
                w_transpose_list, b_transpose_list, action_tensor_center)
        else:
            # mix method
            dual_maxq_tensor = dual_method.create_dual_approx(
                num_layers, self.batch_size, l_infty_norm_bound,
                w_transpose_list, b_transpose_list, action_tensor_center)
            dual_ibp_maxq_tensor = dual_ibp_method.create_dual_ibp_approx(
                num_layers, self.batch_size, l_infty_norm_bound,
                w_transpose_list, b_transpose_list, action_tensor_center)
            # both tensors are valid upper bounds on the max-Q value, so
            # their pointwise minimum is a (tighter) upper bound as well
            self.dual_maxq_tensor = tf.minimum(dual_maxq_tensor,
                                               dual_ibp_maxq_tensor)
Code example #21
    def get(self):
        """ Provides input data to the graph. """
        # calculate size of each record (this lists what is contained in the db and how many bytes are occupied)
        record_bytes = 0

        encoding_bytes = 4
        kp_xyz_entries = 3 * self.num_kp
        record_bytes += encoding_bytes*kp_xyz_entries

        encoding_bytes = 4
        kp_uv_entries = 2 * self.num_kp
        record_bytes += encoding_bytes*kp_uv_entries

        kp_vis_entries = self.num_kp
        record_bytes += encoding_bytes*kp_vis_entries

        image_bytes = self.image_size[0] * self.image_size[1] * 3
        record_bytes += image_bytes

        """ READ DATA ITEMS"""
        # Start reader
        reader = tf.FixedLengthRecordReader(header_bytes=0, record_bytes=record_bytes)
        _, value = reader.read(tf.train.string_input_producer([self.path_to_db]))

        # decode to floats
        bytes_read = 0
        data_dict = dict()
        record_bytes_float32 = tf.decode_raw(value, tf.float32)

        # 1. Read keypoint xyz
        keypoint_xyz21 = tf.reshape(tf.slice(record_bytes_float32, [bytes_read//4], [kp_xyz_entries]), [self.num_kp, 3])
        bytes_read += encoding_bytes*kp_xyz_entries
        keypoint_xyz21 /= 1000.0  # scale to meters
        keypoint_xyz21 = self.convert_kp(keypoint_xyz21)

        # calculate wrist coord
        if self.use_wrist_coord:
            wrist_xyz = keypoint_xyz21[16, :] + 2.0*(keypoint_xyz21[0, :] - keypoint_xyz21[16, :])
            keypoint_xyz21 = tf.concat([tf.expand_dims(wrist_xyz, 0),
                                        keypoint_xyz21[1:, :]], 0)

        data_dict['keypoint_xyz21'] = keypoint_xyz21

        # 2. Read keypoint uv AND VIS
        keypoint_uv_vis21 = tf.reshape(tf.slice(record_bytes_float32, [bytes_read//4], [kp_uv_entries+kp_vis_entries]), [self.num_kp, 3])
        bytes_read += encoding_bytes*(kp_uv_entries+kp_vis_entries)
        keypoint_uv_vis21 = self.convert_kp(keypoint_uv_vis21)
        keypoint_uv21 = keypoint_uv_vis21[:, :2]
        keypoint_vis21 = tf.equal(keypoint_uv_vis21[:, 2], 1.0)

        # calculate wrist vis
        if self.use_wrist_coord:
            wrist_vis = tf.logical_or(keypoint_vis21[16], keypoint_vis21[0])
            keypoint_vis21 = tf.concat([tf.expand_dims(wrist_vis, 0),
                                        keypoint_vis21[1:]], 0)

            wrist_uv = keypoint_uv21[16, :] + 2.0*(keypoint_uv21[0, :] - keypoint_uv21[16, :])
            keypoint_uv21 = tf.concat([tf.expand_dims(wrist_uv, 0),
                                       keypoint_uv21[1:, :]], 0)

        data_dict['keypoint_vis21'] = keypoint_vis21

        if self.coord_uv_noise:
            noise = tf.truncated_normal([self.num_kp, 2], mean=0.0, stddev=self.coord_uv_noise_sigma)
            keypoint_uv21 += noise

        data_dict['keypoint_uv21'] = keypoint_uv21

        # decode to uint8
        record_bytes_uint8 = tf.decode_raw(value, tf.uint8)

        # 4. Read image
        image = tf.reshape(tf.slice(record_bytes_uint8, [bytes_read], [image_bytes]),
                               [self.image_size[0], self.image_size[1], 3])
        image = tf.cast(image, tf.float32)
        bytes_read += image_bytes

        # normalize image to [-0.5, 0.5]
        image = image / 255.0 - 0.5
        if self.hue_aug:
            image = tf.image.random_hue(image, self.hue_aug_max)
        data_dict['image'] = image

        """ CONSTANTS """
        # Camera intrinsics
        sx = 822.79041
        sy = 822.79041
        tx = 318.47345
        ty = 250.31296
        data_dict['cam_mat'] = tf.constant([[sx, 0.0, tx], [0.0, sy, ty], [0.0, 0.0, 1.0]])

        # Hand side: this dataset only contains left hands
        data_dict['hand_side'] = tf.one_hot(tf.constant(0, dtype=tf.int32), depth=2, on_value=1.0, off_value=0.0, dtype=tf.float32)

        assert bytes_read == record_bytes, "Doesn't add up."

        """ DEPENDENT DATA ITEMS: XYZ represenations. """
        # make coords relative to root joint
        kp_coord_xyz_root = keypoint_xyz21[0, :] # this is the palm coord
        kp_coord_xyz21_rel = keypoint_xyz21 - kp_coord_xyz_root  # relative coords in metric coords
        index_root_bone_length = tf.sqrt(tf.reduce_sum(tf.square(kp_coord_xyz21_rel[12, :] - kp_coord_xyz21_rel[11, :])))
        data_dict['keypoint_scale'] = index_root_bone_length
        data_dict['keypoint_xyz21_normed'] = kp_coord_xyz21_rel / index_root_bone_length  # normalized by length of 12->11

        # calculate local coordinates
        kp_coord_xyz21_local = bone_rel_trafo(data_dict['keypoint_xyz21_normed'])
        kp_coord_xyz21_local = tf.squeeze(kp_coord_xyz21_local)
        data_dict['keypoint_xyz21_local'] = kp_coord_xyz21_local

        # calculate viewpoint and coords in canonical coordinates
        kp_coord_xyz21_rel_can, rot_mat = canonical_trafo(data_dict['keypoint_xyz21_normed'])
        kp_coord_xyz21_rel_can, rot_mat = tf.squeeze(kp_coord_xyz21_rel_can), tf.squeeze(rot_mat)
        data_dict['keypoint_xyz21_can'] = kp_coord_xyz21_rel_can
        data_dict['rot_mat'] = tf.matrix_inverse(rot_mat)

        """ DEPENDENT DATA ITEMS: HAND CROP """
        if self.hand_crop:
            crop_center = keypoint_uv21[12, ::-1]

            # catch problem, when no valid kp available (happens almost never)
            crop_center = tf.cond(tf.reduce_all(tf.is_finite(crop_center)), lambda: crop_center,
                                  lambda: tf.constant([0.0, 0.0]))
            crop_center.set_shape([2, ])

            if self.crop_center_noise:
                noise = tf.truncated_normal([2], mean=0.0, stddev=self.crop_center_noise_sigma)
                crop_center += noise

            crop_scale_noise = tf.constant(1.0)
            if self.crop_scale_noise:
                crop_scale_noise = tf.squeeze(tf.random_uniform([1], minval=1.0, maxval=1.2))

            if not self.use_wrist_coord:
                wrist_uv = keypoint_uv21[16, :] + 2.0*(keypoint_uv21[0, :] - keypoint_uv21[16, :])
                keypoint_uv21 = tf.concat([tf.expand_dims(wrist_uv, 0),
                                           keypoint_uv21[1:, :]], 0)

            # select visible coords only
            kp_coord_h = tf.boolean_mask(keypoint_uv21[:, 1], keypoint_vis21)
            kp_coord_w = tf.boolean_mask(keypoint_uv21[:, 0], keypoint_vis21)
            kp_coord_hw = tf.stack([kp_coord_h, kp_coord_w], 1)

            # determine size of crop (measure spatial extend of hw coords first)
            min_coord = tf.maximum(tf.reduce_min(kp_coord_hw, 0), 0.0)
            max_coord = tf.minimum(tf.reduce_max(kp_coord_hw, 0), self.image_size)

            # find out larger distance wrt the center of crop
            crop_size_best = 2*tf.maximum(max_coord - crop_center, crop_center - min_coord)
            crop_size_best = tf.reduce_max(crop_size_best)
            crop_size_best = tf.minimum(tf.maximum(crop_size_best, 50.0), 500.0)

            # catch problem, when no valid kp available
            crop_size_best = tf.cond(tf.reduce_all(tf.is_finite(crop_size_best)), lambda: crop_size_best,
                                  lambda: tf.constant(200.0))
            crop_size_best.set_shape([])

            # calculate necessary scaling
            scale = tf.cast(self.crop_size, tf.float32) / crop_size_best
            scale = tf.minimum(tf.maximum(scale, 1.0), 10.0)
            scale *= crop_scale_noise
            data_dict['crop_scale'] = scale

            if self.crop_offset_noise:
                noise = tf.truncated_normal([2], mean=0.0, stddev=self.crop_offset_noise_sigma)
                crop_center += noise

            # Crop image
            img_crop = crop_image_from_xy(tf.expand_dims(image, 0), crop_center, self.crop_size, scale)
            data_dict['image_crop'] = tf.squeeze(img_crop)

            # Modify uv21 coordinates
            crop_center_float = tf.cast(crop_center, tf.float32)
            keypoint_uv21_u = (data_dict['keypoint_uv21'][:, 0] - crop_center_float[1]) * scale + self.crop_size // 2
            keypoint_uv21_v = (data_dict['keypoint_uv21'][:, 1] - crop_center_float[0]) * scale + self.crop_size // 2
            keypoint_uv21 = tf.stack([keypoint_uv21_u, keypoint_uv21_v], 1)
            data_dict['keypoint_uv21'] = keypoint_uv21

            # Modify camera intrinsics
            scale = tf.reshape(scale, [1, ])
            scale_matrix = tf.dynamic_stitch([[0], [1], [2],
                                              [3], [4], [5],
                                              [6], [7], [8]], [scale, [0.0], [0.0],
                                                               [0.0], scale, [0.0],
                                                               [0.0], [0.0], [1.0]])
            scale_matrix = tf.reshape(scale_matrix, [3, 3])

            crop_center_float = tf.cast(crop_center, tf.float32)
            trans1 = crop_center_float[0] * scale - self.crop_size // 2
            trans2 = crop_center_float[1] * scale - self.crop_size // 2
            trans1 = tf.reshape(trans1, [1, ])
            trans2 = tf.reshape(trans2, [1, ])
            trans_matrix = tf.dynamic_stitch([[0], [1], [2],
                                              [3], [4], [5],
                                              [6], [7], [8]], [[1.0], [0.0], -trans2,
                                                               [0.0], [1.0], -trans1,
                                                               [0.0], [0.0], [1.0]])
            trans_matrix = tf.reshape(trans_matrix, [3, 3])

            data_dict['cam_mat'] = tf.matmul(trans_matrix, tf.matmul(scale_matrix, data_dict['cam_mat']))

        """ DEPENDENT DATA ITEMS: Scoremap from the SUBSET of 21 keypoints"""
        # create scoremaps from the subset of 2D annotations
        keypoint_hw21 = tf.stack([keypoint_uv21[:, 1], keypoint_uv21[:, 0]], -1)

        scoremap_size = self.image_size
        
        if self.hand_crop:
            scoremap_size = (self.crop_size, self.crop_size)

        scoremap = self.create_multiple_gaussian_map(keypoint_hw21,
                                                     scoremap_size,
                                                     self.sigma,
                                                     valid_vec=keypoint_vis21)
        
        if self.scoremap_dropout:
            scoremap = tf.nn.dropout(scoremap, self.scoremap_dropout_prob,
                                        noise_shape=[1, 1, 21])
            scoremap *= self.scoremap_dropout_prob

        data_dict['scoremap'] = scoremap

        if self.random_crop_to_size:
            tensor_stack = tf.concat([data_dict['image'],
                                      tf.expand_dims(tf.cast(data_dict['hand_parts'], tf.float32), -1),
                                      tf.cast(data_dict['hand_mask'], tf.float32)], 2)
            s = tensor_stack.get_shape().as_list()
            tensor_stack_cropped = tf.random_crop(tensor_stack,
                                                  [self.random_crop_size, self.random_crop_size, s[2]])
            data_dict = dict()  # delete everything else because the random cropping makes the data invalid anyway
            data_dict['image'], data_dict['hand_parts'], data_dict['hand_mask'] = tensor_stack_cropped[:, :, :3],\
                                                                                  tf.cast(tensor_stack_cropped[:, :, 3], tf.int32),\
                                                                                  tf.cast(tensor_stack_cropped[:, :, 4:], tf.int32)

        names, tensors = zip(*data_dict.items())

        if self.shuffle:
            tensors = tf.train.shuffle_batch_join([tensors],
                                                  batch_size=self.batch_size,
                                                  capacity=100,
                                                  min_after_dequeue=50,
                                                  enqueue_many=False)
        else:
            tensors = tf.train.batch_join([tensors],
                                          batch_size=self.batch_size,
                                          capacity=100,
                                          enqueue_many=False)

        return dict(zip(names, tensors))
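The intrinsic update in the crop branch composes a scaling and a translation with the original camera matrix, i.e. cam_mat' = T @ S @ cam_mat. A NumPy sketch mirroring the `scale_matrix` / `trans_matrix` construction above (`crop_cam_mat` is a hypothetical helper):

import numpy as np

def crop_cam_mat(cam_mat, crop_center, crop_size, scale):
    # scale the image, then shift so that crop_center maps to the center
    # of the crop_size x crop_size patch
    S = np.diag([scale, scale, 1.0])
    t1 = crop_center[0] * scale - crop_size // 2
    t2 = crop_center[1] * scale - crop_size // 2
    T = np.array([[1.0, 0.0, -t2],
                  [0.0, 1.0, -t1],
                  [0.0, 0.0, 1.0]])
    return T @ S @ cam_mat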
Code example #22
def vae_model_fn(features, labels, mode, params):
    # Build mtf_features & seq length dict for getting number of microbatches
    # We need to pack inputs into a dict to pass into serialize_training_step

    H = W = params["dataset"]["image_size"]  # TODO: check equal
    mode_str = mode_to_str(mode)
    batch_size = params[f"{mode_str}_batch_size"]
    n_channels = params.get("input_channels", 3)
    model = DiscreteVAE(num_tokens=params["num_tokens"],
                        dim=params["n_embd"],
                        hidden_dim=params["hidden_dim"],
                        input_channels=n_channels,
                        convblocks=params.get("convblocks", [(3, 64), (3, 128),
                                                             (3, 256)]),
                        recompute_grad=params.get("recompute_grad", False),
                        use_bf16=params.get("use_bf16", False),
                        stack_factor=params.get("stack_factor", 1),
                        dimensions=H)

    if mode == tf.estimator.ModeKeys.PREDICT:
        raise NotImplementedError

    train_gumbel = params.get("train_gumbel_hard", True)
    eval_gumbel = params.get("eval_gumbel_hard", True)

    # We're not predicting, so we better be training or evaluating
    assert (mode == tf.estimator.ModeKeys.TRAIN
            or mode == tf.estimator.ModeKeys.EVAL)

    gumbel = train_gumbel if mode == tf.estimator.ModeKeys.TRAIN else eval_gumbel

    if params.get("temp_anneal_steps", None):
        warmup_frac = tf.cast(tf.train.get_global_step(),
                              tf.float32) / params["temp_anneal_steps"]
        warmup_frac = tf.minimum(warmup_frac, tf.constant(1.0))
        temp = params["temp_start"] - warmup_frac * (params["temp_start"] -
                                                     params["temp"])
    else:
        temp = params.get("temp", 1.0)

    # TODO: add back in microbatching
    if params.get("use_bf16", False):
        with tf.tpu.bfloat16_scope():
            with tf.variable_scope("vae"):
                loss, reconstruction = model.forward(features,
                                                     return_recon_loss=True,
                                                     temperature=temp,
                                                     hard_gumbel=gumbel)
                loss = tf.cast(loss, tf.float32)
                reconstruction = tf.cast(reconstruction, tf.float32)
    else:
        with tf.variable_scope("vae"):
            loss, reconstruction = model.forward(features,
                                                 return_recon_loss=True,
                                                 temperature=temp,
                                                 hard_gumbel=gumbel)

    optimizer = tf.train.AdamOptimizer(learning_rate=params["lr"])
    optimizer = tf.tpu.CrossShardOptimizer(optimizer)

    global_step = tf.train.get_or_create_global_step()
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(update_ops):
        train_op = optimizer.minimize(loss, global_step)

    def host_call_fn(gs, loss, input, reconstruction):
        gs = gs[0]
        loss = tf.math.reduce_mean(loss)
        denormalize = lambda x: (x + 1) / 2

        with tf2.summary.create_file_writer(params['model_path']).as_default():
            tf2.summary.scalar('loss', loss, step=gs)
            tf2.summary.image('input_image', denormalize(input), step=gs)
            tf2.summary.image('reconstruction_image',
                              denormalize(reconstruction),
                              step=gs)

            return tf.summary.all_v2_summary_ops()

    def metric_fn(gs, loss, input, reconstruction):
        gs = gs[0]
        loss = tf.math.reduce_mean(loss)
        denormalize = lambda x: (x + 1) / 2

        with tf2.summary.create_file_writer(params['model_path']).as_default():
            loss_op = tf.metrics.mean(loss)

            with tf2.summary.record_if(loss_op[0] < tf.constant(1e-9)):
                tf2.summary.image('eval/input_image',
                                  denormalize(input),
                                  step=gs)
                tf2.summary.image('eval/reconstruction_image',
                                  denormalize(reconstruction),
                                  step=gs)

            with tf.control_dependencies(tf.summary.all_v2_summary_ops()):
                dummy_op = tf.no_op()

            return {"_loss": loss_op, "zzz_dummy": (tf.constant(0), dummy_op)}

    # To log the loss, current learning rate, and epoch for Tensorboard, the
    # summary op needs to be run on the host CPU via host_call. host_call
    # expects [batch_size, ...] Tensors, thus reshape to introduce a batch
    # dimension. These Tensors are implicitly concatenated to
    # [params['batch_size']].
    gs_t = tf.reshape(global_step, [1])
    loss_t = tf.reshape(loss, [1])

    host_call = (host_call_fn, [gs_t, loss_t, features, reconstruction])
    metric = (metric_fn, [gs_t, loss_t, features, reconstruction])

    return tpu_estimator.TPUEstimatorSpec(
        mode,
        loss=loss,
        host_call=host_call if mode == tf.estimator.ModeKeys.TRAIN else None,
        train_op=train_op,
        eval_metrics=metric)
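The Gumbel-softmax temperature above is annealed linearly from `temp_start` down to `temp` over `temp_anneal_steps` steps; the schedule in plain Python (parameter values hypothetical):

def anneal_temp(step, temp_start=1.0, temp_end=0.0625, anneal_steps=10000):
    frac = min(step / anneal_steps, 1.0)  # warmup fraction, capped at 1
    return temp_start - frac * (temp_start - temp_end)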
Code example #23
def _get_classification_outputs(
    config,
    is_training,
    output_layer,
    output_layer_aggregation,
    label_ids,
    input_mask,
    table_mask,
    aggregation_function_id,
    answer,
    numeric_values,
    numeric_values_scale,
    row_ids,
    column_ids,
    classification_class_index,
):
    """Creates a classification model.

  Args:
    config: Configuration for Tapas model.
    is_training: Whether the model is training.
    output_layer: <float32>[batch_size, seq_length, hidden_size]
    output_layer_aggregation: <float32>[batch_size, hidden_size]
    label_ids: <int32>[batch_size, seq_length]
    input_mask: <int32>[batch_size, seq_length]
    table_mask: <int32>[batch_size, seq_length]
    aggregation_function_id: <int32>[batch_size]
    answer: <float32>[batch_size]
    numeric_values: <float32>[batch_size, seq_length]
    numeric_values_scale: <float32>[batch_size, seq_length]
    row_ids: <int32>[batch_size, seq_length]
    column_ids: <int32>[batch_size, seq_length]
    classification_class_index: <int32>[batch]

  Returns:
    Outputs
  """
    if is_training:
        # I.e., 0.1 dropout
        output_layer = tf.nn.dropout(output_layer, keep_prob=0.9)

    # Construct indices for the table.
    row_index = segmented_tensor.IndexMap(indices=tf.minimum(
        row_ids, config.max_num_rows - 1),
                                          num_segments=config.max_num_rows,
                                          batch_dims=1)
    col_index = segmented_tensor.IndexMap(indices=tf.minimum(
        column_ids, config.max_num_columns - 1),
                                          num_segments=config.max_num_columns,
                                          batch_dims=1)
    cell_index = segmented_tensor.ProductIndexMap(row_index, col_index)

    # Masks.
    # <float32>[batch_size, seq_length]
    input_mask_float = tf.cast(input_mask, tf.float32)
    table_mask_float = tf.cast(table_mask, tf.float32)
    # Mask for cells that exist in the table (i.e. that are not padding).
    cell_mask, _ = segmented_tensor.reduce_mean(input_mask_float, cell_index)

    # Compute logits per token. These are used to select individual cells.
    logits = compute_token_logits(
        output_layer=output_layer,
        temperature=config.temperature,
        init_cell_selection_weights_to_zero=\
          config.init_cell_selection_weights_to_zero)

    # Compute logits per column. These are used to select a column.
    if config.select_one_column:
        column_logits = utils.compute_column_logits(
            output_layer=output_layer,
            cell_index=cell_index,
            cell_mask=cell_mask,
            init_cell_selection_weights_to_zero=\
              config.init_cell_selection_weights_to_zero,
            allow_empty_column_selection=config.allow_empty_column_selection)

    # TODO(pawelnow): Extract this into a function.
    # Compute aggregation function logits.
    do_model_aggregation = config.num_aggregation_labels > 0
    if do_model_aggregation:
        hidden_size_agg = output_layer_aggregation.shape[-1].value
        output_weights_agg = tf.get_variable(
            "output_weights_agg",
            shape=[config.num_aggregation_labels, hidden_size_agg],
            initializer=_classification_initializer())
        output_bias_agg = tf.get_variable(
            "output_bias_agg",
            shape=[config.num_aggregation_labels],
            initializer=tf.zeros_initializer())

    do_model_classification = config.num_classification_labels > 0
    logits_cls = None
    if do_model_classification:
        logits_cls = compute_classification_logits(
            config.num_classification_labels, output_layer_aggregation)

    with tf.variable_scope("loss"):
        total_loss = 0.0
        is_supervised = not do_model_aggregation or \
            not config.use_answer_as_supervision

        ### Semi-supervised cell selection in case of no aggregation
        #############################################################

        # If the answer (the denotation) appears directly in the table we might
        # select the answer without applying any aggregation function. There are
        # some ambiguous cases, see _calculate_aggregate_mask for more info.
        # `aggregate_mask` is 1 for examples where we chose to aggregate and 0
        #  for examples where we chose to select the answer directly.
        # `label_ids` encodes the positions of the answer appearing in the table.
        if is_supervised:
            aggregate_mask = None
        else:
            # <float32>[batch_size]
            aggregate_mask = _calculate_aggregate_mask(
                answer=answer,
                output_layer_aggregation=output_layer_aggregation,
                output_bias_agg=output_bias_agg,
                output_weights_agg=output_weights_agg,
                cell_select_pref=config.cell_select_pref,
                label_ids=label_ids)

        ### Cell selection log-likelihood
        ###################################

        if config.average_logits_per_cell:
            logits_per_cell, _ = segmented_tensor.reduce_mean(
                logits, cell_index)
            logits = segmented_tensor.gather(logits_per_cell, cell_index)
        dist_per_token = tfp.distributions.Bernoulli(logits=logits)

        selection_loss_per_example = None
        if not config.select_one_column:
            weight = tf.where(
                label_ids == 0, tf.ones_like(label_ids, dtype=tf.float32),
                config.positive_weight *\
                tf.ones_like(label_ids, dtype=tf.float32))
            selection_loss_per_token = -dist_per_token.log_prob(
                label_ids) * weight
            selection_loss_per_example = (
                tf.reduce_sum(selection_loss_per_token * input_mask_float,
                              axis=1) /
                (tf.reduce_sum(input_mask_float, axis=1) +
                 _EPSILON_ZERO_DIVISION))
        else:
            selection_loss_per_example, logits = _single_column_cell_selection_loss(
                token_logits=logits,
                column_logits=column_logits,
                label_ids=label_ids,
                cell_index=cell_index,
                col_index=col_index,
                cell_mask=cell_mask)
            dist_per_token = tfp.distributions.Bernoulli(logits=logits)

        ### Logits for the aggregation function
        #########################################

        logits_aggregation = None
        if do_model_aggregation:
            logits_aggregation = _calculate_aggregation_logits(
                output_layer_aggregation, output_weights_agg, output_bias_agg)

        ### Classification loss
        ###############################
        if do_model_classification:
            one_hot_labels = tf.one_hot(classification_class_index,
                                        depth=config.num_classification_labels,
                                        dtype=tf.float32)
            log_probs = tf.nn.log_softmax(logits_cls, axis=-1)
            # <float32>[batch_size]
            per_example_classification_intermediate = -tf.reduce_sum(
                one_hot_labels * log_probs, axis=-1)

            cls_loss = tf.reduce_mean(per_example_classification_intermediate)
            total_loss += cls_loss

        ### Supervised cell selection
        ###############################

        span_indexes = None
        span_logits = None
        if config.span_prediction != SpanPredictionMode.NONE:
            (
                span_indexes,
                span_logits,
                span_loss,
            ) = span_prediction_utils.get_span_logits_by_mode(
                config.span_prediction,
                output_layer,
                label_ids,
                column_ids,
                row_ids,
                max_span_length=10,
            )
            total_loss += span_loss
        elif config.disable_per_token_loss:
            pass
        elif is_supervised:
            total_loss += tf.reduce_mean(selection_loss_per_example)
        else:
            # For the unsupervised case, zero out the cell selection loss for
            # examples where the model chose to aggregate.
            total_loss += tf.reduce_mean(selection_loss_per_example *
                                         (1.0 - aggregate_mask))

        ### Semi-supervised regression loss and supervised loss for aggregations
        #########################################################################

        if do_model_aggregation:
            # Note that `aggregate_mask` is None if the setting is supervised.
            per_example_additional_loss = _calculate_aggregation_loss(
                logits_aggregation, aggregate_mask, aggregation_function_id,
                config)

            if config.use_answer_as_supervision:
                # Add regression loss for numeric answers which require aggregation.
                answer_loss, large_answer_loss_mask = _calculate_regression_loss(
                    answer, aggregate_mask, dist_per_token, numeric_values,
                    numeric_values_scale, table_mask_float, logits_aggregation,
                    config)
                per_example_additional_loss += answer_loss
                # Zero loss for examples with answer_loss > cutoff.
                per_example_additional_loss *= large_answer_loss_mask

            total_loss += tf.reduce_mean(per_example_additional_loss)

        return Outputs(
            total_loss=total_loss,
            logits=logits,
            probs=_get_probs(dist_per_token) * input_mask_float,
            logits_aggregation=logits_aggregation,
            logits_cls=logits_cls,
            span_indexes=span_indexes,
            span_logits=span_logits,
        )
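
The classification branch in the loss scope is softmax cross-entropy written out by hand: one-hot labels against log-softmax. The same computation in isolation on dummy data, as a sketch (shapes and values are illustrative):

import tensorflow.compat.v1 as tf

batch_size, num_classification_labels = 4, 3
logits_cls = tf.random.normal([batch_size, num_classification_labels])
classification_class_index = tf.constant([0, 2, 1, 0])

one_hot_labels = tf.one_hot(classification_class_index,
                            depth=num_classification_labels,
                            dtype=tf.float32)
log_probs = tf.nn.log_softmax(logits_cls, axis=-1)
# <float32>[batch_size]: negative log-likelihood of the gold class,
# equivalent to tf.nn.softmax_cross_entropy_with_logits_v2.
per_example_loss = -tf.reduce_sum(one_hot_labels * log_probs, axis=-1)
cls_loss = tf.reduce_mean(per_example_loss)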
Code Example #24
def _generate_detections_per_image(boxes,
                                   scores,
                                   attributes,
                                   max_total_size=100,
                                   nms_iou_threshold=0.3,
                                   score_threshold=0.05,
                                   pre_nms_num_boxes=5000):
    """Generate the final detections per image given the model outputs.
  Args:
    boxes: a tensor with shape [N, num_classes, 4] or [N, 1, 4], which stacks
      box predictions from all feature levels. N is the total number of
      anchors across all levels.
    scores: a tensor with shape [N, num_classes], which stacks class
      probabilities from all feature levels. N is the total number of anchors
      across all levels, and num_classes is the number of classes predicted by
      the model. Note that the class_outputs here are the raw scores.
    attributes: a tensor with shape [N, num_attributes], which stacks attribute
      probabilities from all feature levels.
    max_total_size: a scalar representing maximum number of boxes retained over
      all classes.
    nms_iou_threshold: a float representing the threshold for deciding whether
      boxes overlap too much with respect to IOU.
    score_threshold: a float representing the threshold for deciding when to
      remove boxes based on score.
    pre_nms_num_boxes: an int number of top candidate detections per class
      before NMS.
  Returns:
    nmsed_boxes: `float` Tensor of shape [max_total_size, 4] representing top
      detected boxes in [y1, x1, y2, x2].
    nmsed_scores: `float` Tensor of shape [max_total_size] representing sorted
      confidence scores for detected boxes. The values are in [0, 1].
    nmsed_classes: `int` Tensor of shape [max_total_size] representing classes
      for detected boxes.
    nmsed_attributes: `float` Tensor of shape [max_total_size, num_attributes]
      representing attributes for detected boxes.
    valid_detections: `int` Tensor of shape [1]; only the top
      `valid_detections` boxes are valid detections.
  """
    nmsed_boxes = []
    nmsed_scores = []
    nmsed_classes = []
    nmsed_attributes = []
    num_classes_for_box = boxes.get_shape().as_list()[1]
    num_classes = scores.get_shape().as_list()[1]
    for i in range(num_classes):
        boxes_i = boxes[:, min(num_classes_for_box - 1, i)]
        scores_i = scores[:, i]

        # Obtains pre_nms_num_boxes before running NMS.
        scores_i, indices = tf.nn.top_k(scores_i,
                                        k=tf.minimum(
                                            tf.shape(scores_i)[-1],
                                            pre_nms_num_boxes))
        boxes_i = tf.gather(boxes_i, indices)
        attributes_i = tf.gather(attributes, indices)

        (nmsed_indices_i,
         nmsed_num_valid_i) = tf.image.non_max_suppression_padded(
             tf.cast(boxes_i, tf.float32),
             tf.cast(scores_i, tf.float32),
             max_total_size,
             iou_threshold=nms_iou_threshold,
             score_threshold=score_threshold,
             pad_to_max_output_size=True,
             name='nms_detections_' + str(i))
        nmsed_boxes_i = tf.gather(boxes_i, nmsed_indices_i)
        nmsed_scores_i = tf.gather(scores_i, nmsed_indices_i)
        nmsed_attributes_i = tf.gather(attributes_i, nmsed_indices_i)
        # Sets scores of invalid boxes to -1.
        nmsed_scores_i = tf.where(
            tf.less(tf.range(max_total_size), [nmsed_num_valid_i]),
            nmsed_scores_i, -tf.ones_like(nmsed_scores_i))
        nmsed_classes_i = tf.fill([max_total_size], i)
        nmsed_boxes.append(nmsed_boxes_i)
        nmsed_scores.append(nmsed_scores_i)
        nmsed_classes.append(nmsed_classes_i)
        nmsed_attributes.append(nmsed_attributes_i)

    # Concatenates results from all classes and sorts them by score.
    nmsed_boxes = tf.concat(nmsed_boxes, axis=0)
    nmsed_scores = tf.concat(nmsed_scores, axis=0)
    nmsed_classes = tf.concat(nmsed_classes, axis=0)
    nmsed_attributes = tf.concat(nmsed_attributes, axis=0)
    nmsed_scores, indices = tf.nn.top_k(nmsed_scores,
                                        k=max_total_size,
                                        sorted=True)
    nmsed_boxes = tf.gather(nmsed_boxes, indices)
    nmsed_classes = tf.gather(nmsed_classes, indices)
    nmsed_attributes = tf.gather(nmsed_attributes, indices)
    valid_detections = tf.reduce_sum(
        tf.cast(tf.greater(nmsed_scores, -1), tf.int32))
    return (nmsed_boxes, nmsed_scores, nmsed_classes, nmsed_attributes,
            valid_detections)
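
A usage sketch, assuming the function above is in scope; the anchor count, class count and attribute count below are arbitrary placeholders:

import tensorflow.compat.v1 as tf

boxes = tf.random.uniform([1000, 1, 4])  # class-agnostic box predictions
scores = tf.nn.softmax(tf.random.normal([1000, 5]), axis=-1)
attributes = tf.random.uniform([1000, 8])

(nmsed_boxes, nmsed_scores, nmsed_classes, nmsed_attributes,
 valid_detections) = _generate_detections_per_image(
     boxes, scores, attributes,
     max_total_size=100,
     nms_iou_threshold=0.3,
     score_threshold=0.05,
     pre_nms_num_boxes=500)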
Code Example #25
def dot_product_area_attention(q,
                               k,
                               v,
                               bias,
                               dropout_rate=0.0,
                               image_shapes=None,
                               name=None,
                               attention_image_summary=None,
                               save_weights_to=None,
                               dropout_broadcast_dims=None,
                               max_area_width=1,
                               max_area_height=1,
                               memory_height=1,
                               area_key_mode="mean",
                               area_value_mode="sum",
                               top_k_areas=0,
                               area_temperature=1.0,
                               training=True):
    """Dot-product area attention.

  Args:
    q: Tensor with shape [..., length_q, depth_k].
    k: Tensor with shape [..., length_kv, depth_k]. Leading dimensions must
      match with q.
    v: Tensor with shape [..., length_kv, depth_v]. Leading dimensions must
      match with q.
    bias: bias Tensor (see attention_bias())
    dropout_rate: a float.
    image_shapes: optional tuple of integer scalars.
      see comments for attention_image_summary()
    name: an optional string
    attention_image_summary: the callback for making image summary of attention.
    save_weights_to: an optional dictionary to capture attention weights
      for visualization; the weights tensor will be appended there under
      a string key created from the variable scope (including name).
    dropout_broadcast_dims: an optional list of integers less than rank of q.
      Specifies in which dimensions to broadcast the dropout decisions.
    max_area_width: the max width allowed for an area.
    max_area_height: the max height allowed for an area.
    memory_height: the height of the memory.
    area_key_mode: the mode for computing area keys, which can be "mean",
      "concat", "sum", "sample_concat", and "sample_sum".
    area_value_mode: the mode for computing area values, which can be either
      "mean", or "sum".
    top_k_areas: Use the top key areas for attention.
    area_temperature: the temperature for attention softmax.
    training: indicating if it is in the training mode.
  Returns:
    Tensor with shape [..., length_q, depth_v].
  """

    tf.logging.info(
        "dot_product_area_attention: "
        "area_h=%d, area_w=%d, mem_h=%d, "
        "area_key_mode=%s, area_value_mode=%s, "
        "area_temperature=%f", max_area_height, max_area_width, memory_height,
        area_key_mode, area_value_mode, area_temperature)
    with tf.variable_scope(name,
                           default_name="dot_product_area_attention",
                           values=[q, k, v]) as scope:
        mem_shape = common_layers.shape_list(k)
        batch_size = mem_shape[0]
        head_size = mem_shape[1]
        length = mem_shape[2]
        depth = mem_shape[3]
        k_area = compute_area_key(tf.reshape(k, [-1, length, depth]),
                                  max_area_width=max_area_width,
                                  max_area_height=max_area_height,
                                  height=memory_height,
                                  mode=area_key_mode,
                                  training=training)
        if area_value_mode == "mean":
            v_area, _, _, _, _ = compute_area_features(
                tf.reshape(v, [-1, length, depth]),
                max_area_width=max_area_width,
                max_area_height=max_area_height,
                height=memory_height)
        elif area_value_mode == "max":
            v_area, _, _ = basic_pool(tf.reshape(v, [-1, length, depth]),
                                      max_area_width=max_area_width,
                                      max_area_height=max_area_height,
                                      height=memory_height,
                                      fn=tf.reduce_max)
        elif area_value_mode == "sum":
            _, _, v_area, _, _ = compute_area_features(
                tf.reshape(v, [-1, length, depth]),
                max_area_width=max_area_width,
                max_area_height=max_area_height,
                height=memory_height)
        else:
            raise ValueError("Unsupported area value mode=%s" %
                             area_value_mode)
        k = tf.reshape(k_area, [batch_size, head_size, -1, depth])
        v = tf.reshape(v_area, [batch_size, head_size, -1, depth])
        logits = tf.matmul(q, k,
                           transpose_b=True)  # [..., length_q, length_kv]
        if bias is not None:
            bias = common_layers.cast_like(bias, logits)
            with tf.name_scope("compute_area_att_bias", values=[bias]):
                bias_shape = common_layers.shape_list(bias)
                mem_length = bias_shape[-1]
                bias_values = tf.reshape(tf.to_float(tf.less(bias, -1)),
                                         [-1, mem_length, 1])
                _, _, padding_sum, _, _ = compute_area_features(
                    bias_values,
                    max_area_width=max_area_width,
                    max_area_height=max_area_height,
                    height=memory_height)
                bias = tf.where(tf.cast(tf.to_int32(padding_sum), tf.bool),
                                tf.fill(tf.shape(padding_sum), -np.inf),
                                tf.zeros_like(padding_sum, dtype=tf.float32))
                bias = tf.reshape(
                    bias, [bias_shape[0], bias_shape[1], bias_shape[2], -1])
            logits += bias
        logits = logits / area_temperature
        weights = tf.nn.softmax(logits, name="attention_weights")
        if top_k_areas > 0:
            tf.logging.info("area_attention top_k_areas=%d", top_k_areas)
            top_k = tf.minimum(
                common_layers.shape_list(weights)[-1], top_k_areas)
            top_weights, _ = tf.nn.top_k(weights, k=top_k)
            min_values = tf.reduce_min(top_weights, -1, keepdims=True)
            weights = tf.where(tf.greater_equal(weights, min_values), weights,
                               tf.zeros_like(weights))
            weights = tf.div(weights, tf.reduce_sum(weights, -1,
                                                    keepdims=True))
        if save_weights_to is not None:
            save_weights_to[scope.name] = weights
            save_weights_to[scope.name + "/logits"] = logits
        # Drop out attention links for each head.
        weights = common_layers.dropout_with_broadcast_dims(
            weights, 1.0 - dropout_rate, broadcast_dims=dropout_broadcast_dims)
        if common_layers.should_generate_summaries(
        ) and attention_image_summary:
            attention_image_summary(weights, image_shapes)
        return tf.matmul(weights, v)
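
The top_k_areas branch is a hard sparsification of the attention distribution: everything below the k-th largest weight per query is zeroed and the remainder renormalized. The same trick in isolation, as a sketch with illustrative shapes:

import tensorflow.compat.v1 as tf

# [batch, heads, length_q, num_areas] attention weights.
weights = tf.nn.softmax(tf.random.normal([2, 4, 8, 16]), axis=-1)
top_k_areas = 3

top_k = tf.minimum(tf.shape(weights)[-1], top_k_areas)
top_weights, _ = tf.nn.top_k(weights, k=top_k)
min_values = tf.reduce_min(top_weights, -1, keepdims=True)
# Keep only weights >= the k-th largest, then renormalize to sum to 1.
weights = tf.where(tf.greater_equal(weights, min_values),
                   weights, tf.zeros_like(weights))
weights = weights / tf.reduce_sum(weights, -1, keepdims=True)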
Code Example #26
    def _resource_apply_dense(self, grad, var):
        var_dtype = var.dtype.base_dtype
        lr_t = self._decayed_lr(var_dtype)
        m = self.get_slot(var, 'm')
        v = self.get_slot(var, 'v')
        beta_1_t = self._get_hyper('beta_1', var_dtype)
        beta_2_t = self._get_hyper('beta_2', var_dtype)
        epsilon_t = ops.convert_to_tensor(self.epsilon, var_dtype)
        local_step = math_ops.cast(self.iterations + 1, var_dtype)
        beta_1_power = math_ops.pow(beta_1_t, local_step)
        beta_2_power = math_ops.pow(beta_2_t, local_step)

        if self._initial_total_steps > 0:
            total_steps = self._get_hyper('total_steps', var_dtype)
            warmup_steps = total_steps * self._get_hyper(
                'warmup_proportion', var_dtype)
            decay_steps = total_steps - warmup_steps
            lr_t = tf.where(
                local_step <= warmup_steps,
                lr_t * (local_step / warmup_steps),
                lr_t *
                (1.0 - tf.minimum(local_step, decay_steps) / decay_steps),
            )

        sma_inf = 2.0 / (1.0 - beta_2_t) - 1.0
        sma_t = sma_inf - 2.0 * local_step * beta_2_power / (1.0 -
                                                             beta_2_power)

        m_t = state_ops.assign(m,
                               beta_1_t * m + (1.0 - beta_1_t) * grad,
                               use_locking=self._use_locking)
        m_corr_t = m_t / (1.0 - beta_1_power)

        v_t = state_ops.assign(v,
                               beta_2_t * v +
                               (1.0 - beta_2_t) * math_ops.square(grad),
                               use_locking=self._use_locking)
        if self.amsgrad:
            vhat = self.get_slot(var, 'vhat')
            vhat_t = state_ops.assign(vhat,
                                      math_ops.maximum(vhat, v_t),
                                      use_locking=self._use_locking)
            v_corr_t = math_ops.sqrt(vhat_t / (1.0 - beta_2_power) + epsilon_t)
        else:
            v_corr_t = math_ops.sqrt(v_t / (1.0 - beta_2_power) + epsilon_t)

        r_t = math_ops.sqrt((sma_t - 4.0) / (sma_inf - 4.0) * (sma_t - 2.0) /
                            (sma_inf - 2.0) * sma_inf / sma_t)

        var_t = tf.where(sma_t > 5.0, r_t * m_corr_t / v_corr_t, m_corr_t)

        if self._initial_weight_decay > 0.0:
            var_t += self._get_hyper('weight_decay', var_dtype) * var

        var_update = state_ops.assign_sub(var,
                                          lr_t * var_t,
                                          use_locking=self._use_locking)

        updates = [var_update, m_t, v_t]
        if self.amsgrad:
            updates.append(vhat_t)
        return control_flow_ops.group(*updates)
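
The sma_t and r_t terms implement the RAdam rectification: while the approximated simple moving average length sma_t is small, the variance of the adaptive step is treated as intractable and the update falls back to the bias-corrected momentum m_corr_t. A plain-Python sketch of when the rectified branch activates (the beta_2 value is illustrative):

import math

beta_2 = 0.999
sma_inf = 2.0 / (1.0 - beta_2) - 1.0  # 1999 for beta_2 = 0.999

for step in (1, 4, 100, 10000):
    beta_2_power = beta_2 ** step
    sma_t = sma_inf - 2.0 * step * beta_2_power / (1.0 - beta_2_power)
    if sma_t > 5.0:
        r_t = math.sqrt((sma_t - 4.0) / (sma_inf - 4.0) *
                        (sma_t - 2.0) / (sma_inf - 2.0) * sma_inf / sma_t)
        print(step, "rectified update, r_t = %.4f" % r_t)
    else:
        print(step, "momentum-only update (variance not yet tractable)")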
Code Example #27
    def _finish(self, state):

        update_ops = []

        grads_at_prev_iterate = self._recompute_gradients(state)

        for var, grad, grad_at_prev_iterate in zip(self.vars, self.grads,
                                                   grads_at_prev_iterate):
            sum_grad_squared = state.get_slot(var, SUM_GRAD_SQUARED)
            previous_iterate = state.get_slot(var, PREVIOUS_ITERATE)
            maximum_gradient = state.get_slot(var, MAXIMUM_GRADIENT)
            sum_estimates_squared = state.get_slot(var, SUM_ESTIMATES_SQUARED)

            maximum_gradient_updated = tf.assign(
                maximum_gradient, tf.maximum(maximum_gradient, tf.norm(grad)))
            update_ops.append(maximum_gradient_updated)

            sum_grad_squared_updated = tf.assign_add(sum_grad_squared,
                                                     tf.pow(tf.abs(grad), 2.0))
            update_ops.append(sum_grad_squared_updated)

            smoothness = tf.norm(grad - grad_at_prev_iterate) / (
                0.0001 + tf.norm(var - previous_iterate))
            eta = self.lr * tf.pow(self.eta + sum_grad_squared_updated,
                                   -1.0 / 3.0)

            beta = tf.minimum(1.0, self.momentum * tf.square(eta))

            grad_estimate = state.get_slot(var, GRAD_ESTIMATE)

            new_grad_estimate = grad + (1.0 - beta) * (grad_estimate -
                                                       grad_at_prev_iterate)
            new_grad_estimate = tf.clip_by_value(new_grad_estimate,
                                                 -maximum_gradient_updated,
                                                 maximum_gradient_updated)

            if self.output_summaries:
                tf.summary.scalar(self._name + "/smoothness/" + var.name,
                                  smoothness)
                tf.summary.scalar(self._name + "/max_grad/" + var.name,
                                  maximum_gradient_updated)
                tf.summary.scalar(self._name + "/average_beta/" + var.name,
                                  tf.reduce_mean(beta))
                tf.summary.scalar(self._name + "/iterate_diff/" + var.name,
                                  tf.norm(var - previous_iterate))
                tf.summary.scalar(self._name + "/grad_diff/" + var.name,
                                  tf.norm(grad - grad_at_prev_iterate))
                tf.summary.scalar(
                    self._name + "/vr_grad_estimate_norm/" + var.name,
                    tf.norm(new_grad_estimate))
                tf.summary.scalar(self._name + "/grad_norm/" + var.name,
                                  tf.norm(grad))

            grad_estimate_updated = tf.assign(grad_estimate, new_grad_estimate)
            update_ops.append(grad_estimate_updated)

            sum_estimates_squared_updated = tf.assign_add(
                sum_estimates_squared, tf.square(new_grad_estimate))
            update_ops.append(sum_estimates_squared_updated)

            with tf.control_dependencies([grad_at_prev_iterate]):
                previous_iterate_updated = tf.assign(previous_iterate, var)
                update_ops.append(previous_iterate_updated)

            step = -eta * grad_estimate_updated

            with tf.control_dependencies([previous_iterate_updated]):
                var_updated = tf.assign_add(var, step)
                update_ops.append(var_updated)

        return tf.group(*update_ops)
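
The new_grad_estimate line is the STORM-style variance-reduced momentum: the previous estimate d_{t-1} is corrected by the difference between the current gradient and the previous iterate's gradient recomputed on the same minibatch, d_t = g_t + (1 - beta) * (d_{t-1} - g'_t). Isolated in NumPy as a sketch (values are illustrative):

import numpy as np

beta = 0.1
grad = np.array([0.50, -0.20])                  # g_t at the current iterate
grad_at_prev_iterate = np.array([0.45, -0.25])  # same minibatch, previous iterate
grad_estimate = np.array([0.40, -0.30])         # d_{t-1}

# The correction term cancels minibatch noise shared between the two
# gradient evaluations.
new_grad_estimate = grad + (1.0 - beta) * (grad_estimate - grad_at_prev_iterate)
print(new_grad_estimate)  # approx. [0.455, -0.245]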
Code Example #28
def main(argv):
    del argv  # unused
    if tf.io.gfile.exists(FLAGS.model_dir):
        tf.compat.v1.logging.warning(
            "Warning: deleting old log directory at {}".format(
                FLAGS.model_dir))
        tf.io.gfile.rmtree(FLAGS.model_dir)
    tf.io.gfile.makedirs(FLAGS.model_dir)

    if FLAGS.fake_data:
        (x_train, y_train), (x_test, y_test) = build_fake_data()
    else:
        (x_train, y_train), (x_test,
                             y_test) = tf.keras.datasets.cifar10.load_data()

    (images, labels, handle, training_iterator,
     heldout_iterator) = build_input_pipeline(x_train, x_test, y_train, y_test,
                                              FLAGS.batch_size, 500)

    if FLAGS.architecture == "resnet":
        model_fn = bayesian_resnet
    else:
        model_fn = bayesian_vgg

    model = model_fn(
        IMAGE_SHAPE,
        num_classes=10,
        kernel_posterior_scale_mean=FLAGS.kernel_posterior_scale_mean,
        kernel_posterior_scale_constraint=FLAGS.
        kernel_posterior_scale_constraint)
    logits = model(images)
    labels_distribution = tfd.Categorical(logits=logits)

    # Perform KL annealing. The optimal number of annealing steps
    # depends on the dataset and architecture.
    t = tf.compat.v2.Variable(0.0)
    kl_regularizer = t / (FLAGS.kl_annealing * len(x_train) / FLAGS.batch_size)

    # Compute the -ELBO as the loss. The kl term is annealed from 0 to 1 over
    # the epochs specified by the kl_annealing flag.
    log_likelihood = labels_distribution.log_prob(labels)
    neg_log_likelihood = -tf.reduce_mean(input_tensor=log_likelihood)
    kl = sum(model.losses) / len(x_train) * tf.minimum(1.0, kl_regularizer)
    loss = neg_log_likelihood + kl

    # Build metrics for evaluation. Predictions are formed from a single forward
    # pass of the probabilistic layers. They are cheap but noisy
    # predictions.
    predictions = tf.argmax(input=logits, axis=1)
    with tf.compat.v1.name_scope("train"):
        train_accuracy, train_accuracy_update_op = tf.compat.v1.metrics.accuracy(
            labels=labels, predictions=predictions)
        opt = tf.compat.v1.train.AdamOptimizer(FLAGS.learning_rate)
        train_op = opt.minimize(loss)
        update_step_op = tf.compat.v1.assign(t, t + 1)

    with tf.compat.v1.name_scope("valid"):
        valid_accuracy, valid_accuracy_update_op = tf.compat.v1.metrics.accuracy(
            labels=labels, predictions=predictions)

    init_op = tf.group(tf.compat.v1.global_variables_initializer(),
                       tf.compat.v1.local_variables_initializer())

    stream_vars_valid = [
        v for v in tf.compat.v1.local_variables() if "valid/" in v.name
    ]
    reset_valid_op = tf.compat.v1.variables_initializer(stream_vars_valid)

    with tf.compat.v1.Session() as sess:
        sess.run(init_op)

        # Run the training loop
        train_handle = sess.run(training_iterator.string_handle())
        heldout_handle = sess.run(heldout_iterator.string_handle())
        training_steps = int(
            round(FLAGS.epochs * (len(x_train) / FLAGS.batch_size)))
        for step in range(training_steps):
            _ = sess.run([train_op, train_accuracy_update_op, update_step_op],
                         feed_dict={handle: train_handle})

            # Print training loss and accuracy every 100 steps.
            if step % 100 == 0:
                loss_value, accuracy_value, kl_value = sess.run(
                    [loss, train_accuracy, kl],
                    feed_dict={handle: train_handle})
                print("Step: {:>3d} Loss: {:.3f} Accuracy: {:.3f} KL: {:.3f}".
                      format(step, loss_value, accuracy_value, kl_value))

            if (step + 1) % FLAGS.eval_freq == 0:
                # Compute log prob of heldout set by averaging draws from the model:
                # p(heldout | train) = int_model p(heldout|model) p(model|train)
                #                   ~= 1/n * sum_{i=1}^n p(heldout | model_i)
                # where model_i is a draw from the posterior
                # p(model|train).
                probs = np.asarray([
                    sess.run((labels_distribution.probs),
                             feed_dict={handle: heldout_handle})
                    for _ in range(FLAGS.num_monte_carlo)
                ])
                mean_probs = np.mean(probs, axis=0)

                _, label_vals = sess.run((images, labels),
                                         feed_dict={handle: heldout_handle})
                heldout_lp = np.mean(
                    np.log(mean_probs[np.arange(mean_probs.shape[0]),
                                      label_vals.flatten()]))
                print(" ... Held-out nats: {:.3f}".format(heldout_lp))

                # Calculate validation accuracy
                for _ in range(20):
                    sess.run(valid_accuracy_update_op,
                             feed_dict={handle: heldout_handle})
                valid_value = sess.run(valid_accuracy,
                                       feed_dict={handle: heldout_handle})

                print(" ... Validation Accuracy: {:.3f}".format(valid_value))

                sess.run(reset_valid_op)
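
The held-out evaluation above averages predictive probabilities over posterior draws before taking the log, which is exactly the Monte Carlo estimate described in the comment. The same estimator in NumPy, as a sketch with made-up shapes:

import numpy as np

num_monte_carlo, batch_size, num_classes = 8, 4, 10
# probs[i] is one forward pass with freshly sampled weights.
probs = np.random.dirichlet(np.ones(num_classes),
                            size=(num_monte_carlo, batch_size))
label_vals = np.array([3, 1, 7, 0])

mean_probs = np.mean(probs, axis=0)  # average over posterior draws first
heldout_lp = np.mean(
    np.log(mean_probs[np.arange(batch_size), label_vals]))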
Code Example #29
def get_warmed_up_lr(max_lr, warmup, global_step):
    if warmup == 0:
        return max_lr
    return max_lr * tf.minimum(
        tf.cast(global_step, tf.float32) / float(warmup), 1.0)
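
A usage sketch: with warmup=1000, the learning rate ramps linearly from 0 to max_lr over the first 1000 steps and stays at max_lr afterwards (the parameter values below are illustrative):

import tensorflow.compat.v1 as tf

global_step = tf.train.get_or_create_global_step()
lr = get_warmed_up_lr(max_lr=1e-3, warmup=1000, global_step=global_step)
# step 0 -> 0.0, step 500 -> 5e-4, step >= 1000 -> 1e-3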
Code Example #30
def ae_transformer_internal(inputs, targets, target_space, hparams, cache=None):
  """Main step used for training."""
  # Encoder.
  inputs = common_layers.flatten4d3d(inputs)
  inputs, ed = encode(inputs, target_space, hparams, "input_enc")

  # Autoencoding.
  losses = {"extra": tf.constant(0.0), "latent_pred": tf.constant(0.0)}

  max_targets_len_from_inputs = tf.concat([inputs, inputs], axis=1)
  targets, _ = common_layers.pad_to_same_length(
      targets,
      max_targets_len_from_inputs,
      final_length_divisible_by=2**hparams.num_compress_steps)
  targets_c = compress(targets, hparams, "compress")
  if hparams.mode != tf.estimator.ModeKeys.PREDICT:
    # Compress and bottleneck.
    latents_discrete_hot, extra_loss = vq_discrete_bottleneck(
        x=targets_c, hparams=hparams)
    latents_dense = vq_discrete_unbottleneck(
        latents_discrete_hot, hparams=hparams)
    latents_dense = targets_c + tf.stop_gradient(latents_dense - targets_c)
    latents_discrete = tf.argmax(latents_discrete_hot, axis=-1)
    tf.summary.histogram("codes", tf.reshape(latents_discrete[:, 0, :], [-1]))
    losses["extra"] = extra_loss

    # Extra loss predicting latent code from input.
    latents_pred = decode_transformer(inputs, ed, latents_dense, hparams,
                                      "extra")
    latent_pred_loss = get_latent_pred_loss(latents_pred, latents_discrete_hot,
                                            hparams)
    losses["latent_pred"] = tf.reduce_mean(latent_pred_loss)
  else:
    latent_len = common_layers.shape_list(targets_c)[1]
    embed = functools.partial(vq_discrete_unbottleneck, hparams=hparams)
    latents_dense = tf.zeros_like(targets_c[:, :latent_len, :, :])
    if cache is None:
      cache = ae_latent_sample_beam(latents_dense, inputs, ed, embed,
                                    hparams)
    cache_hot = tf.one_hot(cache, depth=2**hparams.bottleneck_bits)
    latents_dense = embed(cache_hot)

  # Postprocess.
  d = latents_dense
  pos = tf.get_variable("pos", [1, 1000, 1, hparams.hidden_size])
  pos = pos[:, :common_layers.shape_list(latents_dense)[1] + 1, :, :]
  latents_dense = tf.pad(latents_dense, [[0, 0], [1, 0], [0, 0], [0, 0]]) + pos

  # Decompressing the dense latents
  for i in range(hparams.num_compress_steps):
    j = hparams.num_compress_steps - i - 1
    d = residual_conv(d, 1, (3, 1), hparams, "decompress_rc_%d" % j)
    d = decompress_step(d, hparams, i > 0, "decompress_%d" % j)

  masking = common_layers.inverse_lin_decay(hparams.mask_startup_steps)
  masking *= common_layers.inverse_exp_decay(
      hparams.mask_startup_steps // 4)  # Not much at start.
  masking = tf.minimum(tf.maximum(masking, 0.0), 1.0)
  if hparams.mode == tf.estimator.ModeKeys.PREDICT:
    masking = 1.0
  mask = tf.less(masking,
                 tf.random_uniform(common_layers.shape_list(targets)[:-1]))
  mask = tf.expand_dims(tf.to_float(mask), 3)

  # targets is always [batch, length, 1, depth]
  targets = mask * targets + (1.0 - mask) * d

  res = decode_transformer(inputs, ed, targets, hparams, "decoder")
  latent_time = tf.less(hparams.mask_startup_steps,
                        tf.to_int32(tf.train.get_global_step()))
  losses["latent_pred"] *= tf.to_float(latent_time)
  return res, losses, cache
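
The masking schedule at the end mixes ground-truth targets with the decompressed latents position by position: early in training most positions see the true target, and as masking ramps toward 1 the decoder increasingly consumes its own reconstruction d. The mixing step in isolation, as a sketch with illustrative shapes:

import tensorflow.compat.v1 as tf

masking = 0.7  # would come from inverse_lin_decay during training
targets = tf.random.normal([2, 16, 1, 8])
d = tf.random.normal([2, 16, 1, 8])

# mask is 1 with probability (1 - masking): those positions keep the true
# target; the rest are replaced by the reconstruction d.
mask = tf.less(masking, tf.random.uniform(tf.shape(targets)[:-1]))
mask = tf.expand_dims(tf.cast(mask, tf.float32), 3)
mixed = mask * targets + (1.0 - mask) * d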