Beispiel #1
0
def boolean_mask(boxlist, indicator, fields=None, scope=None,
                 use_static_shapes=False, indicator_sum=None):
  """Select boxes from BoxList according to indicator and return new BoxList.

  `boolean_mask` returns the subset of boxes that are marked as "True" by the
  indicator tensor. By default, `boolean_mask` returns boxes corresponding to
  the input index list, as well as all additional fields stored in the boxlist
  (indexing into the first dimension).  However one can optionally only draw
  from a subset of fields.

  Args:
    boxlist: BoxList holding N boxes
    indicator: a rank-1 boolean tensor
    fields: (optional) list of fields to also gather from.  If None (default),
      all fields are gathered from.  Pass an empty fields list to only gather
      the box coordinates.  Note that this argument is only consulted on the
      dynamic-shape path; the static-shape path calls `gather` without it.
    scope: name scope.
    use_static_shapes: Whether to use an implementation with static shape
      guarantees.
    indicator_sum: An integer containing the sum of `indicator` vector. Only
      required if `use_static_shapes` is True, in which case it must be a
      non-zero Python int.

  Returns:
    subboxlist: a BoxList corresponding to the subset of the input BoxList
      specified by indicator
  Raises:
    ValueError: if `indicator` is not a rank-1 boolean tensor, if
      `use_static_shapes` is True and `indicator_sum` is missing, zero or not
      an int, or if a requested field is not present in `boxlist`.
  """
  with tf.name_scope(scope, 'BooleanMask'):
    if indicator.shape.ndims != 1:
      raise ValueError('indicator should have rank 1')
    if indicator.dtype != tf.bool:
      raise ValueError('indicator should be a boolean tensor')
    if use_static_shapes:
      # The truthiness test rejects None and 0; the isinstance test rejects
      # tensors and floats, since the value is used as a static one-hot depth.
      if not (indicator_sum and isinstance(indicator_sum, int)):
        raise ValueError(
            '`indicator_sum` must be a non-zero value of type int')
      selected_positions = tf.cast(indicator, dtype=tf.float32)
      # 1-based rank of every selected position; 0 for unselected positions.
      indexed_positions = tf.cast(
          tf.multiply(
              tf.cumsum(selected_positions), selected_positions),
          dtype=tf.int32)
      # Unselected positions map to index -1, which one_hot turns into an
      # all-zero row, so they never contribute to any output slot.
      one_hot_selector = tf.one_hot(
          indexed_positions - 1, indicator_sum, dtype=tf.float32)
      # Contracting the position indices against the selector over axis 0
      # yields, for each of the `indicator_sum` output slots, the index of
      # the selected position assigned to that slot.
      sampled_indices = tf.cast(
          tf.tensordot(
              tf.cast(tf.range(tf.shape(indicator)[0]), dtype=tf.float32),
              one_hot_selector,
              axes=[0, 0]),
          dtype=tf.int32)
      return gather(boxlist, sampled_indices, use_static_shapes=True)
    else:
      subboxlist = box_list.BoxList(tf.boolean_mask(boxlist.get(), indicator))
      if fields is None:
        fields = boxlist.get_extra_fields()
      for field in fields:
        if not boxlist.has_field(field):
          raise ValueError('boxlist must contain all specified fields')
        subfieldlist = tf.boolean_mask(boxlist.get_field(field), indicator)
        subboxlist.add_field(field, subfieldlist)
      return subboxlist
def yolo_filter_boxes(box_confidence, boxes, box_class_probs, threshold = .6):
    """Keep only the YOLO boxes whose best class score reaches `threshold`.

    Arguments:
    box_confidence -- objectness tensor, documented upstream as shape (19, 19, 5, 1)
    boxes -- box coordinate tensor, documented upstream as shape (19, 19, 5, 4)
    box_class_probs -- per-class probability tensor, documented upstream as shape (19, 19, 5, 80)
    threshold -- boxes whose highest class score is below this value are dropped

    Returns:
    scores -- 1-D tensor with the best class score of every kept box
    boxes -- tensor with the coordinates of every kept box (one row of 4 values per box)
    classes -- 1-D tensor with the index of the best class of every kept box

    The leading dimension of every output is dynamic: it equals the number of
    boxes that pass the threshold.
    """
    # Per-class score = objectness * class probability (broadcast over the last axis).
    per_class_scores = box_confidence * box_class_probs

    # Best class and its score for every box.
    best_class = K.argmax(per_class_scores, axis=-1)
    best_score = K.max(per_class_scores, axis=-1, keepdims=False)

    # True where the box is confident enough to keep.
    keep = best_score >= threshold

    # Apply the same mask to scores, coordinates and class ids.
    kept_scores, kept_boxes, kept_classes = (
        tf.boolean_mask(tensor, keep)
        for tensor in (best_score, boxes, best_class)
    )
    return kept_scores, kept_boxes, kept_classes
    def remap_keys(sparse_tensor):
        """Replace the batch index of every entry with the key stored in its row.

        The last entry of every batch row is treated as the "user" entry: its
        column index is the key for that row. Those entries are removed and
        the first index column of the remaining entries is rewritten from the
        batch index to that key. The dense shape is passed through unchanged.

        NOTE(review): the row-boundary detection compares the batch-index
        difference against exactly 0 and 1, so it presumably expects batch
        indices to be sorted and to increase by one -- confirm against the
        producer of `sparse_tensor`.
        """
        raw_indices = sparse_tensor.indices  # rank 2: (num_entries, 2)
        raw_values = sparse_tensor.values  # rank 1: (num_entries,)

        # Difference of consecutive batch indices; a trailing 1 marks the very
        # last entry as a row boundary as well. Result is rank 1.
        batch_step = raw_indices[1:, 0] - raw_indices[:-1, 0]
        closing_flag = tf.constant(value = [1], dtype = tf.int64)
        user_mask = tf.concat(values = [batch_step, closing_flag], axis = 0)

        is_item_entry = tf.equal(x = user_mask, y = 0)
        is_user_entry = tf.equal(x = user_mask, y = 1)

        # Split the entries into item entries and per-row user entries.
        good_values = tf.boolean_mask(tensor = raw_values, mask = is_item_entry)
        item_indices = tf.boolean_mask(tensor = raw_indices, mask = is_item_entry)
        user_indices = tf.boolean_mask(tensor = raw_indices, mask = is_user_entry)[:, 1]

        # Look up the key of the row each item entry belongs to.
        good_user_indices = tf.gather(params = user_indices, indices = item_indices[:, 0])

        # Both index vectors are rank 1; lift them to rank 2 so they can be
        # concatenated column-wise into (num_item_entries, 2).
        user_column = tf.expand_dims(input = good_user_indices, axis = -1)
        item_column = tf.expand_dims(input = item_indices[:, 1], axis = -1)
        good_indices = tf.concat(values = [user_column, item_column], axis = 1)

        return tf.SparseTensor(
            indices = good_indices,
            values = good_values,
            dense_shape = sparse_tensor.dense_shape)
Beispiel #4
0
def roc_auc_score(y_pred, y_true):
    """ ROC AUC Score.

    Differentiable surrogate for the Area Under Curve score, based on the
    Wilcoxon-Mann-Whitney U statistic.

    Yan, L., Dodier, R., Mozer, M. C., & Wolniewicz, R. (2003).
    Optimizing Classifier Performance via an Approximation to the Wilcoxon-Mann-Whitney Statistic.

    Every (positive, negative) prediction pair whose margin
    `pos - neg` falls short of `gamma` is penalised by
    `(gamma - (pos - neg)) ** p`; the penalties are summed.

    Arguments:
        y_pred: `Tensor`. Predicted values.
        y_true: `Tensor`. Targets (labels); cast to bool to split
            predictions into positives and negatives.

    Returns:
        Scalar `Tensor` holding the summed penalty.
    """
    with tf.name_scope("RocAucScore"):
        is_positive = tf.cast(y_true, tf.bool)

        # Row vector of positive predictions, column vector of negatives,
        # so that subtraction broadcasts to every (negative, positive) pair.
        pos_row = tf.expand_dims(tf.boolean_mask(y_pred, is_positive), 0)
        neg_col = tf.expand_dims(
            tf.boolean_mask(y_pred, tf.logical_not(is_positive)), 1)

        # original paper suggests performance is robust to exact parameter choice
        gamma = 0.2
        p = 3

        pair_margin = pos_row - neg_col - gamma

        # Only pairs that violate the margin contribute.
        violations = tf.boolean_mask(pair_margin, pair_margin < 0.0)

        return tf.reduce_sum(tf.pow(-violations, p))
    def loss(self, logits, labels, regularization):
        """Adds to the inference model the layers required to generate loss.

        The total loss is the sum of three terms:
          * var_loss: variance of the logits whose label equals 1 plus the
            variance of the logits whose label does not;
          * mean_loss: `self.lamda` times the amount by which the gap between
            the two group means falls short of `self.mu` (zero when the gap
            is large enough);
          * regularization: the given coefficient times the sum of
            `self.regularizers`.

        Returns:
            (loss, loss_average): the total loss tensor and a tensor that
            yields its exponential moving average after updating the averages.
        """
        with tf.name_scope('loss'):
            with tf.name_scope('var_loss'):
                labels = tf.cast(labels, tf.float32)
                ones = tf.ones(labels.get_shape())

                same_class = tf.boolean_mask(logits, tf.equal(labels, ones))
                diff_class = tf.boolean_mask(logits, tf.not_equal(labels, ones))
                same_mean, same_var = tf.nn.moments(same_class, [0])
                diff_mean, diff_var = tf.nn.moments(diff_class, [0])
                var_loss = same_var + diff_var

            with tf.name_scope('mean_loss'):
                # How far the mean gap is from the target margin self.mu.
                shortfall = self.mu - (same_mean - diff_mean)
                hinge = tf.where(tf.greater(shortfall, 0), shortfall, 0)
                mean_loss = self.lamda * hinge

            with tf.name_scope('regularization'):
                regularization *= tf.add_n(self.regularizers)

            loss = var_loss + mean_loss + regularization

            # Summaries for TensorBoard.
            tf.summary.scalar('loss/total', loss)
            with tf.name_scope('averages'):
                averages = tf.train.ExponentialMovingAverage(0.9)
                op_averages = averages.apply([var_loss, mean_loss, regularization, loss])
                tracked = (
                    ('loss/avg/var_loss', var_loss),
                    ('loss/avg/mean_loss', mean_loss),
                    ('loss/avg/regularization', regularization),
                    ('loss/avg/total', loss),
                )
                for tag, term in tracked:
                    tf.summary.scalar(tag, averages.average(term))
                # Reading the average through this identity forces the
                # moving-average update to run first.
                with tf.control_dependencies([op_averages]):
                    loss_average = tf.identity(averages.average(loss), name='control')
            return loss, loss_average
def yolo_filter_boxes(box_confidence, boxes, box_class_probs, threshold = .6):
    """Filters YOLO boxes by thresholding on object and class confidence.

    Arguments:
    box_confidence -- objectness tensor, documented upstream as shape (19, 19, 5, 1)
    boxes -- box coordinate tensor, documented upstream as shape (19, 19, 5, 4)
    box_class_probs -- class probability tensor, documented upstream as shape (19, 19, 5, 80)
    threshold -- a box is discarded when its highest class score is below this value

    Returns:
    scores -- best class score of each surviving box
    boxes -- coordinates of each surviving box
    classes -- index of the best class of each surviving box

    """

    def _surviving(tensor, mask):
        # Flatten away the masked dimensions, keeping only entries where mask is True.
        return tf.boolean_mask(tensor, mask)

    # Class score of a box = objectness * class probability.
    weighted_probs = box_confidence * box_class_probs

    # Winning class per box, and the score it won with.
    winner = K.argmax(weighted_probs, axis=-1)
    winner_score = K.max(weighted_probs, axis=-1)

    # Boxes at or above the threshold survive.
    survives = winner_score >= threshold

    scores = _surviving(winner_score, survives)
    boxes = _surviving(boxes, survives)
    classes = _surviving(winner, survives)
    return scores, boxes, classes
Beispiel #7
0
    def build_detector(self):
        """Build the inference graph: placeholders, trunk and optional heads.

        Creates `self.image_ph` / `self.seg_ph`, resizes the image to the
        configured square size and feeds it to the network trunk. Depending on
        the global `args` flags it then adds:
          * a detection head followed by `self.nms(...)` (args.detect);
          * a segmentation head whose arg-max prediction, resized back to the
            input image size, is stored in `self.segmentation`, together with
            mean-IoU ops (args.segment).
        """
        side = self.config['image_size']
        self.image_ph = tf.placeholder(dtype=tf.float32,
                                       shape=[None, None, 3], name='img_ph')
        self.seg_ph = tf.placeholder(dtype=tf.int32, shape=[None, None], name='seg_ph')

        # The network works on a batch of one image of fixed spatial size.
        batched_image = tf.expand_dims(self.image_ph, 0)
        net_input = tf.image.resize_bilinear(batched_image, (side, side))
        self.net.create_trunk(net_input)

        if args.detect:
            self.net.create_multibox_head(self.loader.num_classes)
            outputs = self.net.outputs
            confidence = tf.nn.softmax(tf.squeeze(outputs['confidence']))
            location = tf.squeeze(outputs['location'])
            self.nms(location, confidence, self.bboxer.tiling)

        if args.segment:
            self.net.create_segmentation_head(self.loader.num_classes)
            # Height and width of the original (un-resized) input image.
            seg_shape = tf.shape(self.image_ph)[:2]

            seg_logits = tf.image.resize_bilinear(
                self.net.outputs['segmentation'], seg_shape)
            seg_classes = tf.cast(
                tf.argmax(tf.squeeze(seg_logits), axis=-1), tf.int32)
            seg_classes = tf.reshape(seg_classes, seg_shape)
            seg_classes.set_shape([None, None])
            self.segmentation = seg_classes

            if self.no_gt:
                self.mean_iou = tf.constant(0)
                self.iou_update = tf.constant(0)
            else:
                # Only pixels whose ground-truth id does not exceed
                # num_classes take part in the IoU computation.
                easy_mask = self.seg_ph <= self.loader.num_classes
                predictions = tf.boolean_mask(self.segmentation, easy_mask)
                labels = tf.boolean_mask(self.seg_ph, easy_mask)
                self.mean_iou, self.iou_update = mean_iou(predictions, labels, self.loader.num_classes)
Beispiel #8
0
    def nms(self, localization, confidence, tiling):
        """Build per-class non-max-suppression ops.

        Boxes are decoded with `decode_bboxes`, then boxes whose best score
        over columns 1.. of `confidence` is below `args.conf_thresh` are
        dropped. For every class index from 1 to `num_classes - 1` the
        remaining boxes scoring above the threshold are reduced to at most
        `args.top_k_nms` candidates and passed through
        `tf.image.non_max_suppression`.

        Results are stored, one entry per class, in `self.detection_list`
        (boxes) and `self.score_list` (scores). Class index 0 is skipped
        (presumably the background class -- confirm with the label map).
        """
        candidate_bboxes = decode_bboxes(localization, tiling)

        # Discard boxes that are not confident for any class in columns 1..
        confident = tf.reduce_max(confidence[:, 1:], axis=-1) >= args.conf_thresh
        candidate_bboxes = tf.boolean_mask(candidate_bboxes, confident)
        confidence = tf.boolean_mask(confidence, confident)

        self.detection_list = []
        self.score_list = []
        for class_id in range(1, self.loader.num_classes):
            per_class_conf = confidence[:, class_id]
            above_thresh = tf.greater(per_class_conf, args.conf_thresh)
            class_scores = tf.boolean_mask(per_class_conf, above_thresh)
            class_bboxes = tf.boolean_mask(candidate_bboxes, above_thresh)

            # Never request more candidates than are available.
            num_candidates = tf.minimum(tf.size(class_scores), args.top_k_nms)
            top_class_scores, top_k_inds = tf.nn.top_k(class_scores, num_candidates)
            top_class_bboxes = tf.gather(class_bboxes, top_k_inds)

            survivors = tf.image.non_max_suppression(
                top_class_bboxes,
                top_class_scores,
                max_output_size=args.top_k_after_nms,
                iou_threshold=args.nms_thresh)

            self.detection_list.append(tf.gather(top_class_bboxes, survivors))
            self.score_list.append(tf.gather(top_class_scores, survivors))
    def __init__(self,
                 prev_actions_logp,
                 actions_logp,
                 action_kl,
                 actions_entropy,
                 values,
                 valid_mask,
                 advantages,
                 value_targets,
                 vf_loss_coeff=0.5,
                 entropy_coeff=-0.01,
                 clip_param=0.3):
        """Build the clipped-surrogate policy loss plus value and entropy terms.

        Exposes `mean_kl`, `pi_loss`, `value_targets`, `vf_loss`, `entropy`
        and `total_loss` as attributes.

        NOTE(review): `valid_mask` is applied to the value-function and
        entropy terms but not to the surrogate policy loss -- confirm that
        callers pre-mask `advantages` if that is intended.
        """
        # Probability ratio between the current and the previous policy.
        ratio = tf.exp(actions_logp - prev_actions_logp)
        clipped_ratio = tf.clip_by_value(
            ratio, 1 - clip_param, 1 + clip_param)

        # Pessimistic bound: the smaller of the clipped and unclipped objectives.
        unclipped_objective = advantages * ratio
        clipped_objective = advantages * clipped_ratio
        surrogate = tf.minimum(unclipped_objective, clipped_objective)

        self.mean_kl = tf.reduce_mean(action_kl)
        self.pi_loss = -tf.reduce_sum(surrogate)

        # The baseline loss: half the summed squared error on valid steps.
        value_error = tf.boolean_mask(values - value_targets, valid_mask)
        self.value_targets = value_targets
        self.vf_loss = 0.5 * tf.reduce_sum(tf.square(value_error))

        # The entropy loss, summed over valid steps.
        valid_entropy = tf.boolean_mask(actions_entropy, valid_mask)
        self.entropy = tf.reduce_sum(valid_entropy)

        # The summed weighted loss
        weighted_vf = self.vf_loss * vf_loss_coeff
        weighted_entropy = self.entropy * entropy_coeff
        self.total_loss = (self.pi_loss + weighted_vf + weighted_entropy)
Beispiel #10
0
    def add_loss_op(self, preds):
        """Adds Ops for the loss function to the computational graph.

        Computes the mean sparse softmax cross-entropy over the positions
        selected by `self.mask_placeholder`; positions where the mask is
        False do not contribute to the loss.

        Args:
            preds: A tensor of shape (batch_size, max_length, n_classes) containing the output of the neural
                  network before the softmax layer.
        Returns:
            loss: A 0-d tensor (scalar)
        """
        keep = self.mask_placeholder
        # Drop masked tokens from both predictions and targets before scoring.
        per_token_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
            labels=tf.boolean_mask(self.labels_placeholder, keep),
            logits=tf.boolean_mask(preds, keep))
        return tf.reduce_mean(per_token_loss)
def lamb_func(logit, logic, lamb):
    """Return summary statistics of the negative and positive logits.

    Args:
        logit: tensor of logits.
        logic: boolean mask over `logit`; True marks positive entries.
        lamb: scale applied to the negative logits inside the exponential.

    Returns:
        (left, right) where `left` is the mean of the negative logits weighted
        by `exp(lamb * logit)` and normalised by the mean of those weights,
        and `right` is the plain mean of the positive logits.
    """
    positives = tf.boolean_mask(logit, logic)
    negatives = tf.boolean_mask(logit, tf.logical_not(logic))

    neg_weights = tf.exp(negatives * lamb)
    weight_mean = tf.reduce_mean(neg_weights)
    weighted_neg_mean = tf.truediv(
        tf.reduce_mean(negatives * neg_weights), weight_mean)

    return weighted_neg_mean, tf.reduce_mean(positives)
Beispiel #12
0
 def shortlist_insert():
   """Insert the candidates selected by `larger_scores` into the shortlist.

   Uses `ids`, `scores`, `larger_scores` and `self` from the enclosing
   scope. Returns a single op that groups the updates of `self.sl_ids` and
   `self.sl_scores`.
   """
   candidate_ids = tf.boolean_mask(tf.to_int64(ids), larger_scores)
   candidate_scores = tf.boolean_mask(scores, larger_scores)
   # The custom op reports which shortlist slots to overwrite and with what.
   slots, slot_ids, slot_scores = self.ops.top_n_insert(
       self.sl_ids, self.sl_scores, candidate_ids, candidate_scores)
   update_ids = tf.scatter_update(self.sl_ids, slots, slot_ids)
   update_scores = tf.scatter_update(self.sl_scores, slots, slot_scores)
   return tf.group(update_ids, update_scores)
Beispiel #13
0
def rpn_losses(anchor_labels, anchor_boxes, label_logits, box_logits):
    """
    Compute the RPN label (objectness) loss and box regression loss.

    Anchors labelled -1 are excluded from the label loss; anchors labelled 1
    are the only ones that contribute to the box loss. Recall and precision
    at several probability thresholds, as well as both losses and the anchor
    counts, are passed to `add_moving_summary`.

    Args:
        anchor_labels: fHxfWxNA
        anchor_boxes: fHxfWxNAx4, encoded
        label_logits:  fHxfWxNA
        box_logits: fHxfWxNAx4

    Returns:
        label_loss, box_loss
    """
    with tf.device('/cpu:0'):
        # Masks and counts are constants with respect to the gradient.
        valid_mask = tf.stop_gradient(tf.not_equal(anchor_labels, -1))
        pos_mask = tf.stop_gradient(tf.equal(anchor_labels, 1))
        nr_valid = tf.stop_gradient(tf.count_nonzero(valid_mask, dtype=tf.int32), name='num_valid_anchor')
        nr_pos = tf.count_nonzero(pos_mask, dtype=tf.int32, name='num_pos_anchor')

        valid_anchor_labels = tf.boolean_mask(anchor_labels, valid_mask)
    # Unlike the labels above, the logits are masked outside the CPU device scope.
    valid_label_logits = tf.boolean_mask(label_logits, valid_mask)

    with tf.name_scope('label_metrics'):
        valid_label_prob = tf.nn.sigmoid(valid_label_logits)
        summaries = []
        with tf.device('/cpu:0'):
            # Recall and precision of the objectness prediction at each threshold.
            for th in [0.5, 0.2, 0.1]:
                valid_prediction = tf.cast(valid_label_prob > th, tf.int32)
                nr_pos_prediction = tf.reduce_sum(valid_prediction, name='num_pos_prediction')
                # Predicted positive AND prediction equals the anchor label.
                pos_prediction_corr = tf.count_nonzero(
                    tf.logical_and(
                        valid_label_prob > th,
                        tf.equal(valid_prediction, valid_anchor_labels)),
                    dtype=tf.int32)
                summaries.append(tf.truediv(
                    pos_prediction_corr,
                    nr_pos, name='recall_th{}'.format(th)))
                precision = tf.to_float(tf.truediv(pos_prediction_corr, nr_pos_prediction))
                # Report 0 instead of a division-by-zero result when nothing was predicted positive.
                precision = tf.where(tf.equal(nr_pos_prediction, 0), 0.0, precision, name='precision_th{}'.format(th))
                summaries.append(precision)
        add_moving_summary(*summaries)

    # Mean sigmoid cross-entropy over all valid anchors.
    label_loss = tf.nn.sigmoid_cross_entropy_with_logits(
        labels=tf.to_float(valid_anchor_labels), logits=valid_label_logits)
    label_loss = tf.reduce_mean(label_loss, name='label_loss')

    # Box regression is only evaluated on positive anchors.
    pos_anchor_boxes = tf.boolean_mask(anchor_boxes, pos_mask)
    pos_box_logits = tf.boolean_mask(box_logits, pos_mask)
    delta = 1.0 / 9
    # Summed Huber loss, rescaled by 1/delta.
    box_loss = tf.losses.huber_loss(
        pos_anchor_boxes, pos_box_logits, delta=delta,
        reduction=tf.losses.Reduction.SUM) / delta
    # Normalised by the number of valid anchors (not the number of positives).
    box_loss = tf.div(
        box_loss,
        tf.cast(nr_valid, tf.float32), name='box_loss')

    add_moving_summary(label_loss, box_loss, nr_valid, nr_pos)
    return label_loss, box_loss
  def remap_keys(sparse_tensor):
    """Replace the batch index of every entry with the key stored in its row.

    Every batch row of `sparse_tensor` is expected to end with one extra entry
    whose column index is the key of that row. This function removes those
    extra entries and rewrites the first index column of the remaining
    entries from the batch index to that key. The dense shape is passed
    through unchanged.

    The input must contain at least one batch row, because the key tiling
    loop is seeded with row 0.

    Args:
      sparse_tensor: rank-2 SparseTensor with int64 indices.

    Returns:
      A SparseTensor with remapped indices and the key entries removed.
    """
    # Current indices of our SparseTensor that we need to fix
    bad_indices = sparse_tensor.indices
    # Current values of our SparseTensor that we need to fix
    bad_values = sparse_tensor.values

    # Number of real (non-key) entries per batch row: count per batch index,
    # minus the one key entry appended to every row.
    size = tf.segment_sum(data = tf.ones_like(bad_indices[:,0], dtype = tf.int64), segment_ids = bad_indices[:,0]) - 1
    # The number of batch rows (batch_size unless it is a partially full batch)
    length = tf.shape(size, out_type = tf.int64)[0]
    # Cumulative count of real entries up to and including each row
    cum = tf.cumsum(size)
    # One key entry precedes each subsequent row, hence the 0..length-1 offsets
    length_range = tf.range(start = 0, limit = length, delta = 1, dtype = tf.int64)
    # Positions (in the indices array) of the key entry of every row
    cum_range = cum + length_range

    # The keys themselves. The slice is already rank 1 with one key per row;
    # it must NOT be squeezed, otherwise a single-row batch collapses to a
    # scalar and the `gathered_indices[i]` lookups below fail.
    gathered_indices = tf.gather(bad_indices, cum_range)[:,1]

    # The enumerated positions of all entries
    sparse_indices_range = tf.range(tf.shape(bad_indices, out_type = tf.int64)[0], dtype = tf.int64)

    # We want the positions that are real data, i.e. every position that is
    # not one of the key positions in cum_range.
    x = sparse_indices_range
    s = cum_range

    # Tile x into shape (num_entries, num_rows) so it can be compared against s
    tile_multiples = tf.concat([tf.ones(tf.shape(tf.shape(x)), dtype=tf.int64), tf.shape(s, out_type = tf.int64)], axis = 0)
    x_tile = tf.tile(tf.expand_dims(x, -1), tile_multiples)
    # True where the position matches none of the key positions
    x_not_in_s = ~tf.reduce_any(tf.equal(x_tile, s), -1)

    # Keep only the real entries of indices and values
    selected_indices = tf.boolean_mask(tensor = bad_indices, mask = x_not_in_s, axis = 0)
    selected_values = tf.boolean_mask(tensor = bad_values, mask = x_not_in_s, axis = 0)

    # Repeat the key of row 0 once per real entry of row 0
    tiling = tf.tile(input = tf.expand_dims(gathered_indices[0], -1), multiples = tf.expand_dims(size[0] , -1))

    # Append the repeated key of every further row. The rows are jagged, so
    # tf.map_fn (which stacks equally-shaped results) cannot be used.
    def loop_body(i, tensor_grow):
      return i + 1, tf.concat(values = [tensor_grow, tf.tile(input = tf.expand_dims(gathered_indices[i], -1), multiples = tf.expand_dims(size[i] , -1))], axis = 0)

    _, result = tf.while_loop(lambda i, tensor_grow: i < length, loop_body, [tf.constant(1, dtype = tf.int64), tiling])

    # Pair every repeated key with the column index of its entry
    selected_indices_fixed = tf.concat([tf.expand_dims(result, -1), tf.expand_dims(selected_indices[:, 1], -1)], axis = 1)

    # Combine everything together back into a SparseTensor
    remapped_sparse_tensor = tf.SparseTensor(indices = selected_indices_fixed, values = selected_values, dense_shape = sparse_tensor.dense_shape)
    return remapped_sparse_tensor
Beispiel #15
0
    def _build_detector(self):  # Decode the network predictions: pick each box's class first, then run NMS
        """Interpret the net output and get the predicted boxes"""
        # the width and height of the original image
        self.width = tf.placeholder(tf.float32, name="img_w")
        self.height = tf.placeholder(tf.float32, name="img_h")

        # The flat prediction vector is laid out as
        # [class probabilities | confidences | boxes]; the slice boundaries
        # are a convention of this model and the network learns to fill
        # them through back-propagation.
        class_end = self.S * self.S * self.C
        conf_end = class_end + self.S * self.S * self.B
        # self.predicts is indexed with two axes; only row 0 is decoded here
        # (presumably the first axis is the batch axis -- confirm).
        flat_prediction = self.predicts[0]
        # class prediction
        class_probs = tf.reshape(flat_prediction[:class_end], [self.S, self.S, self.C])
        # confidence
        confs = tf.reshape(flat_prediction[class_end:conf_end], [self.S, self.S, self.B])
        # boxes -> (x, y, w, h)
        raw_boxes = tf.reshape(flat_prediction[conf_end:], [self.S, self.S, self.B, 4])

        # Decode the boxes into image coordinates:
        #  * x, y: add the offset, divide by S and scale by the image
        #    width / height;
        #  * w, h: the net predicts square roots, so square them and scale
        #    by the image width / height.
        x_offset = tf.constant(self.x_offset, dtype=tf.float32)
        y_offset = tf.constant(self.y_offset, dtype=tf.float32)
        center_x = (raw_boxes[:, :, :, 0] + x_offset) / self.S * self.width
        center_y = (raw_boxes[:, :, :, 1] + y_offset) / self.S * self.height
        box_w = tf.square(raw_boxes[:, :, :, 2]) * self.width
        box_h = tf.square(raw_boxes[:, :, :, 3]) * self.height
        boxes = tf.stack([center_x, center_y, box_w, box_h], axis=3)

        # class-specific confidence scores [S, S, B, C]:
        # [S, S, B, 1] * [S, S, 1, C] broadcasts to [S, S, B, C]
        scores = tf.expand_dims(confs, -1) * tf.expand_dims(class_probs, 2)

        scores = tf.reshape(scores, [-1, self.C])  # [S*S*B, C]
        boxes = tf.reshape(boxes, [-1, 4])  # [S*S*B, 4]

        # find each box class, only select the max score
        box_classes = tf.argmax(scores, axis=1)  # index of the best class for every box
        box_class_scores = tf.reduce_max(scores, axis=1)  # score of that best class

        # filter the boxes by the score threshold
        confident = box_class_scores >= self.threshold
        scores = tf.boolean_mask(box_class_scores, confident)
        boxes = tf.boolean_mask(boxes, confident)
        box_classes = tf.boolean_mask(box_classes, confident)

        # non max suppression (do not distinguish different classes)
        # ref: https://tensorflow.google.cn/api_docs/python/tf/image/non_max_suppression
        # box (x, y, w, h) -> box (x1, y1, x2, y2)
        half_w = 0.5 * boxes[:, 2]
        half_h = 0.5 * boxes[:, 3]
        corner_boxes = tf.stack([boxes[:, 0] - half_w, boxes[:, 1] - half_h,
                                 boxes[:, 0] + half_w, boxes[:, 1] + half_h], axis=1)
        kept = tf.image.non_max_suppression(corner_boxes, scores,
                                            self.max_output_size, self.iou_threshold)
        self.scores = tf.gather(scores, kept)
        self.boxes = tf.gather(boxes, kept)
        self.box_classes = tf.gather(box_classes, kept)
def get_detailed_assigned_priors_summary(assigned_priors, priors_info, name):
  """
  Get assigned priors 1D tensors by SSD heads and priors type.

  The priors of all heads are laid out back to back in `assigned_priors`;
  inside one head the priors of the different types are interleaved with a
  stride of `num_priors_per_pixel`.

  Args:
    assigned_priors: Assigned priors, tensor of shape (num_priors). Either a
      `tf.Tensor` (selected with `tf.boolean_mask`) or an object supporting
      boolean-mask indexing such as a NumPy array.
    priors_info: Information about priors, list of pairs for every ssd head: tensor_dimensions, num_priors_per_pixel.
    name: Output name.

  Returns:
    detailed_assigned_priors: Dictionary with tensors for every SSD head and prior type.

  Raises:
    IndexError: if `priors_info` describes more priors than `assigned_priors`
      contains.
  """
  assert len(assigned_priors.shape) == 1

  is_tensor = isinstance(assigned_priors, tf.Tensor)
  total_priors_number = int(assigned_priors.shape[0])

  def _select(prior_map, expected_count):
    # Pick the masked priors; for tensors also pin the static output shape.
    if is_tensor:
      selected = tf.boolean_mask(assigned_priors, prior_map)
      return tf.reshape(selected, [expected_count])
    return assigned_priors[prior_map]

  detailed_assigned_priors = {'priors/{0}'.format(name): assigned_priors}

  start = 0
  for head_id, (tensor_dimensions, num_priors_per_pixel) in enumerate(priors_info):
    priors_per_type = int(np.prod(tensor_dimensions))
    priors_count = priors_per_type * num_priors_per_pixel
    stop = start + priors_count
    # Slice assignment would silently truncate, so keep the out-of-range
    # failure that element-wise indexing used to produce.
    if stop > total_priors_number:
      raise IndexError(
          'priors_info describes {0} priors but only {1} are available'.format(
              stop, total_priors_number))

    # All priors that belong to this head.
    # The builtin `bool` is used because the `np.bool` alias no longer
    # exists in current NumPy releases.
    head_map = np.zeros(shape=total_priors_number, dtype=bool)
    head_map[start:stop] = True
    detailed_assigned_priors['priors_by_head/{0}/head_{1}'.format(name, head_id)] = _select(
      head_map, priors_count)

    # Priors of one type inside this head: every num_priors_per_pixel-th one.
    for offset in range(num_priors_per_pixel):
      type_map = np.zeros(shape=total_priors_number, dtype=bool)
      type_map[start + offset:stop:num_priors_per_pixel] = True

      assigned_priors_head_type_name = 'priors_by_head_and_type/{0}/head_{1}/prior_{2}'.format(name, head_id,
                                                                                               offset)
      detailed_assigned_priors[assigned_priors_head_type_name] = _select(type_map, priors_per_type)

    start = stop

  return detailed_assigned_priors
Beispiel #17
0
 def map_box_encodings(i):
   """Produces box K-hot and score encodings for each class index.

   Uses `unique_indices`, `num_boxes`, `classes`, `confidences` and
   `num_classes` from the enclosing scope. Entries whose unique index equals
   `i` are selected; their class ids become the hot positions of a
   `num_classes`-long vector, once filled with 1 and once with the matching
   confidences.
   """
   target = i * tf.ones(num_boxes, dtype=tf.int32)
   selected = tf.reshape(tf.equal(unique_indices, target), [-1])

   hot_positions = tf.boolean_mask(classes, selected)
   hot_confidences = tf.boolean_mask(confidences, selected)

   k_hot = tf.sparse_to_dense(
       hot_positions, [num_classes], 1, validate_indices=False)
   k_hot_confidence = tf.sparse_to_dense(
       hot_positions, [num_classes], hot_confidences, validate_indices=False)
   return k_hot, k_hot_confidence
Beispiel #18
0
def yolo_filter_boxes(boxes, box_confidence, box_class_probs, threshold=.6):
    """Filter YOLO boxes based on object and class confidence.

    A box is kept when the product of its objectness and its best class
    probability is at least `threshold`. Returns the kept boxes, their best
    class scores and the corresponding class indices, in that order.
    """
    scores_per_class = box_confidence * box_class_probs
    best_class = K.argmax(scores_per_class, axis=-1)
    best_score = K.max(scores_per_class, axis=-1)
    keep = best_score >= threshold

    # TODO: Expose tf.boolean_mask to Keras backend?
    def _kept(tensor):
        return tf.boolean_mask(tensor, keep)

    return _kept(boxes), _kept(best_score), _kept(best_class)
Beispiel #19
0
def spread_loss(labels, activations, margin):
    """Sum of squared hinge penalties between the true and the other classes.

    For every row, each non-target activation is penalised by
    `max(0, margin - (a_target - a_other)) ** 2`. The reshapes require a
    fully static 2-D shape for `activations` and exactly one label equal to
    1 per row.
    """
    static_shape = activations.get_shape().as_list()
    batch_size = static_shape[0]
    num_other = static_shape[1] - 1

    is_target = tf.equal(labels, 1)
    is_other = tf.equal(labels, 0)

    # One target activation per row, broadcast against the remaining ones.
    target_act = tf.reshape(
        tf.boolean_mask(activations, is_target), [batch_size, 1])
    other_act = tf.reshape(
        tf.boolean_mask(activations, is_other), [batch_size, num_other])

    violation = tf.nn.relu(margin - (target_act - other_act))
    return tf.reduce_sum(tf.square(violation))
Beispiel #20
0
def spread_loss(labels, activations, iterations_per_epoch, global_step, name):
    """Spread loss with a margin that grows with the training step.

    The margin starts at 0.2 and is raised by 0.1 at each multiple of
    `iterations_per_epoch` (1x through 7x), ending at 0.9.

    :param labels: one-hot labels; entries equal to 1 mark the true class and
        entries equal to 0 the other classes
    :param activations: activation for each class, laid out like `labels`
        (the class dimension must be statically known)
    :param iterations_per_epoch: number of steps between margin increases
    :param global_step: current training step
    :param name: variable scope under which the loss ops are created

    :return: the spread loss, which is also registered via tf.losses.add_loss
    """
    # Margin schedule: piecewise constant in the global step.
    step = tf.cast(global_step, dtype=tf.int32)
    epoch_boundaries = [(iterations_per_epoch * epoch) for epoch in range(1, 8)]
    margin_values = [tenths / 10.0 for tenths in range(2, 10)]
    margin = tf.train.piecewise_constant(
        step, boundaries=epoch_boundaries, values=margin_values)

    static_shape = activations.get_shape().as_list()

    with tf.variable_scope(name):
        true_mask = tf.equal(labels, 1)    # selects the true-class entry
        other_mask = tf.equal(labels, 0)   # selects every other entry

        # Activation of the true class, one column per row.
        true_values = tf.boolean_mask(activations, true_mask)
        true_act = tf.reshape(
            true_values, shape=(tf.shape(activations)[0], 1))

        # Activations of the remaining classes.
        other_values = tf.boolean_mask(activations, other_mask)
        other_act = tf.reshape(
            other_values, [tf.shape(activations)[0], static_shape[1] - 1])

        hinge = tf.maximum(0.0, margin - (true_act - other_act))
        loss = tf.reduce_sum(tf.square(hinge))
        tf.losses.add_loss(loss)

        return loss
def flatten_binary_scores(scores, labels, ignore=None):
    """Flatten binary predictions and labels to rank 1.

    When `ignore` is given, positions whose label equals `ignore` are removed
    from both outputs.

    Returns:
        A pair (scores, labels) of flattened tensors.
    """
    flat_scores = tf.reshape(scores, (-1,))
    flat_labels = tf.reshape(labels, (-1,))
    if ignore is not None:
        keep = tf.not_equal(flat_labels, ignore)
        kept_scores = tf.boolean_mask(flat_scores, keep, name='valid_scores')
        kept_labels = tf.boolean_mask(flat_labels, keep, name='valid_labels')
        return kept_scores, kept_labels
    return flat_scores, flat_labels
Beispiel #22
0
    def _build_detector(self):
        """Interpret the net output and get the predicted boxes.

        Decodes `self.predicts[0]` into per-box class scores and
        (x, y, w, h) boxes, keeps the boxes whose best class score is at
        least `self.threshold`, and applies non-max suppression.

        Creates the `img_w` / `img_h` placeholders (stored as `self.width` /
        `self.height`) and stores the results in `self.scores`, `self.boxes`
        and `self.box_classes`.
        """
        # the width and height of the original image
        self.width = tf.placeholder(tf.float32, name="img_w")
        self.height = tf.placeholder(tf.float32, name="img_h")
        # get class prob, confidence, boxes from net output
        idx1 = self.S * self.S * self.C# total number of class predictions (e.g. 7*7*20 = 980)
        idx2 = idx1 + self.S * self.S * self.B# class predictions + total number of box confidences
        # class prediction probabilities, reshaped to [S, S, C]
        class_probs = tf.reshape(self.predicts[0, :idx1], [self.S, self.S, self.C])
        # confidence; per the original note: objectness (0/1) * IoU
        confs = tf.reshape(self.predicts[0, idx1:idx2], [self.S, self.S, self.B])
        # boxes -> (x, y, w, h)  (e.g. 7*7*1*4 + 7*7*1*4 = 196 values)
        boxes = tf.reshape(self.predicts[0, idx2:], [self.S, self.S, self.B, 4])# (x,y,w,h)

        # convert the x, y to the coordinates relative to the top left point of the image
        # the predictions of w, h are the square root
        # multiply the width and height of image
        # i.e. recover the actual box center coordinates and width/height
        boxes = tf.stack([(boxes[:, :, :, 0] + tf.constant(self.x_offset, dtype=tf.float32)) / self.S * self.width,# x: cell offset -> image coordinate
                          (boxes[:, :, :, 1] + tf.constant(self.y_offset, dtype=tf.float32)) / self.S * self.height,# y
                          tf.square(boxes[:, :, :, 2]) * self.width,# w: 0~1 * image size
                          tf.square(boxes[:, :, :, 3]) * self.height], axis=3)# h: 0~1 * image size

        ## final score = confidence * class probability: class-specific confidence scores [S, S, B, C]
        scores = tf.expand_dims(confs, -1) * tf.expand_dims(class_probs, 2)# add one dimension to each so they broadcast

        scores = tf.reshape(scores, [-1, self.C])  # [S*S*B, C]: e.g. 98 boxes with 20 class scores each
        boxes = tf.reshape(boxes, [-1, 4])  # [S*S*B, 4]: e.g. 98 boxes with 4 box parameters each (center and width/height)

        # find each box class, only select the max score
        box_classes = tf.argmax(scores, axis=1)# for every box, the class with the highest score
        box_class_scores = tf.reduce_max(scores, axis=1)# the highest score itself

        # filter the boxes by the score threshold
        filter_mask = box_class_scores >= self.threshold# True where the score reaches the threshold
        scores = tf.boolean_mask(box_class_scores, filter_mask)# the corresponding final scores
        boxes = tf.boolean_mask(boxes, filter_mask)# box positions
        box_classes = tf.boolean_mask(box_classes, filter_mask)# classes

        # non max suppression (do not distinguish different classes)
        # ref: https://tensorflow.google.cn/api_docs/python/tf/image/non_max_suppression
        # box (x, y, w, h) -> box (x1, y1, x2, y2), i.e. the corner coordinates
        _boxes = tf.stack([boxes[:, 0] - 0.5 * boxes[:, 2], boxes[:, 1] - 0.5 * boxes[:, 3],# x-0.5*w
                           boxes[:, 0] + 0.5 * boxes[:, 2], boxes[:, 1] + 0.5 * boxes[:, 3]], axis=1)
        # non-max suppression: drop boxes that overlap heavily with a higher-scoring one
        nms_indices = tf.image.non_max_suppression(_boxes, scores,
                                                   self.max_output_size, self.iou_threshold)
        self.scores = tf.gather(scores, nms_indices)
        self.boxes = tf.gather(boxes, nms_indices)
        self.box_classes = tf.gather(box_classes, nms_indices)
Beispiel #23
0
def generate_rpn_proposals(boxes, scores, img_shape,
                           pre_nms_topk, post_nms_topk=None):
    """
    Turn scored RPN boxes into the final proposal set:

    1. keep the `pre_nms_topk` highest-scoring boxes and clip them to the image,
    2. drop boxes whose width or height is not above cfg.RPN.MIN_SIZE,
    3. run NMS, keeping at most `post_nms_topk` boxes
       (defaults to `pre_nms_topk`, i.e. NMS output is not truncated further).

    Args:
        boxes: nx4 float dtype, the proposal boxes. Decoded to floatbox already
        scores: n float, the logits
        img_shape: [h, w]
        pre_nms_topk, post_nms_topk (int): See above.

    Returns:
        boxes: kx4 float
        scores: k logits
    """
    assert boxes.shape.ndims == 2, boxes.shape
    post_nms_topk = pre_nms_topk if post_nms_topk is None else post_nms_topk

    num_candidates = tf.minimum(pre_nms_topk, tf.size(scores))
    cand_scores, cand_indices = tf.nn.top_k(
        scores, k=num_candidates, sorted=False)
    cand_boxes = clip_boxes(tf.gather(boxes, cand_indices), img_shape)

    # View every box as two corner points: nx2x2 = [(x1, y1), (x2, y2)].
    corners = tf.reshape(cand_boxes, (-1, 2, 2))
    x1y1, x2y2 = tf.split(corners, 2, axis=1)  # nx1x2 each
    sizes = tf.squeeze(x2y2 - x1y1, axis=1)
    large_enough = tf.reduce_all(sizes > cfg.RPN.MIN_SIZE, axis=1)  # n,
    kept_corners = tf.boolean_mask(corners, large_enough)
    kept_scores = tf.boolean_mask(cand_scores, large_enough)

    # TODO not needed
    nms_boxes = tf.reshape(
        tf.reverse(kept_corners, axis=[2]),
        (-1, 4), name='nms_input_boxes')
    # TODO use exp to work around a bug in TF1.9: https://github.com/tensorflow/tensorflow/issues/19578
    nms_scores = tf.exp(kept_scores)
    selected = tf.image.non_max_suppression(
        nms_boxes,
        nms_scores,
        max_output_size=post_nms_topk,
        iou_threshold=cfg.RPN.PROPOSAL_NMS_THRESH)

    kept_boxes = tf.reshape(kept_corners, (-1, 4))
    final_boxes = tf.gather(kept_boxes, selected)
    final_scores = tf.gather(kept_scores, selected)
    tf.sigmoid(final_scores, name='probs')  # for visualization
    proposal_boxes = tf.stop_gradient(final_boxes, name='boxes')
    proposal_scores = tf.stop_gradient(final_scores, name='scores')
    return proposal_boxes, proposal_scores
Beispiel #24
0
    def _build_detector(self):
        """Decode the raw network output into the final detections.

        Splits `self.predicts[0]` into class probabilities, box confidences
        and (x, y, w, h) boxes, scales the boxes to the image size given by
        the `img_w` / `img_h` placeholders, thresholds the best class score
        of every box and runs class-agnostic non-max suppression.

        Results are stored in `self.scores`, `self.boxes` and
        `self.box_classes`.
        """
        # Placeholders for the size of the original image.
        self.width = tf.placeholder(tf.float32, name="img_w")
        self.height = tf.placeholder(tf.float32, name="img_h")

        # The flat output is [class probs | confidences | box coordinates].
        class_end = self.S * self.S * self.C
        conf_end = class_end + self.S * self.S * self.B
        class_probs = tf.reshape(
            self.predicts[0, :class_end], [self.S, self.S, self.C])
        confs = tf.reshape(
            self.predicts[0, class_end:conf_end], [self.S, self.S, self.B])
        raw_boxes = tf.reshape(
            self.predicts[0, conf_end:], [self.S, self.S, self.B, 4])

        # x, y are made relative to the top-left corner of the image; w, h are
        # predicted as square roots. All four are scaled by the image size.
        center_x = (raw_boxes[:, :, :, 0]
                    + tf.constant(self.x_offset, dtype=tf.float32)) / self.S * self.width
        center_y = (raw_boxes[:, :, :, 1]
                    + tf.constant(self.y_offset, dtype=tf.float32)) / self.S * self.height
        box_w = tf.square(raw_boxes[:, :, :, 2]) * self.width
        box_h = tf.square(raw_boxes[:, :, :, 3]) * self.height
        all_boxes = tf.stack([center_x, center_y, box_w, box_h], axis=3)

        # Class-specific confidence scores, [S, S, B, C].
        class_scores = tf.expand_dims(confs, -1) * tf.expand_dims(class_probs, 2)

        class_scores = tf.reshape(class_scores, [-1, self.C])  # [S*S*B, C]
        all_boxes = tf.reshape(all_boxes, [-1, 4])  # [S*S*B, 4]

        # Best class and its score for every box.
        best_class = tf.argmax(class_scores, axis=1)
        best_score = tf.reduce_max(class_scores, axis=1)

        # Keep only boxes whose best score reaches the threshold.
        keep = best_score >= self.threshold
        kept_scores = tf.boolean_mask(best_score, keep)
        kept_boxes = tf.boolean_mask(all_boxes, keep)
        kept_classes = tf.boolean_mask(best_class, keep)

        # Non max suppression (does not distinguish different classes).
        # ref: https://tensorflow.google.cn/api_docs/python/tf/image/non_max_suppression
        # box (x, y, w, h) -> box (x1, y1, x2, y2)
        x_min = kept_boxes[:, 0] - 0.5 * kept_boxes[:, 2]
        y_min = kept_boxes[:, 1] - 0.5 * kept_boxes[:, 3]
        x_max = kept_boxes[:, 0] + 0.5 * kept_boxes[:, 2]
        y_max = kept_boxes[:, 1] + 0.5 * kept_boxes[:, 3]
        corner_boxes = tf.stack([x_min, y_min, x_max, y_max], axis=1)
        selected = tf.image.non_max_suppression(
            corner_boxes, kept_scores,
            self.max_output_size, self.iou_threshold)
        self.scores = tf.gather(kept_scores, selected)
        self.boxes = tf.gather(kept_boxes, selected)
        self.box_classes = tf.gather(kept_classes, selected)
def bboxes_filter_overlap(labels, bboxes, threshold=0.5,
                          scope=None):
    """Keep only the boxes that sufficiently overlap the unit box [0, 0, 1, 1].

    The overlap score of every box is computed with `bboxes_intersection`
    against the reference box; entries scoring strictly above `threshold`
    are kept.

    Return:
      labels, bboxes: Filtered elements.
    """
    with tf.name_scope(scope, 'bboxes_filter', [labels, bboxes]):
        reference_box = tf.constant([0, 0, 1, 1], bboxes.dtype)
        overlap = bboxes_intersection(reference_box, bboxes)
        keep = overlap > threshold
        kept_labels = tf.boolean_mask(labels, keep)
        kept_bboxes = tf.boolean_mask(bboxes, keep)
        return kept_labels, kept_bboxes
Beispiel #26
0
    def segmentation_loss(seg_logits, seg_gt, config):
        """Cross-entropy segmentation loss plus a streaming mean-IoU metric.

        Positions whose ground-truth value exceeds `dataset.num_classes` are
        excluded from both the loss and the metric. Scalar summaries are
        written for the loss and the mean IoU. `config` is not used here.

        Returns:
            A tuple (seg_loss, mean_iou, update_mean_iou).
        """
        # NOTE(review): `<=` also keeps labels equal to num_classes; confirm
        # that the logits really have a class for that value.
        valid = seg_gt <= dataset.num_classes
        valid_logits = tf.boolean_mask(seg_logits, valid)
        valid_gt = tf.boolean_mask(seg_gt, valid)
        predictions = tf.argmax(valid_logits, axis=1)

        elementwise_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=valid_logits, labels=valid_gt)
        seg_loss = tf.reduce_mean(elementwise_loss)
        tf.summary.scalar('loss/segmentation', seg_loss)

        mean_iou, update_mean_iou = streaming_mean_iou(
            predictions, valid_gt, dataset.num_classes)
        tf.summary.scalar('accuracy/mean_iou', mean_iou)
        return seg_loss, mean_iou, update_mean_iou
Beispiel #27
0
def generate_rpn_proposals(boxes, scores, img_shape):
    """
    Select RPN proposals: take the top-scoring boxes, clip them to the image,
    drop boxes not larger than config.RPN_MIN_SIZE and apply NMS. The top-k
    limits before and after NMS come from the TRAIN_* or TEST_* config
    values, depending on the current tower context.

    Args:
        boxes: nx4 float dtype, decoded to floatbox already
        scores: n float, the logits
        img_shape: [h, w]

    Returns:
        boxes: kx4 float
        scores: k logits
    """
    assert boxes.shape.ndims == 2, boxes.shape
    if get_current_tower_context().is_training:
        pre_nms_topk, post_nms_topk = (
            config.TRAIN_PRE_NMS_TOPK, config.TRAIN_POST_NMS_TOPK)
    else:
        pre_nms_topk, post_nms_topk = (
            config.TEST_PRE_NMS_TOPK, config.TEST_POST_NMS_TOPK)

    num_candidates = tf.minimum(pre_nms_topk, tf.size(scores))
    cand_scores, cand_indices = tf.nn.top_k(
        scores, k=num_candidates, sorted=False)
    cand_boxes = clip_boxes(tf.gather(boxes, cand_indices), img_shape)

    # View every box as two corner points: nx2x2 = [(x1, y1), (x2, y2)].
    corners = tf.reshape(cand_boxes, (-1, 2, 2))
    x1y1, x2y2 = tf.split(corners, 2, axis=1)  # nx1x2 each
    sizes = tf.squeeze(x2y2 - x1y1, axis=1)
    large_enough = tf.reduce_all(sizes > config.RPN_MIN_SIZE, axis=1)  # n,
    kept_corners = tf.boolean_mask(corners, large_enough)
    kept_scores = tf.boolean_mask(cand_scores, large_enough)

    # Swap the coordinates inside each corner to get y1x1y2x2 rows for NMS.
    nms_boxes = tf.reshape(
        tf.reverse(kept_corners, axis=[2]),
        (-1, 4), name='nms_input_boxes')
    selected = tf.image.non_max_suppression(
        nms_boxes,
        kept_scores,
        max_output_size=post_nms_topk,
        iou_threshold=config.RPN_PROPOSAL_NMS_THRESH)

    kept_boxes = tf.reshape(kept_corners, (-1, 4))
    final_boxes = tf.gather(kept_boxes, selected, name='boxes')
    final_scores = tf.gather(kept_scores, selected, name='scores')
    tf.sigmoid(final_scores, name='probs')  # for visualization
    return final_boxes, final_scores
 def make_net(self, input_images, input_measurements, input_actions, input_objectives, reuse=False):
     """Build the prediction network and return its outputs.

     Images go through a conv encoder followed by a fully connected net,
     measurements through a fully connected net, and (only when
     `self.fc_obj_params` is a numpy array) objectives through a third fully
     connected net. The resulting features are concatenated and fed to two
     heads: a "val" head with `target_dim` outputs and an "adv" head with
     `len(self.net_discrete_actions) * target_dim` outputs. The adv output is
     centred over the action axis and added to the val output.

     Side effects: sets `self.fc_val_params` and `self.fc_adv_params` to
     copies of `self.fc_joint_params` whose last `out_dims` entry is
     overwritten.

     Args:
       input_images: input to the conv encoder.
       input_measurements: input to the measurement net.
       input_actions: cast to bool and used as a mask over `pred_all`;
         presumably a one-hot action encoding -- TODO confirm.
       input_objectives: input to the objective net (only used when
         `self.fc_obj_params` is a numpy array).
       reuse: if True, variables of the current variable scope are reused.

     Returns:
       (pred_all, pred_relevant): the predictions for all actions, reshaped
       to [-1, number of actions, target_dim], and the entries of `pred_all`
       selected by `input_actions`.

     Raises:
       Exception: if `self.random_objective_coeffs` is set but
         `self.fc_obj_params` is not a numpy array.
     """
     if reuse:
         tf.get_variable_scope().reuse_variables()
     
     # Both heads start from the joint parameters; only the size of the last
     # layer differs.
     self.fc_val_params = np.copy(self.fc_joint_params)
     self.fc_val_params['out_dims'][-1] = self.target_dim
     self.fc_adv_params = np.copy(self.fc_joint_params)
     self.fc_adv_params['out_dims'][-1] = len(self.net_discrete_actions) * self.target_dim
     p_img_conv = my_ops.conv_encoder(input_images, self.conv_params, 'p_img_conv', msra_coeff=0.9)
     p_img_fc = my_ops.fc_net(my_ops.flatten(p_img_conv), self.fc_img_params, 'p_img_fc', msra_coeff=0.9)
     p_meas_fc = my_ops.fc_net(input_measurements, self.fc_meas_params, 'p_meas_fc', msra_coeff=0.9)
     # The objective branch exists only when objective parameters are given.
     if isinstance(self.fc_obj_params, np.ndarray):
         p_obj_fc = my_ops.fc_net(input_objectives, self.fc_obj_params, 'p_obj_fc', msra_coeff=0.9)
         p_concat_fc = tf.concat([p_img_fc,p_meas_fc,p_obj_fc], 1)
     else:
         p_concat_fc = tf.concat([p_img_fc,p_meas_fc], 1)
         if self.random_objective_coeffs:
             raise Exception('Need fc_obj_params with randomized objectives')
         
     p_val_fc = my_ops.fc_net(p_concat_fc, self.fc_val_params, 'p_val_fc', last_linear=True, msra_coeff=0.9)
     p_adv_fc = my_ops.fc_net(p_concat_fc, self.fc_adv_params, 'p_adv_fc', last_linear=True, msra_coeff=0.9)
     
     # One row of target_dim predictions per discrete action.
     adv_reshape = tf.reshape(p_adv_fc, [-1, len(self.net_discrete_actions), self.target_dim])
     
     # Remove the mean over the action axis, then add the val output, which is
     # broadcast across actions.
     pred_all_nomean = adv_reshape - tf.reduce_mean(adv_reshape, reduction_indices=1, keep_dims=True)
     pred_all = pred_all_nomean + tf.reshape(p_val_fc, [-1, 1, self.target_dim])
     pred_relevant = tf.boolean_mask(pred_all, tf.cast(input_actions, tf.bool))
     
     return pred_all, pred_relevant
Beispiel #29
0
def bboxes_filter_labels(labels, bboxes,
                         out_labels=(), num_classes=np.inf,
                         scope=None):
    """Filter out unwanted labels (and their boxes) from a collection.

    Typically used to get rid of DontCare elements. Elements whose label is
    not below `num_classes` are removed as well.

    Args:
      labels: Tensor of class labels, one per box.
      bboxes: Tensor of boxes, aligned with `labels` on the first dimension.
      out_labels: Iterable of label values to remove (None means none).
      num_classes: Labels greater than or equal to this value are removed.
        The default, infinity, disables this check.
      scope: Optional name scope.

    Return:
      labels, bboxes: Filtered elements.
    """
    with tf.name_scope(scope, 'bboxes_filter_labels', [labels, bboxes]):
        # Infinity cannot be converted to an integer label dtype, so the
        # "no upper bound" case starts from an all-True mask instead.
        if isinstance(num_classes, float) and num_classes == np.inf:
            keep = tf.ones_like(labels, dtype=tf.bool)
        else:
            keep = tf.less(labels, num_classes)
        # Drop every explicitly excluded label value.
        if out_labels is not None:
            for unwanted in out_labels:
                keep = tf.logical_and(keep, tf.not_equal(labels, unwanted))
        labels = tf.boolean_mask(labels, keep)
        bboxes = tf.boolean_mask(bboxes, keep)
        return labels, bboxes
Beispiel #30
0
                def get_losses(obj_mask):
                  """Compute the motion constraint loss for one object mask.

                  The height of the mask (in rows) yields an approximate depth
                  through `f_y * global_scale_var / height`; the loss is the
                  mean absolute difference between that value and the depth
                  predicted at the masked positions, both divided by the mean
                  predicted depth.
                  """
                  # Height of the segment: extent of the rows where the mask
                  # exceeds 0.5.
                  inside = tf.greater(
                      obj_mask[:, :, 0], tf.constant(0.5, dtype=tf.float32))
                  yx = tf.where(inside)  # Shape (num_true, 2=yx)
                  row_max = tf.reduce_max(yx[:, 0])
                  row_min = tf.reduce_min(yx[:, 0])
                  segment_height = row_max - row_min

                  focal_y = self.intrinsic_mat[i, 0, 1, 1]
                  scaled_focal = focal_y * self.global_scale_var
                  depth_from_height = scaled_focal / tf.to_float(segment_height)

                  # Predicted depth at the positions covered by the mask.
                  mask_hw1 = tf.reshape(
                      obj_mask[:, :, 0], (self.img_height, self.img_width, 1))
                  masked_depth = tf.boolean_mask(
                      depth_pred,
                      tf.greater(mask_hw1, tf.constant(0.5, dtype=tf.float32)))

                  # Normalize both terms by the global mean depth to prevent
                  # degenerative depth shrinking.
                  mean_depth = tf.reduce_mean(depth_pred)
                  masked_depth = masked_depth / mean_depth
                  depth_from_height = depth_from_height / mean_depth
                  abs_err = tf.abs(masked_depth - depth_from_height)
                  return tf.reduce_mean(abs_err)
Beispiel #31
0
def bbox_to_tensor(bbox,
                   label,
                   input_shape=(416, 416),
                   anchors=YOLOv3_anchors,
                   num_classes=80):
    """Convert ground-truth boxes and labels into per-layer YOLO label grids.

    Every valid box (width > 0) is matched to the anchor with the highest IoU
    (box and anchors both centred at the origin) and written into the grid
    cell that contains the box centre, in the output layer that `anchor_mask`
    assigns to that anchor.

    Args:
        bbox: boxes of shape (box num, 4) as (ymin, xmin, ymax, xmax);
            presumably normalised to [0, 1], since the centre is multiplied
            by the grid size to find the cell -- TODO confirm.
        label: class ids of shape (box num).
        input_shape: network input size in (height, width) order.
        anchors: anchor sizes with a `.shape` attribute; the number of output
            layers is `anchors.shape[0] // 3`.
        num_classes: number of classes; the last grid dimension is
            5 + num_classes.

    Returns:
        A tuple of 3 tensors when there are 3 layers, otherwise 2 tensors,
        each of shape (height, width, anchor num, 5 + num_classes).
    """

    # NOTE: input_shape is given in (input height, input width) order
    # bbox.shape = (box num, 4) which represents (ymin,xmin,ymax,xmax)
    # label.shape = (box num)
    # anchors = (9,2)
    # NOTE(review): the ops returned by tf.Assert are discarded; these checks
    # presumably only take effect under eager execution -- confirm.
    tf.Assert(tf.equal(tf.reduce_all(label < num_classes), True), [label])
    num_layers = anchors.shape[0] // 3
    tf.Assert(
        tf.equal(
            tf.reduce_any([tf.equal(num_layers, 2),
                           tf.equal(num_layers, 3)]), True), [num_layers])
    # Row l of anchor_mask lists the anchor indices handled by output layer l.
    # NOTE(review): the two-layer mask lists anchor 3 in both rows and never
    # anchor 0 -- confirm this is intended.
    anchor_mask = tf.cond(
        tf.equal(num_layers,
                 3), lambda: tf.constant([[6, 7, 8], [3, 4, 5], [0, 1, 2]]),
        lambda: tf.constant([[3, 4, 5], [1, 2, 3]]))

    # tf.reverse swaps the (y, x) order of the inputs into (x, y).
    true_boxes_xy = tf.reverse((bbox[..., 0:2] + bbox[..., 2:4]) / 2.,
                               axis=[-1])
    # box center proportional position
    true_boxes_wh = tf.reverse(tf.math.abs(bbox[..., 2:4] - bbox[..., 0:2]),
                               axis=[-1])
    # box proportional size
    true_boxes = tf.concat([true_boxes_xy, true_boxes_wh], axis=-1)
    input_shape_tensor = tf.reverse(tf.convert_to_tensor(input_shape,
                                                         dtype=tf.float32),
                                    axis=[0])
    boxes_xy = true_boxes[..., 0:2] * input_shape_tensor
    # box center absolute position
    boxes_wh = true_boxes[..., 2:4] * input_shape_tensor
    # box absolute size

    # create tensor for label: y_true.shape[layer] = (height, width, anchor num, 5 + class num)
    # The grids are numpy arrays (filled in place below); layer l uses a
    # downsampling factor of 32, 16 or 8.
    y_true = tuple((np.zeros(shape=(input_shape[0] // {
        0: 32,
        1: 16,
        2: 8
    }[l], input_shape[1] // {
        0: 32,
        1: 16,
        2: 8
    }[l], tf.shape(anchor_mask[l, ...])[0], 5 + num_classes),
                             dtype=np.float32) for l in range(num_layers)))

    # center the anchor boxes at the origin, get the max and min of corners' (x,y)
    anchors = tf.expand_dims(tf.convert_to_tensor(anchors, dtype=tf.float32),
                             0)
    # anchors.shape = (1, 9, 2)
    anchor_maxes = anchors / 2.
    # max of width, height, anchors_maxes.shape = (1, 9, 2)
    anchor_mins = -anchor_maxes
    # min of width, height, anchors_mins.shape = (1, 9, 2)

    # center the bbox at the origin, get the max and min of corners' (x,y)
    valid_mask = tf.greater(boxes_wh[..., 0], 0)
    # valid box should have width > 0: valid_mask.shape = (box_num)
    wh = tf.boolean_mask(boxes_wh, valid_mask)
    # absolute size: wh.shape = (valid box num, 2)
    valid_true_boxes = tf.boolean_mask(true_boxes, valid_mask)
    # box proportional position: valid_true_boxes.shape = (valid box num, 4)
    valid_label = tf.boolean_mask(label, valid_mask)
    # valid_label.shape = (valid box num)
    # if there is any valid bbox, get anchor box which has the maximum iou with current bbox.
    # NOTE(review): this comparison and the Python loop below need concrete
    # tensor values, i.e. presumably eager execution -- confirm.
    if wh.shape[0] > 0:
        wh = tf.expand_dims(wh, -2)
        # wh.shape = (valid box num, 1, 2)
        box_maxes = wh / 2
        # max of width, height, box_maxes.shape = (valid box num, 1, 2)
        box_mins = -box_maxes
        # min of width, height, box_mins.shape = (valid box num, 1, 2)
        intersect_mins = tf.math.maximum(box_mins, anchor_mins)
        # intersect_mins.shape = (valid box num, anchor num(9), 2)
        intersect_maxes = tf.math.minimum(box_maxes, anchor_maxes)
        # intersect_maxes.shape = (valid box num, anchor num(9), 2)
        intersect_wh = tf.math.maximum(intersect_maxes - intersect_mins, 0.)
        # intersect_wh.shape = (valid box num, anchor num(9), 2)
        intersect_area = intersect_wh[..., 0] * intersect_wh[..., 1]
        # intersect_area.shape = (valid box num, anchor num(9))
        box_area = wh[..., 0] * wh[..., 1]
        # box_area.shape = (valid box_num, 1)
        anchor_area = anchors[..., 0] * anchors[..., 1]
        # anchor_area.shape = (1, anchor num(9))
        iou = intersect_area / (box_area + anchor_area - intersect_area)
        # iou.shape = (valid box num, anchor num(9))
        # get the anchor box having maximum iou with each true bbox
        best_anchor = tf.math.argmax(iou, axis=-1, output_type=tf.int32)
        # best_anchor.shape = (valid box num)
        # fill in label tensor
        for t in range(tf.shape(best_anchor)[0]):
            n = best_anchor[t]
            # locate the anchor in anchor_mask: l = layer index, k = anchor slot in that layer
            pos = tf.where(tf.equal(anchor_mask, n))
            l = pos[0][0]
            k = pos[0][1]
            i = int(
                tf.clip_by_value(valid_true_boxes[t, 1] * y_true[l].shape[0],
                                 clip_value_min=0,
                                 clip_value_max=y_true[l].shape[0] - 1))
            # absolute center y = proportional y * grid_shape.height
            j = int(
                tf.clip_by_value(valid_true_boxes[t, 0] * y_true[l].shape[1],
                                 clip_value_min=0,
                                 clip_value_max=y_true[l].shape[1] - 1))
            # absolute center x = proportional x * grid_shape.width
            c = valid_label[t]
            # class
            y_true[l][i, j, k, 0:4] = valid_true_boxes[t, 0:4]
            # box proportional position (x,y,width,height)
            y_true[l][i, j, k, 4] = 1
            # object mask
            y_true[l][i, j, k, 5 + c] = 1
            # class mask

    if num_layers == 3:
        return (tf.convert_to_tensor(y_true[0]),
                tf.convert_to_tensor(y_true[1]),
                tf.convert_to_tensor(y_true[2]))
    else:
        return (tf.convert_to_tensor(y_true[0]),
                tf.convert_to_tensor(y_true[1]))
def preprocess_for_eval(image,
                        labels,
                        bboxes,
                        out_shape=EVAL_SIZE,
                        data_format='NHWC',
                        difficults=None,
                        resize=Resize.WARP_RESIZE,
                        scope='ssd_preprocessing_train'):
    """Prepare an image and its annotations for evaluation.

    The image is cast to float and whitened with the channel means, resized
    according to `resize`, and transposed when `data_format` is 'NCHW'.
    Boxes flagged as difficult are removed together with their labels.

    Args:
        image: A `Tensor` representing an image of arbitrary size; it must
            have rank 3.
        labels: Labels matching `bboxes`; only filtered when `difficults` is
            given.
        bboxes: Bounding boxes, or None.
        out_shape: Output shape after pre-processing (if resize != NONE).
        data_format: 'NCHW' transposes the image with perm (2, 0, 1); any
            other value leaves the image layout unchanged.
        difficults: Optional flags; entries that cast to True are dropped.
        resize: Resize strategy.
        scope: Name scope for the created ops.

    Returns:
        A tuple (image, labels, bboxes, bbox_img), where `bbox_img` is the
        [0, 0, 1, 1] image rectangle after the same box transformation as
        the other boxes.

    Raises:
        ValueError: If `image` does not have rank 3.
    """
    with tf.name_scope(scope):
        if image.get_shape().ndims != 3:
            raise ValueError('Input must be of size [height, width, C>0]')

        float_image = tf.to_float(image)
        image = tf_image_whitened(float_image, [_R_MEAN, _G_MEAN, _B_MEAN])

        # Prepend the full-image rectangle so it is transformed with the boxes.
        image_rect = tf.constant([[0., 0., 1., 1.]])
        if bboxes is None:
            all_boxes = image_rect
        else:
            all_boxes = tf.concat([image_rect, bboxes], axis=0)

        if resize == Resize.NONE:
            # Image and boxes are used as they are.
            pass
        elif resize == Resize.CENTRAL_CROP:
            # Crop (or pad) around the centre to the requested size.
            image, all_boxes = tf_image.resize_image_bboxes_with_crop_or_pad(
                image, all_boxes, out_shape[0], out_shape[1])
        elif resize == Resize.PAD_AND_RESIZE:
            # Shrink the image (never enlarge it) so it fits inside out_shape.
            image_shape = tf.shape(image)
            unit = tf.to_double(1.0)
            height_ratio = tf.to_double(out_shape[0] / image_shape[0])
            width_ratio = tf.to_double(out_shape[1] / image_shape[1])
            scale = tf.minimum(unit, tf.minimum(height_ratio, width_ratio))
            scaled_size = scale * tf.to_double(image_shape[0:2])
            target_size = tf.cast(tf.floor(scaled_size), tf.int32)

            image = tf_image.resize_image(
                image,
                target_size,
                method=tf.image.ResizeMethod.BILINEAR,
                align_corners=False)
            # Then pad up to the expected size.
            image, all_boxes = tf_image.resize_image_bboxes_with_crop_or_pad(
                image, all_boxes, out_shape[0], out_shape[1])
        elif resize == Resize.WARP_RESIZE:
            # Stretch the image to out_shape, ignoring the aspect ratio.
            image = tf_image.resize_image(
                image,
                out_shape,
                method=tf.image.ResizeMethod.BILINEAR,
                align_corners=False)

        # Separate the image rectangle from the object boxes again.
        image_rect = all_boxes[0]
        object_boxes = all_boxes[1:]
        # Drop the boxes flagged as difficult, together with their labels.
        if difficults is not None:
            keep = tf.logical_not(tf.cast(difficults, tf.bool))
            labels = tf.boolean_mask(labels, keep)
            object_boxes = tf.boolean_mask(object_boxes, keep)
        # Channels-first output if requested.
        if data_format == 'NCHW':
            image = tf.transpose(image, perm=(2, 0, 1))
        return image, labels, object_boxes, image_rect
Beispiel #33
0
def preprocess_for_eval(image,
                        labels,
                        bboxes,
                        height,
                        width,
                        out_shape=EVAL_SIZE,
                        data_format='NHWC',
                        use_whiten=True,
                        difficults=None,
                        resize=Resize.WARP_RESIZE,
                        scope='ssd_preprocessing_train'):
    """Prepare an image and its annotations for evaluation.

    The image is cast to float, resized according to `resize`, clipped to
    [0, 255] and whitened with the channel means. Boxes flagged as difficult
    are removed together with their labels.

    Args:
      image: A `Tensor` representing an image of arbitrary size; it must have
        rank 3.
      labels : A Tensor with all labels, or None.
      bboxes : A Tensor with the box coordinates in shape [N, 4], or None.
      height : Not used by this function.
      width : Not used by this function.
      out_shape : Output image size.
      data_format : 'NHWC' keeps the layout; any other value transposes the
        image with perm (2, 0, 1).
      use_whiten : Not used by this function; whitening is always applied.
      difficults : Optional flags; entries that cast to True are dropped.
      resize : Resize strategy.
      scope : Name scope for the created ops.

    Returns:
        A tuple (image, labels, bboxes, bbox_img, num). `bbox_img` is the
        [0, 0, 1, 1] image rectangle after the same box transformation as
        the other boxes; `num` is the sum of `labels` cast to int32, computed
        before difficult boxes are removed (0 when `labels` is None).

    Raises:
        ValueError: If `image` does not have rank 3.
    """

    with tf.name_scope(scope):
        if image.get_shape().ndims != 3:
            raise ValueError('Input must be of size [height, width, C>0]')

        image = tf.to_float(image)

        if labels is not None:
            num = tf.reduce_sum(tf.cast(labels, tf.int32))
        else:
            num = 0

        # Prepend the full-image rectangle so it is transformed with the boxes.
        image_rect = tf.constant([[0., 0., 1., 1.]])
        if bboxes is None:
            all_boxes = image_rect
        else:
            all_boxes = tf.concat([image_rect, bboxes], axis=0)

        if resize == Resize.NONE:
            # Image and boxes are used as they are.
            pass
        elif resize == Resize.CENTRAL_CROP:
            # Crop (or pad) around the centre to the requested size.
            image, all_boxes = tf_image.resize_image_bboxes_with_crop_or_pad(
                image, all_boxes, out_shape[0], out_shape[1])
        elif resize == Resize.PAD_AND_RESIZE:
            # Shrink the image (never enlarge it) so it fits inside out_shape.
            image_shape = tf.shape(image)
            unit = tf.to_double(1.0)
            height_ratio = tf.to_double(out_shape[0] / image_shape[0])
            width_ratio = tf.to_double(out_shape[1] / image_shape[1])
            scale = tf.minimum(unit, tf.minimum(height_ratio, width_ratio))
            scaled_size = scale * tf.to_double(image_shape[0:2])
            target_size = tf.cast(tf.floor(scaled_size), tf.int32)

            image = tf_image.resize_image(
                image,
                target_size,
                method=tf.image.ResizeMethod.BILINEAR,
                align_corners=False)
            # Then pad up to the expected size.
            image, all_boxes = tf_image.resize_image_bboxes_with_crop_or_pad(
                image, all_boxes, out_shape[0], out_shape[1])
        elif resize == Resize.WARP_RESIZE:
            # Stretch the image to out_shape, ignoring the aspect ratio.
            image = tf_image.resize_image(
                image,
                out_shape,
                method=tf.image.ResizeMethod.BILINEAR,
                align_corners=False)

        # Separate the image rectangle from the object boxes again.
        image_rect = all_boxes[0]
        object_boxes = all_boxes[1:]
        # Drop the boxes flagged as difficult, together with their labels.
        if difficults is not None:
            keep = tf.logical_not(tf.cast(difficults, tf.bool))
            labels = tf.boolean_mask(labels, keep)
            object_boxes = tf.boolean_mask(object_boxes, keep)

        clipped = tf.clip_by_value(image, 0., 255.)
        image = tf_image.tf_image_whitened(
            clipped, [_R_MEAN, _G_MEAN, _B_MEAN])

        # Anything other than 'NHWC' gets the channel axis moved to the front.
        if data_format != 'NHWC':
            image = tf.transpose(image, perm=(2, 0, 1))

        return image, labels, object_boxes, image_rect, num
Beispiel #34
0
def likelihood_ratio_filter(node_pairs,
                            modified_adjacency,
                            original_adjacency,
                            d_min,
                            threshold=0.004):
    """
    Filter the input node pairs based on the likelihood ratio test proposed by Zügner et al. 2018, see
    https://dl.acm.org/citation.cfm?id=3220078. A node pair is admissible if adding/removing the edge between its two
    nodes keeps the likelihood ratio below the threshold. Assumes unweighted and undirected graphs.

    Parameters
    ----------
    node_pairs: tf.Tensor, shape (e, 2) dtype int
        The e node pairs to consider, where each node pair consists of the two indices of the nodes.

    modified_adjacency: tf.Tensor shape (N,N) dtype int
        The input (modified) adjacency matrix. Assumed to be unweighted and symmetric.

    original_adjacency: tf.Tensor shape (N,N) dtype int
        The input (original) adjacency matrix. Assumed to be unweighted and symmetric.

    d_min: int
        The minimum degree considered in the Powerlaw distribution.

    threshold: float, default 0.004
        Cutoff value for the unnoticeability constraint. Smaller means stricter constraint. 0.004 corresponds to a
        p-value of 0.95 in the Chi-square distribution with one degree of freedom.

    Returns
    -------
    allowed_mask: tf.Tensor, shape (N * N,), dtype float32
        Flattened adjacency-shaped mask with 1 at the positions (i, j) and (j, i) of every admissible node pair
        and 0 everywhere else.

    current_ratio: tf.Tensor, shape (), dtype float
        The current value of the log likelihood ratio.

    """

    num_nodes = int(modified_adjacency.shape[0])

    degrees_original = tf.cast(
        tf.reduce_sum(original_adjacency, axis=1), tf.float32)
    degrees_current = tf.cast(
        tf.reduce_sum(modified_adjacency, axis=1), tf.float32)

    # Both degree sequences side by side, as a single row.
    degrees_both = tf.concat(
        (degrees_current[None, :], degrees_original[None, :]),
        axis=1)

    # Log likelihoods of the original, the current and the combined degree sequences.
    ll_orig, _, n_orig, sum_log_orig = degree_sequence_log_likelihood(
        degrees_original, d_min)
    ll_current, _, _, _ = degree_sequence_log_likelihood(
        degrees_current, d_min)
    ll_comb, _, _, _ = degree_sequence_log_likelihood(
        degrees_both, d_min)
    current_ratio = -2 * ll_comb + 2 * (ll_orig + ll_current)

    # Log likelihoods that would result from flipping the edge of each node pair.
    new_lls, _, new_ns, new_sum_log = updated_log_likelihood_for_edge_changes(
        node_pairs, tf.cast(modified_adjacency, tf.float32), d_min)

    # Combine the original degree distribution with the one produced by each candidate flip.
    n_combined = n_orig + new_ns
    sum_log_combined = sum_log_orig + new_sum_log
    alpha_combined = compute_alpha(n_combined, sum_log_combined, d_min)
    new_ll_combined = compute_log_likelihood(
        n_combined, alpha_combined, sum_log_combined, d_min)
    new_ratios = -2 * new_ll_combined + 2 * (new_lls + ll_orig)

    # Only flips whose resulting ratio stays below the threshold are admissible.
    admissible = new_ratios < threshold
    admissible_pairs = tf.boolean_mask(node_pairs, admissible)

    # Flattened indices of the admissible pairs [e,2] -> [e,], similar to np.ravel_multi_index ...
    flat_forward = ravel_multiple_indices(
        tf.cast(admissible_pairs, tf.int32), modified_adjacency.shape)
    # ... and of the same pairs in the reverse direction (the graph is undirected).
    flat_backward = ravel_multiple_indices(
        tf.reverse(tf.cast(admissible_pairs, tf.int32), [1]),
        modified_adjacency.shape)

    # [N * N] arrays holding ones at the admissible locations and 0 everywhere else.
    mask_forward = tf.scatter_nd(
        flat_forward[:, None],
        tf.ones_like(flat_forward, dtype=tf.float32),
        shape=[num_nodes * num_nodes])
    mask_backward = tf.scatter_nd(
        flat_backward[:, None],
        tf.ones_like(flat_backward, dtype=tf.float32),
        shape=[num_nodes * num_nodes])

    # Merge both directions; clipping keeps the entries within [0, 1].
    allowed_mask = tf.clip_by_value(mask_forward + mask_backward, 0, 1)

    return allowed_mask, current_ratio
def rpn_losses(anchor_labels, anchor_boxes, label_logits, box_logits):
    # As the name says: the loss functions for the RPN boxes and labels.
    """
    Compute the RPN label (classification) loss and box (regression) loss.

    Anchors labelled -1 are excluded from the label loss; only anchors
    labelled 1 contribute to the box loss. Precision/recall summaries at
    several thresholds are registered via `add_moving_summary` as a side
    effect.

    Args:
        anchor_labels: fHxfWxNA
        anchor_boxes: fHxfWxNAx4, encoded
        label_logits:  fHxfWxNA
        box_logits: fHxfWxNAx4
    Returns:
        label_loss, box_loss (as a two-element list). Both are summed losses
        scaled by 1 / cfg.RPN.BATCH_PER_IM, and are replaced by 0 when there
        are no valid / no positive anchors respectively.
    """
    with tf.device('/cpu:0'):
        # Masks and counts are treated as constants: no gradient flows through them.
        valid_mask = tf.stop_gradient(tf.not_equal(anchor_labels, -1))
        pos_mask = tf.stop_gradient(tf.equal(anchor_labels, 1))
        nr_valid = tf.stop_gradient(tf.count_nonzero(valid_mask,
                                                     dtype=tf.int32),
                                    name='num_valid_anchor')
        nr_pos = tf.identity(tf.count_nonzero(pos_mask, dtype=tf.int32),
                             name='num_pos_anchor')
        # nr_pos is guaranteed >0 in C4. But in FPN. even nr_valid could be 0.

        valid_anchor_labels = tf.boolean_mask(anchor_labels, valid_mask)
    valid_label_logits = tf.boolean_mask(label_logits, valid_mask)

    with tf.name_scope('label_metrics'):
        valid_label_prob = tf.nn.sigmoid(valid_label_logits)
        summaries = []
        with tf.device('/cpu:0'):
            # Precision and recall of the "positive" prediction at each threshold.
            for th in [0.5, 0.2, 0.1]:
                valid_prediction = tf.cast(valid_label_prob > th, tf.int32)
                nr_pos_prediction = tf.reduce_sum(valid_prediction,
                                                  name='num_pos_prediction')
                # Count anchors predicted positive whose prediction equals the label.
                pos_prediction_corr = tf.count_nonzero(tf.logical_and(
                    valid_label_prob > th,
                    tf.equal(valid_prediction, valid_anchor_labels)),
                                                       dtype=tf.int32)
                placeholder = 0.5  # A small value will make summaries appear lower.
                recall = tf.cast(tf.truediv(pos_prediction_corr, nr_pos),
                                 tf.float32)
                # Fall back to the placeholder when the denominator is zero.
                recall = tf.where(tf.equal(nr_pos, 0),
                                  placeholder,
                                  recall,
                                  name='recall_th{}'.format(th))
                precision = tf.cast(
                    tf.truediv(pos_prediction_corr, nr_pos_prediction),
                    tf.float32)
                precision = tf.where(tf.equal(nr_pos_prediction, 0),
                                     placeholder,
                                     precision,
                                     name='precision_th{}'.format(th))
                summaries.extend([precision, recall])
        add_moving_summary(*summaries)
        # That was the summary part; the label and box losses are computed below.

    # Per-level loss summaries in FPN may appear lower due to the use of a small placeholder.
    # But the total RPN loss will be fine.  TODO make the summary op smarter
    placeholder = 0.
    label_loss = tf.nn.sigmoid_cross_entropy_with_logits(
        labels=tf.cast(valid_anchor_labels, tf.float32),
        logits=valid_label_logits)
    label_loss = tf.reduce_sum(label_loss) * (1. / cfg.RPN.BATCH_PER_IM)
    label_loss = tf.where(tf.equal(nr_valid, 0),
                          placeholder,
                          label_loss,
                          name='label_loss')
    # The label loss above is a sigmoid cross entropy.

    pos_anchor_boxes = tf.boolean_mask(anchor_boxes, pos_mask)
    pos_box_logits = tf.boolean_mask(box_logits, pos_mask)
    delta = 1.0 / 9
    # Summed Huber loss, divided by delta.
    box_loss = tf.losses.huber_loss(pos_anchor_boxes,
                                    pos_box_logits,
                                    delta=delta,
                                    reduction=tf.losses.Reduction.SUM) / delta
    box_loss = box_loss * (1. / cfg.RPN.BATCH_PER_IM)
    box_loss = tf.where(tf.equal(nr_pos, 0),
                        placeholder,
                        box_loss,
                        name='box_loss')
    # The box loss above is a Huber loss.

    add_moving_summary(label_loss, box_loss, nr_valid, nr_pos)
    return [label_loss, box_loss]
def off_diagonal_part(matrix):
    """Return the entries of `matrix` that lie off the main diagonal.

    Args:
        matrix: tensor whose first dimension is statically known. The mask is
            an N x N pattern built from ``matrix.shape[0]`` only, so the
            leading two dimensions are expected to be square.

    Returns:
        The result of `tf.boolean_mask` applied with an N x N mask that is
        False on the main diagonal and True everywhere else.
    """
    size = int(matrix.shape[0])
    # tf.boolean_mask is documented to take a boolean mask; convert the float
    # "1 - identity" pattern explicitly instead of relying on how numeric
    # masks happen to be handled.
    off_diagonal_mask = tf.cast(1 - tf.eye(size), tf.bool)
    return tf.boolean_mask(matrix, off_diagonal_mask)
Beispiel #37
0
def yolo_eval_batch(yolo_outputs,
                    anchors,
                    num_classes,
                    image_shape,
                    batch_size=1,
                    max_boxes=20,
                    score_threshold=.6,
                    iou_threshold=.5):
    """Decode YOLO outputs and filter them per image and per class.

    Every output layer is decoded with `yolo_boxes_and_scores`, the results
    are concatenated along axis 1, and then, for each image of the batch and
    each class, boxes scoring at least `score_threshold` go through
    non-max suppression.

    Args:
        yolo_outputs: list of per-layer network outputs.
        anchors: anchors, indexed with a per-layer list of anchor indices.
        num_classes: number of classes.
        image_shape: forwarded to `yolo_boxes_and_scores`.
        batch_size: number of images to process.
        max_boxes: maximum number of boxes kept by NMS for each class.
        score_threshold: minimum class score for a box to be considered.
        iou_threshold: IoU threshold handed to NMS.

    Returns:
        Three lists with one entry per image: boxes, scores and class ids,
        each concatenated over all classes.
    """
    print('Inference batch size:', batch_size)
    num_layers = len(yolo_outputs)
    if num_layers == 3:
        anchor_mask = [[6, 7, 8], [3, 4, 5], [0, 1, 2]]
    else:
        anchor_mask = [[3, 4, 5], [1, 2, 3]]  # default setting
    input_shape = K.shape(yolo_outputs[0])[1:3] * 32

    # Decode every output layer with the anchors assigned to it.
    per_layer_boxes = []
    per_layer_scores = []
    for layer_idx in range(num_layers):
        layer_boxes, layer_scores = yolo_boxes_and_scores(
            yolo_outputs[layer_idx],
            anchors[anchor_mask[layer_idx]],
            num_classes,
            input_shape,
            image_shape,
            batch_size=batch_size)
        per_layer_boxes.append(layer_boxes)
        per_layer_scores.append(layer_scores)

    merged_boxes = K.concatenate(per_layer_boxes, axis=1)
    merged_scores = K.concatenate(per_layer_scores, axis=1)

    batch_boxes = []
    batch_scores = []
    batch_classes = []

    for image_idx in range(batch_size):
        image_boxes = merged_boxes[image_idx]
        image_scores = merged_scores[image_idx]
        keep = image_scores >= score_threshold
        nms_limit = K.constant(max_boxes, dtype='int32')

        kept_boxes = []
        kept_scores = []
        kept_classes = []
        for class_id in range(num_classes):
            # TODO: use keras backend instead of tf.
            candidate_boxes = tf.boolean_mask(image_boxes, keep[:, class_id])
            candidate_scores = tf.boolean_mask(image_scores[:, class_id],
                                               keep[:, class_id])
            selected = tf.image.non_max_suppression(
                candidate_boxes,
                candidate_scores,
                nms_limit,
                iou_threshold=iou_threshold)
            surviving_boxes = K.gather(candidate_boxes, selected)
            surviving_scores = K.gather(candidate_scores, selected)
            # Tag every surviving box with the class it was selected for.
            surviving_classes = K.ones_like(surviving_scores, 'int32') * class_id
            kept_boxes.append(surviving_boxes)
            kept_scores.append(surviving_scores)
            kept_classes.append(surviving_classes)

        batch_boxes.append(K.concatenate(kept_boxes, axis=0))
        batch_scores.append(K.concatenate(kept_scores, axis=0))
        batch_classes.append(K.concatenate(kept_classes, axis=0))

    return batch_boxes, batch_scores, batch_classes
Beispiel #38
0
    def train_batch(self, source_charseq_ids, source_charseqs, target_charseq_ids, target_charseqs):
        """Run one optimization step on a batch and return the predicted characters.

        NOTE: this is an exercise template. The TODO items must be completed
        before the method can run; they are expected to define
        `source_encoded`, `loss` and the values returned by the decoder hooks.

        Args:
            source_charseq_ids: indices used to gather rows of `source_encoded`;
                positions equal to 0 are masked out.
            source_charseqs: source character sequences (input of the
                embedding TODO).
            target_charseq_ids: indices used to gather rows of `target_charseqs`.
            target_charseqs: target character sequences.

        Returns:
            `tf.math.argmax(output_layer, axis=2)` for the decoder output.
        """
        # TODO(lemmatizer_noattn): Modify target_charseqs by appending EOW; only the version with appended EOW is used from now on.

        with tf.GradientTape() as tape:
            # TODO(lemmatizer_noattn): Embed source charseqs
            # TODO: Run self._model.source_rnn on the embedded sequences, returning outputs in `source_encoded`.

            # Copy the source_encoded to corresponding batch places, and then flatten it
            source_mask = tf.not_equal(source_charseq_ids, 0)
            source_encoded = tf.boolean_mask(tf.gather(source_encoded, source_charseq_ids), source_mask)
            targets = tf.boolean_mask(tf.gather(target_charseqs, target_charseq_ids), source_mask)

            class DecoderTraining(decoder.BaseDecoder):
                # Unfinished hooks raise NotImplementedError; `NotImplemented` is a
                # non-callable constant, so calling it would raise a TypeError instead.
                @property
                def batch_size(self): raise NotImplementedError() # TODO: Return batch size of self._source_encoded, using tf.shape
                @property
                def output_size(self): raise NotImplementedError() # TODO(lemmatizer_noattn): Return number of the generated logits
                @property
                def output_dtype(self): raise NotImplementedError() # TODO(lemmatizer_noattn): Return the type of the generated logits

                def _with_attention(self, inputs, states):
                    # TODO: Compute the attention.
                    # - Take self._source_encoded and pass it through the self._model.attention_source_layer.
                    #   Because self._source_encoded does not change, you should in fact do it in `initialize`.
                    # - Pass `states` through self._model.attention_state_layer.
                    # - Sum the two outputs. However, the first has shape [a, b, c] and the second [a, c]. Therefore,
                    #   somehow expand the second to [a, b, c] first. (Hint: use broadcasting rules.)
                    # - Pass the sum through `tf.tanh`, then self._model.attention_weight_layer.
                    # - Then, run softmax on a suitable axis (the one corresponding to characters), generating `weights`.
                    # - Multiply `self._source_encoded` with `weights` and sum the result in the axis
                    #   corresponding to characters, generating `attention`. Therefore, `attention` is a fixed-size
                    #   representation for every batch element, independently on how many characters had
                    #   the corresponding input forms.
                    # - Finally concatenate `inputs` and `attention` and return the result.
                    # A comment-only body does not parse; fail explicitly until implemented.
                    raise NotImplementedError()

                def initialize(self, layer_inputs, initial_state=None):
                    self._model, self._source_encoded, self._targets = layer_inputs

                    # TODO(lemmatozer_noattn): Define `finished` as a vector of self.batch_size of `False` [see tf.fill].
                    # TODO(lemmatizer_noattn): Define `inputs` as a vector of self.batch_size MorphoDataset.Factor.BOW [see tf.fill],
                    # embedded using self._model.target_embedding
                    # TODO: Define `states` as the last words from self._source_encoded
                    # TODO: Pass `inputs` through `self._with_attention(inputs, states)`.
                    return finished, inputs, states

                def step(self, time, inputs, states):
                    # TODO(lemmatizer_noattn): Pass `inputs` and `[states]` through self._model.target_rnn_cell, generating
                    # `outputs, [states]`.
                    # TODO(lemmatizer_noattn): Overwrite `outputs` by passing them through self._model.target_output_layer,
                    # TODO(lemmatizer_noattn): Define `next_inputs` by embedding `time`-th words from `self._targets`.
                    # TODO(lemmatizer_noattn): Define `finished` as True if `time`-th word from `self._targets` is EOW, False otherwise.
                    # Again, no == or !=.
                    # TODO: Pass `inputs` through `self._with_attention(inputs, states)`.
                    return outputs, states, next_inputs, finished

            output_layer, _, _ = DecoderTraining()([self._model, source_encoded, targets])
            # TODO(lemmatizer_noattn): Compute loss. Use only nonzero `targets` as a mask.
        gradients = tape.gradient(loss, self._model.variables)
        self._optimizer.apply_gradients(zip(gradients, self._model.variables))

        # Record the training metrics at the optimizer's current step.
        tf.summary.experimental.set_step(self._optimizer.iterations)
        with self._writer.as_default():
            for name, metric in self._metrics_training.items():
                metric.reset_states()
                if name == "loss": metric(loss)
                else: metric(targets, output_layer, tf.not_equal(targets, 0))
                tf.summary.scalar("train/{}".format(name), metric.result())

        return tf.math.argmax(output_layer, axis=2)

    def train_epoch(self, dataset, args):
        """Iterate over the batches of `dataset` and print a sample after each one.

        For every batch, the second form (index 1) and the second gold lemma
        are decoded through the dataset alphabets, skipping zero entries, and
        printed together with the current training accuracy and the lemma
        decoded from `predictions[0]`.

        Args:
            dataset: object providing `batches`, `data`, `FORMS` and `LEMMAS`.
            args: object whose `batch_size` is passed to `dataset.batches`.
        """
        for batch in dataset.batches(args.batch_size):
            # TODO(lemmatizer_noattn): Call train_batch, storing results in `predictions`.

            form_ids = batch[dataset.FORMS].charseqs[1]
            form = "".join(
                dataset.data[dataset.FORMS].alphabet[char_id]
                for char_id in form_ids
                if char_id)

            lemma_ids = batch[dataset.LEMMAS].charseqs[1]
            gold_lemma = "".join(
                dataset.data[dataset.LEMMAS].alphabet[char_id]
                for char_id in lemma_ids
                if char_id)
            # Predictions are read only at positions where the gold lemma is non-zero.
            system_lemma = "".join(
                dataset.data[dataset.LEMMAS].alphabet[predictions[0][position]]
                for position, char_id in enumerate(lemma_ids)
                if char_id)

            accuracy = float(self._metrics_training["accuracy"].result())
            print(accuracy, form, gold_lemma, system_lemma)


    @tf.function(input_signature=[tf.TensorSpec(shape=[None, None], dtype=tf.int32)] * 2, autograph=False)
    def predict_batch(self, source_charseq_ids, source_charseqs):
        """Decode predictions for a batch of source character sequences.

        NOTE: this is an exercise template. The TODO items must be completed
        before the method can run; they are expected to define
        `source_encoded` and the values returned by the decoder hooks.

        Args:
            source_charseq_ids: indices used to gather rows of `source_encoded`;
                positions equal to 0 are masked out.
            source_charseqs: source character sequences; their second
                dimension plus 10 bounds the number of decoding iterations.

        Returns:
            The first element of the `DecoderPrediction` result.
        """
        # TODO(lemmatizer_noattn)(train_batch): Embed source charseqs
        # TODO(train_batch): Run self._model.source_rnn on the embedded sequences, returning outputs in `source_encoded`.

        # Copy the source_encoded to corresponding batch places, and then flatten it
        source_mask = tf.not_equal(source_charseq_ids, 0)
        source_encoded = tf.boolean_mask(tf.gather(source_encoded, source_charseq_ids), source_mask)

        class DecoderPrediction(decoder.BaseDecoder):
            # Unfinished hooks raise NotImplementedError; `NotImplemented` is a
            # non-callable constant, so calling it would raise a TypeError instead.
            @property
            def batch_size(self): raise NotImplementedError() # TODO(train_batch): Return batch size of self._source_encoded, using tf.shape
            @property
            def output_size(self): raise NotImplementedError() # TODO(lemmatizer_noattn): Return 1 because we are returning directly the predictions
            @property
            def output_dtype(self): raise NotImplementedError() # TODO(lemmatizer_noattn): Return tf.int32 because the predictions are integral

            def _with_attention(self, inputs, states):
                # TODO: A copy of _with_attention from train_batch; you can of course
                # move the definition to a place where it can be reused in both places.
                # A comment-only body does not parse; fail explicitly until implemented.
                raise NotImplementedError()

            def initialize(self, layer_inputs, initial_state=None):
                self._model, self._source_encoded = layer_inputs

                # TODO(lemmatizer_noattn)(train_batch): Define `finished` as a vector of self.batch_size of `False` [see tf.fill].
                # TODO(lemmatizer_noattn)(train_batch): Define `inputs` as a vector of self.batch_size MorphoDataset.Factor.BOW [see tf.fill],
                # embedded using self._model.target_embedding
                # TODO(train_batch): Define `states` as the last words from self._source_encoded
                # TODO(train_batch): Pass `inputs` through `self._with_attention(inputs, states)`.
                return finished, inputs, states

            def step(self, time, inputs, states):
                # TODO(lemmatizer_noattn)(train_batch): Pass `inputs` and `[states]` through self._model.target_rnn_cell, generating
                # `outputs, [states]`.
                # TODO(lemmatizer_noattn)(train_batch): Overwrite `outputs` by passing them through self._model.target_output_layer,
                # TODO(lemmatizer_noattn): Overwrite `outputs` by passing them through `tf.argmax` on suitable axis and with
                # `output_type=tf.int32` parameter.
                # TODO(lemmatizer_noattn): Define `next_inputs` by embedding the `outputs`
                # TODO(lemmatizer_noattn): Define `finished` as True if `outputs` are EOW, False otherwise. [No == or !=].
                # TODO: Pass `inputs` through `self._with_attention(inputs, states)`.
                return outputs, states, next_inputs, finished

        predictions, _, _ = DecoderPrediction(maximum_iterations=tf.shape(source_charseqs)[1] + 10)([self._model, source_encoded])
        return predictions

    @tf.function(input_signature=[tf.TensorSpec(shape=[None, None], dtype=tf.int32)] * 4, autograph=False)
    def evaluate_batch(self, source_charseq_ids, source_charseqs, target_charseq_ids, target_charseqs):
        """Predict a batch and update the whole-sequence accuracy metric.

        Predictions and targets are compared only at positions where the
        target is non-zero; a sequence counts as correct when all of those
        positions match. The per-sequence results are fed to
        `self._metrics_evaluation["accuracy"]`. Nothing is returned.
        """
        predicted = self.predict_batch(source_charseq_ids, source_charseqs)

        # Targets get an EOW appended, are gathered to their batch places and
        # flattened with the mask derived from the source ids.
        gold_with_eow = self._append_eow(target_charseqs)
        gathered_gold = tf.gather(gold_with_eow, target_charseq_ids)
        is_word = tf.not_equal(source_charseq_ids, 0)
        gold = tf.boolean_mask(gathered_gold, is_word)

        valid = tf.cast(tf.not_equal(gold, 0), tf.int32)
        # Pad the predictions with zeros, then cut them to the width of the targets.
        padded = tf.concat([predicted, tf.zeros_like(gold)], axis=1)
        aligned = padded[:, :tf.shape(gold)[1]]
        matches = tf.equal(aligned * valid, gold * valid)
        sequence_correct = tf.reduce_all(matches, axis=1)
        self._metrics_evaluation["accuracy"](sequence_correct)

    def evaluate(self, dataset, dataset_name, args):
        """Evaluate on `dataset` and return the evaluation metrics as floats.

        All evaluation metrics are reset, every batch is passed through
        `evaluate_batch`, and the resulting metric values are written as
        summaries named "<dataset_name>/<metric name>".

        Returns:
            dict mapping each metric name to its float value.
        """
        tracked = self._metrics_evaluation
        for metric in tracked.values():
            metric.reset_states()

        for batch in dataset.batches(args.batch_size):
            forms = batch[dataset.FORMS]
            lemmas = batch[dataset.LEMMAS]
            self.evaluate_batch(forms.charseq_ids, forms.charseqs,
                                lemmas.charseq_ids, lemmas.charseqs)

        metrics = {}
        for name, metric in tracked.items():
            metrics[name] = float(metric.result())

        with self._writer.as_default():
            for name in metrics:
                tf.summary.scalar("{}/{}".format(dataset_name, name), metrics[name])

        return metrics


if __name__ == "__main__":
    import argparse
    import datetime
    import os
    import re

    # Command-line options, registered in the order they appear in --help.
    parser = argparse.ArgumentParser()
    for flag, options in [
        ("--batch_size", dict(default=10, type=int, help="Batch size.")),
        ("--cle_dim", dict(default=64, type=int, help="CLE embedding dimension.")),
        ("--epochs", dict(default=10, type=int, help="Number of epochs.")),
        ("--max_sentences", dict(default=5000, type=int, help="Maximum number of sentences to load.")),
        ("--recodex", dict(default=False, action="store_true", help="Evaluation in ReCodEx.")),
        ("--rnn_dim", dict(default=64, type=int, help="RNN cell dimension.")),
        ("--threads", dict(default=1, type=int, help="Maximum number of threads to use.")),
    ]:
        parser.add_argument(flag, **options)
    args = parser.parse_args()

    # Fixed seeds and a bounded thread count.
    np.random.seed(42)
    tf.random.set_seed(42)
    if args.recodex:
        # Register seeded initializers under the names used by the layers.
        tf.keras.utils.get_custom_objects().update({
            "glorot_uniform": lambda: tf.initializers.glorot_uniform(seed=42),
            "orthogonal": lambda: tf.initializers.orthogonal(seed=42),
            "uniform": lambda: tf.initializers.RandomUniform(seed=42),
        })
    tf.config.threading.set_inter_op_parallelism_threads(args.threads)
    tf.config.threading.set_intra_op_parallelism_threads(args.threads)

    # Log directory: script name, timestamp and the abbreviated option values.
    script_name = os.path.basename(__file__)
    timestamp = datetime.datetime.now().strftime("%Y-%m-%d_%H%M%S")
    option_tags = [
        "{}={}".format(re.sub("(.)[^_]*_?", r"\1", key), value)
        for key, value in sorted(vars(args).items())
    ]
    args.logdir = os.path.join(
        "logs", "{}-{}-{}".format(script_name, timestamp, ",".join(option_tags)))

    # Load the data
    morpho = MorphoDataset("czech_cac", max_sentences=args.max_sentences)

    # Build the network, then train it and evaluate on dev after every epoch.
    source_alphabet = morpho.train.data[morpho.train.FORMS].alphabet
    target_alphabet = morpho.train.data[morpho.train.LEMMAS].alphabet
    network = Network(args,
                      num_source_chars=len(source_alphabet),
                      num_target_chars=len(target_alphabet))
    for epoch in range(args.epochs):
        network.train_epoch(morpho.train, args)
        metrics = network.evaluate(morpho.dev, "dev", args)
        print("Evaluation on {}, epoch {}: {}".format("dev", epoch + 1, metrics))

    # Final test-set accuracy, written as a percentage.
    metrics = network.evaluate(morpho.test, "test", args)
    with open("lemmatizer.out", "w") as out_file:
        print("{:.2f}".format(100 * metrics["accuracy"]), file=out_file)
Beispiel #39
0
    def predict_batch(self, source_charseq_ids, source_charseqs):
        """Decode predictions for a batch of source character sequences.

        NOTE: this is an exercise template. The TODO items must be completed
        before the method can run; they are expected to define
        `source_encoded` and the values returned by the decoder hooks.

        Args:
            source_charseq_ids: indices used to gather rows of `source_encoded`;
                positions equal to 0 are masked out.
            source_charseqs: source character sequences; their second
                dimension plus 10 bounds the number of decoding iterations.

        Returns:
            The first element of the `DecoderPrediction` result.
        """
        # TODO(lemmatizer_noattn)(train_batch): Embed source charseqs
        # TODO(train_batch): Run self._model.source_rnn on the embedded sequences, returning outputs in `source_encoded`.

        # Copy the source_encoded to corresponding batch places, and then flatten it
        source_mask = tf.not_equal(source_charseq_ids, 0)
        source_encoded = tf.boolean_mask(tf.gather(source_encoded, source_charseq_ids), source_mask)

        class DecoderPrediction(decoder.BaseDecoder):
            # Unfinished hooks raise NotImplementedError; `NotImplemented` is a
            # non-callable constant, so calling it would raise a TypeError instead.
            @property
            def batch_size(self): raise NotImplementedError() # TODO(train_batch): Return batch size of self._source_encoded, using tf.shape
            @property
            def output_size(self): raise NotImplementedError() # TODO(lemmatizer_noattn): Return 1 because we are returning directly the predictions
            @property
            def output_dtype(self): raise NotImplementedError() # TODO(lemmatizer_noattn): Return tf.int32 because the predictions are integral

            def _with_attention(self, inputs, states):
                # TODO: A copy of _with_attention from train_batch; you can of course
                # move the definition to a place where it can be reused in both places.
                # A comment-only body does not parse; fail explicitly until implemented.
                raise NotImplementedError()

            def initialize(self, layer_inputs, initial_state=None):
                self._model, self._source_encoded = layer_inputs

                # TODO(lemmatizer_noattn)(train_batch): Define `finished` as a vector of self.batch_size of `False` [see tf.fill].
                # TODO(lemmatizer_noattn)(train_batch): Define `inputs` as a vector of self.batch_size MorphoDataset.Factor.BOW [see tf.fill],
                # embedded using self._model.target_embedding
                # TODO(train_batch): Define `states` as the last words from self._source_encoded
                # TODO(train_batch): Pass `inputs` through `self._with_attention(inputs, states)`.
                return finished, inputs, states

            def step(self, time, inputs, states):
                # TODO(lemmatizer_noattn)(train_batch): Pass `inputs` and `[states]` through self._model.target_rnn_cell, generating
                # `outputs, [states]`.
                # TODO(lemmatizer_noattn)(train_batch): Overwrite `outputs` by passing them through self._model.target_output_layer,
                # TODO(lemmatizer_noattn): Overwrite `outputs` by passing them through `tf.argmax` on suitable axis and with
                # `output_type=tf.int32` parameter.
                # TODO(lemmatizer_noattn): Define `next_inputs` by embedding the `outputs`
                # TODO(lemmatizer_noattn): Define `finished` as True if `outputs` are EOW, False otherwise. [No == or !=].
                # TODO: Pass `inputs` through `self._with_attention(inputs, states)`.
                return outputs, states, next_inputs, finished

        predictions, _, _ = DecoderPrediction(maximum_iterations=tf.shape(source_charseqs)[1] + 10)([self._model, source_encoded])
        return predictions

    @tf.function(input_signature=[tf.TensorSpec(shape=[None, None], dtype=tf.int32)] * 4, autograph=False)
    def evaluate_batch(self, source_charseq_ids, source_charseqs, target_charseq_ids, target_charseqs):
        """Predict a batch and update the whole-sequence accuracy metric.

        Predictions and targets are compared only at positions where the
        target is non-zero; a sequence counts as correct when all of those
        positions match. The per-sequence results are fed to
        `self._metrics_evaluation["accuracy"]`. Nothing is returned.
        """
        predicted = self.predict_batch(source_charseq_ids, source_charseqs)

        # Targets get an EOW appended, are gathered to their batch places and
        # flattened with the mask derived from the source ids.
        gold_with_eow = self._append_eow(target_charseqs)
        gathered_gold = tf.gather(gold_with_eow, target_charseq_ids)
        is_word = tf.not_equal(source_charseq_ids, 0)
        gold = tf.boolean_mask(gathered_gold, is_word)

        valid = tf.cast(tf.not_equal(gold, 0), tf.int32)
        # Pad the predictions with zeros, then cut them to the width of the targets.
        padded = tf.concat([predicted, tf.zeros_like(gold)], axis=1)
        aligned = padded[:, :tf.shape(gold)[1]]
        matches = tf.equal(aligned * valid, gold * valid)
        sequence_correct = tf.reduce_all(matches, axis=1)
        self._metrics_evaluation["accuracy"](sequence_correct)

    def evaluate(self, dataset, dataset_name, args):
        """Evaluate on `dataset` and return the evaluation metrics as floats.

        All evaluation metrics are reset, every batch is passed through
        `evaluate_batch`, and the resulting metric values are written as
        summaries named "<dataset_name>/<metric name>".

        Returns:
            dict mapping each metric name to its float value.
        """
        tracked = self._metrics_evaluation
        for metric in tracked.values():
            metric.reset_states()

        for batch in dataset.batches(args.batch_size):
            forms = batch[dataset.FORMS]
            lemmas = batch[dataset.LEMMAS]
            self.evaluate_batch(forms.charseq_ids, forms.charseqs,
                                lemmas.charseq_ids, lemmas.charseqs)

        metrics = {}
        for name, metric in tracked.items():
            metrics[name] = float(metric.result())

        with self._writer.as_default():
            for name in metrics:
                tf.summary.scalar("{}/{}".format(dataset_name, name), metrics[name])

        return metrics
Beispiel #40
0
'''
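#1. where called with a single argument
'''
    where takes a single tensor: a boolean matrix
    a [[true ,false,false]
       [false,true ,false]
       [false,false,true ]]
    a.shape->[3,3]
    where(a) returns the coordinates of the true entries
    The top-left element is taken as the coordinate origin,
    so the first true is at (0,0), the second at (1,1), the third at (2,2)
    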
'''
a = tf.random.normal([3, 3])
mask = a > 0  # entries greater than 0 become True, all others False; mask has the same shape as a
tf.boolean_mask(a, mask)  # select the values of a whose position in mask is True

indices = tf.where(mask)  # coordinates of the True positions in mask
tf.gather_nd(a, indices)  # gather the values at those coordinates

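#2. where called with three arguments
#where(cond,a,b)
'''
    Where cond is True, the element at that position is taken from matrix a;
    where it is False, the element is taken from matrix b. Together they form a new matrix.
    Purpose: selectively pick values from matrices a and b.
'''
# Example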
a = tf.ones([3, 3])
b = tf.zeros([3, 3])
c = tf.constant([True, True, False], [True, False, False],
Beispiel #41
0
    def _build_policy_loss(self, i):
        """Build policy loss and other output tensors.

        Besides building the loss, this method initializes the dual
        parameters (`self._param_eta`, `self._param_v`) and compiles the
        `self._f_dual`, `self._f_dual_grad` and `self._f_policy_kl`
        functions.

        Args:
            i (namedtuple): Collection of variables to compute policy loss.

        Returns:
            tf.Tensor: Policy loss.

        NOTE(review): earlier documentation also listed the mean policy KL
        as a return value and a NotImplementedError for recurrent policies;
        neither is visible in this method (the KL is only exposed through
        `self._f_policy_kl`).

        """
        pol_dist = self._policy_network.dist
        old_pol_dist = self._old_policy_network.dist

        # Initialize dual params
        self._param_eta = 15.
        self._param_v = np.random.rand(
            self._env_spec.observation_space.flat_dim * 2 + 4)

        with tf.name_scope('bellman_error'):
            # Rewards of the valid steps plus the feature difference
            # projected onto param_v.
            delta_v = tf.boolean_mask(i.reward_var,
                                      i.valid_var) + tf.tensordot(
                                          i.feat_diff, i.param_v, 1)

        with tf.name_scope('policy_loss'):
            ll = pol_dist.log_prob(i.action_var)
            ll = tf.boolean_mask(ll, i.valid_var)
            # Log-likelihoods weighted by exp(delta_v / eta); the maximum is
            # subtracted inside the exponential.
            loss = -tf.reduce_mean(
                ll * tf.exp(delta_v / i.param_eta -
                            tf.reduce_max(delta_v / i.param_eta)))

            # L2 regularization averaged over the regularizable variables.
            reg_params = self.policy.get_regularizable_vars()
            loss += self._l2_reg_loss * tf.reduce_sum(
                [tf.reduce_mean(tf.square(param))
                 for param in reg_params]) / len(reg_params)

        with tf.name_scope('kl'):
            kl = old_pol_dist.kl_divergence(pol_dist)
            pol_mean_kl = tf.reduce_mean(kl)

        with tf.name_scope('dual'):
            # The maximum subtracted inside the exponential is added back
            # after the logarithm (last term).
            dual_loss = i.param_eta * self._epsilon + (
                i.param_eta * tf.math.log(
                    tf.reduce_mean(
                        tf.exp(delta_v / i.param_eta -
                               tf.reduce_max(delta_v / i.param_eta)))) +
                i.param_eta * tf.reduce_max(delta_v / i.param_eta))

            dual_loss += self._l2_reg_dual * (tf.square(i.param_eta) +
                                              tf.square(1 / i.param_eta))

            dual_grad = tf.gradients(dual_loss, [i.param_eta, i.param_v])

        # yapf: disable
        self._f_dual = tensor_utils.compile_function(
            flatten_inputs(self._dual_opt_inputs),
            dual_loss,
            log_name='f_dual')
        # yapf: enable

        self._f_dual_grad = tensor_utils.compile_function(
            flatten_inputs(self._dual_opt_inputs),
            dual_grad,
            log_name='f_dual_grad')

        self._f_policy_kl = tensor_utils.compile_function(
            flatten_inputs(self._policy_opt_inputs),
            pol_mean_kl,
            log_name='f_policy_kl')

        return loss
Beispiel #42
0
    def _build_policy_loss(self, i):
        """Build policy loss and other output tensors.

        As side effects, this builds the policy, encoder and inference
        networks (current and old) and stores the diagnostic callables
        `self._f_policy_kl`, `self._f_rewards` and `self._f_returns`.

        Args:
            i (namedtuple): Collection of variables to compute policy loss.

        Returns:
            tf.Tensor: Policy loss.
            tf.Tensor: Mean policy KL divergence.
            tf.Tensor: Result of `self._build_encoder_kl()` (named
                `encoder_mean_kl` in the code).

        """
        # pylint: disable=too-many-statements
        self._policy_network, self._encoder_network = (self.policy.build(
            i.augmented_obs_var, i.task_var, name='loss_policy'))
        self._old_policy_network, self._old_encoder_network = (
            self._old_policy.build(i.augmented_obs_var,
                                   i.task_var,
                                   name='loss_old_policy'))
        self._infer_network = self._inference.build(i.augmented_traj_var,
                                                    name='loss_infer')
        self._old_infer_network = self._old_inference.build(
            i.augmented_traj_var, name='loss_old_infer')

        pol_dist = self._policy_network.dist
        old_pol_dist = self._old_policy_network.dist

        # Entropy terms
        encoder_entropy, inference_ce, policy_entropy = (
            self._build_entropy_terms(i))

        # Augment the path rewards with entropy terms
        with tf.name_scope('augmented_rewards'):
            rewards = (i.reward_var -
                       (self.inference_ce_coeff * inference_ce) +
                       (self._policy_ent_coeff * policy_entropy))

        with tf.name_scope('policy_loss'):
            with tf.name_scope('advantages'):
                adv = compute_advantages(self._discount,
                                         self._gae_lambda,
                                         self.max_episode_length,
                                         i.baseline_var,
                                         rewards,
                                         name='advantages')
                adv = tf.reshape(adv, [-1, self.max_episode_length])

            # Optionally normalize advantages
            eps = tf.constant(1e-8, dtype=tf.float32)
            if self._center_adv:
                adv = center_advs(adv, axes=[0], eps=eps)

            if self._positive_adv:
                adv = positive_advs(adv, eps)

            # Calculate loss function and KL divergence
            with tf.name_scope('kl'):
                kl = old_pol_dist.kl_divergence(pol_dist)
                pol_mean_kl = tf.reduce_mean(kl)

            ll = pol_dist.log_prob(i.action_var, name='log_likelihood')

            # Calculate surrogate loss
            with tf.name_scope('surr_loss'):
                old_ll = old_pol_dist.log_prob(i.action_var)
                # The old policy's log-likelihood is treated as a constant.
                old_ll = tf.stop_gradient(old_ll)
                # Clip early to avoid overflow
                lr = tf.exp(
                    tf.minimum(ll - old_ll, np.log(1 + self._lr_clip_range)))

                surrogate = lr * adv

                surrogate = tf.debugging.check_numerics(surrogate,
                                                        message='surrogate')

            # Finalize objective function
            with tf.name_scope('loss'):
                lr_clip = tf.clip_by_value(lr,
                                           1 - self._lr_clip_range,
                                           1 + self._lr_clip_range,
                                           name='lr_clip')
                surr_clip = lr_clip * adv
                obj = tf.minimum(surrogate, surr_clip, name='surr_obj')
                # Keep only the entries selected by valid_var.
                obj = tf.boolean_mask(obj, i.valid_var)
                # Maximize E[surrogate objective] by minimizing
                # -E_t[surrogate objective]
                loss = -tf.reduce_mean(obj)

                # Encoder entropy bonus
                loss -= self.encoder_ent_coeff * encoder_entropy

            encoder_mean_kl = self._build_encoder_kl()

            # Diagnostic functions
            self._f_policy_kl = tf.compat.v1.get_default_session(
            ).make_callable(pol_mean_kl,
                            feed_list=flatten_inputs(self._policy_opt_inputs))

            self._f_rewards = tf.compat.v1.get_default_session().make_callable(
                rewards, feed_list=flatten_inputs(self._policy_opt_inputs))

            returns = discounted_returns(self._discount,
                                         self.max_episode_length,
                                         rewards,
                                         name='returns')
            self._f_returns = tf.compat.v1.get_default_session().make_callable(
                returns, feed_list=flatten_inputs(self._policy_opt_inputs))

        return loss, pol_mean_kl, encoder_mean_kl
Beispiel #43
0
def metric_c(y_true, y_pred):
    """Categorical accuracy restricted to samples whose true class is 2.

    Assumes the usual Keras metric layout, i.e. `y_true` is one-hot encoded
    with shape [batch, num_classes] and `y_pred` has the same shape.

    Args:
        y_true: one-hot ground-truth labels.
        y_pred: predicted class scores.

    Returns:
        The result of `tf.keras.metrics.categorical_accuracy` evaluated on
        the rows of `y_true` / `y_pred` whose true class index equals 2.
    """
    # The class index lives on the last axis. Reducing over axis 0 (the batch
    # axis) would yield one entry per class column, so the mask below would
    # not line up with the rows selected by `tf.boolean_mask`.
    classes = tf.argmax(y_true, axis=-1)
    is_target_class = tf.equal(classes, 2)
    class_true = tf.boolean_mask(y_true, is_target_class)
    class_pred = tf.boolean_mask(y_pred, is_target_class)
    return tf.keras.metrics.categorical_accuracy(class_true, class_pred)
Beispiel #44
0
    def make_model(self):
        """Create the placeholders, the graph model and the per-task losses.

        The problem variant is read from ``self.args['--pr']`` ('molecule',
        'identity', 'btb', or anything else for the default layout). It
        selects the shapes of the target placeholders and the way the loss
        is computed.

        The method returns nothing; it populates:
          * ``self.placeholders``: target values / masks, 'num_graphs' and
            'out_layer_dropout_keep_prob'.
          * ``self.weights``: the gate / transform MLPs of every task.
          * ``self.ops``: node representations, 'losses', 'losses_edges',
            'loss', 'loss_edges' and, for non-molecule variants, the
            computed values, labels and masks of the last processed task.

        Edge outputs ('losses_edges' entries and 'computed_values_edges')
        exist only for the 'btb' variant. For every other variant
        'losses_edges' stays empty here, so 'loss_edges' is the sum over an
        empty list.
        """
        # TODO: refactor
        problem = self.args['--pr']
        is_btb = problem == 'btb'
        num_tasks = len(self.params['task_ids'])

        if problem == 'molecule':
            self.placeholders['target_values'] = tf.compat.v1.placeholder(
                tf.float32, [num_tasks, None], name='target_values')
            self.placeholders['target_mask'] = tf.compat.v1.placeholder(
                tf.float32, [num_tasks, None], name='target_mask')
        elif problem == 'identity':
            self.placeholders['target_values'] = tf.compat.v1.placeholder(
                tf.float32, [None, None, self.num_edge_types, None],
                name='target_values')
            self.placeholders['target_mask'] = tf.compat.v1.placeholder(
                tf.float32, [self.num_edge_types, None], name='target_mask')
        elif is_btb:
            self.placeholders['target_values_head'] = tf.compat.v1.placeholder(
                tf.float32, [None, None], name='target_values')
            self.placeholders['target_mask'] = tf.compat.v1.placeholder(
                tf.float32, [self.output_size_edges, None], name='target_mask')
            self.placeholders[
                'target_values_edges'] = tf.compat.v1.placeholder(
                    tf.float32, [None, None], name='target_values')
        else:
            self.placeholders['target_values'] = tf.compat.v1.placeholder(
                tf.float32, [None, num_tasks, None], name='target_values')
            self.placeholders['target_mask'] = tf.compat.v1.placeholder(
                tf.float32, [num_tasks, None], name='target_mask')
        self.placeholders['num_graphs'] = tf.compat.v1.placeholder(
            tf.int32, [], name='num_graphs')
        self.placeholders[
            'out_layer_dropout_keep_prob'] = tf.compat.v1.placeholder(
                tf.float32, [], name='out_layer_dropout_keep_prob')

        with tf.compat.v1.variable_scope("graph_model"):
            self.prepare_specific_graph_model()
            # This does the actual graph work:
            self.ops[
                'initial_node_representations'] = self.get_initial_node_representation(
                )
            if self.params['use_graph']:
                self.ops[
                    'final_node_representations'] = self.compute_final_node_representations(
                        self.ops['initial_node_representations'])
                self.ops[
                    'second_node_representations'] = self.compute_final_node_representations(
                        self.ops['initial_node_representations'], 1)
            else:
                self.ops['final_node_representations'] = tf.zeros_like(
                    self.placeholders['initial_node_representation'])

        self.ops['losses'] = []
        self.ops['losses_edges'] = []
        for (internal_id, task_id) in enumerate(self.params['task_ids']):
            with tf.compat.v1.variable_scope("out_layer_task%i" % task_id):
                output_size = self.params['output_size']
                hidden = []
                keep_prob = self.placeholders['out_layer_dropout_keep_prob']
                with tf.compat.v1.variable_scope("regression_gate"):
                    self.weights['regression_gate_task%i' % task_id] = MLP(
                        2 * self.params['hidden_size'], output_size, hidden,
                        keep_prob)
                    self.weights[
                        'regression_gate_task_edges%i' % task_id] = MLP(
                            2 * self.params['hidden_size'],
                            self.output_size_edges, [], keep_prob)
                with tf.compat.v1.variable_scope("regression"):
                    self.weights[
                        'regression_transform_task%i' % task_id] = MLP(
                            self.params['hidden_size'], output_size, [],
                            keep_prob)
                    self.weights[
                        'regression_transform_task_edges%i' % task_id] = MLP(
                            self.params['hidden_size'], self.output_size_edges,
                            [], keep_prob)

                computed_values = self.gated_regression(
                    self.ops['final_node_representations'],
                    self.ops['initial_node_representations'],
                    self.weights['regression_gate_task%i' % task_id],
                    self.weights['regression_transform_task%i' % task_id],
                    None)
                # BTB [b, v * o] ID [e * v * o,  b]  o is 1 for BTB
                if is_btb:
                    computed_values_edges = self.gated_regression(
                        self.ops['final_node_representations'],
                        self.ops['initial_node_representations'],
                        self.weights['regression_gate_task_edges%i' % task_id],
                        self.weights['regression_transform_task_edges%i' %
                                     task_id],
                        None,
                        is_edge_regr=True)
                    # [b, v * e]

                task_target_mask = self.placeholders['target_mask'][
                    internal_id, :]
                # ID [b] else: [b]
                # SMALL_NUMBER keeps the normaliser non-zero when the mask
                # sums to zero.
                task_target_num = tf.reduce_sum(
                    input_tensor=task_target_mask) + SMALL_NUMBER
                # ID and else: b
                if problem == 'molecule':
                    labels = self.placeholders['target_values'][internal_id, :]
                    mask = tf.transpose(a=self.placeholders['node_mask'])
                elif problem == 'identity':
                    labels = self.placeholders['target_values']  # [o, v, e, b]
                    labels = tf.transpose(a=labels, perm=[2, 1, 0,
                                                          3])  # [e, v, o, b]
                    labels = tf.reshape(labels,
                                        [-1, self.placeholders['num_graphs']
                                         ])  # [e * v * o, b]
                    # node_mask ID [b, e * v * o]
                    mask = tf.transpose(
                        a=self.placeholders['node_mask'])  # [e * v * o,b]
                elif is_btb:
                    labels = self.placeholders[
                        'target_values_head']  # [b, v * o]
                    mask = self.placeholders['node_mask']  # [b, v * o]
                    labels_edges = self.placeholders[
                        'target_values_edges']  # [b, v * e]
                    mask_edges = self.placeholders[
                        'node_mask_edges']  # [b, v * e]
                else:
                    labels = self.placeholders['target_values'][:,
                                                                internal_id, :]
                    mask = tf.transpose(a=self.placeholders['node_mask'])

                # TODO: FIX THIS - accuracy is not computed yet; the
                # 'accuracy_task%i' op below simply stores the task loss.
                if problem == 'molecule':
                    self.calculate_losses_for_molecules(
                        computed_values, internal_id, task_id)
                else:
                    if is_btb:
                        # Cross-entropy of the head and edge predictions,
                        # normalised by the number of active targets.
                        task_loss_heads = tf.reduce_sum(-tf.reduce_sum(
                            labels * tf.math.log(computed_values), axis=1)
                                                        ) / task_target_num
                        task_loss_edges = tf.reduce_sum(-tf.reduce_sum(
                            labels_edges * tf.math.log(computed_values_edges),
                            axis=1)) / task_target_num
                        task_loss = (task_loss_heads + task_loss_edges)
                    else:
                        if self.args.get('--no_labels'):
                            computed_values, labels, mask = self.reduce_edge_dimension(
                                computed_values=computed_values,
                                labels=labels,
                                mask=mask)
                        new_mask = tf.cast(mask, tf.bool)
                        # Only entries selected by the node mask contribute.
                        masked_loss = tf.boolean_mask(
                            tensor=labels * tf.math.log(computed_values),
                            mask=new_mask)
                        task_loss = tf.reduce_sum(
                            input_tensor=-1 * masked_loss) / task_target_num
                    self.ops['accuracy_task%i' % task_id] = task_loss
                    self.ops['losses'].append(task_loss)
                    self.ops['computed_values'] = computed_values
                    self.ops['labels'] = labels
                    self.ops['node_mask'] = (
                        mask if is_btb else tf.transpose(mask))
                    self.ops['task_target_mask'] = task_target_mask
                    if is_btb:
                        # The edge tensors are only built for 'btb'; reading
                        # them for other variants raised UnboundLocalError.
                        self.ops['losses_edges'].append(task_loss_edges)
                        self.ops[
                            'computed_values_edges'] = computed_values_edges

        self.ops['loss'] = tf.reduce_sum(input_tensor=self.ops['losses'])
        self.ops['loss_edges'] = tf.reduce_sum(
            input_tensor=self.ops['losses_edges'])
    def _build_single_target(self, proposals, gt_boxes, gt_class_ids, img_shape):
        '''Sample RoIs for one image and build their class / box-regression targets.

        Args
        ---
            proposals: [num_proposals, (y1, x1, y2, x2)] in regular coordinates.
            gt_boxes: [num_gt_boxes, (y1, x1, y2, x2)]
            gt_class_ids: [num_gt_boxes]
            img_shape: np.ndarray. [2]. (img_height, img_width). Not read
                anywhere in this method body.

        Returns
        ---
            A 6-tuple; every element is wrapped in tf.stop_gradient:
            final_rois: the sampled rows of (proposals + noisy GT boxes),
                foreground first, then background.
            final_labels: class id per sampled RoI, forced to 0 for the
                background part.
            final_bbox_targets: [num_rois, num_classes * 4] regression
                targets, non-zero only for foreground RoIs.
            bbox_inside_weights: [num_rois, num_classes * 4], ones at the
                positions of the foreground targets, zeros elsewhere.
            bbox_outside_weights: same shape, constant 1 / num_rcnn_deltas.
            fg_assignments: index of the best-overlapping GT box for every
                sampled RoI.
        '''
        # remove padded proposals and gt boxes if any
        proposals, _ = trim_zeros(proposals)
        gt_boxes, non_zeros = trim_zeros(gt_boxes)
        gt_boxes = tf.cast(gt_boxes, proposals.dtype)
        gt_labels = tf.boolean_mask(gt_class_ids, non_zeros)
        # Jittered copies of the GT boxes are appended to the proposals.
        # NOTE(review): the noise has mean 5.0 and stddev 0.1, i.e. every
        # coordinate is shifted by roughly +5 - confirm this offset is intended.
        noise_mean = 5.0
        noisy_gt_boxes = tf.add(gt_boxes, 
                                tf.random.truncated_normal(tf.shape(gt_boxes), noise_mean, 0.1, dtype=proposals.dtype))
        proposals_gt = tf.concat([proposals, noisy_gt_boxes], axis=0)


        # Match every candidate RoI to the GT box it overlaps most.
        iou = geometry.compute_overlaps(proposals_gt, gt_boxes)  # [rois_size, gt_bboxes_size]
        max_overlaps = tf.reduce_max(iou, axis=1)  # [rois_size, ]
        gt_assignment = tf.argmax(iou, axis=1)  # [rois_size, ]
        labels = tf.gather(gt_labels, gt_assignment)  # [rois_size, ]

        # get FG and BG
        # FG: overlap >= pos_iou_thr; BG: neg_iou_thr <= overlap < pos_iou_thr
        fg_inds = tf.where(max_overlaps >= self.pos_iou_thr)[:, 0]
        bg_inds = tf.where(tf.logical_and(max_overlaps < self.pos_iou_thr,
                                          max_overlaps >= self.neg_iou_thr))[:, 0]

        # filter FG/BG
        # Cap the foreground count, then fill the rest of the budget
        # (num_rcnn_deltas) with background RoIs.
        if tf.size(fg_inds) > self._max_pos_samples:
            fg_inds = tf.random.shuffle(fg_inds)[:self._max_pos_samples]
        remaining = self.num_rcnn_deltas - tf.size(fg_inds)
        num_bg = tf.size(bg_inds)
        if tf.greater_equal(num_bg, remaining):
            bg_inds = tf.random.shuffle(bg_inds)[:remaining]
        else:
            # sample with replacement from very poor overlaps if number of backgrounds is not enough
            bg_inds = tf.where(max_overlaps < self.pos_iou_thr)[:, 0]
            bg_inds = tf.random.shuffle(bg_inds)[:remaining]
            num_bg = tf.size(bg_inds)
            # NOTE(review): if bg_inds is empty here, each pass appends
            # nothing and num_bg never grows - confirm callers rule this out.
            while remaining > num_bg:
                dups = remaining - num_bg
                dup_bgs = tf.random.shuffle(bg_inds)[:dups]
                bg_inds = tf.concat([bg_inds, dup_bgs], axis=0)
                num_bg = tf.size(bg_inds)

        # tf.print('proposal target generated %d fgs and %d bgs.' % (tf.size(fg_inds), tf.size(bg_inds)))

        keep_inds = tf.concat([fg_inds, bg_inds], axis=0)
        final_rois = tf.gather(proposals_gt, keep_inds)  # rois[keep_inds]
        final_labels = tf.gather(labels, keep_inds)  # labels[keep_inds]
        # Positions after the foreground block are background: overwrite
        # their labels with class 0.
        zero_indices = tf.expand_dims(tf.range(tf.size(fg_inds), tf.size(keep_inds), dtype=tf.int32), axis=1)
        zero_labels = tf.zeros(tf.shape(zero_indices)[0], dtype=tf.int32)
        final_labels = tf.tensor_scatter_nd_update(final_labels, zero_indices, zero_labels)

        # inside weights - positive examples are set, rest are zeros
        bbox_inside_weights = tf.zeros((tf.size(keep_inds), self.num_classes, 4), dtype=tf.float32)
        if tf.size(fg_inds) > 0:
            # cur_index holds (row, class slot) pairs; class-agnostic
            # regression always uses slot 0.
            if self.reg_class_agnostic:
                cur_index = tf.transpose(tf.stack([tf.range(tf.size(fg_inds)), tf.zeros(tf.size(fg_inds), dtype=tf.int32)]))
            else:
                cur_index = tf.stack([tf.range(tf.size(fg_inds)), tf.gather(labels, fg_inds)], axis=1)
            bbox_inside_weights = tf.tensor_scatter_nd_update(bbox_inside_weights,
                                                       cur_index,
                                                       tf.ones([tf.size(fg_inds), 4], bbox_inside_weights.dtype))
        bbox_inside_weights = tf.reshape(bbox_inside_weights, [-1, self.num_classes * 4])

        final_bbox_targets = tf.zeros((tf.size(keep_inds), self.num_classes, 4), dtype=tf.float32)
        if tf.size(fg_inds) > 0:

            # Foreground RoIs are the first tf.size(fg_inds) rows of
            # final_rois; their deltas are computed against the matched
            # (un-jittered) GT boxes.
            bbox_targets = transforms.bbox2delta(
                tf.gather(final_rois, tf.range(tf.size(fg_inds))),
                tf.gather(gt_boxes, tf.gather(gt_assignment, fg_inds)),
                target_stds=self.target_stds, target_means=self.target_means)
            if self.reg_class_agnostic:
                final_bbox_targets = tf.tensor_scatter_nd_update(
                                        final_bbox_targets,
                                        tf.transpose(tf.stack([tf.range(tf.size(fg_inds)),
                                        tf.zeros(tf.size(fg_inds), dtype=tf.int32)])),
                                        bbox_targets)
            else:
                final_bbox_targets = tf.tensor_scatter_nd_update(
                                        final_bbox_targets,
                                        tf.stack([tf.range(tf.size(fg_inds)),
                                        tf.gather(labels, fg_inds)], axis=1), bbox_targets)
        final_bbox_targets = tf.reshape(final_bbox_targets, [-1, self.num_classes * 4])

        # Uniform outside weight: 1 / num_rcnn_deltas for every entry.
        bbox_outside_weights = tf.ones_like(bbox_inside_weights, dtype=bbox_inside_weights.dtype) * 1.0 / self.num_rcnn_deltas
        fg_assignments = tf.gather(gt_assignment, keep_inds)
        # Targets are constants with respect to the optimiser.
        return (tf.stop_gradient(final_rois), tf.stop_gradient(final_labels), tf.stop_gradient(final_bbox_targets),
               tf.stop_gradient(bbox_inside_weights), tf.stop_gradient(bbox_outside_weights), tf.stop_gradient(fg_assignments))
Beispiel #46
0
    def _get_cost(self, logits, cost_name, cost_kwargs={}):
        """
        Constructs the cost function, either cross_entropy, weighted cross_entropy or dice_coefficient.
        Optional arguments are:
        class_weights: weights for the different classes in case of multi-class imbalance
        regularizer: power of the L2 regularizers added to the loss function
        """

        flat_logits = tf.reshape(logits, [-1, self.n_class])
        flat_labels = tf.reshape(self.y, [-1, self.n_class])
        if cost_name == "cross_entropy":

            if "class_weights" in cost_kwargs:
                class_weights = tf.constant(
                    np.array(cost_kwargs["class_weights"], dtype=np.float32))

                weight_map = tf.multiply(flat_labels, class_weights)
                weight_map = tf.reduce_sum(weight_map, axis=1)

                loss_map = tf.nn.softmax_cross_entropy_with_logits(
                    logits=flat_logits, labels=flat_labels)
                weighted_loss = tf.multiply(loss_map, weight_map)

                loss = tf.reduce_mean(weighted_loss)

            else:
                loss = tf.reduce_mean(
                    tf.nn.softmax_cross_entropy_with_logits(
                        logits=flat_logits, labels=flat_labels))
        # elif cost_name == "dice_coefficient":
        #     eps = 1e-5
        #     prediction = pixel_wise_softmax_2(logits)
        #     intersection = tf.reduce_sum(prediction * self.y, axis=[0, 1, 2])
        #     union = eps + tf.reduce_sum(prediction, axis=[0, 1, 2]) + tf.reduce_sum(self.y, axis=[0, 1, 2]) - intersection
        #     loss = tf.reduce_sum(-(2 * intersection / (union)))

        elif cost_name == "liver_dice":
            eps = 1e-5
            prediction = tf.argmax(pixel_wise_softmax_2(logits), axis=3)
            gt = tf.argmax(self.y, axis=3)

            prediction_b = prediction > 0
            gt_b = gt > 0

            intersection = tf.to_float(tf.count_nonzero(prediction_b & gt_b))
            size_pred = tf.to_float(tf.count_nonzero(prediction_b))
            size_gt = tf.to_float(tf.count_nonzero(gt_b))

            loss = -(2. * intersection / (size_pred + size_gt + eps))

        elif cost_name == "tumor_dice":
            eps = 1e-5
            prediction = tf.argmax(pixel_wise_softmax_2(logits), axis=3)
            gt = tf.argmax(self.y, axis=3)

            prediction_b = prediction > 1
            gt_b = gt > 1

            intersection = tf.to_float(tf.count_nonzero(prediction_b & gt_b))
            size_pred = tf.to_float(tf.count_nonzero(prediction_b))
            size_gt = tf.to_float(tf.count_nonzero(gt_b))

            loss = -(2. * intersection / (size_pred + size_gt + eps))

        elif cost_name == "avg_class_ce":
            if "class_weights" in cost_kwargs:
                class_weights = cost_kwargs["class_weights"]
            else:
                class_weights = np.ones(self.n_class)
            class_weights = tf.constant(
                np.array(class_weights, dtype=np.float32))
            # class_weights = tf.Print(class_weights, [class_weights], 'Class weigihts:')

            weight_map = tf.multiply(flat_labels, class_weights)
            loss_map = tf.nn.softmax_cross_entropy_with_logits(
                logits=flat_logits, labels=flat_labels)
            loss_map = tf.tile(tf.expand_dims(loss_map, 1), [1, self.n_class])
            # both are npixel x n_class

            weighted_loss = tf.multiply(loss_map, weight_map)
            loss_sum_per_class = tf.reduce_sum(weighted_loss, axis=0)
            # loss_sum_per_class = tf.Print(loss_sum_per_class, [loss_sum_per_class], 'Sum of loss per class:')

            px_per_class = tf.reduce_sum(flat_labels, axis=0)
            # px_per_class = tf.Print(px_per_class, [px_per_class], 'Pixels per class:')
            include_class = tf.not_equal(px_per_class, 0)
            loss_sum_per_class_valid = tf.boolean_mask(loss_sum_per_class,
                                                       include_class)
            px_per_class_valid = tf.boolean_mask(px_per_class, include_class)

            loss_per_class = tf.divide(loss_sum_per_class_valid,
                                       px_per_class_valid)
            # loss_per_class = tf.Print(loss_per_class, [loss_per_class], 'Mean loss per class:')
            loss = tf.reduce_mean(loss_per_class)
            # loss = tf.Print(loss, [loss], "Loss:")

        elif cost_name == "avg_class_ce_binary":
            """Only care whether it's tumor or not"""
            if "class_weights" in cost_kwargs:
                class_weights = cost_kwargs["class_weights"]
            else:
                class_weights = np.ones(self.n_class - 1)
            class_weights = tf.constant(
                np.array(class_weights, dtype=np.float32))

            combined_labels = tf.stack(
                [flat_labels[:, 0] + flat_labels[:, 1], flat_labels[:, 2]],
                axis=1)
            combined_logits = tf.stack([
                tf.log(tf.exp(flat_logits[:, 0]) + tf.exp(flat_logits[:, 1])),
                flat_logits[:, 2]
            ],
                                       axis=1)

            weight_map = tf.multiply(combined_labels, class_weights)
            loss_map = tf.nn.softmax_cross_entropy_with_logits(
                logits=combined_logits, labels=combined_labels)
            loss_map = tf.tile(tf.expand_dims(loss_map, 1),
                               [1, self.n_class - 1])
            # both are npixel x n_class

            weighted_loss = tf.multiply(loss_map, weight_map)
            loss_sum_per_class = tf.reduce_sum(weighted_loss, axis=0)
            # loss_sum_per_class = tf.Print(loss_sum_per_class, [loss_sum_per_class], 'Sum of loss per class:')

            px_per_class = tf.reduce_sum(combined_labels, axis=0)
            # px_per_class = tf.Print(px_per_class, [px_per_class], 'Pixels per class:')
            include_class = tf.not_equal(px_per_class, 0)
            loss_sum_per_class_valid = tf.boolean_mask(loss_sum_per_class,
                                                       include_class)
            px_per_class_valid = tf.boolean_mask(px_per_class, include_class)

            loss_per_class = tf.divide(loss_sum_per_class_valid,
                                       px_per_class_valid)
            # loss_per_class = tf.Print(loss_per_class, [loss_per_class], 'Mean loss per class:')
            loss = tf.reduce_mean(loss_per_class)
            # loss = tf.Print(loss, [loss], "Loss:")

        elif cost_name == "avg_class_ce_symmetric":
            prediction = pixel_wise_softmax_2(logits)
            flat_prediction = tf.reshape(prediction, [-1, self.n_class])

            if "class_weights" in cost_kwargs:
                class_weights = cost_kwargs["class_weights"]
            else:
                class_weights = np.ones(self.n_class)
            class_weights = tf.constant(
                np.array(class_weights, dtype=np.float32))

            weight_map = tf.multiply(flat_labels,
                                     class_weights) + 0.1 * tf.multiply(
                                         flat_prediction, class_weights)
            loss_map = tf.nn.softmax_cross_entropy_with_logits(
                logits=flat_logits, labels=flat_labels)
            loss_map = tf.tile(tf.expand_dims(loss_map, 1), [1, self.n_class])
            # both are npixel x n_class

            weighted_loss = tf.multiply(loss_map, weight_map)
            loss_sum_per_class = tf.reduce_sum(weighted_loss, axis=0)

            px_per_class = tf.reduce_sum(
                flat_labels,
                axis=0) + 0.1 * tf.reduce_sum(flat_prediction, axis=0)
            include_class = tf.not_equal(px_per_class, 0)
            loss_sum_per_class_valid = tf.boolean_mask(loss_sum_per_class,
                                                       include_class)
            px_per_class_valid = tf.boolean_mask(px_per_class, include_class)

            loss_per_class = tf.divide(loss_sum_per_class_valid,
                                       px_per_class_valid)
            loss = tf.reduce_mean(loss_per_class)

        else:
            raise ValueError("Unknown cost function: " % cost_name)

        if "regularizer" in cost_kwargs:
            regularizer = cost_kwargs["regularizer"]
            regularizers = sum(
                [tf.nn.l2_loss(variable) for variable in self.variables])
            loss += (regularizer * regularizers)

        return loss
Beispiel #47
0
def discriminative_loss_single(prediction, correct_label, feature_dim, label_shape,
                               delta_v, delta_d, param_var, param_dist, param_reg):
    """Discriminative loss for a single prediction/label pair.

    :param prediction: inference of network
    :param correct_label: instance label
    :param feature_dim: feature dimension of prediction
    :param label_shape: shape of label
    :param delta_v: cutoff variance distance
    :param delta_d: cutoff cluster distance
    :param param_var: weight for intra cluster variance
    :param param_dist: weight for inter cluster distances
    :param param_reg: weight regularization
    :return: tuple (loss, l_var, l_dist, l_reg); the three terms are
        returned after multiplication with their weights.
    """
    # Flatten so that every pixel is one row.
    num_pixels = label_shape[1] * label_shape[0]
    flat_label = tf.reshape(correct_label, [num_pixels])
    flat_pred = tf.reshape(prediction, [num_pixels, feature_dim])

    # Instances present in the label and the number of pixels of each.
    instance_labels, pixel_instance, pixel_counts = tf.unique_with_counts(
        flat_label)
    pixel_counts = tf.cast(pixel_counts, tf.float32)
    num_instances = tf.size(instance_labels)

    # Mean embedding per instance, gathered back to one row per pixel.
    embedding_sums = tf.unsorted_segment_sum(flat_pred, pixel_instance,
                                             num_instances)
    mu = tf.div(embedding_sums, tf.reshape(pixel_counts, (-1, 1)))
    mu_per_pixel = tf.gather(mu, pixel_instance)

    # Variance term: squared distance to the own mean beyond delta_v,
    # averaged per instance and then over the instances.
    pull = tf_norm(mu_per_pixel - flat_pred, axis=1)
    pull = pull - delta_v
    pull = tf.clip_by_value(pull, 0., pull)
    pull = tf.square(pull)

    pull_per_instance = tf.unsorted_segment_sum(pull, pixel_instance,
                                                num_instances)
    pull_per_instance = tf.div(pull_per_instance, pixel_counts)
    l_var = tf.divide(tf.reduce_sum(pull_per_instance),
                      tf.cast(num_instances, tf.float32))

    # Distance term: all pairwise mean differences, enumerated as
    #   mu_1 - mu_1, mu_2 - mu_1, mu_3 - mu_1,
    #   mu_1 - mu_2, mu_2 - mu_2, mu_3 - mu_2, ...
    mu_interleaved = tf.tile(mu, [num_instances, 1])
    mu_banded = tf.reshape(tf.tile(mu, [1, num_instances]),
                           (num_instances * num_instances, feature_dim))
    pair_diff = mu_banded - mu_interleaved

    # Drop the all-zero rows produced by subtracting a mean from itself.
    abs_row_sum = tf.reduce_sum(tf.abs(pair_diff), axis=1)
    is_nonzero_pair = tf.not_equal(abs_row_sum,
                                   tf.zeros(1, dtype=tf.float32))
    pair_diff = tf.boolean_mask(pair_diff, is_nonzero_pair)

    push = tf_norm(pair_diff, axis=1)
    push = 2. * delta_d - push
    push = tf.clip_by_value(push, 0., push)
    push = tf.square(push)
    l_dist = tf.reduce_mean(push)

    # Regularisation term: mean norm of the instance means.
    l_reg = tf.reduce_mean(tf_norm(mu, axis=1))

    l_var = param_var * l_var
    l_dist = param_dist * l_dist
    l_reg = param_reg * l_reg

    param_scale = 1.
    loss = param_scale * (l_var + l_dist + l_reg)

    return loss, l_var, l_dist, l_reg
Beispiel #48
0
    def loss_op(self):
        """Build the NCE training loss and the top-k evaluation ops.

        Reads `self.music_id`, `self.transformer_out`, `self.lengths`,
        `self.hidden_dim`, `self.music_num`, `self.neg_num`,
        `self.sequence_length` and `self.top_k`.

        Sets:
            self.loss: mean NCE loss over the non-padded positions.
            self.logits: scores against the NCE weights,
                [batch_size * seq_length, music_num].
            self.loss_test: constant 0 placeholder for a test-time loss.
            self.hit_shape: shape of the masked top-k hit vector.
            self.recall: fraction of non-padded positions whose label is
                among the top-k scores.
            self.rank: position of the label inside the top-k list, for the
                non-padded positions where it is present.
        """
        with tf.variable_scope("loss"):
            # [batch_size * seq_length, 1]
            neg_labels = tf.reshape(self.music_id, [-1, 1])
            # [batch_size * seq_length, hidden_dim]
            neg_inputs = tf.reshape(self.transformer_out,
                                    [-1, self.hidden_dim])

            nce_weights = tf.get_variable(
                name='nce_weights',
                initializer=tf.truncated_normal(
                    [self.music_num, self.hidden_dim],
                    stddev=1.0 / math.sqrt(self.hidden_dim)))
            nce_biases = tf.get_variable(name='nce_biases',
                                         initializer=tf.zeros([self.music_num
                                                               ]))
            # Pin the mask width to the padded sequence length. Without
            # `maxlen` the width is max(lengths), which does not match the
            # [-1, sequence_length] tensors below whenever no sequence in
            # the batch has full length.
            mask = tf.sequence_mask(self.lengths,
                                    maxlen=self.sequence_length)
            flat_mask = tf.reshape(mask, [-1])

            # Unmasked per-position loss: [batch_size * seq_length]
            loss = tf.nn.nce_loss(weights=nce_weights,
                                  biases=nce_biases,
                                  labels=neg_labels,
                                  inputs=neg_inputs,
                                  num_sampled=self.neg_num,
                                  num_classes=self.music_num,
                                  remove_accidental_hits=True)
            loss = tf.reshape(loss, [-1, self.sequence_length])
            self.loss = tf.reduce_mean(tf.boolean_mask(loss, mask))

            # Full scores against every item.
            # NOTE: this matmul was identified as the bottleneck; an earlier
            # experiment split it into blocks and ran top_k per block.
            logits = tf.matmul(neg_inputs, tf.transpose(nce_weights))
            # [batch_size*seq_length, music_num]
            self.logits = tf.nn.bias_add(logits, nce_biases)

            # No test-time loss is computed; a constant keeps the attribute.
            self.loss_test = tf.constant(0, dtype=tf.float32, shape=[])

            # [batch_size * seq_length]
            flat_labels = tf.reshape(self.music_id, [-1])
            # [batch_size * seq_length]
            hit = tf.nn.in_top_k(self.logits, flat_labels, self.top_k)
            hit = tf.reshape(hit, [-1, self.sequence_length])
            mask_hit = tf.boolean_mask(hit, mask)
            self.hit_shape = tf.shape(mask_hit)
            self.recall = tf.reduce_mean(tf.to_float(mask_hit))

            # [batch_size*seq_length, top_k]
            top_k_index = tf.nn.top_k(self.logits, self.top_k)[1]
            index_mask = tf.boolean_mask(top_k_index, flat_mask)
            label_mask = tf.boolean_mask(flat_labels, flat_mask)
            label_mask = tf.reshape(label_mask, [-1, 1])
            # Column index at which the label appears in the top-k list.
            self.rank = tf.where(
                tf.equal(tf.to_int32(index_mask),
                         tf.to_int32(label_mask)))[:, -1]
def upper_triangular_part(matrix):
    """Return the entries of `matrix` that lie strictly above the diagonal.

    A pattern of ones covering the lower triangle (diagonal included) is
    built with `tf.linalg.band_part`; its complement is used as the selection
    mask.

    Args:
        matrix: tensor whose static shape is fully defined, since
            `matrix.shape` is handed to `tf.ones`.

    Returns:
        The selected entries, as produced by `tf.boolean_mask` with a mask of
        the same shape as `matrix`.
    """
    lower_with_diag = tf.linalg.band_part(tf.ones(matrix.shape), -1, 0)
    # tf.boolean_mask is documented to take a boolean mask, so convert
    # explicitly instead of passing a 0/1 float tensor.
    strictly_upper = tf.logical_not(tf.cast(lower_with_diag, tf.bool))
    return tf.boolean_mask(matrix, strictly_upper)
Beispiel #50
0
def updated_log_likelihood_for_edge_changes(node_pairs, adjacency_matrix,
                                            d_min):
    """
    Evaluate the Powerlaw fit of the degree distribution after flipping edges.

    For every input node pair the edge between the two nodes is toggled
    (added when absent, removed when present) and the resulting Powerlaw
    statistics of the degree distribution are computed. Assumes an undirected
    unweighted graph.

    Parameters
    ----------
    node_pairs: tf.Tensor, shape (e, 2) dtype int
        The e node pairs to consider, each given as the two node indices.

    adjacency_matrix: tf.Tensor shape (N,N)
        The input adjacency matrix. Assumed to be unweighted and symmetric.
        NOTE(review): its row sums are passed to tf.log and have float32
        deltas added to them, so a float32 tensor appears to be required --
        confirm against callers.

    d_min: int
        The minimum degree considered in the Powerlaw distribution.

    Returns
    -------
    new_ll: tf.Tensor of shape (e,) and dtype float
        Log likelihood obtained for each node pair after toggling its edge.

    new_alpha: tf.Tensor of shape (e,) and dtype float
        Maximum likelihood estimate of the Powerlaw exponent for each node
        pair after toggling its edge.

    new_n: tf.Tensor of shape (e,) and dtype float
        Number of degrees >= d_min after each potential edge change.

    sum_log_degrees_after: tf.Tensor of floats shape (e,)
        Sum of the log degrees >= d_min after each potential edge change.

    """
    pair_indices = tf.cast(node_pairs, tf.int32)

    # 1.0 where the pair is currently connected, 0.0 otherwise.
    has_edge = tf.cast(tf.gather_nd(adjacency_matrix, pair_indices),
                       tf.float32)

    # Current degree sequence and the statistics of the degrees >= d_min.
    degrees = tf.reduce_sum(adjacency_matrix, 1)
    counted = tf.greater_equal(degrees, d_min)
    log_degree_total = tf.reduce_sum(
        tf.log(tf.boolean_mask(degrees, counted)))
    num_counted = tf.cast(tf.count_nonzero(counted), tf.float32)

    # +1 adds a missing edge, -1 removes an existing one.
    flip = 1 - 2 * has_edge

    # Degrees of both endpoints before and after the toggle.
    degrees_before = tf.gather(degrees, pair_indices)
    degrees_after = degrees_before + flip[:, None]

    # Incrementally update the sufficient statistics for every candidate.
    sum_log_degrees_after, new_n = update_sum_log_degrees(
        log_degree_total, num_counted, degrees_before, degrees_after, d_min)
    new_n = tf.cast(new_n, tf.float32)

    # Refit the exponent and evaluate the likelihood per candidate.
    new_alpha = compute_alpha(new_n, sum_log_degrees_after, d_min)
    new_ll = compute_log_likelihood(new_n, new_alpha, sum_log_degrees_after,
                                    d_min)

    return new_ll, new_alpha, new_n, sum_log_degrees_after
    def tf_retrieve_timesteps(self, n):
        """Build the ops that retrieve `n` timesteps from buffer and priority memory.

        Up to `n` elements are taken from the buffer (`min(buffer_index, n)`);
        the remainder is sampled from the priority memory when
        `self.memory_size > 0`. Sampled indices that point at terminal entries
        are dropped before retrieval.

        Side effects (as graph ops): `self.batch_indices` is reset to zeros and
        then set to one at the sampled priority indices, and
        `self.last_batch_buffer_elems` is assigned the number of buffer
        elements used.

        Args:
            n: number of timesteps requested.

        Returns:
            The result of `self.retrieve_indices` for the chosen buffer count
            and priority indices.
        """
        num_buffer_elems = tf.minimum(x=self.buffer_index, y=n)

        # Only draw from priority memory when self.memory_size > 0; otherwise
        # request zero priority elements.
        num_priority_elements = tf.cond(
            pred=self.memory_size > 0,
            true_fn=lambda: n - num_buffer_elems,
            false_fn=lambda: 0
        )

        def sampling_fn():
            # Vectorized sampling: one uniform draw and one running index per
            # requested priority element.
            sum_priorities = tf.reduce_sum(input_tensor=self.priorities, axis=0)
            sample = tf.random_uniform(shape=(num_priority_elements,), dtype=tf.float32)
            indices = tf.zeros(shape=(num_priority_elements,), dtype=tf.int32)

            # NOTE(review): tf.while_loop keeps iterating while `cond` is True.
            # This predicate is True only when every sample is already <= 0,
            # which looks inverted relative to the body below -- confirm intent.
            def cond(loop_index, sample):
                return tf.reduce_all(input_tensor=(sample <= 0.0))

            def sampling_body(loop_index, sample):
                # Subtract the normalised priority at the current index from
                # each draw and advance the index where the draw is still > 0.
                priority = tf.gather(params=self.priorities, indices=loop_index)
                sample -= priority / sum_priorities
                loop_index += tf.cast(
                    x=(sample > 0.0),
                    dtype=tf.int32,
                )

                return loop_index, sample

            # Only the final index vector is needed from the loop state.
            priority_indices = tf.while_loop(
                cond=cond,
                body=sampling_body,
                loop_vars=(indices, sample)
            )[0]
            return priority_indices

        # Reset batch indices.
        assignment = tf.assign(
            ref=self.batch_indices,
            value=tf.zeros(shape=tf.shape(self.batch_indices), dtype=tf.int32)
        )
        # Everything below must run after the reset above.
        with tf.control_dependencies(control_inputs=(assignment,)):
            priority_indices = tf.cond(
                pred=num_priority_elements > 0,
                true_fn=sampling_fn,
                false_fn=lambda: tf.zeros(shape=(num_priority_elements,), dtype=tf.int32)
            )
            # Drop sampled indices whose terminal flag is set.
            priority_terminal = tf.gather(params=self.terminal_memory, indices=priority_indices)
            priority_indices = tf.boolean_mask(tensor=priority_indices, mask=tf.logical_not(x=priority_terminal))

            # Store how many elements we retrieved from the buffer for updating priorities.
            # Note that this is just the count, as we can reconstruct the indices from that.
            assignments = list()
            assignments.append(tf.assign(ref=self.last_batch_buffer_elems, value=num_buffer_elems))

            # Store indices used from priority memory. Note that these are the full indices
            # as they were not taken in order.
            update = tf.ones(shape=tf.shape(input=priority_indices), dtype=tf.int32)
            assignments.append(tf.scatter_update(
                ref=self.batch_indices,
                indices=priority_indices,
                updates=update
            ))
        # Fetch results only after both bookkeeping assignments have run.
        with tf.control_dependencies(control_inputs=assignments):
            return self.retrieve_indices(buffer_elements=num_buffer_elems, priority_indices=priority_indices)
Beispiel #52
0
    def __init__(self,
                 actions,
                 actions_logp,
                 actions_entropy,
                 dones,
                 behaviour_logits,
                 target_logits,
                 discount,
                 rewards,
                 values,
                 bootstrap_value,
                 valid_mask,
                 vf_loss_coeff=0.5,
                 entropy_coeff=-0.01,
                 clip_rho_threshold=1.0,
                 clip_pg_rho_threshold=1.0):
        """Build the V-trace weighted policy, value and entropy losses.

        Inputs are time-major tensors of shape [T, B, ...], with `B` the
        batch size. `B` has to be known so that V-trace can handle episode
        cut boundaries properly.

        Args:
            actions: An int32 tensor of shape [T, B, NUM_ACTIONS].
            actions_logp: A float32 tensor of shape [T, B].
            actions_entropy: A float32 tensor of shape [T, B].
            dones: A bool tensor of shape [T, B].
            behaviour_logits: A float32 tensor of shape [T, B, NUM_ACTIONS].
            target_logits: A float32 tensor of shape [T, B, NUM_ACTIONS].
            discount: A float32 scalar.
            rewards: A float32 tensor of shape [T, B].
            values: A float32 tensor of shape [T, B].
            bootstrap_value: A float32 tensor of shape [B].
            valid_mask: A bool tensor of valid RNN input elements (#2992).
            vf_loss_coeff: Weight of `vf_loss` inside `total_loss`.
            entropy_coeff: Weight of `entropy` inside `total_loss`.
            clip_rho_threshold: Cast to float32 and forwarded to
                `vtrace.from_logits`.
            clip_pg_rho_threshold: Cast to float32 and forwarded to
                `vtrace.from_logits`.
        """

        def masked_sum(tensor):
            # Sum only the entries that `valid_mask` marks as valid.
            return tf.reduce_sum(tf.boolean_mask(tensor, valid_mask))

        # V-trace is evaluated on the CPU for better perf; every op feeding
        # it is created inside the same device scope.
        with tf.device("/cpu:0"):
            int_actions = tf.cast(actions, tf.int32)
            # The discount becomes zero on steps where `dones` is set.
            step_discounts = tf.to_float(~dones) * discount
            rho_clip = tf.cast(clip_rho_threshold, tf.float32)
            pg_rho_clip = tf.cast(clip_pg_rho_threshold, tf.float32)
            self.vtrace_returns = vtrace.from_logits(
                behaviour_policy_logits=behaviour_logits,
                target_policy_logits=target_logits,
                actions=int_actions,
                discounts=step_discounts,
                rewards=rewards,
                values=values,
                bootstrap_value=bootstrap_value,
                clip_rho_threshold=rho_clip,
                clip_pg_rho_threshold=pg_rho_clip)
        returns = self.vtrace_returns

        # Policy gradient term.
        self.pi_loss = -masked_sum(actions_logp * returns.pg_advantages)

        # Baseline (value function) term.
        value_error = tf.boolean_mask(values - returns.vs, valid_mask)
        self.vf_loss = 0.5 * tf.reduce_sum(tf.square(value_error))

        # Entropy term.
        self.entropy = masked_sum(actions_entropy)

        # Weighted sum of the three terms.
        weighted_vf = self.vf_loss * vf_loss_coeff
        weighted_entropy = self.entropy * entropy_coeff
        self.total_loss = (self.pi_loss + weighted_vf + weighted_entropy)
Beispiel #53
0
def to_sparse(tensor, lengths, max_length):
    """Convert a length-padded dense tensor into a `tf.SparseTensor`.

    Positions inside each row's length (per `tf.sequence_mask`) become the
    sparse entries; their values are cast to int32. The dense shape is the
    dynamic shape of `tensor`.

    Args:
        tensor: dense tensor to convert.
        lengths: per-row number of valid leading elements.
        max_length: maximum length passed to `tf.sequence_mask`.

    Returns:
        A `tf.SparseTensor` with int64 indices and shape, int32 values.
    """
    valid = tf.sequence_mask(lengths, max_length)
    return tf.SparseTensor(
        indices=tf.to_int64(tf.where(valid)),
        values=tf.to_int32(tf.boolean_mask(tensor, valid)),
        dense_shape=tf.to_int64(tf.shape(tensor)))
Beispiel #54
0
    def construct(self, args, convolution, hidden_size, state_shape,
                  num_actions):
        """Build the value network, its training op, and initialise variables.

        Args:
            args: options object; `args.padding` and `args.learning_rate`
                are read.
            convolution: iterable of `(filters, kernel, stride)` triples. A
                triple with `filters == 0` adds a max-pooling layer; any
                other triple adds a convolution followed by ReLU.
            hidden_size: number of units of the dense hidden layer.
            state_shape: list holding the per-example state shape; it is
                appended to `[None]` to form the placeholder shapes.
            num_actions: number of outputs of the value head.
        """
        with self.session.graph.as_default():
            self.states = tf.placeholder(tf.float32, [None] + state_shape)
            self.prev_states = tf.placeholder(tf.float32, [None] + state_shape)
            self.actions = tf.placeholder(tf.int32, [None])
            self.returns = tf.placeholder(tf.float32, [None])
            self.weights = tf.placeholder(tf.float32, [None])

            # Resize the current and previous state to 32x32 and stack them
            # along axis 3.
            frames = tf.concat([
                tf.image.resize_images(self.states, [32, 32]),
                tf.image.resize_images(self.prev_states, [32, 32])
            ], axis=3)

            output = frames
            for filters, kernel, stride in convolution:
                if filters == 0:
                    # filters == 0 requests a pooling layer.
                    output = tf.layers.max_pooling2d(
                        inputs=output,
                        pool_size=[kernel, kernel],
                        strides=stride)
                else:
                    output = tf.layers.conv2d(inputs=output,
                                              filters=filters,
                                              kernel_size=[kernel, kernel],
                                              strides=stride,
                                              padding=args.padding)
                    output = tf.nn.relu(output)

            output = tf.layers.flatten(output)
            output = tf.layers.dense(output,
                                     hidden_size,
                                     activation=tf.nn.relu)
            self.predicted_values = tf.layers.dense(output,
                                                    num_actions,
                                                    activation=None)

            # v_dense = tf.layers.dense(output, hidden_size, activation=tf.nn.relu)
            # a_dense = tf.layers.dense(output, hidden_size, activation=tf.nn.relu)
            #
            # v = tf.layers.dense(v_dense, 1, activation=None)
            # a = tf.layers.dense(a_dense, num_actions, activation=None)
            #
            # self.predicted_values = v + a - tf.reduce_mean(a, 1, keep_dims=True)

            # Select, per example, the predicted value of the action taken.
            # tf.boolean_mask is documented to take a boolean mask, so the
            # float one-hot encoding is converted explicitly.
            taken_action = tf.cast(tf.one_hot(self.actions, num_actions),
                                   tf.bool)
            loss = tf.losses.mean_squared_error(
                self.returns,
                tf.boolean_mask(self.predicted_values, taken_action),
                weights=self.weights)
            global_step = tf.train.create_global_step()
            self.training = tf.train.AdamOptimizer(
                args.learning_rate).minimize(loss,
                                             global_step=global_step,
                                             name="training")

            self.saver = tf.train.Saver()

            # Initialize variables
            self.session.run(tf.global_variables_initializer())
Beispiel #55
0
 def forward(self, tensors, mode: str = None):
     """Apply a boolean mask to a tensor.

     `tensors` is unpacked into exactly two items, the tensor to filter and
     the mask; `mode` is accepted but not used here.
     """
     values, keep = tensors
     return tf.boolean_mask(tensor=values, mask=keep)
Beispiel #56
0
    def __init__(self, n_hidden_size, n_class, lr, n_enc_size, n_dec_size,
                 n_enc_vocab_size, n_dec_vocab_size, n_embedding_size):
        """Build the full graph of an LSTM encoder/decoder with a context vector.

        The constructor creates the placeholders, embedding matrices and LSTM
        cells, runs the encoder, unrolls a decoder loop fed with `dec_input`
        (used for the loss, optimizer and accuracy), and unrolls a second loop
        that feeds each predicted token back in (`inf_result`). The context
        vector comes from `self.bahdanau_attention`, which is defined outside
        this method.

        Args:
            n_hidden_size: number of units of the encoder and decoder LSTM
                cells.
            n_class: number of outputs of the `output_dense` projection.
            lr: learning rate passed to the Adam optimizer.
            n_enc_size: stored on the instance; not otherwise used here.
            n_dec_size: number of steps of both while-loops and the maximum
                length of the loss mask.
            n_enc_vocab_size: number of rows of the encoder embedding matrix.
            n_dec_vocab_size: number of rows of the decoder embedding matrix.
            n_embedding_size: number of columns of both embedding matrices.
        """
        with tf.variable_scope('Input'):
            # Keep the hyper-parameters on the instance.
            self.lr = lr
            self.n_class = n_class
            self.n_enc_size = n_enc_size
            self.n_dec_size = n_dec_size
            self.n_hidden_size = n_hidden_size
            self.n_enc_vocab_size = n_enc_vocab_size
            self.n_dec_vocab_size = n_dec_vocab_size
            self.n_embedding_size = n_embedding_size

            with tf.variable_scope('Placeholder'):
                # Token-id matrices are rank 2 with both dimensions dynamic;
                # the sequence lengths are rank 1.
                self.enc_input = tf.placeholder(tf.int64, [None, None],
                                                name='enc_input')
                self.dec_input = tf.placeholder(tf.int64, [None, None],
                                                name='dec_input')
                self.inf_input = tf.placeholder(tf.int64, [None, None],
                                                name='inf_input')
                self.targets = tf.placeholder(tf.int64, [None, None],
                                              name='tar_input')
                self.x_seq_len = tf.placeholder(tf.int64, [None],
                                                name="x_seq_len")
                self.y_seq_len = tf.placeholder(tf.int64, [None],
                                                name="y_seq_len")
                self.dropout_keep = tf.placeholder(tf.float32,
                                                   name="dropout_keep")

            with tf.variable_scope('Variable'):
                # enc_embeddings [ enc_voc_size, embedding_size ]
                # dec_embeddings [ dec_voc_size, embedding_size ]
                self.enc_embeddings = tf.Variable(tf.random_normal(
                    [self.n_enc_vocab_size, self.n_embedding_size]),
                                                  name='enc_embedding')
                self.dec_embeddings = tf.Variable(tf.random_normal(
                    [self.n_dec_vocab_size, self.n_embedding_size]),
                                                  name='dec_embedding')

            with tf.variable_scope('MakeCell'):
                # One LSTM cell each for encoder and decoder, both wrapped
                # with output dropout driven by the dropout_keep placeholder.
                self.enc_cell = tf.nn.rnn_cell.LSTMCell(
                    num_units=self.n_hidden_size)
                self.dec_cell = tf.nn.rnn_cell.LSTMCell(
                    num_units=self.n_hidden_size)
                self.enc_cell = tf.nn.rnn_cell.DropoutWrapper(
                    self.enc_cell, output_keep_prob=self.dropout_keep)
                self.dec_cell = tf.nn.rnn_cell.DropoutWrapper(
                    self.dec_cell, output_keep_prob=self.dropout_keep)

            with tf.variable_scope('Embedding'):
                # enc_embed / dec_embed [ batch, seqlen, embedding_size ]
                self.enc_embed = tf.nn.embedding_lookup(
                    self.enc_embeddings, self.enc_input,
                    name='enc_embed')  # [ batch, seqlen, embedding_size ]
                self.dec_embed = tf.nn.embedding_lookup(
                    self.dec_embeddings, self.dec_input,
                    name='dec_embed')  # [ batch, seqlen, embedding_size ]

    # enc_state  [ 2,     batch,  hidden ] context, hidden
    # enc_outputs[ batch, seqlen, hidden ]
        with tf.variable_scope('Encoder'):
            self.enc_outputs, self.enc_state = \
            tf.nn.dynamic_rnn( self.enc_cell, self.enc_embed, sequence_length=self.x_seq_len, dtype=tf.float32 )
            # The decoder starts from the final encoder state.
            self.dec_state = self.enc_state

        # dec_embed [ batch, seqlen, hidden ]
        # context   [ batch, hidden         ]
        with tf.variable_scope('Decoder'):
            self.context = self.bahdanau_attention(self.enc_state,
                                                   self.enc_outputs)
            # Time-major copy so that step `idx` can be gathered directly.
            self.t_dec_embed = tf.transpose(self.dec_embed, [1, 0, 2])
            dec_idx = tf.constant(0)
            dec_output_tensor = tf.TensorArray(tf.float32,
                                               size=self.n_dec_size)

            # Loop predicate: run while idx < max_dec_size. The remaining
            # parameters (including the misspelled `outupt_tensor`) are unused.
            def dec_cond(idx, p_state, enc_outputs, outupt_tensor,
                         max_dec_size):
                return tf.less(idx, max_dec_size)

            # One decoding step: concatenate the context with the embedding
            # of step `idx`, advance the decoder cell by one step, recompute
            # the context and record the step output.
            def dec_body(idx, p_state, enc_outputs, dec_output_tensor,
                         max_dec_size):
                i_dec_embed = tf.gather_nd(self.t_dec_embed, [[idx]])
                i_dec_embed = tf.transpose(i_dec_embed,
                                           [1, 0, 2])  # [batch, 1, hidden]
                context_expand = tf.expand_dims(self.context,
                                                1)  # [batch, 1, hidden]
                i_dec_embed_concat = tf.concat(
                    [context_expand, i_dec_embed],
                    axis=-1)  # [ batch, 1, hidden*2 ]
                i_dec_outputs, i_dec_state = tf.nn.dynamic_rnn(
                    self.dec_cell,
                    i_dec_embed_concat,
                    initial_state=p_state,
                    dtype=tf.float32)
                # Rebinds self.context (a Python attribute) while the loop
                # body is being traced, i.e. at graph-construction time.
                self.context = self.bahdanau_attention(i_dec_state,
                                                       self.enc_outputs)
                i_dec_outputs = tf.reshape(i_dec_outputs,
                                           [-1, self.n_hidden_size])
                dec_output_tensor = dec_output_tensor.write(idx, i_dec_outputs)
                return idx + 1, i_dec_state, enc_outputs, dec_output_tensor, max_dec_size

        # Built before tf.while_loop traces dec_body, so self.context here is
        # still the one computed from the encoder state above.
        self.n_dec_state = tf.nn.rnn_cell.LSTMStateTuple(c=self.context,
                                                         h=self.dec_state.h)
        with tf.variable_scope('While'):
            _, _, _, dec_output_tensor, _ = \
            tf.while_loop( cond = dec_cond,
                           body = dec_body,
                           loop_vars=[ dec_idx,
                                       self.n_dec_state,
                                       self.enc_outputs,
                                       dec_output_tensor,
                                       self.n_dec_size ] )

            # Stack the per-step outputs and move the batch axis to the front.
            self.dec_outputs = dec_output_tensor.stack()
            self.dec_outputs = tf.transpose(self.dec_outputs, [1, 0, 2])
            # Same layer name and AUTO_REUSE as the projection in inf_body
            # below -- presumably so both share weights; verify.
            self.logits = tf.layers.dense(self.dec_outputs,
                                          self.n_class,
                                          activation=None,
                                          reuse=tf.AUTO_REUSE,
                                          name='output_dense')

        # True for the first y_seq_len positions of each row, up to n_dec_size.
        self.mask = tf.sequence_mask(self.y_seq_len, n_dec_size)
        with tf.variable_scope('Loss'):
            # targets hold one class id per decoding step
            # (presumably [ batch, n_dec_size ] -- TODO confirm)
            self.losses = tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits=self.logits,
                labels=self.targets)  # losses =  [1, 32, 13]
            # The masked losses are computed twice: once exposed as t_loss,
            # once averaged into the scalar training loss.
            self.t_loss = tf.boolean_mask(self.losses, self.mask)
            self.loss = tf.reduce_mean(tf.boolean_mask(self.losses, self.mask))
            self.optimizer = tf.train.AdamOptimizer(self.lr).minimize(
                self.loss)

        with tf.variable_scope('Accuracy'):
            self.prediction = tf.argmax(self.logits,
                                        2,
                                        name='prediction',
                                        output_type=tf.int64)
            # NOTE(review): masked-out predictions are set to 0 and the mean
            # below runs over every position, so the result depends on the
            # value targets use for padding -- confirm this is intended.
            prediction_mask = self.prediction * tf.to_int64(self.mask)
            correct_pred = tf.equal(prediction_mask, self.targets)
            self.accuracy = tf.reduce_mean(tf.cast(correct_pred, "float"),
                                           name="accuracy")

        with tf.variable_scope('While'):

            # Loop predicate: run while inf_idx < max_dec_size.
            def inf_cond(inf_idx, dec_input_idx, prev_state, output_tensor,
                         max_dec_size):
                return tf.less(inf_idx, max_dec_size)

            # One inference step: embed the previous token ids, advance the
            # decoder cell, project to logits and take the argmax as the
            # next input.
            def inf_body(inf_idx, dec_input_idx, prev_state, output_tensor,
                         max_dec_size):
                dec_input_embeddings = tf.nn.embedding_lookup(
                    self.dec_embeddings,
                    dec_input_idx)  # [ batch, 1, embedding ] [
                context_expand = tf.expand_dims(self.context,
                                                1)  # [ batch, 1, hidden    ]
                dec_input_embeddings = tf.concat(
                    [context_expand, dec_input_embeddings], axis=-1)
                # NOTE(review): sequence_length=[1] has a single entry, which
                # looks like it assumes a batch of one at inference -- confirm.
                dec_outputs, dec_state = tf.nn.dynamic_rnn(
                    self.dec_cell,
                    dec_input_embeddings,
                    sequence_length=[1],
                    initial_state=prev_state,
                    dtype=tf.float32)
                self.context = self.bahdanau_attention(dec_state,
                                                       self.enc_outputs)
                logits = tf.layers.dense(dec_outputs,
                                         self.n_class,
                                         activation=None,
                                         reuse=tf.AUTO_REUSE,
                                         name='output_dense')
                idx_prediction = tf.argmax(logits,
                                           2,
                                           output_type=tf.int64,
                                           name='idx_prediction')
                output_tensor = output_tensor.write(inf_idx, idx_prediction)
                return inf_idx + 1, idx_prediction, dec_state, output_tensor, max_dec_size

            inf_idx = tf.constant(0)
            inf_output_tensor = tf.TensorArray(tf.int64,
                                               size=self.n_dec_size,
                                               name='inf_output_tensor')
            # Recompute the context from the encoder state: self.context was
            # rebound while the training loop body was traced.
            self.context = self.bahdanau_attention(self.enc_state,
                                                   self.enc_outputs)
            self.n_dec_state = tf.nn.rnn_cell.LSTMStateTuple(
                c=self.context, h=self.dec_state.h)

            _, _, _, inf_output_tensor, _ = \
            tf.while_loop( cond = inf_cond,
                           body = inf_body,
                           loop_vars=[ inf_idx,
                                       self.inf_input,
                                       self.n_dec_state,
                                       inf_output_tensor,
                                       self.n_dec_size ])
            # Stack the per-step predictions and flatten them to rank 1.
            self.inf_result = inf_output_tensor.stack()
            self.inf_result = tf.reshape(self.inf_result, [-1], 'inf_result')
Beispiel #57
0
def model_fn(mode, inputs, params, reuse=False):
    """Assemble the graph nodes used to train or evaluate the model.

    Args:
        mode: (string) can be 'train' or 'eval'
        inputs: (dict) graph inputs (features, labels...); may hold
                `tf.placeholder` nodes or outputs of `tf.data`. The same
                dict is extended in place and returned.
        params: (Params) hyperparameters of the model (ex: `params.learning_rate`)
        reuse: (bool) whether to reuse the weights

    Returns:
        model_spec: (dict) graph operations or nodes needed for training / evaluation
    """
    is_training = (mode == 'train')
    labels = inputs['labels']
    images = inputs['images']

    # -----------------------------------------------------------
    # MODEL
    with tf.variable_scope('model', reuse=reuse):
        predictions = build_model(is_training, inputs, params)

    # Loss
    loss = tf.losses.mean_squared_error(labels=labels, predictions=predictions)

    # Similarity: dot product between L2-normalised, flattened images.
    side = params.image_size
    flat_dim = side * side

    def unit_rows(batch):
        # Flatten every example to one row and scale it to unit L2 norm.
        return tf.nn.l2_normalize(tf.reshape(batch, [-1, flat_dim]), [1])

    def row_dot(left, right):
        # Row-wise dot product of two equally shaped matrices.
        return tf.reduce_sum(tf.multiply(left, right), axis=1)

    predictions_unit = unit_rows(predictions)
    labels_unit = unit_rows(labels)
    images_unit = unit_rows(images)

    orig_similarity = row_dot(images_unit, labels_unit)
    new_similarity = row_dot(predictions_unit, labels_unit)
    similarity_progress = tf.reduce_mean(new_similarity - orig_similarity)
    similarity = tf.reduce_mean(new_similarity)

    # Training step: minimise the loss with Adam.
    if is_training:
        optimizer = tf.train.AdamOptimizer(params.learning_rate)
        global_step = tf.train.get_or_create_global_step()
        if params.use_batch_norm:
            # Run the batch-norm moving-average updates before each step.
            update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
            with tf.control_dependencies(update_ops):
                train_op = optimizer.minimize(loss, global_step=global_step)
        else:
            train_op = optimizer.minimize(loss, global_step=global_step)

    # -----------------------------------------------------------
    # METRICS AND SUMMARIES
    # Streaming means (tf.metrics average over the whole dataset).
    with tf.variable_scope("metrics"):
        metrics = {
            'similarity': tf.metrics.mean(similarity),
            'loss': tf.metrics.mean(loss),
            'similarity_progress': tf.metrics.mean(similarity_progress)
        }

    # One op that updates every metric.
    update_metrics_op = tf.group(
        *[update for _value, update in metrics.values()])

    # Op that resets the local variables backing the metrics.
    metric_variables = tf.get_collection(tf.GraphKeys.LOCAL_VARIABLES,
                                         scope="metrics")
    metrics_init_op = tf.variables_initializer(metric_variables)

    # Scalar summaries for training.
    tf.summary.scalar('loss', loss)
    tf.summary.scalar('similarity', similarity)
    tf.summary.scalar('similarity_progress', similarity_progress)
    #    tf.summary.image('train_image', inputs['images'])

    #TODO: if mode == 'eval': ?
    # Image summaries for the examples whose prediction/label similarity is
    # below 0.5: the input image, the prediction and the label.
    similarity_arr = row_dot(predictions_unit, labels_unit)
    mask = tf.greater(0.5, similarity_arr)
    for tag, batch in (('incorrectly_train', inputs['images']),
                       ('incorrectly_predict', predictions),
                       ('incorrect_label', labels)):
        tf.summary.image(tag, tf.boolean_mask(batch, mask))

    # -----------------------------------------------------------
    # MODEL SPECIFICATION
    # The inputs dict itself is extended with the nodes needed for training
    # and evaluation.
    model_spec = inputs
    model_spec.update({
        'variable_init_op': tf.global_variables_initializer(),
        "predictions": predictions,
        'loss': loss,
        'similarity': similarity,
        'similarity_progress': similarity_progress,
        'metrics_init_op': metrics_init_op,
        'metrics': metrics,
        'update_metrics': update_metrics_op,
        'summary_op': tf.summary.merge_all(),
    })

    if is_training:
        model_spec['train_op'] = train_op

    return model_spec
Beispiel #58
0
def preprocess(image,
               bbox,
               input_shape=(416, 416),
               random=False,
               jitter=.3,
               hue=.1,
               sat=1.5,
               bri=.1):
    """Fit `image` to `input_shape` and remap `bbox` into the new frame.

    When `random` equals False the image is resized with its aspect ratio
    preserved and centred on a canvas padded with the value 128. Otherwise
    the image goes through a randomly jittered and scaled resize, is padded
    and/or cropped at a random offset, is mirrored horizontally on a coin
    flip, and finally gets random hue, saturation and brightness changes.

    Args:
        image: rank-3 tensor whose last dimension is 3.
        bbox: box coordinates. The arithmetic scales the four columns by
            (height, width, height, width), i.e. it treats them as relative
            (y, x, y, x) values -- TODO confirm against the caller.
        input_shape: target size given as (height, width).
        random: the augmenting path is taken unless this equals False.
        jitter: half-width of the aspect ratio jitter range, must lie in
            (0, 1).
        hue: max delta handed to `tf.image.random_hue`.
        sat: the saturation factor is drawn from [1 / sat, sat].
        bri: max delta handed to `tf.image.random_brightness`.

    Returns:
        An `(image_data, bbox)` pair. `image_data` is float32 divided by
        255. The deterministic path keeps the leading batch dimension; the
        random path passes the image through `tf.squeeze`. On the random
        path `bbox` is clipped to [0, 1] but not filtered.

    Raises:
        AssertionError: if one of the argument checks below fails.
    """
    # NOTE: input_shape is ordered (height, width)
    assert len(image.shape) == 3 and image.shape[-1] == 3
    assert 0 < jitter < 1
    assert -1 < hue < 1
    assert sat > 0
    assert 0 < bri < 1

    # the resize/pad/crop ops below work on a batch, so add a batch axis
    batched = tf.expand_dims(image, axis=0)

    deterministic = False == random
    if deterministic:
        # Letterbox: shrink/grow so the image fits inside input_shape.
        # resize_with_pad is avoided on purpose: it can only fill with
        # zeros, whereas the canvas here is filled with 128.
        fitted = tf.image.resize(batched,
                                 input_shape,
                                 method=tf.image.ResizeMethod.BICUBIC,
                                 preserve_aspect_ratio=True)
        fitted_hw = fitted.shape[1:3]  # (height, width)
        spare_h = input_shape[0] - fitted_hw[0]
        spare_w = input_shape[1] - fitted_hw[1]
        top_pad = spare_h // 2
        bottom_pad = spare_h - top_pad
        left_pad = spare_w // 2
        right_pad = spare_w - left_pad
        paddings = [[0, 0], [top_pad, bottom_pad], [left_pad, right_pad],
                    [0, 0]]
        canvas = tf.pad(fitted, paddings, constant_values=128)
        image_data = tf.cast(canvas, tf.float32) / 255.

        # boxes: relative-to-resized -> pixels -> shifted by the padding
        # -> relative to the padded canvas
        fitted_extent = [
            fitted_hw[0], fitted_hw[1], fitted_hw[0], fitted_hw[1]
        ]
        pad_shift = [top_pad, left_pad, top_pad, left_pad]
        canvas_extent = [
            input_shape[0], input_shape[1], input_shape[0], input_shape[1]
        ]
        bbox = bbox * tf.convert_to_tensor(fitted_extent, dtype=tf.float32)
        bbox = bbox + tf.convert_to_tensor(pad_shift, dtype=tf.float32)
        bbox = bbox / tf.convert_to_tensor(canvas_extent, dtype=tf.float32)
        return image_data, bbox

    # ---- augmenting path ----
    # Draw an independent jitter factor for height and width, apply it to
    # the target shape, then rescale by a factor drawn from [0.8, 1.2).
    ratio_jitter = tf.random.uniform(shape=[2],
                                     minval=1 - jitter,
                                     maxval=1 + jitter,
                                     dtype=tf.float32)
    jittered_shape = tf.convert_to_tensor(input_shape,
                                          dtype=tf.float32) * ratio_jitter
    scale = tf.random.uniform(shape=[1],
                              minval=.8,
                              maxval=1.2,
                              dtype=tf.float32)
    taller_than_wide = tf.greater(jittered_shape[0], jittered_shape[1])
    resize_shape = tf.cond(
        taller_than_wide,
        true_fn=lambda: scale * jittered_shape / ratio_jitter[0],
        false_fn=lambda: scale * jittered_shape / ratio_jitter[1])
    resize_shape = tf.cast(resize_shape, dtype=tf.int32)
    resized = tf.image.resize(batched,
                              resize_shape,
                              method=tf.image.ResizeMethod.BICUBIC)

    # Vertical axis: pad both sides when the resized image is too short,
    # otherwise only pick a random crop offset.
    if resize_shape[0] < input_shape[0]:
        pad_h = input_shape[0] - resize_shape[0]
        resized = tf.pad(resized,
                         [[0, 0], [pad_h, pad_h], [0, 0], [0, 0]],
                         constant_values=128)
        offset_height = tf.random.uniform(maxval=pad_h + 1,
                                          dtype=tf.int32,
                                          shape=())
        # re-express the boxes relative to the padded image
        extent_before = [
            resize_shape[0], resize_shape[1], resize_shape[0],
            resize_shape[1]
        ]
        bbox = bbox * tf.convert_to_tensor(extent_before, dtype=tf.float32)
        bbox = bbox + tf.convert_to_tensor([pad_h, 0, pad_h, 0],
                                           dtype=tf.float32)
        resize_shape = resize_shape + tf.convert_to_tensor([2 * pad_h, 0],
                                                           dtype=tf.int32)
        extent_after = [
            resize_shape[0], resize_shape[1], resize_shape[0],
            resize_shape[1]
        ]
        bbox = bbox / tf.convert_to_tensor(extent_after, dtype=tf.float32)
    else:
        crop_h = resize_shape[0] - input_shape[0]
        offset_height = tf.random.uniform(maxval=crop_h + 1,
                                          dtype=tf.int32,
                                          shape=())

    # Horizontal axis: same treatment as the vertical one.
    if resize_shape[1] < input_shape[1]:
        pad_w = input_shape[1] - resize_shape[1]
        resized = tf.pad(resized,
                         [[0, 0], [0, 0], [pad_w, pad_w], [0, 0]],
                         constant_values=128)
        offset_width = tf.random.uniform(maxval=pad_w + 1,
                                         dtype=tf.int32,
                                         shape=())
        # re-express the boxes relative to the padded image
        extent_before = [
            resize_shape[0], resize_shape[1], resize_shape[0],
            resize_shape[1]
        ]
        bbox = bbox * tf.convert_to_tensor(extent_before, dtype=tf.float32)
        bbox = bbox + tf.convert_to_tensor([0, pad_w, 0, pad_w],
                                           dtype=tf.float32)
        resize_shape = resize_shape + tf.convert_to_tensor([0, 2 * pad_w],
                                                           dtype=tf.int32)
        extent_after = [
            resize_shape[0], resize_shape[1], resize_shape[0],
            resize_shape[1]
        ]
        bbox = bbox / tf.convert_to_tensor(extent_after, dtype=tf.float32)
    else:
        crop_w = resize_shape[1] - input_shape[1]
        offset_width = tf.random.uniform(maxval=crop_w + 1,
                                         dtype=tf.int32,
                                         shape=())

    # Cut the final window out of the (possibly padded) image.
    resized = tf.image.crop_to_bounding_box(resized,
                                            offset_height,
                                            offset_width,
                                            input_shape[0],
                                            input_shape[1])
    # boxes: relative -> pixels -> shifted by the crop origin -> relative
    # to the cropped window
    extent_before_crop = [
        resize_shape[0], resize_shape[1], resize_shape[0], resize_shape[1]
    ]
    bbox = bbox * tf.convert_to_tensor(extent_before_crop, dtype=tf.float32)
    crop_shift = [
        -offset_height, -offset_width, -offset_height, -offset_width
    ]
    bbox = bbox + tf.convert_to_tensor(crop_shift, dtype=tf.float32)
    window_extent = [
        input_shape[0], input_shape[1], input_shape[0], input_shape[1]
    ]
    bbox = bbox / tf.convert_to_tensor(window_extent, dtype=tf.float32)

    # Horizontal mirror on a coin flip.
    # NOTE(review): the coin is tossed in Python through NumPy, not as a
    # TF op -- confirm this is what is wanted if the function gets traced.
    if np.random.rand() < .5:
        resized = tf.image.flip_left_right(resized)
        # columns 0 and 2 are kept, columns 1 and 3 become 1 - value
        # NOTE(review): columns 1 and 3 are mirrored but not swapped --
        # confirm callers expect that ordering.
        mirror_offset = tf.convert_to_tensor([0, 1, 0, 1], dtype=tf.float32)
        mirror_sign = tf.convert_to_tensor([1, -1, 1, -1], dtype=tf.float32)
        bbox = mirror_offset + mirror_sign * bbox

    # Colour distortions on the [0, 1]-scaled image.
    image_data = tf.cast(resized, tf.float32) / 255.
    image_data = tf.image.random_hue(image_data, hue)
    image_data = tf.image.random_saturation(image_data,
                                            lower=1. / sat,
                                            upper=sat)
    image_data = tf.image.random_brightness(image_data, bri)

    # Keep the coordinates inside the image.
    bbox = tf.clip_by_value(bbox, 0, 1)
    # Pixel extent of every box; extent.shape = (bbox_num, 2)
    extent = bbox[..., 2:4] - bbox[..., 0:2]
    extent = extent * tf.convert_to_tensor(input_shape, dtype=tf.float32)
    # valid.shape = (bbox_num): both extents exceed one pixel
    valid = tf.math.logical_and(extent[..., 0] > 1, extent[..., 1] > 1)
    # valid_bbox.shape = (valid box num, 4)
    valid_bbox = tf.boolean_mask(bbox, valid)
    # NOTE(review): index 1 of the shape is the coordinate axis, not the
    # number of boxes, and valid_bbox is not what gets returned -- confirm
    # the intent of this check.
    assert (valid_bbox.shape[1] != 0)
    return tf.squeeze(image_data), bbox
Beispiel #59
0
# Visual sanity check of the training labels: draw the boxes and anchors
# selected by the confidence mask on the first few parsed samples.
# Parser built from the training image directory, the anchors, the grid
# sizes and the target image size taken from the project config.
parse = l_datasets.Parse(l_config.train_image_dir, anchors,
                         l_config.grid_sizes, l_config.image_target_size)

# One dataset element per line of the label file, run through the parser.
train_ds = tf.data.TextLineDataset(l_config.train_label_file)
train_ds = train_ds.map(parse)

# Collapse every anchor tensor to rows of 4 values and stack them along
# axis 0 so that they can be masked together with the labels below.
flat_anchors = [tf.reshape(item, (-1, 4)) for item in anchors]
flat_anchors = tf.concat(flat_anchors, 0)

# Only the first three samples are inspected.
for index, (image, label) in enumerate(train_ds.take(3)):
    # presumably maps an image normalized to [-1, 1] back to [0, 255] --
    # TODO confirm against the parser
    image = (image + 1.0) * 127.5
    # draw_bounding_boxes is fed a one-element batch
    images = [image]
    layer_conf = label[1]
    # select the entries whose first confidence channel equals 1
    mask = layer_conf[..., 0] == 1
    mask_boxes = tf.boolean_mask(label[0], mask)
    mask_anchors = tf.boolean_mask(flat_anchors, mask)
    mask_cates = tf.boolean_mask(label[2][..., 0], mask)
    # NOTE(review): same expression as mask_cates; not used in the visible
    # part of this snippet
    cates = tf.boolean_mask(label[2][..., 0], mask)
    # label boxes are drawn with color [0, 255, 0], anchors with [255, 0, 0]
    images = tf.image.draw_bounding_boxes(images, [mask_boxes], [[0, 255, 0]])
    images = tf.image.draw_bounding_boxes(images, [mask_anchors],
                                          [[255, 0, 0]])

    image = images[0].numpy().astype(np.int32)

    # Scale the first two box coordinates by the target image size and
    # convert them to integers (used as positions in the loop below).
    cv2_loca = mask_boxes.numpy()[..., :2] * np.array(
        l_config.image_target_size)
    cv2_loca = cv2_loca.astype(np.int32)
    # shift the first coordinate by 10
    cv2_loca[..., 0] += 10
    cv2_cate = mask_cates.numpy().astype(np.int32)
    # NOTE(review): this inner loop reuses the name `index` and thereby
    # shadows the outer loop variable; its body lies beyond this excerpt.
    for index, (loca, cate) in enumerate(zip(cv2_loca, cv2_cate)):
Beispiel #60
0
def _streaming_tp_fp_array(num_gt_boxes,
                           tp,
                           fp,
                           scores,
                           class_name,
                           remove_zero_scores=True,
                           metrics_collections=None,
                           updates_collections=None,
                           name=None):
    """Accumulate true/false positive flags, scores and ground-truth counts.

    Every run of the returned update op appends the flattened `tp`, `fp`
    and `scores` of the current batch to local variables and adds
    `num_gt_boxes` to a running total.

    Args:
        num_gt_boxes: number of ground-truth objects, cast to int64.
        tp: true positive flags, cast to bool and flattened.
        fp: false positive flags, cast to bool and flattened.
        scores: detection scores, cast to float and flattened.
        class_name: used to build the default variable scope name.
        remove_zero_scores: if truthy, keep only entries where `tp` or `fp`
            is set and the score is greater than 1e-4.
        metrics_collections: optional collections the value variables are
            added to.
        updates_collections: optional collections the update ops are added
            to.
        name: optional variable scope name.

    Returns:
        A pair `(values, update_op)` where `values` is the tuple
        `(tp_value, fp_value, scores_value, num_gt_boxes_value)` of local
        variables and `update_op` groups all four update operations.
    """

    def _empty_local(var_name, **extra):
        # Local variable that starts empty; its shape is not validated so
        # it can be re-assigned with a longer tensor later on.
        return metrics_impl._create_local(var_name,
                                          shape=[0],
                                          validate_shape=False,
                                          **extra)

    def _append(variable, batch):
        # Replace the variable by itself extended with the current batch.
        extended = tf.concat([variable, batch], axis=0)
        return tf.assign(variable, extended, validate_shape=False)

    default_name = 'streaming_tp_fp_{}'.format(class_name)
    scope_inputs = [num_gt_boxes, tp, fp, scores]
    with variable_scope.variable_scope(name, default_name, scope_inputs):
        # Bring the inputs to the dtypes of the accumulators.
        tp = tf.cast(tp, tf.bool)
        fp = tf.cast(fp, tf.bool)
        scores = tf.to_float(scores)
        num_gt_boxes = tf.to_int64(num_gt_boxes)

        # Flatten the per-detection tensors to rank 1.
        tp, fp, scores = (tf.reshape(t, [-1]) for t in (tp, fp, scores))

        if remove_zero_scores:
            # Drop entries that are neither TP nor FP as well as entries
            # whose score does not exceed the threshold.
            rm_threshold = 1e-4
            flagged = tf.logical_or(tp, fp)
            scored = tf.greater(scores, rm_threshold)
            keep = tf.logical_and(flagged, scored)
            tp = tf.boolean_mask(tp, keep)
            fp = tf.boolean_mask(fp, keep)
            scores = tf.boolean_mask(scores, keep)

        # Local variables accumulating information over batches.
        tp_value = _empty_local('tp_value', dtype=tf.bool)
        fp_value = _empty_local('fp_value', dtype=tf.bool)
        scores_value = _empty_local('scores_value')
        num_gt_boxes_value = metrics_impl._create_local('num_gt_boxes_value',
                                                        shape=[],
                                                        dtype=tf.int64)

        # One update op per accumulator.
        tp_op = _append(tp_value, tp)
        fp_op = _append(fp_value, fp)
        scores_op = _append(scores_value, scores)
        num_gt_boxes_op = tf.assign_add(num_gt_boxes_value, num_gt_boxes)

        values = (tp_value, fp_value, scores_value, num_gt_boxes_value)
        update_ops = (tp_op, fp_op, scores_op, num_gt_boxes_op)

        # Register in the requested collections, if any.
        if metrics_collections:
            ops.add_to_collections(metrics_collections, values)
        if updates_collections:
            ops.add_to_collections(updates_collections, update_ops)

        return values, tf.group(*update_ops)