Example #1
    def getRpRnTpTnForTrain0OrVal1(self, y, training0OrValidation1):
        # The returned list has (numberOfClasses) x 4 integers: numberOfRealPositives, numberOfRealNegatives, numberOfTruePredictedPositives, numberOfTruePredictedNegatives for each class (incl. background).
        # Order in the list is the natural order of the classes (i.e. class-0 RP, RN, TPP, TPN, class-1 RP, RN, TPP, TPN, class-2 RP, RN, TPP, TPN, ...)
        # param y: y = T.itensor4('y'). Dimensions [batchSize, r, c, z]
        
        yPredToUse = self.y_pred_train if training0OrValidation1 == 0 else self.y_pred_val
        
        returnedListWithNumberOfRpRnTpTnForEachClass = []
        
        for class_i in range(0, self._numberOfOutputClasses) :
            #Number of Real Positive, Real Negatives, True Predicted Positives and True Predicted Negatives are reported PER CLASS (first for WHOLE).
            tensorOneAtRealPos = tf.equal(y, class_i)
            tensorOneAtRealNeg = tf.logical_not(tensorOneAtRealPos)

            tensorOneAtPredictedPos = tf.equal(yPredToUse, class_i)
            tensorOneAtPredictedNeg = tf.logical_not(tensorOneAtPredictedPos)
            tensorOneAtTruePos = tf.logical_and(tensorOneAtRealPos, tensorOneAtPredictedPos)
            tensorOneAtTrueNeg = tf.logical_and(tensorOneAtRealNeg, tensorOneAtPredictedNeg)
                    
            returnedListWithNumberOfRpRnTpTnForEachClass.append( tf.reduce_sum( tf.cast(tensorOneAtRealPos, dtype="int32")) )
            returnedListWithNumberOfRpRnTpTnForEachClass.append( tf.reduce_sum( tf.cast(tensorOneAtRealNeg, dtype="int32")) )
            returnedListWithNumberOfRpRnTpTnForEachClass.append( tf.reduce_sum( tf.cast(tensorOneAtTruePos, dtype="int32")) )
            returnedListWithNumberOfRpRnTpTnForEachClass.append( tf.reduce_sum( tf.cast(tensorOneAtTrueNeg, dtype="int32")) )
            
        return returnedListWithNumberOfRpRnTpTnForEachClass
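
A minimal runnable sketch of the per-class RP/RN/TP/TN counting above, on hypothetical toy labels (assuming the TF 1.x-style API, reachable today through tensorflow.compat.v1):

import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()

# Toy ground truth and predictions over 3 classes (hypothetical values).
y_true = tf.constant([[0, 1, 2, 1]])
y_pred = tf.constant([[0, 2, 2, 1]])

counts = []
for class_i in range(3):
    real_pos = tf.equal(y_true, class_i)
    real_neg = tf.logical_not(real_pos)
    pred_pos = tf.equal(y_pred, class_i)
    pred_neg = tf.logical_not(pred_pos)
    for t in (real_pos, real_neg,
              tf.logical_and(real_pos, pred_pos),
              tf.logical_and(real_neg, pred_neg)):
        counts.append(tf.reduce_sum(tf.cast(t, tf.int32)))

with tf.Session() as sess:
    print(sess.run(counts))  # RP, RN, TP, TN per class, flattened
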
Example #2
        def m_body(i, ta_tp, ta_fp, gmatch, n_ignored_det):
            # Jaccard score with groundtruth bboxes.
            rbbox = bboxes[i, :]
#             rbbox = tf.Print(rbbox, [rbbox])
            jaccard = bboxes_jaccard(rbbox, gxs, gys)

            # Best fit, checking it's above threshold.
            idxmax = tf.cast(tf.argmax(jaccard, axis=0), dtype=tf.int32)
            
            jcdmax = jaccard[idxmax]
            match = jcdmax > matching_threshold
            existing_match = gmatch[idxmax]
            not_ignored = tf.logical_not(gignored[idxmax])

            n_ignored_det = n_ignored_det + tf.cast(gignored[idxmax], tf.int32)
            # TP: match & no previous match and FP: previous match | no match.
            # If ignored: no record, i.e FP=False and TP=False.
            tp = tf.logical_and(not_ignored, tf.logical_and(match, tf.logical_not(existing_match)))
            ta_tp = ta_tp.write(i, tp)
            
            fp = tf.logical_and(not_ignored, tf.logical_or(existing_match, tf.logical_not(match)))
            ta_fp = ta_fp.write(i, fp)
            
            # Update groundtruth match.
            mask = tf.logical_and(tf.equal(grange, idxmax), tf.logical_and(not_ignored, match))
            gmatch = tf.logical_or(gmatch, mask)
            return [i+1, ta_tp, ta_fp, gmatch, n_ignored_det]
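
The TP/FP bookkeeping above reduces to a small truth table over three booleans; a scalar sketch with hypothetical inputs (TF 1.x API via tensorflow.compat.v1):

import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()

match = tf.constant(True)            # Jaccard above threshold
existing_match = tf.constant(False)  # this groundtruth box already matched
not_ignored = tf.constant(True)      # groundtruth box is not ignored

tp = tf.logical_and(not_ignored,
                    tf.logical_and(match, tf.logical_not(existing_match)))
fp = tf.logical_and(not_ignored,
                    tf.logical_or(existing_match, tf.logical_not(match)))

with tf.Session() as sess:
    print(sess.run([tp, fp]))  # [True, False]: a first match counts as TP
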
Example #3
  def train(self, sentences):
    token_ids, token_values, token_dense_shape = self._tokenize(sentences)
    tokens_sparse = tf.sparse.SparseTensor(
        indices=token_ids, values=token_values, dense_shape=token_dense_shape)
    tokens = tf.sparse.to_dense(tokens_sparse, default_value="")

    sparse_lookup_ids = tf.sparse.SparseTensor(
        indices=tokens_sparse.indices,
        values=self._words_to_indices(tokens_sparse.values),
        dense_shape=tokens_sparse.dense_shape)
    lookup_ids = tf.sparse.to_dense(sparse_lookup_ids, default_value=0)

    # Targets are the next word for each word of the sentence.
    tokens_ids_seq = lookup_ids[:, 0:-1]
    tokens_ids_target = lookup_ids[:, 1:]

    tokens_prefix = tokens[:, 0:-1]

    # Mask determining which positions we care about for a loss: all positions
    # that have a valid non-terminal token.
    mask = tf.logical_and(
        tf.logical_not(tf.equal(tokens_prefix, "")),
        tf.logical_not(tf.equal(tokens_prefix, "<E>")))

    input_mask = tf.cast(mask, tf.int32)

    with tf.GradientTape() as t:
      sentence_embeddings = tf.nn.embedding_lookup(self._embeddings,
                                                   tokens_ids_seq)

      lstm_initial_state = self._lstm_cell.get_initial_state(
          sentence_embeddings)

      lstm_output = self._rnn_layer(
          inputs=sentence_embeddings, initial_state=lstm_initial_state)

      # Flatten LSTM outputs from [batch, time, units] to [batch*time, units].
      lstm_output = tf.reshape(lstm_output, [-1, self._lstm_cell.output_size])

      logits = self._logit_layer(lstm_output)

      targets = tf.reshape(tokens_ids_target, [-1])
      weights = tf.cast(tf.reshape(input_mask, [-1]), tf.float32)

      losses = tf.nn.sparse_softmax_cross_entropy_with_logits(
          labels=targets, logits=logits)

      # Final loss is the mean loss for all token losses.
      final_loss = tf.math.divide(
          tf.reduce_sum(tf.multiply(losses, weights)),
          tf.reduce_sum(weights),
          name="final_loss")

    watched = t.watched_variables()
    gradients = t.gradient(final_loss, watched)

    for w, g in zip(watched, gradients):
      w.assign_sub(g)

    return final_loss
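
The loss mask above keeps exactly the positions that hold a valid non-terminal token; a minimal sketch with one hypothetical padded sentence (the "<E>" marker follows the snippet's convention):

import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()

# Hypothetical token prefixes: "" is padding, "<E>" marks end-of-sentence.
tokens_prefix = tf.constant([["<S>", "hello", "<E>", ""]])
mask = tf.logical_and(
    tf.logical_not(tf.equal(tokens_prefix, "")),
    tf.logical_not(tf.equal(tokens_prefix, "<E>")))

with tf.Session() as sess:
    print(sess.run(mask))  # [[ True  True False False]]
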
Example #4
        def m_body(i, ta_tp, ta_fp, gmatch):
            # Jaccard score with groundtruth bboxes.
            rbbox = bboxes[i]
            jaccard = bboxes_jaccard(rbbox, gbboxes)
            jaccard = jaccard * tf.cast(tf.equal(glabels, rlabel), dtype=jaccard.dtype)

            # Best fit, checking it's above threshold.
            idxmax = tf.cast(tf.argmax(jaccard, axis=0), tf.int32)
            jcdmax = jaccard[idxmax]
            match = jcdmax > matching_threshold
            existing_match = gmatch[idxmax]
            not_difficult = tf.logical_not(gdifficults[idxmax])

            # TP: match & no previous match and FP: previous match | no match.
            # If difficult: no record, i.e FP=False and TP=False.
            tp = tf.logical_and(not_difficult,
                                tf.logical_and(match, tf.logical_not(existing_match)))
            ta_tp = ta_tp.write(i, tp)
            fp = tf.logical_and(not_difficult,
                                tf.logical_or(existing_match, tf.logical_not(match)))
            ta_fp = ta_fp.write(i, fp)
            # Update groundtruth match.
            mask = tf.logical_and(tf.equal(grange, idxmax),
                                  tf.logical_and(not_difficult, match))
            gmatch = tf.logical_or(gmatch, mask)

            return [i+1, ta_tp, ta_fp, gmatch]
Example #5
  def get_scheduled_sample_inputs(self,
                                  done_warm_start,
                                  groundtruth_items,
                                  generated_items,
                                  scheduled_sampling_func):
    """Scheduled sampling.

    Args:
      done_warm_start: whether we are done with warm start or not.
      groundtruth_items: list of ground truth items.
      generated_items: list of generated items.
      scheduled_sampling_func: scheduled sampling function to choose between
        groundtruth items and generated items.

    Returns:
      A mixed list of ground truth and generated items.
    """
    def sample():
      """Calculate the scheduled sampling params based on iteration number."""
      with tf.variable_scope("scheduled_sampling", reuse=tf.AUTO_REUSE):
        output_items = []
        for item_gt, item_gen in zip(groundtruth_items, generated_items):
          output_items.append(scheduled_sampling_func(item_gt, item_gen))
        return output_items

    cases = [
        (tf.logical_not(done_warm_start), lambda: groundtruth_items),
        (tf.logical_not(self.is_training), lambda: generated_items),
    ]
    output_items = tf.case(cases, default=sample, strict=True)

    return output_items
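
tf.case runs the first branch whose predicate is true, so the two tf.logical_not conditions above implement "warm start -> ground truth, inference -> generated, otherwise sample"; a scalar sketch with hypothetical flags:

import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()

done_warm_start = tf.constant(False)
is_training = tf.constant(True)

cases = [
    (tf.logical_not(done_warm_start), lambda: tf.constant("groundtruth")),
    (tf.logical_not(is_training), lambda: tf.constant("generated")),
]
out = tf.case(cases, default=lambda: tf.constant("sampled"), strict=True)

with tf.Session() as sess:
    print(sess.run(out))  # b'groundtruth' (still warming up)
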
Example #6
    def build_graph(self, nn_im_w, nn_im_h, num_colour_channels=3, weights=None, biases=None):
        num_outputs = 1  # a single scalar output
        self.nn_im_w = nn_im_w
        self.nn_im_h = nn_im_h

        if weights is None:
            weights = [None, None, None, None, None]
        if biases is None:
            biases = [None, None, None, None, None]

        with tf.device('/cpu:0'):
            # Placeholder variables for the input image and output images
            self.x = tf.placeholder(tf.float32, shape=[None, nn_im_w*nn_im_h*3])
            self.y_ = tf.placeholder(tf.float32, shape=[None, num_outputs])
            self.threshold = tf.placeholder(tf.float32)

            # Build the convolutional and pooling layers
            conv1_output_channels = 32
            conv2_output_channels = 16
            conv3_output_channels = 8

            conv_layer_1_input = tf.reshape(self.x, [-1, nn_im_h, nn_im_w, num_colour_channels]) #The resized input image
            self.build_conv_layer(conv_layer_1_input, num_colour_channels, conv1_output_channels, initial_weights=weights[0], initial_biases=biases[0]) # layer 1
            self.build_conv_layer(self.layers[0][0], conv1_output_channels, conv2_output_channels, initial_weights=weights[1], initial_biases=biases[1])# layer 2
            self.build_conv_layer(self.layers[1][0], conv2_output_channels, conv3_output_channels, initial_weights=weights[2], initial_biases=biases[2])# layer 3

            # Build the fully connected layer
            convnet_output_w = nn_im_w//8
            convnet_output_h = nn_im_h//8

            fully_connected_layer_input = tf.reshape(self.layers[2][0], [-1, convnet_output_w * convnet_output_h * conv3_output_channels])
            self.build_fully_connected_layer(fully_connected_layer_input, convnet_output_w, convnet_output_h, conv3_output_channels, initial_weights=weights[3], initial_biases=biases[3])

            # The dropout stage and readout layer
            self.keep_prob, self.h_drop = self.dropout(self.layers[3][0])
            self.y_conv,_,_ = self.build_readout_layer(self.h_drop, num_outputs, initial_weights=weights[4], initial_biases=biases[4])

            self.mean_error = tf.sqrt(tf.reduce_mean(tf.square(self.y_ - self.y_conv)))
            self.train_step = tf.train.AdamOptimizer(1e-4).minimize(self.mean_error)

            self.accuracy = (1.0 - tf.reduce_mean(tf.abs(self.y_ - tf.round(self.y_conv))))


            positive_examples = tf.greater_equal(self.y_, 0.5)
            negative_examples = tf.logical_not(positive_examples)
            positive_classifications = tf.greater_equal(self.y_conv, self.threshold)
            negative_classifications = tf.logical_not(positive_classifications)

            self.true_positive = tf.reduce_sum(tf.cast(tf.logical_and(positive_examples, positive_classifications),tf.int32)) # count the examples that are positive and classified as positive
            self.false_positive = tf.reduce_sum(tf.cast(tf.logical_and(negative_examples, positive_classifications),tf.int32)) # count the examples that are negative but classified as positive

            self.true_negative = tf.reduce_sum(tf.cast(tf.logical_and(negative_examples, negative_classifications),tf.int32)) # count the examples that are negative and classified as negative
            self.false_negative = tf.reduce_sum(tf.cast(tf.logical_and(positive_examples, negative_classifications),tf.int32)) # count the examples that are positive but classified as negative

            self.positive_count = tf.reduce_sum(tf.cast(positive_examples, tf.int32)) # count the examples that are positive
            self.negative_count = tf.reduce_sum(tf.cast(negative_examples, tf.int32)) # count the examples that are negative

            self.confusion_matrix = tf.reshape(tf.stack([self.true_positive, self.false_positive, self.false_negative, self.true_negative]), [2,2])

        self.sess.run(tf.global_variables_initializer())
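
The four counters above populate a 2x2 confusion matrix; a self-contained sketch with hypothetical labels and scores (TF 1.x API, tf.stack in place of the removed tf.pack):

import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()

labels = tf.constant([[1.0], [0.0], [1.0], [0.0]])
scores = tf.constant([[0.9], [0.8], [0.2], [0.1]])

pos_ex = tf.greater_equal(labels, 0.5)
neg_ex = tf.logical_not(pos_ex)
pos_cls = tf.greater_equal(scores, 0.5)
neg_cls = tf.logical_not(pos_cls)

def count(a, b):
    return tf.reduce_sum(tf.cast(tf.logical_and(a, b), tf.int32))

confusion = tf.reshape(tf.stack(
    [count(pos_ex, pos_cls), count(neg_ex, pos_cls),
     count(pos_ex, neg_cls), count(neg_ex, neg_cls)]), [2, 2])

with tf.Session() as sess:
    print(sess.run(confusion))  # [[TP FP] [FN TN]] -> [[1 1] [1 1]]
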
Example #7
 def compute_error(self):
   # Set mask variables and perform batch processing.
   self.batch_gold_select = self.batch_print_answer > 0.0
   self.full_column_mask = tf.concat(
       axis=1, values=[self.batch_number_column_mask, self.batch_word_column_mask])
   self.full_processed_column = tf.concat(
       axis=1,
       values=[self.batch_processed_number_column, self.batch_processed_word_column])
   self.full_processed_sorted_index_column = tf.concat(axis=1, values=[
       self.batch_processed_sorted_index_number_column,
       self.batch_processed_sorted_index_word_column
   ])
   self.select_bad_number_mask = tf.cast(
       tf.logical_and(
           tf.not_equal(self.full_processed_column,
                        self.utility.FLAGS.pad_int),
           tf.not_equal(self.full_processed_column,
                        self.utility.FLAGS.bad_number_pre_process)),
       self.data_type)
   self.select_mask = tf.cast(
       tf.logical_not(
           tf.equal(self.batch_number_column, self.utility.FLAGS.pad_int)),
       self.data_type)
   self.select_word_mask = tf.cast(
       tf.logical_not(
           tf.equal(self.batch_word_column_entry_mask,
                    self.utility.dummy_token_id)), self.data_type)
   self.select_full_mask = tf.concat(
       axis=1, values=[self.select_mask, self.select_word_mask])
   self.select_whole_mask = tf.maximum(
       tf.reshape(
           tf.slice(self.select_mask, [0, 0, 0],
                    [self.batch_size, 1, self.max_elements]),
           [self.batch_size, self.max_elements]),
       tf.reshape(
           tf.slice(self.select_word_mask, [0, 0, 0],
                    [self.batch_size, 1, self.max_elements]),
           [self.batch_size, self.max_elements]))
   self.invert_select_full_mask = tf.cast(
       tf.concat(axis=1, values=[
           tf.equal(self.batch_number_column, self.utility.FLAGS.pad_int),
           tf.equal(self.batch_word_column_entry_mask,
                    self.utility.dummy_token_id)
       ]), self.data_type)
   self.batch_lookup_answer = tf.zeros(tf.shape(self.batch_gold_select))
   self.reset_select = self.select_whole_mask
   self.rows = tf.reduce_sum(self.select_whole_mask, 1)
   self.num_entries = tf.reshape(
       tf.reduce_sum(tf.reduce_sum(self.select_full_mask, 1), 1),
       [self.batch_size])
   self.final_error, self.final_correct = self.batch_process()
   return self.final_error
Example #8
 def recall(self, y_):
     y_true = tf.cast(tf.argmin(y_, 1), tf.bool)
     y_pred = tf.cast(tf.argmin(self.y, 1), tf.bool)
     # 1 stands for positive, 0 stands for negative
     tp = tf.reduce_sum(tf.cast(tf.logical_and(y_true, y_pred), tf.float32))
     tn = tf.reduce_sum(tf.cast(tf.logical_not(tf.logical_or(y_true, y_pred)), tf.float32))
     p = tf.reduce_sum(tf.cast(y_true, tf.float32))
     n = tf.reduce_sum(tf.cast(tf.logical_not(y_true), tf.float32))
     fp = p - tp
     fn = n - tn
     # t = tf.add(tp, tn)
     # f = tf.add(fp, fn)
     relevant = tf.add(tp, fn)
     recall = tf.div(tp, relevant)
     return recall
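
Recall can equally be written directly from TP and FN masks rather than via the p - tp / n - tn detour used above; a toy sketch with hypothetical boolean labels:

import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()

y_true = tf.constant([True, True, False, False])
y_pred = tf.constant([True, False, True, False])

tp = tf.reduce_sum(tf.cast(tf.logical_and(y_true, y_pred), tf.float32))
fn = tf.reduce_sum(tf.cast(
    tf.logical_and(y_true, tf.logical_not(y_pred)), tf.float32))
recall = tp / (tp + fn)

with tf.Session() as sess:
    print(sess.run(recall))  # 0.5: one of the two positives is recovered
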
Example #9
    def NLL(self, y, lengths, pis, mus, sigmas, rho, es, eps=1e-8):
        sigma_1, sigma_2 = tf.split(sigmas, 2, axis=2)
        y_1, y_2, y_3 = tf.split(y, 3, axis=2)
        mu_1, mu_2 = tf.split(mus, 2, axis=2)

        norm = 1.0 / (2*np.pi*sigma_1*sigma_2 * tf.sqrt(1 - tf.square(rho)))
        Z = tf.square((y_1 - mu_1) / (sigma_1)) + \
            tf.square((y_2 - mu_2) / (sigma_2)) - \
            2*rho*(y_1 - mu_1)*(y_2 - mu_2) / (sigma_1*sigma_2)

        exp = -1.0*Z / (2*(1 - tf.square(rho)))
        gaussian_likelihoods = tf.exp(exp) * norm
        gmm_likelihood = tf.reduce_sum(pis * gaussian_likelihoods, 2)
        gmm_likelihood = tf.clip_by_value(gmm_likelihood, eps, np.inf)

        bernoulli_likelihood = tf.squeeze(tf.where(tf.equal(tf.ones_like(y_3), y_3), es, 1 - es))

        nll = -(tf.log(gmm_likelihood) + tf.log(bernoulli_likelihood))
        sequence_mask = tf.logical_and(
            tf.sequence_mask(lengths, maxlen=tf.shape(y)[1]),
            tf.logical_not(tf.is_nan(nll)),
        )
        nll = tf.where(sequence_mask, nll, tf.zeros_like(nll))
        num_valid = tf.reduce_sum(tf.cast(sequence_mask, tf.float32), axis=1)

        sequence_loss = tf.reduce_sum(nll, axis=1) / tf.maximum(num_valid, 1.0)
        element_loss = tf.reduce_sum(nll) / tf.maximum(tf.reduce_sum(num_valid), 1.0)
        return sequence_loss, element_loss
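
The sequence mask above serves double duty: it cuts off positions past each length and silences NaN losses. A minimal sketch of that masking step with hypothetical values:

import numpy as np
import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()

nll = tf.constant([[1.0, np.nan, 2.0]])        # hypothetical per-step losses
length_mask = tf.sequence_mask([3], maxlen=3)  # all three steps are in-range
valid = tf.logical_and(length_mask, tf.logical_not(tf.is_nan(nll)))
nll_clean = tf.where(valid, nll, tf.zeros_like(nll))

with tf.Session() as sess:
    print(sess.run(nll_clean))  # [[1. 0. 2.]]: the NaN entry is zeroed out
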
Example #10
def prune_outside_window(boxlist, window, scope=None):
  """Prunes bounding boxes that fall outside a given window.

  This function prunes bounding boxes that even partially fall outside the given
  window. See also clip_to_window which only prunes bounding boxes that fall
  completely outside the window, and clips any bounding boxes that partially
  overflow.

  Args:
    boxlist: a BoxList holding M_in boxes.
    window: a float tensor of shape [4] representing [ymin, xmin, ymax, xmax]
      of the window
    scope: name scope.

  Returns:
    pruned_corners: a tensor with shape [M_out, 4] where M_out <= M_in
    valid_indices: a tensor with shape [M_out] indexing the valid bounding boxes
     in the input tensor.
  """
  with tf.name_scope(scope, 'PruneOutsideWindow'):
    y_min, x_min, y_max, x_max = tf.split(
        value=boxlist.get(), num_or_size_splits=4, axis=1)
    win_y_min, win_x_min, win_y_max, win_x_max = tf.unstack(window)
    coordinate_violations = tf.concat([
        tf.less(y_min, win_y_min), tf.less(x_min, win_x_min),
        tf.greater(y_max, win_y_max), tf.greater(x_max, win_x_max)
    ], 1)
    valid_indices = tf.reshape(
        tf.where(tf.logical_not(tf.reduce_any(coordinate_violations, 1))), [-1])
    return gather(boxlist, valid_indices), valid_indices
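
The pruning logic above marks a box invalid as soon as any coordinate violates the window; a toy run with one in-window and one overflowing box (hypothetical coordinates):

import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()

# Two boxes as [ymin, xmin, ymax, xmax]; the second pokes outside the window.
boxes = tf.constant([[0.1, 0.1, 0.4, 0.4],
                     [-0.2, 0.3, 0.5, 0.6]])
window = tf.constant([0.0, 0.0, 1.0, 1.0])

y_min, x_min, y_max, x_max = tf.split(boxes, num_or_size_splits=4, axis=1)
win_y_min, win_x_min, win_y_max, win_x_max = tf.unstack(window)
violations = tf.concat([
    tf.less(y_min, win_y_min), tf.less(x_min, win_x_min),
    tf.greater(y_max, win_y_max), tf.greater(x_max, win_x_max)], 1)
valid = tf.reshape(
    tf.where(tf.logical_not(tf.reduce_any(violations, 1))), [-1])

with tf.Session() as sess:
    print(sess.run(valid))  # [0]: only the first box survives
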
Example #11
def loss_estimate(batch_size, old_state, data, total_data, model_params, base_mean, base_log_var):
    clipped_log_vals, nan_mask, reset_rows = data


    zeros = tf.zeros_like(clipped_log_vals)
    state_init = model_params.init_state(batch_size)
    data_count = tf.reduce_sum(tf.to_float(tf.logical_not(nan_mask)), name='data_count')
    
    model_input = tf.where(nan_mask, zeros, clipped_log_vals)
    target_outputs = model_input

    sample_params = model_params.sample_vals(batch_size)

    #TODO verify significance of old_state
    filtered_state = tf.where(reset_rows, old_state, state_init)

    new_state,delta_mean = sample_inference(filtered_state,model_input,sample_params)
    variance = tf.exp(base_log_var)
    mean = base_mean + delta_mean * variance

    raw_losses = gaussian_neg_log_likelyhood(target_outputs, mean, variance)
    clean_raw_losses = tf.where(nan_mask, zeros, raw_losses)
    raw_loss = tf.reduce_sum(clean_raw_losses)

    kl_divergence = model_params.get_divergence()

    loss_estimate = raw_loss * (total_data / data_count) + kl_divergence

    return loss_estimate,new_state,kl_divergence
Example #12
    def build_graph(self, image, label):
        assert tf.test.is_gpu_available()

        MEAN_IMAGE = tf.constant([0.4914, 0.4822, 0.4465], dtype=tf.float32)
        STD_IMAGE = tf.constant([0.2023, 0.1994, 0.2010], dtype=tf.float32)
        image = ((image / 255.0) - MEAN_IMAGE) / STD_IMAGE
        image = tf.transpose(image, [0, 3, 1, 2])

        pytorch_default_init = tf.variance_scaling_initializer(scale=1.0 / 3, mode='fan_in', distribution='uniform')
        with argscope([Conv2D, BatchNorm, GlobalAvgPooling], data_format='channels_first'), \
                argscope(Conv2D, kernel_initializer=pytorch_default_init):
            net = Conv2D('conv0', image, 64, kernel_size=3, strides=1, use_bias=False)
            for i, blocks_in_module in enumerate(MODULE_SIZES):
                for j in range(blocks_in_module):
                    stride = 2 if j == 0 and i > 0 else 1
                    with tf.variable_scope("res%d.%d" % (i, j)):
                        net = preactivation_block(net, FILTER_SIZES[i], stride)
            net = GlobalAvgPooling('gap', net)
            logits = FullyConnected('linear', net, CLASS_NUM,
                                    kernel_initializer=tf.random_normal_initializer(stddev=1e-3))

        ce_cost = tf.nn.softmax_cross_entropy_with_logits(labels=label, logits=logits)
        ce_cost = tf.reduce_mean(ce_cost, name='cross_entropy_loss')

        single_label = tf.to_int32(tf.argmax(label, axis=1))
        wrong = tf.to_float(tf.logical_not(tf.nn.in_top_k(logits, single_label, 1)), name='wrong_vector')
        # monitor training error
        add_moving_summary(tf.reduce_mean(wrong, name='train_error'), ce_cost)
        add_param_summary(('.*/W', ['histogram']))

        # weight decay on all W matrices, including convolutional layers
        wd_cost = tf.multiply(WEIGHT_DECAY, regularize_cost('.*', tf.nn.l2_loss), name='wd_cost')

        return tf.add_n([ce_cost, wd_cost], name='cost')
Example #13
    def build_graph(self, image, label):
        xys = np.array([(y, x, 1) for y in range(WARP_TARGET_SIZE)
                        for x in range(WARP_TARGET_SIZE)], dtype='float32')
        xys = tf.constant(xys, dtype=tf.float32, name='xys')    # p x 3

        image = image / 255.0 - 0.5  # bhw2

        def get_stn(image):
            stn = (LinearWrap(image)
                   .AvgPooling('downsample', 2)
                   .Conv2D('conv0', 20, 5, padding='VALID')
                   .MaxPooling('pool0', 2)
                   .Conv2D('conv1', 20, 5, padding='VALID')
                   .FullyConnected('fc1', 32)
                   .FullyConnected('fct', 6, activation=tf.identity,
                                   kernel_initializer=tf.constant_initializer(),
                                   bias_initializer=tf.constant_initializer([1, 0, HALF_DIFF, 0, 1, HALF_DIFF]))())
            # output 6 parameters for affine transformation
            stn = tf.reshape(stn, [-1, 2, 3], name='affine')  # bx2x3
            stn = tf.reshape(tf.transpose(stn, [2, 0, 1]), [3, -1])  # 3 x (bx2)
            coor = tf.reshape(tf.matmul(xys, stn),
                              [WARP_TARGET_SIZE, WARP_TARGET_SIZE, -1, 2])
            coor = tf.transpose(coor, [2, 0, 1, 3], 'sampled_coords')  # b h w 2
            sampled = BilinearSample('warp', [image, coor], borderMode='constant')
            return sampled

        with argscope([Conv2D, FullyConnected], activation=tf.nn.relu):
            with tf.variable_scope('STN1'):
                sampled1 = get_stn(image)
            with tf.variable_scope('STN2'):
                sampled2 = get_stn(image)

        # For visualization in tensorboard
        with tf.name_scope('visualization'):
            padded1 = tf.pad(sampled1, [[0, 0], [HALF_DIFF, HALF_DIFF], [HALF_DIFF, HALF_DIFF], [0, 0]])
            padded2 = tf.pad(sampled2, [[0, 0], [HALF_DIFF, HALF_DIFF], [HALF_DIFF, HALF_DIFF], [0, 0]])
            img_orig = tf.concat([image[:, :, :, 0], image[:, :, :, 1]], 1)  # b x 2h  x w
            transform1 = tf.concat([padded1[:, :, :, 0], padded1[:, :, :, 1]], 1)
            transform2 = tf.concat([padded2[:, :, :, 0], padded2[:, :, :, 1]], 1)
            stacked = tf.concat([img_orig, transform1, transform2], 2, 'viz')
            tf.summary.image('visualize',
                             tf.expand_dims(stacked, -1), max_outputs=30)

        sampled = tf.concat([sampled1, sampled2], 3, 'sampled_concat')
        logits = (LinearWrap(sampled)
                  .FullyConnected('fc1', 256, activation=tf.nn.relu)
                  .FullyConnected('fc2', 128, activation=tf.nn.relu)
                  .FullyConnected('fct', 19, activation=tf.identity)())
        tf.nn.softmax(logits, name='prob')

        cost = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=label)
        cost = tf.reduce_mean(cost, name='cross_entropy_loss')

        wrong = tf.to_float(tf.logical_not(tf.nn.in_top_k(logits, label, 1)), name='incorrect_vector')
        summary.add_moving_summary(tf.reduce_mean(wrong, name='train_error'))

        wd_cost = tf.multiply(1e-5, regularize_cost('fc.*/W', tf.nn.l2_loss),
                              name='regularize_loss')
        summary.add_moving_summary(cost, wd_cost)
        return tf.add_n([wd_cost, cost], name='cost')
Example #14
    def next_state(self, loop_state: LoopState) -> Tuple[tf.Tensor, Any, Any]:
        feedables = loop_state.feedables
        tr_feedables = feedables.other
        tr_histories = loop_state.histories.other

        with tf.variable_scope(self._variable_scope, reuse=tf.AUTO_REUSE):
            # shape (time, batch)
            input_sequence = append_tensor(
                tr_feedables.input_sequence, feedables.embedded_input, 1)

            unfinished_mask = tf.to_float(tf.logical_not(feedables.finished))
            input_mask = append_tensor(
                tr_feedables.input_mask,
                tf.expand_dims(unfinished_mask, -1),
                axis=1)

            last_layer = self.layer(
                self.depth, input_sequence, tf.squeeze(input_mask, -1))

            # (batch, state_size)
            output_state = last_layer.temporal_states[:, -1, :]

        new_feedables = TransformerFeedables(
            input_sequence=input_sequence,
            input_mask=input_mask)

        # TODO: do something more interesting here
        new_histories = tr_histories

        return (output_state, new_feedables, new_histories)
Example #15
def prune_completely_outside_window(boxlist, window, scope=None):
  """Prunes bounding boxes that fall completely outside of the given window.

  The function clip_to_window prunes bounding boxes that fall
  completely outside the window, but also clips any bounding boxes that
  partially overflow. This function does not clip partially overflowing boxes.

  Args:
    boxlist: a BoxList holding M_in boxes.
    window: a float tensor of shape [4] representing [ymin, xmin, ymax, xmax]
      of the window
    scope: name scope.

  Returns:
    pruned_boxlist: a new BoxList with all bounding boxes partially or fully in
      the window.
    valid_indices: a tensor with shape [M_out] indexing the valid bounding boxes
     in the input tensor.
  """
  with tf.name_scope(scope, 'PruneCompletelyOutsideWindow'):
    y_min, x_min, y_max, x_max = tf.split(
        value=boxlist.get(), num_or_size_splits=4, axis=1)
    win_y_min, win_x_min, win_y_max, win_x_max = tf.unstack(window)
    coordinate_violations = tf.concat([
        tf.greater_equal(y_min, win_y_max), tf.greater_equal(x_min, win_x_max),
        tf.less_equal(y_max, win_y_min), tf.less_equal(x_max, win_x_min)
    ], 1)
    valid_indices = tf.reshape(
        tf.where(tf.logical_not(tf.reduce_any(coordinate_violations, 1))), [-1])
    return gather(boxlist, valid_indices), valid_indices
Example #16
def _has_foreground_and_background_in_first_frame(label, subsampling_factor):
  """Checks if the labels have foreground and background in the first frame.

  Args:
    label: Label tensor of shape [num_frames, height, width, 1].
    subsampling_factor: Integer, the subsampling factor.

  Returns:
    Boolean, whether the labels have foreground and background in the first
      frame.
  """
  h, w = train_utils.resolve_shape(label)[1:3]
  label_downscaled = tf.squeeze(
      tf.image.resize_nearest_neighbor(label[0, tf.newaxis],
                                       [h // subsampling_factor,
                                        w // subsampling_factor],
                                       align_corners=True),
      axis=0)
  is_bg = tf.equal(label_downscaled, 0)
  is_fg = tf.logical_not(is_bg)
  # Just using reduce_any was not robust enough, so let's make sure the count
  # is above MIN_LABEL_COUNT.
  fg_count = tf.reduce_sum(tf.cast(is_fg, tf.int32))
  bg_count = tf.reduce_sum(tf.cast(is_bg, tf.int32))
  has_bg = tf.greater_equal(bg_count, MIN_LABEL_COUNT)
  has_fg = tf.greater_equal(fg_count, MIN_LABEL_COUNT)
  return tf.logical_and(has_bg, has_fg)
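
A toy check of the foreground/background test above, with a hypothetical downscaled label map and MIN_LABEL_COUNT = 2 (the real constant lives elsewhere in the module):

import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()

MIN_LABEL_COUNT = 2
label_downscaled = tf.constant([[0, 0, 1], [1, 0, 1]])

is_bg = tf.equal(label_downscaled, 0)
is_fg = tf.logical_not(is_bg)
fg_count = tf.reduce_sum(tf.cast(is_fg, tf.int32))
bg_count = tf.reduce_sum(tf.cast(is_bg, tf.int32))
has_bg = tf.greater_equal(bg_count, MIN_LABEL_COUNT)
has_fg = tf.greater_equal(fg_count, MIN_LABEL_COUNT)

with tf.Session() as sess:
    print(sess.run(tf.logical_and(has_bg, has_fg)))  # True (3 fg, 3 bg)
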
Example #17
    def loop_continue_criterion(self, *args) -> tf.Tensor:
        """Decide whether to break out of the while loop.

        The criterion for stopping the loop is that either all hypotheses are
        finished or a maximum number of steps has been reached. Here the number
        of steps is the number of steps of the underlying decoder minus one,
        because this function is evaluated after the decoder step has been
        called and its step has been incremented. This is caused by the fact
        that we call the decoder body function at the end of the beam body
        function. (And that, in turn, is to support ensembling.)

        Arguments:
            args: A ``BeamSearchLoopState`` instance.

        Returns:
            A scalar boolean ``Tensor``.
        """
        loop_state = BeamSearchLoopState(*args)

        beam_step = loop_state.decoder_loop_state.feedables.step - 1
        finished = loop_state.search_state.finished

        max_step_cond = tf.less(beam_step, self.max_steps)
        unfinished_cond = tf.logical_not(tf.reduce_all(finished))

        return tf.logical_and(max_step_cond, unfinished_cond)
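
A scalar sketch of the stopping criterion above, with hypothetical step, limit, and per-hypothesis finished flags:

import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()

max_steps = 10
beam_step = tf.constant(3)
finished = tf.constant([True, False, True])  # per-hypothesis flags

max_step_cond = tf.less(beam_step, max_steps)
unfinished_cond = tf.logical_not(tf.reduce_all(finished))
keep_looping = tf.logical_and(max_step_cond, unfinished_cond)

with tf.Session() as sess:
    print(sess.run(keep_looping))  # True: one hypothesis is still running
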
Example #18
  def _match(self, similarity_matrix, valid_rows):
    """Bipartite matches a collection rows and columns. A greedy bi-partite.

    TODO(rathodv): Add num_valid_columns options to match only that many columns
    with all the rows.

    Args:
      similarity_matrix: Float tensor of shape [N, M] with pairwise similarity
        where higher values mean more similar.
      valid_rows: A boolean tensor of shape [N] indicating the rows that are
        valid.

    Returns:
      match_results: int32 tensor of shape [M] with match_results[i]=-1
        meaning that column i is not matched and otherwise that it is matched to
        row match_results[i].
    """
    valid_row_sim_matrix = tf.gather(similarity_matrix,
                                     tf.squeeze(tf.where(valid_rows), axis=-1))
    invalid_row_sim_matrix = tf.gather(
        similarity_matrix,
        tf.squeeze(tf.where(tf.logical_not(valid_rows)), axis=-1))
    similarity_matrix = tf.concat(
        [valid_row_sim_matrix, invalid_row_sim_matrix], axis=0)
    # Convert similarity matrix to distance matrix as tf.image.bipartite tries
    # to find minimum distance matches.
    distance_matrix = -1 * similarity_matrix
    num_valid_rows = tf.reduce_sum(tf.to_float(valid_rows))
    _, match_results = image_ops.bipartite_match(
        distance_matrix, num_valid_rows=num_valid_rows)
    match_results = tf.reshape(match_results, [-1])
    match_results = tf.cast(match_results, tf.int32)
    return match_results
Example #19
def _has_foreground_and_background_in_first_frame_2(label,
                                                    decoder_output_stride):
  """Checks if the labels have foreground and background in the first frame.

  Second attempt, this time we use the actual output dimension for resizing.

  Args:
    label: Label tensor of shape [num_frames, height, width, 1].
    decoder_output_stride: Integer, the stride of the decoder output.

  Returns:
    Boolean, whether the labels have foreground and background in the first
      frame.
  """
  h, w = train_utils.resolve_shape(label)[1:3]
  h_sub = model.scale_dimension(h, 1.0 / decoder_output_stride)
  w_sub = model.scale_dimension(w, 1.0 / decoder_output_stride)
  label_downscaled = tf.squeeze(
      tf.image.resize_nearest_neighbor(label[0, tf.newaxis], [h_sub, w_sub],
                                       align_corners=True), axis=0)
  is_bg = tf.equal(label_downscaled, 0)
  is_fg = tf.logical_not(is_bg)
  # Just using reduce_any was not robust enough, so let's make sure the count
  # is above MIN_LABEL_COUNT.
  fg_count = tf.reduce_sum(tf.cast(is_fg, tf.int32))
  bg_count = tf.reduce_sum(tf.cast(is_bg, tf.int32))
  has_bg = tf.greater_equal(bg_count, MIN_LABEL_COUNT)
  has_fg = tf.greater_equal(fg_count, MIN_LABEL_COUNT)
  return tf.logical_and(has_bg, has_fg)
Example #20
    def __loss__(self):
        """
        Calculate loss
        :return:
        """
        # regularization ?

        self.d_loss_real = tf.reduce_mean(ops.binary_cross_entropy(preds=self.predict_d, targets=tf.ones_like(self.predict_d)))
            # tf.nn.sigmoid_cross_entropy_with_logits(logits=self.predict_d_logits,
            #                                         labels=tf.ones_like(self.predict_d)))

        tf.summary.scalar('d_loss_real', self.d_loss_real, collections='D')

        self.d_loss_fake = tf.reduce_mean(ops.binary_cross_entropy(preds=self.predict_d_for_g, targets=tf.zeros_like(self.predict_d_for_g)))
            # tf.nn.sigmoid_cross_entropy_with_logits(logits=self.predict_d_logits_for_g,
            #                                         labels=tf.zeros_like(self.predict_d_for_g)))

        tf.summary.scalar('d_loss_fake', self.d_loss_fake, collections='D')

        self.d_loss = self.d_loss_real + self.d_loss_fake
        tf.summary.scalar('d_loss', self.d_loss, collections='D')

        if len(self.regularization_values_d) > 0:
            reg_loss_d = self.reg_w * tf.reduce_sum(self.regularization_values_d)
            self.d_loss += reg_loss_d
            if self.FLAGS.dump_debug:
                tf.summary.scalar('d_loss_plus_reg', self.d_loss, collections='D')
                tf.summary.scalar('d_loss_reg_only', reg_loss_d, collections='D')

        # Generative loss
        g_loss = tf.reduce_mean(ops.binary_cross_entropy(preds=self.predict_d_for_g, targets=tf.ones_like(self.predict_d_for_g)))
            # tf.nn.sigmoid_cross_entropy_with_logits(logits=self.predict_d_logits_for_g,
            #                                         labels=tf.ones_like(self.predict_d_for_g)))

        tf.summary.scalar('g_loss', g_loss, collections='G')

        # Context loss
        mask_not = tf.cast(tf.logical_not(tf.cast(self.labels['mask'], tf.bool)), tf.float32)
        real_diff = tf.contrib.layers.flatten(tf.multiply(self.predict_g['real'] - self.labels['real'], mask_not))
        imag_diff = tf.contrib.layers.flatten(tf.multiply(self.predict_g['imag'] - self.labels['imag'], mask_not))

        # real_diff = tf.multiply(tf.squeeze(self.predict_g['real']) - tf.squeeze(self.labels['real']), tf.squeeze(self.labels['mask']))
        # imag_diff = tf.multiply(tf.squeeze(self.predict_g['imag']) - tf.squeeze(self.labels['imag']), tf.squeeze(self.labels['mask']))

        self.context_loss = tf.reduce_mean(tf.square(real_diff) + tf.square(imag_diff), name='Context_loss_mean')

        tf.summary.scalar('g_loss_context_only', self.context_loss, collections='G')

        self.g_loss = self.adv_loss_w * g_loss + self.FLAGS.gen_loss_context * self.context_loss
        # self.g_loss = self.FLAGS.gen_loss_adversarial * g_loss + self.FLAGS.gen_loss_context * context_loss
        tf.summary.scalar('g_loss_plus_context', self.g_loss, collections='G')

        if len(self.regularization_values) > 0:
            reg_loss_g = self.reg_w * tf.reduce_sum(self.regularization_values)
            self.g_loss += reg_loss_g
            if self.FLAGS.dump_debug:
                tf.summary.scalar('g_loss_plus_context_plus_reg', self.g_loss, collections='G')
                tf.summary.scalar('g_loss_reg_only', reg_loss_g, collections='D')

        tf.summary.scalar('diff-loss', tf.abs(self.d_loss - self.g_loss), collections='G')
Example #21
def filter_groundtruth_with_nan_box_coordinates(tensor_dict):
  """Filters out groundtruth with no bounding boxes.

  Args:
    tensor_dict: a dictionary of following groundtruth tensors -
      fields.InputDataFields.groundtruth_boxes
      fields.InputDataFields.groundtruth_classes
      fields.InputDataFields.groundtruth_confidences
      fields.InputDataFields.groundtruth_keypoints
      fields.InputDataFields.groundtruth_instance_masks
      fields.InputDataFields.groundtruth_is_crowd
      fields.InputDataFields.groundtruth_area
      fields.InputDataFields.groundtruth_label_types

  Returns:
    a dictionary of tensors containing only the groundtruth that have bounding
    boxes.
  """
  groundtruth_boxes = tensor_dict[fields.InputDataFields.groundtruth_boxes]
  nan_indicator_vector = tf.greater(tf.reduce_sum(tf.to_int32(
      tf.is_nan(groundtruth_boxes)), reduction_indices=[1]), 0)
  valid_indicator_vector = tf.logical_not(nan_indicator_vector)
  valid_indices = tf.where(valid_indicator_vector)

  return retain_groundtruth(tensor_dict, valid_indices)
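
A minimal sketch of the NaN-row detection above, on a hypothetical two-box tensor where the second box carries a NaN coordinate:

import numpy as np
import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()

boxes = tf.constant([[0.1, 0.1, 0.5, 0.5],
                     [np.nan, 0.2, 0.6, 0.6]])
nan_rows = tf.greater(
    tf.reduce_sum(tf.to_int32(tf.is_nan(boxes)), axis=1), 0)
valid_indices = tf.where(tf.logical_not(nan_rows))

with tf.Session() as sess:
    print(sess.run(valid_indices))  # [[0]]: the NaN row is dropped
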
Example #22
def prediction_incorrect(logits, label, topk=1):
    """
    :param logits: NxC
    :param label: N
    :returns: a float32 vector of length N with 0/1 values, 1 meaning incorrect prediction
    """
    return tf.cast(tf.logical_not(tf.nn.in_top_k(logits, label, topk)), tf.float32)
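
A quick usage sketch of prediction_incorrect's core expression, on hypothetical logits (N=2, C=2):

import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()

logits = tf.constant([[2.0, 1.0], [0.5, 3.0]])
label = tf.constant([0, 0])

incorrect = tf.cast(tf.logical_not(tf.nn.in_top_k(logits, label, 1)),
                    tf.float32)

with tf.Session() as sess:
    print(sess.run(incorrect))  # [0. 1.]: the second sample is misclassified
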
Example #23
def IoU(bbox, gt):

    # bbox = [x, y, w, h]  (x, y = top-left corner)

    shape = [-1, 1]

    x1 = tf.maximum(tf.cast(bbox[0], tf.float32), tf.reshape(tf.cast(gt[:,0], tf.float32), shape))
    y1 = tf.maximum(tf.cast(bbox[1], tf.float32), tf.reshape(tf.cast(gt[:,1], tf.float32), shape))
    x2 = tf.minimum(tf.cast(bbox[2] + bbox[0], tf.float32), tf.reshape(tf.cast(gt[:,2] + gt[:,0], tf.float32), shape))
    y2 = tf.minimum(tf.cast(bbox[3] + bbox[1], tf.float32), tf.reshape(tf.cast(gt[:,3] + gt[:,1], tf.float32), shape))


    inter_w = tf.subtract(x2, x1)

    inter_h = tf.subtract(y2, y1)

    inter = tf.cast(inter_w * inter_h, tf.float32)

    bounding_box = tf.cast(tf.multiply(bbox[2], bbox[3]), tf.float32)

    ground_truth = tf.reshape(tf.cast(tf.multiply(gt[:, 2], gt[:, 3]), tf.float32), shape)

    #iou = tf.div(inter,tf.sub(tf.add(bounding_box,tf.reshape(ground_truth,shape)),inter))

    iou = inter / (bounding_box + ground_truth - inter)

    # limit the iou range between 0 and 1
    
    mask_less = tf.cast(tf.logical_not(tf.less(iou, tf.zeros_like(iou))), tf.float32)
    #mask_great = tf.cast(tf.logical_not(tf.greater(iou, tf.ones_like(iou))), tf.float32)
    
    iou = tf.multiply(iou, mask_less)
    #iou = tf.mul(iou, positive_mask)
    
    return iou
Example #24
  def _inverse_log_det_jacobian(self, y, use_saved_statistics=False):
    if not self.batchnorm.built:
      # Create variables.
      self.batchnorm.build(y.shape)

    event_dims = self.batchnorm.axis
    reduction_axes = [i for i in range(len(y.shape)) if i not in event_dims]

    # At training-time, ildj is computed from the mean and log-variance across
    # the current minibatch.
    # We use multiplication instead of tf.where() to get easier broadcasting.
    use_saved_statistics = tf.cast(
        tf.logical_or(use_saved_statistics, tf.logical_not(self._training)),
        tf.float32)
    log_variance = tf.log(
        (1 - use_saved_statistics) * tf.nn.moments(y, axes=reduction_axes,
                                                   keep_dims=True)[1]
        + use_saved_statistics * self.batchnorm.moving_variance
        + self.batchnorm.epsilon)

    # `gamma` and `log Var(y)` reductions over event_dims.
    # Log(total change in area from gamma term).
    log_total_gamma = tf.reduce_sum(tf.log(self.batchnorm.gamma))

    # Log(total change in area from log-variance term).
    log_total_variance = tf.reduce_sum(log_variance)
    # The ildj is a scalar, as it does not depend on the values of x and is
    # constant across minibatch elements.
    return log_total_gamma - 0.5 * log_total_variance
Example #25
def aggregate_single_gradient(grad_and_vars, use_mean, check_inf_nan):
    """Calculate the average gradient for a shared variable across all towers.

  Note that this function provides a synchronization point across all towers.

  Args:
    grad_and_vars: A list or tuple of (gradient, variable) tuples. Each
      (gradient, variable) pair within the outer list represents the gradient
      of the variable calculated for a single tower, and the number of pairs
      equals the number of towers.
    use_mean: if True, mean is taken, else sum of gradients is taken.
    check_inf_nan: check grads for nans and infs.

  Returns:
    The tuple ([(average_gradient, variable),], has_nan_or_inf) where the
      gradient has been averaged across all towers. The variable is chosen from
      the first tower. has_nan_or_inf indicates whether the grads contain NaN or Inf.
  """
    grads = [g for g, _ in grad_and_vars]
    grad = tf.add_n(grads)

    if use_mean and len(grads) > 1:
        grad = tf.multiply(grad, 1.0 / len(grads))

    v = grad_and_vars[0][1]
    if check_inf_nan:
        has_nan_or_inf = tf.logical_not(tf.reduce_all(tf.is_finite(grads)))
        return (grad, v), has_nan_or_inf
    else:
        return (grad, v), None
Example #26
def read_record(filename_queue):
    class FCNRecord(object):
        pass
    result = FCNRecord()
    result.mask_height = int(420/DOWNSAMPLE_FACTOR)
    result.mask_width = int(580/DOWNSAMPLE_FACTOR)
    result.mask_depth = 1
    result.img_depth = 1
    img_len = result.mask_height*result.mask_width*result.img_depth
    mask_len = result.mask_height*result.mask_width*result.mask_depth
    record_len = img_len + mask_len

    reader = tf.FixedLengthRecordReader(record_bytes=record_len)
    result.key, value = reader.read(filename_queue)
    record_bytes = tf.decode_raw(value, tf.uint8)
    #print(record_bytes.get_shape())
    int_image = tf.reshape(tf.slice(record_bytes, [0], [img_len]), [result.mask_height, result.mask_width])
    rgb_image = tf.stack([int_image, int_image, int_image])
    rgb_img = tf.transpose(rgb_image, (1, 2, 0))
    result.image = tf.cast(rgb_img, tf.float32)
    bool_mask = tf.cast(tf.reshape(tf.slice(record_bytes, [img_len], [mask_len]), [result.mask_height, result.mask_width]), tf.bool)
    hot_mask = tf.stack([bool_mask, tf.logical_not(bool_mask)])
    h_mask = tf.transpose(hot_mask, (1, 2, 0))
    result.mask = tf.cast(h_mask, tf.float32)
    return result
Example #27
def tf_format_mnist_images(X, Y, Y_, n=100, lines=10):
    correct_prediction = tf.equal(tf.argmax(Y,1), tf.argmax(Y_,1))
    correctly_recognised_indices = tf.squeeze(tf.where(correct_prediction), [1])  # indices of correctly recognised images
    incorrectly_recognised_indices = tf.squeeze(tf.where(tf.logical_not(correct_prediction)), [1]) # indices of incorrectly recognised images
    everything_incorrect_first = tf.concat([incorrectly_recognised_indices, correctly_recognised_indices], 0) # images reordered with indices of unrecognised images first
    everything_incorrect_first = tf.slice(everything_incorrect_first, [0], [n]) # compute first 100 only - no space to display more anyway
    # compute n=100 digits to display only
    Xs = tf.gather(X, everything_incorrect_first)
    Ys = tf.gather(Y, everything_incorrect_first)
    Ys_ = tf.gather(Y_, everything_incorrect_first)
    correct_prediction_s = tf.gather(correct_prediction, everything_incorrect_first)

    digits_left = tf.image.grayscale_to_rgb(tensorflowvisu_digits.digits_left())
    correct_tags = tf.gather(digits_left, tf.argmax(Ys_, 1)) # correct digits to be printed on the images
    digits_right = tf.image.grayscale_to_rgb(tensorflowvisu_digits.digits_right())
    computed_tags = tf.gather(digits_right, tf.argmax(Ys, 1)) # computed digits to be printed on the images
    #superimposed_digits = correct_tags+computed_tags
    superimposed_digits = tf.where(correct_prediction_s, tf.zeros_like(correct_tags), correct_tags+computed_tags) # only print the correct and computed digits on unrecognised images
    correct_bkg   = tf.reshape(tf.tile([1.3,1.3,1.3], [28*28]), [1, 28,28,3]) # white background
    incorrect_bkg = tf.reshape(tf.tile([1.3,1.0,1.0], [28*28]), [1, 28,28,3]) # red background
    recognised_bkg = tf.gather(tf.concat([incorrect_bkg, correct_bkg], 0), tf.cast(correct_prediction_s, tf.int32)) # pick either the red or the white background depending on recognised status

    I = tf.image.grayscale_to_rgb(Xs)
    I = ((1-(I+superimposed_digits))*recognised_bkg)/1.3 # stencil extra data on top of images and reorder them unrecognised first
    I = tf.image.convert_image_dtype(I, tf.uint8, saturate=True)
    Islices = [] # 100 images => 10x10 image block
    for imslice in range(lines):
        Islices.append(tf.concat(tf.unstack(tf.slice(I, [imslice*n//lines,0,0,0], [n//lines,28,28,3])), 1))
    I = tf.concat(Islices, 0)
    return I
Example #28
def kl_divergence(distribution_a, distribution_b,
                  allow_nan_stats=True, name=None):
  """Get the KL-divergence KL(distribution_a || distribution_b).

  If there is no KL method registered specifically for `type(distribution_a)`
  and `type(distribution_b)`, then the class hierarchies of these types are
  searched.

  If one KL method is registered between any pairs of classes in these two
  parent hierarchies, it is used.

  If more than one such registered method exists, the method whose registered
  classes have the shortest sum MRO paths to the input types is used.

  If more than one such shortest path exists, the first method
  identified in the search is used (favoring a shorter MRO distance to
  `type(distribution_a)`).

  Args:
    distribution_a: The first distribution.
    distribution_b: The second distribution.
    allow_nan_stats: Python `bool`, default `True`. When `True`,
      statistics (e.g., mean, mode, variance) use the value "`NaN`" to
      indicate the result is undefined. When `False`, an exception is raised
      if one or more of the statistic's batch members are undefined.
    name: Python `str` name prefixed to Ops created by this class.

  Returns:
    A Tensor with the batchwise KL-divergence between `distribution_a`
    and `distribution_b`.

  Raises:
    NotImplementedError: If no KL method is defined for distribution types
      of `distribution_a` and `distribution_b`.
  """
  kl_fn = _registered_kl(type(distribution_a), type(distribution_b))
  if kl_fn is None:
    # TODO(b/117098119): For backwards compatibility, we check TF's registry as
    # well. This typically happens when this function is called on a pair of
    # TF's distributions.
    with deprecation.silence():
      return tf.distributions.kl_divergence(distribution_a, distribution_b)

  with tf.name_scope("KullbackLeibler"):
    kl_t = kl_fn(distribution_a, distribution_b, name=name)
    if allow_nan_stats:
      return kl_t

    # Check KL for NaNs
    kl_t = tf.identity(kl_t, name="kl")

    with tf.control_dependencies([
        tf.Assert(
            tf.logical_not(
                tf.reduce_any(tf.is_nan(kl_t))),
            ["KL calculation between %s and %s returned NaN values "
             "(and was called with allow_nan_stats=False). Values:"
             % (distribution_a.name, distribution_b.name), kl_t])]):
      return tf.identity(kl_t, name="checked_kl")
Example #29
 def _not(self, x, use_gpu=False):
   np_ans = np.logical_not(x)
   with self.test_session(use_gpu=use_gpu):
     out = tf.logical_not(tf.convert_to_tensor(x))
     tf_val = out.eval()
   self.assertEqual(out.dtype, tf.bool)
   self.assertAllEqual(np_ans, tf_val)
   self.assertShapeEqual(np_ans, out)
Example #30
def lamb_func(logit, logic, lamb):
    logit_pos = tf.boolean_mask(logit, logic)
    logit_neg = tf.boolean_mask(logit, tf.logical_not(logic))
    logit_neg_exp = tf.exp(logit_neg * lamb)
    z = tf.reduce_mean(logit_neg_exp)
    left = tf.truediv(tf.reduce_mean(logit_neg * logit_neg_exp), z)
    right = tf.reduce_mean(logit_pos)
    return left, right
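
A toy run of the positive/negative split at the heart of lamb_func, using hypothetical logits and a boolean mask:

import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()

logit = tf.constant([0.5, -1.0, 2.0, 0.0])
logic = tf.constant([True, False, True, False])

logit_pos = tf.boolean_mask(logit, logic)
logit_neg = tf.boolean_mask(logit, tf.logical_not(logic))

with tf.Session() as sess:
    print(sess.run([logit_pos, logit_neg]))  # [0.5 2.0] and [-1.0 0.0]
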
Example #31
    def buildModel(self, inputShape, inMatFilename):
        if (inMatFilename):
            npWeights = loadWeights(inMatFilename)

        #Running on GPU
        with tf.device('/gpu:0'):
            with tf.name_scope("inputOps"):
                #Get convolution variables as placeholders
                self.inputImage = node_variable(
                    [None, inputShape[0], inputShape[1], inputShape[2]],
                    "inputImage")
                self.gt = node_variable([None, 2], "gt")
                #Model variables for convolutions

            with tf.name_scope("Conv1Ops"):
                if (inMatFilename):
                    self.W_conv1 = weight_variable_fromnp(
                        npWeights["conv1_w"], "w_conv1")
                    self.B_conv1 = weight_variable_fromnp(
                        npWeights["conv1_b"], "b_conv1")
                else:
                    self.W_conv1 = weight_variable_fromnp(
                        np.zeros((11, 11, 3, 64), dtype=np.float32), "w_conv1")
                    self.B_conv1 = weight_variable_fromnp(
                        np.zeros((64), dtype=np.float32), "b_conv1")
                    #self.W_conv1 = weight_variable_xavier([11, 11, 3, 64], "w_conv1", conv=True)
                    #self.B_conv1 = bias_variable([64], "b_conv1")
                self.h_conv1 = tf.nn.relu(
                    conv2d(self.inputImage,
                           self.W_conv1,
                           "conv1",
                           stride=[1, 4, 4, 1]) + self.B_conv1)
                self.h_norm1 = tf.nn.local_response_normalization(self.h_conv1,
                                                                  name="LRN1")
                self.h_pool1 = maxpool_2x2(self.h_norm1, "pool1")

            with tf.name_scope("Conv2Ops"):
                if (inMatFilename):
                    self.W_conv2 = weight_variable_fromnp(
                        npWeights["conv2_w"], "w_conv2")
                    self.B_conv2 = weight_variable_fromnp(
                        npWeights["conv2_b"], "b_conv2")
                else:
                    self.W_conv2 = weight_variable_fromnp(
                        np.zeros((5, 5, 64, 256), dtype=np.float32), "w_conv2")
                    self.B_conv2 = weight_variable_fromnp(
                        np.zeros((256), dtype=np.float32), "b_conv2")
                    #self.W_conv2 = weight_variable_xavier([5, 5, 64, 256], "w_conv2", conv=True)
                    #self.B_conv2 = bias_variable([256], "b_conv2")
                self.h_conv2 = tf.nn.relu(
                    conv2d(self.h_pool1, self.W_conv2, "conv2") + self.B_conv2)
                self.h_norm2 = tf.nn.local_response_normalization(self.h_conv2,
                                                                  name="LRN2")
                self.h_pool2 = maxpool_2x2(self.h_norm2, "pool2")

            with tf.name_scope("Conv3Ops"):
                if (inMatFilename):
                    self.W_conv3 = weight_variable_fromnp(
                        npWeights["conv3_w"], "w_conv3")
                    self.B_conv3 = weight_variable_fromnp(
                        npWeights["conv3_b"], "b_conv3")
                else:
                    self.W_conv3 = weight_variable_fromnp(
                        np.zeros((3, 3, 256, 256), dtype=np.float32),
                        "w_conv3")
                    self.B_conv3 = weight_variable_fromnp(
                        np.zeros((256), dtype=np.float32), "b_conv3")
                    #self.W_conv3 = weight_variable_xavier([3, 3, 256, 256], "w_conv3", conv=True)
                    #self.B_conv3 = bias_variable([256], "b_conv3")
                self.h_conv3 = tf.nn.relu(
                    conv2d(self.h_pool2, self.W_conv3, "conv3") + self.B_conv3,
                    name="relu3")

            with tf.name_scope("Conv4Ops"):
                if (inMatFilename):
                    self.W_conv4 = weight_variable_fromnp(
                        npWeights["conv4_w"], "w_conv4")
                    self.B_conv4 = weight_variable_fromnp(
                        npWeights["conv4_b"], "b_conv4")
                else:
                    self.W_conv4 = weight_variable_fromnp(
                        np.zeros((3, 3, 256, 256), dtype=np.float32),
                        "w_conv4")
                    self.B_conv4 = weight_variable_fromnp(
                        np.zeros((256), dtype=np.float32), "b_conv4")
                    #self.W_conv4 = weight_variable_xavier([3, 3, 256, 256], "w_conv4", conv=True)
                    #self.B_conv4 = bias_variable([256], "b_conv4")
                self.h_conv4 = tf.nn.relu(
                    conv2d(self.h_conv3, self.W_conv4, "conv4") + self.B_conv4,
                    name="relu4")

            with tf.name_scope("Conv5Ops"):
                if (inMatFilename):
                    self.W_conv5 = weight_variable_fromnp(
                        npWeights["conv5_w"], "w_conv5")
                    self.B_conv5 = weight_variable_fromnp(
                        npWeights["conv5_b"], "b_conv5")
                else:
                    self.W_conv5 = weight_variable_fromnp(
                        np.zeros((3, 3, 256, 256), dtype=np.float32),
                        "w_conv5")
                    self.B_conv5 = weight_variable_fromnp(
                        np.zeros((256), dtype=np.float32), "b_conv5")
                    #self.W_conv5 = weight_variable_xavier([3, 3, 256, 256], "w_conv5", conv=True)
                    #self.B_conv5 = bias_variable([256], "b_conv5")
                self.h_conv5 = tf.nn.relu(
                    conv2d(self.h_conv4, self.W_conv5, "conv5") + self.B_conv5)
                self.h_pool5 = maxpool_2x2(self.h_conv5, "pool5")

            #placeholder for specifying dropout
            self.keep_prob = tf.placeholder(tf.float32)

            #32 comes from 4 stride in conv1, 2 stride in pool1, 2 stride in pool2, 2 stride in pool5
            numInputs = (inputShape[0] // 32) * (inputShape[1] // 32) * 256
            with tf.name_scope("FC1"):
                #if(inMatFilename):
                #    self.W_conv5 = weight_variable_fromnp(npWeights["fc1_w"], "w_fc1")
                #    self.B_conv5 = weight_variable_fromnp(npWeights["fc1_b"], "b_fc1")
                #else:
                #    self.W_conv5 = weight_variable_fromnp(np.zeros((6*6*256, 4096), dtype=np.float32), "w_fc1")
                #    self.B_conv5 = weight_variable_fromnp(np.zeros((4096), dtype = np.float32), "b_fc1")
                self.W_fc1 = weight_variable_xavier([numInputs, 4096], "w_fc1")
                self.B_fc1 = bias_variable([4096], "b_fc1")
                h_pool5_flat = tf.reshape(self.h_pool5, [-1, numInputs],
                                          name="pool5_flat")
                self.h_fc1 = tf.nn.relu(
                    tf.matmul(h_pool5_flat, self.W_fc1, name="fc1") +
                    self.B_fc1, "fc1_relu")
                self.h_fc1_drop = tf.nn.dropout(self.h_fc1, self.keep_prob)

            with tf.name_scope("FC2"):
                #if(inMatFilename):
                #    self.W_conv5 = weight_variable_fromnp(npWeights["fc2_w"], "w_fc2")
                #    self.B_conv5 = weight_variable_fromnp(npWeights["fc2_b"], "b_fc2")
                #else:
                #    self.W_conv5 = weight_variable_fromnp(np.zeros((4096, 4096), dtype=np.float32), "w_fc2")
                #    self.B_conv5 = weight_variable_fromnp(np.zeros((4096), dtype = np.float32), "b_fc2")
                self.W_fc2 = weight_variable_xavier([4096, 4096], "w_fc2")
                self.B_fc2 = bias_variable([4096], "b_fc2")
                self.h_fc2 = tf.nn.relu(
                    tf.matmul(self.h_fc1_drop, self.W_fc2, name="fc2") +
                    self.B_fc2, "fc2_relu")
                self.h_fc2_drop = tf.nn.dropout(self.h_fc2, self.keep_prob)

            #fc3 should have 16 channels
            #fc3 also uses a sigmoid function
            #We change it to tanh
            with tf.name_scope("FC3"):
                #if(inMatFilename):
                #    self.W_conv5 = weight_variable_fromnp(npWeights["fc3_w"], "w_fc3")
                #    self.B_conv5 = weight_variable_fromnp(npWeights["fc3_b"], "b_fc3")
                #else:
                #    self.W_conv5 = weight_variable_fromnp(np.zeros((4096, 2), dtype=np.float32), "w_fc3")
                #    self.B_conv5 = weight_variable_fromnp(np.zeros((2), dtype = np.float32), "b_fc3")
                self.W_fc3 = weight_variable_xavier([4096, 2], "w_fc3")
                self.B_fc3 = bias_variable([2], "b_fc3")
                self.est = tf.nn.softmax(
                    tf.matmul(self.h_fc2_drop, self.W_fc3, name="fc3") +
                    self.B_fc3, "fc3_softmax")

            with tf.name_scope("Loss"):
                #Define loss
                #self.loss = tf.reduce_mean(-tf.reduce_sum(self.gt * tf.log(self.est), reduction_indices=[1]))
                self.loss = tf.reduce_mean(
                    -(self.gt[:, 1] * .8 * tf.log(self.est[:, 1]) +
                      self.gt[:, 0] * .2 * tf.log(self.est[:, 0])))

            with tf.name_scope("Opt"):
                #Define optimizer
                #self.optimizerAll = tf.train.AdagradOptimizer(self.learningRate).minimize(self.loss)
                #self.optimizerFC = tf.train.AdagradOptimizer(self.learningRate).minimize(self.loss,
                self.optimizerAll = tf.train.AdamOptimizer(
                    self.learningRate).minimize(self.loss)
                self.optimizerFC = tf.train.AdamOptimizer(
                    self.learningRate).minimize(self.loss,
                                                var_list=[
                                                    self.W_fc1, self.B_fc1,
                                                    self.W_fc2, self.B_fc2,
                                                    self.W_fc3, self.B_fc3
                                                ])

            with tf.name_scope("Metric"):
                self.gtIdx = tf.argmax(self.gt, 1)
                self.estIdx = tf.argmax(self.est, 1)
                boolGtIdx = tf.cast(self.gtIdx, tf.bool)
                boolEstIdx = tf.cast(self.estIdx, tf.bool)

                #Logical and for true positive
                lAnd = tf.logical_and(boolGtIdx, boolEstIdx)
                self.tp = tf.reduce_sum(tf.cast(lAnd, tf.float32))
                #Logical nor for true negatives
                lNor = tf.logical_not(tf.logical_or(boolGtIdx, boolEstIdx))
                self.tn = tf.reduce_sum(tf.cast(lNor, tf.float32))

                #Subtraction and comparison for others
                lSub = self.gtIdx - self.estIdx
                Ones = tf.cast(tf.ones(tf.shape(lSub)), tf.int64)
                self.fn = tf.reduce_sum(
                    tf.cast(tf.equal(lSub, Ones), tf.float32))
                self.fp = tf.reduce_sum(
                    tf.cast(tf.equal(lSub, -Ones), tf.float32))

                #Accuracy, precision, and recall calculations
                self.accuracy = (self.tp + self.tn) / (self.tp + self.tn +
                                                       self.fp + self.fn)
                self.precision = self.tp / (self.tp + self.fp)
                self.recall = self.tp / (self.tp + self.fn)

        #Summaries
        tf.scalar_summary('loss', self.loss, name="lossSum")
        tf.scalar_summary('accuracy', self.accuracy, name="accSum")
        tf.scalar_summary('precision', self.precision, name="precSum")
        tf.scalar_summary('recall', self.recall, name="recallSum")
        tf.scalar_summary('tp', self.tp, name="tp")
        tf.scalar_summary('fp', self.fp, name="fp")
        tf.scalar_summary('tn', self.tn, name="tn")
        tf.scalar_summary('fn', self.fn, name="fn")

        tf.histogram_summary('input', self.inputImage, name="image")
        tf.histogram_summary('gt', self.gt, name="gt")
        tf.histogram_summary('conv1', self.h_pool1, name="conv1")
        tf.histogram_summary('conv2', self.h_pool2, name="conv2")
        tf.histogram_summary('conv3', self.h_conv3, name="conv3")
        tf.histogram_summary('conv4', self.h_conv4, name="conv4")
        tf.histogram_summary('conv5', self.h_pool5, name="conv5")
        tf.histogram_summary('fc1', self.h_fc1, name="fc1")
        tf.histogram_summary('fc2', self.h_fc2, name="fc2")
        tf.histogram_summary('est', self.est, name="fc3")
        tf.histogram_summary('w_conv1', self.W_conv1, name="w_conv1")
        tf.histogram_summary('b_conv1', self.B_conv1, name="b_conv1")
        tf.histogram_summary('w_conv2', self.W_conv2, name="w_conv2")
        tf.histogram_summary('b_conv2', self.B_conv2, name="b_conv2")
        tf.histogram_summary('w_conv3', self.W_conv3, name="w_conv3")
        tf.histogram_summary('b_conv3', self.B_conv3, name="b_conv3")
        tf.histogram_summary('w_conv4', self.W_conv4, name="w_conv4")
        tf.histogram_summary('b_conv4', self.B_conv4, name="b_conv4")
        tf.histogram_summary('w_conv5', self.W_conv5, name="w_conv5")
        tf.histogram_summary('b_conv5', self.B_conv5, name="b_conv5")
        tf.histogram_summary('w_fc1', self.W_fc1, name="w_fc1")
        tf.histogram_summary('b_fc1', self.B_fc1, name="b_fc1")
        tf.histogram_summary('w_fc2', self.W_fc2, name="w_fc2")
        tf.histogram_summary('b_fc2', self.B_fc2, name="b_fc2")
        tf.histogram_summary('w_fc3', self.W_fc3, name="w_fc3")
        tf.histogram_summary('b_fc3', self.B_fc3, name="b_fc3")

        #Define saver
        self.saver = tf.train.Saver()
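
The Metric block above derives the whole confusion matrix from two argmax vectors. As a hedged, standalone sketch (the tensors below are made-up stand-ins, not from the model), the subtraction trick works because with indices in {0, 1}, gtIdx - estIdx is 1 exactly at false negatives and -1 exactly at false positives:

import tensorflow as tf

gt_idx = tf.constant([1, 0, 1, 0], dtype=tf.int64)   # ground-truth argmax
est_idx = tf.constant([1, 1, 0, 0], dtype=tf.int64)  # predicted argmax
diff = gt_idx - est_idx
fn = tf.reduce_sum(tf.cast(tf.equal(diff, 1), tf.float32))   # missed positives -> 1.0
fp = tf.reduce_sum(tf.cast(tf.equal(diff, -1), tf.float32))  # false alarms -> 1.0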
Example #32
0
def ssd_decode_and_crop(image_buffer, boxes, classes, raw_shape):
    """Crop image randomly and decode the cropped region.

  This function will crop an image to meet the following requirements:
  1. height to width ratio between 0.5 and 2;
  2. IoUs of some boxes exceed specified threshold;
  3. At least one box center is in the cropped region.
  We defer the jpeg decoding task until after the crop to avoid wasted work.

  Reference: https://github.com/chauhan-utk/ssd.DomainAdaptation

  Args:
    image_buffer: Tensor tf.string containing the contents of a JPEG file.
    boxes: Tensor tf.float32 of shape [num_boxes, 4], containing coordinates of
      object bounding boxes.
    classes: Tensor tf.int64 of shape [num_boxes, 1], containing class labels
      of objects.
    raw_shape: [height, width, 3].

  Returns:
    resized_image: decoded, cropped, and resized image Tensor tf.float32 of
      shape [ssd_constants.IMAGE_SIZE, ssd_constants.IMAGE_SIZE, 3], value
      range 0--255.
    cropped_boxes: box coordinates for objects in the cropped region.
    cropped_classes: class labels for objects in the cropped region.
  """

    num_boxes = tf.shape(boxes)[0]

    def no_crop_check():
        return (tf.random_uniform(
            shape=(), minval=0, maxval=1, dtype=tf.float32) <
                ssd_constants.P_NO_CROP_PER_PASS)

    def no_crop_proposal():
        return (
            tf.ones((), tf.bool),
            tf.convert_to_tensor([0, 0, 1, 1], dtype=tf.float32),
            tf.ones((num_boxes, ), tf.bool),
        )

    def crop_proposal():
        rand_vec = lambda minval, maxval: tf.random_uniform(shape=(
            ssd_constants.NUM_CROP_PASSES, 1),
                                                            minval=minval,
                                                            maxval=maxval,
                                                            dtype=tf.float32)

        width, height = rand_vec(0.3, 1), rand_vec(0.3, 1)
        left, top = rand_vec(0, 1 - width), rand_vec(0, 1 - height)

        right = left + width
        bottom = top + height

        ltrb = tf.concat([left, top, right, bottom], axis=1)

        min_iou = tf.random_shuffle(ssd_constants.CROP_MIN_IOU_CHOICES)[0]
        ious = calc_iou_tensor(ltrb, boxes)

        # discard any bboxes whose center is not in the cropped image
        xc, yc = [
            tf.tile(0.5 * (boxes[:, i + 0] + boxes[:, i + 2])[tf.newaxis, :],
                    (ssd_constants.NUM_CROP_PASSES, 1)) for i in range(2)
        ]

        masks = tf.reduce_all(tf.stack([
            tf.greater(xc, tf.tile(left, (1, num_boxes))),
            tf.less(xc, tf.tile(right, (1, num_boxes))),
            tf.greater(yc, tf.tile(top, (1, num_boxes))),
            tf.less(yc, tf.tile(bottom, (1, num_boxes))),
        ],
                                       axis=2),
                              axis=2)

        # Check whether a crop is valid.
        valid_aspect = tf.logical_and(tf.less(height / width, 2),
                                      tf.less(width / height, 2))
        valid_ious = tf.reduce_all(tf.greater(ious, min_iou),
                                   axis=1,
                                   keepdims=True)
        valid_masks = tf.reduce_any(masks, axis=1, keepdims=True)

        valid_all = tf.cast(
            tf.reduce_all(tf.concat([valid_aspect, valid_ious, valid_masks],
                                    axis=1),
                          axis=1), tf.int32)

        # One indexed, as zero is needed for the case of no matches.
        index = tf.range(1, 1 + ssd_constants.NUM_CROP_PASSES, dtype=tf.int32)

        # Either one-hot, or zeros if there is no valid crop.
        selection = tf.equal(tf.reduce_max(index * valid_all), index)

        use_crop = tf.reduce_any(selection)
        output_ltrb = tf.reduce_sum(tf.multiply(
            ltrb,
            tf.tile(tf.cast(selection, tf.float32)[:, tf.newaxis], (1, 4))),
                                    axis=0)
        output_masks = tf.reduce_any(tf.logical_and(
            masks, tf.tile(selection[:, tf.newaxis], (1, num_boxes))),
                                     axis=0)

        return use_crop, output_ltrb, output_masks

    def proposal(*args):
        return tf.cond(
            pred=no_crop_check(),
            true_fn=no_crop_proposal,
            false_fn=crop_proposal,
        )

    _, crop_bounds, box_masks = tf.while_loop(
        cond=lambda x, *_: tf.logical_not(x),
        body=proposal,
        loop_vars=[
            tf.zeros((), tf.bool),
            tf.zeros((4, ), tf.float32),
            tf.zeros((num_boxes, ), tf.bool)
        ],
    )

    filtered_boxes = tf.boolean_mask(boxes, box_masks, axis=0)

    # Clip boxes to the cropped region.
    filtered_boxes = tf.stack([
        tf.maximum(filtered_boxes[:, 0], crop_bounds[0]),
        tf.maximum(filtered_boxes[:, 1], crop_bounds[1]),
        tf.minimum(filtered_boxes[:, 2], crop_bounds[2]),
        tf.minimum(filtered_boxes[:, 3], crop_bounds[3]),
    ],
                              axis=1)

    left = crop_bounds[0]
    top = crop_bounds[1]
    width = crop_bounds[2] - left
    height = crop_bounds[3] - top

    cropped_boxes = tf.stack([
        (filtered_boxes[:, 0] - left) / width,
        (filtered_boxes[:, 1] - top) / height,
        (filtered_boxes[:, 2] - left) / width,
        (filtered_boxes[:, 3] - top) / height,
    ],
                             axis=1)

    # crop_window contains integer coordinates of the cropped region. A
    # normalized coordinate value of y should be mapped to the image coordinate
    # at y * (height - 1).
    raw_shape = tf.cast(raw_shape, tf.float32)
    crop_window = tf.stack([
        left * (raw_shape[0] - 1), top * (raw_shape[1] - 1),
        width * raw_shape[0], height * raw_shape[1]
    ])
    crop_window = tf.cast(crop_window, tf.int32)

    # Fused op only decodes the cropped portion of an image
    cropped_image = tf.image.decode_and_crop_jpeg(image_buffer,
                                                  crop_window,
                                                  channels=3)

    # Resize converts image dtype from uint8 to float32, without rescaling values.
    resized_image = tf.image.resize_images(
        cropped_image, [ssd_constants.IMAGE_SIZE, ssd_constants.IMAGE_SIZE])

    cropped_classes = tf.boolean_mask(classes, box_masks, axis=0)

    return resized_image, cropped_boxes, cropped_classes
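
One subtle step in crop_proposal above is how a single crop is chosen from NUM_CROP_PASSES candidates without control flow: a one-based index is multiplied by a 0/1 validity vector, and comparing against the maximum yields either a one-hot mask for the last valid pass or all False. A minimal illustration with made-up values:

import tensorflow as tf

valid = tf.constant([0, 1, 0, 1, 0], dtype=tf.int32)       # validity of 5 crop passes
index = tf.range(1, 6, dtype=tf.int32)                     # one-indexed passes
selection = tf.equal(tf.reduce_max(index * valid), index)  # [F, F, F, T, F]
use_crop = tf.reduce_any(selection)                        # True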
Example #33
0
def condition(time, unused_outputs_ta, unused_state, unused_inputs,
              finished):
    return tf.logical_not(tf.reduce_all(finished))
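
This condition is the standard termination test for a batched decoding loop: as long as any sequence is unfinished, the loop keeps running. A small illustration with dummy values:

import tensorflow as tf

finished = tf.constant([True, False, True])
keep_going = tf.logical_not(tf.reduce_all(finished))  # True: one sequence remains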
Example #34
0
    def connect_data_and_network(self,
                                 outputs_collector=None,
                                 gradients_collector=None):
        if self.is_training:
            self.patience = self.action_param.patience

            def switch_sampler(for_training):
                with tf.name_scope('train' if for_training else 'validation'):
                    sampler = self.get_sampler()[0][0 if for_training else -1]
                    return sampler.pop_batch_op()

            if self.action_param.validation_every_n > 0:
                data_dict = tf.cond(tf.logical_not(self.is_validation),
                                    lambda: switch_sampler(for_training=True),
                                    lambda: switch_sampler(for_training=False))
            else:
                data_dict = switch_sampler(for_training=True)

            images = tf.cast(data_dict['image'], tf.float32)
            noise_shape = [self.net_param.batch_size,
                           self.gan_param.noise_size]
            noise = tf.random_normal(shape=noise_shape,
                                     mean=0.0,
                                     stddev=1.0,
                                     dtype=tf.float32)
            conditioning = data_dict['conditioning']
            net_output = self.net(
                noise, images, conditioning, self.is_training)

            loss_func = LossFunction(
                loss_type=self.action_param.loss_type)
            real_logits = net_output[1]
            fake_logits = net_output[2]
            lossG, lossD = loss_func(real_logits, fake_logits)
            if self.net_param.decay > 0:
                reg_losses = tf.get_collection(
                    tf.GraphKeys.REGULARIZATION_LOSSES)
                if reg_losses:
                    reg_loss = tf.reduce_mean(
                        [tf.reduce_mean(l_reg) for l_reg in reg_losses])
                    lossD = lossD + reg_loss
                    lossG = lossG + reg_loss

            self.total_loss = lossD + lossG

            outputs_collector.add_to_collection(
                var=self.total_loss, name='total_loss',
                average_over_devices=True, collection=CONSOLE)
            outputs_collector.add_to_collection(
                var=self.total_loss, name='total_loss',
                average_over_devices=True, summary_type='scalar',
                collection=TF_SUMMARIES)

            # variables to display in STDOUT
            outputs_collector.add_to_collection(
                var=lossD, name='lossD', average_over_devices=True,
                collection=CONSOLE)
            outputs_collector.add_to_collection(
                var=lossG, name='lossG', average_over_devices=False,
                collection=CONSOLE)
            # variables to display in tensorboard
            outputs_collector.add_to_collection(
                var=lossG, name='lossG', average_over_devices=False,
                collection=TF_SUMMARIES)
            outputs_collector.add_to_collection(
                var=lossG, name='lossD', average_over_devices=True,
                collection=TF_SUMMARIES)

            with tf.name_scope('Optimiser'):
                optimiser_class = OptimiserFactory.create(
                    name=self.action_param.optimiser)
                self.optimiser = optimiser_class.get_instance(
                    learning_rate=self.action_param.lr)

            with tf.name_scope('ComputeGradients'):
                # gradients of generator
                generator_variables = tf.get_collection(
                    tf.GraphKeys.TRAINABLE_VARIABLES, scope='generator')
                generator_grads = self.optimiser.compute_gradients(
                    lossG,
                    var_list=generator_variables,
                    colocate_gradients_with_ops=True)

                # gradients of discriminator
                discriminator_variables = tf.get_collection(
                    tf.GraphKeys.TRAINABLE_VARIABLES, scope='discriminator')
                discriminator_grads = self.optimiser.compute_gradients(
                    lossD,
                    var_list=discriminator_variables,
                    colocate_gradients_with_ops=True)
                grads = [generator_grads, discriminator_grads]

                # add the grads back to application_driver's training_grads
                gradients_collector.add_to_collection(grads)
        else:
            data_dict = self.get_sampler()[0][0].pop_batch_op()
            conditioning_dict = self.get_sampler()[1][0].pop_batch_op()
            conditioning = conditioning_dict['conditioning']
            image_size = conditioning.shape.as_list()[:-1]
            dummy_image = tf.zeros(image_size + [1])
            net_output = self.net(data_dict['vector'],
                                  dummy_image,
                                  conditioning,
                                  self.is_training)
            outputs_collector.add_to_collection(
                var=net_output[0],
                name='image',
                average_over_devices=False,
                collection=NETWORK_OUTPUT)
            outputs_collector.add_to_collection(
                var=conditioning_dict['conditioning_location'],
                name='location',
                average_over_devices=False,
                collection=NETWORK_OUTPUT)

            self.output_decoder = WindowAsImageAggregator(
                image_reader=self.readers[0],
                output_path=self.action_param.save_seg_dir)
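
The sampler switching above relies on tf.cond with tf.logical_not so that one graph serves both the training and validation phases. A minimal sketch of the same pattern (TF1-style graph mode assumed; the constant lambdas are stand-ins for the real pop_batch_op calls):

import tensorflow as tf

is_validation = tf.placeholder_with_default(False, shape=())
train_batch = lambda: tf.constant(1.0)  # stand-in for the training sampler
valid_batch = lambda: tf.constant(2.0)  # stand-in for the validation sampler
batch = tf.cond(tf.logical_not(is_validation), train_batch, valid_batch)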
Example #35
0
def get_geometric_idxs(atoms, adjacency_map):
    """ Find the bond, angles, and torsion indices in a molecular graph or
    graphs.

    Parameters
    ----------
    atoms : tf.Tensor, dtype=tf.int64,
        a tensor denoting the sequence of atom types
    adjacency_map : tf.Tensor, dtype=tf.int64,
        upper triangular tensor representing the adjacency map of the molecules

    Returns
    -------
    bond_idxs
    angle_idxs
    torsion_idxs
    """

    # get the attributes of the molecule
    # adjacency_map = mol[1]
    # atoms = mol[0]
    adjacency_map_full = adjacency_map \
        + tf.transpose(adjacency_map)

    n_atoms = tf.cast(tf.shape(atoms)[0], tf.int64)

    # (n_atoms, n_atoms, 2)
    all_idxs_stack = tf.stack(
        tf.meshgrid(
            tf.range(n_atoms, dtype=tf.int64),
            tf.range(n_atoms, dtype=tf.int64)),
        axis=2)

    # (n_atoms, n_atoms, 2) # boolean
    is_bond = tf.greater(
        adjacency_map,
        tf.constant(0, dtype=tf.float32))

    # (n_bonds, 2)
    bond_idxs = tf.boolean_mask(
        all_idxs_stack,
        is_bond)

    n_bonds = tf.cast(tf.shape(bond_idxs)[0], tf.int64)

    # init the angles idxs to be all negative ones
    angle_idxs = tf.constant([[-1, -1, -1]], dtype=tf.int64)

    @tf.function
    def process_one_atom_if_there_is_angle(idx, angle_idxs,
            adjacency_map_full=adjacency_map_full):

        # get all the connection indices
        connection_idxs = tf.where(
            tf.greater(
                adjacency_map_full[idx, :],
                tf.constant(0, dtype=tf.float32)))

        # get the number of connections
        n_connections = tf.shape(connection_idxs)[0]

        # get the combinations from these connection indices
        connection_combinations = tf.gather_nd(
            tf.stack(
                tf.meshgrid(
                    connection_idxs,
                    connection_idxs),
                axis=2),
            tf.where(
                tf.greater(
                    tf.linalg.band_part(
                        tf.ones(
                            (
                                n_connections,
                                n_connections
                            ),
                            dtype=tf.int64),
                        0, -1),
                    tf.constant(0, dtype=tf.int64))))

        connection_combinations = tf.boolean_mask(
            connection_combinations,
            tf.greater(
                connection_combinations[:, 0] \
                 - connection_combinations[:, 1],
                tf.constant(0, dtype=tf.int64)))

        angle_idxs = tf.concat(
            [
                angle_idxs,
                tf.concat(
                    [
                        tf.expand_dims(
                            connection_combinations[:, 0],
                            1),
                        tf.expand_dims(
                            idx * tf.ones(
                                (tf.shape(connection_combinations)[0], ),
                                dtype=tf.int64),
                            1),
                        tf.expand_dims(
                            connection_combinations[:, 1],
                            1)
                    ],
                    axis=1)
            ],
            axis=0)

        return idx + 1, angle_idxs

    @tf.function
    def process_one_atom(idx, angle_idxs,
            adjacency_map_full=adjacency_map_full):

        if tf.less(
            tf.math.count_nonzero(adjacency_map_full[idx, :]),
            tf.constant(1, dtype=tf.int64)):
            return idx+1, angle_idxs

        else:
            return process_one_atom_if_there_is_angle(idx, angle_idxs)

    idx = tf.constant(0, dtype=tf.int64)
    # use while loop to update the indices forming the angles
    idx, angle_idxs = tf.while_loop(
        # condition
        lambda idx, angle_idxs: tf.less(idx, n_atoms),

        process_one_atom,

        [idx, angle_idxs],

        shape_invariants=[
            idx.get_shape(),
            tf.TensorShape((None, 3))])

    # discard the first row
    angle_idxs = angle_idxs[1:, ]

    n_angles = tf.shape(angle_idxs, tf.int64)[0]

    # init the torsion idxs to be all negative ones
    torsion_idxs = tf.constant([[-1, -1, -1, -1]], dtype=tf.int64)

    # each bond whose two atoms both have other neighbors contributes torsion terms
    def process_one_bond_if_there_is_torsion(idx, torsion_idxs):
        bond = bond_idxs[idx]
        left_atom_connections = tf.where(
            tf.greater(
                adjacency_map_full[bond[0]],
                tf.constant(0, dtype=tf.float32)))

        right_atom_connections = tf.where(
            tf.greater(
                adjacency_map_full[bond[1]],
                tf.constant(0, dtype=tf.float32)))

        # get the combinations from these connection indices
        connection_combinations = tf.reshape(
            tf.stack(
                tf.meshgrid(
                    left_atom_connections,
                    right_atom_connections),
                axis=2),
            [-1, 2])

        torsion_idxs = tf.concat(
            [
                torsion_idxs,
                tf.concat(
                    [
                        tf.expand_dims(
                            connection_combinations[:, 0],
                            1),
                        bond[0] * tf.ones(
                            (tf.shape(connection_combinations)[0], 1),
                            dtype=tf.int64),
                        bond[1] * tf.ones(
                            (tf.shape(connection_combinations)[0], 1),
                            dtype=tf.int64),
                        tf.expand_dims(
                            connection_combinations[:, 1],
                            1)
                    ],
                    axis=1)
            ],
            axis=0)

        return idx + 1, torsion_idxs

    def process_one_bond(idx, torsion_idxs):
        if tf.logical_not(
            tf.logical_and(
                tf.greater(
                    tf.math.count_nonzero(
                        adjacency_map_full[bond_idxs[idx][0]]),
                    tf.constant(1, dtype=tf.int64)),
                tf.greater(
                    tf.math.count_nonzero(
                        adjacency_map_full[bond_idxs[idx][1]]),
                    tf.constant(1, dtype=tf.int64)))):
            return idx + 1, torsion_idxs

        else:
            return process_one_bond_if_there_is_torsion(
                idx, torsion_idxs)


    idx = tf.constant(0, dtype=tf.int64)
    idx, torsion_idxs = tf.while_loop(
        # condition
        lambda idx, _: tf.less(idx, tf.shape(bond_idxs, tf.int64)[0]),

        # body
        process_one_bond,

        # vars
        [idx, torsion_idxs],

        shape_invariants=[
            idx.get_shape(),
            tf.TensorShape([None, 4])
            ])

    # get rid of the first one
    torsion_idxs = torsion_idxs[1:, ]

    torsion_idxs = tf.boolean_mask(
        torsion_idxs,
        tf.logical_and(
            tf.logical_not(
                tf.equal(
                    torsion_idxs[:, 0] - torsion_idxs[:, 2],
                    tf.constant(0, dtype=tf.int64))),
            tf.logical_not(
                tf.equal(
                    torsion_idxs[:, 1] - torsion_idxs[:, 3],
                    tf.constant(0, dtype=tf.int64)))))

    return bond_idxs, angle_idxs, torsion_idxs
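
A hypothetical usage sketch (the inputs below are invented, not from the source): for a three-atom molecule in which atom 0 is bonded to atoms 1 and 2, the function should return two bonds, one angle centred on atom 0, and no torsions.

import tensorflow as tf

atoms = tf.constant([8, 1, 1], dtype=tf.int64)  # e.g. O, H, H
adjacency_map = tf.constant([[0., 1., 1.],
                             [0., 0., 0.],
                             [0., 0., 0.]])     # upper-triangular bond orders
bond_idxs, angle_idxs, torsion_idxs = get_geometric_idxs(atoms, adjacency_map)
# bond_idxs pairs atom 0 with each hydrogen; angle_idxs has one row with
# centre atom 0; torsion_idxs is empty.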
Example #36
0
def prediction_incorrect(logits, label, topk=1, name='incorrect_vector'):
    with tf.name_scope('prediction_incorrect'):
        x = tf.logical_not(tf.nn.in_top_k(logits, label, topk))
    return tf.cast(x, tf.float32, name=name)
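
Assumed usage sketch: tf.nn.in_top_k marks correct predictions, so the negation above is a per-sample error indicator whose mean is the top-k error rate.

import tensorflow as tf

logits = tf.constant([[2.0, 1.0], [0.5, 3.0]])
labels = tf.constant([0, 0])
wrong = prediction_incorrect(logits, labels, topk=1)  # [0., 1.]
error_rate = tf.reduce_mean(wrong)                    # 0.5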
Example #37
0
    def _call(
            self,
            atoms, # NOTE: here there could be more than one mol
            adjacency_map,
            coordinates,
            atom_in_mol=False, # (n_atoms, )
            batched_attr_in_mol=False,
            repeat=3):
        """ More general __call__ method.

        """

        # get the attributes of the molecule
        # adjacency_map = mol[1]
        # atoms = mol[0]
        adjacency_map_full = adjacency_map \
            + tf.transpose(adjacency_map)

        n_atoms = tf.cast(tf.shape(atoms)[0], tf.int64)

        # (n_atoms, n_atoms, 2)
        all_idxs_stack = tf.stack(
            tf.meshgrid(
                tf.range(n_atoms, dtype=tf.int64),
                tf.range(n_atoms, dtype=tf.int64)),
            axis=2)

        # (n_atoms, n_atoms, 2) # boolean
        is_bond = tf.greater(
            adjacency_map,
            tf.constant(0, dtype=tf.float32))

        bond_idxs, angle_idxs, torsion_idxs = get_geometric_idxs(
            atoms, adjacency_map)

        # get the dimensions of the indices
        n_atoms = tf.shape(atoms, tf.int64)[0]
        n_bonds = tf.shape(bond_idxs, tf.int64)[0]
        n_angles = tf.shape(angle_idxs, tf.int64)[0]
        n_torsions = tf.shape(torsion_idxs, tf.int64)[0]

        # grab atoms that are at the two ends of a bond
        # (n_bonds, )
        left_idxs = bond_idxs[:, 0]

        # (n_bonds, )
        right_idxs = bond_idxs[:, 1]

        if tf.logical_not(tf.reduce_any(atom_in_mol)):
            atom_in_mol = tf.tile(
                [[True]],
                [n_atoms, 1])

        if tf.logical_not(tf.reduce_any(batched_attr_in_mol)):
            batched_attr_in_mol = tf.constant([[True]])

        # (n_bonds, n_atoms)
        bond_is_connected_to_atoms = tf.logical_or(
            tf.equal(
                tf.tile(
                    tf.expand_dims(
                        tf.range(n_atoms),
                        0),
                    [n_bonds, 1]),
                tf.tile(
                    tf.expand_dims(
                        bond_idxs[:,0],
                        1),
                    [1, n_atoms])),
            tf.equal(
                tf.tile(
                    tf.expand_dims(
                        tf.range(n_atoms),
                        0),
                    [n_bonds, 1]),
                tf.tile(
                    tf.expand_dims(
                        bond_idxs[:,1],
                        1),
                    [1, n_atoms])))

        # (n_atoms, n_bonds)
        atom_is_connected_to_bonds = tf.transpose(
            bond_is_connected_to_atoms)


        # (n_angles, n_atoms)
        angle_is_connected_to_atoms = tf.reduce_any(
            [
                tf.equal(
                    tf.tile(
                        tf.expand_dims(
                            tf.range(n_atoms),
                            0),
                        [n_angles, 1]),
                    tf.tile(
                        tf.expand_dims(
                            angle_idxs[:, 0],
                            1),
                        [1, n_atoms])),
                tf.equal(
                    tf.tile(
                        tf.expand_dims(
                            tf.range(n_atoms),
                            0),
                        [n_angles, 1]),
                    tf.tile(
                        tf.expand_dims(
                            angle_idxs[:, 1],
                            1),
                        [1, n_atoms])),
                tf.equal(
                    tf.tile(
                        tf.expand_dims(
                            tf.range(n_atoms),
                            0),
                        [n_angles, 1]),
                    tf.tile(
                        tf.expand_dims(
                            angle_idxs[:, 2],
                            1),
                        [1, n_atoms]))
            ],
            axis=0)

        # (n_torsions, n_atoms)
        torsion_is_connected_to_atoms = tf.reduce_any(
            [
                tf.equal(
                    tf.tile(
                        tf.expand_dims(
                            tf.range(n_atoms),
                            0),
                        [n_torsions, 1]),
                    tf.tile(
                        tf.expand_dims(
                            torsion_idxs[:, 0],
                            1),
                        [1, n_atoms])),
                tf.equal(
                    tf.tile(
                        tf.expand_dims(
                            tf.range(n_atoms),
                            0),
                        [n_torsions, 1]),
                    tf.tile(
                        tf.expand_dims(
                            torsion_idxs[:, 1],
                            1),
                        [1, n_atoms])),
                tf.equal(
                    tf.tile(
                        tf.expand_dims(
                            tf.range(n_atoms),
                            0),
                        [n_torsions, 1]),
                    tf.tile(
                        tf.expand_dims(
                            torsion_idxs[:, 2],
                            1),
                        [1, n_atoms])),
                tf.equal(
                    tf.tile(
                        tf.expand_dims(
                            tf.range(n_atoms),
                            0),
                        [n_torsions, 1]),
                    tf.tile(
                        tf.expand_dims(
                            torsion_idxs[:, 3],
                            1),
                        [1, n_atoms]))
            ],
            axis=0)

        # (n_bonds, )
        # NOTE: here we use the same boolean mask as before, so they
        #       should be following the same order
        bond_orders = tf.boolean_mask(
            adjacency_map,
            is_bond)

        bond_distances = tf.boolean_mask(
            gin.deterministic.md.get_distance_matrix(coordinates),
            is_bond)

        angle_angles = gin.deterministic.md.get_angles(
            coordinates,
            angle_idxs)

        torsion_dihedrals = gin.deterministic.md.get_dihedrals(
            coordinates,
            torsion_idxs)

        # initialize the hidden layers
        # (n_bonds, ...)
        h_e = self.f_e(
            tf.expand_dims(bond_orders, 1))

        h_e_0 = h_e
        h_e_history = tf.expand_dims(h_e_0, 1)
        d_e = tf.shape(h_e, tf.int64)[1]

        # (n_atoms, ...)
        h_v = self.f_v(atoms)
        h_v_0 = h_v
        h_v_history = tf.expand_dims(h_v_0, 1)
        d_v = tf.shape(h_v, tf.int64)[1]

        # (n_angles, ...)
        h_a = self.f_a(
            tf.concat(
                [
                    tf.gather(
                        h_v,
                        angle_idxs[:, 1]),
                    tf.math.add(
                        tf.gather(
                            h_v,
                            angle_idxs[:, 0]),
                        tf.gather(
                            h_v,
                            angle_idxs[:, 1]))
                ],
                axis=1))

        h_a_0 = h_a
        h_a_history = tf.expand_dims(h_a_0, 1)
        d_a = tf.shape(h_a, tf.int64)[1]

        # (n_torsions, ...)
        h_t = self.f_t(
            tf.concat(
                [
                    tf.math.add(
                        tf.gather(
                            h_v,
                            torsion_idxs[:, 0]),
                        tf.gather(
                            h_v,
                            torsion_idxs[:, 3])),
                    tf.math.add(
                        tf.gather(
                            h_v,
                            torsion_idxs[:, 1]),
                        tf.gather(
                            h_v,
                            torsion_idxs[:, 2]))
                ],
                axis=1))
        h_t_0 = h_t
        h_t_history = tf.expand_dims(h_t_0, 1)
        d_t = tf.shape(h_t, tf.int64)[1]

        # (n_mols, ...)
        # NOTE: here $h_u$ could have more than one leading dimension
        h_u = self.f_u(atoms, adjacency_map, batched_attr_in_mol)
        h_u_0 = h_u
        h_u_history = tf.expand_dims(h_u_0, 1)
        d_u = tf.shape(h_u, tf.int64)[1]
        n_mols = tf.shape(h_u, tf.int64)[0]

        # specify what we know about the shape of the mask
        atom_in_mol.set_shape([None, None])

        atom_in_mol = tf.boolean_mask(
            atom_in_mol,
            tf.reduce_any(
                atom_in_mol,
                axis=0),
        axis=1)

        bond_in_mol = tf.greater(
            tf.matmul(
                tf.where(
                    bond_is_connected_to_atoms,
                    tf.ones_like(
                        bond_is_connected_to_atoms,
                        tf.int64),
                    tf.zeros_like(
                        bond_is_connected_to_atoms,
                        tf.int64)),
                tf.where(
                    atom_in_mol,
                    tf.ones_like(
                        atom_in_mol,
                        tf.int64),
                    tf.zeros_like(
                        atom_in_mol,
                        tf.int64))),
            tf.constant(0, dtype=tf.int64))

        angle_in_mol = tf.greater(
            tf.matmul(
                tf.where(
                    angle_is_connected_to_atoms,
                    tf.ones_like(
                        angle_is_connected_to_atoms,
                        tf.int64),
                    tf.zeros_like(
                        angle_is_connected_to_atoms,
                        tf.int64)),
                tf.where(
                    atom_in_mol,
                    tf.ones_like(
                        atom_in_mol,
                        tf.int64),
                    tf.zeros_like(
                        atom_in_mol,
                        tf.int64))),
            tf.constant(0, dtype=tf.int64))

        torsion_in_mol = tf.greater(
            tf.matmul(
                tf.where(
                    torsion_is_connected_to_atoms,
                    tf.ones_like(
                        torsion_is_connected_to_atoms,
                        tf.int64),
                    tf.zeros_like(
                        torsion_is_connected_to_atoms,
                        tf.int64)),
                tf.where(
                    atom_in_mol,
                    tf.ones_like(
                        atom_in_mol,
                        tf.int64),
                    tf.zeros_like(
                        atom_in_mol,
                        tf.int64))),
            tf.constant(0, dtype=tf.int64))

        def propagate_one_time(
                iter_idx,
                h_v, h_e, h_a, h_t, h_u,
                h_v_history, h_e_history, h_a_history,
                h_t_history, h_u_history,
                atom_in_mol=atom_in_mol, # (n_atoms, n_mols)
                bond_in_mol=bond_in_mol, # (n_bonds, n_mols)
                angle_in_mol=angle_in_mol,
                torsion_in_mol=torsion_in_mol
            ):

            # update $ e'_k $
            # $$
            # e'_k = \phi^e (e_k, v_{rk}, v_{sk}, u)
            # $$

            h_left = tf.gather(
                h_v,
                left_idxs)

            h_right = tf.gather(
                h_v,
                right_idxs)

            h_left_right = h_left + h_right

            # (n_bonds, d_e)
            h_e = self.phi_e(h_e, h_e_0, h_left_right,
                tf.reduce_sum(
                    tf.boolean_mask(
                        tf.tile(
                            tf.expand_dims(
                                h_u, # (n_mols, d_u)
                                0), # (1, n_mols, d_u)
                            [tf.shape(h_e)[0], 1, 1]),
                        bond_in_mol),
                    axis=1,
                    keepdims=True))

            h_e_history = tf.concat(
                [
                    h_e_history,
                    tf.expand_dims(
                        h_e,
                        1)
                ],
                axis=1)

            # aggregate $ \bar{e_i'} $
            # $$
            # \bar{e_i'} = \rho^{e \rightarrow v} (E'_i)
            # $$

            # (n_atoms, d_e)
            h_e_bar_i = self.rho_e_v(h_e, atom_is_connected_to_bonds)

            # update $ v'_i $
            # $$
            # v'_i = phi^v (\bar{e_i}, v_i, u)
            # $$
            # (n_atoms, d_v)
            h_v = self.phi_v(
                h_v, # (n_atoms, d_v)
                h_v_0, # (n_atoms, d_v)
                h_e_bar_i, # (n_atoms, d_v)
                tf.reduce_sum(
                    tf.where(
                        tf.tile(
                            tf.expand_dims(
                                atom_in_mol,
                                2),
                            [1, 1, tf.shape(h_u)[1]]),
                        tf.tile(
                            tf.expand_dims(
                                h_u,
                                0),
                            [n_atoms, 1, 1]),
                        tf.zeros_like(
                            tf.tile(
                                tf.expand_dims(
                                    h_u,
                                    0),
                                [n_atoms, 1, 1]))),
                    axis=1))

            h_v_history = tf.concat(
                [
                    h_v_history,
                    tf.expand_dims(
                        h_v,
                        1)
                ],
                axis=1)

            h_v_center = tf.gather(
                h_v,
                angle_idxs[:, 1])

            h_v_sides = tf.math.add(
                tf.gather(
                    h_v,
                    angle_idxs[:, 0]),
                tf.gather(
                    h_v,
                    angle_idxs[:, 2]))

            h_a = self.phi_a(
                h_a,
                h_a_0,
                h_v_center,
                h_v_sides,
                tf.reduce_sum(
                    tf.boolean_mask(
                        tf.tile(
                            tf.expand_dims(
                                h_u, # (n_mols, d_u)
                                0), # (1, n_mols, d_u)
                            [tf.shape(h_a)[0], 1, 1]),
                        angle_in_mol),
                    axis=1,
                    keepdims=True))

            h_a_history = tf.concat(
                [
                    h_a_history,
                    tf.expand_dims(h_a, 1)
                ],
                axis=1)

            h_v_center = tf.math.add(
                tf.gather(
                    h_v,
                    torsion_idxs[:, 1]),
                tf.gather(
                    h_v,
                    torsion_idxs[:, 2]))

            # side atoms are indices 0 and 3, mirroring the initialisation of h_t
            h_v_sides = tf.math.add(
                tf.gather(
                    h_v,
                    torsion_idxs[:, 0]),
                tf.gather(
                    h_v,
                    torsion_idxs[:, 3]))

            h_t = self.phi_t(
                h_t,
                h_t_0,
                h_v_center,
                h_v_sides,
                tf.reduce_sum(
                    tf.boolean_mask(
                        tf.tile(
                            tf.expand_dims(
                                h_u, # (n_mols, d_u)
                                0), # (1, n_mols, d_u)
                            [tf.shape(h_t)[0], 1, 1]),
                        torsion_in_mol),
                    axis=1,
                    keepdims=True))

            h_t_history = tf.concat(
                [
                    h_t_history,
                    tf.expand_dims(h_t, 1)
                ],
                axis=1)

            # aggregate $ \bar{e'} $
            # $$
            # \bar{e'} = \rho^{e \rightarrow u} (E')
            # $$
            # (n_mols, d_e)
            h_e_bar = self.rho_e_u(h_e, bond_in_mol)

            # aggregate $ \bar{v'} $
            # $$
            # \bar{v'} = \rho^{v \rightarrow u} (V')
            # $$
            # (n_mols, d_v)
            h_v_bar = self.rho_v_u(h_v, atom_in_mol)

            # aggregate $ \bar{a'} $
            h_a_bar = self.rho_a_u(h_a, angle_in_mol)

            # aggregate $ \bar{t} $
            h_t_bar = self.rho_t_u(h_t, torsion_in_mol)

            # update $ u' $
            # $$
            # u' = \phi^u (\bar{e'}, \bar{v'}, u)
            # $$
            # (n_mols, d_u)
            h_u = self.phi_u(
                h_u,
                h_u_0,
                h_e_bar,
                h_v_bar,
                h_a_bar,
                h_t_bar)

            h_u_history = tf.concat(
                [
                    h_u_history,
                    tf.expand_dims(
                        h_u,
                        1)
                ],
                axis=1)

            return (
                iter_idx + 1,
                h_v, h_e, h_a, h_t, h_u,
                h_v_history, h_e_history, h_a_history,
                h_t_history, h_u_history)

        # call the propagation once outside the loop (result discarded),
        # presumably so that the layers' variables are created before
        # tf.while_loop traces the body
        a = propagate_one_time(0, h_v, h_e, h_a, h_t, h_u, \
        h_v_history, h_e_history, h_a_history, \
        h_t_history, h_u_history)

        # use while loop to execute the graph multiple times
        iter_idx = tf.constant(0, dtype=tf.int64)

        iter_idx, h_v, h_e, h_a, h_t, h_u, \
        h_v_history, h_e_history, h_a_history, \
        h_t_history, h_u_history \
            = tf.while_loop(
            # condition
            lambda \
                iter_idx, \
                h_v, h_e, h_a, h_t, h_u, \
                h_v_history, h_e_history, h_a_history, h_t_history, \
                h_u_history: \
                    tf.less(iter_idx, self.repeat),

            # loop body
            propagate_one_time,

            # loop vars
            [
                iter_idx,
                h_v, h_e, h_a, h_t, h_u,
                h_v_history, h_e_history, h_a_history,
                h_t_history, h_u_history
            ],

            # shape_invariants
            shape_invariants = [
                iter_idx.get_shape(),
                h_v.get_shape(),
                h_e.get_shape(),
                h_a.get_shape(),
                h_t.get_shape(),
                h_u.get_shape(),
                tf.TensorShape((None, None, None)),
                tf.TensorShape((None, None, None)),
                tf.TensorShape((None, None, None)),
                tf.TensorShape((None, None, None)),
                tf.TensorShape((None, None, None)),
                ])

        y_bar = self.f_r(
            h_v, h_e, h_a, h_t, h_u,
            h_v_history, h_e_history, h_a_history,
            h_t_history, h_u_history,
            atom_in_mol, bond_in_mol, angle_in_mol, torsion_in_mol,
            adjacency_map, coordinates)

        return y_bar
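
The bond_in_mol, angle_in_mol, and torsion_in_mol masks above all use the same boolean-matmul membership trick: turn the incidence masks into 0/1 integer matrices, multiply, and test for a positive entry. A toy-shaped sketch of that one step (the tensors are invented, and tf.cast is used here in place of the tf.where/ones_like construction in the source; both produce the same 0/1 matrix):

import tensorflow as tf

bond_to_atom = tf.constant([[True, True, False],
                            [False, True, True]])     # (n_bonds, n_atoms)
atom_in_mol = tf.constant([[True], [True], [False]])  # (n_atoms, n_mols)
bond_in_mol = tf.greater(
    tf.matmul(tf.cast(bond_to_atom, tf.int64),
              tf.cast(atom_in_mol, tf.int64)),
    tf.constant(0, dtype=tf.int64))                   # [[True], [True]]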
Example #38
0
def bboxes_matching(label,
                    scores,
                    bboxes,
                    glabels,
                    gbboxes,
                    gdifficults,
                    matching_threshold=0.5,
                    scope=None):
    """Matching a collection of detected boxes with groundtruth values.
    Does not accept batched-inputs.
    The algorithm goes as follows: for every detected box, check
    if one grountruth box is matching. If none, then considered as False Positive.
    If the grountruth box is already matched with another one, it also counts
    as a False Positive. We refer the Pascal VOC documentation for the details.

    Args:
      rclasses, rscores, rbboxes: N(x4) Tensors. Detected objects, sorted by score;
      glabels, gbboxes: Groundtruth bounding boxes. May be zero padded, hence
        zero-class objects are ignored.
      matching_threshold: Threshold for a positive match.
    Return: Tuple of:
       n_gbboxes: Scalar Tensor with number of groundtruth boxes (may difer from
         size because of zero padding).
       tp_match: (N,)-shaped boolean Tensor containing with True Positives.
       fp_match: (N,)-shaped boolean Tensor containing with False Positives.
    """
    with tf.name_scope(scope, 'bboxes_matching_single',
                       [scores, bboxes, glabels, gbboxes]):
        rsize = tf.size(scores)
        rshape = tf.shape(scores)
        rlabel = tf.cast(label, glabels.dtype)
        # Number of groundtruth boxes.
        gdifficults = tf.cast(gdifficults, tf.bool)
        n_gbboxes = tf.count_nonzero(
            tf.logical_and(tf.equal(glabels, label),
                           tf.logical_not(gdifficults)))
        # Groundtruth matching arrays.
        gmatch = tf.zeros(tf.shape(glabels), dtype=tf.bool)
        grange = tf.range(tf.size(glabels), dtype=tf.int32)
        # True/False positive matching TensorArrays.
        sdtype = tf.bool
        ta_tp_bool = tf.TensorArray(sdtype,
                                    size=rsize,
                                    dynamic_size=False,
                                    infer_shape=True)
        ta_fp_bool = tf.TensorArray(sdtype,
                                    size=rsize,
                                    dynamic_size=False,
                                    infer_shape=True)

        # Loop over returned objects.
        def m_condition(i, ta_tp, ta_fp, gmatch):
            r = tf.less(i, rsize)
            return r

        def m_body(i, ta_tp, ta_fp, gmatch):
            # Jaccard score with groundtruth bboxes.
            rbbox = bboxes[i]
            jaccard = bboxes_jaccard(rbbox, gbboxes)
            jaccard = jaccard * tf.cast(tf.equal(glabels, rlabel),
                                        dtype=jaccard.dtype)

            # Best fit, checking it's above threshold.
            idxmax = tf.cast(tf.argmax(jaccard, axis=0), tf.int32)
            jcdmax = jaccard[idxmax]
            match = jcdmax > matching_threshold
            existing_match = gmatch[idxmax]
            not_difficult = tf.logical_not(gdifficults[idxmax])

            # TP: match & no previous match and FP: previous match | no match.
            # If difficult: no record, i.e FP=False and TP=False.
            tp = tf.logical_and(
                not_difficult,
                tf.logical_and(match, tf.logical_not(existing_match)))
            ta_tp = ta_tp.write(i, tp)
            fp = tf.logical_and(
                not_difficult,
                tf.logical_or(existing_match, tf.logical_not(match)))
            ta_fp = ta_fp.write(i, fp)
            # Update groundtruth match.
            mask = tf.logical_and(tf.equal(grange, idxmax),
                                  tf.logical_and(not_difficult, match))
            gmatch = tf.logical_or(gmatch, mask)

            return [i + 1, ta_tp, ta_fp, gmatch]

        # Main loop definition.
        i = 0
        [i, ta_tp_bool, ta_fp_bool, gmatch] = \
            tf.while_loop(m_condition, m_body,
                          [i, ta_tp_bool, ta_fp_bool, gmatch],
                          parallel_iterations=1,
                          back_prop=False)
        # TensorArrays to Tensors and reshape.
        tp_match = tf.reshape(ta_tp_bool.stack(), rshape)
        fp_match = tf.reshape(ta_fp_bool.stack(), rshape)

        # Some debugging information...
        # tp_match = tf.Print(tp_match,
        #                     [n_gbboxes,
        #                      tf.reduce_sum(tf.cast(tp_match, tf.int64)),
        #                      tf.reduce_sum(tf.cast(fp_match, tf.int64)),
        #                      tf.reduce_sum(tf.cast(gmatch, tf.int64))],
        #                     'Matching (NG, TP, FP, GM): ')
        return n_gbboxes, tp_match, fp_match
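
The boolean tp_match and fp_match vectors returned above reduce directly into per-class precision and recall. A hedged follow-up sketch (the helper name is ours, not from the source):

import tensorflow as tf

def precision_recall(n_gbboxes, tp_match, fp_match):
    tp = tf.reduce_sum(tf.cast(tp_match, tf.float32))
    fp = tf.reduce_sum(tf.cast(fp_match, tf.float32))
    precision = tp / tf.maximum(tp + fp, 1.0)
    recall = tp / tf.maximum(tf.cast(n_gbboxes, tf.float32), 1.0)
    return precision, recall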
Example #39
0
    def body(time, outputs_ta, state, inputs, finished, sequence_lengths):
        r"""Internal while_loop body.

        Args:
            time: scalar int32 tensor.
            outputs_ta: structure of TensorArray.
            state: (structure of) state tensors and TensorArrays.
            inputs: (structure of) input tensors.
            finished: bool tensor (keeping track of what's finished).
            sequence_lengths: int32 tensor (keeping track of time of finish).

        Returns:
            `(time + 1, outputs_ta, next_state, next_inputs, next_finished,
            next_sequence_lengths)`.
        """
        (next_outputs, state) = decoder.step(time, inputs, state)

        # Check if the maximum iteration is met. If it is met, do not compute
        # the next inputs.
        reach_max = tf.equal(time+1, maximum_iterations)
        (decoder_finished, next_inputs, decoder_state) = tf.cond(
            reach_max,
            lambda: (tf.cast(tf.ones_like(finished), tf.bool),
                     inputs, state),
            lambda: decoder.next_inputs(time, next_outputs, state)
        )
        if decoder.tracks_own_finished:
            next_finished = decoder_finished
        else:
            next_finished = tf.logical_or(decoder_finished, finished)
        next_sequence_lengths = tf.where(
            tf.logical_not(finished),
            tf.fill(tf.shape(sequence_lengths), time + 1),
            sequence_lengths)

        nest.assert_same_structure(state, decoder_state)
        nest.assert_same_structure(outputs_ta, next_outputs)
        nest.assert_same_structure(inputs, next_inputs)

        # Zero out output values past finish
        if impute_finished:
            emit = nest.map_structure(
                lambda out, zero: tf.where(finished, zero, out),
                next_outputs,
                zero_outputs)
        else:
            emit = next_outputs

        # Copy through states past finish
        def _maybe_copy_state(new, cur):
            # TensorArrays and scalar states get passed through.
            if isinstance(cur, tf.TensorArray):
                pass_through = True
            else:
                new.set_shape(cur.shape)
                pass_through = (new.shape.ndims == 0)
            return new if pass_through else tf.where(finished, cur, new)

        if impute_finished:
            next_state = nest.map_structure(
                _maybe_copy_state, decoder_state, state)
        else:
            next_state = decoder_state

        outputs_ta = nest.map_structure(lambda ta, out: ta.write(time, out),
                                        outputs_ta, emit)
        return (time + 1, outputs_ta, next_state, next_inputs, next_finished,
                next_sequence_lengths)
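
The sequence-length bookkeeping in the body above freezes a length the moment its sequence finishes; only still-running sequences advance to time + 1. A minimal sketch with invented values:

import tensorflow as tf

time = tf.constant(4)
finished = tf.constant([True, False])
sequence_lengths = tf.constant([3, 0])
next_sequence_lengths = tf.where(
    tf.logical_not(finished),
    tf.fill(tf.shape(sequence_lengths), time + 1),
    sequence_lengths)  # [3, 5]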
Example #40
0
    def __call__(self, inputs, state, scope=None):
        if not isinstance(state, CopyNetWrapperState):
            raise TypeError(
                'Expected state to be instance of CopyNetWrapperState. Received type {} instead.'
                .format(type(state)))
        prev_cell_state = state.cell_state
        prev_time = state.time
        prev_predicted_ids = state.predicted_ids
        prev_alignments = state.alignments
        prev_coverage = state.coverage
        prev_alignment_history = state.alignment_history

        mask = tf.cast(tf.equal(prev_predicted_ids, self._encoder_input_ids),
                       tf.float32)
        mask = tf.math.divide_no_nan(
            mask, tf.reduce_sum(mask, axis=-1, keepdims=True))
        rou = mask * prev_alignments
        selective_read = tf.einsum('ijk,ij->ik', self._encoder_outputs, rou)
        inputs = tf.concat(
            [inputs, selective_read],
            axis=-1)  # (batch_size, embedding_size + encoder_state_size)

        cell_outputs, cell_state = self._cell(inputs, prev_cell_state, scope)
        generate_score = self._projection(
            cell_outputs)  # (batch_size, gen_vocab_size)

        copy_score = tf.einsum('ijk,km->ijm', self._encoder_outputs,
                               self._copy_weight)
        copy_score = tf.nn.tanh(copy_score)
        copy_score = tf.einsum('ijm,im->ij', copy_score,
                               cell_outputs)  # (batch_size, seq_len)

        if self._encoder_input_length is not None:
            mask = tf.sequence_mask(self._encoder_input_length)
            mask = tf.cast(tf.logical_not(mask), dtype=tf.float32)
            copy_score += -1e9 * mask
        mixed_score = tf.concat([generate_score, copy_score], axis=-1)
        mixed_prob = tf.math.softmax(mixed_score, axis=-1)
        generate_prob = mixed_prob[:, :self._gen_vocab_size]
        copy_prob = mixed_prob[:, self._gen_vocab_size:]

        # expand probability to [batch_size, whole_vocab_size]
        expanded_generate_prob = tf.pad(
            generate_prob,
            [[0, 0], [0, self._whole_vocab_size - self._gen_vocab_size]])
        expanded_copy_prob = self._expand_copy_prob(copy_prob)
        outputs = expanded_generate_prob + expanded_copy_prob  # the output is probability not logits

        predicted_ids = tf.expand_dims(tf.argmax(outputs,
                                                 axis=-1,
                                                 output_type=tf.int32),
                                       axis=-1)
        alignments = copy_prob
        coverage = prev_coverage + copy_prob
        if self._alignment_history:
            alignment_history = prev_alignment_history.write(
                prev_time, copy_prob)
        else:
            alignment_history = prev_alignment_history
        state = CopyNetWrapperState(cell_state=cell_state,
                                    time=prev_time + 1,
                                    predicted_ids=predicted_ids,
                                    alignments=alignments,
                                    coverage=coverage,
                                    alignment_history=alignment_history)
        return outputs, state
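
The length masking used for copy_score above is a common pattern: positions past each sequence's true length receive a large negative additive penalty so the softmax assigns them near-zero probability. A standalone sketch with made-up shapes:

import tensorflow as tf

lengths = tf.constant([2, 3])
scores = tf.constant([[1.0, 2.0, 3.0],
                      [1.0, 2.0, 3.0]])
mask = tf.cast(tf.logical_not(tf.sequence_mask(lengths, maxlen=3)), tf.float32)
masked = scores + -1e9 * mask              # row 0, position 2 is suppressed
probs = tf.nn.softmax(masked, axis=-1)
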
def main(_):
    if not FLAGS.data_dir:
        raise ValueError('You must supply the dataset directory with --data_dir')
    num_gpus = FLAGS.num_gpus
    if num_gpus < 1: num_gpus = 1

    # ps_spec = FLAGS.ps_hosts.split(",")
    # worker_spec = FLAGS.worker_hosts.split(",")
    # num_workers = len(worker_spec)
    # cluster = tf.train.ClusterSpec({
    #     "ps": ps_spec,
    #     "worker": worker_spec})
    # server = tf.train.Server(cluster, job_name=FLAGS.job_name, task_index=FLAGS.task_index)
    # if FLAGS.job_name == "ps":
    #     with tf.device("/cpu:0"):
    #         server.join()
    #     return

    tf.logging.set_verbosity(tf.logging.DEBUG)
    with tf.device('/cpu:0'):
        global_step = slim.create_global_step()

        # Select the dataset.
        dataset = dataset_factory.get_dataset(
            FLAGS.dataset_name, FLAGS.dataset_split_name, FLAGS.data_dir)

        # Get the RON network and its anchors.
        ron_class = nets_factory.get_network(FLAGS.model_name)
        ron_params = ron_class.default_params._replace(num_classes=FLAGS.num_classes)
        ron_net = ron_class(ron_params)
        ron_shape = ron_net.params.img_shape
        ron_anchors = ron_net.anchors(ron_shape)

        # =================================================================== #
        # Create a dataset provider and batches.
        # =================================================================== #
        with tf.name_scope(FLAGS.dataset_name + '_data_provider'):
            provider = slim.dataset_data_provider.DatasetDataProvider(
                dataset,
                num_readers=FLAGS.num_readers,
                common_queue_capacity=120 * FLAGS.batch_size * num_gpus,
                common_queue_min=80 * FLAGS.batch_size * num_gpus,
                shuffle=True)
        # Get for RON network: image, labels, bboxes.
        # (ymin, xmin, ymax, xmax) for gbboxes
        [image, shape, glabels, gbboxes, isdifficult] = provider.get(['image', 'shape',
                                                         'object/label',
                                                         'object/bbox',
                                                         'object/difficult'])
        isdifficult_mask = tf.cond(
            tf.reduce_sum(
                tf.cast(
                    tf.logical_not(
                        tf.equal(tf.ones_like(isdifficult), isdifficult)),
                    tf.float32)) < 1.,
            lambda: tf.one_hot(0, tf.shape(isdifficult)[0],
                               on_value=True, off_value=False, dtype=tf.bool),
            lambda: isdifficult < tf.ones_like(isdifficult))

        glabels = tf.boolean_mask(glabels, isdifficult_mask)
        gbboxes = tf.boolean_mask(gbboxes, isdifficult_mask)

        # Select the preprocessing function.
        preprocessing_name = FLAGS.preprocessing_name or FLAGS.model_name
        image_preprocessing_fn = preprocessing_factory.get_preprocessing(
            preprocessing_name, is_training=True)

        # Pre-processing image, labels and bboxes.
        image, glabels, gbboxes = image_preprocessing_fn(image, glabels, gbboxes,
                                   out_shape=ron_shape,
                                   data_format=DATA_FORMAT)
        # Encode groundtruth labels and bboxes.
        # glocalisations is our regression target
        # gclasses is the ground-truth label
        # gscores is the jaccard score with the ground truth
        gclasses, glocalisations, gscores = \
            ron_net.bboxes_encode(glabels, gbboxes, ron_anchors, positive_threshold=FLAGS.match_threshold, ignore_threshold=FLAGS.neg_threshold)

        # Shape of each batch element:
        # one image plus three targets (gclasses, glocalisations, gscores).
        batch_shape = [1] + [len(ron_anchors)] * 3

        # Training batches and queue.
        r = tf.train.batch(
            tf_utils.reshape_list([image, gclasses, glocalisations, gscores]),
            batch_size=FLAGS.batch_size * num_gpus,
            num_threads=FLAGS.num_preprocessing_threads,
            capacity=120 * FLAGS.batch_size * num_gpus)
        all_batch = tf_utils.reshape_list(r, batch_shape)
        b_image = tf.split(all_batch[0], num_or_size_splits=num_gpus, axis=0)
        _b_gclasses = [tf.split(b, num_or_size_splits=num_gpus, axis=0) for b in all_batch[1]]
        b_gclasses = list(zip(*_b_gclasses))
        _b_glocalisations = [tf.split(b, num_or_size_splits=num_gpus, axis=0) for b in all_batch[2]]
        b_glocalisations = list(zip(*_b_glocalisations))
        _b_gscores = [tf.split(b, num_or_size_splits=num_gpus, axis=0) for b in all_batch[3]]
        b_gscores = list(zip(*_b_gscores))

    # Gather initial summaries.
    summaries = set(tf.get_collection(tf.GraphKeys.SUMMARIES))

    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    # =================================================================== #
    # Configure the optimization procedure.
    # =================================================================== #
    learning_rate = tf_utils.configure_learning_rate(FLAGS,
                                                     dataset.num_samples,
                                                     global_step)
    optimizer = tf_utils.configure_optimizer(FLAGS, learning_rate)
    summaries.add(tf.summary.scalar('learning_rate', learning_rate))

    # Construct RON network.
    arg_scope = ron_net.arg_scope(weight_decay=FLAGS.weight_decay, data_format=DATA_FORMAT)

    reuse_variables = False
    tower_grads = []
    loss_list = []
    with slim.arg_scope(arg_scope):
        for index in range(num_gpus):
            with tf.device('/gpu:%d' % index):
                predictions, logits, objness_pred, objness_logits, localisations, end_points = \
                    ron_net.net(b_image[index], is_training=True, reuse=reuse_variables)
                # Add loss function.
                ron_net.losses(logits, localisations, objness_logits, objness_pred,
                               b_gclasses[index], b_glocalisations[index], b_gscores[index],
                               match_threshold=FLAGS.match_threshold,
                               neg_threshold=FLAGS.neg_threshold,
                               objness_threshold=FLAGS.objectness_thres,
                               negative_ratio=FLAGS.negative_ratio,
                               alpha=FLAGS.loss_alpha,
                               beta=FLAGS.loss_beta,
                               label_smoothing=FLAGS.label_smoothing)
                reuse_variables = True
                # collect this tower's total loss from the losses collection
                loss = tf.losses.get_total_loss()
                loss_list.append(loss)
                # Variables to train.
                variables_to_train = tf_utils.get_variables_to_train(FLAGS)
                # Create gradient updates.
                grads = optimizer.compute_gradients(loss, variables_to_train)
                tower_grads.append(grads)

    reduce_grads = average_gradients(tower_grads)
    total_loss = tf.reduce_mean(tf.stack(loss_list, axis=0), axis=0)
    # Add total_loss to summary.
    summaries.add(tf.summary.scalar('total_loss', total_loss))
    # =================================================================== #
    # Configure the moving averages.
    # =================================================================== #
    if FLAGS.moving_average_decay:
        moving_average_variables = slim.get_model_variables()
        variable_averages = tf.train.ExponentialMovingAverage(
            FLAGS.moving_average_decay, global_step)
    else:
        moving_average_variables, variable_averages = None, None

    if FLAGS.moving_average_decay:
        # Update ops executed locally by trainer.
        update_ops.append(variable_averages.apply(moving_average_variables))

    grad_updates = optimizer.apply_gradients(reduce_grads, global_step=global_step)
    update_ops.append(grad_updates)
    update_op = tf.group(*update_ops)
    train_tensor = control_flow_ops.with_dependencies([update_op], total_loss, name='train_op')

    # Merge all summaries together.
    summary_op = tf.summary.merge(list(summaries), name='summary_op')
    # =================================================================== #
    # Kicks off the training.
    # =================================================================== #
    config = tf.ConfigProto(allow_soft_placement=True, log_device_placement=False)
    saver = tf.train.Saver(max_to_keep=5,
                           keep_checkpoint_every_n_hours = FLAGS.save_interval_secs/3600.,
                           write_version=2,
                           pad_step_number=False)

    slim.learning.train(
        train_tensor,
        logdir=FLAGS.model_dir,
        master='',
        is_chief=True,
        init_fn=tf_utils.get_init_fn(FLAGS, os.path.join(FLAGS.data_dir, 'vgg_16.ckpt')),
        summary_op=summary_op,
        number_of_steps=FLAGS.max_number_of_steps,
        log_every_n_steps=FLAGS.log_every_n_steps,
        save_summaries_secs=FLAGS.save_summaries_secs,
        saver=saver,
        save_interval_secs=FLAGS.save_interval_secs,
        session_config=config,
        session_wrapper=None,
        sync_optimizer=None)
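The call to average_gradients(tower_grads) above is not defined in this snippet. A minimal sketch of the usual multi-tower helper, modeled on the classic TensorFlow CIFAR-10 multi-GPU example (an assumption; the project's own version may differ):

import tensorflow as tf

def average_gradients(tower_grads):
    """Averages (gradient, variable) pairs across towers.

    tower_grads: a list with one entry per GPU, each a list of
        (gradient, variable) pairs from optimizer.compute_gradients.
    """
    average_grads = []
    for grad_and_vars in zip(*tower_grads):
        # grad_and_vars is ((grad0_gpu0, var0), (grad0_gpu1, var0), ...)
        grads = [tf.expand_dims(g, 0) for g, _ in grad_and_vars]
        grad = tf.reduce_mean(tf.concat(grads, axis=0), axis=0)
        # Variables are shared across towers, so the first tower's suffices.
        average_grads.append((grad, grad_and_vars[0][1]))
    return average_grads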
Example #42
def condition2(sigma, ak, am):
    sigma = tf.matmul(sigma, testOp2)
    return tf.logical_not(
        tf.reduce_all(tf.equal(sigma, tf.zeros([4], dtype=tf.float64))))
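condition2 is shaped as a tf.while_loop predicate: it stops once sigma multiplied by testOp2 is all zeros. A hedged usage sketch; testOp2 and the loop body below are illustrative stand-ins, since neither is part of the snippet:

import tensorflow as tf

# Stand-in for testOp2: a nilpotent shift matrix, so the loop provably
# terminates once sigma has been shifted out to all zeros.
testOp2 = tf.constant([[0., 1., 0., 0.],
                       [0., 0., 1., 0.],
                       [0., 0., 0., 1.],
                       [0., 0., 0., 0.]], dtype=tf.float64)

def body2(sigma, ak, am):
    # Hypothetical body; the original loop body is not shown above.
    return tf.matmul(sigma, testOp2), ak, am

sigma0 = tf.ones([1, 4], dtype=tf.float64)
zero = tf.constant(0., dtype=tf.float64)
sigma_final, _, _ = tf.while_loop(condition2, body2, [sigma0, zero, zero])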
Example #43
    def rpn_losses(self):
        with tf.variable_scope('rpn_losses'):
            minibatch_indices, minibatch_anchor_matched_gtboxes, \
            object_mask, minibatch_labels_one_hot = self.make_minibatch(self.anchors)

            minibatch_anchors = tf.gather(self.anchors, minibatch_indices)
            minibatch_encode_boxes = tf.gather(self.rpn_encode_boxes,
                                               minibatch_indices)
            minibatch_boxes_scores = tf.gather(self.rpn_scores,
                                               minibatch_indices)

            # encode gtboxes
            minibatch_encode_gtboxes = encode_and_decode.encode_boxes(
                unencode_boxes=minibatch_anchor_matched_gtboxes,
                reference_boxes=minibatch_anchors,
                scale_factors=self.scale_factors)

            positive_anchors_in_img = draw_box_with_color(
                self.img_batch,
                minibatch_anchors * tf.expand_dims(object_mask, 1),
                text=tf.shape(tf.where(tf.equal(object_mask, 1.0)))[0])

            negative_mask = tf.cast(
                tf.logical_not(tf.cast(object_mask, tf.bool)), tf.float32)
            negative_anchors_in_img = draw_box_with_color(
                self.img_batch,
                minibatch_anchors * tf.expand_dims(negative_mask, 1),
                text=tf.shape(tf.where(tf.equal(object_mask, 0.0)))[0])

            minibatch_decode_boxes = encode_and_decode.decode_boxes(
                encode_boxes=minibatch_encode_boxes,
                reference_boxes=minibatch_anchors,
                scale_factors=self.scale_factors)

            tf.summary.image('/positive_anchors', positive_anchors_in_img)
            tf.summary.image('/negative_anchors', negative_anchors_in_img)

            minibatch_boxes_softmax_scores = tf.gather(
                slim.softmax(self.rpn_scores), minibatch_indices)
            top_k_scores, top_k_indices = tf.nn.top_k(
                minibatch_boxes_softmax_scores[:, 1], k=20)

            top_k_boxes = tf.gather(minibatch_decode_boxes, top_k_indices)
            top_detections_in_img = draw_boxes_with_scores(self.img_batch,
                                                           boxes=top_k_boxes,
                                                           scores=top_k_scores)

            tf.summary.image('/top_20', top_detections_in_img)

            temp_indices = tf.reshape(
                tf.where(tf.greater(top_k_scores, cfgs.FINAL_SCORE_THRESHOLD)),
                [-1])
            rpn_predict_boxes = tf.gather(top_k_boxes, temp_indices)
            rpn_predict_scores = tf.gather(top_k_scores, temp_indices)

            # losses
            with tf.variable_scope('rpn_location_loss'):
                location_loss = losses.l1_smooth_losses(
                    predict_boxes=minibatch_encode_boxes,
                    gtboxes=minibatch_encode_gtboxes,
                    object_weights=object_mask)
                slim.losses.add_loss(
                    location_loss)  # add smooth l1 loss to losses collection

            with tf.variable_scope('rpn_classification_loss'):
                classification_loss = slim.losses.softmax_cross_entropy(
                    logits=minibatch_boxes_scores,
                    onehot_labels=minibatch_labels_one_hot)

            return location_loss, classification_loss, rpn_predict_boxes, rpn_predict_scores
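The negative mask above inverts a {0., 1.} float mask by round-tripping through bool; the same trick in isolation:

import tensorflow as tf

object_mask = tf.constant([1., 0., 1., 0.])
negative_mask = tf.cast(
    tf.logical_not(tf.cast(object_mask, tf.bool)), tf.float32)
# negative_mask -> [0., 1., 0., 1.]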
def custom_dynamic_rnn(cell, inputs, inputs_len, initial_state=None):
    """
    Implements a dynamic rnn that can store scores in the pointer network.
    We implement this ourselves because raw_rnn and dynamic_rnn in TensorFlow
    seem to require the hidden unit and the memory unit to have the same
    dimension, so we cannot store the scores directly in the hidden unit.
    Args:
        cell: RNN cell
        inputs: the input sequence to rnn
        inputs_len: valid length
        initial_state: initial_state of the cell
    Returns:
        outputs and state
    """
    batch_size, max_time = tf.shape(inputs)[0], tf.shape(inputs)[1]

    inputs_ta = tf.TensorArray(dtype=tf.float32, size=max_time)
    inputs_ta = inputs_ta.unstack(tf.transpose(inputs, [1, 0, 2]))

    # TensorArray for recording the output scores at each timestep
    emit_ta = tf.TensorArray(dtype=tf.float32, dynamic_size=True, size=0)

    # iterate over timesteps
    t0 = tf.constant(0, dtype=tf.int32)
    if initial_state is not None:
        s0 = initial_state
    else:
        s0 = cell.zero_state(batch_size, dtype=tf.float32)
    # initially, no sequence has finished
    f0 = tf.zeros([batch_size], dtype=tf.bool)

    def loop_fn(t, prev_s, emit_ta, finished):
        """
        the loop function of rnn
        """
        cur_x = inputs_ta.read(t)
        # use the previous cell state and the current input to predict the
        # scores and the current state
        # scores: (batch_size, hidden_size), the same shape as cur_x;
        # each score is the logit of one position for each sample
        # cur_state for an LSTM is a tuple (cell state, hidden state)
        scores, cur_state = cell(cur_x, prev_s)

        # copy through
        scores = tf.where(finished, tf.zeros_like(scores), scores)

        if isinstance(cell, tc.rnn.LSTMCell):
            cur_c, cur_h = cur_state
            prev_c, prev_h = prev_s
            cur_state = tc.rnn.LSTMStateTuple(tf.where(finished, prev_c, cur_c),
                                              tf.where(finished, prev_h, cur_h))
        else:
            cur_state = tf.where(finished, prev_s, cur_state)

        ### store the logit scores of each step
        emit_ta = emit_ta.write(t, scores)
        finished = tf.greater_equal(t + 1, inputs_len)
        return [t + 1, cur_state, emit_ta, finished]

    _, state, emit_ta, _ = tf.while_loop(
        cond=lambda _1, _2, _3, finished: tf.logical_not(tf.reduce_all(finished)),
        body=loop_fn,
        loop_vars=(t0, s0, emit_ta, f0),
        parallel_iterations=32,
        swap_memory=False)

    outputs = tf.transpose(emit_ta.stack(), [1, 0, 2])
    return outputs, state
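A hedged usage sketch for custom_dynamic_rnn; the shapes, lengths and cell below are illustrative assumptions:

import tensorflow as tf
import tensorflow.contrib as tc

batch, max_time, dim = 4, 10, 8
inputs = tf.random_normal([batch, max_time, dim])
inputs_len = tf.constant([10, 7, 5, 3], dtype=tf.int32)
# num_units matches the input size so the scores line up with positions
cell = tc.rnn.LSTMCell(num_units=dim)

outputs, state = custom_dynamic_rnn(cell, inputs, inputs_len)
# outputs: [batch, max_time, dim]; steps beyond inputs_len emit zeros.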
Example #45
def _beam_search_step(time, logits, next_cell_state, beam_state, batch_size,
                      beam_width, end_token, length_penalty_weight,
                      coverage_penalty_weight):
    """Performs a single step of Beam Search Decoding.

    Args:
      time: Beam search time step, should start at 0. At time 0 we assume
        that all beams are equal and consider only the first beam for
        continuations.
      logits: Logits at the current time step. A tensor of shape
        `[batch_size, beam_width, vocab_size]`
      next_cell_state: The next state from the cell, e.g. an instance of
        AttentionWrapperState if the cell is attentional.
      beam_state: Current state of the beam search.
        An instance of `BeamSearchDecoderState`.
      batch_size: The batch size for this input.
      beam_width: Python int.  The size of the beams.
      end_token: The int32 end token.
      length_penalty_weight: Float weight to penalize length. Disabled with
        0.0.
      coverage_penalty_weight: Float weight to penalize the coverage of source
        sentence. Disabled with 0.0.

    Returns:
      A new beam state.
    """
    static_batch_size = tf.get_static_value(batch_size)

    # Calculate the current lengths of the predictions
    prediction_lengths = beam_state.lengths
    previously_finished = beam_state.finished
    not_finished = tf.logical_not(previously_finished)

    # Calculate the total log probs for the new hypotheses
    # Final Shape: [batch_size, beam_width, vocab_size]
    step_log_probs = tf.nn.log_softmax(logits)
    step_log_probs = _mask_probs(step_log_probs, end_token,
                                 previously_finished)
    total_probs = tf.expand_dims(beam_state.log_probs, 2) + step_log_probs

    # Calculate the continuation lengths by adding to all continuing beams.
    vocab_size = logits.shape.dims[-1].value or tf.shape(logits)[-1]
    lengths_to_add = tf.one_hot(
        indices=tf.fill([batch_size, beam_width], end_token),
        depth=vocab_size,
        on_value=np.int64(0),
        off_value=np.int64(1),
        dtype=tf.int64)
    add_mask = tf.cast(not_finished, tf.int64)
    lengths_to_add *= tf.expand_dims(add_mask, 2)
    new_prediction_lengths = (
        lengths_to_add + tf.expand_dims(prediction_lengths, 2))

    # Calculate the accumulated attention probabilities if coverage penalty is
    # enabled.
    accumulated_attention_probs = None
    attention_probs = get_attention_probs(next_cell_state,
                                          coverage_penalty_weight)
    if attention_probs is not None:
        attention_probs *= tf.expand_dims(tf.cast(not_finished, tf.float32), 2)
        accumulated_attention_probs = (
            beam_state.accumulated_attention_probs + attention_probs)

    # Calculate the scores for each beam
    scores = _get_scores(
        log_probs=total_probs,
        sequence_lengths=new_prediction_lengths,
        length_penalty_weight=length_penalty_weight,
        coverage_penalty_weight=coverage_penalty_weight,
        finished=previously_finished,
        accumulated_attention_probs=accumulated_attention_probs)

    time = tf.convert_to_tensor(time, name="time")
    # During the first time step we only consider the initial beam
    scores_flat = tf.reshape(scores, [batch_size, -1])

    # Pick the next beams according to the specified successors function
    next_beam_size = tf.convert_to_tensor(
        beam_width, dtype=tf.int32, name="beam_width")
    next_beam_scores, word_indices = tf.math.top_k(
        scores_flat, k=next_beam_size)

    next_beam_scores.set_shape([static_batch_size, beam_width])
    word_indices.set_shape([static_batch_size, beam_width])

    # Pick out the probs, beam_ids, and states according to the chosen
    # predictions
    next_beam_probs = _tensor_gather_helper(
        gather_indices=word_indices,
        gather_from=total_probs,
        batch_size=batch_size,
        range_size=beam_width * vocab_size,
        gather_shape=[-1],
        name="next_beam_probs")
    # Note: just doing the following
    #   tf.to_int32(word_indices % vocab_size,
    #       name="next_beam_word_ids")
    # would be a lot cleaner but for reasons unclear, that hides the results of
    # the op which prevents capturing it with tfdbg debug ops.
    raw_next_word_ids = tf.math.floormod(
        word_indices, vocab_size, name="next_beam_word_ids")
    next_word_ids = tf.cast(raw_next_word_ids, tf.int32)
    next_beam_ids = tf.cast(
        word_indices / vocab_size, tf.int32, name="next_beam_parent_ids")

    # Append new ids to current predictions
    previously_finished = _tensor_gather_helper(
        gather_indices=next_beam_ids,
        gather_from=previously_finished,
        batch_size=batch_size,
        range_size=beam_width,
        gather_shape=[-1])
    next_finished = tf.logical_or(
        previously_finished,
        tf.equal(next_word_ids, end_token),
        name="next_beam_finished")

    # Calculate the length of the next predictions.
    # 1. Finished beams remain unchanged.
    # 2. Beams that are now finished (EOS predicted) have their length
    #    increased by 1.
    # 3. Beams that are not yet finished have their length increased by 1.
    lengths_to_add = tf.cast(tf.logical_not(previously_finished), tf.int64)
    next_prediction_len = _tensor_gather_helper(
        gather_indices=next_beam_ids,
        gather_from=beam_state.lengths,
        batch_size=batch_size,
        range_size=beam_width,
        gather_shape=[-1])
    next_prediction_len += lengths_to_add
    next_accumulated_attention_probs = ()
    if accumulated_attention_probs is not None:
        next_accumulated_attention_probs = _tensor_gather_helper(
            gather_indices=next_beam_ids,
            gather_from=accumulated_attention_probs,
            batch_size=batch_size,
            range_size=beam_width,
            gather_shape=[batch_size * beam_width, -1],
            name="next_accumulated_attention_probs")

    # Pick out the cell_states according to the next_beam_ids. We use a
    # different gather_shape here because the cell_state tensors, i.e.
    # the tensors that would be gathered from, all have dimension
    # greater than two and we need to preserve those dimensions.
    next_cell_state = tf.nest.map_structure(
        lambda gather_from: _maybe_tensor_gather_helper(
            gather_indices=next_beam_ids,
            gather_from=gather_from,
            batch_size=batch_size,
            range_size=beam_width,
            gather_shape=[batch_size * beam_width, -1]), next_cell_state)

    next_state = BeamSearchDecoderState(
        cell_state=next_cell_state,
        log_probs=next_beam_probs,
        lengths=next_prediction_len,
        finished=next_finished,
        accumulated_attention_probs=next_accumulated_attention_probs)

    output = BeamSearchDecoderOutput(
        scores=next_beam_scores,
        predicted_ids=next_word_ids,
        parent_ids=next_beam_ids)

    return output, next_state
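The floordiv/floormod pair above splits each flattened top_k index into a parent beam and a token id; a tiny worked example with an assumed vocab_size of 1000:

import tensorflow as tf

vocab_size = 1000
word_indices = tf.constant([[2345, 17]])        # [batch=1, beam_width=2]
parent_beams = tf.cast(word_indices / vocab_size, tf.int32)     # [[2, 0]]
token_ids = tf.math.floormod(word_indices, vocab_size)          # [[345, 17]]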
    def compute_mask(self, inputs, mask=None):
        return tf.logical_not(tf.math.is_nan(inputs))
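compute_mask treats NaN as the padding marker; the same check in isolation:

import numpy as np
import tensorflow as tf

inputs = tf.constant([1.0, np.nan, 3.0])
mask = tf.logical_not(tf.math.is_nan(inputs))   # [True, False, True]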
def assign_and_sample_proposals(proposed_boxes,
                                gt_boxes,
                                gt_classes,
                                num_samples_per_image=512,
                                mix_gt_boxes=True,
                                fg_fraction=0.25,
                                fg_iou_thresh=0.5,
                                bg_iou_thresh_hi=0.5,
                                bg_iou_thresh_lo=0.0):
  """Assigns the proposals with groundtruth classes and performs subsmpling.

  Given `proposed_boxes`, `gt_boxes`, and `gt_classes`, the function uses the
  following algorithm to generate the final `num_samples_per_image` RoIs.
    1. Calculates the IoU between each proposal box and each gt_boxes.
    2. Assigns each proposed box with a groundtruth class and box by choosing
       the largest IoU overlap.
    3. Samples `num_samples_per_image` boxes from all proposed boxes, and
       returns box_targets, class_targets, and RoIs.

  Args:
    proposed_boxes: a tensor of shape of [batch_size, N, 4]. N is the number of
      proposals before groundtruth assignment. The last dimension is the box
      coordinates w.r.t. the scaled images in [ymin, xmin, ymax, xmax] format.
    gt_boxes: a tensor of shape of [batch_size, MAX_NUM_INSTANCES, 4]. The
      coordinates of gt_boxes are in the pixel coordinates of the scaled image.
      This tensor might have padding of values -1 indicating the invalid box
      coordinates.
    gt_classes: a tensor with a shape of [batch_size, MAX_NUM_INSTANCES]. This
      tensor might have paddings with values of -1 indicating the invalid
      classes.
    num_samples_per_image: an integer representing the RoI minibatch size per
      image.
    mix_gt_boxes: a bool indicating whether to mix the groundtruth boxes before
      sampling proposals.
    fg_fraction: a float representing the target fraction of the RoI minibatch
      that is labeled foreground (i.e., class > 0).
    fg_iou_thresh: a float representing the IoU overlap threshold for an RoI to
      be considered foreground (if >= fg_iou_thresh).
    bg_iou_thresh_hi: a float representing the IoU overlap threshold for an RoI
      to be considered background (class = 0 if overlap in [LO, HI)).
    bg_iou_thresh_lo: a float representing the IoU overlap threshold for an RoI
      to be considered background (class = 0 if overlap in [LO, HI)).

  Returns:
    sampled_rois: a tensor of shape of [batch_size, K, 4], representing the
      coordinates of the sampled RoIs, where K is the number of the sampled
      RoIs, i.e. K = num_samples_per_image.
    sampled_gt_boxes: a tensor of shape of [batch_size, K, 4], storing the
      box coordinates of the matched groundtruth boxes of the sampled RoIs.
    sampled_gt_classes: a tensor of shape of [batch_size, K], storing the
      classes of the matched groundtruth boxes of the sampled RoIs.
    sampled_gt_indices: a tensor of shape of [batch_size, K], storing the
      indices of the sampled groundtruth boxes in the original `gt_boxes`
      tensor, i.e. gt_boxes[sampled_gt_indices[:, i]] = sampled_gt_boxes[:, i].
  """

  with tf.name_scope('sample_proposals'):
    if mix_gt_boxes:
      boxes = tf.concat([proposed_boxes, gt_boxes], axis=1)
    else:
      boxes = proposed_boxes

    (matched_gt_boxes, matched_gt_classes, matched_gt_indices, matched_iou,
     _) = box_matching(boxes, gt_boxes, gt_classes)

    positive_match = tf.greater(matched_iou, fg_iou_thresh)
    negative_match = tf.logical_and(
        tf.greater_equal(matched_iou, bg_iou_thresh_lo),
        tf.less(matched_iou, bg_iou_thresh_hi))
    ignored_match = tf.less(matched_iou, 0.0)

    # re-assign negatively matched boxes to the background class.
    matched_gt_classes = tf.where(negative_match,
                                  tf.zeros_like(matched_gt_classes),
                                  matched_gt_classes)
    matched_gt_indices = tf.where(negative_match,
                                  tf.zeros_like(matched_gt_indices),
                                  matched_gt_indices)

    sample_candidates = tf.logical_and(
        tf.logical_or(positive_match, negative_match),
        tf.logical_not(ignored_match))

    sampler = (
        balanced_positive_negative_sampler.BalancedPositiveNegativeSampler(
            positive_fraction=fg_fraction, is_static=True))

    batch_size, _ = sample_candidates.get_shape().as_list()
    sampled_indicators = []
    for i in range(batch_size):
      sampled_indicator = sampler.subsample(sample_candidates[i],
                                            num_samples_per_image,
                                            positive_match[i])
      sampled_indicators.append(sampled_indicator)
    sampled_indicators = tf.stack(sampled_indicators)
    _, sampled_indices = tf.nn.top_k(
        tf.cast(sampled_indicators, dtype=tf.int32),
        k=num_samples_per_image,
        sorted=True)

    sampled_indices_shape = tf.shape(sampled_indices)
    batch_indices = (
        tf.expand_dims(tf.range(sampled_indices_shape[0]), axis=-1) *
        tf.ones([1, sampled_indices_shape[-1]], dtype=tf.int32))
    gather_nd_indices = tf.stack([batch_indices, sampled_indices], axis=-1)

    sampled_rois = tf.gather_nd(boxes, gather_nd_indices)
    sampled_gt_boxes = tf.gather_nd(matched_gt_boxes, gather_nd_indices)
    sampled_gt_classes = tf.gather_nd(matched_gt_classes, gather_nd_indices)
    sampled_gt_indices = tf.gather_nd(matched_gt_indices, gather_nd_indices)

    return (sampled_rois, sampled_gt_boxes, sampled_gt_classes,
            sampled_gt_indices)
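A hedged usage sketch for assign_and_sample_proposals; the shapes are illustrative, and box_matching plus the balanced sampler are assumed to be importable from the surrounding project:

import tensorflow as tf

batch = 2
proposed_boxes = tf.random_uniform([batch, 300, 4], maxval=512.)
gt_boxes = tf.random_uniform([batch, 20, 4], maxval=512.)  # -1-padded in practice
gt_classes = tf.random_uniform([batch, 20], minval=1, maxval=80, dtype=tf.int32)

(sampled_rois, sampled_gt_boxes, sampled_gt_classes,
 sampled_gt_indices) = assign_and_sample_proposals(
     proposed_boxes, gt_boxes, gt_classes, num_samples_per_image=512)
# sampled_rois: [2, 512, 4]; class 0 marks RoIs sampled as background.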
    def get_predictions_and_loss(self, tokens, context_word_emb, head_word_emb,
                                 lm_emb, char_index, text_len, speaker_ids,
                                 genre, is_training, gold_starts, gold_ends,
                                 cluster_ids):
        self.dropout = self.get_dropout(self.config["dropout_rate"],
                                        is_training)
        self.lexical_dropout = self.get_dropout(
            self.config["lexical_dropout_rate"], is_training)
        self.lstm_dropout = self.get_dropout(self.config["lstm_dropout_rate"],
                                             is_training)

        num_sentences = tf.shape(context_word_emb)[0]
        max_sentence_length = tf.shape(context_word_emb)[1]

        context_emb_list = [context_word_emb]
        head_emb_list = [head_word_emb]

        if self.config["char_embedding_size"] > 0:
            char_emb = tf.gather(
                tf.get_variable(
                    "char_embeddings",
                    [len(self.char_dict), self.config["char_embedding_size"]]),
                char_index
            )  # [num_sentences, max_sentence_length, max_word_length, emb]
            flattened_char_emb = tf.reshape(char_emb, [
                num_sentences * max_sentence_length,
                util.shape(char_emb, 2),
                util.shape(char_emb, 3)
            ])  # [num_sentences * max_sentence_length, max_word_length, emb]
            flattened_aggregated_char_emb = util.cnn(
                flattened_char_emb, self.config["filter_widths"],
                self.config["filter_size"]
            )  # [num_sentences * max_sentence_length, emb]
            aggregated_char_emb = tf.reshape(flattened_aggregated_char_emb, [
                num_sentences, max_sentence_length,
                util.shape(flattened_aggregated_char_emb, 1)
            ])  # [num_sentences, max_sentence_length, emb]
            context_emb_list.append(aggregated_char_emb)
            head_emb_list.append(aggregated_char_emb)

        if not self.lm_file:
            elmo_module = hub.Module("https://tfhub.dev/google/elmo/2")
            lm_embeddings = elmo_module(inputs={
                "tokens": tokens,
                "sequence_len": text_len
            },
                                        signature="tokens",
                                        as_dict=True)
            word_emb = lm_embeddings[
                "word_emb"]  # [num_sentences, max_sentence_length, 512]
            lm_emb = tf.stack([
                tf.concat([word_emb, word_emb], -1),
                lm_embeddings["lstm_outputs1"], lm_embeddings["lstm_outputs2"]
            ], -1)  # [num_sentences, max_sentence_length, 1024, 3]
        lm_emb_size = util.shape(lm_emb, 2)
        lm_num_layers = util.shape(lm_emb, 3)
        with tf.variable_scope("lm_aggregation"):
            self.lm_weights = tf.nn.softmax(
                tf.get_variable("lm_scores", [lm_num_layers],
                                initializer=tf.constant_initializer(0.0)))
            self.lm_scaling = tf.get_variable(
                "lm_scaling", [], initializer=tf.constant_initializer(1.0))
        flattened_lm_emb = tf.reshape(
            lm_emb,
            [num_sentences * max_sentence_length * lm_emb_size, lm_num_layers])
        flattened_aggregated_lm_emb = tf.matmul(
            flattened_lm_emb, tf.expand_dims(
                self.lm_weights,
                1))  # [num_sentences * max_sentence_length * emb, 1]
        aggregated_lm_emb = tf.reshape(
            flattened_aggregated_lm_emb,
            [num_sentences, max_sentence_length, lm_emb_size])
        aggregated_lm_emb *= self.lm_scaling
        context_emb_list.append(aggregated_lm_emb)

        context_emb = tf.concat(context_emb_list,
                                2)  # [num_sentences, max_sentence_length, emb]
        head_emb = tf.concat(head_emb_list,
                             2)  # [num_sentences, max_sentence_length, emb]
        context_emb = tf.nn.dropout(
            context_emb,
            self.lexical_dropout)  # [num_sentences, max_sentence_length, emb]
        head_emb = tf.nn.dropout(
            head_emb,
            self.lexical_dropout)  # [num_sentences, max_sentence_length, emb]

        text_len_mask = tf.sequence_mask(
            text_len,
            maxlen=max_sentence_length)  # [num_sentence, max_sentence_length]

        context_outputs = self.lstm_contextualize(
            context_emb, text_len, text_len_mask)  # [num_words, emb]
        num_words = util.shape(context_outputs, 0)

        genre_emb = tf.gather(
            tf.get_variable("genre_embeddings",
                            [len(self.genres), self.config["feature_size"]]),
            genre)  # [emb]

        sentence_indices = tf.tile(
            tf.expand_dims(tf.range(num_sentences), 1),
            [1, max_sentence_length])  # [num_sentences, max_sentence_length]
        flattened_sentence_indices = self.flatten_emb_by_sentence(
            sentence_indices, text_len_mask)  # [num_words]
        flattened_head_emb = self.flatten_emb_by_sentence(
            head_emb, text_len_mask)  # [num_words]

        candidate_starts = tf.tile(
            tf.expand_dims(tf.range(num_words), 1),
            [1, self.max_span_width])  # [num_words, max_span_width]
        candidate_ends = candidate_starts + tf.expand_dims(
            tf.range(self.max_span_width), 0)  # [num_words, max_span_width]
        candidate_start_sentence_indices = tf.gather(
            flattened_sentence_indices,
            candidate_starts)  # [num_words, max_span_width]
        candidate_end_sentence_indices = tf.gather(
            flattened_sentence_indices,
            tf.minimum(candidate_ends,
                       num_words - 1))  # [num_words, max_span_width]
        candidate_mask = tf.logical_and(
            candidate_ends < num_words,
            tf.equal(
                candidate_start_sentence_indices,
                candidate_end_sentence_indices))  # [num_words, max_span_width]
        flattened_candidate_mask = tf.reshape(
            candidate_mask, [-1])  # [num_words * max_span_width]
        candidate_starts = tf.boolean_mask(
            tf.reshape(candidate_starts,
                       [-1]), flattened_candidate_mask)  # [num_candidates]
        candidate_ends = tf.boolean_mask(
            tf.reshape(candidate_ends,
                       [-1]), flattened_candidate_mask)  # [num_candidates]
        candidate_sentence_indices = tf.boolean_mask(
            tf.reshape(candidate_start_sentence_indices, [-1]),
            flattened_candidate_mask)  # [num_candidates]

        candidate_cluster_ids = self.get_candidate_labels(
            candidate_starts, candidate_ends, gold_starts, gold_ends,
            cluster_ids)  # [num_candidates]

        candidate_span_emb = self.get_span_emb(
            flattened_head_emb, context_outputs, candidate_starts,
            candidate_ends)  # [num_candidates, emb]
        candidate_mention_scores = self.get_mention_scores(
            candidate_span_emb)  # [k, 1]
        candidate_mention_scores = tf.squeeze(candidate_mention_scores,
                                              1)  # [k]

        k = tf.to_int32(
            tf.floor(
                tf.to_float(tf.shape(context_outputs)[0]) *
                self.config["top_span_ratio"]))
        top_span_indices = coref_ops.extract_spans(
            tf.expand_dims(candidate_mention_scores, 0),
            tf.expand_dims(candidate_starts, 0),
            tf.expand_dims(candidate_ends, 0), tf.expand_dims(k, 0),
            util.shape(context_outputs, 0), True)  # [1, k]
        top_span_indices.set_shape([1, None])
        top_span_indices = tf.squeeze(top_span_indices, 0)  # [k]

        top_span_starts = tf.gather(candidate_starts, top_span_indices)  # [k]
        top_span_ends = tf.gather(candidate_ends, top_span_indices)  # [k]
        top_span_emb = tf.gather(candidate_span_emb,
                                 top_span_indices)  # [k, emb]
        top_span_cluster_ids = tf.gather(candidate_cluster_ids,
                                         top_span_indices)  # [k]
        top_span_mention_scores = tf.gather(candidate_mention_scores,
                                            top_span_indices)  # [k]
        top_span_sentence_indices = tf.gather(candidate_sentence_indices,
                                              top_span_indices)  # [k]
        top_span_speaker_ids = tf.gather(speaker_ids, top_span_starts)  # [k]

        c = tf.minimum(self.config["max_top_antecedents"], k)

        if self.config["coarse_to_fine"]:
            top_antecedents, top_antecedents_mask, top_fast_antecedent_scores, top_antecedent_offsets = self.coarse_to_fine_pruning(
                top_span_emb, top_span_mention_scores, c)
        else:
            top_antecedents, top_antecedents_mask, top_fast_antecedent_scores, top_antecedent_offsets = self.distance_pruning(
                top_span_emb, top_span_mention_scores, c)

        dummy_scores = tf.zeros([k, 1])  # [k, 1]
        for i in range(self.config["coref_depth"]):
            with tf.variable_scope("coref_layer", reuse=(i > 0)):
                top_antecedent_emb = tf.gather(top_span_emb,
                                               top_antecedents)  # [k, c, emb]
                top_antecedent_scores = top_fast_antecedent_scores + self.get_slow_antecedent_scores(
                    top_span_emb, top_antecedents, top_antecedent_emb,
                    top_antecedent_offsets, top_span_speaker_ids,
                    genre_emb)  # [k, c]
                top_antecedent_weights = tf.nn.softmax(
                    tf.concat([dummy_scores, top_antecedent_scores],
                              1))  # [k, c + 1]
                top_antecedent_emb = tf.concat(
                    [tf.expand_dims(top_span_emb, 1), top_antecedent_emb],
                    1)  # [k, c + 1, emb]
                attended_span_emb = tf.reduce_sum(
                    tf.expand_dims(top_antecedent_weights, 2) *
                    top_antecedent_emb, 1)  # [k, emb]
                with tf.variable_scope("f"):
                    f = tf.sigmoid(
                        util.projection(
                            tf.concat([top_span_emb, attended_span_emb], 1),
                            util.shape(top_span_emb, -1)))  # [k, emb]
                    top_span_emb = f * attended_span_emb + (
                        1 - f) * top_span_emb  # [k, emb]

        top_antecedent_scores = tf.concat(
            [dummy_scores, top_antecedent_scores], 1)  # [k, c + 1]

        top_antecedent_cluster_ids = tf.gather(top_span_cluster_ids,
                                               top_antecedents)  # [k, c]
        top_antecedent_cluster_ids += tf.to_int32(
            tf.log(tf.to_float(top_antecedents_mask)))  # [k, c]
        same_cluster_indicator = tf.equal(top_antecedent_cluster_ids,
                                          tf.expand_dims(
                                              top_span_cluster_ids,
                                              1))  # [k, c]
        non_dummy_indicator = tf.expand_dims(top_span_cluster_ids > 0,
                                             1)  # [k, 1]
        pairwise_labels = tf.logical_and(same_cluster_indicator,
                                         non_dummy_indicator)  # [k, c]
        dummy_labels = tf.logical_not(
            tf.reduce_any(pairwise_labels, 1, keepdims=True))  # [k, 1]
        top_antecedent_labels = tf.concat([dummy_labels, pairwise_labels],
                                          1)  # [k, c + 1]
        loss = self.softmax_loss(top_antecedent_scores,
                                 top_antecedent_labels)  # [k]
        loss = tf.reduce_sum(loss)  # []

        return [
            candidate_starts, candidate_ends, candidate_mention_scores,
            top_span_starts, top_span_ends, top_antecedents,
            top_antecedent_scores
        ], loss
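The dummy-label construction near the end marks spans with no gold antecedent among the candidates; the same logic in isolation:

import tensorflow as tf

pairwise_labels = tf.constant([[True, False],
                               [False, False]])  # [k=2, c=2]
dummy_labels = tf.logical_not(
    tf.reduce_any(pairwise_labels, 1, keepdims=True))  # [[False], [True]]
labels = tf.concat([dummy_labels, pairwise_labels], 1)  # [k, c + 1]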
Example #49
def ssd_losses(logits,
               localisations,
               gclasses,
               glocalisations,
               gscores,
               match_threshold=0.5,
               negative_ratio=3.,
               alpha=1.,
               label_smoothing=0.,
               scope=None):
    """Loss functions for training the SSD 300 VGG network.

    This function defines the different loss components of the SSD, and
    adds them to the TF loss collection.

    Arguments:
      logits: (list of) predictions logits Tensors;
      localisations: (list of) localisations Tensors;
      gclasses: (list of) groundtruth labels Tensors;
      glocalisations: (list of) groundtruth localisations Tensors;
      gscores: (list of) groundtruth score Tensors;
    """
    with tf.name_scope(scope, 'ssd_losses'):
        l_cross_pos = []
        l_cross_neg = []
        l_loc = []
        for i in range(len(logits)):
            dtype = logits[i].dtype
            with tf.name_scope('block_%i' % i):
                # Determine weights Tensor.
                # treat as positive (matched) if the score exceeds the
                # threshold
                pmask = gscores[i] > match_threshold
                fpmask = tf.cast(pmask, dtype)
                n_positives = tf.reduce_sum(fpmask)

                # Select some random negative entries.
                # n_entries = np.prod(gclasses[i].get_shape().as_list())
                # r_positive = n_positives / n_entries
                # r_negative = negative_ratio * n_positives / (n_entries - n_positives)

                # Negative mask.
                no_classes = tf.cast(pmask, tf.int32)
                predictions = slim.softmax(logits[i])
                nmask = tf.logical_and(
                    tf.logical_not(pmask),  # treat rest as negative
                    gscores[i] > -0.5)
                fnmask = tf.cast(nmask, dtype)
                nvalues = tf.where(nmask, predictions[:, :, :, :, 0],
                                   1. - fnmask)
                nvalues_flat = tf.reshape(nvalues, [-1])
                # Number of negative entries to select.
                n_neg = tf.cast(negative_ratio * n_positives, tf.int32)
                n_neg = tf.maximum(n_neg, tf.size(nvalues_flat) // 8)
                n_neg = tf.maximum(n_neg, tf.shape(nvalues)[0] * 4)
                max_neg_entries = 1 + tf.cast(tf.reduce_sum(fnmask), tf.int32)
                n_neg = tf.minimum(n_neg, max_neg_entries)

                val, idxes = tf.nn.top_k(-nvalues_flat, k=n_neg)
                minval = val[-1]
                # Final negative mask.
                nmask = tf.logical_and(nmask, -nvalues > minval)
                fnmask = tf.cast(nmask, dtype)

                # Add cross-entropy loss.
                with tf.name_scope('cross_entropy_pos'):
                    loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
                        logits=logits[i], labels=gclasses[i])
                    loss = tf.losses.compute_weighted_loss(
                        loss,
                        fpmask)  # use positive mask for cross entropy positive
                    l_cross_pos.append(loss)  # positive cross entropy loss

                with tf.name_scope('cross_entropy_neg'):
                    loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
                        logits=logits[i], labels=no_classes)
                    loss = tf.losses.compute_weighted_loss(
                        loss,
                        fnmask)  # use negative mask for cross entropy negative
                    l_cross_neg.append(loss)  # negative cross entropy loss

                # Add localization loss: smooth L1, L2, ...
                with tf.name_scope('localization'):
                    # Weights Tensor: positive mask + random negative.
                    weights = tf.expand_dims(
                        alpha * fpmask, axis=-1
                    )  # alpha is just 1 here ... (see p.5 of paper end of paragraph "Training objective")
                    loss = custom_layers.abs_smooth(
                        localisations[i] - glocalisations[i]
                    )  # smooth L1 loss (see eq. 2 on p.5 of the paper)
                    loss = tf.losses.compute_weighted_loss(loss, weights)
                    l_loc.append(loss)  # localization loss

        # Additional total losses...
        with tf.name_scope('total'):
            total_cross_pos = tf.add_n(l_cross_pos, 'cross_entropy_pos')
            total_cross_neg = tf.add_n(l_cross_neg, 'cross_entropy_neg')
            total_cross = tf.add(
                total_cross_pos, total_cross_neg, 'cross_entropy'
            )  # add positive and negative cross entropies to get total cross entropy
            total_loc = tf.add_n(l_loc, 'localization')  # localization loss

            # Add to EXTRA LOSSES TF.collection
            tf.add_to_collection('EXTRA_LOSSES', total_cross_pos)
            tf.add_to_collection('EXTRA_LOSSES', total_cross_neg)
            tf.add_to_collection('EXTRA_LOSSES', total_cross)
            tf.add_to_collection('EXTRA_LOSSES', total_loc)
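The negative-mining step above keeps the n_neg most confident background mistakes by running top_k on the negated background probabilities; a minimal standalone sketch of the same trick:

import tensorflow as tf

bg_probs = tf.constant([0.9, 0.2, 0.95, 0.1, 0.6])  # P(background) per anchor
n_neg = 2
val, _ = tf.nn.top_k(-bg_probs, k=n_neg)
minval = val[-1]
# Keep anchors strictly harder than the cutoff, i.e. those with the
# lowest background probability (most confidently wrong negatives).
hard_neg_mask = -bg_probs > minval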
Example #50
class Dummy:
    pass


env = Dummy()

with tf.variable_scope('model'):
    env.x = tf.placeholder(tf.float32, (None, img_size, img_size, img_chan),
                           name='x')
    env.y = tf.placeholder(tf.float32, (None, 1), name='y')
    env.training = tf.placeholder_with_default(False, (), name='mode')

    env.ybar = model(env.x, training=env.training)

    with tf.variable_scope('acc'):
        count = tf.logical_not(
            tf.logical_xor(tf.greater(env.y, 0.0), tf.greater(env.ybar, 0.0)))
        env.acc = tf.reduce_mean(tf.cast(count, tf.float32), name='acc')

    env.loss = tf.losses.mean_squared_error(labels=env.y,
                                            predictions=env.ybar,
                                            scope='loss')

    with tf.variable_scope('train_op'):
        optimizer = tf.train.AdamOptimizer()
        env.train_op = optimizer.minimize(env.loss)

    env.saver = tf.train.Saver()

with tf.variable_scope('model', reuse=True):
    env.adv_epochs = tf.placeholder(tf.int32, (), name='adv_epochs')
    env.xadv = deepfool(model, env.x, epochs=env.adv_epochs, batch=True)
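The 'acc' scope above counts a prediction as correct when y and ybar agree in sign; standalone:

import tensorflow as tf

y = tf.constant([[1.0], [-1.0], [1.0]])
ybar = tf.constant([[0.3], [0.2], [-0.8]])
agree = tf.logical_not(
    tf.logical_xor(tf.greater(y, 0.0), tf.greater(ybar, 0.0)))
acc = tf.reduce_mean(tf.cast(agree, tf.float32))   # -> 1/3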
Example #51
    def adaptive_search(self, inputs, closed, last_beam_size, beam_size,
                        natural_order_tokens, natural_order_pos, **kwargs):
        """A function that implements a forward pass and updates the decoding
        partial sequence using a beam search

        Arguments:

        inputs: Dataclass
            a dataclass that stores partial decoding information that will
            be mutated by this layer during decoding
        closed: tf.Tensor
            a boolean tensor where true values indicate that a beam has
            finished decoding and should not be modified
        last_beam_size: int
            the number of beams that were expanded by the last layer in an
            autoregressive model
        beam_size: int
            the number of beams to be expanded by this layer in an
            autoregressive model
        natural_order_tokens: tf.Tensor
            a batch of sequences representing the generation index of tokens
            in natural order that are yet to be decoded.
        natural_order_pos: tf.Tensor
            a batch of sequences representing the word ids of tokens
            in natural order that are yet to be decoded.

        Returns:

        decoding: Dataclass
            a dataclass that stores partial decoding information that will
            be mutated by this layer during decoding
        closed: tf.Tensor
            a boolean tensor where true values indicate that a beam has
            finished decoding and should not be modified
        beam_size: int
            the number of beams to be expanded by this layer in an
            autoregressive model
        natural_order: tf.Tensor
            a batch of sequences representing the words in natural order
            that are yet to be decoded."""

        # unpack all the required model inputs, some might be empty tensors:
        [
            queries, values, queries_mask, values_mask, ids, permutation,
            absolute_positions, relative_positions, pointer_labels,
            logits_labels, partial_pos, pointer_probs, log_probs,
            object_detections, object_features, object_boxes
        ] = inputs

        # compute a distribution over tokens
        logits = self.logits_before_softmax(queries, **kwargs)[:, -1]

        # calculate a mask over the vocab
        mask = tf.reduce_sum(tf.one_hot(natural_order_tokens,
                                        tf.shape(logits)[1],
                                        axis=2),
                             axis=1)

        # make sure the mask is clipped to be 0.0 or 1.0
        mask = tf.clip_by_value(mask, 0.0, 1.0)

        # true if the mask contains <unk> or any word
        is_token = tf.logical_or(
            tf.equal(mask[:, 1], 1),
            tf.reduce_any(tf.equal(mask[:, 4:], 1), axis=1))

        # true if the mask does not contain any words, and contains <end>
        is_end = tf.logical_and(tf.logical_not(is_token),
                                tf.equal(mask[:, 3], 1))

        # a mask that contains only the <end> token
        end_mask = tf.one_hot(tf.fill([tf.shape(logits)[0]], 3),
                              tf.shape(logits)[1],
                              axis=1)

        # a mask that contains only the <pad> token
        pad_mask = tf.one_hot(tf.fill([tf.shape(logits)[0]], 0),
                              tf.shape(logits)[1],
                              axis=1)

        # a mask that contains only words
        token_mask = tf.clip_by_value(mask - end_mask - pad_mask, 0.0, 1.0)

        # create a batch of different masks
        mask = tf.where(
            is_end[:, tf.newaxis], end_mask,
            tf.where(is_token[:, tf.newaxis], token_mask, pad_mask))

        # convert the masks into offsets for the softmax op: 0 -> -\infty
        offset = (1.0 - mask) * 999999.0
        logits = tf.math.log_softmax(logits - offset)
        batch_size = tf.shape(logits)[0] // last_beam_size

        # sample the top beam_size candidates
        _log_probs, _ids = tf.math.top_k(logits, k=beam_size)

        # when a beam is closed all candidates are the same
        # this prevents the same candidates from being sampled twice
        first = tf.one_hot(tf.fill(tf.shape(_log_probs)[:1], 0), beam_size)
        closed_log_probs = tf.where(tf.equal(first, 0),
                                    tf.fill(tf.shape(first), -999999.),
                                    tf.fill(tf.shape(first), 0.))

        # when a beam is closed special behavior is required
        # do not change the log probability and append only pad tokens
        mask = closed[:, tf.newaxis]
        _log_probs = tf.where(mask, closed_log_probs, _log_probs)
        _ids = tf.where(mask, tf.zeros_like(_ids), _ids)

        # manipulate the log probabilities to extract all possible
        # next beam candidates and their probability
        _log_probs = tf.reshape(_log_probs,
                                [batch_size, last_beam_size, beam_size])
        _log_probs = tf.reshape(log_probs,
                                [batch_size, last_beam_size, 1]) + _log_probs
        _log_probs = tf.reshape(_log_probs,
                                [batch_size, last_beam_size * beam_size])

        # select the top beam_size candidates
        _log_probs, beam_ids = tf.math.top_k(_log_probs, k=beam_size)

        # these indices may be a bit subtle; they work as follows
        # the last dim has last_beam_size * beam_size elements
        # the first beam_size elements represent candidate proposals
        # from a single original beam
        old_beam_ids = tf.math.floordiv(beam_ids, beam_size)

        # select the ids based on their beams that are from the beams with
        # highest log probability
        _ids = tf.reshape(_ids, [batch_size, last_beam_size * beam_size])
        _ids = tf.gather(_ids, beam_ids, batch_dims=1)
        _ids = tf.reshape(_ids, [batch_size * beam_size, 1])

        # this function helps select the hidden activations from
        # inputs that correspond to old selected beams
        # this is necessary because future layers may depend on activations
        # that are a function of which beam was selected
        def select(x):
            if x is None:
                return x
            shape = tf.shape(x)[1:]
            s0 = tf.concat([[batch_size, last_beam_size], shape], axis=0)
            s1 = tf.concat([[batch_size * beam_size], shape], axis=0)
            return tf.reshape(
                tf.gather(tf.reshape(x, s0), old_beam_ids, batch_dims=1), s1)

        # select which old beams are propagated forward
        # this is necessary because some beams have content-aware state
        queries = select(queries)
        values = select(values)
        queries_mask = select(queries_mask)
        values_mask = select(values_mask)
        ids = select(ids)
        permutation = select(permutation)
        absolute_positions = select(absolute_positions)
        relative_positions = select(relative_positions)
        partial_pos = select(partial_pos)
        pointer_labels = select(pointer_labels)
        logits_labels = select(logits_labels)
        closed = select(closed)
        natural_order_tokens = select(natural_order_tokens)
        natural_order_pos = select(natural_order_pos)

        # TODO: Brandon -> handle the image features as well.
        object_detections = select(object_detections)
        object_features = select(object_features)
        object_boxes = select(object_boxes)

        # concatenate the sampled tokens to the beam and prepare the
        # model outputs for the next layer; also compute whether we
        # have finished decoding by predicting the end token
        ids = tf.concat([ids, _ids], 1)
        log_probs = tf.reshape(_log_probs, [batch_size * beam_size])
        return ([
            queries, values, queries_mask, values_mask, ids, permutation,
            absolute_positions, relative_positions, pointer_labels,
            logits_labels, partial_pos, pointer_probs, log_probs,
            object_detections, object_features, object_boxes
        ], tf.logical_or(closed, tf.equal(_ids[:, 0], 3)), beam_size,
                natural_order_tokens, natural_order_pos)
Example #52
  def _calc_oicr_loss(self,
                      labels,
                      num_proposals,
                      proposals,
                      scores_0,
                      scores_1,
                      scope,
                      iou_threshold=0.5):
    """Calculates the OICR loss at refinement stage `i`.

    Args:
      labels: A [batch, num_classes] float tensor.
      num_proposals: A [batch] int tensor.
      proposals: A [batch, max_num_proposals, 4] float tensor.
      scores_0: A [batch, max_num_proposals, 1 + num_classes] float tensor,
        representing the proposal scores at the `k`-th refinement.
      scores_1: A [batch, max_num_proposals, 1 + num_classes] float tensor,
        representing the proposal scores at the `(k+1)`-th refinement.

    Returns:
      oicr_cross_entropy_loss: a scalar float tensor.
    """
    with tf.name_scope(scope):
      (batch, max_num_proposals,
       num_classes_plus_one) = utils.get_tensor_shape(scores_0)
      num_classes = num_classes_plus_one - 1

      # For each class, look for the most confident proposal.
      #   proposal_ind shape = [batch, num_classes].

      proposal_mask = tf.sequence_mask(
          num_proposals, maxlen=max_num_proposals, dtype=tf.float32)
      proposal_ind = utils.masked_argmax(
          tf.nn.softmax(scores_0, axis=-1)[:, :, 1:],
          tf.expand_dims(proposal_mask, axis=-1),
          dim=1)

      # Deal with the most confident proposal per each class.
      #   Unstack the `proposal_ind`, `labels`.
      #   proposal_labels shape = [batch, max_num_proposals, num_classes].

      proposal_labels = []
      indices_0 = tf.range(batch, dtype=tf.int64)
      for indices_1, label_per_class in zip(
          tf.unstack(proposal_ind, axis=-1), tf.unstack(labels, axis=-1)):

        # Gather the most confident proposal for the class.
        #   confident_proposal shape = [batch, 4].

        indices = tf.stack([indices_0, indices_1], axis=-1)
        confident_proposal = tf.gather_nd(proposals, indices)

        # Get the Iou from all the proposals to the most confident proposal.
        #   iou shape = [batch, max_num_proposals].

        confident_proposal_tiled = tf.tile(
            tf.expand_dims(confident_proposal, axis=1),
            [1, max_num_proposals, 1])
        iou = box_utils.iou(
            tf.reshape(proposals, [-1, 4]),
            tf.reshape(confident_proposal_tiled, [-1, 4]))
        iou = tf.reshape(iou, [batch, max_num_proposals])

        # Filter out irrelevant predictions using image-level label.

        target = tf.to_float(tf.greater_equal(iou, iou_threshold))
        target = tf.where(
            label_per_class > 0, x=target, y=tf.zeros_like(target))
        proposal_labels.append(target)

      proposal_labels = tf.stack(proposal_labels, axis=-1)

      # Add background targets, and normalize the sum value to 1.0.
      #   proposal_labels shape = [batch, max_num_proposals, 1 + num_classes].

      bkg = tf.logical_not(tf.reduce_sum(proposal_labels, axis=-1) > 0)
      proposal_labels = tf.concat(
          [tf.expand_dims(tf.to_float(bkg), axis=-1), proposal_labels], axis=-1)

      proposal_labels = tf.div(
          proposal_labels, tf.reduce_sum(
              proposal_labels, axis=-1, keepdims=True))

      assert_op = tf.Assert(
          tf.reduce_all(
              tf.abs(tf.reduce_sum(proposal_labels, axis=-1) - 1) < 1e-6),
          ["Probabilities not sum to ONE", proposal_labels])

      # Compute the loss.

      with tf.control_dependencies([assert_op]):
        losses = tf.nn.softmax_cross_entropy_with_logits(
            labels=tf.stop_gradient(proposal_labels), logits=scores_1)
        oicr_cross_entropy_loss = tf.reduce_mean(
            utils.masked_avg(data=losses, mask=proposal_mask, dim=1))

    return oicr_cross_entropy_loss
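The background flag above marks proposals with no foreground assignment in any class column; the same construction in isolation:

import tensorflow as tf

proposal_labels = tf.constant([[1., 0.],
                               [0., 0.],
                               [0., 1.]])  # [proposals, classes]
bkg = tf.logical_not(
    tf.reduce_sum(proposal_labels, axis=-1) > 0)  # [False, True, False]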
Example #53
def loop_cond(i, decodes_BxT, unused_cache_BxU_dict):
    finished_B = tf.reduce_any(tf.equal(decodes_BxT, eos_id), axis=1)
    return tf.logical_and(i < max_decode_len,
                          tf.logical_not(tf.reduce_all(finished_B)))
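loop_cond is the predicate of a decoding tf.while_loop: keep going while the step budget remains and at least one sequence has not emitted EOS. A hedged usage sketch; eos_id, max_decode_len and the body below are illustrative stand-ins:

import tensorflow as tf

eos_id, max_decode_len = 1, 8
decodes_BxT = tf.zeros([4, max_decode_len], dtype=tf.int32)
cache_BxU = tf.zeros([4, 16])  # placeholder for the decoder cache

def loop_body(i, decodes_BxT, cache_BxU_dict):
    # The real body runs one decoder step and writes token i; omitted here.
    return i + 1, decodes_BxT, cache_BxU_dict

_ = tf.while_loop(loop_cond, loop_body,
                  [tf.constant(0), decodes_BxT, cache_BxU])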
Example #54
    def __invert__(self):
        return tf.logical_not(self)
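Defining __invert__ routes Python's ~ operator to tf.logical_not. A toy illustration with a hypothetical wrapper class (the snippet's actual class, likely a tensor subclass, is not shown):

import tensorflow as tf

class BoolBox:
    """Hypothetical wrapper standing in for the snippet's class."""
    def __init__(self, values):
        self.values = tf.convert_to_tensor(values, dtype=tf.bool)

    def __invert__(self):
        # The original returns tf.logical_not(self); a tensor subclass is
        # directly convertible, a plain wrapper negates its payload.
        return tf.logical_not(self.values)

flipped = ~BoolBox([True, False])   # -> [False, True]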
Example #55
def _fp(y_true, y_pred, typecast='float32'):
    bad_preds = K.cast(tf.logical_not(K.equal(y_true, y_pred)), typecast)
    false_pos = K.cast(K.sum(bad_preds * K.cast(K.equal(y_true, 0), typecast)),
                       typecast)
    return false_pos
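Example use of _fp, assuming K in the snippet is bound to the Keras backend: with class-id tensors, false positives are the mismatches where the true class is 0:

import tensorflow as tf

y_true = tf.constant([0, 1, 0, 0])
y_pred = tf.constant([1, 1, 0, 2])
fp = _fp(y_true, y_pred)   # mismatches at positions 0 and 3 -> 2.0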
def main(logdir='./logs/cla'):
    data_batch, label_batch = read_batch(
        './ISBI2016_ISIC_Part3B_Training_Data_tight_cropped',
        './ISBI2016_ISIC_Part3B_Training_GroundTruth.csv', batch_size)
    data, label = preprocess(data_batch, label_batch)

    result, pretrained_saver, keep_prob = model(data)

    with tf.name_scope('softmax_with_loss'):
        cross_entropy = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(logits=result,
                                                    labels=label,
                                                    dim=1))

    # train_op = optimize_with_two_lr(
    #    tf.train.AdamOptimizer, cross_entropy,
    #    var_list1, var_list2, 0.001, 0.0001)
    train_op = tf.train.AdamOptimizer(0.0001).minimize(cross_entropy)

    with tf.name_scope('evaluation'):
        prediction = tf.cast(tf.argmax(result, 1), tf.bool)
        ground_truth = tf.cast(tf.argmax(label, 1), tf.bool)
        TP = tf.reduce_sum(
            tf.cast(tf.logical_and(prediction, ground_truth), tf.int32))
        TN = tf.reduce_sum(
            tf.cast(tf.logical_not(tf.logical_or(prediction, ground_truth)),
                    tf.int32))
        FP = tf.reduce_sum(
            tf.cast(tf.logical_and(prediction, tf.logical_not(ground_truth)),
                    tf.int32))
        FN = tf.reduce_sum(
            tf.cast(tf.logical_and(tf.logical_not(prediction), ground_truth),
                    tf.int32))

    summary_writer = tf.summary.FileWriter(logdir, tf.get_default_graph())
    saver = tf.train.Saver(max_to_keep=10, keep_checkpoint_every_n_hours=1)

    init = tf.global_variables_initializer()
    with tf.Session() as sess:
        sess.run(init)
        pretrained_saver.restore(sess, './ResNet-L50.ckpt')
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)

        tp = 0
        tn = 0
        fp = 0
        fn = 0
        loss = 0
        for i in range(epochs):
            for j in range(900 // batch_size):
                TP_step, TN_step, FP_step, FN_step, loss_step, _ = sess.run(
                    [TP, TN, FP, FN, cross_entropy, train_op],
                    feed_dict={keep_prob: 0.5})
                tp += TP_step
                tn += TN_step
                fp += FP_step
                fn += FN_step
                loss += loss_step

                if j % 25 == 24:
                    acc = (tp + tn) / float(tp + tn + fp + fn)
                    se = tp / float(tp + fn)
                    sp = tn / float(tn + fp)
                    loss /= 25.0
                    my_summary = tf.Summary(value=[
                        tf.Summary.Value(tag="accuracy", simple_value=acc),
                        tf.Summary.Value(tag="sensitivity", simple_value=se),
                        tf.Summary.Value(tag="specificity", simple_value=sp),
                        tf.Summary.Value(tag="loss", simple_value=loss),
                    ])
                    summary_writer.add_summary(
                        my_summary, i * (900 // batch_size) + j)
                    print('epoch', i + 1, 'batch', j + 1)
                    print('accuracy', acc, 'cross_entropy', loss)
                    acc = 0
                    loss = 0
            saver.save(sess,
                       'ckpts/cla/cla',
                       global_step=(i + 1) * (900 // batch_size))

        coord.request_stop()
        coord.join(threads)
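The four counts in the evaluation scope follow directly from boolean algebra on the prediction and ground-truth masks; a standalone toy check of that pattern:

import tensorflow as tf

prediction = tf.constant([True, True, False, False])
ground_truth = tf.constant([True, False, True, False])

TP = tf.reduce_sum(tf.cast(tf.logical_and(prediction, ground_truth), tf.int32))
TN = tf.reduce_sum(tf.cast(
    tf.logical_not(tf.logical_or(prediction, ground_truth)), tf.int32))
FP = tf.reduce_sum(tf.cast(
    tf.logical_and(prediction, tf.logical_not(ground_truth)), tf.int32))
FN = tf.reduce_sum(tf.cast(
    tf.logical_and(tf.logical_not(prediction), ground_truth), tf.int32))

with tf.Session() as sess:
    print(sess.run([TP, TN, FP, FN]))  # [1, 1, 1, 1]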
Example #57
def batch_hard(dists, pids, margin, batch_precision_at_k=None):
    """Computes the batch-hard loss from arxiv.org/abs/1703.07737.

    Args:
        dists (2D tensor): A square all-to-all distance matrix as given by cdist.
        pids (1D tensor): The identities of the entries in `batch`, shape (B,).
            This can be of any type that can be compared, thus also a string.
        margin: The value of the margin if a number, alternatively the string
            'soft' for using the soft-margin formulation, or `None` for not
            using a margin at all.
        batch_precision_at_k: Optional int. If given, monitoring values are
            returned in addition to the loss (see Returns).

    Returns:
        A 1D tensor of shape (B,) containing the loss value for each sample.
        If `batch_precision_at_k` is set, additionally returns top-1 accuracy,
        precision-at-k, the top-k match mask, and the negative and positive
        distance values for monitoring.
    """
    with tf.name_scope("batch_hard"):
        same_identity_mask = tf.equal(tf.expand_dims(pids, axis=1),
                                      tf.expand_dims(pids, axis=0))
        negative_mask = tf.logical_not(same_identity_mask)
        positive_mask = tf.logical_xor(same_identity_mask,
                                       tf.eye(tf.shape(pids)[0], dtype=tf.bool))

        furthest_positive = tf.reduce_max(dists*tf.cast(positive_mask, tf.float32), axis=1)
        closest_negative = tf.map_fn(lambda x: tf.reduce_min(tf.boolean_mask(x[0], x[1])),
                                    (dists, negative_mask), tf.float32)
        # Another way of achieving the same, though more hacky:
        # closest_negative = tf.reduce_min(dists + 1e5*tf.cast(same_identity_mask, tf.float32), axis=1)

        diff = furthest_positive - closest_negative
        if isinstance(margin, numbers.Real):
            diff = tf.maximum(diff + margin, 0.0)
        elif margin == 'soft':
            diff = tf.nn.softplus(diff)
        elif margin is None or margin.lower() == 'none':
            pass
        else:
            raise NotImplementedError(
                'The margin {} is not implemented in batch_hard'.format(margin))

    if batch_precision_at_k is None:
        return diff

    # For monitoring, compute the within-batch top-1 accuracy and the
    # within-batch precision-at-k, which is somewhat more expressive.
    with tf.name_scope("monitoring"):
        # This is like argsort along the last axis. Add one to K as we'll
        # drop the diagonal.
        _, indices = tf.nn.top_k(-dists, k=batch_precision_at_k+1)

        # Drop the diagonal (distance to self is always least).
        indices = indices[:,1:]

        # Generate the index indexing into the batch dimension.
        # This is something like [[0,0,0],[1,1,1],...,[B,B,B]]
        batch_index = tf.tile(
            tf.expand_dims(tf.range(tf.shape(indices)[0]), 1),
            (1, tf.shape(indices)[1]))

        # Stitch the above together with the argsort indices to get the
        # indices of the top-k of each row.
        topk_indices = tf.stack((batch_index, indices), -1)

        # See if the topk belong to the same person as they should, or not.
        topk_is_same = tf.gather_nd(same_identity_mask, topk_indices)

        # All of the above could be reduced to the simpler following if k==1
        #top1_is_same = get_at_indices(same_identity_mask, top_idxs[:,1])

        topk_is_same_f32 = tf.cast(topk_is_same, tf.float32)
        top1 = tf.reduce_mean(topk_is_same_f32[:,0])
        prec_at_k = tf.reduce_mean(topk_is_same_f32)

        # Finally, let's get some more info that can help in debugging while
        # we're at it!
        negative_dists = tf.boolean_mask(dists, negative_mask)
        positive_dists = tf.boolean_mask(dists, positive_mask)

        return diff, top1, prec_at_k, topk_is_same, negative_dists, positive_dists
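A small usage sketch for batch_hard. The cdist below is a hypothetical stand-in for the all-pairs distance function referenced in the docstring (it is not part of the original snippet), and batch_hard itself additionally needs `import numbers`:

import numbers
import tensorflow as tf

def cdist(a, b):
    # Hypothetical: all-pairs Euclidean distances between rows of a and b.
    diff = tf.expand_dims(a, 1) - tf.expand_dims(b, 0)
    return tf.sqrt(tf.reduce_sum(tf.square(diff), axis=-1) + 1e-12)

embeddings = tf.constant([[0., 0.], [0., 1.], [5., 5.], [5., 6.]])
pids = tf.constant([0, 0, 1, 1])

losses = batch_hard(cdist(embeddings, embeddings), pids, margin='soft')
with tf.Session() as sess:
    print(sess.run(losses))  # one soft-margin loss per sample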
Example #58
def _fn(y_true, y_pred, typecast='float32'):
    # False negatives: mispredictions where the true label is 1.
    bad_preds = K.cast(tf.logical_not(K.equal(y_true, y_pred)), typecast)
    false_neg = K.cast(K.sum(bad_preds * y_true), typecast)
    return false_neg
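_fp and _fn can be exercised together on a toy batch, assuming K is the Keras backend and the labels are {0, 1} floats:

import tensorflow as tf
from tensorflow.keras import backend as K  # assumed source of K

y_true = tf.constant([1., 0., 1., 0.])
y_pred = tf.constant([1., 1., 0., 0.])

with tf.Session() as sess:
    # One false positive (index 1) and one false negative (index 2).
    print(sess.run([_fp(y_true, y_pred), _fn(y_true, y_pred)]))  # [1.0, 1.0]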
Example #59
    def connect_data_and_network(self,
                                 outputs_collector=None,
                                 gradients_collector=None):

        print('connect data and network')

        def switch_sampler(for_training):
            with tf.name_scope('train' if for_training else 'validation'):
                sampler = self.get_sampler()[0][0 if for_training else -1]
                return sampler.pop_batch_op()

        def mixup_switch_sampler(for_training):
            # get first set of samples
            d_dict = switch_sampler(for_training=for_training)

            mix_fields = ('image', 'weight', 'label')

            if not for_training:
                with tf.name_scope('nomix'):
                    # ensure label is appropriate for dense loss functions
                    ground_truth = tf.cast(d_dict['label'], tf.int32)
                    one_hot = tf.one_hot(
                        tf.squeeze(ground_truth, axis=-1),
                        depth=self.segmentation_param.num_classes)
                    d_dict['label'] = one_hot
            else:
                with tf.name_scope('mixup'):
                    # get the mixing parameter from the Beta distribution
                    alpha = self.segmentation_param.mixup_alpha
                    # Beta(1, 1) would be the uniform distribution.
                    beta = tf.distributions.Beta(alpha, alpha)
                    rand_frac = beta.sample()

                    # get another minibatch
                    d_dict_to_mix = switch_sampler(for_training=True)

                    # look at binarised labels: sort them
                    if self.segmentation_param.mix_match:
                        # sum up the positive labels to sort by their volumes
                        inds1 = tf.argsort(
                            tf.map_fn(tf.reduce_sum,
                                      tf.cast(d_dict['label'] > 0, tf.int64)))
                        inds2 = tf.argsort(
                            tf.map_fn(
                                tf.reduce_sum,
                                tf.cast(d_dict_to_mix['label'] > 0, tf.int64)))
                        for field in [
                                field for field in mix_fields
                                if field in d_dict
                        ]:
                            d_dict[field] = tf.gather(d_dict[field],
                                                      indices=inds1)
                            # note: d_dict_to_mix is sorted in the opposite direction
                            d_dict_to_mix[field] = tf.gather(
                                d_dict_to_mix[field], indices=inds2[::-1])

                    # making the labels dense and one-hot
                    for d in (d_dict, d_dict_to_mix):
                        ground_truth = tf.cast(d['label'], tf.int32)
                        one_hot = tf.one_hot(
                            tf.squeeze(ground_truth, axis=-1),
                            depth=self.segmentation_param.num_classes)
                        d['label'] = one_hot

                    # do the mixing for any fields that are relevant and present
                    mixed_up = {
                        field: d_dict[field] * rand_frac +
                        d_dict_to_mix[field] * (1 - rand_frac)
                        for field in mix_fields if field in d_dict
                    }
                    # reassign all relevant values in d_dict
                    d_dict.update(mixed_up)

            return d_dict

        if self.is_training:
            if not self.segmentation_param.do_mixup:
                data_dict = tf.cond(tf.logical_not(self.is_validation),
                                    lambda: switch_sampler(for_training=True),
                                    lambda: switch_sampler(for_training=False))
            else:
                # mix up the samples if not in validation phase
                data_dict = tf.cond(
                    tf.logical_not(self.is_validation),
                    lambda: mixup_switch_sampler(for_training=True),
                    # validation samples are not mixed
                    lambda: mixup_switch_sampler(for_training=False))

            image = tf.cast(data_dict['image'], tf.float32)
            net_args = {
                'is_training': self.is_training,
                'keep_prob': self.net_param.keep_prob
            }
            net_out = self.net(image, **net_args)

            with tf.name_scope('Optimiser'):
                optimiser_class = OptimiserFactory.create(
                    name=self.action_param.optimiser)
                self.optimiser = optimiser_class.get_instance(
                    learning_rate=self.action_param.lr)
            loss_func = LossFunction(
                n_class=self.segmentation_param.num_classes,
                loss_type=self.action_param.loss_type,
                softmax=self.segmentation_param.softmax)
            data_loss = loss_func(prediction=net_out,
                                  ground_truth=data_dict.get('label', None),
                                  weight_map=data_dict.get('weight', None))
            reg_losses = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
            if self.net_param.decay > 0.0 and reg_losses:
                reg_loss = tf.reduce_mean(
                    [tf.reduce_mean(reg_loss) for reg_loss in reg_losses])
                loss = data_loss + reg_loss
            else:
                loss = data_loss

            # Get all vars
            to_optimise = tf.trainable_variables()
            vars_to_freeze = \
                self.action_param.vars_to_freeze or \
                self.action_param.vars_to_restore
            if vars_to_freeze:
                import re
                var_regex = re.compile(vars_to_freeze)
                # Only optimise vars that are not frozen
                to_optimise = \
                    [v for v in to_optimise if not var_regex.search(v.name)]
                tf.logging.info(
                    "Optimizing %d out of %d trainable variables, "
                    "the other variables fixed (--vars_to_freeze %s)",
                    len(to_optimise), len(tf.trainable_variables()),
                    vars_to_freeze)

            grads = self.optimiser.compute_gradients(
                loss, var_list=to_optimise, colocate_gradients_with_ops=True)

            self.total_loss = loss

            # collecting gradients variables
            gradients_collector.add_to_collection([grads])

            # collecting output variables
            outputs_collector.add_to_collection(var=self.total_loss,
                                                name='total_loss',
                                                average_over_devices=True,
                                                collection=CONSOLE)
            outputs_collector.add_to_collection(var=self.total_loss,
                                                name='total_loss',
                                                average_over_devices=True,
                                                summary_type='scalar',
                                                collection=TF_SUMMARIES)
            outputs_collector.add_to_collection(var=data_loss,
                                                name='loss',
                                                average_over_devices=False,
                                                collection=CONSOLE)
            outputs_collector.add_to_collection(var=data_loss,
                                                name='loss',
                                                average_over_devices=True,
                                                summary_type='scalar',
                                                collection=TF_SUMMARIES)

            # outputs_collector.add_to_collection(
            #    var=image*180.0, name='image',
            #    average_over_devices=False, summary_type='image3_sagittal',
            #    collection=TF_SUMMARIES)

            # outputs_collector.add_to_collection(
            #    var=image, name='image',
            #    average_over_devices=False,
            #    collection=NETWORK_OUTPUT)

            # outputs_collector.add_to_collection(
            #    var=tf.reduce_mean(image), name='mean_image',
            #    average_over_devices=False, summary_type='scalar',
            #    collection=CONSOLE)
        elif self.is_inference:
            # converting logits into final output for
            # classification probabilities or argmax classification labels
            data_dict = switch_sampler(for_training=False)
            image = tf.cast(data_dict['image'], tf.float32)
            net_args = {
                'is_training': self.is_training,
                'keep_prob': self.net_param.keep_prob
            }
            net_out = self.net(image, **net_args)

            output_prob = self.segmentation_param.output_prob
            num_classes = self.segmentation_param.num_classes
            if output_prob and num_classes > 1:
                post_process_layer = PostProcessingLayer(
                    'SOFTMAX', num_classes=num_classes)
            elif not output_prob and num_classes > 1:
                post_process_layer = PostProcessingLayer(
                    'ARGMAX', num_classes=num_classes)
            else:
                post_process_layer = PostProcessingLayer(
                    'IDENTITY', num_classes=num_classes)
            net_out = post_process_layer(net_out)

            outputs_collector.add_to_collection(var=net_out,
                                                name='window',
                                                average_over_devices=False,
                                                collection=NETWORK_OUTPUT)
            outputs_collector.add_to_collection(
                var=data_dict['image_location'],
                name='location',
                average_over_devices=False,
                collection=NETWORK_OUTPUT)
            self.initialise_aggregator()
        elif self.is_export:
            data_dict = switch_sampler(for_training=False)
            output_prob = self.segmentation_param.output_prob
            num_classes = self.segmentation_param.num_classes
            image = tf.cast(data_dict['image'], tf.float32)
            net_args = {
                'is_training': self.is_training,
                'keep_prob': self.net_param.keep_prob
            }

            net_out = self.net(image, **net_args)
            post_process_layer = PostProcessingLayer('SOFTMAX',
                                                     num_classes=num_classes)
            net_out = post_process_layer(net_out)
            self.initialise_aggregator()
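Per field, the mixup branch above reduces to a Beta-weighted convex combination of two minibatches. A minimal sketch with assumed toy one-hot labels and alpha=0.4:

import tensorflow as tf

alpha = 0.4  # assumed; stands in for segmentation_param.mixup_alpha above
beta = tf.distributions.Beta(alpha, alpha)
rand_frac = beta.sample()

labels_a = tf.constant([[1., 0.], [0., 1.]])
labels_b = tf.constant([[0., 1.], [1., 0.]])
mixed = labels_a * rand_frac + labels_b * (1. - rand_frac)

with tf.Session() as sess:
    print(sess.run(mixed))  # rows remain valid distributions (sum to 1)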
Example #60
    def __init__(self, dataset, config):
        flat_inputs = dataset.flat_inputs
        self.config = config
        # Path of the result folder
        if self.config.saving:
            if self.config.saving_path is None:
                self.saving_path = time.strftime(
                    'results/Log_%Y-%m-%d_%H-%M-%S', time.gmtime())
            else:
                self.saving_path = self.config.saving_path
            if not exists(self.saving_path):
                makedirs(self.saving_path)

        with tf.variable_scope('inputs'):
            self.inputs = dict()
            num_layers = self.config.num_layers
            self.inputs['xyz'] = flat_inputs[:num_layers]
            self.inputs['neigh_idx'] = flat_inputs[num_layers:2 * num_layers]
            self.inputs['sub_idx'] = flat_inputs[2 * num_layers:3 * num_layers]
            self.inputs['interp_idx'] = flat_inputs[3 * num_layers:4 *
                                                    num_layers]
            self.inputs['features'] = flat_inputs[4 * num_layers]
            self.inputs['labels'] = flat_inputs[4 * num_layers + 1]
            self.inputs['input_inds'] = flat_inputs[4 * num_layers + 2]
            self.inputs['cloud_inds'] = flat_inputs[4 * num_layers + 3]

            self.labels = self.inputs['labels']
            self.is_training = tf.placeholder(tf.bool, shape=())
            self.training_step = 1
            self.training_epoch = 0
            self.correct_prediction = 0
            self.accuracy = 0
            self.mIou_list = [0]
            self.class_weights = DP.get_class_weights(dataset.name)
            self.Log_file = open(
                'log_train_' + dataset.name + '_' + str(dataset.val_split) +
                time.strftime('_%Y-%m-%d_%H-%M-%S.txt', time.gmtime()), 'a')

        with tf.variable_scope('layers'):
            self.logits = self.inference(self.inputs, self.is_training)

        #####################################################################
        # Ignore the invalid points (unlabeled) when calculating the loss #
        #####################################################################
        with tf.variable_scope('loss'):
            self.logits = tf.reshape(self.logits, [-1, config.num_classes])
            self.labels = tf.reshape(self.labels, [-1])

            # Boolean mask of points that should be ignored
            ignored_bool = tf.zeros_like(self.labels, dtype=tf.bool)
            for ign_label in self.config.ignored_label_inds:
                ignored_bool = tf.logical_or(ignored_bool,
                                             tf.equal(self.labels, ign_label))

            # Collect logits and labels that are not ignored
            valid_idx = tf.squeeze(tf.where(tf.logical_not(ignored_bool)))
            valid_logits = tf.gather(self.logits, valid_idx, axis=0)
            valid_labels_init = tf.gather(self.labels, valid_idx, axis=0)

            # Reduce label values in the range of logit shape
            reducing_list = tf.range(self.config.num_classes, dtype=tf.int32)
            inserted_value = tf.zeros((1, ), dtype=tf.int32)
            for ign_label in self.config.ignored_label_inds:
                reducing_list = tf.concat([
                    reducing_list[:ign_label], inserted_value,
                    reducing_list[ign_label:]
                ], 0)
            valid_labels = tf.gather(reducing_list, valid_labels_init)

            self.loss = self.get_loss(valid_logits, valid_labels,
                                      self.class_weights)

        with tf.variable_scope('optimizer'):
            self.learning_rate = tf.Variable(config.learning_rate,
                                             trainable=False,
                                             name='learning_rate')
            self.train_op = tf.train.AdamOptimizer(
                self.learning_rate).minimize(self.loss)
            self.extra_update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)

        with tf.variable_scope('results'):
            self.correct_prediction = tf.nn.in_top_k(valid_logits,
                                                     valid_labels, 1)
            self.accuracy = tf.reduce_mean(
                tf.cast(self.correct_prediction, tf.float32))
            self.prob_logits = tf.nn.softmax(self.logits)

            tf.summary.scalar('learning_rate', self.learning_rate)
            tf.summary.scalar('loss', self.loss)
            tf.summary.scalar('accuracy', self.accuracy)

        my_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)
        self.saver = tf.train.Saver(my_vars, max_to_keep=100)
        c_proto = tf.ConfigProto()
        c_proto.gpu_options.allow_growth = True
        self.sess = tf.Session(config=c_proto)
        self.merged = tf.summary.merge_all()
        self.train_writer = tf.summary.FileWriter(config.train_sum_dir,
                                                  self.sess.graph)
        self.sess.run(tf.global_variables_initializer())
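The ignored-label remapping in the loss scope can be checked in isolation. A toy re-run, assuming five raw classes with class 1 ignored (so the logits cover four classes):

import tensorflow as tf

num_classes = 4
ignored_label_inds = [1]  # assumed toy configuration
labels = tf.constant([0, 1, 2, 3, 4])

ignored_bool = tf.zeros_like(labels, dtype=tf.bool)
for ign_label in ignored_label_inds:
    ignored_bool = tf.logical_or(ignored_bool, tf.equal(labels, ign_label))

valid_idx = tf.squeeze(tf.where(tf.logical_not(ignored_bool)))
valid_labels_init = tf.gather(labels, valid_idx)

# Insert a dummy entry at each ignored index so that gathering maps the
# surviving raw labels onto the contiguous range [0, num_classes).
reducing_list = tf.range(num_classes, dtype=tf.int32)
inserted_value = tf.zeros((1,), dtype=tf.int32)
for ign_label in ignored_label_inds:
    reducing_list = tf.concat([reducing_list[:ign_label], inserted_value,
                               reducing_list[ign_label:]], 0)
valid_labels = tf.gather(reducing_list, valid_labels_init)

with tf.Session() as sess:
    print(sess.run(valid_labels))  # [0 1 2 3]: raw labels 2,3,4 -> 1,2,3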