Example 1
    def build_train_graph(self):
        (elmo_context_input, elmo_utterances_input, elmo_context_sentence_input) = \
            self.elmo.build_embeddings_op(self.context_ids_ph, self.utterances_ids_ph,
                                          self.context_sentence_ids_ph)

        # logits
        with tf.variable_scope("inference", reuse=False):
            self.logits = self._inference(
                elmo_context_input['weighted_op'], self.context_len_ph,
                elmo_utterances_input['weighted_op'], self.utterances_len_ph,
                elmo_context_sentence_input['weighted_op'],
                self.context_sentence_len_ph, self.tot_context_len_ph,
                self.speaker_ph)

        self.loss_op = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=self.logits, labels=self.target_ph, name="cross_entropy")
        # self.logits_max = tf.argmax(self.logits, axis=-1)
        self.loss_op = tf.reduce_mean(self.loss_op, name="cross_entropy_mean")
        self.train_op = tf.train.AdamOptimizer().minimize(
            self.loss_op, global_step=self.global_step)

        # top-1 accuracy: does the highest-scoring candidate match the target?
        correct = tf.nn.in_top_k(self.logits, self.target_ph, 1)
        correct_count = tf.reduce_sum(tf.cast(correct, tf.int32))

        self.accuracy = tf.divide(correct_count, tf.shape(self.target_ph)[0])
        # candidate indices ranked by descending score, plus softmax confidences
        self.predictions = argsort(self.logits, axis=1, direction='DESCENDING')
        self.confidence = tf.nn.softmax(self.logits, axis=-1)
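For orientation, here is a minimal sketch of how the ops built above might be driven during training. The model attribute names come from the snippet itself; the batch dictionary keys are purely illustrative assumptions.

def run_train_step(sess, model, batch):
    # Map each placeholder of the model to the corresponding batch tensor.
    # The batch keys below are hypothetical; only the placeholder names are
    # taken from the snippet above.
    feed_dict = {
        model.context_ids_ph: batch['context_ids'],
        model.context_len_ph: batch['context_len'],
        model.utterances_ids_ph: batch['utterances_ids'],
        model.utterances_len_ph: batch['utterances_len'],
        model.context_sentence_ids_ph: batch['context_sentence_ids'],
        model.context_sentence_len_ph: batch['context_sentence_len'],
        model.tot_context_len_ph: batch['tot_context_len'],
        model.speaker_ph: batch['speaker'],
        model.target_ph: batch['target'],
    }
    # One optimization step; loss and accuracy are returned for logging.
    _, loss, acc = sess.run([model.train_op, model.loss_op, model.accuracy],
                            feed_dict=feed_dict)
    return loss, acc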
Example 2
    def build_train_graph_multi_gpu(self):
        # self.hparams.gpu_num is the list of GPU ids; gpu_num is their count
        gpu_num = len(self.hparams.gpu_num)

        context_ph = tf.split(self.context_ph, gpu_num, 0)
        context_len_ph = tf.split(self.context_len_ph, gpu_num, 0)
        utterances_ph = tf.split(self.utterances_ph, gpu_num, 0)
        utterances_len_ph = tf.split(self.utterances_len_ph, gpu_num, 0)
        target_ph = tf.split(self.target_ph, gpu_num, 0)

        context_sentence_ph = tf.split(self.context_sentence_ph, gpu_num, 0)
        context_sentence_len_ph = tf.split(self.context_sentence_len_ph,
                                           gpu_num, 0)
        tot_context_len_ph = tf.split(self.tot_context_len_ph, gpu_num, 0)
        speaker_ph = tf.split(self.speaker_ph, gpu_num, 0)

        optimizer = tf.train.AdamOptimizer(self.hparams.learning_rate)

        tower_grads = []
        tot_losses = []
        tot_logits = []
        tot_labels = []

        for i, gpu_id in enumerate(self.hparams.gpu_num):
            with tf.device('/gpu:%d' % gpu_id):
                with tf.variable_scope("inference", reuse=tf.AUTO_REUSE):
                    logits = self._inference(
                        context_ph[i], context_len_ph[i], utterances_ph[i],
                        utterances_len_ph[i], context_sentence_ph[i],
                        context_sentence_len_ph[i], tot_context_len_ph[i],
                        speaker_ph[i])

                    loss_op = tf.nn.sparse_softmax_cross_entropy_with_logits(
                        logits=logits,
                        labels=target_ph[i],
                        name="cross_entropy")
                    loss_op = tf.reduce_mean(loss_op,
                                             name="cross_entropy_mean")

                    tot_losses.append(loss_op)
                    tot_logits.append(logits)
                    tot_labels.append(target_ph[i])

                    grads = optimizer.compute_gradients(loss_op)

                    tower_grads.append(grads)
                    tf.get_variable_scope().reuse_variables()

        # average the per-tower gradients, then apply a single update
        grads = average_gradients(tower_grads)
        self.loss_op = tf.divide(tf.add_n(tot_losses), gpu_num)
        self.logits = tf.concat(tot_logits, axis=0)
        tot_labels = tf.concat(tot_labels, axis=0)
        self.train_op = optimizer.apply_gradients(grads, self.global_step)

        # top-1 accuracy over the re-concatenated batch
        correct = tf.nn.in_top_k(self.logits, tot_labels, 1)
        correct_count = tf.reduce_sum(tf.cast(correct, tf.int32))
        self.accuracy = tf.divide(correct_count, tf.shape(self.target_ph)[0])
        self.predictions = argsort(self.logits, axis=1, direction='DESCENDING')
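The average_gradients helper referenced above is not shown in this example. Below is a sketch of what such a helper conventionally does, in the style of the classic TF 1.x multi-tower tutorials; this is an assumption about its behavior, not the repository's actual implementation.

import tensorflow as tf

def average_gradients(tower_grads):
    """Averages gradients across towers.

    tower_grads: list with one entry per GPU, each a list of
        (gradient, variable) pairs from optimizer.compute_gradients().
    Returns a single list of (averaged_gradient, variable) pairs.
    """
    average_grads = []
    for grad_and_vars in zip(*tower_grads):
        # grad_and_vars = ((grad_gpu0, var), (grad_gpu1, var), ...)
        grads = [tf.expand_dims(g, 0) for g, _ in grad_and_vars]
        grad = tf.reduce_mean(tf.concat(grads, axis=0), axis=0)
        # Variables are shared across towers, so any tower's copy will do.
        average_grads.append((grad, grad_and_vars[0][1]))
    return average_grads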
Example 3
    def _classification_loss(self, pred_label, gt_label, num_matched_boxes):
        """Computes the classification loss.

    Computes the classification loss with hard negative mining.
    Args:
      pred_label: a flatten tensor that includes all predicted class. The shape
        is [batch_size, num_anchors, num_classes].
      gt_label: a tensor that represents the classification groundtruth targets.
        The shape is [batch_size, num_anchors, 1].
      num_matched_boxes: the number of anchors that are matched to a groundtruth
        targets. This is used as the loss normalizater.

    Returns:
      box_loss: a float32 representing total box regression loss.
    """
        cross_entropy = tf.losses.sparse_softmax_cross_entropy(
            gt_label, pred_label, reduction=tf.losses.Reduction.NONE)

        mask = tf.greater(tf.squeeze(gt_label), 0)
        float_mask = tf.cast(mask, tf.float32)

        # Hard negative mining: zero out the loss of positive anchors, then
        # turn each remaining (negative) loss into its per-image rank via a
        # double argsort (rank 0 = largest loss).
        neg_masked_cross_entropy = cross_entropy * (1 - float_mask)
        relative_position = contrib_framework.argsort(
            contrib_framework.argsort(neg_masked_cross_entropy,
                                      direction='DESCENDING'))
        # Keep NEGS_PER_POSITIVE negatives per matched box, capped at the
        # total number of anchors.
        num_neg_boxes = tf.minimum(
            tf.to_int32(num_matched_boxes) * ssd_constants.NEGS_PER_POSITIVE,
            ssd_constants.NUM_SSD_BOXES)
        top_k_neg_mask = tf.cast(
            tf.less(
                relative_position,
                tf.tile(num_neg_boxes[:, tf.newaxis],
                        (1, ssd_constants.NUM_SSD_BOXES))), tf.float32)

        class_loss = tf.reduce_sum(tf.multiply(cross_entropy,
                                               float_mask + top_k_neg_mask),
                                   axis=1)

        return tf.reduce_mean(class_loss / num_matched_boxes)
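The nested argsort above is the standard trick for turning a row of values into their per-row ranks: argsorting the result of a descending argsort assigns rank 0 to the largest loss, rank 1 to the next, and so on, so comparing the rank against k selects the k hardest negatives. A standalone sketch of that trick on a toy tensor (TF 1.x; tf.argsort here plays the role of contrib_framework.argsort):

import tensorflow as tf

# Toy per-anchor negative losses for a single image.
neg_loss = tf.constant([[0.2, 0.9, 0.1, 0.5]], dtype=tf.float32)

# argsort of argsort gives each element's 0-based rank under a descending
# sort: the largest loss gets rank 0, the second largest rank 1, and so on.
rank = tf.argsort(tf.argsort(neg_loss, direction='DESCENDING'))

# Keep the k hardest negatives per row (k = 2 here).
k = 2
top_k_neg_mask = tf.cast(tf.less(rank, k), tf.float32)

with tf.Session() as sess:
    print(sess.run(rank))            # [[2 0 3 1]]
    print(sess.run(top_k_neg_mask))  # [[0. 1. 0. 1.]]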
Example 4
def non_max_suppression(detection_boxes,
                        detection_scores,
                        max_output_size=70,
                        iou_threshold=0.05,
                        scope=None):
    """"Non max suppression and abnormal filtering."""
    with tf.name_scope(scope, 'Non_max_suppression',
                       [max_output_size, iou_threshold]):
        selected_indices = tf.image.non_max_suppression(
            detection_boxes, detection_scores, max_output_size, iou_threshold)
        result_boxes = tf.gather(detection_boxes,
                                 selected_indices,
                                 name='result_boxes')
        result_scores = tf.gather(detection_scores,
                                  selected_indices,
                                  name='result_scores')

        # Flag boxes whose IoU with another kept box is strictly between 0 and
        # 1, i.e. boxes that still partially overlap after NMS.
        abnormal_inter = target_assigner.iou(result_boxes, result_boxes)
        abnormal_inter = tf.where((abnormal_inter > 0) & (abnormal_inter < 1),
                                  tf.ones_like(abnormal_inter),
                                  tf.zeros_like(abnormal_inter),
                                  name='abnormal_inter')
        num_inter = tf.reduce_sum(abnormal_inter, 0)
        abnormal_inter_idx = tf.where(num_inter >= 2)
        abnormal_inter_idx = tf.reshape(abnormal_inter_idx, [-1])
        abnormal_inter_idx = tf.cast(abnormal_inter_idx,
                                     tf.int32,
                                     name='abnormal_inter_idx')

        # Spatially extreme boxes: for each of the first two coordinate
        # columns (ymin/xmin in the usual box convention), the indices of the
        # two smallest and the two largest values.
        abnormal_indices = argsort(result_boxes[:, :2], axis=0)
        abnormal_indices = tf.concat(
            [abnormal_indices[:2], abnormal_indices[-2:]],
            0,
            name='abnormal_indices')

        result_dict = {
            'result_boxes': result_boxes,
            'result_scores': result_scores,
            'abnormal_indices': abnormal_indices,
            'abnormal_inter_idx': abnormal_inter_idx,
            'abnormal_inter': abnormal_inter
        }
        return result_dict
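The "abnormal" bookkeeping above marks kept boxes whose IoU with another kept box is strictly between 0 and 1, and a box is flagged once it partially overlaps at least two others. A toy sketch of that counting step on a hand-written IoU matrix (the matrix values are made up for illustration):

import tensorflow as tf

# Hypothetical 3x3 IoU matrix for three kept boxes; the diagonal is 1 because
# every box fully overlaps itself, off-diagonal entries are partial overlaps.
iou = tf.constant([[1.0, 0.3, 0.0],
                   [0.3, 1.0, 0.2],
                   [0.0, 0.2, 1.0]])

# 1 where the overlap is partial (strictly between 0 and 1), 0 elsewhere.
partial = tf.where((iou > 0) & (iou < 1),
                   tf.ones_like(iou), tf.zeros_like(iou))

# A box counts as abnormal once it partially overlaps at least two others.
num_inter = tf.reduce_sum(partial, axis=0)
abnormal_idx = tf.reshape(tf.where(num_inter >= 2), [-1])

with tf.Session() as sess:
    print(sess.run(num_inter))     # [1. 2. 1.]
    print(sess.run(abnormal_idx))  # [1]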
Example 5
    def build_train_graph(self):

        # logits
        with tf.variable_scope("inference", reuse=False):
            self.logits = self._inference(self.context_ph, self.context_len_ph,
                                          self.utterances_ph, self.utterances_len_ph)

        self.loss_op = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=self.logits,
                                                                      labels=self.target_ph,
                                                                      name="cross_entropy")

        self.loss_op = tf.reduce_mean(self.loss_op, name="cross_entropy_mean")
        self.train_op = tf.train.AdamOptimizer().minimize(self.loss_op, global_step=self.global_step)

        # top-1 accuracy: does the highest-scoring candidate match the target?
        correct = tf.nn.in_top_k(self.logits, self.target_ph, 1)
        correct_count = tf.reduce_sum(tf.cast(correct, tf.int32))

        self.accuracy = tf.divide(correct_count, tf.shape(self.target_ph)[0])
        self.predictions = argsort(self.logits, axis=1, direction='DESCENDING')
Example 6
def map_charades(y_true, y_pred):
    """
    Returns mAP
    """
    m_aps = []

    tf_one = tf.constant(1, dtype=tf.float32)

    n_classes = y_pred.shape[1]
    for oc_i in range(n_classes):
        pred_row = y_pred[:, oc_i]
        sorted_idxs = tf_framework.argsort(-pred_row)
        true_row = y_true[:, oc_i]
        # reorder the ground-truth labels by descending prediction score
        true_row = tf.gather(true_row, sorted_idxs)
        tp_boolean = tf.equal(true_row, tf_one)
        tp = tf.cast(tp_boolean, dtype=np.float32)
        # a retrieved item is a false positive exactly when it is not a true
        # positive, so cumsum(fp) + cumsum(tp) below equals the 1-based rank
        fp = tf_one - tp
        n_pos = tf.reduce_sum(tp)
        f_pcs = tf.cumsum(fp)
        t_pcs = tf.cumsum(tp)
        s = f_pcs + t_pcs

        s = tf.cast(s, tf.float32)
        t_pcs = tf.cast(t_pcs, tf.float32)
        tp_float = tf.cast(tp_boolean, np.float32)

        prec = t_pcs / s
        # average precision for this class: precision accumulated at the ranks
        # of the true positives, normalized by the number of positives
        n_pos = tf.cast(n_pos, tf.float32)
        avg_prec = tf.reduce_sum(prec * tp_float) / n_pos
        avg_prec = tf.expand_dims(avg_prec, axis=0)
        m_aps.append(avg_prec)

    m_aps = K.concatenate(m_aps, axis=0)
    mAP = K.mean(m_aps)
    return mAP
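For comparison, the same metric expressed in plain NumPy: a sketch of the standard Charades-style mAP, assuming binary ground-truth labels, which can serve as a reference when checking the TF graph above.

import numpy as np

def map_charades_np(y_true, y_pred):
    """Reference mAP: mean over classes of average precision."""
    aps = []
    for c in range(y_pred.shape[1]):
        order = np.argsort(-y_pred[:, c])            # rank by descending score
        tp = (y_true[order, c] == 1).astype(np.float32)
        fp = 1.0 - tp
        n_pos = tp.sum()
        if n_pos == 0:                               # no positives: skip class
            continue
        prec = np.cumsum(tp) / (np.cumsum(tp) + np.cumsum(fp))
        aps.append(np.sum(prec * tp) / n_pos)        # AP for this class
    return float(np.mean(aps))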
Example 7
    def evaluate(self, saved_file: str):

        context = tf.placeholder(tf.int32, shape=[None, None], name="context")
        context_len = tf.placeholder(tf.int32,
                                     shape=[None],
                                     name="context_len")
        utterances = tf.placeholder(tf.int32,
                                    shape=[None, None, None],
                                    name="utterances")
        utterances_len = tf.placeholder(tf.int32,
                                        shape=[None, None],
                                        name="utterances_len")
        target = tf.placeholder(tf.int32, shape=[None], name="target")

        # logits
        with tf.variable_scope("inference", reuse=False):
            logits = self._inference(context, context_len, utterances,
                                     utterances_len)

        predictions = argsort(logits, axis=1, direction='DESCENDING')

        sess = tf.Session()
        sess.run(tf.global_variables_initializer())

        saver = tf.train.Saver()
        saver.restore(sess, saved_file)

        data = DataProcess(self.hparams.valid_path, "test", self.word2id)

        k_list = [1, 2, 5, 10, 50, 100]
        total_examples = 0
        total_correct = np.zeros([len(k_list)], dtype=np.int32)

        while True:
            pad_batch_data = data.get_batch_data(self.hparams.batch_size)

            if pad_batch_data is None:
                break
            (pad_context, context_len_batch), (
                pad_utterances,
                utterances_len_batch), target_batch = pad_batch_data

            feed_dict = {
                context: pad_context,
                context_len: context_len_batch,
                utterances: pad_utterances,
                utterances_len: utterances_len_batch,
                target: target_batch
            }
            pred_val = sess.run(predictions, feed_dict=feed_dict)
            num_correct, num_examples = evaluate_recall(
                pred_val, target_batch, k_list)

            total_examples += num_examples
            total_correct = np.add(total_correct, num_correct)

        recall_result = ""
        for i in range(len(k_list)):
            recall_result += "Recall@%s : " % k_list[i] + "%.2f%% | " % (
                (total_correct[i] / total_examples) * 100)
        self._logger.info(recall_result)
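The evaluate_recall helper called above is not shown. Below is a minimal sketch of what such a helper typically computes, written as an assumption about its interface (ranked candidate indices per example, ground-truth indices, and a list of k cutoffs), not as the repository's actual implementation.

import numpy as np

def evaluate_recall(ranked_predictions, targets, k_list):
    """Counts, for each k, how many examples rank the target in the top k.

    ranked_predictions: [batch, num_candidates] candidate indices sorted by
        descending score (the argsort output fed in above).
    targets: [batch] ground-truth candidate index per example.
    Returns (num_correct per k as an int array, number of examples).
    """
    targets = np.asarray(targets)
    num_correct = np.zeros(len(k_list), dtype=np.int32)
    for i, k in enumerate(k_list):
        hits = np.any(ranked_predictions[:, :k] == targets[:, None], axis=1)
        num_correct[i] = int(np.sum(hits))
    return num_correct, len(targets)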