def yolo_non_max_suppression(scores, boxes, classes, max_boxes = 10, iou_threshold = 0.5):
    """
    Applies Non-max suppression (NMS) to a set of boxes

    Arguments:
    scores -- tensor of shape (None,), output of yolo_filter_boxes()
    boxes -- tensor of shape (None, 4), output of yolo_filter_boxes() that have been scaled to the image size (see later)
    classes -- tensor of shape (None,), output of yolo_filter_boxes()
    max_boxes -- integer, maximum number of predicted boxes you'd like
    iou_threshold -- real value, "intersection over union" threshold used for NMS filtering

    Returns:
    scores -- tensor of shape (None,), predicted score for each box
    boxes -- tensor of shape (None, 4), predicted box coordinates
    classes -- tensor of shape (None,), predicted class for each box

    """

    max_boxes_tensor = K.variable(max_boxes, dtype='int32')     # tensor to be used in tf.image.non_max_suppression()
    K.get_session().run(tf.variables_initializer([max_boxes_tensor])) # initialize variable max_boxes_tensor

    # Use tf.image.non_max_suppression() to get the list of indices corresponding to boxes you keep
    nms_indices = tf.image.non_max_suppression(boxes, scores, max_boxes_tensor, iou_threshold=iou_threshold)


    # Use K.gather() to select only nms_indices from scores, boxes and classes
    scores = K.gather(scores, nms_indices)
    boxes = K.gather(boxes, nms_indices)
    classes = K.gather(classes, nms_indices)


    return scores, boxes, classes
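
A minimal usage sketch (not from the original source): assuming a TF1.x-era graph with the Keras backend and the function above in scope, random tensors can be pushed through to check the output shapes.

# Hypothetical smoke test for yolo_non_max_suppression (TF1.x API assumed).
import numpy as np
import tensorflow as tf
from keras import backend as K

sess = K.get_session()
scores_t = tf.constant(np.random.rand(54).astype('float32'))
boxes_t = tf.constant(np.random.rand(54, 4).astype('float32'))
classes_t = tf.constant(np.random.randint(0, 80, size=54, dtype=np.int32))
s, b, c = yolo_non_max_suppression(scores_t, boxes_t, classes_t)
print(sess.run([tf.shape(s), tf.shape(b), tf.shape(c)]))  # each first dim <= 10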
Example #2
    def call(self):
        E = K.variable(np.random.random((1000,100)), name="entity_embeddings")
        R = K.variable(np.random.random((10,10000)), name="relation_embeddings")
        x = K.placeholder(shape=(1,3), name="spo")
        y = K.placeholder(ndim=0, name="y")
        batch_placeholder = K.cast(x, 'int32')[0]
        # print(batch_placeholder.eval())
        s, o, p = [batch_placeholder[i] for i in range(3)]

        s2v = K.gather(E, s)
        o2v = K.gather(E, o)
        r2v = K.gather(R, p)

        def ccorr(a, b):
            return T.outer(a,b).flatten()
            # return T.arctan(s2v) + T.arctan(o2v)
            # return (s2v.dimshuffle('x', 'x', 0, 'x') + o2v.dimshuffle('x', 'x', 0, 'x')).flatten()
            # return T.nnet.conv2d(a.dimshuffle('x', 'x', 0, 'x'), b.dimshuffle('x', 'x', 0, 'x'), None,
            #                None,
            #                filter_flip=True, border_mode='half')
            # return self.ccorr1d_sc(a, b, border_mode='half')
        eta = K.dot(r2v, ccorr(s2v, o2v))
        # py = 1/(1+K.exp(-eta))
        # l = -K.log(py)
        # from theano import pp, function, printing
        # grad = T.grad(eta, E)
        # print(pp(grad))
        # func = function([x], grad)
        func = K.function([x, y], K.gradients(eta, [s2v, o2v, r2v, E, R]))

        # for i in func.maker.fgraph.outputs:
            # print(pp(i))
        # print (T.grad(py, s2v))
        print(func([[[1, 2, 3]], -1]))
Example #3
def yolo_eval(yolo_outputs,
              image_shape,
              max_boxes=10,
              score_threshold=.6,
              iou_threshold=.5):
    """Evaluate YOLO model on given input batch and return filtered boxes."""
    box_xy, box_wh, box_confidence, box_class_probs = yolo_outputs
    boxes = yolo_boxes_to_corners(box_xy, box_wh)
    boxes, scores, classes = yolo_filter_boxes(
        boxes, box_confidence, box_class_probs, threshold=score_threshold)

    # Scale boxes back to original image shape.
    height = image_shape[0]
    width = image_shape[1]
    image_dims = K.stack([height, width, height, width])
    image_dims = K.reshape(image_dims, [1, 4])
    boxes = boxes * image_dims

    # TODO: Something must be done about this ugly hack!
    max_boxes_tensor = K.variable(max_boxes, dtype='int32')
    K.get_session().run(tf.variables_initializer([max_boxes_tensor]))
    nms_index = tf.image.non_max_suppression(
        boxes, scores, max_boxes_tensor, iou_threshold=iou_threshold)
    boxes = K.gather(boxes, nms_index)
    scores = K.gather(scores, nms_index)
    classes = K.gather(classes, nms_index)
    return boxes, scores, classes
Example #4
    def call(self, x, mask=None):
        if isinstance(x, list): 
            x,_ = x
        if mask is not None and isinstance(mask, list):
            mask,_ = mask
        if 0. < self.dropout < 1.:
            retain_p = 1. - self.dropout
            dims = self.W._keras_shape[:-1]
            B = K.random_binomial(dims, p=retain_p) * (1. / retain_p)
            B = K.expand_dims(B)
            W = K.in_train_phase(self.W * B, self.W)
        else:
            W = self.W
        
        if self.mode == 'matrix':
            return K.gather(W,x)
        elif self.mode == 'tensor':
            # quick and dirty: only allowing for 3dim inputs when it's tensor mode
            assert K.ndim(x) == 3
            # put sequence on first; gather; take diagonal across shared batch dimension
            # in other words, W is (B, S, F)
            # incoming x is (B, S, A)
            inds = K.arange(self.W._keras_shape[0])
            #out = K.gather(K.permute_dimensions(W, (1,0,2)), x).diagonal(axis1=0, axis2=3)
            #return K.permute_dimensions(out, (3,0,1,2))
            ### method above doesn't do grads =.=
            # tensor abc goes to bac, indexed onto with xyz, goes to xyzac, 
            # x == a, so shape to xayzc == xxyzc
            # take diagonal on first two: xyzc 
            #out = K.colgather()
            out = K.gather(K.permute_dimensions(W, (1,0,2)), x) 
            out = K.permute_dimensions(out, (0,3,1,2,4))
            out = K.gather(out, (inds, inds))
            return out
        else:
            raise Exception('sanity check. should not be here.')

        #all_dims = T.arange(len(self.W._keras_shape))
        #first_shuffle = [all_dims[self.embed_dim]] + all_dims[:self.embed_dim] + all_dims[self.embed_dim+1:]
        ## 1. take diagonal from 0th to
        ## change of tactics
        ## embed on time or embed on batch. that's all I'm supporting.  
        ## if it's embed on time, then, x.ndim+1 is where batch will be, and is what
        ## i need to take the diagonal over. 
        ## now dim shuffle the xdims + 1 to the front.
        #todo: get second shuffle or maybe find diagonal calculations
        #out = K.gather(W, x)
        #return out

        ### reference
        #A = S(np.arange(60).reshape(3,4,5))
        #x = S(np.random.randint(0, 4, (3,4,10)))
        #x_emb = A.dimshuffle(1,0,2)[x].dimshuffle(0,3,1,2,4)[T.arange(A.shape[0]), T.arange(A.shape[0])]
Example #5
    def test_gather(self):
        shape = (10, 2, 3)
        ref = np.arange(np.prod(shape)).reshape(shape)
        ref_th = KTH.variable(ref)
        ref_tf = KTF.variable(ref)

        inds = [1, 3, 7, 9]
        inds_th = KTH.variable(inds, dtype='int32')
        inds_tf = KTF.variable(inds, dtype='int32')
        th_z = KTH.gather(ref_th, inds_th)
        th_result = KTH.eval(th_z)
        tf_result = KTF.eval(KTF.gather(ref_tf, inds_tf))

        assert_allclose(tf_result, th_result, atol=1e-05)

        if hasattr(th_z, '_keras_shape'):
            assert th_z._keras_shape == th_result.shape

        # test theano shape inference when
        # input shape has None entries
        if K.backend() == 'theano':
            x = K.placeholder(shape=(None, 3, 4))
            indices = K.placeholder(shape=(5, 6), dtype='int32')
            y = K.gather(x, indices)
            assert y._keras_shape == (5, 6, 3, 4)
Example #6
    def call(self, x, mask=None):
        batch_placeholder = K.cast(x, 'int32')[0]
        s, o, p = [batch_placeholder[i] for i in range(3)]

        s2v = K.gather(self.E, s)
        o2v = K.gather(self.E, o)
        r2v = K.gather(self.R, p)

        # print(K.shape(s2v).eval())
        # print(self.E[[0]].shape.eval())

        def ccorr(a, b):
            return self.ccorr1d_sc(a, b, border_mode='half')

        eta = K.dot(K.transpose(r2v), ccorr(s2v, o2v))
        return eta
Example #7
def eval(outputs, anchors, num_classes, image_shape,
         max_boxes=20, score_threshold=.6, iou_threshold=.5):
    '''Evaluate the YOLO model on given input and return filtered boxes'''

    num_layers = len(outputs)
    anchor_mask = [[6, 7, 8], [3, 4, 5], [0, 1, 2]] if num_layers == 3 else [
        [3, 4, 5], [1, 2, 3]]
    input_shape = K.shape(outputs[0])[1:3] * 32
    boxes = []
    box_scores = []

    for l in range(num_layers):
        _boxes, _box_scores = boxes_and_scores(outputs[l],
                                               anchors[anchor_mask[l]],
                                               num_classes, input_shape,
                                               image_shape)
        boxes.append(_boxes)
        box_scores.append(_box_scores)

    boxes = K.concatenate(boxes, axis=0)
    box_scores = K.concatenate(box_scores, axis=0)

    mask = box_scores >= score_threshold
    max_boxes_tensor = K.constant(max_boxes, dtype='int32')
    boxes_ = []
    scores_ = []
    classes_ = []

    for c in range(num_classes):
        # TODO: use Keras backend instead of tf.
        class_boxes = tf.boolean_mask(boxes, mask[:, c])
        class_box_scores = tf.boolean_mask(box_scores[:, c], mask[:, c])
        nms_index = tf.image.non_max_suppression(
            class_boxes, class_box_scores, max_boxes_tensor,
            iou_threshold=iou_threshold)
        class_boxes = K.gather(class_boxes, nms_index)
        class_box_scores = K.gather(class_box_scores, nms_index)
        classes = K.ones_like(class_box_scores, 'int32') * c
        boxes_.append(class_boxes)
        scores_.append(class_box_scores)
        classes_.append(classes)

    boxes_ = K.concatenate(boxes_, axis=0)
    scores_ = K.concatenate(scores_, axis=0)
    classes_ = K.concatenate(classes_, axis=0)

    return boxes_, scores_, classes_
Example #8
    def call(self, x, mask=None):
        batch_placeholder = K.cast(x, 'int32')[0]
        s, o, p = [batch_placeholder[i] for i in range(3)]

        s2v = K.gather(self.E, s)
        o2v = K.gather(self.E, o)
        r2v = K.gather(self.R, p)

        def ccorr(a, b):
            return T.outer(a,b).flatten()
            # return self.ccorr1d_sc(a, b, border_mode='half')
        eta = K.dot(r2v, ccorr(s2v, o2v))

        # func = K.function([s2v,o2v,r2v], K.gradients(K.sigmoid(eta), [s2v,o2v,r2v]))
        # print(func([np.random.random(150),np.random.random(150),np.random.random(150)]))

        return eta
Example #9
def construct_perturbed_input(perturb_mapping, onehot_vectors):
    """

    :param perturb_mapping:
    :param onehot_vectors:
    :return:
    """

    return K.gather(perturb_mapping, onehot_vectors)
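
A quick sketch of the behavior (hypothetical values, not from the source): K.gather selects whole rows of perturb_mapping, so integer ids map to their perturbation vectors.

# Hedged example: rows 2, 0, 3 of a 4 x 4 mapping are returned in that order.
import numpy as np
from keras import backend as K

perturb_mapping = K.variable(np.eye(4, dtype='float32'))
ids = K.constant([2, 0, 3], dtype='int32')
print(K.eval(construct_perturbed_input(perturb_mapping, ids)))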
Example #10
 def call(self, x, mask=None):
     if 0. < self.dropout < 1.:
         retain_p = 1. - self.dropout
         B = K.random_binomial((self.input_dim,), p=retain_p) * (1. / retain_p)
         B = K.expand_dims(B)
         W = K.in_train_phase(self.W * B, self.W)
     else:
         W = self.W
     out = K.gather(W, x)
     return out
Example #11
 def call(self, x, mask=None):
     if 0. < self.dropout < 1.:
         retain_p = 1. - self.dropout
         B = K.random_binomial((self.input_dim,), p=retain_p) * (1. / retain_p)
         B = K.expand_dims(B)
         W = K.in_train_phase(self.W * B, self.W)
     else:
         W = self.W
     W_ = T.concatenate([self.zeros_vector, W], axis=0)
     out = K.gather(W_, x)
     return out
Example #12
 def lookup(self, x, W, memory_length):
     # shape: (batch*memory_length, input_length)
     x = K.cast(K.reshape(x, (-1, self.input_length)), 'int32')
     mask = K.expand_dims(K.not_equal(x, 0.), dim=-1)
     # shape: (batch*memory_length, input_length, output_dim)
     X = K.gather(W, x)
     if self.bow_mode == "bow":
         # shape: (batch*memory_length, output_dim)
         X = K.sum(X + K.expand_dims(self.Te, 0), axis=1)
     # shape: (batch, memory_length, output_dim)
     X = K.reshape(X, (-1, memory_length, self.output_dim))
     return X, mask
Example #13
    def call(self, x, mask=None):
        batch_placeholder = K.cast(x, 'int32')[0]
        s, o, p = [batch_placeholder[i] for i in range(3)]

        s2v = K.gather(self.E, s)
        o2v = K.gather(self.E, o)
        r2v = K.gather(self.R, p)

        def ccorr(a, b):
            # Return tensor product - basically bilinear/RESCAL models
            return T.outer(a,b).flatten()

            # Or cross-correlation op?
            # return T.nnet.conv2d(a.dimshuffle('x', 'x', 0, 'x'), b.dimshuffle('x', 'x', 0, 'x'), None,
            #                None,
            #                filter_flip=True, border_mode='half').flatten()[:-1]
            # return self.ccorr1d_sc(a, b, border_mode='half')
        eta = K.dot(r2v, ccorr(s2v, o2v))

        # func = K.function([s2v,o2v,r2v], K.gradients(K.sigmoid(eta), [s2v,o2v,r2v]))
        # print(func([np.random.random(150),np.random.random(150),np.random.random(150)]))

        return eta
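
The comment above identifies ccorr with the bilinear/RESCAL score. As a sanity check (numpy only, not from the source), r dotted with vec(s o^T) equals s^T R o when R = r.reshape(d, d):

# Hedged numeric check of the outer-product trick.
import numpy as np

d = 4
s, o = np.random.randn(d), np.random.randn(d)
r = np.random.randn(d * d)
R = r.reshape(d, d)
assert np.allclose(r @ np.outer(s, o).flatten(), s @ R @ o)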
Example #14
 def call(self, x, mask=None):
     if K.dtype(x) != 'int32':
         x = K.cast(x, 'int32')
     if 0. < self.dropout < 1.:
         retain_p = 1. - self.dropout
         B = K.random_binomial((self.input_dim,), p=retain_p) * (1. / retain_p)
         B = K.expand_dims(B)
         W = K.in_train_phase(self.W * B, self.W)
     else:
         W = self.W
     denorm = K.sum(W, axis=0)
     W = W / denorm
     out = K.gather(W, x)
     return out
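
Dividing W by its column sums (K.sum over axis 0) makes every embedding dimension sum to one across the vocabulary before the gather. A small numpy check (hedged, illustrative shapes):

# Hedged check of the column normalization performed above.
import numpy as np

W = np.random.rand(6, 3)
Wn = W / W.sum(axis=0)
assert np.allclose(Wn.sum(axis=0), 1.0)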
Example #15
def yolo_non_max_suppression(scores, boxes, classes, max_boxes = 10, iou_threshold = 0.5):
    """
    Applies Non-max suppression (NMS) to a set of boxes
    
    Arguments:
    scores -- tensor of shape (None,), output of yolo_filter_boxes()
    boxes -- tensor of shape (None, 4), output of yolo_filter_boxes() that have been scaled to the image size (see later)
    classes -- tensor of shape (None,), output of yolo_filter_boxes()
    max_boxes -- integer, maximum number of predicted boxes you'd like
    iou_threshold -- real value, "intersection over union" threshold used for NMS filtering
    
    Returns:
    scores -- tensor of shape (None,), predicted score for each box
    boxes -- tensor of shape (None, 4), predicted box coordinates
    classes -- tensor of shape (None,), predicted class for each box
    
    Note: The "None" dimension of the output tensors is at most max_boxes, since NMS keeps
    no more than max_boxes boxes.
    """
    
    max_boxes_tensor = K.variable(max_boxes, dtype='int32')     # tensor to be used in tf.image.non_max_suppression()
    K.get_session().run(tf.variables_initializer([max_boxes_tensor])) # initialize variable max_boxes_tensor
    
    # Use tf.image.non_max_suppression() to get the list of indices corresponding to boxes you keep
    ### START CODE HERE ### (≈ 1 line)
    nms_indices = tf.image.non_max_suppression(boxes, scores, max_boxes_tensor, iou_threshold)
    ### END CODE HERE ###
    
     
    # Use K.gather() to select only nms_indices from scores, boxes and classes
    ### START CODE HERE ### (≈ 3 lines)
    scores = K.gather(scores, nms_indices)
    boxes = K.gather(boxes, nms_indices)
    classes = K.gather(classes, nms_indices)
    ### END CODE HERE ###
    
    return scores, boxes, classes
Example #16
 def call(self, inputs, mask=None):
     if not isinstance(inputs, list) or len(inputs) <= 1:
         raise TypeError('SelectSpkMemory must be called on a list of tensors '
                         '(at least 2). Got: ' + str(inputs))
     # (None(batch), 1), speaker identity
     target_spk_l = inputs[0]
     target_spk_l = K.reshape(target_spk_l, (target_spk_l.shape[0], ))
     if K.dtype(target_spk_l) != 'int32':
         target_spk_l = K.cast(target_spk_l, 'int32')
     # (None(batch), spk_size, embed_dim), life-long memory
     life_long_mem = inputs[1]
     # Extract the acoustic feature from memory
     spk_memory = K.gather(life_long_mem, target_spk_l)
     # (None(batch), embed_dim)
     return spk_memory
Example #17
    def get_output(self, train=False):
        X = self.get_input(train)
        retain_p = 1. - self.dropout
        if train and self.dropout > 0:
            B = K.random_binomial((self.input_dim,), p=retain_p)
        else:
            B = K.ones((self.input_dim)) * retain_p
        # we zero-out rows of W at random
        Xs = K.cast(K.reshape(X, (-1, self.nb_words)), 'int32')

        # (samples*input_length, nb_words, dim)
        out = K.gather(self.W * K.expand_dims(B), Xs)
        out = K.reshape(out, (-1, self.input_length, self.nb_words,
                              self.output_dim))
        # (samples, input_length, nb_words, dim)
        out = out * K.expand_dims(K.not_equal(X, 0), dim=-1)
        if self.bow_mode == "bow":
            out = K.sum(out, axis=2)
        return out
Example #18
 def call(self, inputs):
     #return x[self.dtw_y]
     x, dtw_y = inputs
     y = K.gather(x, dtw_y)
     return y
Example #19
    def _interpolate(self, image, sampled_grids, output_size):

        batch_size = K.shape(image)[0]
        height = K.shape(image)[1]
        width = K.shape(image)[2]
        num_channels = K.shape(image)[3]

        x = K.cast(K.flatten(sampled_grids[:, 0:1, :]), dtype='float32')
        y = K.cast(K.flatten(sampled_grids[:, 1:2, :]), dtype='float32')

        x = .5 * (x + 1.0) * K.cast(width, dtype='float32')
        y = .5 * (y + 1.0) * K.cast(height, dtype='float32')

        x0 = K.cast(x, 'int32')
        x1 = x0 + 1
        y0 = K.cast(y, 'int32')
        y1 = y0 + 1

        max_x = int(K.int_shape(image)[2] - 1)
        max_y = int(K.int_shape(image)[1] - 1)

        x0 = K.clip(x0, 0, max_x)
        x1 = K.clip(x1, 0, max_x)
        y0 = K.clip(y0, 0, max_y)
        y1 = K.clip(y1, 0, max_y)

        pixels_batch = K.arange(0, batch_size) * (height * width)
        pixels_batch = K.expand_dims(pixels_batch, axis=-1)
        flat_output_size = output_size[0] * output_size[1]
        base = K.repeat_elements(pixels_batch, flat_output_size, axis=1)
        base = K.flatten(base)

        # base_y0 = base + (y0 * width)
        base_y0 = y0 * width
        base_y0 = base + base_y0
        # base_y1 = base + (y1 * width)
        base_y1 = y1 * width
        base_y1 = base_y1 + base

        indices_a = base_y0 + x0
        indices_b = base_y1 + x0
        indices_c = base_y0 + x1
        indices_d = base_y1 + x1

        flat_image = K.reshape(image, shape=(-1, num_channels))
        flat_image = K.cast(flat_image, dtype='float32')
        pixel_values_a = K.gather(flat_image, indices_a)
        pixel_values_b = K.gather(flat_image, indices_b)
        pixel_values_c = K.gather(flat_image, indices_c)
        pixel_values_d = K.gather(flat_image, indices_d)

        x0 = K.cast(x0, 'float32')
        x1 = K.cast(x1, 'float32')
        y0 = K.cast(y0, 'float32')
        y1 = K.cast(y1, 'float32')

        area_a = K.expand_dims(((x1 - x) * (y1 - y)), 1)
        area_b = K.expand_dims(((x1 - x) * (y - y0)), 1)
        area_c = K.expand_dims(((x - x0) * (y1 - y)), 1)
        area_d = K.expand_dims(((x - x0) * (y - y0)), 1)

        values_a = area_a * pixel_values_a
        values_b = area_b * pixel_values_b
        values_c = area_c * pixel_values_c
        values_d = area_d * pixel_values_d
        return values_a + values_b + values_c + values_d
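
The gathers above index a flattened (batch*height*width, channels) image, where pixel (b, y, x) lives at b*height*width + y*width + x. A small numpy check of that arithmetic (illustrative values only):

# Hedged check of the flat-index computation used by _interpolate.
import numpy as np

B, H, W, C = 2, 4, 3, 1
img = np.arange(B * H * W * C).reshape(B, H, W, C)
flat = img.reshape(-1, C)
b, y, x = 1, 2, 1
assert np.array_equal(flat[b * H * W + y * W + x], img[b, y, x])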
Example #20
 def get_output(self, train=False):
     X = self.get_input(train)
     out = K.gather(self.W, X)
     return out
Example #21
def DecodeBox(outputs,
            anchors,
            num_classes,
            image_shape,
            input_shape,
            #-----------------------------------------------------------#
            #   The 13x13 feature map uses the anchors [81,82],[135,169],[344,319]
            #   The 26x26 feature map uses the anchors [10,14],[23,27],[37,58]
            #-----------------------------------------------------------#
            anchor_mask     = [[6, 7, 8], [3, 4, 5], [0, 1, 2]],
            max_boxes       = 100,
            confidence      = 0.5,
            nms_iou         = 0.3,
            letterbox_image = True):

    box_xy = []
    box_wh = []
    box_confidence  = []
    box_class_probs = []
    for i in range(len(outputs)):
        sub_box_xy, sub_box_wh, sub_box_confidence, sub_box_class_probs = \
            get_anchors_and_decode(outputs[i], anchors[anchor_mask[i]], num_classes, input_shape)
        box_xy.append(K.reshape(sub_box_xy, [-1, 2]))
        box_wh.append(K.reshape(sub_box_wh, [-1, 2]))
        box_confidence.append(K.reshape(sub_box_confidence, [-1, 1]))
        box_class_probs.append(K.reshape(sub_box_class_probs, [-1, num_classes]))
    box_xy          = K.concatenate(box_xy, axis = 0)
    box_wh          = K.concatenate(box_wh, axis = 0)
    box_confidence  = K.concatenate(box_confidence, axis = 0)
    box_class_probs = K.concatenate(box_class_probs, axis = 0)

    #------------------------------------------------------------------------------------------------------------#
    #   Before the image is fed into the network, letterbox_image pads it with gray bars, so the resulting
    #   box_xy and box_wh are relative to the padded image. They must be corrected to strip the padding,
    #   converting box_xy and box_wh into y_min, y_max, x_min, x_max.
    #   Even without letterbox_image, the normalized box_xy and box_wh must be rescaled to the original image size.
    #------------------------------------------------------------------------------------------------------------#
    boxes       = yolo_correct_boxes(box_xy, box_wh, input_shape, image_shape, letterbox_image)
    box_scores  = box_confidence * box_class_probs

    #-----------------------------------------------------------#
    #   Check whether each score reaches the confidence threshold
    #-----------------------------------------------------------#
    mask             = box_scores >= confidence
    max_boxes_tensor = K.constant(max_boxes, dtype='int32')
    boxes_out   = []
    scores_out  = []
    classes_out = []
    for c in range(num_classes):
        #-----------------------------------------------------------#
        #   Take all boxes with box_scores >= score_threshold, along with their scores
        #-----------------------------------------------------------#
        class_boxes      = tf.boolean_mask(boxes, mask[:, c])
        class_box_scores = tf.boolean_mask(box_scores[:, c], mask[:, c])

        #-----------------------------------------------------------#
        #   Non-maximum suppression:
        #   keep only the highest-scoring box within each region
        #-----------------------------------------------------------#
        nms_index = tf.image.non_max_suppression(class_boxes, class_box_scores, max_boxes_tensor, iou_threshold=nms_iou)

        #-----------------------------------------------------------#
        #   Collect the NMS results; the three outputs below are
        #   the box coordinates, the scores, and the classes
        #-----------------------------------------------------------#
        class_boxes         = K.gather(class_boxes, nms_index)
        class_box_scores    = K.gather(class_box_scores, nms_index)
        classes             = K.ones_like(class_box_scores, 'int32') * c

        boxes_out.append(class_boxes)
        scores_out.append(class_box_scores)
        classes_out.append(classes)
    boxes_out      = K.concatenate(boxes_out, axis=0)
    scores_out     = K.concatenate(scores_out, axis=0)
    classes_out    = K.concatenate(classes_out, axis=0)

    return boxes_out, scores_out, classes_out
Example #22
 def call(self, x, mask=None):
     x = K.maximum(K.minimum(x, self.model_dims[1] - 1), 0)
     return K.gather(self.W, x)
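
The K.maximum/K.minimum pair above clamps indices into range before the gather, which avoids out-of-bounds lookups. An equivalent numpy sketch (hypothetical values):

# Hedged illustration of clamping indices before an embedding lookup.
import numpy as np

W = np.arange(5)
x = np.array([-2, 1, 7])
safe = np.clip(x, 0, W.shape[0] - 1)
assert np.array_equal(W[safe], np.array([0, 1, 4]))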
Example #23
def batch_gather(reference, indices):
    ref_shape = K.shape(reference)
    batch_size = ref_shape[0]
    n_classes = ref_shape[1]
    flat_indices = K.arange(0, batch_size) * n_classes + K.flatten(indices)
    return K.gather(K.flatten(reference), flat_indices)
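
batch_gather picks one entry per batch row by converting (row, column) pairs into indices over the flattened tensor. A small check (values are illustrative, not from the source):

# Hedged example: select column 2 from row 0 and column 0 from row 1.
import numpy as np
from keras import backend as K

ref = K.variable(np.array([[10., 11., 12.], [20., 21., 22.]], dtype='float32'))
idx = K.constant([[2], [0]], dtype='int32')
print(K.eval(batch_gather(ref, idx)))  # -> [12., 20.]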
Example #24
    def load_generator_network(batch_size,
                               sequence_class,
                               n_classes=1,
                               seq_length=205,
                               supply_inputs=False,
                               gan_func=gan_func):

        sequence_class_onehots = np.eye(n_classes)

        #Generator network parameters
        latent_size = 100
        out_seed_size = 100

        #Generator inputs
        latent_input_1 = Input(tensor=K.ones((batch_size, latent_size)),
                               name='noise_input_1')
        latent_input_2 = Input(tensor=K.ones((batch_size, latent_size)),
                               name='noise_input_2')
        latent_input_1_out = Lambda(lambda inp: inp * K.random_uniform(
            (batch_size, latent_size), minval=-1.0, maxval=1.0),
                                    name='lambda_rand_input_1')(latent_input_1)
        latent_input_2_out = Lambda(lambda inp: inp * K.random_uniform(
            (batch_size, latent_size), minval=-1.0, maxval=1.0),
                                    name='lambda_rand_input_2')(latent_input_2)

        class_embedding = Lambda(
            lambda x: K.gather(K.constant(sequence_class_onehots),
                               K.cast(x[:, 0], dtype='int32')))(sequence_class)

        seed_input_1 = Concatenate(axis=-1)(
            [latent_input_1_out, class_embedding])
        seed_input_2 = Concatenate(axis=-1)(
            [latent_input_2_out, class_embedding])

        #Policy network definition
        policy_dense_0 = Dense(128,
                               activation='linear',
                               kernel_initializer='glorot_uniform',
                               name='policy_dense_0')
        batch_norm_0 = BatchNormalization(name='policy_batch_norm_0')
        relu_0 = Lambda(lambda x: K.relu(x))

        policy_dense_1 = Dense(128,
                               activation='linear',
                               kernel_initializer='glorot_uniform',
                               name='policy_dense_1')
        batch_norm_1 = BatchNormalization(name='policy_batch_norm_1')
        relu_1 = Lambda(lambda x: K.relu(x))

        policy_dense_2 = Dense(out_seed_size,
                               activation='linear',
                               kernel_initializer='glorot_uniform',
                               name='policy_dense_2')

        seed_out_1 = policy_dense_2(
            relu_1(
                batch_norm_1(
                    policy_dense_1(
                        relu_0(batch_norm_0(policy_dense_0(seed_input_1)))))))
        seed_out_2 = policy_dense_2(
            relu_1(
                batch_norm_1(
                    policy_dense_1(
                        relu_0(batch_norm_0(policy_dense_0(seed_input_2)))))))

        policy_out_1 = gan_func(seed_out_1)
        policy_out_2 = gan_func(seed_out_2)

        return [latent_input_1,
                latent_input_2], [policy_out_1,
                                  policy_out_2], [seed_out_1, seed_out_2]
Example #25
    style_losses = get_style_losses(outputs_dict, style_targets_dict, args.style_layers,
                                    norm_by_channels=args.norm_by_channels)

    content_losses = get_content_losses(outputs_dict, content_targets_dict, args.content_layers)

    # Use total variation to improve local coherence
    total_var_loss = tv_loss(pastiche_net.output)


    weighted_style_losses = []
    weighted_content_losses = []

    # Compute total loss
    total_loss = K.variable(0.)
    for loss in style_losses:
        weighted_loss = K.mean(K.gather(style_weights, class_targets) * loss)
        weighted_style_losses.append(weighted_loss)
        total_loss += weighted_loss

    for loss in content_losses:
        weighted_loss = K.mean(K.gather(content_weights, class_targets) * loss)
        weighted_content_losses.append(weighted_loss)
        total_loss += weighted_loss

    weighted_tv_loss = K.mean(K.gather(tv_weights, class_targets) * total_var_loss)
    total_loss += weighted_tv_loss


    ## Make training function

    # Get a list of inputs
Example #26
def yolo_eval(yolo_outputs,
              anchors,
              num_classes,
              image_shape,
              max_boxes=20,
              score_threshold=.6,
              iou_threshold=.5):
    """Evaluate YOLO model on given input and return filtered boxes."""
    """
    num_layers,输出特征图的层数,3层;
    anchor_mask,将anchors划分为3个层,第1层13x13是678,第2层26x26是345,第3层52x52是012;
    input_shape:输入图像的尺寸,也就是第0个特征图的尺寸乘以32,即13x32=416,这与Darknet的网络结构有关。
    特征图越大,13->52,检测的物体越小,需要的anchors越小,所以anchors列表以倒序赋值。
    """
    num_layers = len(yolo_outputs)
    anchor_mask = [[6, 7, 8], [3, 4, 5], [0, 1, 2]] if num_layers == 3 else [[
        3, 4, 5
    ], [1, 2, 3]]  # default setting
    input_shape = K.shape(yolo_outputs[0])[1:3] * 32
    """
    接着,在YOLO的第l层输出yolo_outputs中,调用yolo_boxes_and_scores(),
    提取框_boxes和置信度_box_scores,将3个层的框数据放入列表boxes和box_scores,
    再拼接concatenate展平,输出的数据就是所有的框和置信度。
    其中,输出的boxes和box_scores的格式,如下:
    boxes: (?, 4)  # ?是框数
    box_scores: (?, 80)
    """
    boxes = []
    box_scores = []
    for l in range(num_layers):

        _boxes, _box_scores = yolo_boxes_and_scores(yolo_outputs[l],
                                                    anchors[anchor_mask[l]],
                                                    num_classes, input_shape,
                                                    image_shape)
        boxes.append(_boxes)
        box_scores.append(_box_scores)

    # concatenate flattens the data from all layers; the boxes are already in real
    # coordinates, so the scales no longer differ
    boxes = K.concatenate(boxes, axis=0)
    box_scores = K.concatenate(box_scores, axis=0)

    mask = box_scores >= score_threshold
    max_boxes_tensor = K.constant(max_boxes, dtype='int32')
    boxes_ = []
    scores_ = []
    classes_ = []
    for c in range(num_classes):
        # TODO: use keras backend instead of tf.
        class_boxes = tf.boolean_mask(boxes, mask[:, c])
        class_box_scores = tf.boolean_mask(box_scores[:, c], mask[:, c])
        nms_index = tf.image.non_max_suppression(class_boxes,
                                                 class_box_scores,
                                                 max_boxes_tensor,
                                                 iou_threshold=iou_threshold)
        class_boxes = K.gather(class_boxes, nms_index)
        class_box_scores = K.gather(class_box_scores, nms_index)
        classes = K.ones_like(class_box_scores, 'int32') * c
        boxes_.append(class_boxes)
        scores_.append(class_box_scores)
        classes_.append(classes)
    boxes_ = K.concatenate(boxes_, axis=0)
    scores_ = K.concatenate(scores_, axis=0)
    classes_ = K.concatenate(classes_, axis=0)

    return boxes_, scores_, classes_
Example #27
    def call(self, inputs, **kwargs):
        """
        Creates the layer as a Keras graph

        Notes:
            This does not add self loops to the adjacency matrix.
            The output indices are only used when `final_layer=True`

        Args:
            inputs (list): list of inputs with 4 items:
            node features (size b x N x F),
            output indices (size b x M),
            sparse graph adjacency matrix (size N x N),
            where N is the number of nodes in the graph,
                  F is the dimensionality of node features
                  M is the number of output nodes
        """
        X = inputs[0]  # Node features (1 x N x F)
        out_indices = inputs[1]  # output indices (1 x K)
        A_sparse = inputs[2]  # Adjacency matrix (1 x N x N)

        if not isinstance(A_sparse, K.tf.SparseTensor):
            raise TypeError("A is not sparse")

        # Get undirected graph edges (E x 2)
        A_indices = A_sparse.indices

        batch_dim, n_nodes, _ = K.int_shape(X)
        if batch_dim != 1:
            raise ValueError(
                "Currently full-batch methods only support a batch dimension of one"
            )
        else:
            # Remove singleton batch dimension
            out_indices = K.squeeze(out_indices, 0)
            X = K.squeeze(X, 0)

        outputs = []
        for head in range(self.attn_heads):
            kernel = self.kernels[head]  # W in the paper (F x F')
            attention_kernel = self.attn_kernels[
                head]  # Attention kernel a in the paper (2F' x 1)

            # Compute inputs to attention network
            features = K.dot(X, kernel)  # (N x F')

            # Compute feature combinations
            # Note: [[a_1], [a_2]]^T [[Wh_i], [Wh_j]] = [a_1]^T [Wh_i] + [a_2]^T [Wh_j]
            attn_for_self = K.dot(
                features, attention_kernel[0])  # (N x 1), [a_1]^T [Wh_i]
            attn_for_neighs = K.dot(
                features, attention_kernel[1])  # (N x 1), [a_2]^T [Wh_j]

            # Attention head a(Wh_i, Wh_j) = a^T [[Wh_i], [Wh_j]]
            dense = attn_for_self + K.transpose(
                attn_for_neighs)  # (N x N) via broadcasting

            # Create sparse attention vector (All non-zero values of the matrix)
            sparse_attn_self = K.tf.gather(K.reshape(attn_for_self, [-1]),
                                           A_indices[:, 0],
                                           axis=0)
            sparse_attn_neighs = K.tf.gather(K.reshape(attn_for_neighs, [-1]),
                                             A_indices[:, 1],
                                             axis=0)
            attn_values = sparse_attn_self + sparse_attn_neighs

            # Add nonlinearity
            attn_values = LeakyReLU(alpha=0.2)(attn_values)

            # Apply dropout to features and attention coefficients
            dropout_feat = Dropout(self.in_dropout_rate)(features)  # (N x F')
            dropout_attn = Dropout(self.attn_dropout_rate)(
                attn_values)  # (N x N)

            # Convert to sparse matrix
            sparse_attn = K.tf.sparse.SparseTensor(
                A_indices, values=dropout_attn, dense_shape=[n_nodes, n_nodes])

            # Apply softmax to get attention coefficients
            sparse_attn = K.tf.sparse.softmax(
                sparse_attn)  # (N x N), Eq. 3 of the paper

            # Linear combination with neighbors' features [YT: see Eq. 4]
            node_features = K.tf.sparse.matmul(sparse_attn,
                                               dropout_feat)  # (N x F')

            if self.use_bias:
                node_features = K.bias_add(node_features, self.biases[head])

            # Add output of attention head to final output
            outputs.append(node_features)

        # Aggregate the heads' output according to the reduction method
        if self.attn_heads_reduction == "concat":
            output = K.concatenate(outputs)  # (N x KF')
        else:
            output = K.mean(K.stack(outputs), axis=0)  # (N x F')

        output = self.activation(output)

        # On the final layer we gather the nodes referenced by the indices
        if self.final_layer:
            output = K.gather(output, out_indices)

        # Add batch dimension back if we removed it
        if batch_dim == 1:
            output = K.expand_dims(output, 0)
        return output
Example #28
    def call(self, inputs):
        """
        Creates the layer as a Keras graph.

        Note that the inputs are tensors with a batch dimension of 1:
        Keras requires this batch dimension, and for full-batch methods
        we only have a single "batch".

        There are three inputs required, the node features, the output
        indices (the nodes that are to be selected in the final layer)
        and the graph adjacency matrix

        Notes:
            This does not add self loops to the adjacency matrix.
            The output indices are only used when ``final_layer=True``

        Args:
            inputs (list): list of inputs with 3 items:
            node features (size 1 x N x F),
            output indices (size 1 x M),
            graph adjacency matrix (size N x N),
            where N is the number of nodes in the graph,
                  F is the dimensionality of node features
                  M is the number of output nodes
        """
        X = inputs[0]  # Node features (1 x N x F)
        out_indices = inputs[1]  # output indices (1 x K)
        A = inputs[2]  # Adjacency matrix (N x N)

        batch_dim, n_nodes, _ = K.int_shape(X)
        if batch_dim != 1:
            raise ValueError(
                "Currently full-batch methods only support a batch dimension of one"
            )

        else:
            # Remove singleton batch dimension
            X = K.squeeze(X, 0)
            out_indices = K.squeeze(out_indices, 0)

        outputs = []
        for head in range(self.attn_heads):
            kernel = self.kernels[head]  # W in the paper (F x F')
            attention_kernel = self.attn_kernels[
                head]  # Attention kernel a in the paper (2F' x 1)

            # Compute inputs to attention network
            features = K.dot(X, kernel)  # (N x F')

            # Compute feature combinations
            # Note: [[a_1], [a_2]]^T [[Wh_i], [Wh_j]] = [a_1]^T [Wh_i] + [a_2]^T [Wh_j]
            attn_for_self = K.dot(
                features, attention_kernel[0])  # (N x 1), [a_1]^T [Wh_i]
            attn_for_neighs = K.dot(
                features, attention_kernel[1])  # (N x 1), [a_2]^T [Wh_j]

            # Attention head a(Wh_i, Wh_j) = a^T [[Wh_i], [Wh_j]]
            dense = attn_for_self + K.transpose(
                attn_for_neighs)  # (N x N) via broadcasting

            # Add nonlinearity
            dense = LeakyReLU(alpha=0.2)(dense)

            # Mask values before activation (Vaswani et al., 2017)
            # YT: this only works for 'binary' A, not for 'weighted' A!
            # YT: if A does not have self-loops, the node itself will be masked, so A should have self-loops
            # YT: this is ensured by setting the diagonal elements of A tensor to 1 above
            mask = -10e9 * (1.0 - A)
            dense += mask

            # Apply softmax to get attention coefficients
            dense = K.softmax(dense, axis=1)  # (N x N), Eq. 3 of the paper

            # Apply dropout to features and attention coefficients
            dropout_feat = Dropout(self.in_dropout_rate)(features)  # (N x F')
            dropout_attn = Dropout(self.attn_dropout_rate)(dense)  # (N x N)

            # Linear combination with neighbors' features [YT: see Eq. 4]
            node_features = K.dot(dropout_attn, dropout_feat)  # (N x F')

            if self.use_bias:
                node_features = K.bias_add(node_features, self.biases[head])

            # Add output of attention head to final output
            outputs.append(node_features)

        # Aggregate the heads' output according to the reduction method
        if self.attn_heads_reduction == "concat":
            output = K.concatenate(outputs)  # (N x KF')
        else:
            output = K.mean(K.stack(outputs), axis=0)  # (N x F')

        # Nonlinear activation function
        output = self.activation(output)

        # On the final layer we gather the nodes referenced by the indices
        if self.final_layer:
            output = K.gather(output, out_indices)

        # Add batch dimension back if we removed it
        if batch_dim == 1:
            output = K.expand_dims(output, 0)

        return output
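
The -10e9 additive mask drives the attention weight of every non-edge to effectively zero after the softmax. A quick numpy illustration (hedged; A and the logits are made up):

# Hedged check: masked entries vanish after softmax.
import numpy as np

A = np.array([[1., 1., 0.],
              [1., 1., 1.],
              [0., 1., 1.]])
logits = np.random.randn(3, 3)
masked = logits + (-10e9) * (1.0 - A)
attn = np.exp(masked) / np.exp(masked).sum(axis=1, keepdims=True)
assert np.allclose(attn[A == 0.], 0.0)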
Example #29
def d_acc(x):
    ''' Calculate detection metrics for a single sample

    Parameters:
    x: a tuple (y_true, y_pred) where y_pred is the post-eval output from the model
    '''

    max_boxes = 20  # TODO: this should be some sort of global constant

    y_true = x[0]
    y_pred = x[1]

    # convert y_true to list of boxes and classes

    pred_boxes, pred_scores, pred_classes = eval(y_pred,
                                                 image_shape,
                                                 max_boxes=max_boxes)

    true_box, true_mask, true_class = true_boxes_true_masks_true_classes(
        y_true)
    true_mask = K.cast(true_mask, dtype='bool')

    true_box = K.squeeze(true_box,
                         axis=2)  # Note: for batch processing, axis=3
    true_box = tf.boolean_mask(true_box, true_mask)
    true_class = tf.boolean_mask(true_class, true_mask)

    height, width = image_shape

    image_dims = K.stack([height, width, height, width])
    image_dims = K.cast(K.reshape(image_dims, (1, 4)), K.floatx())

    true_box = true_box * image_dims

    # need to compare the list of box and class predictions between ground truth and prediction:
    # pred_boxes, pred_classes, true_box, true_class

    iou_matrix = iou(pred_boxes[:, tf.newaxis, :], true_box[tf.newaxis, :, :])

    # if more than one prediction targets the same true box, keep the one with the highest IoU
    iou_matrix = iou_matrix * K.cast(
        iou_matrix - K.max(iou_matrix, axis=0, keepdims=True) >= 0.0,
        dtype=K.floatx())

    # if one prediction overlaps more than one true box, keep only the best match
    iou_matrix = iou_matrix * K.cast(
        iou_matrix - K.max(iou_matrix, axis=1, keepdims=True) >= 0.0,
        dtype=K.floatx())

    #matched_prediction_idx, matched_truth_idx = K.squeeze(np.nonzero(K.maximum(iou_matrix - 0.5, 0)))

    iou_matrix = K.maximum(iou_matrix - 0.5, 0)
    zero = K.constant(0, dtype=K.floatx())  # tf way of doing np.nonzero(...)
    where = K.not_equal(iou_matrix, zero)
    where = tf.where(where)

    matched_prediction_idx = where[..., 0]
    matched_truth_idx = where[..., 1]

    # calculate precision, recall and f1

    # precision = # true positives / # prediction made (What proportion of positive identifications was actually correct?)

    tot_num_predictions = K.cast(K.shape(pred_boxes)[0], K.floatx())
    tot_num_ground_truths = K.cast(K.shape(true_box)[0], K.floatx())

    num_true_positives = K.sum(
        K.cast(
            K.equal(K.gather(pred_classes, matched_prediction_idx),
                    K.gather(true_class, matched_truth_idx)), K.floatx()))

    # do these for numerical stability, # of true positives or # of predictions can be 0.
    num_true_positives = num_true_positives + K.epsilon()
    tot_num_predictions = tot_num_predictions + K.epsilon()
    tot_num_ground_truths = tot_num_ground_truths + K.epsilon()

    precision = num_true_positives / tot_num_predictions

    # recall = # correct prediction / # of positive ground truth observations (What proportion of actual positives was identified correctly?)

    recall = num_true_positives / tot_num_ground_truths

    f1 = 2.0 * (precision * recall) / (precision + recall)

    return f1
Example #30
 def call(self, inputs):
     if K.dtype(inputs) != 'int32':
         inputs = K.cast(inputs, 'int32')
     output = K.gather(self.embeddings, inputs)
     return output
Example #31
    def _subsampling(self,
                     normalized_rois,
                     gt_bboxes,
                     gt_labels,
                     pos_iou_thresh=0.5,
                     exclusive_iou_tresh=0.1,
                     pos_ratio=0.25):
        """正解データとのIoUを基にRoIをサンプリングする。
        IoUがpos_iou_thresh以上であるRoIをオブジェクトとみなす。
            オブジェクトはサンプルの25%以内とする。(n_samples_per_batch * pos_ratio 以内)
        pos_iou_thresh未満、exclusive_iou_thresh以上は非オブジェクトとみなす。
        exclusive_iou_thresh未満は偶然の一致であり意味なし(難解)なので無視。
        ※論文ではheuristic for hard example mining.と記載されている点。
        バッチ毎のサンプル数はn_samples_per_batch以内とする。
        (n_samples_per_batch未満の場合は、n_samples_per_batchになるよう0パディングする。)

        上記のサンプリングに対応する正解データのラベル、また、BBoxとのオフセットも得る。

        Args:
            normalized_rois (tensor) : RegionProposalLayerで得られたRoI。
                (N, n_rois, 4)
                3軸目は領域の左上と右下の座標が0〜1に正規化された値。
                入力画像サイズの高さ、幅で除算することで正規化された値。
                    (y1, x1, y2, x2)
            gt_bboxes (ndarray) : 正解BBox。
                (N, config.n_max_gt_objects_per_image, 4)
                座標は正規化されていない。
            gt_labels (ndarray) : 正解ラベル。
                (N, config.n_max_gt_objects_per_image)
                ==0:背景データ
                >=1:オブジェクト
        Returns:
            sample_rois (tensor): サンプリングしたRoI。
                (N, n_samples_per_batch, 4)
                3軸目の座標は0〜1に正規化された値。
            sample_gt_offset (tensor): サンプリングしたRoIに対応するBBoxとのオフセット。
                (N, n_samples_per_batch, 4)
                3軸目の座標は0〜1に正規化された値をself.config.bbox_refinement_stdで割ることで標準化した値。
            sample_gt_labels (tensor): サンプリングしたRoIに対応するBBoxのラベル。
                (N, n_samples_per_batch)
        """
        pos_roi_per_batch = round(self.n_samples_per_batch * pos_ratio)

        # Normalize gt_bboxes to match normalized_rois,
        # so that IoU can be evaluated between them.
        input_h = self.config.image_shape[0]
        input_w = self.config.image_shape[1]
        normalized_gt_bboxes = bbox.normalize_bbox(gt_bboxes, input_h, input_w)

        # Split the inputs per batch element
        normalized_rois = tf.split(normalized_rois, self.config.batch_size)
        normalized_gt_bboxes = tf.split(normalized_gt_bboxes,
                                        self.config.batch_size)
        gt_labels = tf.split(gt_labels, self.config.batch_size)

        sample_rois = []
        sample_gt_offsets = []
        sample_gt_labels = []

        for roi, gt_bbox, gt_label in zip(normalized_rois,
                                          normalized_gt_bboxes, gt_labels):
            # Axis 0 (the batch axis) is not needed, so remove it
            roi = log.tfprint(roi, "roi: ")
            gt_bbox = log.tfprint(gt_bbox, "gt_bbox: ")
            gt_label = log.tfprint(gt_label, "gt_label: ")

            roi = K.squeeze(roi, 0)
            gt_bbox = K.squeeze(gt_bbox, 0)
            gt_label = K.squeeze(gt_label, 0)

            roi = log.tfprint(roi, "roi_squeezed: ")
            gt_bbox = log.tfprint(gt_bbox, "gt_bbox_squeezed: ")
            gt_label = log.tfprint(gt_label, "gt_label_squeezed: ")

            # Exclude zero-padded rows
            # K.gather(zero, K.squeeze(tf.where(K.any(zero, axis=1)), -1) )
            idx_roi_row = K.flatten(tf.where(K.any(roi, axis=1)))
            idx_gt_bbox = K.flatten(tf.where(K.any(gt_bbox, axis=1)))
            roi = K.gather(roi, idx_roi_row)
            # gt_bbox and gt_label have the same rows in the same order, so the same indices apply to both
            gt_bbox = K.gather(gt_bbox, idx_gt_bbox)
            gt_label = K.gather(gt_label, idx_gt_bbox)

            gt_bbox = log.tfprint(gt_bbox, "gt_bbox_gathered: ")
            gt_label = log.tfprint(gt_label, "gt_label_gathered: ")

            # Compute the IoU.
            # (n_rois, )
            ious = bbox.get_iou_K(roi, gt_bbox)
            ious = log.tfprint(ious, "ious: ")

            # For each RoI, get the index of the ground-truth box with the maximum IoU
            idx_max_gt = K.argmax(ious, axis=1)
            idx_max_gt = log.tfprint(idx_max_gt, "idx_max_gt: ")

            max_iou = K.max(ious, axis=1)  # max_iou has as many rows as roi
            max_iou = log.tfprint(max_iou, "max_iou: ")
            idx_pos = K.flatten(tf.where(max_iou >= pos_iou_thresh))
            # Limit the number of positive samples to pos_roi_per_batch
            limit_pos = K.minimum(pos_roi_per_batch, K.shape(idx_pos)[0])
            idx_pos = K.switch(
                K.shape(idx_pos)[0] > 0,
                tf.random_shuffle(idx_pos)[:limit_pos], idx_pos)
            limit_pos = log.tfprint(limit_pos, "limit_pos: ")
            idx_pos = log.tfprint(idx_pos, "idx_pos: ")

            # Limit the number of negative samples to
            #   n_samples_per_batch - pos_roi_per_batch
            idx_neg = K.flatten(
                tf.where((max_iou < pos_iou_thresh)
                         & (max_iou >= exclusive_iou_tresh)))
            # Negatives are limited to n_samples_per_batch - limit_pos (i.e., the remainder)
            limit_neg = self.n_samples_per_batch - limit_pos
            limit_neg = K.minimum(limit_neg, K.shape(idx_neg)[0])
            idx_neg = K.switch(
                K.shape(idx_neg)[0] > 0,
                tf.random_shuffle(idx_neg)[:limit_neg], idx_neg)
            limit_neg = log.tfprint(limit_neg, "limit_neg: ")
            idx_neg = log.tfprint(idx_neg, "idx_neg: ")

            # Extract the samples to return.
            # The GT offsets and labels correspond to each RoI, i.e. they are stored at the same positions.
            idx_keep = K.concatenate((idx_pos, idx_neg))
            idx_keep = log.tfprint(idx_keep, "idx_keep: ")

            # Likewise, keep only the returned samples among each RoI's argmax-IoU indices.
            idx_gt_keep = K.gather(idx_max_gt, idx_keep)
            # Indices keeping only the samples regarded as positive (IoU at or above the threshold).
            idx_gt_keep_pos = K.gather(idx_max_gt, idx_pos)
            idx_gt_keep = log.tfprint(idx_gt_keep, "idx_gt_keep: ")

            sample_roi = K.gather(roi, idx_keep)
            sample_gt_offset = bbox.get_offset_K(
                sample_roi, K.gather(gt_bbox, idx_gt_keep))
            # Negative elements get label 0
            sample_gt_label = K.concatenate((
                K.cast(K.gather(gt_label, idx_gt_keep_pos), dtype='int32'),
                K.zeros(
                    [limit_neg],  # K.zeros does not accept a rank-0 shape, so wrap it in a list
                    dtype='int32')))

            # Zero-pad when there are fewer than n_samples_per_batch rows
            remain = tf.maximum(
                self.n_samples_per_batch - tf.shape(sample_roi)[0], 0)
            sample_roi = tf.pad(sample_roi, [(0, remain), (0, 0)],
                                name='subsample_sample_roi')
            sample_gt_offset = tf.pad(sample_gt_offset, [(0, remain), (0, 0)],
                                      name='subsample_sample_gt_offset')
            sample_gt_offset /= self.config.bbox_refinement_std
            sample_gt_label = tf.pad(sample_gt_label, [(0, remain)],
                                     name='subsample_sample_gt_label')

            sample_roi = log.tfprint(sample_roi, "sample_roi: ")
            sample_gt_offset = log.tfprint(sample_gt_offset,
                                           "sample_gt_offset: ")
            sample_gt_label = log.tfprint(sample_gt_label, "sample_gt_label: ")

            sample_rois.append(sample_roi)
            sample_gt_offsets.append(sample_gt_offset)
            sample_gt_labels.append(sample_gt_label)

        return [
            K.stack(sample_rois),
            K.stack(sample_gt_offsets),
            K.stack(sample_gt_labels)
        ]
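
The tf.where/K.any combination above drops zero-padded rows before the IoU computation. The same selection in plain numpy (hedged, with made-up RoIs):

# Hedged sketch: keep only rows with at least one non-zero coordinate.
import numpy as np

rois = np.array([[0.1, 0.1, 0.5, 0.5],
                 [0.0, 0.0, 0.0, 0.0],
                 [0.2, 0.3, 0.4, 0.6]])
keep = np.flatnonzero(rois.any(axis=1))
assert np.array_equal(keep, np.array([0, 2]))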
Example #33
 def call(self, inputs):
     if k.dtype(inputs) != 'int32':
         inputs = k.cast(inputs, 'int32')
     out = k.gather(k.transpose(self.dbedl.kernel), inputs)
     # out2 = K.gather(self.embeddings, inputs)
     return out
Example #34
def yolo_eval(yolo_outputs,
              anchors,
              num_classes,
              image_shape,
              max_boxes=40,
              score_threshold=.6,
              iou_threshold=.5,
              diff_class_iou_threshold=None):
    """Evaluate YOLO model on given input and return filtered boxes."""
    num_layers = len(yolo_outputs)
    anchor_mask = [[6, 7, 8], [3, 4, 5], [0, 1, 2]] if num_layers == 3 else [[
        3, 4, 5
    ], [1, 2, 3]]  # default setting
    input_shape = K.shape(yolo_outputs[0])[1:3] * 32
    boxes = []
    box_scores = []
    for l in range(num_layers):
        _boxes, _box_scores = yolo_boxes_and_scores(yolo_outputs[l],
                                                    anchors[anchor_mask[l]],
                                                    num_classes, input_shape,
                                                    image_shape)
        boxes.append(_boxes)
        box_scores.append(_box_scores)
    boxes = K.concatenate(boxes, axis=0)
    box_scores = K.concatenate(box_scores, axis=0)

    mask = box_scores >= score_threshold
    max_boxes_tensor = K.constant(max_boxes, dtype='int32')
    boxes_ = []
    scores_ = []
    classes_ = []
    for c in range(num_classes):
        # TODO: use keras backend instead of tf.
        class_boxes = tf.boolean_mask(boxes, mask[:, c])
        class_box_scores = tf.boolean_mask(box_scores[:, c], mask[:, c])
        nms_index = tf.image.non_max_suppression(class_boxes,
                                                 class_box_scores,
                                                 max_boxes_tensor,
                                                 iou_threshold=iou_threshold)
        class_boxes = K.gather(class_boxes, nms_index)
        class_box_scores = K.gather(class_box_scores, nms_index)
        classes = K.ones_like(class_box_scores, 'int32') * c
        boxes_.append(class_boxes)
        scores_.append(class_box_scores)
        classes_.append(classes)

    boxes_ = K.concatenate(boxes_, axis=0)
    scores_ = K.concatenate(scores_, axis=0)
    classes_ = K.concatenate(classes_, axis=0)

    if diff_class_iou_threshold is not None:
        right_indics = tf.image.non_max_suppression(
            boxes_,
            scores_,
            max_boxes_tensor,
            iou_threshold=diff_class_iou_threshold)
        boxes_ = K.gather(boxes_, right_indics)
        scores_ = K.gather(scores_, right_indics)
        classes_ = K.gather(classes_, right_indics)

    return boxes_, scores_, classes_
Example #35
    def compute_loss(self, y_true, y_pred):
        batch_size = K.shape(y_true)[0]
        num_prior_boxes = K.cast(K.shape(y_true)[1], 'float')

        y_pred_localization = y_pred[:, :, :4]
        y_true_localization = y_true[:, :, :4]
        y_pred_classification = y_pred[:, :, 4:(4 + self.num_classes)]
        y_true_classification = y_true[:, :, 4:(4 + self.num_classes)]
        # loss for all priors boxes
        localization_loss = self._l1_smooth_loss(y_true_localization,
                                                 y_pred_localization)
        classification_loss = self._softmax_loss(y_true_classification,
                                                 y_pred_classification)

        int_positive_mask = 1 - y_true[:, :, 4 + self.background_id]
        num_positives = tf.reduce_sum(int_positive_mask, axis=-1)
        positive_localization_losses = (localization_loss * int_positive_mask
                                        )  #scalar times vector
        positive_classification_losses = (classification_loss *
                                          int_positive_mask)
        positive_classification_loss = K.sum(positive_classification_losses, 1)
        positive_localization_loss = K.sum(positive_localization_losses, 1)

        # TODO: Refactor/understand ----------------------------------------------
        # every batch contains all priors: here we take the least amount of
        # negatives, which depends on the amount of positives at every batch
        # element. num_negatives/positives = (?, num_positives)
        # In the second num_positive_mask, the concatenated value is not
        # counted, since only values greater than zero pass the test.
        # The most likely value of num_neg_batch is
        # neg_pos_ratio * num_positives, where num_positives comes from the
        # batch element with the fewest positive boxes.
        num_negatives_1 = self.neg_pos_ratio * num_positives
        num_negatives_2 = num_prior_boxes - num_positives
        num_negatives = tf.minimum(num_negatives_1, num_negatives_2)

        num_positive_mask = tf.greater(num_negatives, 0)
        has_a_positive = tf.to_float(tf.reduce_any(num_positive_mask))
        num_negatives = tf.concat(
            [num_negatives, [(1 - has_a_positive) * self.negatives_for_hard]],
            axis=0)
        num_positive_mask = tf.greater(num_negatives, 0)
        num_neg_batch = tf.reduce_min(
            tf.boolean_mask(num_negatives, num_positive_mask))
        num_neg_batch = tf.to_int32(num_neg_batch)
        # ----------------------------------------------------------------------

        #class_start = 4 + self.background_id + 1
        #class_end = class_start + self.num_classes - 1
        # each prior box can only have one class then we take the max at axis 2
        #best_class_scores = K.max(y_pred[:, :, class_start:], 2)

        # pick the negative examples with the highest predicted class score
        # (and therefore the highest loss); the slice starts at 1, as in the
        # original implementation, so background scores are not considered
        pred_class_values = K.max(y_pred_classification[:, :, 1:], axis=2)
        int_negatives_mask = y_true[:, :, 4 + self.background_id]
        pred_negative_class_values = pred_class_values * int_negatives_mask
        top_k_negative_indices = tf.nn.top_k(pred_negative_class_values,
                                             k=num_neg_batch)[1]

        batch_indices = K.expand_dims(K.arange(0, batch_size), 1)
        batch_indices = K.tile(batch_indices, (1, num_neg_batch))
        batch_indices = K.flatten(batch_indices) * K.cast(
            num_prior_boxes, 'int32')
        full_indices = batch_indices + K.flatten(top_k_negative_indices)

        negative_classification_loss = K.gather(K.flatten(classification_loss),
                                                full_indices)
        negative_classification_loss = K.reshape(negative_classification_loss,
                                                 [batch_size, num_neg_batch])
        negative_classification_loss = K.sum(negative_classification_loss, 1)

        # loss is sum of positives and negatives
        total_loss = positive_classification_loss + negative_classification_loss
        num_prior_boxes_per_batch = num_positives + K.cast(
            num_neg_batch, 'float')
        total_loss = total_loss / num_prior_boxes_per_batch
        num_positives = tf.where(K.not_equal(num_positives, 0), num_positives,
                                 K.ones_like(num_positives))
        positive_localization_loss = self.alpha * positive_localization_loss
        positive_localization_loss = positive_localization_loss / num_positives
        total_loss = total_loss + positive_localization_loss
        return total_loss
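# A small numpy sketch of the hard-negative count above (illustrative values;
# 8732 is the usual SSD300 prior count): clip the desired negatives at what is
# available, then take the batch minimum over elements that have positives.
import numpy as np

neg_pos_ratio, num_prior_boxes = 3.0, 8732.0
num_positives = np.array([10., 40., 0.])  # per batch element
num_negatives = np.minimum(neg_pos_ratio * num_positives,
                           num_prior_boxes - num_positives)
num_neg_batch = int(num_negatives[num_negatives > 0].min())
print(num_neg_batch)  # 30, i.e. 3 * the fewest positives in the batch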
Exemple #36
    def linear_interpolate(self, images, sampled_grids, resampled_size):
        # Bilinear sampling: map normalized grid coordinates in [-1, 1] to
        # pixel space, gather the four neighbouring pixels of every sample
        # point and blend them with the standard bilinear weights.
        batch_size = K.shape(images)[0]
        height = K.shape(images)[1]
        width = K.shape(images)[2]
        number_of_channels = K.shape(images)[3]

        x = K.cast(K.flatten(sampled_grids[:, 0:1, :]), dtype='float32')
        y = K.cast(K.flatten(sampled_grids[:, 1:2, :]), dtype='float32')

        x = 0.5 * (x + 1.0) * K.cast(width, dtype='float32')
        y = 0.5 * (y + 1.0) * K.cast(height, dtype='float32')

        x0 = K.cast(x, dtype='int32')
        x1 = x0 + 1
        y0 = K.cast(y, dtype='int32')
        y1 = y0 + 1

        xMax = int(K.int_shape(images)[2] - 1)
        yMax = int(K.int_shape(images)[1] - 1)

        x0 = K.clip(x0, 0, xMax)
        x1 = K.clip(x1, 0, xMax)
        y0 = K.clip(y0, 0, yMax)
        y1 = K.clip(y1, 0, yMax)

        batch_pixels = K.arange(0, batch_size) * (height * width)
        batch_pixels = K.expand_dims(batch_pixels, axis=-1)
        base = K.repeat_elements(batch_pixels,
                                 rep=int(resampled_size[0] *
                                         resampled_size[1]),
                                 axis=1)
        base = K.flatten(base)

        indices00 = base + y0 * width + x0
        indices01 = base + y1 * width + x0
        indices10 = base + y0 * width + x1
        indices11 = base + y1 * width + x1

        flat_images = K.reshape(images, shape=(-1, number_of_channels))
        flat_images = K.cast(flat_images, dtype='float32')

        pixelValues00 = K.gather(flat_images, indices00)
        pixelValues01 = K.gather(flat_images, indices01)
        pixelValues10 = K.gather(flat_images, indices10)
        pixelValues11 = K.gather(flat_images, indices11)

        x0 = K.cast(x0, dtype='float32')
        x1 = K.cast(x1, dtype='float32')
        y0 = K.cast(y0, dtype='float32')
        y1 = K.cast(y1, dtype='float32')

        weight00 = K.expand_dims(((x1 - x) * (y1 - y)), axis=1)
        weight01 = K.expand_dims(((x1 - x) * (y - y0)), axis=1)
        weight10 = K.expand_dims(((x - x0) * (y1 - y)), axis=1)
        weight11 = K.expand_dims(((x - x0) * (y - y0)), axis=1)

        interpolatedValues00 = weight00 * pixelValues00
        interpolatedValues01 = weight01 * pixelValues01
        interpolatedValues10 = weight10 * pixelValues10
        interpolatedValues11 = weight11 * pixelValues11

        interpolatedValues = (interpolatedValues00 + interpolatedValues01 +
                              interpolatedValues10 + interpolatedValues11)

        return interpolatedValues
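# The four weights above are the standard bilinear coefficients; a quick,
# purely illustrative numpy check that they sum to 1 for any sample point:
import numpy as np

x, y = 2.3, 5.7
x0, x1, y0, y1 = np.floor(x), np.floor(x) + 1, np.floor(y), np.floor(y) + 1
weights = [(x1 - x) * (y1 - y), (x1 - x) * (y - y0),
           (x - x0) * (y1 - y), (x - x0) * (y - y0)]
print(sum(weights))  # 1.0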
Exemple #37
def yolo_eval(yolo_outputs,
              anchors,
              num_classes,
              image_shape,
              max_boxes=20,
              score_threshold=.6,
              iou_threshold=.5,
              letterbox_image=True):
    #---------------------------------------------------#
    #   Get the number of feature layers; there are 3
    #   effective feature layers
    #---------------------------------------------------#
    num_layers = len(yolo_outputs)
    #-----------------------------------------------------------#
    #   The 13x13 feature layer uses anchors [116,90],[156,198],[373,326]
    #   The 26x26 feature layer uses anchors [30,61],[62,45],[59,119]
    #   The 52x52 feature layer uses anchors [10,13],[16,30],[33,23]
    #-----------------------------------------------------------#
    anchor_mask = [[6, 7, 8], [3, 4, 5], [0, 1, 2]]

    #-----------------------------------------------------------#
    #   Size of the input image, usually 416x416
    #-----------------------------------------------------------#
    input_shape = K.shape(yolo_outputs[0])[1:3] * 32
    boxes = []
    box_scores = []
    #-----------------------------------------------------------#
    #   Process each feature layer
    #-----------------------------------------------------------#
    for l in range(num_layers):
        _boxes, _box_scores = yolo_boxes_and_scores(yolo_outputs[l],
                                                    anchors[anchor_mask[l]],
                                                    num_classes, input_shape,
                                                    image_shape,
                                                    letterbox_image)
        boxes.append(_boxes)
        box_scores.append(_box_scores)
    #-----------------------------------------------------------#
    #   Stack the results of all feature layers
    #-----------------------------------------------------------#
    boxes = K.concatenate(boxes, axis=0)
    box_scores = K.concatenate(box_scores, axis=0)

    #-----------------------------------------------------------#
    #   Check whether the scores exceed score_threshold
    #-----------------------------------------------------------#
    mask = box_scores >= score_threshold
    max_boxes_tensor = K.constant(max_boxes, dtype='int32')
    boxes_ = []
    scores_ = []
    classes_ = []
    for c in range(num_classes):
        #-----------------------------------------------------------#
        #   Keep all boxes, and their scores, with
        #   box_scores >= score_threshold
        #-----------------------------------------------------------#
        class_boxes = tf.boolean_mask(boxes, mask[:, c])
        class_box_scores = tf.boolean_mask(box_scores[:, c], mask[:, c])

        #-----------------------------------------------------------#
        #   Non-max suppression:
        #   keep the highest-scoring box within each region
        #-----------------------------------------------------------#
        nms_index = tf.image.non_max_suppression(class_boxes,
                                                 class_box_scores,
                                                 max_boxes_tensor,
                                                 iou_threshold=iou_threshold)

        #-----------------------------------------------------------#
        #   Fetch the results after non-max suppression; the three
        #   outputs below are the box locations, scores and classes
        #-----------------------------------------------------------#
        class_boxes = K.gather(class_boxes, nms_index)
        class_box_scores = K.gather(class_box_scores, nms_index)
        classes = K.ones_like(class_box_scores, 'int32') * c
        boxes_.append(class_boxes)
        scores_.append(class_box_scores)
        classes_.append(classes)
    boxes_ = K.concatenate(boxes_, axis=0)
    scores_ = K.concatenate(scores_, axis=0)
    classes_ = K.concatenate(classes_, axis=0)

    return boxes_, scores_, classes_
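# Hedged usage sketch of the function above; `yolo_model`, `anchors` and
# `num_classes` are assumptions standing in for real objects, and the outputs
# are symbolic tensors in TF1-style Keras:
# boxes, scores, classes = yolo_eval(yolo_model.output, anchors, num_classes,
#                                    image_shape=K.placeholder(shape=(2,)),
#                                    letterbox_image=True)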
Exemple #38
def yolo_eval(
        yolo_outputs,  # model outputs: (?,13,13,255), (?,26,26,255), (?,52,52,255); ?: batch size
        anchors,
        num_classes,  # 80 classes (COCO)
        image_shape,
        max_boxes=20,  # at most 20 detected boxes per class per image
        score_threshold=.6,
        iou_threshold=.5):  # same-class IOU threshold for NMS
    # each layer is assigned three anchor masks, e.g. 13*13 gets [6, 7, 8]
    anchor_mask = [[6, 7, 8], [3, 4, 5], [0, 1, 2]]
    input_shape = K.shape(yolo_outputs[0])[1:3] * 32
    boxes = []
    box_scores = []
    # process each feature layer
    for l in range(3):
        # _boxes -> (?,4), _box_scores -> (?,80); ?: number of boxes
        _boxes, _box_scores = yolo_boxes_and_scores(yolo_outputs[l],
                                                    anchors[anchor_mask[l]],
                                                    num_classes, input_shape,
                                                    image_shape)
        boxes.append(_boxes)
        box_scores.append(_box_scores)
    # stack the results of all feature layers
    boxes = K.concatenate(boxes, axis=0)  # flatten -> (?,4)
    box_scores = K.concatenate(box_scores, axis=0)  # flatten -> (?,80)

    mask = box_scores >= score_threshold  # keep only scores above the threshold
    max_boxes_tensor = K.constant(max_boxes, dtype='int32')  # at most 20 detections
    boxes_ = []
    scores_ = []
    classes_ = []
    for c in range(num_classes):
        # TODO: use keras backend instead of tf.
        # filter out boxes whose score is below the threshold
        class_boxes = tf.boolean_mask(boxes, mask[:, c])  # select boxes via the mask for class c
        class_box_scores = tf.boolean_mask(box_scores[:, c],
                                           mask[:, c])  # select scores via the mask for class c

        # run non-max suppression and get the indices of the surviving boxes
        nms_index = tf.image.non_max_suppression(class_boxes,
                                                 class_box_scores,
                                                 max_boxes_tensor,
                                                 iou_threshold=iou_threshold)

        class_boxes = K.gather(class_boxes, nms_index)  # boxes at indices nms_index
        class_box_scores = K.gather(class_box_scores, nms_index)  # scores at indices nms_index
        classes = K.ones_like(class_box_scores,
                              'int32') * c  # all-ones tensor shaped like class_box_scores, times c
        boxes_.append(class_boxes)
        scores_.append(class_box_scores)
        classes_.append(classes)
    boxes_ = K.concatenate(boxes_, axis=0)
    scores_ = K.concatenate(scores_, axis=0)
    classes_ = K.concatenate(classes_, axis=0)

    return boxes_, scores_, classes_
Exemple #39
def shuffling(x):
    idxs = K.arange(0, K.shape(x)[0])
    idxs = K.tf.random_shuffle(idxs)
    return K.gather(x, idxs)
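# What `shuffling` computes, in numpy terms (purely illustrative): draw a
# random permutation of the leading axis and gather rows with it.
import numpy as np

x = np.array([[1., 2.], [3., 4.], [5., 6.]])
idxs = np.random.permutation(x.shape[0])
print(x[idxs])  # the rows of x in a random order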
 def get_output(self, train=False):
     X = self.get_input(train)
     out = K.gather(self.W, X)
     return out
def photoMetric(disp, left, right, width, height, batchsize):
    '''
    Partially inspired by https://github.com/mtngld/monodepth-1/blob/1f1fc80ac0dc727f3de561ead89e6792aea5e178/bilinear_sampler.py, eg use of gather function
    '''

    # Flatten and separate out channels
    # [batch, width, height, channel]

    disp_f = K.flatten(K.permute_dimensions(disp, pattern=(0, 2, 1, 3)))
    left_f_0 = K.flatten(
        K.permute_dimensions(left[:, :, :, 0], pattern=(0, 2, 1)))
    right_f_0 = K.flatten(
        K.permute_dimensions(right[:, :, :, 0], pattern=(0, 2, 1)))
    left_f_1 = K.flatten(
        K.permute_dimensions(left[:, :, :, 1], pattern=(0, 2, 1)))
    right_f_1 = K.flatten(
        K.permute_dimensions(right[:, :, :, 1], pattern=(0, 2, 1)))
    left_f_2 = K.flatten(
        K.permute_dimensions(left[:, :, :, 2], pattern=(0, 2, 1)))
    right_f_2 = K.flatten(
        K.permute_dimensions(right[:, :, :, 2], pattern=(0, 2, 1)))

    # find the self-referential indices in the tensor
    indicies = K.arange(0, batchsize * width * height, dtype='float32')

    right_referances = K.clip(
        indicies + (disp_f * 1. * width * 0.3), 0,
        batchsize * width * height - 1)  # 0.3 reflects the v1 paper implementation details

    # up to this point no gradients get lost
    intReferancesLow = K.cast(tf.floor(right_referances), 'int32')
    intReferancesHigh = K.cast(tf.ceil(right_referances), 'int32')

    lowWeights = 1 - K.abs(
        K.cast(intReferancesLow, 'float32') - right_referances)
    highWeights = 1 - K.abs(
        K.cast(intReferancesHigh, 'float32') - right_referances)

    # gather the values to create the left re-projected images
    right_f_referance_to_projected_0 = K.gather(
        right_f_0, intReferancesLow) * lowWeights + K.gather(
            right_f_0, intReferancesHigh) * highWeights
    right_f_referance_to_projected_1 = K.gather(
        right_f_1, intReferancesLow) * lowWeights + K.gather(
            right_f_1, intReferancesHigh) * highWeights
    right_f_referance_to_projected_2 = K.gather(
        right_f_2, intReferancesLow) * lowWeights + K.gather(
            right_f_2, intReferancesHigh) * highWeights

    #return K.mean(right_f_referance_to_projected_0)

    # get difference between original left and right images
    #L2Direct      = K.sqrt(  K.square(left_f_0 - right_f_0)
    #                      +  K.square(left_f_1 - right_f_1)
    #                      +  K.square(left_f_2 - right_f_2))
    L1Direct =  K.abs((left_f_0 - right_f_0)) \
              + K.abs((left_f_1 - right_f_1)) \
              + K.abs((left_f_2 - right_f_2))

    #L2Reproject = K.sqrt(   K.square(left_f_0 - right_f_referance_to_projected_0) \
    #                      + K.square(left_f_1 - right_f_referance_to_projected_1) \
    #                      + K.square(left_f_2 - right_f_referance_to_projected_2) )
    L1Reproject =   K.abs(left_f_0 - right_f_referance_to_projected_0) \
                  + K.abs(left_f_1 - right_f_referance_to_projected_1) \
                  + K.abs(left_f_2 - right_f_referance_to_projected_2)

    greyImageRight = (right_f_0 + right_f_1 + right_f_2) / 3.
    greyImageReproject = (right_f_referance_to_projected_0 +
                          right_f_referance_to_projected_1 +
                          right_f_referance_to_projected_2) / 3.
    greyLeftImage = (left_f_0 + left_f_1 + left_f_2) / 3.

    mean_right = K.mean(greyImageRight)
    mean_reproject = K.mean(greyImageReproject)
    mean_left = K.mean(greyLeftImage)

    variance_right = K.sum(K.square(greyImageRight - mean_right)) / (
        batchsize * width * height - 1)
    variance_reproject = K.sum(
        K.square(greyImageReproject -
                 mean_reproject)) / (batchsize * width * height - 1)
    variance_left = K.sum(
        K.square(greyLeftImage - mean_left)) / (batchsize * width * height - 1)

    covariance_right_reproject = K.sum(
        (greyImageRight - mean_right) *
        (greyImageReproject - mean_reproject)) / (
            batchsize * width * height - 1)  # TODO not sum this for masking
    covariance_left_right = K.sum(
        (greyLeftImage - mean_left) * (greyImageRight - mean_right)) / (
            batchsize * width * height - 1)  # TODO not sum this for masking

    L = 256 - 1  # the dynamic range of the images

    c_1 = (0.01 * L) * (0.01 * L)  # default values
    c_2 = (0.03 * L) * (0.03 * L)  # default values

    # standard SSIM denominator: (mu_x^2 + mu_y^2 + c_1)(sigma_x^2 + sigma_y^2 + c_2)
    SSIM_right_reproject = (2*mean_right*mean_reproject+c_1)*(2*covariance_right_reproject + c_2)/ \
                            ((mean_right*mean_right+mean_reproject*mean_reproject+c_1)*(variance_right+variance_reproject+c_2))

    SSIM_right_left      = (2*mean_right*mean_left+c_1)*(2*covariance_left_right + c_2)/ \
                            ((mean_right*mean_right+mean_left*mean_left+c_1)*(variance_right+variance_left+c_2))

    #return L1Direct, L1Reproject * (right_referances /( right_referances + 1e-10)), SSIM_right_reproject, SSIM_right_left
    return L1Direct, L1Reproject, SSIM_right_reproject, SSIM_right_left
Exemple #42
 def get_output(self, train=False):
     X = self.get_input(train)
     if self.dropout:
         raise NotImplementedError()     # TODO
     out = K.gather(self.W, X)
     return out
import keras
import tensorflow as tf
from keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau

from config import patience, epochs, num_train_samples, num_valid_samples, batch_size
from data_generator import train_gen, valid_gen
from model import build_model, build_simple_model
# from utils import get_available_gpus, categorical_crossentropy_color
import numpy as np
import keras.backend as K

prior_factor = np.load("prior_factor.npy")
prior_factor = K.cast(prior_factor, dtype='float32')

idx_max = np.random.randint(313, size=(16, 32, 32))

a = K.gather(prior_factor, idx_max)

print("")
Exemple #44
def simple_test(image_path):
	image = cv2.imread(image_path, cv2.IMREAD_COLOR)
	height = image.shape[0]
	width = image.shape[1]
	image = cv2.resize(image, (image_w,image_h))
	image = image.reshape((1,image_w,image_h,3))
	prediction = model.predict(image, batch_size=1)
	print(prediction.shape)
	# 1, 13, 13, 125
	# Reshape it to 1,13,13,5,25
	# 5 anchor boxes at every grid cell in the 13 x 13 grid
	# 25 elements for each anchor box:
	# probability that an object is present, bx, by, w, h, 20-dim vector for each class
	p_resh = prediction.reshape(1, 13, 13, 5, 25)
	print(p_resh.shape)

	for box_i in range(5):
		box = p_resh[0][0][0][box_i]
		pc   = box[0]
		c_scores = box[5:]
		res = pc * c_scores
		idx = np.argmax(res)
		p = class_dict[idx]
		print("Box No {} score {} box {},{},{},{} class {} ".format(box_i, res[idx], box[1],box[2],box[3],box[4], p))

	box_confidence = p_resh[:,:,:,:,0]
	box_confidence = box_confidence.reshape(1,13,13,5,1)
	boxes = p_resh[:,:,:,:,1:5]
	boxes = boxes.reshape(1,13,13,5,4)
	box_class_prob = p_resh[:,:,:,:,5:]
	box_class_prob = box_class_prob.reshape(1,13,13,5,20)

	# Filter the boxes
	threshold = 0.6
	box_scores = np.multiply(box_confidence, box_class_prob)
	print(box_scores.shape)
	box_class = K.argmax(box_scores, axis =-1)
	box_class_scores = K.max(box_scores, axis=-1)
	# Filtering mask
	filtering_mask = K.greater_equal(box_class_scores, threshold)
	with K.get_session() as test:
		scores = tf.boolean_mask(box_class_scores, filtering_mask).eval()
		boxes = tf.boolean_mask(boxes, filtering_mask).eval()
		classes = tf.boolean_mask(box_class, filtering_mask).eval()
	
		print(boxes.shape)
		print(classes.shape)
		print(scores.shape)


		max_boxes = 5
		iou_threshold = 0.6


		max_boxes_tensor = K.variable(max_boxes, dtype='int32')     # tensor to be used in tf.image.non_max_suppression()
		test.run(tf.variables_initializer([max_boxes_tensor]))# initialize variable max_boxes_tensor
	# Use tf.image.non_max_suppression() to get the list of indices corresponding to boxes you keep


		nms_indices = tf.image.non_max_suppression(boxes, scores, max_boxes_tensor, iou_threshold=iou_threshold)
		scores = K.gather(scores, nms_indices).eval()
		boxes = K.gather(boxes, nms_indices).eval()
		classes = K.gather(classes, nms_indices).eval()

		print(boxes.shape)
		print(classes.shape)
		print(scores.shape)

		# scale the boxes
		image_dims = K.stack([height, width, height, width])
		image_dims = K.reshape(image_dims, [1, 4])
		boxes = boxes * image_dims

		print(boxes.eval())
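# Hedged usage of the function above (it assumes module-level `model`,
# `class_dict`, `image_w` and `image_h`, as its body implies):
# simple_test('test_image.jpg')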
    def copy_generator_network(batch_size,
                               sequence_class,
                               n_classes=1,
                               seq_length=205,
                               supply_inputs=False,
                               master_generator=master_generator,
                               copy_number=copy_number):

        sequence_class_onehots = np.eye(n_classes)

        #Generator network parameters
        latent_size = 100

        #Generator inputs
        latent_input_1, latent_input_2, latent_input_1_out, latent_input_2_out = None, None, None, None
        if not supply_inputs:
            latent_input_1 = Input(tensor=K.ones((batch_size, latent_size)),
                                   name='noise_input_1')
            latent_input_2 = Input(tensor=K.ones((batch_size, latent_size)),
                                   name='noise_input_2')
            latent_input_1_out = Lambda(
                lambda inp: inp * K.random_uniform(
                    (batch_size, latent_size), minval=-1.0, maxval=1.0),
                name='lambda_rand_input_1')(latent_input_1)
            latent_input_2_out = Lambda(
                lambda inp: inp * K.random_uniform(
                    (batch_size, latent_size), minval=-1.0, maxval=1.0),
                name='lambda_rand_input_2')(latent_input_2)
        else:
            latent_input_1 = Input(batch_shape=(batch_size, latent_size),
                                   name='noise_input_1')
            latent_input_2 = Input(batch_shape=(batch_size, latent_size),
                                   name='noise_input_2')
            latent_input_1_out = Lambda(
                lambda inp: inp, name='lambda_rand_input_1')(latent_input_1)
            latent_input_2_out = Lambda(
                lambda inp: inp, name='lambda_rand_input_2')(latent_input_2)

        class_embedding = Lambda(
            lambda x: K.gather(K.constant(sequence_class_onehots),
                               K.cast(x[:, 0], dtype='int32')))(sequence_class)

        seed_input_1 = Concatenate(axis=-1)(
            [latent_input_1_out, class_embedding])
        seed_input_2 = Concatenate(axis=-1)(
            [latent_input_2_out, class_embedding])

        #Policy network definition
        policy_dense_1 = master_generator.get_layer('policy_dense_1')

        policy_dense_1_reshape = Reshape((21, 1, 384))

        policy_deconv_0 = master_generator.get_layer('policy_deconv_0')

        policy_deconv_1 = master_generator.get_layer('policy_deconv_1')

        policy_deconv_2 = master_generator.get_layer('policy_deconv_2')

        policy_conv_3 = master_generator.get_layer('policy_conv_3')

        policy_conv_4 = master_generator.get_layer('policy_conv_4')

        policy_conv_5 = master_generator.get_layer('policy_conv_5')

        #policy_deconv_3 = Conv2DTranspose(4, (7, 1), strides=(1, 1), padding='valid', activation='linear', kernel_initializer='glorot_normal', name='policy_deconv_3')

        batch_norm_0 = master_generator.get_layer('policy_batch_norm_0')
        relu_0 = Lambda(lambda x: K.relu(x))
        batch_norm_1 = master_generator.get_layer('policy_batch_norm_1')
        relu_1 = Lambda(lambda x: K.relu(x))
        batch_norm_2 = master_generator.get_layer('policy_batch_norm_2')
        relu_2 = Lambda(lambda x: K.relu(x))

        batch_norm_3 = master_generator.get_layer('policy_batch_norm_3')
        relu_3 = Lambda(lambda x: K.relu(x))

        batch_norm_4 = master_generator.get_layer('policy_batch_norm_4')
        relu_4 = Lambda(lambda x: K.relu(x))

        policy_out_1 = Reshape((seq_length, 4, 1))(policy_conv_5(
            relu_4(
                batch_norm_4(policy_conv_4(
                    relu_3(
                        batch_norm_3(policy_conv_3(
                            relu_2(
                                batch_norm_2(policy_deconv_2(
                                    relu_1(
                                        batch_norm_1(policy_deconv_1(
                                            relu_0(
                                                batch_norm_0(policy_deconv_0(
                                                    policy_dense_1_reshape(
                                                        policy_dense_1(
                                                            seed_input_1))),
                                                             training=True))),
                                                     training=True))),
                                             training=True))),
                                     training=True))),
                             training=True))))
        policy_out_2 = Reshape((seq_length, 4, 1))(policy_conv_5(
            relu_4(
                batch_norm_4(policy_conv_4(
                    relu_3(
                        batch_norm_3(policy_conv_3(
                            relu_2(
                                batch_norm_2(policy_deconv_2(
                                    relu_1(
                                        batch_norm_1(policy_deconv_1(
                                            relu_0(
                                                batch_norm_0(policy_deconv_0(
                                                    policy_dense_1_reshape(
                                                        policy_dense_1(
                                                            seed_input_2))),
                                                             training=True))),
                                                     training=True))),
                                             training=True))),
                                     training=True))),
                             training=True))))

        return [latent_input_1, latent_input_2], [policy_out_1,
                                                  policy_out_2], []
Exemple #46
def neighbour_lookup(atoms, edges, maskvalue=0, include_self=False):
    ''' Looks up the features of all atoms' neighbours, for a batch of molecules.

    # Arguments:
        atoms (K.tensor): of shape (batch_n, max_atoms, num_atom_features)
        edges (K.tensor): of shape (batch_n, max_atoms, max_degree) with neighbour
            indices and -1 as padding value
        maskvalue (numerical): the masking value that should be used for empty atoms
            or atoms that have no neighbours (does not affect the input mask value,
            which should always be -1!)
        include_self (bool): if True, the feature vector of each atom will be added
            to the list of feature vectors of its neighbours

    # Returns:
        neighbour_features (K.tensor): of shape (batch_n, max_atoms(+1), max_degree,
            num_atom_features), depending on the value of include_self

    # Todo:
        - make this function compatible with Tensorflow; it should be quite trivial
            because there is an equivalent of `T.arange` in tensorflow.
    '''

    # The lookup masking trick: We add 1 to all indices, converting the
    #   masking value of -1 to a valid 0 index.
    masked_edges = edges + 1
    # We then add a padding vector at index 0 by padding to the left of the
    #   lookup matrix with the value that the new mask should get
    masked_atoms = temporal_padding(atoms, (1, 0), padvalue=maskvalue)

    # Import dimensions
    atoms_shape = K.shape(masked_atoms)
    batch_n = atoms_shape[0]
    lookup_size = atoms_shape[1]
    num_atom_features = atoms_shape[2]

    edges_shape = K.shape(masked_edges)
    max_atoms = edges_shape[1]
    max_degree = edges_shape[2]

    # create broadcastable offset
    offset_shape = (batch_n, 1, 1)
    offset = K.reshape(
        tf.keras.backend.arange(stop=batch_n, start=0, dtype='int32'),
        offset_shape)
    offset *= lookup_size

    # apply offset to account for the fact that after reshape, all individual
    #   batch_n indices will be combined into a single big index
    flattened_atoms = K.reshape(masked_atoms, (-1, num_atom_features))
    flattened_edges = K.reshape(masked_edges + offset, (batch_n, -1))

    # Gather flattened
    flattened_result = K.gather(flattened_atoms, flattened_edges)

    # Unflatten result
    output_shape = (batch_n, max_atoms, max_degree, num_atom_features)
    output = K.reshape(flattened_result, output_shape)

    if include_self:
        return K.concatenate([tf.expand_dims(atoms, axis=2), output], axis=2)
    return output
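# A minimal numpy illustration of the masking trick described in the docstring
# (illustrative only): adding 1 to the indices turns the -1 padding into a
# safe lookup of row 0, which holds the mask value.
import numpy as np

atoms = np.array([[1., 1.], [2., 2.]])  # two atoms, two features each
edges = np.array([1, -1])  # atom 0 neighbours atom 1; no second neighbour
masked_atoms = np.vstack([np.zeros((1, 2)), atoms])  # pad mask row at index 0
print(masked_atoms[edges + 1])  # [[2., 2.], [0., 0.]]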
Exemple #47
def build_network(X_nodes, X_edges, X_nodes_in_out, X_messages_in,
                  X_messages_out, message_passers, state_updater, readout,
                  ndim_features_nodes, fake_message_const, steps):
    for step in range(steps):
        messages = message_passers[step](K.concatenate(
            [K.reshape(K.gather(reference=X_nodes, indices=X_nodes_in_out),
                       shape=(-1, 2 * ndim_features_nodes)),
             X_edges], axis=1))
        messages = K.concatenate([messages, fake_message_const], axis=0)
        messages = tf.where(tf.is_inf(messages), tf.zeros_like(messages),
                            messages)

        messages_aggregated_in = K.max(K.gather(reference=messages,
                                                indices=X_messages_in),
                                       axis=1)
        messages_aggregated_out = K.max(K.gather(reference=messages,
                                                 indices=X_messages_out),
                                        axis=1)

        messages_aggregated_in2 = K.mean(K.gather(reference=messages,
                                                  indices=X_messages_in),
                                         axis=1)
        messages_aggregated_out2 = K.mean(K.gather(reference=messages,
                                                   indices=X_messages_out),
                                          axis=1)

        messages_aggregated_in3 = K.var(K.gather(reference=messages,
                                                 indices=X_messages_in),
                                        axis=1)
        messages_aggregated_out3 = K.var(K.gather(reference=messages,
                                                  indices=X_messages_out),
                                         axis=1)

        messages_aggregated_in4 = K.std(K.gather(reference=messages,
                                                 indices=X_messages_in),
                                        axis=1)
        messages_aggregated_out4 = K.std(K.gather(reference=messages,
                                                  indices=X_messages_out),
                                         axis=1)

        ## For GRU-based state_updater
        #  _, X_nodes = state_updater(
        #     inputs=K.concatenate([messages_aggregated_in, messages_aggregated_out
        #      ,messages_aggregated_in2, messages_aggregated_out2,
        #      messages_aggregated_in3, messages_aggregated_out3,
        #    ], axis=1),
        # state=X_nodes
        #   )

        # For LSTM-based state_updater
        #  _, (_, X_nodes) = state_updater(
        #     inputs=K.concatenate([messages_aggregated_in, messages_aggregated_out
        #                          ,messages_aggregated_in2, messages_aggregated_out2,
        #                         messages_aggregated_in3, messages_aggregated_out3,
        #                        messages_aggregated_in4, messages_aggregated_out4
        #                    ], axis=1),
        #   state=(tf.zeros_like(X_nodes), X_nodes)
        #   )

        ## For dense state_updater
        X_nodes = state_updater(
            K.concatenate([
                messages_aggregated_in, messages_aggregated_in2,
                messages_aggregated_out, messages_aggregated_out2,
                messages_aggregated_in3, messages_aggregated_out3,
                messages_aggregated_in4, messages_aggregated_out4, X_nodes
            ], axis=1))

    return readout(X_nodes)
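# Illustrative numpy sketch of the per-node aggregation above: gather each
# node's incoming messages by index, then reduce with several statistics that
# get concatenated as input to the state updater (values are made up).
import numpy as np

messages = np.array([[1., 0.], [3., 2.], [5., 4.]])
X_messages_in = np.array([[0, 1], [1, 2]])  # two nodes, two incoming messages each
gathered = messages[X_messages_in]  # shape (2, 2, 2)
print(gathered.max(axis=1), gathered.mean(axis=1), gathered.var(axis=1))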
def _construct_inference_tensors(*,
                                 restored_model,
                                 num_of_anchors,
                                 anchors,
                                 model_image_width,
                                 model_image_height,
                                 prob_detection_threshold=0.25,
                                 nms_iou_threshold=0.5):
    """
    Constructs input tensors (placeholders) and output tensors that are used for inference.

    Arguments:
    :param restored_model Keras model restored from the Darknet
    :param num_of_anchors number of anchors used in the architecture
    :param anchors anchors used in the architecture (expected shape=(num_of_anchors, 2), first dimension is width)
    :param model_image_width width of the image used by model (needs to be divisible by 32)
    :param model_image_height height of the image used by model (needs to be divisible by 32)
    :param prob_detection_threshold threshold for detecting object
    :param nms_iou_threshold threshold for non-max suppression

    Returns:
    :return (out_tensors, input_tensors)
        - out_tensors - (picked_boxes, picked_classes, picked_scores)
            - picked_boxes = Tensor of (left, top, bottom, right)
            - picked_classes = Tensor of ints
            - picked_score = Tensor of floats
        - input_tensors = (model_input, orig_image_width, orig_image_height)
            - orig_image_width - Placeholder for original image width (before resizing)
            - orig_image_height - Placeholder for original image height (before resizing)
            - model_input - Placeholder for image pixels
    """
    start = time.time()
    boxes = []
    prob_class = []

    placeholder_orig_image_width = K.placeholder(shape=(1, ))
    placeholder_orig_image_height = K.placeholder(shape=(1, ))

    for yolo_head_idx in range(len(restored_model.output)):
        yolo_head = restored_model.output[yolo_head_idx]
        yolo_head_shape = K.shape(yolo_head)
        yolo_head_num_of_cols = yolo_head_shape[2]
        yolo_head_num_of_rows = yolo_head_shape[1]

        curr_yolo_head = K.reshape(yolo_head, [
            -1, yolo_head_num_of_cols, yolo_head_num_of_rows, num_of_anchors,
            NUM_OF_BOX_PARAMS + NUM_OF_CLASSES
        ])

        grid = construct_grid(yolo_head_shape[1], yolo_head_shape[2])
        grid = K.cast(grid, dtype=K.dtype(curr_yolo_head))
        grid_size = K.cast([yolo_head_num_of_cols, yolo_head_num_of_rows],
                           dtype=K.dtype(curr_yolo_head))

        curr_boxes_xy = (K.sigmoid(curr_yolo_head[..., :2]) + grid) / grid_size

        curr_boxes_wh = K.exp(curr_yolo_head[..., 2:4]) * anchors[yolo_head_idx]

        curr_prob_obj = K.sigmoid(curr_yolo_head[..., 4:5])
        curr_prob_class = K.sigmoid(curr_yolo_head[..., 5:])
        curr_prob_detected_class = curr_prob_obj * curr_prob_class

        boxes.append(
            get_corrected_boxes(
                box_width=curr_boxes_wh[..., 0:1],
                box_height=curr_boxes_wh[..., 1:2],
                box_x=curr_boxes_xy[..., 0:1],
                box_y=curr_boxes_xy[..., 1:2],
                orig_image_shape=(placeholder_orig_image_width,
                                  placeholder_orig_image_height),
                model_image_shape=(model_image_width, model_image_height)))

        curr_prob_detected_class = K.reshape(curr_prob_detected_class,
                                             [-1, NUM_OF_CLASSES])
        prob_class.append(curr_prob_detected_class)

    prob_class = K.concatenate(prob_class, axis=0)
    boxes = K.concatenate(boxes, axis=0)

    mask = prob_class >= prob_detection_threshold
    max_boxes_tensor = K.constant(20, dtype='int32')

    picked_boxes = []
    picked_scores = []
    picked_classes = []

    for c in range(NUM_OF_CLASSES):
        class_boxes = tf.boolean_mask(boxes, mask[:, c])
        class_box_scores = tf.boolean_mask(prob_class[:, c], mask[:, c])
        nms_index = tf.image.non_max_suppression(
            class_boxes,
            class_box_scores,
            max_boxes_tensor,
            iou_threshold=nms_iou_threshold)

        class_boxes = K.gather(class_boxes, nms_index)
        class_box_scores = K.gather(class_box_scores, nms_index)
        classes = K.ones_like(class_box_scores, 'int32') * c

        picked_boxes.append(class_boxes)
        picked_scores.append(class_box_scores)
        picked_classes.append(classes)

    picked_boxes = K.concatenate(picked_boxes, axis=0)
    picked_scores = K.concatenate(picked_scores, axis=0)
    picked_classes = K.concatenate(picked_classes, axis=0)

    out_tensors = [picked_boxes, picked_scores, picked_classes]

    print(f'Took {time.time() - start} seconds to construct network.')

    input_tensors = [
        restored_model.input, placeholder_orig_image_width,
        placeholder_orig_image_height
    ]

    return out_tensors, input_tensors
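# Hedged usage sketch (TF1-style; `restored_model`, `anchors`, `image`,
# `orig_w` and `orig_h` are assumptions standing in for real objects):
# out_tensors, input_tensors = _construct_inference_tensors(
#     restored_model=restored_model, num_of_anchors=3, anchors=anchors,
#     model_image_width=416, model_image_height=416)
# boxes, scores, classes = K.get_session().run(
#     out_tensors,
#     feed_dict={input_tensors[0]: image[np.newaxis, ...],
#                input_tensors[1]: [orig_w], input_tensors[2]: [orig_h]})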
Exemple #49
    def decoder_fn(time, cell_state, cell_input, cell_output, context_state):
        """Decoder function used in the `dynamic_rnn_decoder` for inference.

        The main difference between this decoder function and the `decoder_fn` in
        `attention_decoder_fn_train` is how `next_cell_input` is calculated. In
        decoder function we calculate the next input by applying an argmax across
        the feature dimension of the output from the decoder. This is a
        greedy-search approach. (Bahdanau et al., 2014) & (Sutskever et al., 2014)
        use beam-search instead.

        Args:
          time: positive integer constant reflecting the current timestep.
          cell_state: state of RNNCell.
          cell_input: input provided by `dynamic_rnn_decoder`.
          cell_output: output of RNNCell.
          context_state: context state provided by `dynamic_rnn_decoder`.

        Returns:
          A tuple (done, next state, next input, emit output, next context state)
          where:

          done: A boolean vector indicating which sentences have reached an
          `end_of_sequence_id`. This is used for early stopping by the
          `dynamic_rnn_decoder`. When `time>=maximum_length`, a boolean vector
          with all elements set to `true` is returned.

          next state: `cell_state`, this decoder function does not modify the
          given state.

          next input: The embedding from argmax of the `cell_output` is used as
          `next_input`.

          emit output: If `output_fn is None` the supplied `cell_output` is
          returned, else the `output_fn` is used to update the `cell_output`
          before calculating `next_input` and returning `cell_output`.

          next context state: `context_state`, this decoder function does not
          modify the given context state. The context state could be modified when
          applying e.g. beam search.

        Raises:
          ValueError: if cell_input is not None.

        """
        with tf.name_scope(
                name, "attention_decoder_fn_inference",
            [time, cell_state, cell_input, cell_output, context_state]):
            if cell_input is not None:
                raise ValueError(
                    "Expected cell_input to be None, but saw: %s" % cell_input)
            if cell_output is None:
                # invariant that this is time == 0
                next_input_id = K.ones([batch_size],
                                       dtype=dtype) * start_of_sequence_id
                done = tf.zeros([batch_size], dtype=tf.bool)
                cell_state = encoder_state
                cell_output = K.zeros([num_decoder_symbols], dtype=tf.float32)
                cell_input = K.gather(embeddings, next_input_id)

                # init attention
                attention = _init_attention(encoder_state)
            else:
                # construct attention
                attention = attention_construct_fn(cell_output, attention_keys,
                                                   attention_values)
                cell_output = attention

                # argmax decoder
                cell_output = output_fn(cell_output)  # logits
                next_input_id = K.cast(K.argmax(cell_output, 1), dtype=dtype)
                done = K.equal(next_input_id, end_of_sequence_id)
                cell_input = K.gather(embeddings, next_input_id)

            # combine cell_input and attention
            next_input = Concatenate(axis=1)([cell_input, attention])

            # if time > maxlen, return all true vector
            done = tf.cond(K.greater(time, maximum_length),
                           lambda: K.ones([batch_size], dtype=tf.bool),
                           lambda: done)
            return (done, cell_state, next_input, cell_output, context_state)
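# The greedy step above in isolation (illustrative numpy sketch, with a toy
# embedding table): take the argmax over the vocabulary logits and look up its
# embedding as the next decoder input.
import numpy as np

embeddings = np.eye(4)  # toy 4-symbol embedding table
logits = np.array([0.1, 2.0, -1.0, 0.3])
next_id = int(np.argmax(logits))  # greedy choice -> 1
print(embeddings[next_id])  # the embedding fed to the next timestep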
Exemple #50
            def dropped_inputs():
                if 'max' == self.agg_method:
                    x_agg = bk.max(inputs, axis=0)
                    if self.smooth_rate > 0:
                        x_agg = self.smooth_rate * bk.mean(
                            inputs, axis=0) + (1 - self.smooth_rate) * x_agg
                elif 'extreme' == self.agg_method:
                    x_mean = bk.mean(inputs, axis=0)
                    x_agg = tf.where(x_mean >= 0, bk.max(inputs, axis=0),
                                     bk.min(inputs, axis=0))
                    if self.smooth_rate > 0:
                        x_agg = self.smooth_rate * x_mean + (
                            1 - self.smooth_rate) * x_agg
                else:
                    x_agg = bk.mean(inputs, axis=0)

                x_min, x_max = bk.min(x_agg), bk.max(x_agg)
                x_agg_int = bk.cast(
                    input_shape[-1] * (x_agg - x_min) / (x_max - x_min),
                    'int32')
                if self.unique_supported:
                    _, idx, counts = tf.unique_with_counts(x_agg_int)
                    dr = self.rate**(
                        1. / (self.anneal * bk.cast(counts, inputs.dtype)))
                    dr = tf.where(1 == counts, self.rate * bk.ones_like(dr),
                                  dr)
                else:

                    def _seg_dr(ele):
                        _cnt = bk.sum(bk.cast(ele == x_agg_int, inputs.dtype))
                        _dr = self.rate if 1 == _cnt else self.rate**(
                            1. / (self.anneal * _cnt))
                        return _dr

                    dr = bk.map_fn(_seg_dr, x_agg_int, dtype=inputs.dtype)
                    idx = bk.arange(0, x_agg_int.shape[0])

                if 'gaussian' == self.noise_type:
                    sigma = (dr / (1. - dr))**.5
                    noise_tensor = bk.gather(sigma, idx) * bk.random_normal(
                        x_agg_int.shape, dtype=inputs.dtype) + 1.
                    return inputs * noise_tensor
                else:
                    dr_tensor = bk.random_uniform(noise_shape,
                                                  seed=self.seed,
                                                  dtype=inputs.dtype)
                    ret = inputs * bk.cast(dr_tensor >= bk.gather(dr, idx),
                                           inputs.dtype)

                    if 'abs' == self.keep_amp_type:
                        old_amps = bk.sum(bk.abs(inputs),
                                          axis=-1,
                                          keepdims=True)
                        cur_amps = bk.sum(bk.stop_gradient(bk.abs(ret)),
                                          axis=-1,
                                          keepdims=True)
                        ret = ret * old_amps / (cur_amps + self.epsilon)
                    elif self.keep_amp_type is not None:
                        old_amps = bk.sum(inputs, axis=-1, keepdims=True)
                        cur_amps = bk.sum(bk.stop_gradient(ret),
                                          axis=-1,
                                          keepdims=True)
                        ret = ret * old_amps / (cur_amps + self.epsilon)

                    return ret
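# The gaussian branch above relies on the usual multiplicative-noise view of
# dropout: drop rate p corresponds to noise with mean 1 and standard deviation
# sqrt(p / (1 - p)). A purely illustrative numpy check:
import numpy as np

p = 0.1
noise = 1. + (p / (1. - p)) ** .5 * np.random.randn(100000)
print(noise.mean(), noise.std())  # ~1.0 and ~0.333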
def yolo_eval(yolo_outputs,
              anchors,
              num_classes,
              image_shape,
              max_boxes=20,
              score_threshold=.6,
              iou_threshold=.5):
    """Evaluate YOLO model on given input and return filtered boxes."""
    num_layers = len(yolo_outputs)
    anchor_mask = [[3, 4, 5], [0, 1, 2]]  # default setting
    input_shape = K.shape(yolo_outputs[0])[1:3] * 32
    boxes = []
    box_scores = []
    for l in range(num_layers):
        _boxes, _box_scores = yolo_boxes_and_scores(yolo_outputs[l],
                                                    anchors[anchor_mask[l]],
                                                    num_classes, input_shape,
                                                    image_shape)
        boxes.append(_boxes)
        box_scores.append(_box_scores)
    boxes = K.concatenate(boxes, axis=0)
    box_scores = K.concatenate(box_scores, axis=0)

    mask = box_scores >= score_threshold
    max_boxes_tensor = K.constant(max_boxes, dtype='int32')
    boxes_ = []
    scores_ = []
    classes_ = []

    ### class index 4 is handled first, outside the loop below
    class_boxes = tf.boolean_mask(boxes, mask[:, 4])
    class_box_scores = tf.boolean_mask(box_scores[:, 4], mask[:, 4])
    nms_index = tf.image.non_max_suppression(class_boxes,
                                             class_box_scores,
                                             max_boxes_tensor,
                                             iou_threshold=iou_threshold)
    class_boxes = K.gather(class_boxes, nms_index)
    class_box_scores = K.gather(class_box_scores, nms_index)
    classes = K.ones_like(class_box_scores, 'int32') * 4
    boxes_.append(class_boxes)
    scores_.append(class_box_scores)
    classes_.append(classes)

    for c in range(num_classes):
        if c == 4:
            continue

        else:
            class_boxes = tf.boolean_mask(boxes, mask[:, c])
            class_box_scores = tf.boolean_mask(box_scores[:, c], mask[:, c])
            nms_index = tf.image.non_max_suppression(
                class_boxes,
                class_box_scores,
                max_boxes_tensor,
                iou_threshold=iou_threshold)
            class_boxes = K.gather(class_boxes, nms_index)
            class_box_scores = K.gather(class_box_scores, nms_index)
            classes = K.ones_like(class_box_scores, 'int32') * c
            boxes_.append(class_boxes)
            scores_.append(class_box_scores)
            classes_.append(classes)

    boxes_ = K.concatenate(boxes_, axis=0)
    scores_ = K.concatenate(scores_, axis=0)
    classes_ = K.concatenate(classes_, axis=0)

    return boxes_, scores_, classes_
Exemple #52
def sparse_gather(y_pred, target_indices, task_name):
    clf_h = Lambda(lambda x: K.reshape(x, (-1, K.int_shape(x)[-1])),
                   name=task_name + '_flatten')(y_pred)
    return Lambda(lambda x: K.gather(x[0], K.cast(x[1], 'int32')),
                  name=task_name + '_gather')([clf_h, target_indices])
Exemple #53
    iou_rate = inter_area / union_area

    return iou_rate
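# The two lines above are the tail of a truncated IoU helper; a self-contained
# sketch of the full computation under the usual (x1, y1, x2, y2) convention
# (the names and signature here are assumptions, not recovered source):
import numpy as np

def iou_rate(box_a, box_b):
    x1 = max(box_a[0], box_b[0])
    y1 = max(box_a[1], box_b[1])
    x2 = min(box_a[2], box_b[2])
    y2 = min(box_a[3], box_b[3])
    inter_area = max(0., x2 - x1) * max(0., y2 - y1)
    area_a = (box_a[2] - box_a[0]) * (box_a[3] - box_a[1])
    area_b = (box_b[2] - box_b[0]) * (box_b[3] - box_b[1])
    union_area = area_a + area_b - inter_area
    return inter_area / union_area

print(iou_rate([.1, .2, .3, .4], [.1, .3, .3, .4]))  # 0.5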
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
import matplotlib.patches as patches
from keras import backend as K

def draw_rectangle():
    """
    Draw rectangles.
    :return:
    """

    fig = plt.figure()  # create the figure
    ax = fig.add_subplot(111)  # create a subplot
    # ax = plt.gca()  # get the axes object of the current figure
    ax.invert_yaxis()  # invert the y axis
    ax.xaxis.set_ticks_position('top')  # put the x axis at the top
    def add_rectangle(x1, y1, x2, y2, color="black"):  # takes the rectangle's two diagonal corners
        ax.add_patch(patches.Rectangle((x1, y1), x2-x1, y2-y1, fill=False, color=color))
    add_rectangle(.2, .1, .4, .3)
    add_rectangle(.3, .1, .4, .3, color="red")
    add_rectangle(.3, .1, .4, .4, color="blue")
    add_rectangle(.1, .1, .4, .4, color="orange")
    add_rectangle(.1, .1, .4, .3, color="yellow")
    plt.show()
boxes = np.array([[.1,.2,.3,.4],[.1,.3,.3,.4],[.1,.3,.4,.4],[.1,.1,.4,.4],[.1,.1,.3,.4]], dtype=np.float32)
scores = np.array([.4,.5,.72,.9,.45],dtype=np.float32)
with tf.Session() as sess:
    selected_indices = sess.run(tf.image.non_max_suppression(boxes=boxes, scores=scores,iou_threshold=0.5, max_output_size=5))
    print(selected_indices)
    selected_boxes = sess.run(K.gather(boxes, selected_indices))
    print(selected_boxes)
draw_rectangle()