Exemple #1
0
    def call(self, inputs, mask=None):
        if not isinstance(inputs, list) or len(inputs) <= 1:
            raise TypeError('SpkLifeLongMemory must be called on a list of tensors '
                            '(at least 2). Got: ' + str(inputs))
        # (None(batch), 1), index of speaker
        target_spk_l = inputs[0]
        target_spk_l = K.reshape(target_spk_l, (target_spk_l.shape[0], ))
        if K.dtype(target_spk_l) != 'int32':
            target_spk_l = K.cast(target_spk_l, 'int32')
        # (None(batch), embed_dim)
        spk_vector_l = inputs[1]
        # Start to update life-long memory based on the learned speech vector
        # First do normalization
        spk_vector_eps = K.switch(K.equal(spk_vector_l, 0.), np.spacing(1), spk_vector_l)  # avoid zero
        spk_vector_eps = K.sqrt(K.sum(spk_vector_eps**2, axis=1))
        spk_vector_eps = spk_vector_eps.dimshuffle((0, 'x'))
        spk_vector = T.true_div(spk_vector_l, K.repeat_elements(spk_vector_eps, self.vec_dim, axis=1))
        # Store speech vector into life-long memory according to the speaker identity.
        life_long_mem = T.inc_subtensor(self.life_long_mem[target_spk_l, :], spk_vector)
        # Normalization for memory
        life_long_mem_eps = K.switch(K.equal(life_long_mem, 0.), np.spacing(1), life_long_mem)  # avoid 0
        life_long_mem_eps = K.sqrt(K.sum(life_long_mem_eps**2, axis=1))
        life_long_mem_eps = life_long_mem_eps.dimshuffle((0, 'x'))
        life_long_mem = T.true_div(life_long_mem, K.repeat_elements(life_long_mem_eps, self.vec_dim, axis=1))

        # (None(batch), spk_size, embed_dim)
        return life_long_mem
Exemple #2
0
def step(x):
    """Theano step function"""
    if (_BACKEND == 'tensorflow'):
        import tensorflow as tf
        return tf.select(tf.python.math_ops.greater(x, 0), K.ones_like(x), K.zeros_like(x))
    else:
        return K.switch(x > 0, 1, 0)
def time_distributed_masked_max(x, m):
    """
    Computes max along the first (time) dimension.

    In:
        x - input; a 3D tensor
        m - mask
        m_value - value for masking
    """
    # place infinities where mask is off
    m_value = 0.0
    tmp = K.switch(K.equal(m, 0.0), -numpy.inf, 0.0)
    x_with_inf = x + K.expand_dims(tmp)
    x_max = K.max(x_with_inf, axis=1) 
    r = K.switch(K.equal(x_max, -numpy.inf), m_value, x_max)
    return r 
Exemple #4
0
def accumulate(attend_function, inputs, input_length,
                                mask=None, return_probabilities=False):
    '''get the running attention over a sequence. 

    given a 3dim tensor where the 1st dim is time (or not. whatever.),  calculating the running attended sum.
    in other words, at the first time step, you only have that item.
                    at the second time step, attend over the first two items.
                    at the third..  the third. so on. 

    this basically a mod on keras' rnn implementation
    author: bcm
    '''

    ndim = inputs.ndim
    assert ndim >= 3, 'inputs should be at least 3d'

    axes = [1,0] + list(range(2, ndim))
    inputs = inputs.dimshuffle(axes)

    indices = list(range(input_length))

    successive_outputs = []
    if mask is not None:
        if mask.ndim == ndim-1:
            mask = K.expand_dims(mask)
        assert mask.ndim == ndim
        mask = mask.dimshuffle(axes)
        prev_output = None

    successive_outputs = []
    successive_pvecs = []
    uncover_mask = K.zeros_like(inputs)
    uncover_indices = K.arange(input_length)
    for _ in range(ndim-1):
        uncover_indices = K.expand_dims(uncover_indices)
    make_subset = lambda i,X: K.switch(uncover_indices <= i, X, uncover_mask)
    for i in indices:
        inputs_i = make_subset(i,inputs)
        mask_i = make_subset(i,mask)
        if mask is not None:
            output = attend_function(inputs_i, mask_i) # this should not output the time dimension; it should be marginalized over. 
        else:
            output = attend_function(inputs_i) # this should not output the time dimension; it should be marginalized over. 
        if return_probabilities:
            output, p_vectors = output
            successive_pvecs.append(p_vectors)
        assert output.ndim == 2, "Your attention function is malfunctioning; the attention accumulator should return 2 dimensional tensors"
        successive_outputs.append(output)
    outputs = K.pack(successive_outputs)
    K.squeeze(outputs, -1)
    axes = [1, 0] + list(range(2, outputs.ndim))
    outputs = outputs.dimshuffle(axes)

    if return_probabilities:
        out_pvecs = K.pack(successive_pvecs)
        K.squeeze(out_pvecs, -1)
        out_pvecs = out_pvecs.dimshuffle(axes)
        outputs = [outputs, out_pvecs]

    return outputs
def residual_drop(x, input_shape, output_shape, strides=(1, 1)):
    global add_tables

    nb_filter = output_shape[0]
    conv = Convolution2D(nb_filter, 3, 3, subsample=strides, border_mode="same")(x)
    conv = BatchNormalization(axis=1)(conv)
    conv = Activation("relu")(conv)
    conv = Convolution2D(nb_filter, 3, 3, border_mode="same")(conv)
    conv = BatchNormalization(axis=1)(conv)

    if strides[0] >= 2:
        x = AveragePooling2D(strides)(x)

    if (output_shape[0] - input_shape[0]) > 0:
        pad_shape = (1,
                     output_shape[0] - input_shape[0],
                     output_shape[1],
                     output_shape[2])
        padding = K.ones(pad_shape)
        padding = K.repeat_elements(padding, K.shape(x)[0], axis=0)
        x = Lambda(lambda y: K.concatenate([y, padding], axis=1),
                   output_shape=output_shape)(x)

    _death_rate = K.variable(death_rate)
    scale = K.ones_like(conv) - _death_rate
    conv = Lambda(lambda c: K.in_test_phase(scale * c, c),
                  output_shape=output_shape)(conv)

    out = merge([conv, x], mode="sum")
    out = Activation("relu")(out)

    gate = K.variable(1, dtype="uint8")
    add_tables += [{"death_rate": _death_rate, "gate": gate}]
    return Lambda(lambda tensors: K.switch(gate, tensors[0], tensors[1]),
                  output_shape=output_shape)([out, x])
Exemple #6
0
def stock_loss(y_true, y_pred):
    alpha = 100.
    loss = K.switch(K.less(y_true * y_pred, 0), \
        alpha*y_pred**2 - K.sign(y_true)*y_pred + K.abs(y_true), \
        K.abs(y_true - y_pred)
        )
    return K.mean(loss, axis=-1)
Exemple #7
0
def yolo_loss(args, anchors, num_classes, ignore_thresh=.5):
    '''Return yolo_loss tensor

    Parameters
    ----------
    yolo_outputs: list of tensor, the output of yolo_body
    y_true: list of array, the output of preprocess_true_boxes
    anchors: array, shape=(T, 2), wh
    num_classes: integer
    ignore_thresh: float, the iou threshold whether to ignore object confidence loss

    Returns
    -------
    loss: tensor, shape=(1,)

    '''
    yolo_outputs = args[:3]
    y_true = args[3:]
    anchor_mask = [[6,7,8], [3,4,5], [0,1,2]]
    input_shape = K.cast(K.shape(yolo_outputs[0])[1:3] * 32, K.dtype(y_true[0]))
    grid_shapes = [K.cast(K.shape(yolo_outputs[l])[1:3], K.dtype(y_true[0])) for l in range(3)]
    loss = 0
    m = K.shape(yolo_outputs[0])[0]

    for l in range(3):
        object_mask = y_true[l][..., 4:5]
        true_class_probs = y_true[l][..., 5:]

        pred_xy, pred_wh, pred_confidence, pred_class_probs = yolo_head(yolo_outputs[l],
             anchors[anchor_mask[l]], num_classes, input_shape)
        pred_box = K.concatenate([pred_xy, pred_wh])

        # Darknet box loss.
        xy_delta = (y_true[l][..., :2]-pred_xy)*grid_shapes[l][::-1]
        wh_delta = K.log(y_true[l][..., 2:4]) - K.log(pred_wh)
        # Avoid log(0)=-inf.
        wh_delta = K.switch(object_mask, wh_delta, K.zeros_like(wh_delta))
        box_delta = K.concatenate([xy_delta, wh_delta], axis=-1)
        box_delta_scale = 2 - y_true[l][...,2:3]*y_true[l][...,3:4]

        # Find ignore mask, iterate over each of batch.
        ignore_mask = tf.TensorArray(K.dtype(y_true[0]), size=1, dynamic_size=True)
        object_mask_bool = K.cast(object_mask, 'bool')
        def loop_body(b, ignore_mask):
            true_box = tf.boolean_mask(y_true[l][b,...,0:4], object_mask_bool[b,...,0])
            iou = box_iou(pred_box[b], true_box)
            best_iou = K.max(iou, axis=-1)
            ignore_mask = ignore_mask.write(b, K.cast(best_iou<ignore_thresh, K.dtype(true_box)))
            return b+1, ignore_mask
        _, ignore_mask = K.control_flow_ops.while_loop(lambda b,*args: b<m, loop_body, [0, ignore_mask])
        ignore_mask = ignore_mask.stack()
        ignore_mask = K.expand_dims(ignore_mask, -1)

        box_loss = object_mask * K.square(box_delta*box_delta_scale)
        confidence_loss = object_mask * K.square(1-pred_confidence) + \
            (1-object_mask) * K.square(0-pred_confidence) * ignore_mask
        class_loss = object_mask * K.square(true_class_probs-pred_class_probs)
        loss += K.sum(box_loss) + K.sum(confidence_loss) + K.sum(class_loss)
    return loss / K.cast(m, K.dtype(loss))
Exemple #8
0
 def get_output(self, train=False):
     X = self.get_input(train)
     if train:
         M = K.max(X, axis=(2, 3), keepdims=True)
         R = K.switch(K.equal(X, M), X, 0.)
         return R
     else:
         return X
 def call(self, x, mask=None):
     # x[0]: (batch_size, input_length, input_dim)
     # x[1]: (batch_size, 1) indices of prepositions
     # Optional: x[2]: (batch_size, input_length - 2)
     assert isinstance(x, list) or isinstance(x, tuple)
     encoded_sentence = x[0]
     prep_indices = K.squeeze(x[1], axis=-1)  #(batch_size,)
     batch_indices = K.arange(K.shape(encoded_sentence)[0])  # (batch_size,)
     if self.with_attachment_probs:
         # We're essentially doing K.argmax(x[2]) here, but argmax is not differentiable!
         head_probs = x[2]
         head_probs_padding = K.zeros_like(x[2])[:, :2]  # (batch_size, 2)
         # (batch_size, input_length)
         padded_head_probs = K.concatenate([head_probs, head_probs_padding])
         # (batch_size, 1)
         max_head_probs = K.expand_dims(K.max(padded_head_probs, axis=1))
         # (batch_size, input_length, 1)
         max_head_prob_indices = K.expand_dims(K.equal(padded_head_probs, max_head_probs))
         # (batch_size, input_length, input_dim)
         masked_head_encoding = K.switch(max_head_prob_indices, encoded_sentence, K.zeros_like(encoded_sentence))
         # (batch_size, input_dim)
         head_encoding = K.sum(masked_head_encoding, axis=1)
     else:
         head_indices = prep_indices - 1  # (batch_size,)
         head_encoding = encoded_sentence[batch_indices, head_indices, :]  # (batch_size, input_dim)
     prep_encoding = encoded_sentence[batch_indices, prep_indices, :]  # (batch_size, input_dim)
     child_encoding = encoded_sentence[batch_indices, prep_indices+1, :]  # (batch_size, input_dim)
     '''
     prep_indices = x[1]
     sentence_mask = mask[0]
     if sentence_mask is not None:
         if K.ndim(sentence_mask) > 2:
             # This means this layer came after a Bidirectional layer. Keras has this bug which
             # concatenates input masks instead of output masks.
             # TODO: Fix Bidirectional instead.
             sentence_mask = K.any(sentence_mask, axis=(-2, -1))
     head_encoding, prep_encoding, child_encoding = self.get_split_averages(encoded_sentence, sentence_mask,
                                                                            prep_indices)
     '''
     head_projection = K.dot(head_encoding, self.proj_head)  # (batch_size, proj_dim)
     prep_projection = K.dot(prep_encoding, self.proj_prep)  # (batch_size, proj_dim)
     child_projection = K.dot(child_encoding, self.proj_child)  # (batch_size, proj_dim)
     #(batch_size, proj_dim)
     if self.composition_type == 'HPCT':
         composed_projection = K.tanh(head_projection + prep_projection + child_projection)
     elif self.composition_type == 'HPC':
         prep_child_projection = K.tanh(prep_projection + child_projection)  # (batch_size, proj_dim)
         composed_projection = K.tanh(head_projection + prep_child_projection)
     else:
         # Composition type in HC
         composed_projection = K.tanh(head_projection + child_projection)
     for hidden_layer in self.hidden_layers:
         composed_projection = K.tanh(K.dot(composed_projection, hidden_layer))  # (batch_size, proj_dim)
     # (batch_size, num_classes)
     class_scores = K.dot(composed_projection, self.scorer)
     label_probabilities = K.softmax(class_scores)
     return label_probabilities
 def _gen_local_drops(self, count, p):
     # Create a local droppath with at least one path
     arr = self._random_arr(count, p)
     drops = K.switch(
         K.any(arr),
         arr,
         self._arr_with_one(count)
     )
     return drops
 def _drop_path(self, inputs):
     count = len(inputs)
     drops = K.switch(
         self.is_global,
         self._gen_global_path(count),
         self._gen_local_drops(count, self.p)
     )
     ave = K.zeros(shape=self.average_shape)
     for i in range(0, count):
         ave += inputs[i] * drops[i]
     sum = K.sum(drops)
     # Check that the sum is not 0 (global droppath can make it
     # 0) to avoid divByZero
     ave = K.switch(
         K.not_equal(sum, 0.),
         ave/sum,
         ave)
     return ave
Exemple #12
0
 def get_gradients(self, loss, params):
     grads = K.gradients(loss, params)
     if hasattr(self, 'scale') and self.scale != 1:
         grads = [g*K.variable(self.scale) for g in grads]
     if hasattr(self, 'clipnorm') and self.clipnorm > 0:
         norm = K.sqrt(sum([K.sum(K.square(g)) for g in grads]))
         grads = [K.switch(norm >= self.clipnorm, g * self.clipnorm / norm, g) for g in grads]
     if hasattr(self, 'clipvalue') and self.clipvalue > 0:
         grads = [K.clip(g, -self.clipvalue, self.clipvalue) for g in grads]
     return grads
Exemple #13
0
def jacc_coef_th(y_true, y_pred, smooth=smooth_default):
    y_true_f = K.flatten(y_true)
    y_pred_f = K.flatten(y_pred)
    intersection = K.sum(y_true_f * y_pred_f)
    jacc = (intersection + smooth) / (K.sum(y_true_f) + K.sum(y_pred_f) - intersection + smooth)
    result = K.switch(
        jacc > 0.65,
        jacc,
        jacc * 0.1
    )
    return result
Exemple #14
0
 def __call__(self, loss):
     if not hasattr(self, 'layer'):
         raise Exception('Need to call `set_layer` on '
                         'MaskRegularizer instance '
                         'before calling the instance.')
     min_tag_size = self.mask_size**2 * self.min_covered
     factor = min_tag_size / self.max_loss
     out = self.layer.output
     out_sum = out.sum(axis=(1, 2, 3))
     reg_loss = K.switch(out_sum <= min_tag_size,
                         factor*(out_sum - min_tag_size)**2, 0)
     return K.in_train_phase(loss + reg_loss.mean(), loss)
Exemple #15
0
    def step(self, x, states):
        # assert len(states) == 3
        h_tm1 = states[0]
        t = states[1]
        p_tm1 = states[2]
        
        x_t = K.dot(x, self.xh) + self.b

        p = x_t + K.dot(h_tm1, self.hh * self.mask)

        p_t = K.switch(K.equal(t[0] % self.period, 0), p, p_tm1)
        
        h = self.activation(p_t)

        return h, [h, t+1, p_t]
Exemple #16
0
    def step(self, x, states):
        prev_output = states[0]
        time_step = states[1]
        B_U = states[2]
        B_W = states[3]
        period = states[4]

        if self.consume_less == 'cpu':
            h = x
        else:
            h = K.dot(x * B_W, self.W) + self.b

        output = self.activation(h + K.dot(prev_output * B_U, self.U))
        output = K.switch(K.equal(time_step % period, 0.), output, prev_output)
        return output, [output, time_step+1]
Exemple #17
0
 def basic_block(self, z, nb_filter, column, reset_gates=True):
     if reset_gates:
         self.flush_gates(column)
     fz = self.fc_block(z, nb_filter)
     if column >= 1:
         fc1 = self.basic_block(z, nb_filter, column-1, False)
         fc2 = self.basic_block(fc1, nb_filter, column-1, False)
         M1 = merge([fz,fc2], mode='ave')
         M1 = Activation("relu")(M1)
         gate = K.variable(1, dtype="uint8")
         self.gates[column].append(gate)
         return Lambda(lambda outputs: K.switch(gate, outputs[0], outputs[1]),
                       output_shape= lambda x: x[0])([fz, M1])
     else:
         return fz
def _shortcut(input, residual):
    stride_width = input._keras_shape[2] / residual._keras_shape[2]
    stride_height = input._keras_shape[3] / residual._keras_shape[3]
    equal_channels = residual._keras_shape[1] == input._keras_shape[1]

    shortcut = input
    if stride_width > 1 or stride_height > 1 or not equal_channels:
        shortcut = Convolution2D(nb_filter=residual._keras_shape[1], nb_row=1, nb_col=1,
                                 subsample=(stride_width, stride_height),
                                 init="he_normal", border_mode="valid", W_regularizer=l2(weight_decay))(input)
        shortcut = Activation("relu")(shortcut)

    M1 = merge([shortcut, residual], mode="sum")
    M1 = Activation("relu")(M1)
    
    gate = K.variable(0.0, dtype="uint8")
    decay_rate = 1
    name = 'residual_'+str(len(gates)+1)
    gates[name]=[decay_rate, gate]
    return Lambda(lambda outputs: K.switch(gate, outputs[0], outputs[1]),
                  output_shape= lambda x: x[0], name=name)([shortcut, M1])
Exemple #19
0
        def _dream_step(x, states):
            # input + states
            assert len(states) == 2*self.depth + 1
            x = states[-1]
            x = K.switch(K.equal(x, K.max(x, axis=-1,
                                          keepdims=True)), 1., 0.)
            states = states[:-1]

            h = []
            for i, (h_tm1, c_tm1) in enumerate(zip(states[:-1:2], states[1::2])):
                x, new_states = self.lstms[i].step(x, [h_tm1, c_tm1])
                h.extend(new_states)

            if self.readout:
                h += [self.readout_layer(h[-2])]
                final = h[-1]
            else:
                h += [h[-2]]
                final = h[-2]

            return final, h
Exemple #20
0
def get_model():
    model = Sequential()

    model.add(Lambda(
        lambda x: K.switch(K.T.le(x, 0), 0, K.T.log2(x)),
        input_shape=(nrows, ncols)
    ))

    model.add(Reshape((1, nrows, ncols)))

    model.add(Convolution2D(32, 3, 3, border_mode='same'))
    model.add(Activation("relu"))

    model.add(Convolution2D(32, 3, 3, border_mode='same'))
    model.add(Activation("relu"))

    model.add(Convolution2D(32, 3, 3, border_mode='same'))
    model.add(Activation("relu"))

    model.add(Flatten())

    # model.add(Dense(500))
    # model.add(BatchNormalization())
    # model.add(Activation("relu"))
    # model.add(Dropout(0.5))

    model.add(Dense(500))
    # model.add(BatchNormalization())
    model.add(Activation("relu"))
    # model.add(Dropout(0.5))

    model.add(Dense(output_dim=4))
    model.add(Activation("softmax"))

    model.compile(loss='mse', optimizer="adam")
    return model
def clip_norm(g, c, n):
    if c > 0:
        if K.backend() == 'tensorflow':
            import tensorflow as tf
            import copy
            condition = n >= c
            then_expression = tf.scalar_mul(c / n, g)
            else_expression = g

            if hasattr(then_expression, 'get_shape'):
                g_shape = copy.copy(then_expression.get_shape())
            elif hasattr(then_expression, 'dense_shape'):
                g_shape = copy.copy(then_expression.dense_shape)
            if condition.dtype != tf.bool:
                condition = tf.cast(condition, 'bool')
            g = K.tensorflow_backend.control_flow_ops.cond(
                condition, lambda: then_expression, lambda: else_expression)
            if hasattr(then_expression, 'get_shape'):
                g.set_shape(g_shape)
            elif hasattr(then_expression, 'dense_shape'):
                g._dense_shape = g_shape
        else:
            g = K.switch(n >= c, g * c / n, g)
    return g
Exemple #22
0
    def get_updates(self, params, constraints, loss):
        grads = self.get_gradients(loss, params)

        self.updates = [K.update_add(self.iterations, 1)]
        t = self.iterations + 1

        loss_prev = K.variable(0)
        shapes = [K.get_variable_shape(p) for p in params]
        ms = [K.zeros(shape) for shape in shapes]
        vs = [K.zeros(shape) for shape in shapes]

        ch_fact_lbound = K.switch(K.greater(loss, loss_prev), 1+self.thl, 1/(1+self.thu))
        ch_fact_ubound = K.switch(K.greater(loss, loss_prev), 1+self.thu, 1/(1+self.thl))
        loss_ch_fact = loss / loss_prev
        loss_ch_fact = K.switch(K.lesser(loss_ch_fact, ch_fact_lbound), ch_fact_lbound, loss_ch_fact)
        loss_ch_fact = K.switch(K.greater(loss_ch_fact, ch_fact_ubound), ch_fact_ubound, loss_ch_fact)
        loss_hat = K.switch(K.greater(t, 1), loss_prev * loss_ch_fact, loss)

        d_den = K.switch(K.greater(loss_hat, loss_prev), loss_prev, loss_hat)
        d_t = (self.beta_3 * self.d) + (1. - self.beta_3) * K.abs((loss_hat - loss_prev) / d_den)
        d_t = K.switch(K.greater(t, 1), d_t, 1.)
        self.updates.append(K.update(self.d, d_t))

        for p, g, m, v in zip(params, grads, ms, vs):
            m_t = (self.beta_1 * m) + (1. - self.beta_1) * g
            mhat_t = m_t / (1. - K.pow(self.beta_1, t))
            self.updates.append(K.update(m, m_t))

            v_t = (self.beta_2 * v) + (1. - self.beta_2) * K.square(g)
            vhat_t = v_t / (1. - K.pow(self.beta_2, t))
            self.updates.append(K.update(v, v_t))

            p_t = p - (self.lr / (1. + (self.iterations * self.decay))) * mhat_t / ((K.sqrt(vhat_t) * d_t) + self.epsilon)
            self.updates.append(K.update(p, p_t))

        self.updates.append(K.update(loss_prev, loss_hat))
        return self.updates
def step(x, alpha=0):
    return K.switch(alpha, 0, 1)
Exemple #24
0
def _mask_loss(y_true, y_pred, y_mask, element_wise_loss):
    l = K.switch(y_mask, element_wise_loss(y_true, y_pred),
                 K.zeros_like(y_mask, dtype=K.floatx()))
    return K.sum(l) / (K.cast(K.sum(y_mask), dtype='float32') + K.epsilon())
    def get_updates(self, loss, params):
        grads = self.get_gradients(loss, params)
        self.updates = [K.update_add(self.iterations, 1)]

        lr = self.lr
        if self.initial_decay > 0:
            lr = lr * (1. / (1. + self.decay * K.cast(self.iterations,
                                                      K.dtype(self.decay))))

        t = K.cast(self.iterations, K.floatx()) + 1

        ms = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
        vs = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
        self.weights = [self.iterations] + ms + vs

        for p, g, m, v in zip(params, grads, ms, vs):
            m_t = (self.beta_1 * m) + (1. - self.beta_1) * g
            v_t = (self.beta_2 * v) + (1. - self.beta_2) * K.square(g)

            beta2_t = self.beta_2 ** t
            N_sma_max = 2 / (1 - self.beta_2) - 1
            N_sma = N_sma_max - 2 * t * beta2_t / (1 - beta2_t)

            # apply weight decay
            if self.weight_decay != 0.:
                p_wd = p - self.weight_decay * lr * p
            else:
                p_wd = None

            if p_wd is None:
                p_ = p
            else:
                p_ = p_wd

            def gt_path():
                step_size = lr * K.sqrt(
                    (1 - beta2_t) * (N_sma - 4) / (N_sma_max - 4) * (N_sma - 2) / N_sma * N_sma_max /
                    (N_sma_max - 2)) / (1 - self.beta_1 ** t)

                denom = K.sqrt(v_t) + self.epsilon
                p_t = p_ - step_size * (m_t / denom)

                return p_t

            def lt_path():
                step_size = lr / (1 - self.beta_1 ** t)
                p_t = p_ - step_size * m_t

                return p_t

            p_t = K.switch(N_sma > 5, gt_path, lt_path)

            self.updates.append(K.update(m, m_t))
            self.updates.append(K.update(v, v_t))
            new_p = p_t

            # Apply constraints.
            if getattr(p, 'constraint', None) is not None:
                new_p = p.constraint(new_p)

            self.updates.append(K.update(p, new_p))
        return self.updates
Exemple #26
0
def yolo_loss(args, anchors, num_classes, ignore_thresh=.5, print_loss=False):
    '''Return yolo_loss tensor

    Parameters
    ----------
    yolo_outputs: list of tensor, the output of yolo_body or tiny_yolo_body
    y_true: list of array, the output of preprocess_true_boxes
    anchors: array, shape=(N, 2), wh
    num_classes: integer
    ignore_thresh: float, the iou threshold whether to ignore object confidence loss

    Returns
    -------
    loss: tensor, shape=(1,)

    '''
    num_layers = len(anchors) // 3  # default setting
    yolo_outputs = args[:num_layers]
    y_true = args[num_layers:]
    anchor_mask = [[6, 7, 8], [3, 4, 5], [0, 1, 2]
                   ] if num_layers == 3 else [[3, 4, 5], [1, 2, 3]]
    input_shape = K.cast(
        K.shape(yolo_outputs[0])[1:3] * 32, K.dtype(y_true[0]))
    grid_shapes = [
        K.cast(K.shape(yolo_outputs[l])[1:3], K.dtype(y_true[0]))
        for l in range(num_layers)
    ]
    loss = 0
    m = K.shape(yolo_outputs[0])[0]  # batch size, tensor
    mf = K.cast(m, K.dtype(yolo_outputs[0]))

    for l in range(num_layers):
        object_mask = y_true[l][..., 4:5]
        true_class_probs = y_true[l][..., 5:]

        grid, raw_pred, pred_xy, pred_wh = yolo_head(yolo_outputs[l],
                                                     anchors[anchor_mask[l]],
                                                     num_classes,
                                                     input_shape,
                                                     calc_loss=True)
        pred_box = K.concatenate([pred_xy, pred_wh])

        # Darknet raw box to calculate loss.
        raw_true_xy = y_true[l][..., :2] * grid_shapes[l][::-1] - grid
        raw_true_wh = K.log(y_true[l][..., 2:4] / anchors[anchor_mask[l]] *
                            input_shape[::-1])
        raw_true_wh = K.switch(object_mask, raw_true_wh,
                               K.zeros_like(raw_true_wh))  # avoid log(0)=-inf
        box_loss_scale = 2 - y_true[l][..., 2:3] * y_true[l][..., 3:4]

        # Find ignore mask, iterate over each of batch.
        ignore_mask = tf.TensorArray(K.dtype(y_true[0]),
                                     size=1,
                                     dynamic_size=True)
        object_mask_bool = K.cast(object_mask, 'bool')

        def loop_body(b, ignore_mask):
            true_box = tf.boolean_mask(y_true[l][b, ..., 0:4],
                                       object_mask_bool[b, ..., 0])
            iou = box_iou(pred_box[b], true_box)
            best_iou = K.max(iou, axis=-1)
            ignore_mask = ignore_mask.write(
                b, K.cast(best_iou < ignore_thresh, K.dtype(true_box)))
            return b + 1, ignore_mask

        _, ignore_mask = K.control_flow_ops.while_loop(lambda b, *args: b < m,
                                                       loop_body,
                                                       [0, ignore_mask])
        ignore_mask = ignore_mask.stack()
        ignore_mask = K.expand_dims(ignore_mask, -1)

        # K.binary_crossentropy is helpful to avoid exp overflow.
        xy_loss = object_mask * box_loss_scale * K.binary_crossentropy(
            raw_true_xy, raw_pred[..., 0:2], from_logits=True)
        wh_loss = object_mask * box_loss_scale * 0.5 * K.square(
            raw_true_wh - raw_pred[..., 2:4])
        confidence_loss = object_mask * K.binary_crossentropy(object_mask, raw_pred[...,4:5], from_logits=True)+ \
            (1-object_mask) * K.binary_crossentropy(object_mask, raw_pred[...,4:5], from_logits=True) * ignore_mask
        class_loss = object_mask * K.binary_crossentropy(
            true_class_probs, raw_pred[..., 5:], from_logits=True)

        xy_loss = K.sum(xy_loss) / mf
        wh_loss = K.sum(wh_loss) / mf
        confidence_loss = K.sum(confidence_loss) / mf
        class_loss = K.sum(class_loss) / mf
        loss += xy_loss + wh_loss + confidence_loss + class_loss
        if print_loss:
            loss = tf.Print(loss, [
                loss, xy_loss, wh_loss, confidence_loss, class_loss,
                K.sum(ignore_mask)
            ],
                            message='loss: ')
    return loss
Exemple #27
0
def _wta(X):
    M = K.max(X, axis=-1, keepdims=True)
    R = K.switch(K.equal(X, M), X, 0.)
    return R
Exemple #28
0
    def GRU_merge(self, self_act, a, b, act):
        lower_a, upper_a = a.get_lu()
        lower_b, upper_b = b.get_lu()
        fa_lower, fa_upper = lower_a, upper_a
        fb_lower, fb_upper = act(lower_b), act(upper_b)
        lower_x, upper_x = self.get_lu()
        fx_lower, fx_upper = self_act(lower_x), self_act(upper_x)
        partial_fx_lower = tf.gradients(fx_lower, lower_x)[0]
        partial_fx_upper = tf.gradients(fx_upper, upper_x)[0]

        def lower_a_greater_zero():
            uz_x_Phi = K.minimum(partial_fx_upper * fa_upper,
                                 (fx_upper - fx_lower) * fa_upper /
                                 (upper_x - lower_x))
            ax_right_upper = fx_upper * fa_upper
            ax_left_upper = uz_x_Phi * (lower_x - upper_x) + ax_right_upper
            lz_x_Phi = K.minimum(partial_fx_lower * fa_lower,
                                 (fx_lower - fx_upper) * fa_lower /
                                 (lower_x - upper_x))
            ax_left_lower = fx_lower * fa_lower
            ax_right_lower = lz_x_Phi * (upper_x - lower_x) + ax_left_lower
            return [
                ax_left_lower, ax_left_upper, ax_right_lower, ax_right_upper
            ]

        def lower_b_greater_zero():
            uz_x_Phi = K.maximum(-partial_fx_lower * fb_upper,
                                 (-fx_upper + fx_lower) * fb_upper /
                                 (upper_x - lower_x))
            bx_left_upper = (1 - fx_lower) * fb_upper
            bx_right_upper = uz_x_Phi * (upper_x - lower_x) + bx_left_upper
            lz_x_Phi = K.maximum(-partial_fx_upper * fb_lower,
                                 (-fx_lower + fx_upper) * fb_lower /
                                 (lower_x - upper_x))
            bx_right_lower = (1 - fx_upper) * fb_lower
            bx_left_lower = lz_x_Phi * (lower_x - upper_x) + bx_right_lower
            return [
                bx_left_lower, bx_left_upper, bx_right_lower, bx_right_upper
            ]

        def upper_a_less_zero():
            uz_x_Phi = K.maximum(partial_fx_lower * fa_upper,
                                 (fx_lower - fx_upper) * fa_upper /
                                 (lower_x - upper_x))
            ax_left_upper = fx_lower * fa_upper
            ax_right_upper = uz_x_Phi * (upper_x - lower_x) + ax_left_upper
            lz_x_Phi = K.maximum(partial_fx_upper * fa_lower,
                                 (fx_upper - fx_lower) * fa_lower /
                                 (upper_x - lower_x))
            ax_right_lower = fx_upper * fa_lower
            ax_left_lower = lz_x_Phi * (lower_x - upper_x) + ax_right_lower
            return [
                ax_left_lower, ax_left_upper, ax_right_lower, ax_right_upper
            ]

        def upper_b_less_zero():
            uz_x_Phi = K.minimum(-partial_fx_upper * fb_upper,
                                 (-fx_upper + fx_lower) * fb_upper /
                                 (upper_x - lower_x))
            bx_right_upper = (1 - fx_upper) * fb_upper
            bx_left_upper = uz_x_Phi * (lower_x - upper_x) + bx_right_upper
            lz_x_Phi = K.minimum(-partial_fx_lower * fb_lower,
                                 (-fx_lower + fx_upper) * fb_lower /
                                 (lower_x - upper_x))
            bx_left_lower = (1 - fx_lower) * fb_lower
            bx_right_lower = lz_x_Phi * (upper_x - lower_x) + bx_left_lower
            return [
                bx_left_lower, bx_left_upper, bx_right_lower, bx_right_upper
            ]

        def otherwise_a():
            uz_x_Phi = K.minimum(partial_fx_upper * fa_upper,
                                 (fx_upper - fx_lower) * fa_upper /
                                 (upper_x - lower_x))
            ax_right_upper = fx_upper * fa_upper
            ax_left_upper = uz_x_Phi * (lower_x - upper_x) + ax_right_upper
            lz_x_Phi = K.maximum(partial_fx_upper * fa_lower,
                                 (fx_upper - fx_lower) * fa_lower /
                                 (upper_x - lower_x))
            ax_right_lower = fx_upper * fa_lower
            ax_left_lower = lz_x_Phi * (lower_x - upper_x) + ax_right_lower
            return [
                ax_left_lower, ax_left_upper, ax_right_lower, ax_right_upper
            ]

        def otherwise_b():
            uz_x_Phi = K.maximum(-partial_fx_lower * fb_upper,
                                 (-fx_upper + fx_lower) * fb_upper /
                                 (upper_x - lower_x))
            bx_left_upper = (1 - fx_lower) * fb_upper
            bx_right_upper = uz_x_Phi * (upper_x - lower_x) + bx_left_upper
            lz_x_Phi = K.minimum(-partial_fx_lower * fb_lower,
                                 (-fx_lower + fx_upper) * fb_lower /
                                 (lower_x - upper_x))
            bx_left_lower = (1 - fx_lower) * fb_lower
            bx_right_lower = lz_x_Phi * (upper_x - lower_x) + bx_left_lower
            return [
                bx_left_lower, bx_left_upper, bx_right_lower, bx_right_upper
            ]

        a_anchors = otherwise_a()
        anchors_lower_a_greater_zero = lower_a_greater_zero()
        anchors_upper_a_less_zero = upper_a_less_zero()
        for i in range(4):
            a_anchors[i] = K.switch(K.greater(lower_a, K.zeros_like(lower_a)),
                                    anchors_lower_a_greater_zero[i],
                                    a_anchors[i])
            a_anchors[i] = K.switch(K.less(upper_a, K.zeros_like(upper_a)),
                                    anchors_upper_a_less_zero[i], a_anchors[i])

        b_anchors = otherwise_b()
        anchors_lower_b_greater_zero = lower_b_greater_zero()
        anchors_upper_b_less_zero = upper_b_less_zero()
        for i in range(4):
            b_anchors[i] = K.switch(K.greater(lower_b, K.zeros_like(lower_b)),
                                    anchors_lower_b_greater_zero[i],
                                    b_anchors[i])
            b_anchors[i] = K.switch(K.less(upper_b, K.zeros_like(upper_b)),
                                    anchors_upper_b_less_zero[i], b_anchors[i])

        for i in range(4):
            a_anchors[i] += b_anchors[i]
        lower_z = K.minimum(a_anchors[0], a_anchors[2])
        upper_z = K.maximum(a_anchors[1], a_anchors[3])
        return AI((lower_z + upper_z) / 2, (upper_z - lower_z) / 2, None, True)
Exemple #29
0
	def call(self, x, mask=None):
		R = T.reshape(x,(T.shape(x)[0],T.shape(x)[1]/self.OneOnX,self.OneOnX))
		M = K.max(R, axis=(2), keepdims=True)
		R = K.switch(K.equal(R, M), R, 0.)
		R = T.reshape(R,(T.shape(x)[0],T.shape(x)[1]))
		return R
Exemple #30
0
    def call(self, inputs, states, training=None):
        """We need to reimplmenet `call` entirely rather than reusing that
        from `GRUCell` since there are lots of differences.

        Args:
            inputs: One tensor which is stacked by 3 inputs (x, m, s)
                x and m are of shape (n_batch * input_dim).
                s is of shape (n_batch, 1).
            states: states and other values from the previous step.
                (h_tm1, x_keep_tm1, s_prev_tm1)
        """
        # Get inputs and states
        input_x = inputs[:, :self.true_input_dim]  # inputs x, m, s
        input_m = inputs[:, self.true_input_dim:-1]
        input_s = inputs[:, -1:]
        # Need to add broadcast for time_stamp if using theano backend.
        if K.backend() == 'theano':
            input_s = K.pattern_broadcast(input_s, [False, True])
        h_tm1, x_keep_tm1, s_prev_tm1 = states
        # previous memory ([n_batch * self.units])
        # previous input x ([n_batch * input_dim])
        # and the subtraction term (of delta_t^d in Equation (2))
        # ([n_batch * input_dim])
        input_1m = K.cast_to_floatx(1.) - input_m
        input_d = input_s - s_prev_tm1

        # Get dropout
        if 0. < self.dropout < 1. and self._dropout_mask is None:
            self._dropout_mask = _generate_dropout_mask(K.ones_like(input_x),
                                                        self.dropout,
                                                        training=training,
                                                        count=3)
        if (0. < self.recurrent_dropout < 1.
                and self._recurrent_dropout_mask is None):
            self._recurrent_dropout_mask = _generate_dropout_mask(
                K.ones_like(h_tm1),
                self.recurrent_dropout,
                training=training,
                count=3)
        dp_mask = self._dropout_mask
        rec_dp_mask = self._recurrent_dropout_mask

        if self.feed_masking:
            if 0. < self.dropout < 1. and self._masking_dropout_mask is None:
                self._masking_dropout_mask = _generate_dropout_mask(
                    K.ones_like(input_m),
                    self.dropout,
                    training=training,
                    count=3)
            m_dp_mask = self._masking_dropout_mask

        # Compute decay if any
        if self.input_decay is not None:
            gamma_di = input_d * self.input_decay_kernel
            if self.use_decay_bias:
                gamma_di = K.bias_add(gamma_di, self.input_decay_bias)
            gamma_di = self.input_decay(gamma_di)
        if self.hidden_decay is not None:
            gamma_dh = K.dot(input_d, self.hidden_decay_kernel)
            if self.use_decay_bias:
                gamma_dh = K.bias_add(gamma_dh, self.hidden_decay_bias)
            gamma_dh = self.hidden_decay(gamma_dh)
        if self.feed_masking and self.masking_decay is not None:
            gamma_dm = input_d * self.masking_decay_kernel
            if self.use_decay_bias:
                gamma_dm = K.bias_add(gamma_dm, self.masking_decay_bias)
            gamma_dm = self.masking_decay(gamma_dm)

        # Get the imputed or decayed input if needed
        # and `x_keep_t` for the next time step

        if self.input_decay is not None:
            x_keep_t = K.switch(input_m, input_x, x_keep_tm1)
            x_t = K.switch(input_m, input_x, gamma_di * x_keep_t)
        elif self.x_imputation == 'forward':
            x_t = K.switch(input_m, input_x, x_keep_tm1)
            x_keep_t = x_t
        elif self.x_imputation == 'zero':
            x_t = K.switch(input_m, input_x, K.zeros_like(input_x))
            x_keep_t = x_t
        elif self.x_imputation == 'raw':
            x_t = input_x
            x_keep_t = x_t
        else:
            raise ValueError('No input decay or invalid x_imputation '
                             '{}.'.format(self.x_imputation))

        # Get decayed hidden if needed
        if self.hidden_decay is not None:
            h_tm1d = gamma_dh * h_tm1
        else:
            h_tm1d = h_tm1

        # Get decayed masking if needed
        if self.feed_masking:
            m_t = input_1m
            if self.masking_decay is not None:
                m_t = gamma_dm * m_t

        # Apply the dropout
        if 0. < self.dropout < 1.:
            x_z, x_r, x_h = x_t * dp_mask[0], x_t * dp_mask[1], x_t * dp_mask[2]
            if self.feed_masking:
                m_z, m_r, m_h = (m_t * m_dp_mask[0], m_t * m_dp_mask[1],
                                 m_t * m_dp_mask[2])
        else:
            x_z, x_r, x_h = x_t, x_t, x_t
            if self.feed_masking:
                m_z, m_r, m_h = m_t, m_t, m_t
        if 0. < self.recurrent_dropout < 1.:
            h_tm1_z, h_tm1_r = (
                h_tm1d * rec_dp_mask[0],
                h_tm1d * rec_dp_mask[1],
            )
        else:
            h_tm1_z, h_tm1_r = h_tm1d, h_tm1d

        # Get z_t, r_t, hh_t
        z_t = K.dot(x_z, self.kernel_z) + K.dot(h_tm1_z,
                                                self.recurrent_kernel_z)
        r_t = K.dot(x_r, self.kernel_r) + K.dot(h_tm1_r,
                                                self.recurrent_kernel_r)
        hh_t = K.dot(x_h, self.kernel_h)
        if self.feed_masking:
            z_t += K.dot(m_z, self.masking_kernel_z)
            r_t += K.dot(m_r, self.masking_kernel_r)
            hh_t += K.dot(m_h, self.masking_kernel_h)
        if self.use_bias:
            z_t = K.bias_add(z_t, self.input_bias_z)
            r_t = K.bias_add(r_t, self.input_bias_r)
            hh_t = K.bias_add(hh_t, self.input_bias_h)
        z_t = self.recurrent_activation(z_t)
        r_t = self.recurrent_activation(r_t)

        if 0. < self.recurrent_dropout < 1.:
            h_tm1_h = r_t * h_tm1d * rec_dp_mask[2]
        else:
            h_tm1_h = r_t * h_tm1d
        hh_t = self.activation(hh_t + K.dot(h_tm1_h, self.recurrent_kernel_h))

        # get h_t
        h_t = z_t * h_tm1 + (1 - z_t) * hh_t
        if 0. < self.dropout + self.recurrent_dropout:
            if training is None:
                h_t._uses_learning_phase = True

        # get s_prev_t
        s_prev_t = K.switch(input_m, K.tile(input_s, [1, self.state_size[-1]]),
                            s_prev_tm1)
        return h_t, [h_t, x_keep_t, s_prev_t]
def masked_loss(y_true, y_pred):
    y_mask = K.cast(K.any(y_true, axis=-1), "float32")
    loss = K.switch(y_mask, K.sparse_categorical_crossentropy(y_true, y_pred),
                    K.zeros_like(y_mask, dtype=K.floatx()))
    return K.sum(loss) / (K.cast(K.sum(y_mask), dtype='float32') + K.epsilon())
Exemple #32
0
def yolo_loss(args,
              anchors,
              num_classes,
              ignore_thresh=.5,
              print_loss=False,
              normalize=True):
    # 一共有三个特征层
    num_layers = len(anchors) // 3

    #---------------------------------------------------------------------------------------------------#
    #   将预测结果和实际ground truth分开,args是[*model_body.output, *y_true]
    #   y_true是一个列表,包含三个特征层,shape分别为(m,13,13,3,85),(m,26,26,3,85),(m,52,52,3,85)。
    #   yolo_outputs是一个列表,包含三个特征层,shape分别为(m,13,13,3,85),(m,26,26,3,85),(m,52,52,3,85)。
    #---------------------------------------------------------------------------------------------------#
    y_true = args[num_layers:]
    yolo_outputs = args[:num_layers]

    #-----------------------------------------------------------#
    #   13x13的特征层对应的anchor是[116,90],[156,198],[373,326]
    #   26x26的特征层对应的anchor是[30,61],[62,45],[59,119]
    #   52x52的特征层对应的anchor是[10,13],[16,30],[33,23]
    #-----------------------------------------------------------#
    anchor_mask = [[6, 7, 8], [3, 4, 5], [0, 1, 2]]

    # 得到input_shpae为416,416
    input_shape = K.cast(
        K.shape(yolo_outputs[0])[1:3] * 32, K.dtype(y_true[0]))

    #-----------------------------------------------------------#
    #   得到网格的shape为[13,13]; [26,26]; [52,52]
    #-----------------------------------------------------------#
    grid_shapes = [
        K.cast(K.shape(yolo_outputs[l])[1:3], K.dtype(y_true[0]))
        for l in range(num_layers)
    ]
    loss = 0
    num_pos = 0
    #-----------------------------------------------------------#
    #   取出每一张图片
    #   m的值就是batch_size
    #-----------------------------------------------------------#
    m = K.shape(yolo_outputs[0])[0]
    mf = K.cast(m, K.dtype(yolo_outputs[0]))

    #---------------------------------------------------------------------------------------------------#
    #   y_true是一个列表,包含三个特征层,shape分别为(m,13,13,3,85),(m,26,26,3,85),(m,52,52,3,85)。
    #   yolo_outputs是一个列表,包含三个特征层,shape分别为(m,13,13,3,85),(m,26,26,3,85),(m,52,52,3,85)。
    #---------------------------------------------------------------------------------------------------#
    for l in range(num_layers):
        #-----------------------------------------------------------#
        #   以第一个特征层(m,13,13,3,85)为例子
        #   取出该特征层中存在目标的点的位置。(m,13,13,3,1)
        #-----------------------------------------------------------#
        object_mask = y_true[l][..., 4:5]
        #-----------------------------------------------------------#
        #   取出其对应的种类(m,13,13,3,80)
        #-----------------------------------------------------------#
        true_class_probs = y_true[l][..., 5:]

        #-----------------------------------------------------------#
        #   将yolo_outputs的特征层输出进行处理、获得四个返回值
        #   其中:
        #   grid        (13,13,1,2) 网格坐标
        #   raw_pred    (m,13,13,3,85) 尚未处理的预测结果
        #   pred_xy     (m,13,13,3,2) 解码后的中心坐标
        #   pred_wh     (m,13,13,3,2) 解码后的宽高坐标
        #-----------------------------------------------------------#
        grid, raw_pred, pred_xy, pred_wh = yolo_head(yolo_outputs[l],
                                                     anchors[anchor_mask[l]],
                                                     num_classes,
                                                     input_shape,
                                                     calc_loss=True)

        #-----------------------------------------------------------#
        #   pred_box是解码后的预测的box的位置
        #   (m,13,13,3,4)
        #-----------------------------------------------------------#
        pred_box = K.concatenate([pred_xy, pred_wh])

        #-----------------------------------------------------------#
        #   找到负样本群组,第一步是创建一个数组,[]
        #-----------------------------------------------------------#
        ignore_mask = tf.TensorArray(K.dtype(y_true[0]),
                                     size=1,
                                     dynamic_size=True)
        object_mask_bool = K.cast(object_mask, 'bool')

        #-----------------------------------------------------------#
        #   对每一张图片计算ignore_mask
        #-----------------------------------------------------------#
        def loop_body(b, ignore_mask):
            #-----------------------------------------------------------#
            #   取出n个真实框:n,4
            #-----------------------------------------------------------#
            true_box = tf.boolean_mask(y_true[l][b, ..., 0:4],
                                       object_mask_bool[b, ..., 0])
            #-----------------------------------------------------------#
            #   计算预测框与真实框的iou
            #   pred_box    13,13,3,4 预测框的坐标
            #   true_box    n,4 真实框的坐标
            #   iou         13,13,3,n 预测框和真实框的iou
            #-----------------------------------------------------------#
            iou = box_iou(pred_box[b], true_box)

            #-----------------------------------------------------------#
            #   best_iou    13,13,3 每个特征点与真实框的最大重合程度
            #-----------------------------------------------------------#
            best_iou = K.max(iou, axis=-1)

            #-----------------------------------------------------------#
            #   判断预测框和真实框的最大iou小于ignore_thresh
            #   则认为该预测框没有与之对应的真实框
            #   该操作的目的是:
            #   忽略预测结果与真实框非常对应特征点,因为这些框已经比较准了
            #   不适合当作负样本,所以忽略掉。
            #-----------------------------------------------------------#
            ignore_mask = ignore_mask.write(
                b, K.cast(best_iou < ignore_thresh, K.dtype(true_box)))
            return b + 1, ignore_mask

        #-----------------------------------------------------------#
        #   在这个地方进行一个循环、循环是对每一张图片进行的
        #-----------------------------------------------------------#
        _, ignore_mask = K.control_flow_ops.while_loop(lambda b, *args: b < m,
                                                       loop_body,
                                                       [0, ignore_mask])

        #-----------------------------------------------------------#
        #   ignore_mask用于提取出作为负样本的特征点
        #   (m,13,13,3)
        #-----------------------------------------------------------#
        ignore_mask = ignore_mask.stack()
        #   (m,13,13,3,1)
        ignore_mask = K.expand_dims(ignore_mask, -1)

        #-----------------------------------------------------------#
        #   将真实框进行编码,使其格式与预测的相同,后面用于计算loss
        #-----------------------------------------------------------#
        raw_true_xy = y_true[l][..., :2] * grid_shapes[l][:] - grid
        raw_true_wh = K.log(y_true[l][..., 2:4] / anchors[anchor_mask[l]] *
                            input_shape[::-1])

        #-----------------------------------------------------------#
        #   object_mask如果真实存在目标则保存其wh值
        #   switch接口,就是一个if/else条件判断语句
        #-----------------------------------------------------------#
        raw_true_wh = K.switch(object_mask, raw_true_wh,
                               K.zeros_like(raw_true_wh))
        #-----------------------------------------------------------#
        #   真实框越大,比重越小,小框的比重更大。
        #-----------------------------------------------------------#
        box_loss_scale = 2 - y_true[l][..., 2:3] * y_true[l][..., 3:4]

        #-----------------------------------------------------------#
        #   利用binary_crossentropy计算中心点偏移情况,效果更好
        #-----------------------------------------------------------#
        xy_loss = object_mask * box_loss_scale * K.binary_crossentropy(
            raw_true_xy, raw_pred[..., 0:2], from_logits=True)
        #-----------------------------------------------------------#
        #   wh_loss用于计算宽高损失
        #-----------------------------------------------------------#
        wh_loss = object_mask * box_loss_scale * 0.5 * K.square(
            raw_true_wh - raw_pred[..., 2:4])

        #------------------------------------------------------------------------------#
        #   如果该位置本来有框,那么计算1与置信度的交叉熵
        #   如果该位置本来没有框,那么计算0与置信度的交叉熵
        #   在这其中会忽略一部分样本,这些被忽略的样本满足条件best_iou<ignore_thresh
        #   该操作的目的是:
        #   忽略预测结果与真实框非常对应特征点,因为这些框已经比较准了
        #   不适合当作负样本,所以忽略掉。
        #------------------------------------------------------------------------------#
        confidence_loss = object_mask * K.binary_crossentropy(object_mask, raw_pred[...,4:5], from_logits=True) + \
            (1-object_mask) * K.binary_crossentropy(object_mask, raw_pred[...,4:5], from_logits=True) * ignore_mask

        class_loss = object_mask * K.binary_crossentropy(
            true_class_probs, raw_pred[..., 5:], from_logits=True)

        #-----------------------------------------------------------#
        #   将所有损失求和
        #-----------------------------------------------------------#
        xy_loss = K.sum(xy_loss)
        wh_loss = K.sum(wh_loss)
        confidence_loss = K.sum(confidence_loss)
        class_loss = K.sum(class_loss)
        #-----------------------------------------------------------#
        #   计算正样本数量
        #-----------------------------------------------------------#
        num_pos += tf.maximum(K.sum(K.cast(object_mask, tf.float32)), 1)
        loss += xy_loss + wh_loss + confidence_loss + class_loss

        if print_loss:
            loss = tf.Print(loss, [
                loss, xy_loss, wh_loss, confidence_loss, class_loss,
                tf.shape(ignore_mask)
            ],
                            summarize=100,
                            message='loss: ')

    if normalize:
        loss = loss / num_pos
    else:
        loss = loss / mf
    return loss
def compute_loss(yolo_outputs,
                 y_true,
                 anchors,
                 num_classes,
                 ignore_thresh=ignore_thresh,
                 print_loss=False):
    # yolo_outputs = YOLO_outputs
    # y_true = Y_true  # output of preprocess_true_boxes [3, None, 13, 13, 3, 2]
    anchor_mask = [[6, 7, 8], [3, 4, 5], [0, 1, 2]]

    input_shape = K.cast(
        K.shape(yolo_outputs[0])[1:3] * 32, K.dtype(y_true[0]))
    grid_shapes = [
        K.cast(K.shape(yolo_outputs[l])[1:3], K.dtype(y_true[0]))
        for l in range(3)
    ]
    loss = 0
    m = K.shape(yolo_outputs[0])[0]  # batch size, tensor
    mf = K.cast(m, K.dtype(yolo_outputs[0]))

    for l in range(3):
        object_mask = y_true[l][..., 4:5]
        true_class_probs = y_true[l][..., 5:]

        grid, raw_pred, pred_xy, pred_wh = yolo_head(yolo_outputs[l],
                                                     anchors[anchor_mask[l]],
                                                     num_classes,
                                                     input_shape,
                                                     calc_loss=True)
        pred_box = K.concatenate([pred_xy, pred_wh])

        # Darknet raw box to calculate loss.
        raw_true_xy = y_true[l][..., :2] * grid_shapes[l][::-1] - grid
        raw_true_wh = K.log(y_true[l][..., 2:4] / anchors[anchor_mask[l]] *
                            input_shape[::-1])
        raw_true_wh = K.switch(object_mask, raw_true_wh,
                               K.zeros_like(raw_true_wh))  # avoid log(0)=-inf
        box_loss_scale = 2 - y_true[l][..., 2:3] * y_true[l][..., 3:4]

        # Find ignore mask, iterate over each of batch.
        ignore_mask = tf.TensorArray(K.dtype(y_true[0]),
                                     size=1,
                                     dynamic_size=True)
        object_mask_bool = K.cast(object_mask, 'bool')

        def loop_body(b, ignore_mask):
            true_box = tf.boolean_mask(y_true[l][b, ..., 0:4],
                                       object_mask_bool[b, ..., 0])
            iou = box_IoU(pred_box[b], true_box)
            best_iou = K.max(iou, axis=-1)
            ignore_mask = ignore_mask.write(
                b, K.cast(best_iou < ignore_thresh, K.dtype(true_box)))
            return b + 1, ignore_mask

        _, ignore_mask = K.control_flow_ops.while_loop(lambda b, *args: b < m,
                                                       loop_body,
                                                       [0, ignore_mask])
        ignore_mask = ignore_mask.stack()
        ignore_mask = K.expand_dims(ignore_mask, -1)

        # K.binary_crossentropy is helpful to avoid exp overflow.
        xy_loss = object_mask * box_loss_scale * K.binary_crossentropy(
            raw_true_xy, raw_pred[..., 0:2], from_logits=True)
        wh_loss = object_mask * box_loss_scale * 0.5 * K.square(
            raw_true_wh - raw_pred[..., 2:4])
        confidence_loss = object_mask * K.binary_crossentropy(object_mask, raw_pred[..., 4:5], from_logits=True) + \
                          (1 - object_mask) * K.binary_crossentropy(object_mask, raw_pred[..., 4:5],
                                                                    from_logits=True) * ignore_mask
        class_loss = object_mask * K.binary_crossentropy(
            true_class_probs, raw_pred[..., 5:], from_logits=True)

        xy_loss = K.sum(xy_loss) / mf
        wh_loss = K.sum(wh_loss) / mf
        confidence_loss = K.sum(confidence_loss) / mf
        class_loss = K.sum(class_loss) / mf

        loss += xy_loss + wh_loss + confidence_loss + class_loss
        with tf.name_scope("losses"):
            tf.summary.scalar("coordinate_loss", xy_loss)
            tf.summary.scalar("dimensions_loss", wh_loss)
            tf.summary.scalar("confidence_loss", confidence_loss)
            tf.summary.scalar("class_loss", class_loss)
            tf.summary.scalar("total_loss", loss)
    return loss
    def build_object_untargeted_loss(self):
        yolo_outputs = self.model.output
        y_true = self.encoded_labels
        anchor_mask = [[6, 7, 8], [3, 4, 5], [0, 1, 2]]
        input_shape = K.cast(
            K.shape(yolo_outputs[0])[1:3] * 32, K.dtype(y_true[0]))
        grid_shapes = [
            K.cast(K.shape(yolo_outputs[l])[1:3], K.dtype(y_true[0]))
            for l in range(3)
        ]
        loss = 0
        m = K.shape(yolo_outputs[0])[0]  # batch size, tensor
        mf = K.cast(m, K.dtype(yolo_outputs[0]))

        for l in range(3):
            object_mask = y_true[l][..., 4:5]
            true_class_probs = y_true[l][..., 5:]

            grid, raw_pred, pred_xy, pred_wh = yolo_head(
                yolo_outputs[l],
                self.anchors[anchor_mask[l]],
                self.num_classes,
                input_shape,
                calc_loss=True)
            pred_box = K.concatenate([pred_xy, pred_wh])

            # Darknet raw box to calculate loss.
            raw_true_xy = y_true[l][..., :2] * grid_shapes[l][::-1] - grid
            raw_true_wh = K.log(y_true[l][..., 2:4] /
                                self.anchors[anchor_mask[l]] *
                                input_shape[::-1])
            raw_true_wh = K.switch(
                object_mask, raw_true_wh,
                K.zeros_like(raw_true_wh))  # avoid log(0)=-inf
            box_loss_scale = 2 - y_true[l][..., 2:3] * y_true[l][..., 3:4]

            # Find ignore mask, iterate over each of batch.
            ignore_mask = tf.TensorArray(K.dtype(y_true[0]),
                                         size=1,
                                         dynamic_size=True)
            object_mask_bool = K.cast(object_mask, 'bool')

            def loop_body(b, ignore_mask):
                true_box = tf.boolean_mask(y_true[l][b, ..., 0:4],
                                           object_mask_bool[b, ..., 0])
                iou = box_iou(pred_box[b], true_box)
                best_iou = K.max(iou, axis=-1)
                ignore_mask = ignore_mask.write(
                    b, K.cast(best_iou < 0.45, K.dtype(true_box)))
                return b + 1, ignore_mask

            _, ignore_mask = tf.while_loop(lambda b, *args: b < m, loop_body,
                                           [0, ignore_mask])
            ignore_mask = ignore_mask.stack()
            ignore_mask = K.expand_dims(ignore_mask, -1)

            # K.binary_cross-entropy is helpful to avoid exp overflow.
            xy_loss = object_mask * box_loss_scale * K.binary_crossentropy(
                raw_true_xy, raw_pred[..., 0:2], from_logits=True)
            wh_loss = object_mask * box_loss_scale * 0.5 * K.square(
                raw_true_wh - raw_pred[..., 2:4])
            confidence_loss = 0
            confidence_loss += object_mask * K.binary_crossentropy(
                object_mask, raw_pred[..., 4:5], from_logits=True)
            confidence_loss += (1 - object_mask) * K.binary_crossentropy(
                object_mask, raw_pred[...,
                                      4:5], from_logits=True) * ignore_mask
            class_loss = object_mask * K.binary_crossentropy(
                true_class_probs, raw_pred[..., 5:], from_logits=True)

            xy_loss = K.sum(xy_loss) / mf
            wh_loss = K.sum(wh_loss) / mf
            confidence_loss = K.sum(confidence_loss) / mf
            class_loss = K.sum(class_loss) / mf
            loss += xy_loss + wh_loss + confidence_loss + class_loss
        return -loss
Exemple #35
0
 def call(self, x, mask=None):
     from keras import backend as K
     j = K.softplus((x - 1) / self.sigma) * self.sigma
     v = self.amplitude / (self.tau_ref + self.tau_rc*K.log(1 + 1/j))
     return K.switch(j > 0, v, 0)
Exemple #36
0
def yolo_loss(args, anchors, num_classes, ignore_thresh=.5, print_loss=False):
    '''Return yolo_loss tensor. This is wrapped as a layer Output 
    For convenience using Keras. 

    Parameters
    ----------
    yolo_outputs (args[:num_layers]): list of tensor, the output of yolo_body or tiny_yolo_body
    y_true (args[numlayers:]): list of array, the output of preprocess_true_boxes
    anchors: array, shape=(N, 2), wh
    num_classes: integer
    ignore_thresh: float, the IoU threshold whether to ignore object confidence loss

    Returns
    -------
    loss: tensor, shape=(1,), the summed losses for bounding boxes, objectness, and class prob

    '''
    num_layers = len(anchors) // 3  # default setting

    # separate out the args
    yolo_outputs = args[:num_layers]
    y_true = args[num_layers:]
    anchor_mask = [[6, 7, 8], [3, 4, 5], [0, 1, 2]
                   ] if num_layers == 3 else [[3, 4, 5], [1, 2, 3]]

    # save some shapes for convenience
    input_shape = K.cast(
        K.shape(yolo_outputs[0])[1:3] * 32, K.dtype(y_true[0]))
    grid_shapes = [
        K.cast(K.shape(yolo_outputs[l])[1:3], K.dtype(y_true[0]))
        for l in range(num_layers)
    ]
    m = K.shape(yolo_outputs[0])[0]  # batch size, tensor
    mf = K.cast(m, K.dtype(yolo_outputs[0]))

    # init the loss
    loss = 0

    for l in range(num_layers):
        # whether an object is in this anchor
        object_mask = y_true[l][..., 4:5]
        # 1's and 0's of the true class presence in anchor
        true_class_probs = y_true[l][..., 5:]

        # parse out the predictions
        # raw_pred is everything, before applying sigmoid to xy and wh
        grid, raw_pred, pred_xy, pred_wh = yolo_head(yolo_outputs[l],
                                                     anchors[anchor_mask[l]],
                                                     num_classes,
                                                     input_shape,
                                                     calc_loss=True)
        # Make into one tensor, for convenience
        pred_box = K.concatenate([pred_xy, pred_wh])

        # Darknet raw box to calculate loss.
        raw_true_xy = y_true[l][..., :2] * grid_shapes[l][::-1] - grid
        raw_true_wh = K.log(y_true[l][..., 2:4] / anchors[anchor_mask[l]] *
                            input_shape[::-1])
        raw_true_wh = K.switch(object_mask, raw_true_wh,
                               K.zeros_like(raw_true_wh))  # avoid log(0)=-inf
        box_loss_scale = 2 - y_true[l][..., 2:3] * y_true[l][..., 3:4]

        # Find ignore mask, iterate over each of batch.
        # Mask is the conditional for ignoring objects that are not
        # within the specified IoU
        ignore_mask = tf.TensorArray(K.dtype(y_true[0]),
                                     size=1,
                                     dynamic_size=True)
        object_mask_bool = K.cast(object_mask, 'bool')

        def loop_body(b, ignore_mask):
            true_box = tf.boolean_mask(y_true[l][b, ..., 0:4],
                                       object_mask_bool[b, ..., 0])
            iou = box_iou(pred_box[b], true_box)
            best_iou = K.max(iou, axis=-1)
            ignore_mask = ignore_mask.write(
                b, K.cast(best_iou < ignore_thresh, K.dtype(true_box)))
            return b + 1, ignore_mask

        _, ignore_mask = K.control_flow_ops.while_loop(lambda b, *args: b < m,
                                                       loop_body,
                                                       [0, ignore_mask])
        ignore_mask = ignore_mask.stack()
        ignore_mask = K.expand_dims(ignore_mask, -1)

        # K.binary_crossentropy is helpful to avoid exp overflow.
        # Okay, this is a weird implementation, but here goes for an explanation:
        #  YOLO xy outputs need to pass through a sigmoid before interpretation, but that isn't yet done here
        #  when from_logits=true, the log odds is not calculated so this becomes:
        #  xy_pred *(1 - xy_real) + log(1 + exp(-abs(xy_pred)))
        xy_loss = object_mask * box_loss_scale * K.binary_crossentropy(
            raw_true_xy, raw_pred[..., 0:2], from_logits=True)

        # both are already "log" scaled, so just take the square difference
        wh_loss = object_mask * box_loss_scale * 0.5 * K.square(
            raw_true_wh - raw_pred[..., 2:4])
        # this is straigh forard, right out of the YOLO paper
        confidence_loss = object_mask * K.binary_crossentropy(object_mask, raw_pred[...,4:5], from_logits=True)+ \
            (1-object_mask) * K.binary_crossentropy(object_mask, raw_pred[...,4:5], from_logits=True) * ignore_mask

        # class loss based on if the
        class_loss = object_mask * K.binary_crossentropy(
            true_class_probs, raw_pred[..., 5:], from_logits=True)

        xy_loss = K.sum(xy_loss) / mf
        wh_loss = K.sum(wh_loss) / mf
        confidence_loss = K.sum(confidence_loss) / mf
        class_loss = K.sum(class_loss) / mf
        loss += xy_loss + wh_loss + confidence_loss + class_loss
        if print_loss:
            loss = tf.Print(loss, [
                loss, xy_loss, wh_loss, confidence_loss, class_loss,
                K.sum(ignore_mask)
            ],
                            message='loss: ')
    return loss
    def get_updates(self, loss, params):
        grads = self.get_gradients(loss, params)
        self.updates = [K.update_add(self.iterations, 1)]

        lr = self.lr

        if self.initial_decay > 0:
            lr = lr * (1. / (1. + self.decay *
                             K.cast(self.iterations, K.dtype(self.decay))))

        t = K.cast(self.iterations, K.floatx()) + 1

        if self.initial_total_steps > 0:
            warmup_steps = self.total_steps * self.warmup_proportion
            decay_steps = self.total_steps - warmup_steps
            lr = K.switch(
                t <= warmup_steps,
                lr * (t / warmup_steps),
                lr * (1.0 - K.minimum(t, decay_steps) / decay_steps),
            )

        ms = [
            K.zeros(K.int_shape(p), dtype=K.dtype(p), name='m_' + str(i))
            for (i, p) in enumerate(params)
        ]
        vs = [
            K.zeros(K.int_shape(p), dtype=K.dtype(p), name='v_' + str(i))
            for (i, p) in enumerate(params)
        ]

        if self.amsgrad:
            vhats = [
                K.zeros(K.int_shape(p),
                        dtype=K.dtype(p),
                        name='vhat_' + str(i)) for (i, p) in enumerate(params)
            ]
        else:
            vhats = [
                K.zeros(1, name='vhat_' + str(i)) for i in range(len(params))
            ]

        self.weights = [self.iterations] + ms + vs + vhats

        beta_1_t = K.pow(self.beta_1, t)
        beta_2_t = K.pow(self.beta_2, t)

        sma_inf = 2.0 / (1.0 - self.beta_2) - 1.0
        sma_t = sma_inf - 2.0 * t * beta_2_t / (1.0 - beta_2_t)

        for p, g, m, v, vhat in zip(params, grads, ms, vs, vhats):
            m_t = (self.beta_1 * m) + (1. - self.beta_1) * g
            v_t = (self.beta_2 * v) + (1. - self.beta_2) * K.square(g)

            m_corr_t = m_t / (1.0 - beta_1_t)
            if self.amsgrad:
                vhat_t = K.maximum(vhat, v_t)
                v_corr_t = K.sqrt(vhat_t / (1.0 - beta_2_t) + self.epsilon)
                self.updates.append(K.update(vhat, vhat_t))
            else:
                v_corr_t = K.sqrt(v_t / (1.0 - beta_2_t) + self.epsilon)

            r_t = K.sqrt((sma_t - 4.0) / (sma_inf - 4.0) * (sma_t - 2.0) /
                         (sma_inf - 2.0) * sma_inf / sma_t)

            p_t = K.switch(sma_t > 5, r_t * m_corr_t / v_corr_t, m_corr_t)

            if self.initial_weight_decay > 0:
                p_t += self.weight_decay * p

            p_t = p - lr * p_t

            self.updates.append(K.update(m, m_t))
            self.updates.append(K.update(v, v_t))
            new_p = p_t

            # Apply constraints.
            if getattr(p, 'constraint', None) is not None:
                new_p = p.constraint(new_p)

            self.updates.append(K.update(p, new_p))
        return self.updates
def yolo_loss(args, anchors, num_classes, ignore_thresh=.5):
    num_layers = len(anchors) // 3  # 得到先验框的个数整除3
    # 将args的值分割出来
    y_true = args[num_layers:]  # y真实值
    yolo_outputs = args[:num_layers]  # 预测的三个特征
    anchor_mask = [[6, 7, 8], [3, 4, 5], [0, 1, 2]
                   ] if num_layers == 3 else [[3, 4, 5], [1, 2, 3]]  # 先验框
    input_shape = (416, 416)  # 输入形状
    # 得到13x13,26,26,52,52网格
    grid_shape = [
        K.cast(K.shape(yolo_outputs[l])[1:3], K.dtype(y_true[0]))
        for l in range(num_layers)
    ]
    loss = 0  # 用于存放最后返回的loss值
    m = K.shape(yolo_outputs[0])[0]  # 取出图片数量
    mf = K.cast(m, K.dtype(yolo_outputs[0]))  # 改变变量的类型

    for l in range(num_layers):
        object_mask = y_true[1][..., 4:5]  # 图片中是否有物体用01表示
        true_class_probs = y_true[1][..., 5:]  # 图片的类

        grid, raw_pred, pred_xy, pred_wh = yolo_head(  # 对输入的预测特征进行解码
            yolo_outputs[1], anchors[anchor_mask[l]], num_classes, input_shape)

        pred_box = K.concatenate([pred_xy, pred_wh])  # xy_wh进行拼接

        ignore_mask = tf.TensorArray(K.dtype(y_true[0]),
                                     size=1,
                                     dynamic_size=True)
        object_mask_bool = K.cast(object_mask, 'bool')  # 转bool

        def loop_body(b, ignore_mask):
            true_box = tf.boolean_mask(y_true[1][b, ..., 0:4],
                                       object_mask_bool[b, ..., 0])  # 正确的先验框
            iou = box_iou(pred_box[b], true_box)  # 预测和真实进行求面积交集和并集的差
            best_iou = K.max(iou, axis=-1)  #
            ignore_mask = ignore_mask.write(
                b, K.cast(best_iou < ignore_thresh,
                          K.dtype(true_box)))  # 如果iou小于50%,就将图片和先验框写入
            return b + 1, ignore_mask

        def loop_body(b, ignore_mask):
            true_box = tf.boolean_mask(y_true[1][b, ..., :4],
                                       object_mask_bool[b, ..., 0])
            iou = box_iou(pred_box[b], true_box)
            iou = K.max(iou, axis=-1)
            ignore_mask = ignore_mask.write(b, K.cast(iou < ignore_thresh),
                                            K.dtype(true_box))

        _, ignore_mask = K.control_flow_ops.while_loop(lambda b, *args: b < m,
                                                       loop_body,
                                                       [0, ignore_mask])
        ignore_mask = ignore_mask.stack()
        ignore_mask = K.expand_dims(ignore_mask, -1)

        raw_true_xy = y_true[1][..., :2] * grid_shape[1][:] - grid
        raw_true_wh = K.log(y_true[1][..., 2:4] / anchors[anchor_mask[1]] *
                            input_shape[::-1])
        raw_true_wh = K.switch(object_mask, raw_true_wh,
                               K.zeros_like(raw_true_wh))

        box_loss_scale = 2 - y_true[1][..., 2:3] * y_true[l][..., 3:4]
        xy_loss = object_mask * box_loss_scale * K.binary_crossentropy(
            raw_true_xy, raw_pred[..., 0:2], from_logits=True)
        wh_loss = object_mask * box_loss_scale * 0.5 * K.square(
            raw_true_wh - raw_pred[..., 2:4])
        confidence_loss = object_mask * K.binary_crossentropy(
            object_mask, raw_pred[..., 4:5],
            from_logits=True) + (1 - object_mask) * K.binary_crossentropy(
                object_mask, raw_pred[4:5], from_logits=True) * ignore_mask
        class_loss = object_mask * K.binary_crossentropy(
            true_class_probs, raw_pred[..., 5:], from_logits=True)

        xy_loss = K.sum(xy_loss) / mf
        wh_loss = K.sum(wh_loss) / mf
        confidence_loss = K.sum(confidence_loss) / mf
        class_loss = K.sum(class_loss) / mf
        loss = xy_loss + wh_loss + confidence_loss + class_loss
    return loss
 def call(self, inputs):
     return K.switch(tf.constant(1), inputs[0], inputs[1])
Exemple #40
0
def bbox_ciou(boxes1, boxes2):
    '''
    计算ciou = iou - p2/c2 - av
    :param boxes1: (8, 13, 13, 3, 4)   pred_xywh
    :param boxes2: (8, 13, 13, 3, 4)   label_xywh
    :return:
    '''

    # 变成左上角坐标、右下角坐标
    boxes1_x0y0x1y1 = tf.concat([
        boxes1[..., :2] - boxes1[..., 2:] * 0.5,
        boxes1[..., :2] + boxes1[..., 2:] * 0.5
    ],
                                axis=-1)
    boxes2_x0y0x1y1 = tf.concat([
        boxes2[..., :2] - boxes2[..., 2:] * 0.5,
        boxes2[..., :2] + boxes2[..., 2:] * 0.5
    ],
                                axis=-1)
    '''
    逐个位置比较boxes1_x0y0x1y1[..., :2]和boxes1_x0y0x1y1[..., 2:],即逐个位置比较[x0, y0]和[x1, y1],小的留下。
    比如留下了[x0, y0]
    这一步是为了避免一开始w h 是负数,导致x0y0成了右下角坐标,x1y1成了左上角坐标。
    '''
    boxes1_x0y0x1y1 = tf.concat([
        tf.minimum(boxes1_x0y0x1y1[..., :2], boxes1_x0y0x1y1[..., 2:]),
        tf.maximum(boxes1_x0y0x1y1[..., :2], boxes1_x0y0x1y1[..., 2:])
    ],
                                axis=-1)
    boxes2_x0y0x1y1 = tf.concat([
        tf.minimum(boxes2_x0y0x1y1[..., :2], boxes2_x0y0x1y1[..., 2:]),
        tf.maximum(boxes2_x0y0x1y1[..., :2], boxes2_x0y0x1y1[..., 2:])
    ],
                                axis=-1)

    # 两个矩形的面积
    boxes1_area = (boxes1_x0y0x1y1[..., 2] - boxes1_x0y0x1y1[..., 0]) * (
        boxes1_x0y0x1y1[..., 3] - boxes1_x0y0x1y1[..., 1])
    boxes2_area = (boxes2_x0y0x1y1[..., 2] - boxes2_x0y0x1y1[..., 0]) * (
        boxes2_x0y0x1y1[..., 3] - boxes2_x0y0x1y1[..., 1])

    # 相交矩形的左上角坐标、右下角坐标,shape 都是 (8, 13, 13, 3, 2)
    left_up = tf.maximum(boxes1_x0y0x1y1[..., :2], boxes2_x0y0x1y1[..., :2])
    right_down = tf.minimum(boxes1_x0y0x1y1[..., 2:], boxes2_x0y0x1y1[..., 2:])

    # 相交矩形的面积inter_area。iou
    inter_section = tf.maximum(right_down - left_up, 0.0)
    inter_area = inter_section[..., 0] * inter_section[..., 1]
    union_area = boxes1_area + boxes2_area - inter_area
    iou = inter_area / union_area

    # 包围矩形的左上角坐标、右下角坐标,shape 都是 (8, 13, 13, 3, 2)
    enclose_left_up = tf.minimum(boxes1_x0y0x1y1[..., :2],
                                 boxes2_x0y0x1y1[..., :2])
    enclose_right_down = tf.maximum(boxes1_x0y0x1y1[..., 2:],
                                    boxes2_x0y0x1y1[..., 2:])

    # 包围矩形的对角线的平方
    enclose_wh = enclose_right_down - enclose_left_up
    enclose_c2 = K.pow(enclose_wh[..., 0], 2) + K.pow(enclose_wh[..., 1], 2)

    # 两矩形中心点距离的平方
    p2 = K.pow(boxes1[..., 0] - boxes2[..., 0], 2) + K.pow(
        boxes1[..., 1] - boxes2[..., 1], 2)

    # 增加av。分母boxes2[..., 3]可能为0,所以加上除0保护防止nan。
    atan1 = tf.atan(boxes1[..., 2] / boxes1[..., 3])
    temp_a = K.switch(boxes2[..., 3] > 0.0, boxes2[..., 3],
                      boxes2[..., 3] + 1.0)
    atan2 = tf.atan(boxes2[..., 2] / temp_a)
    v = 4.0 * K.pow(atan1 - atan2, 2) / (math.pi**2)
    a = v / (1 - iou + v)

    ciou = iou - 1.0 * p2 / enclose_c2 - 1.0 * a * v
    return ciou
    def step(self, a, states):
        r_tm1 = states[:self.nb_layers]
        c_tm1 = states[self.nb_layers:2*self.nb_layers]
        e_tm1 = states[2*self.nb_layers:3*self.nb_layers]

        if self.extrap_start_time is not None:
            t = states[-1]
            a = K.switch(t >= self.t_extrap, states[-2], a)  # if past self.extrap_start_time, the previous prediction will be treated as the actual

        c = []
        r = []
        e = []

        for l in reversed(range(self.nb_layers)):
            inputs = [r_tm1[l], e_tm1[l]]
            if l < self.nb_layers - 1:
                inputs.append(r_up)

            inputs = K.concatenate(inputs, axis=self.channel_axis)
            i = self.conv_layers['i'][l].call(inputs)
            f = self.conv_layers['f'][l].call(inputs)
            o = self.conv_layers['o'][l].call(inputs)
            _c = f * c_tm1[l] + i * self.conv_layers['c'][l].call(inputs)
            _r = o * self.LSTM_activation(_c)
            c.insert(0, _c)
            r.insert(0, _r)

            if l > 0:
                r_up = self.upsample.call(_r)

        for l in range(self.nb_layers):
            ahat = self.conv_layers['ahat'][l].call(r[l])
            if l == 0:
                ahat = K.minimum(ahat, self.pixel_max)
                frame_prediction = ahat

            # compute errors
            e_up = self.error_activation(ahat - a)
            e_down = self.error_activation(a - ahat)

            e.append(K.concatenate((e_up, e_down), axis=self.channel_axis))

            if l < self.nb_layers - 1:
                a = self.conv_layers['a'][l].call(e[l])
                a = self.pool.call(a)  # target for next layer

        if self.output_mode == 'prediction':
            output = frame_prediction
    
         
        elif self.output_mode == 'all_R':
            #for l in range(3, self.nb_layers): #start at 1, to grab the last 3 layers
            for l in range(2, 4): #Grab last 2 layers
                layer_R = K.batch_flatten(r[l])
                output = layer_R if l == 2 else K.concatenate((output, layer_R), axis=-1)    
            
        else:
            for l in range(self.nb_layers):
                layer_error = K.mean(K.batch_flatten(e[l]), axis=-1, keepdims=True)
                all_error = layer_error if l == 0 else K.concatenate((all_error, layer_error), axis=-1)
            if self.output_mode == 'error':
                output = all_error
            else:
                output = K.concatenate((K.batch_flatten(frame_prediction), all_error), axis=-1)

        states = r + c + e
        if self.extrap_start_time is not None:
            states += [frame_prediction, t + 1]
        return output, states
Exemple #42
0
def bool_match(y_true, y_pred):
    return K.switch(K.any(y_true - y_pred.round()), K.variable(0),
                    K.variable(1))
Exemple #43
0
def myMax(dist):
    dist1, dist2, sth = dist
    ma = K.max((dist1, dist2), axis=0)
    ma = K.dot(ma, ma)
    return K.switch(ma > sth, 1, 0)
Exemple #44
0
 def compute_loss(self, input, output, input_mask=None, output_mask=None):
     l = K.switch(input < self.low, K.abs(input - self.low), 0)
     h = K.switch(input > self.high, K.abs(input - self.high), 0)
     return K.in_train_phase(self.weight*K.mean(h + l), 0)
Exemple #45
0
def contrastive_accuracy(y_true, y_pred):
    y_class = K.switch(y_pred > 1, 1, 0)
    return K.mean(K.equal(y_true, y_class), axis=-1)
Exemple #46
0
 def get_output(self, train=False):
     X = self.get_input(train)
     return K.expand_dims(K.switch(K.sum(X, -1), 1, 0))
Exemple #47
0
def yolo_loss(args, anchors, num_classes, ignore_thresh=.5, print_loss=False):
    '''Return yolo_loss tensor

    Parameters
    ----------
    yolo_outputs: list of tensor, the output of yolo_body or tiny_yolo_body
    y_true: list of array, the output of preprocess_true_boxes
    anchors: array, shape=(N, 2), wh
    num_classes: integer
    ignore_thresh: float, the iou threshold whether to ignore object confidence loss

    Returns
    -------
    loss: tensor, shape=(1,)

    '''
    num_layers = len(anchors) // 3  # default setting
    # y_pred
    yolo_outputs = args[:num_layers]
    # y_true
    y_true = args[num_layers:]
    anchor_mask = [[6, 7, 8], [3, 4, 5], [0, 1, 2]
                   ] if num_layers == 3 else [[3, 4, 5], [1, 2, 3]]
    # input shape is obtained by multiplying 32 to the output grid of 1st detection layer
    input_shape = K.cast(
        K.shape(yolo_outputs[0])[1:3] * 32, K.dtype(y_true[0]))
    # grid shape is the index 1,2 location of the y_pred
    # (14,14), (28,28), (56,56)
    grid_shapes = [
        K.cast(K.shape(yolo_outputs[l])[1:3], K.dtype(y_true[0]))
        for l in range(num_layers)
    ]
    loss = 0
    # batch size
    m = K.shape(yolo_outputs[0])[0]
    mf = K.cast(m, K.dtype(yolo_outputs[0]))

    for l in range(num_layers):
        # confidence score for grid-cell with object =1, rest all are 0
        # index of the cell where an object is present
        object_mask = y_true[l][..., 4:5]
        # class probabilities at index 5:55
        true_class_probs = y_true[l][..., 5:]

        grid, raw_pred, pred_xy, pred_wh = yolo_head(yolo_outputs[l],
                                                     anchors[anchor_mask[l]],
                                                     num_classes,
                                                     input_shape,
                                                     calc_loss=True)
        pred_box = K.concatenate([pred_xy, pred_wh])

        # Darknet raw box to calculate loss.
        # The xywh of the true_boxes need to converted wrt to the top-left corner of the grid-cell.
        # Original true-box are normalized wrt to the image size (448, 448)
        raw_true_xy = y_true[l][..., :2] * grid_shapes[l][::-1] - grid
        raw_true_wh = K.log(y_true[l][..., 2:4] / anchors[anchor_mask[l]] *
                            input_shape[::-1])
        # if else to remove -inf because of log operation on zero values in previous step
        raw_true_wh = K.switch(object_mask, raw_true_wh,
                               K.zeros_like(raw_true_wh))  # avoid log(0)=-inf
        box_loss_scale = 2 - y_true[l][..., 2:3] * y_true[l][..., 3:4]

        # Find ignore mask, iterate over each of batch.
        ignore_mask = tf.TensorArray(K.dtype(y_true[0]),
                                     size=1,
                                     dynamic_size=True)
        object_mask_bool = K.cast(object_mask, 'bool')

        def loop_body(b, ignore_mask):
            true_box = tf.boolean_mask(y_true[l][b, ..., 0:4],
                                       object_mask_bool[b, ..., 0])
            iou = box_iou(pred_box[b], true_box)
            best_iou = K.max(iou, axis=-1)
            ignore_mask = ignore_mask.write(
                b, K.cast(best_iou < ignore_thresh, K.dtype(true_box)))
            return b + 1, ignore_mask

        if TF_VERSION2:
            _, ignore_mask = tf.while_loop(lambda b, *args: b < m, loop_body,
                                           [0, ignore_mask])
        else:
            _, ignore_mask = K.control_flow_ops.while_loop(
                lambda b, *args: b < m, loop_body, [0, ignore_mask])
        ignore_mask = ignore_mask.stack()
        ignore_mask = K.expand_dims(ignore_mask, -1)

        # K.binary_crossentropy is helpful to avoid exp overflow.
        xy_loss = object_mask * box_loss_scale * K.binary_crossentropy(
            raw_true_xy, raw_pred[..., 0:2], from_logits=True)
        wh_loss = object_mask * box_loss_scale * 0.5 * K.square(
            raw_true_wh - raw_pred[..., 2:4])
        confidence_loss = object_mask * K.binary_crossentropy(object_mask, raw_pred[..., 4:5], from_logits=True) + \
                          (1 - object_mask) * K.binary_crossentropy(object_mask, raw_pred[..., 4:5],
                                                                    from_logits=True) * ignore_mask
        class_loss = object_mask * K.binary_crossentropy(
            true_class_probs, raw_pred[..., 5:], from_logits=True)

        xy_loss = K.sum(xy_loss) / mf
        wh_loss = K.sum(wh_loss) / mf
        confidence_loss = K.sum(confidence_loss) / mf
        class_loss = K.sum(class_loss) / mf
        loss += xy_loss + wh_loss + confidence_loss + class_loss
        if print_loss:
            loss = tf.Print(loss, [
                loss, xy_loss, wh_loss, confidence_loss, class_loss,
                K.sum(ignore_mask)
            ],
                            message='loss: ')
    return loss
Exemple #48
0
def clip_norm(g, c, n):
    if c > 0:
        g = K.switch(n >= c, g * c / n, g)
    return g
Exemple #49
0
def weighted_sum(first, second, sigma, first_threshold=-np.inf, second_threshold=np.inf):
    logit_probs = first * sigma + second * (1.0 - sigma)
    infty_tensor = kb.ones_like(logit_probs) * INFTY
    logit_probs = kb.switch(kb.greater(first, first_threshold), logit_probs, infty_tensor)
    logit_probs = kb.switch(kb.greater(second, second_threshold), logit_probs, infty_tensor)
    return logit_probs
Exemple #50
0
def yolo_loss(args, anchors, num_classes, ignore_thresh=.5):
    '''Return yolo_loss tensor

    Parameters
    ----------
    yolo_outputs: list of tensor, the output of yolo_body
    y_true: list of array, the output of preprocess_true_boxes
    anchors: array, shape=(T, 2), wh
    num_classes: integer
    ignore_thresh: float, the iou threshold whether to ignore object confidence loss

    Returns
    -------
    loss: tensor, shape=(1,)

    '''

    # YOLOv3 outputs are 3 numpy array of shape (batch_size, height, width, num_anchors * (5 + num_classes))
    yolo_outputs = args[:3]

    # Ground truth is a list of 3 numpy array of shape (batch_size, height, width, num_anchors, 5 + num_classes)
    y_true = args[3:]

    # Anchors size decrease during top-down upsampling path way
    anchor_mask = [[6, 7, 8], [3, 4, 5], [0, 1, 2]]

    # Input shape is 32 times more than the first detection layer shape
    input_shape = K.cast(
        K.shape(yolo_outputs[0])[1:3] * 32, K.dtype(y_true[0]))

    # Shapes of each detection layer
    grid_shapes = [
        K.cast(K.shape(yolo_outputs[layer])[1:3], K.dtype(y_true[0]))
        for layer in range(3)
    ]

    # Initialize loss
    xy_loss = 0
    wh_loss = 0
    confidence_loss = 0
    class_loss = 0
    loss = 0

    batch_size = K.shape(yolo_outputs[0])[0]

    for layer in range(3):

        # True objectness score/confidence (either 1 or 0) of each anchor-offset at each grid cell
        object_mask = y_true[layer][..., 4:5]

        # True one-hot encoded class probabilities of each ground truth box
        true_class_probs = y_true[layer][..., 5:]

        grid, y_pred, pred_xy, pred_wh, anchors_tensor = yolo_head(
            feats=yolo_outputs[layer],
            anchors=anchors[anchor_mask[layer]],
            num_classes=num_classes,
            input_shape=input_shape,
            calc_loss=True)

        pred_box = K.concatenate([pred_xy, pred_wh])

        true_xy = y_true[layer][..., :2] * grid_shapes[layer][::-1] - grid

        true_wh = K.log(y_true[layer][..., 2:4] * input_shape[::-1] /
                        anchors_tensor)

        # Avoid log(0) = -inf
        true_wh = K.switch(object_mask, true_wh, K.zeros_like(true_wh))

        box_loss_scale = 2 - y_true[layer][..., 2:3] * y_true[layer][..., 3:4]

        ignore_mask = tf.TensorArray(dtype=K.dtype(y_true[0]),
                                     size=1,
                                     dynamic_size=True)

        object_mask_bool = K.cast(object_mask, 'bool')

        def loop_body(b, ignore_mask):

            true_box = tf.boolean_mask(tensor=y_true[layer][b, ..., 0:4],
                                       mask=object_mask_bool[b, ..., 0])

            iou = box_iou(pred_box[b], true_box)

            best_iou = K.max(iou, axis=-1)

            ignore_mask = ignore_mask.write(index=b,
                                            value=K.cast(
                                                best_iou < ignore_thresh,
                                                K.dtype(true_box)))

            return b + 1, ignore_mask

        _, ignore_mask = tf.while_loop(cond=lambda b, *args: b < batch_size,
                                       body=loop_body,
                                       loop_vars=[0, ignore_mask])

        ignore_mask = ignore_mask.stack()

        ignore_mask = K.expand_dims(ignore_mask, axis=-1)

        xy_loss += K.sum(object_mask * box_loss_scale *
                         K.binary_crossentropy(true_xy, y_pred[..., 0:2]))

        wh_loss += K.sum(object_mask * box_loss_scale * 0.5 *
                         K.square(true_wh - y_pred[..., 2:4]))

        # log_weight = ignore_mask + (ignore_mask - 1) * object_mask
        #confidence_loss = (1 - object_mask) * y_pred[..., 4] + \
        #									log_weight * K.log(1 + K.exp(0 - K.abs(y_pred[..., 4]))) + \
        #													   K.relu(0 - y_pred[..., 4])

        confidence_loss += K.sum(object_mask * K.binary_crossentropy(object_mask, y_pred[..., 4:5], from_logits=True) + \
                                (1 - object_mask) * K.binary_crossentropy(object_mask, y_pred[..., 4:5], from_logits=True) * ignore_mask)

        # confidence_loss = tf.nn.weighted_cross_entropy_with_logits(object_mask, y_pred[..., 4:5], ignore_mask)

        class_loss += K.sum(object_mask * K.binary_crossentropy(
            true_class_probs, y_pred[..., 5:], from_logits=True))

    loss += xy_loss + wh_loss + confidence_loss + class_loss

    return loss
Exemple #51
0
def clip_norm(g, c, n):
    ''' Clip gradients '''
    if c > 0:
        g = K.switch(K.ge(n, c), g * c / n, g)
    return g
Exemple #52
0
def yolo4_loss(args, anchors, num_classes, ignore_thresh=.5, label_smoothing=0, use_focal_loss=False, use_focal_obj_loss=False, use_softmax_loss=False, use_giou_loss=False, use_diou_loss=False):
    '''Return yolo4_loss tensor

    Parameters
    ----------
    yolo_outputs: list of tensor, the output of yolo_body or tiny_yolo_body
    y_true: list of array, the output of preprocess_true_boxes
    anchors: array, shape=(N, 2), wh
    num_classes: integer
    ignore_thresh: float, the iou threshold whether to ignore object confidence loss

    Returns
    -------
    loss: tensor, shape=(1,)

    '''
    num_layers = len(anchors)//3 # default setting
    yolo_outputs = args[:num_layers]
    y_true = args[num_layers:]
    anchor_mask = [[6,7,8], [3,4,5], [0,1,2]] if num_layers==3 else [[3,4,5], [0,1,2]]
    input_shape = K.cast(K.shape(yolo_outputs[0])[1:3] * 32, K.dtype(y_true[0]))
    grid_shapes = [K.cast(K.shape(yolo_outputs[l])[1:3], K.dtype(y_true[0])) for l in range(num_layers)]
    loss = 0
    total_location_loss = 0
    total_confidence_loss = 0
    total_class_loss = 0
    m = K.shape(yolo_outputs[0])[0] # batch size, tensor
    mf = K.cast(m, K.dtype(yolo_outputs[0]))

    for l in range(num_layers):
        object_mask = y_true[l][..., 4:5]
        true_class_probs = y_true[l][..., 5:]
        if label_smoothing:
            true_class_probs = _smooth_labels(true_class_probs, label_smoothing)

        grid, raw_pred, pred_xy, pred_wh = yolo_head(yolo_outputs[l],
             anchors[anchor_mask[l]], num_classes, input_shape, calc_loss=True)
        pred_box = K.concatenate([pred_xy, pred_wh])

        # Darknet raw box to calculate loss.
        raw_true_xy = y_true[l][..., :2]*grid_shapes[l][::-1] - grid
        raw_true_wh = K.log(y_true[l][..., 2:4] / anchors[anchor_mask[l]] * input_shape[::-1])
        raw_true_wh = K.switch(object_mask, raw_true_wh, K.zeros_like(raw_true_wh)) # avoid log(0)=-inf
        box_loss_scale = 2 - y_true[l][...,2:3]*y_true[l][...,3:4]

        # Find ignore mask, iterate over each of batch.
        ignore_mask = tf.TensorArray(K.dtype(y_true[0]), size=1, dynamic_size=True)
        object_mask_bool = K.cast(object_mask, 'bool')
        def loop_body(b, ignore_mask):
            true_box = tf.boolean_mask(y_true[l][b,...,0:4], object_mask_bool[b,...,0])
            iou = box_iou(pred_box[b], true_box)
            best_iou = K.max(iou, axis=-1)
            ignore_mask = ignore_mask.write(b, K.cast(best_iou<ignore_thresh, K.dtype(true_box)))
            return b+1, ignore_mask
        _, ignore_mask = tf.while_loop(lambda b,*args: b<m, loop_body, [0, ignore_mask])
        ignore_mask = ignore_mask.stack()
        ignore_mask = K.expand_dims(ignore_mask, -1)

        if use_focal_obj_loss:
            # Focal loss for objectness confidence
            confidence_loss = sigmoid_focal_loss(object_mask, raw_pred[...,4:5])
        else:
            confidence_loss = object_mask * K.binary_crossentropy(object_mask, raw_pred[...,4:5], from_logits=True)+ \
                (1-object_mask) * K.binary_crossentropy(object_mask, raw_pred[...,4:5], from_logits=True) * ignore_mask

        if use_focal_loss:
            # Focal loss for classification score
            if use_softmax_loss:
                class_loss = softmax_focal_loss(true_class_probs, raw_pred[...,5:])
            else:
                class_loss = sigmoid_focal_loss(true_class_probs, raw_pred[...,5:])
        else:
            if use_softmax_loss:
                # use softmax style classification output
                class_loss = object_mask * K.expand_dims(K.categorical_crossentropy(true_class_probs, raw_pred[...,5:], from_logits=True), axis=-1)
            else:
                # use sigmoid style classification output
                class_loss = object_mask * K.binary_crossentropy(true_class_probs, raw_pred[...,5:], from_logits=True)


        if use_giou_loss:
            # Calculate GIoU loss as location loss
            raw_true_box = y_true[l][...,0:4]
            giou = box_giou(pred_box, raw_true_box)
            giou_loss = object_mask * box_loss_scale * (1 - giou)
            giou_loss = K.sum(giou_loss) / mf
            location_loss = giou_loss
        elif use_diou_loss:
            # Calculate DIoU loss as location loss
            raw_true_box = y_true[l][...,0:4]
            diou = box_diou(pred_box, raw_true_box)
            diou_loss = object_mask * box_loss_scale * (1 - diou)
            diou_loss = K.sum(diou_loss) / mf
            location_loss = diou_loss
        else:
            # Standard YOLO location loss
            # K.binary_crossentropy is helpful to avoid exp overflow.
            xy_loss = object_mask * box_loss_scale * K.binary_crossentropy(raw_true_xy, raw_pred[...,0:2], from_logits=True)
            wh_loss = object_mask * box_loss_scale * 0.5 * K.square(raw_true_wh-raw_pred[...,2:4])
            xy_loss = K.sum(xy_loss) / mf
            wh_loss = K.sum(wh_loss) / mf
            location_loss = xy_loss + wh_loss

        confidence_loss = K.sum(confidence_loss) / mf
        class_loss = K.sum(class_loss) / mf
        loss += location_loss + confidence_loss + class_loss
        total_location_loss += location_loss
        total_confidence_loss += confidence_loss
        total_class_loss += class_loss

    # Fit for tf 2.0.0 loss shape
    loss = K.expand_dims(loss, axis=-1)

    return loss #, total_location_loss, total_confidence_loss, total_class_loss
Exemple #53
0
 def call(self, x, mask=None):
     condition, then_expr, else_expr = x
     pattern = (0, 1) + ('x',) * (K.ndim(then_expr) - 2)
     return K.switch(condition.dimshuffle(*pattern), then_expr, else_expr)
Exemple #54
0
def yolo_loss(args, anchors, num_classes, ignore_thresh=.5, print_loss=False):

    # 一共有三层
    num_layers = len(anchors) // 3

    # 将预测结果和实际ground truth分开,args是[*model_body.output, *y_true]
    # y_true是一个列表,包含三个特征层,shape分别为(m,13,13,3,85),(m,26,26,3,85),(m,52,52,3,85)。
    # yolo_outputs是一个列表,包含三个特征层,shape分别为(m,13,13,3,85),(m,26,26,3,85),(m,52,52,3,85)。
    y_true = args[num_layers:]
    yolo_outputs = args[:num_layers]

    # 先验框
    # 678为116,90,  156,198,  373,326
    # 345为30,61,  62,45,  59,119
    # 012为10,13,  16,30,  33,23,
    anchor_mask = [[6, 7, 8], [3, 4, 5], [0, 1, 2]
                   ] if num_layers == 3 else [[3, 4, 5], [1, 2, 3]]

    # 得到input_shpae为416,416
    input_shape = K.cast(
        K.shape(yolo_outputs[0])[1:3] * 32, K.dtype(y_true[0]))

    # 得到网格的shape为13,13;26,26;52,52
    grid_shapes = [
        K.cast(K.shape(yolo_outputs[l])[1:3], K.dtype(y_true[0]))
        for l in range(num_layers)
    ]
    loss = 0

    # 取出每一张图片
    # m的值就是batch_size
    m = K.shape(yolo_outputs[0])[0]
    mf = K.cast(m, K.dtype(yolo_outputs[0]))

    # y_true是一个列表,包含三个特征层,shape分别为(m,13,13,3,85),(m,26,26,3,85),(m,52,52,3,85)。
    # yolo_outputs是一个列表,包含三个特征层,shape分别为(m,13,13,3,85),(m,26,26,3,85),(m,52,52,3,85)。
    for l in range(num_layers):
        # 以第一个特征层(m,13,13,3,85)为例子
        # 取出该特征层中存在目标的点的位置。(m,13,13,3,1)
        object_mask = y_true[l][..., 4:5]
        # 取出其对应的种类(m,13,13,3,80)
        true_class_probs = y_true[l][..., 5:]

        # 将yolo_outputs的特征层输出进行处理
        # grid为网格结构(13,13,1,2),raw_pred为尚未处理的预测结果(m,13,13,3,85)
        # 还有解码后的xy,wh,(m,13,13,3,2)
        grid, raw_pred, pred_xy, pred_wh = yolo_head(yolo_outputs[l],
                                                     anchors[anchor_mask[l]],
                                                     num_classes,
                                                     input_shape,
                                                     calc_loss=True)

        # 这个是解码后的预测的box的位置
        # (m,13,13,3,4)
        pred_box = K.concatenate([pred_xy, pred_wh])

        # 找到负样本群组,第一步是创建一个数组,[]
        ignore_mask = tf.TensorArray(K.dtype(y_true[0]),
                                     size=1,
                                     dynamic_size=True)
        object_mask_bool = K.cast(object_mask, 'bool')

        # 对每一张图片计算ignore_mask
        def loop_body(b, ignore_mask):
            # 取出第b副图内,真实存在的所有的box的参数
            # n,4
            true_box = tf.boolean_mask(y_true[l][b, ..., 0:4],
                                       object_mask_bool[b, ..., 0])
            # 计算预测结果与真实情况的iou
            # pred_box为13,13,3,4
            # 计算的结果是每个pred_box和其它所有真实框的iou
            # 13,13,3,n
            iou = box_iou(pred_box[b], true_box)

            # 13,13,3,1
            best_iou = K.max(iou, axis=-1)

            # 判断预测框的iou小于ignore_thresh则认为该预测框没有与之对应的真实框
            # 则被认为是这幅图的负样本
            ignore_mask = ignore_mask.write(
                b, K.cast(best_iou < ignore_thresh, K.dtype(true_box)))
            return b + 1, ignore_mask

        # 遍历所有的图片
        _, ignore_mask = K.control_flow_ops.while_loop(lambda b, *args: b < m,
                                                       loop_body,
                                                       [0, ignore_mask])

        # 将每幅图的内容压缩,进行处理
        ignore_mask = ignore_mask.stack()
        #(m,13,13,3,1,1)
        ignore_mask = K.expand_dims(ignore_mask, -1)

        # 将真实框进行编码,使其格式与预测的相同,后面用于计算loss
        raw_true_xy = y_true[l][..., :2] * grid_shapes[l][:] - grid
        raw_true_wh = K.log(y_true[l][..., 2:4] / anchors[anchor_mask[l]] *
                            input_shape[::-1])

        # object_mask如果真实存在目标则保存其wh值
        # switch接口,就是一个if/else条件判断语句
        raw_true_wh = K.switch(object_mask, raw_true_wh,
                               K.zeros_like(raw_true_wh))
        box_loss_scale = 2 - y_true[l][..., 2:3] * y_true[l][..., 3:4]

        xy_loss = object_mask * box_loss_scale * K.binary_crossentropy(
            raw_true_xy, raw_pred[..., 0:2], from_logits=True)
        wh_loss = object_mask * box_loss_scale * 0.5 * K.square(
            raw_true_wh - raw_pred[..., 2:4])

        # 如果该位置本来有框,那么计算1与置信度的交叉熵
        # 如果该位置本来没有框,而且满足best_iou<ignore_thresh,则被认定为负样本
        # best_iou<ignore_thresh用于限制负样本数量
        confidence_loss = object_mask * K.binary_crossentropy(object_mask, raw_pred[...,4:5], from_logits=True)+ \
            (1-object_mask) * K.binary_crossentropy(object_mask, raw_pred[...,4:5], from_logits=True) * ignore_mask

        class_loss = object_mask * K.binary_crossentropy(
            true_class_probs, raw_pred[..., 5:], from_logits=True)

        xy_loss = K.sum(xy_loss) / mf
        wh_loss = K.sum(wh_loss) / mf
        confidence_loss = K.sum(confidence_loss) / mf
        class_loss = K.sum(class_loss) / mf
        loss += xy_loss + wh_loss + confidence_loss + class_loss
        if print_loss:
            loss = tf.Print(loss, [
                loss, xy_loss, wh_loss, confidence_loss, class_loss,
                K.sum(ignore_mask)
            ],
                            message='loss: ')
    return loss
 def call(self, x, mask=None):
     if mask is None:
         return K.max(x, axis=1)
     else:
         return K.max(K.switch(mask[:, :, np.newaxis], x, -np.inf), axis=1)
Exemple #56
0
def smooth_L1(y_true, y_pred, clip_delta=0.5):
    x = K.abs(y_true - y_pred)[:, :1]
    x = K.switch(x < clip_delta, 0.5 * x ** 2, clip_delta * (x - 0.5 * clip_delta))
    return  K.mean(x)
Exemple #57
0
    def call(self, inputs, states, training=None):
        samples, inFeatures = states[0].shape
        h_tm1 = states[0]  # previous state
        time_step = states[1]
        if 0 < self.dropout < 1 and self._dropout_mask is None:
            self._dropout_mask = _generate_dropout_mask(_generate_dropout_ones(
                inputs,
                K.shape(inputs)[-1]),
                                                        self.dropout,
                                                        training=training)
        if (0 < self.recurrent_dropout < 1
                and self._recurrent_dropout_mask is None):
            self._recurrent_dropout_mask = _generate_dropout_mask(
                _generate_dropout_ones(inputs, self.units),
                self.recurrent_dropout,
                training=training)

        dp_mask = self._dropout_mask
        rec_dp_mask = self._recurrent_dropout_mask

        if dp_mask is not None:
            inputs *= dp_mask

        if rec_dp_mask is not None:
            h_tm1 *= rec_dp_mask

        if self.split_method:
            # Update State, module-by-module
            h_mod = []
            unitsPerMod = self.units // self.clock_numPeriods

            def if_true():
                hModule = K.dot(h_tm1[:, s:],
                                self.rec_kernel_c_mod[i]) + K.dot(
                                    inputs, self.kernel_c_mod[i])
                if self.use_bias:
                    hModule = K.bias_add(hModule, self.bias_mod[i])
                if self.recurrent_activation is not None:
                    hModule = self.recurrent_activation(hModule)
                return hModule

            def if_false():
                return hModule

            for i, period in enumerate(self.clock_periods):
                s = i * unitsPerMod
                e = (i + 1) * unitsPerMod
                hModule = h_tm1[:, s:e]
                h_mod.append(
                    tf.cond(K.equal(K.tf.mod(time_step[0][0], period), 0),
                            if_true, if_false))
            hidden = K.concatenate(h_mod)

        else:
            # Update State, all at once, then only use certain updates
            h = K.dot(inputs, self.kernel) + K.dot(
                h_tm1, self.recurrent_kernel_c * self.cw_mask)
            if self.bias is not None:
                h = K.bias_add(h, self.bias)
            if self.recurrent_activation is not None:
                h = self.recurrent_activation(h)

            h = K.switch(K.equal(K.tf.mod(time_step, self.cw_periods), 0), h,
                         h_tm1)
            hidden = h

        # Calculate Output
        output = K.dot(hidden, self.recurrent_kernel_o)
        if self.activation is not None:
            output = self.activation(output)

        # Properly set learning phase on output tensor.
        if 0 < self.dropout + self.recurrent_dropout:
            if training is None:
                output._uses_learning_phase = True
        return output, [hidden, time_step + 1]
def yolo_loss(args, anchors, num_classes, ignore_thresh=.5):
    """Return yolo_loss tensor

    Parameters
    ----------
    yolo_outputs: list of tensor, the output of yolo_body
    y_true: list of array, the output of preprocess_true_boxes
    anchors: array, shape=(T, 2), wh
    num_classes: integer
    ignore_thresh: float, the iou threshold whether to ignore object confidence loss

    Returns
    -------
    loss: tensor, shape=(1,)

    """
    yolo_outputs = args[:3]
    y_true = args[3:]
    anchor_mask = [[6, 7, 8], [3, 4, 5], [0, 1, 2]]
    input_shape = K.cast(
        K.shape(yolo_outputs[0])[1:3] * 32, K.dtype(y_true[0]))
    grid_shapes = [
        K.cast(K.shape(yolo_outputs[l])[1:3], K.dtype(y_true[0]))
        for l in range(3)
    ]
    loss = 0
    m = K.shape(yolo_outputs[0])[0]

    for l in range(3):
        object_mask = y_true[l][..., 4:5]
        true_class_probs = y_true[l][..., 5:]

        pred_xy, pred_wh, pred_confidence, pred_class_probs = yolo_head(
            yolo_outputs[l], anchors[anchor_mask[l]], num_classes, input_shape)
        pred_box = K.concatenate([pred_xy, pred_wh])

        # Darknet box loss.
        xy_delta = (y_true[l][..., :2] - pred_xy) * grid_shapes[l][::-1]
        wh_delta = K.log(y_true[l][..., 2:4]) - K.log(pred_wh)
        # Avoid log(0)=-inf.
        wh_delta = K.switch(object_mask, wh_delta, K.zeros_like(wh_delta))
        box_delta = K.concatenate([xy_delta, wh_delta], axis=-1)
        box_delta_scale = 2 - y_true[l][..., 2:3] * y_true[l][..., 3:4]

        # Find ignore mask, iterate over each of batch.
        ignore_mask = tf.TensorArray(K.dtype(y_true[0]),
                                     size=1,
                                     dynamic_size=True)
        object_mask_bool = K.cast(object_mask, 'bool')

        def loop_body(b, ignore_mask):
            true_box = tf.boolean_mask(y_true[l][b, ..., 0:4],
                                       object_mask_bool[b, ..., 0])
            iou = box_iou(pred_box[b], true_box)
            best_iou = K.max(iou, axis=-1)
            ignore_mask = ignore_mask.write(
                b, K.cast(best_iou < ignore_thresh, K.dtype(true_box)))
            return b + 1, ignore_mask

        _, ignore_mask = K.control_flow_ops.while_loop(lambda b, *args: b < m,
                                                       loop_body,
                                                       [0, ignore_mask])
        ignore_mask = ignore_mask.stack()
        ignore_mask = K.expand_dims(ignore_mask, -1)

        box_loss = object_mask * K.square(box_delta * box_delta_scale)
        confidence_loss = object_mask * K.square(1 - pred_confidence) + \
                          (1 - object_mask) * K.square(0 - pred_confidence) * ignore_mask
        class_loss = object_mask * K.square(true_class_probs -
                                            pred_class_probs)
        loss += K.sum(box_loss) + K.sum(confidence_loss) + K.sum(class_loss)
    return loss / K.cast(m, K.dtype(loss))
Exemple #59
0
def _wta(X):
    M = K.max(X, axis=-1, keepdims=True)
    R = K.switch(K.equal(X, M), X, 0.)
    return R
Exemple #60
0
def fcn_norm_loss_graph(target_masks, pred_heatmap):
    '''
    Mask binary cross-entropy loss for the masks head.
    target_masks:       [batch, height, width, num_classes].
    pred_heatmap:         [batch, height, width, num_classes] float32 tensor
    '''
    # Reshape for simplicity. Merge first two dimensions into one.
    print('\n>>> fcn_norm_loss_graph ')
    print('    target_masks     shape :', target_masks.shape)
    print('    pred_heatmap       shape :', pred_heatmap.shape)
    print(
        '\n    L2 normalization ------------------------------------------------------'
    )
    pred_shape = KB.shape(pred_heatmap)
    print(' pred_shape: KB.shape:', pred_shape, ' tf.get_shape(): ',
          pred_heatmap.get_shape(), ' pred_maks.shape:', pred_heatmap.shape,
          'tf.shape :', tf.shape(pred_heatmap))

    output_flatten = KB.reshape(pred_heatmap,
                                (pred_shape[0], -1, pred_shape[-1]))
    output_norm1 = KB.l2_normalize(output_flatten, axis=1)
    output_norm = KB.reshape(output_norm1, pred_shape)

    print('   output_flatten    : ', KB.int_shape(output_flatten),
          output_flatten.get_shape(), ' Keras tensor ',
          KB.is_keras_tensor(output_flatten))
    print('   output_norm1      : ', KB.int_shape(output_norm1),
          output_norm1.get_shape(), ' Keras tensor ',
          KB.is_keras_tensor(output_norm1))
    print('   output_norm final : ', KB.int_shape(output_norm),
          output_norm.get_shape(), ' Keras tensor ',
          KB.is_keras_tensor(output_norm))

    print(
        '\n    L2 normalization ------------------------------------------------------'
    )
    target_shape = KB.shape(target_masks)
    print(' target shape is :', target_shape, '   ', target_masks.get_shape(),
          target_masks.shape, tf.shape(target_masks))

    gauss_flatten = KB.reshape(target_masks,
                               (target_shape[0], -1, target_shape[-1]))
    gauss_norm1 = KB.l2_normalize(gauss_flatten, axis=1)
    gauss_norm = KB.reshape(gauss_norm1, target_shape)

    print('    guass_flatten         : ', gauss_flatten.shape,
          gauss_flatten.get_shape(), 'Keras tensor ',
          KB.is_keras_tensor(gauss_flatten))
    print('    gauss_norm shape      : ', gauss_norm1.shape,
          gauss_norm1.get_shape(), 'Keras tensor ',
          KB.is_keras_tensor(gauss_norm1))
    print('    gauss_norm final shape: ', gauss_norm.shape,
          gauss_norm.get_shape(), 'Keras tensor ',
          KB.is_keras_tensor(gauss_norm))

    pred_heatmap1 = output_norm
    target_masks1 = gauss_norm

    # pred_shape    = KB.shape(target_masks1)
    # print('    pred_shape shape :', pred_shape.eval(), KB.int_shape(pred_shape))
    target_masks1 = KB.reshape(target_masks1,
                               (-1, pred_shape[1], pred_shape[2]))
    print('    target_masks1 shape :', target_masks1.get_shape(),
          KB.int_shape(target_masks1))
    pred_heatmap1 = KB.reshape(pred_heatmap1,
                               (-1, pred_shape[1], pred_shape[2]))
    print('    pred_heatmap1  shape :', pred_heatmap1.get_shape())

    # Compute binary cross entropy. If no positive ROIs, then return 0.
    # shape: [batch, roi, num_classes]
    # Smooth-L1 Loss
    loss = KB.switch(
        tf.size(target_masks1) > 0,
        smooth_l1_loss(y_true=target_masks1, y_pred=pred_heatmap1),
        tf.constant(0.0))
    loss = KB.mean(loss)
    loss = KB.reshape(loss, [1, 1])
    print('    loss type is :', type(loss))
    return loss