Example #1
def yolo_head(feats, anchors, num_classes, input_shape, calc_loss=False):
    """Convert final layer features to bounding box parameters."""
    num_anchors = len(anchors)
    # Reshape to batch, height, width, num_anchors, box_params.
    anchors_tensor = K.reshape(K.constant(anchors), [1, 1, 1, num_anchors, 2])

    grid_shape = K.shape(feats)[1:3]  # height, width
    grid_y = K.tile(K.reshape(K.arange(0, stop=grid_shape[0]), [-1, 1, 1, 1]),
                    [1, grid_shape[1], 1, 1])
    grid_x = K.tile(K.reshape(K.arange(0, stop=grid_shape[1]), [1, -1, 1, 1]),
                    [grid_shape[0], 1, 1, 1])
    grid = K.concatenate([grid_x, grid_y])
    grid = K.cast(grid, K.dtype(feats))

    feats = K.reshape(
        feats,
        [-1, grid_shape[0], grid_shape[1], num_anchors, num_classes + 5])

    # Adjust predictions to each spatial grid point and anchor size.
    box_xy = (K.sigmoid(feats[..., :2]) + grid) / K.cast(
        grid_shape[::-1], K.dtype(feats))
    box_wh = K.exp(feats[..., 2:4]) * anchors_tensor / K.cast(
        input_shape[::-1], K.dtype(feats))
    box_confidence = K.sigmoid(feats[..., 4:5])
    box_class_probs = K.sigmoid(feats[..., 5:])

    if calc_loss:
        return grid, feats, box_xy, box_wh
    return box_xy, box_wh, box_confidence, box_class_probs
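A minimal wiring sketch for the function above (hypothetical shapes and anchors; assumes a TF1-style Keras backend where yolo_head builds symbolic tensors, and that input_shape is the network input size as (height, width)):

import numpy as np
from keras import backend as K

# Three hypothetical anchors (width, height) for one detection scale.
anchors = np.array([[116, 90], [156, 198], [373, 326]], dtype='float32')
num_classes = 80

# Dummy feature map: 13x13 grid, 3 anchors, (num_classes + 5) box params each.
feats = K.placeholder(shape=(None, 13, 13, len(anchors) * (num_classes + 5)))
input_shape = K.constant([416, 416])  # (height, width) of the network input

box_xy, box_wh, box_conf, box_cls = yolo_head(
    feats, anchors, num_classes, input_shape)
# box_xy and box_wh come out normalized to the input image; confidences
# and class probabilities are squashed into (0, 1) by the sigmoids.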
Example #2
    def call(self, inputs, **kwargs):
        W = K.tanh(self.W_hat) * K.sigmoid(self.M_hat)
        a = K.dot(inputs, W)

        if self.nac_only:
            outputs = a
        else:
            m = K.exp(K.dot(K.log(K.abs(inputs) + self.epsilon), W))
            g = K.sigmoid(K.dot(inputs, self.G))
            outputs = g * a + (1. - g) * m

        return outputs
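For reference, a NumPy mirror of the arithmetic this call implements (the NAC/NALU forward pass), with small stand-ins for the learned weights W_hat, M_hat, and G; this is an illustrative sketch, not part of the original layer:

import numpy as np

def nalu_forward(x, W_hat, M_hat, G, eps=1e-7):
    # Constrained weights in (-1, 1): tanh * sigmoid biases entries
    # toward {-1, 0, 1}.
    sigmoid = lambda z: 1.0 / (1.0 + np.exp(-z))
    W = np.tanh(W_hat) * sigmoid(M_hat)
    a = x @ W                                  # NAC path: addition/subtraction
    m = np.exp(np.log(np.abs(x) + eps) @ W)    # log-space path: multiply/divide
    g = sigmoid(x @ G)                         # learned gate blends the two
    return g * a + (1.0 - g) * m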
Example #3
    def call(self, input_feature):
        kernel_size = 3

        # This implementation assumes channels-last data; channels-first
        # input would need a Permute to (2, 3, 1) here and back at the end.
        cbam_feature = input_feature

        # Pool across the channel axis, then learn a single-channel
        # spatial attention map from the stacked statistics.
        avg_pool = Lambda(lambda x: K.mean(x, axis=3, keepdims=True))(cbam_feature)
        max_pool = Lambda(lambda x: K.max(x, axis=3, keepdims=True))(cbam_feature)
        concat = Concatenate(axis=3)([avg_pool, max_pool])  # (B, H, W, 2)
        cbam_feature = Conv2D(filters=1,
                              kernel_size=kernel_size,
                              strides=1,
                              padding='same',
                              kernel_initializer='he_normal',
                              use_bias=False)(concat)       # (B, H, W, 1)
        return K.sigmoid(cbam_feature)
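To put this spatial-attention map to use, the usual CBAM pattern multiplies it back onto the input feature; a hypothetical wiring sketch (spatial_attention stands for the layer defined above, and the input shape is arbitrary):

from keras.layers import Input, Lambda

feature = Input((32, 32, 64))                 # hypothetical feature map
attention_map = spatial_attention(feature)    # (B, 32, 32, 1)
refined = Lambda(lambda t: t[0] * t[1])([feature, attention_map])
# Broadcasting over the channel axis scales every channel by the mask.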
Example #4
def line_loss(y_true, y_pred):

    r1 = y_true * y_pred
    r2 = K.sigmoid(r1)
    r3 = K.log(r2)
    result = -K.mean(r3)
    return result
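A self-contained sketch of how a loss with this signature is typically wired (the toy model below is hypothetical: its scalar output plays the role of the edge score y_pred, and y_true is +1 for a positive edge, -1 for a negative sample):

from keras.layers import Input, Dense
from keras.models import Model

inp = Input((16,))            # hypothetical edge-feature input
score = Dense(1)(inp)         # stands in for dot(u, v) of two embeddings
model = Model(inp, score)
model.compile(optimizer='adam', loss=line_loss)
# For a positive edge with a confident score, sigmoid -> 1 and the
# per-sample loss -log(sigmoid(...)) -> 0.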
Example #5
def line_loss(y_true, y_pred):
    try:
        import tensorflow as tf
    except ImportError:
        raise ImportError("tensorflow not found, please install it")
    from tensorflow.python.keras import backend as K

    y = K.sigmoid(y_true * y_pred)
    # Avoid NaN in the result of 'K.log'.
    return -K.mean(K.log(tf.clip_by_value(y, 1e-8, tf.reduce_max(y))))
Example #6
    def call(self, inputs, **kwargs):
        input_shape = K.int_shape(inputs)
        sequence_length, d_model = input_shape[-2:]
        # output of the "sigmoid halting unit" (not the probability yet)
        halting = K.sigmoid(
            K.reshape(
                K.bias_add(K.dot(K.reshape(inputs, [-1, d_model]),
                                 self.act_weights['halting_kernel']),
                           self.act_weights['halting_biases'],
                           data_format='channels_last'),
                [-1, sequence_length]))
        if self.zeros_like_halting is None:
            self.initialize_control_tensors(halting)
        # useful flags
        step_is_active = K.greater(self.halt_budget, 0)
        no_further_steps = K.less_equal(self.halt_budget - halting, 0)
        # halting probability is equal to
        # a. halting output if this isn't the last step (we have some budget)
        # b. to remainder if it is,
        # c. and zero for the steps that shouldn't be executed at all
        #    (out of budget for them)
        halting_prob = K.switch(
            step_is_active, K.switch(no_further_steps, self.remainder,
                                     halting), self.zeros_like_halting)
        self.active_steps += K.switch(step_is_active, self.ones_like_halting,
                                      self.zeros_like_halting)
        # We don't know which step is the last, so we keep updating
        # expression for the loss with each call of the layer
        self.ponder_cost = (self.act_weights['time_penalty_t'] *
                            K.mean(self.remainder + self.active_steps))
        # Updating "the remaining probability" and the halt budget
        self.remainder = K.switch(no_further_steps, self.remainder,
                                  self.remainder - halting)
        self.halt_budget -= halting  # OK to become negative

        # If none of the inputs are active at this step, then instead
        # of zeroing them out by multiplying to all-zeroes halting_prob,
        # we can simply use a constant tensor of zeroes, which means that
        # we won't even calculate the output of those steps, saving
        # some real computational time.
        if self.zeros_like_input is None:
            self.zeros_like_input = K.zeros_like(inputs,
                                                 name='zeros_like_input')
        # just because K.any(step_is_active) doesn't work in PlaidML
        any_step_is_active = K.greater(K.sum(K.cast(step_is_active, 'int32')),
                                       0)
        step_weighted_output = K.switch(
            any_step_is_active,
            K.expand_dims(halting_prob, -1) * inputs, self.zeros_like_input)
        if self.weighted_output is None:
            self.weighted_output = step_weighted_output
        else:
            self.weighted_output += step_weighted_output
        return [inputs, self.weighted_output]
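The nested K.switch above encodes a simple per-position rule; a NumPy sketch of that rule alone (an illustration, assuming all arguments are already broadcast to the same shape):

import numpy as np

def halting_prob(halting, halt_budget, remainder):
    # a. use the halting output while budget remains,
    # b. switch to the remainder on the final active step,
    # c. emit zero once the budget is exhausted.
    step_is_active = halt_budget > 0
    no_further_steps = (halt_budget - halting) <= 0
    return np.where(step_is_active,
                    np.where(no_further_steps, remainder, halting),
                    0.0)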
Example #7
def line_loss(y_true, y_pred):
    '''
    y_true = np.vstack([k_weight, odw]).T
    '''

    r1 = layers.multiply([y_true[:, 1], y_pred])
    r2 = K.sigmoid(r1)
    r3 = K.log(r2)
    r4 = layers.multiply([y_true[:, 0], r3])
    result = -K.mean(r4)

    return result
Example #8
def line_loss(y_true, y_pred):
    '''
    y_true[0]: -1 or +1 (indicating pos/neg samples)
    y_true[1]: lamb (lamb * NS_loss)
    '''

    r1 = y_true[0][0] * y_pred           # y_true[0][0]: the +1/-1 sign
    r2 = K.sigmoid(r1)
    r3 = K.log(r2)
    result = y_true[0][1] * -K.mean(r3)  # y_true[0][1]: the lamb weight

    return result
Example #9
    def call(self, x):
        if self.mode == MODE_VISIBLE_BERNOULLI:
            # Sample Bernoulli hidden units: fire where a uniform draw
            # falls below the activation probability.
            return K.cast(
                K.less(
                    K.random_uniform(shape=(self.hps['batch_size'],
                                            x.shape[1])),
                    K.sigmoid(K.dot(x, self.rbm_weight) + self.hidden_bias)),
                K.floatx())
        elif self.mode == MODE_VISIBLE_GAUSSIAN:
            return K.cast(
                K.less(
                    K.random_uniform(shape=(self.hps['batch_size'],
                                            x.shape[1])),
                    K.relu(K.dot(x, self.rbm_weight) + self.hidden_bias)),
                K.floatx())
Example #10
    def call(self, inputs, reverse=False, ddi=False, **kwargs):
        z = inputs
        z1, z2 = split_channels(z)

        scale, shift = split_channels_by_even_and_odd(self.nn(z1, ddi=ddi))
        # scale = K.exp(scale)  # seems not stable to train
        # scale = 1 + K.tanh(scale) * 0.2  # how about this?
        scale = K.sigmoid(scale + 2)  # ?? from reference implementation
        if not reverse:
            z2 = (z2 + shift) * scale
            self.add_loss(-K.sum(K.log(scale), axis=[1, 2, 3]) *
                          self.bit_per_sub_pixel_factor)
        else:
            z2 = z2 / scale - shift
        out = K.concatenate([z1, z2], axis=3)
        return out
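A quick scalar check (NumPy, arbitrary values) that the reverse branch above exactly inverts the forward branch:

import numpy as np

sigmoid = lambda z: 1.0 / (1.0 + np.exp(-z))
z2, shift, raw_scale = 0.7, 0.3, -1.2
scale = sigmoid(raw_scale + 2)               # as in the layer: sigmoid(x + 2)
y2 = (z2 + shift) * scale                    # forward
assert abs(y2 / scale - shift - z2) < 1e-12  # reverse recovers z2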
Example #11
    def call(self, inputs, states, training=None):
        h_tm1 = states[0]
        c_tm1 = states[1]

        # dropout matrices for input units
        dp_mask = self.get_dropout_mask_for_cell(inputs, training, count=4)
        # dropout matrices for recurrent units
        rec_dp_mask = self.get_recurrent_dropout_mask_for_cell(h_tm1,
                                                               training,
                                                               count=4)

        if 0 < self.dropout < 1.:
            inputs_i = inputs * dp_mask[0]
            inputs_f = inputs * dp_mask[1]
            inputs_c = inputs * dp_mask[2]
            inputs_o = inputs * dp_mask[3]
        else:
            inputs_i = inputs
            inputs_f = inputs
            inputs_c = inputs
            inputs_o = inputs

        if 0 < self.recurrent_dropout < 1.:
            h_tm1_i = h_tm1 * rec_dp_mask[0]
            h_tm1_f = h_tm1 * rec_dp_mask[1]
            h_tm1_c = h_tm1 * rec_dp_mask[2]
            h_tm1_o = h_tm1 * rec_dp_mask[3]
        else:
            h_tm1_i = h_tm1
            h_tm1_f = h_tm1
            h_tm1_c = h_tm1
            h_tm1_o = h_tm1

        (kernel_i, kernel_f, kernel_c,
         kernel_o) = array_ops.split(self.kernel, 4,
                                     axis=3)  # (3, 3, input_dim, filters)
        (recurrent_kernel_i, recurrent_kernel_f, recurrent_kernel_c,
         recurrent_kernel_o) = array_ops.split(self.recurrent_kernel,
                                               4,
                                               axis=3)

        if self.use_bias:
            bias_i, bias_f, bias_c, bias_o = array_ops.split(self.bias, 4)
        else:
            bias_i, bias_f, bias_c, bias_o = None, None, None, None

        # Input and recurrent convolutions for each gate.
        x_i = self.input_conv(inputs_i, kernel_i, bias_i, padding=self.padding)
        x_f = self.input_conv(inputs_f, kernel_f, bias_f, padding=self.padding)
        x_c = self.input_conv(inputs_c, kernel_c, bias_c, padding=self.padding)
        x_o = self.input_conv(inputs_o, kernel_o, bias_o, padding=self.padding)
        h_i = self.recurrent_conv(h_tm1_i, recurrent_kernel_i)
        h_f = self.recurrent_conv(h_tm1_f, recurrent_kernel_f)
        h_c = self.recurrent_conv(h_tm1_c, recurrent_kernel_c)
        h_o = self.recurrent_conv(h_tm1_o, recurrent_kernel_o)

        i = self.recurrent_activation(x_i + h_i)
        f = self.recurrent_activation(x_f + h_f)
        c = f * c_tm1 + i * self.activation(x_c + h_c)
        o = self.recurrent_activation(x_o + h_o)
        h = o * self.activation(c)

        # sa computation
        m_t_minus_one = states[2]  # h, w, filters
        h_t, c_t = h, c

        (kernel_hv, kernel_hk, kernel_hq, kernel_mk,
         kernel_mv) = array_ops.split(
             self.sa_kernel, 5,
             axis=3)  # kernel_size, filters, 1, turn to one layer

        if self.use_bias:
            bias_i, bias_g, bias_o = array_ops.split(self.sa_bias, 3)
        else:
            bias_i, bias_g, bias_o = None, None, None

        v_h = self.sa_conv(h_t, kernel_hv)
        k_h = self.sa_conv(h_t, kernel_hk)
        q_h = self.sa_conv(h_t, kernel_hq)
        k_m = self.sa_conv(m_t_minus_one, kernel_mk)
        v_m = self.sa_conv(m_t_minus_one, kernel_mv)  # h, w, 1

        q_h = K.squeeze(q_h, 3)
        k_m = K.squeeze(k_m, 3)
        k_h = K.squeeze(k_h, 3)

        e_m = tf.matmul(q_h, k_m)
        alpha_m = K.softmax(e_m)
        e_h = tf.matmul(q_h, k_h)
        alpha_h = K.softmax(e_h)

        v_m = K.squeeze(v_m, 3)
        v_h = K.squeeze(v_h, 3)
        z_m = tf.matmul(alpha_m, v_m)
        z_h = tf.matmul(alpha_h, v_h)

        z_m = K.expand_dims(z_m, 3)
        z_h = K.expand_dims(z_h, 3)
        zi = self.sa_conv(K.concatenate((z_h, z_m), 3), self.kernel_z)

        (kernel_m_zi, kernel_m_hi, kernel_m_zg, kernel_m_hg, kernel_m_zo,
         kernel_m_ho) = array_ops.split(self.depth_wise_kernel, 6, axis=3)

        i = K.sigmoid(
            K.depthwise_conv2d(zi, kernel_m_zi, padding='same') +
            K.depthwise_conv2d(h_t, kernel_m_hi, padding='same') + bias_i)
        g = K.tanh(
            K.depthwise_conv2d(zi, kernel_m_zg, padding='same') +
            K.depthwise_conv2d(h_t, kernel_m_hg, padding='same') + bias_g)
        o = K.sigmoid(
            K.depthwise_conv2d(zi, kernel_m_zo, padding='same') +
            K.depthwise_conv2d(h_t, kernel_m_ho, padding='same') + bias_o)

        m_t = (1 - i) * m_t_minus_one + i * g
        h_hat_t = m_t * o
        # sa computation end
        return h_hat_t, [c_t, h_hat_t, m_t]
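The self-attention memory update at the end reduces to a gated blend; a scalar NumPy sketch of just that update rule (illustration only, arbitrary values):

import numpy as np

sigmoid = lambda z: 1.0 / (1.0 + np.exp(-z))
i, g, o = sigmoid(0.4), np.tanh(0.1), sigmoid(-0.3)
m_prev = 0.25
m_t = (1 - i) * m_prev + i * g   # input gate blends new content into memory
h_hat = m_t * o                  # output gate modulates the new hidden state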
Example #12
def line_loss(y_true, y_pred):
    y = K.sigmoid(y_true * y_pred)
    # Avoid NaN in the result of 'K.log'.
    return -K.mean(K.log(tf.clip_by_value(y, 1e-8, tf.reduce_max(y))))
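Why the clip matters, in plain numbers (a sketch): a badly wrong prediction drives y_true * y_pred far negative, the sigmoid underflows toward zero, and log would return -inf; flooring at 1e-8 caps the per-sample loss at -log(1e-8), roughly 18.4.

import numpy as np

s = 1.0 / (1.0 + np.exp(50.0))   # sigmoid(-50) ~ 1.9e-22
print(np.log(s))                 # ~ -50, and -inf once float32 underflows
print(np.log(max(s, 1e-8)))      # ~ -18.42 with the clip applied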
Example #13
    def build(self, input_shape):
        self.rbm_weight = self.add_weight(
            name='rbm_weight',
            shape=(input_shape[1], self.output_dim),
            initializer='uniform',  # Which initializer is optimal?
            trainable=True)
        self.hidden_bias = self.add_weight(name='rbm_hidden_bias',
                                           shape=(self.output_dim, ),
                                           initializer='uniform',
                                           trainable=True)
        self.visible_bias = K.variable(initializers.get('uniform')(
            (input_shape[1], )),
                                       dtype=K.floatx(),
                                       name='rbm_visible_bias')

        # Make symbolic computation objects.
        if self.mode == MODE_VISIBLE_BERNOULLI:
            # Transform visible units.
            self.input_visible = K.placeholder(shape=(None, input_shape[1]),
                                               name='input_visible')
            self.transform = K.cast(
                K.less(
                    K.random_uniform(shape=(self.hps['batch_size'],
                                            input_shape[1])),
                    K.sigmoid(
                        K.dot(self.input_visible, self.rbm_weight) +
                        self.hidden_bias)))
            self.transform_func = K.function([self.input_visible],
                                             [self.transform])

            # Transform hidden units.
            self.input_hidden = K.placeholder(shape=(None, self.output_dim),
                                              name='input_hidden')
            self.inv_transform = K.cast(
                K.less(
                    K.random_uniform(shape=(self.hps['batch_size'],
                                            input_shape[1])),
                    K.sigmoid(
                        K.dot(self.input_hidden, K.transpose(self.rbm_weight))
                        + self.visible_bias)))
            self.inv_transform_func = K.function([self.input_hidden],
                                                 [self.inv_transform])
        elif self.mode == MODE_VISIBLE_GAUSSIAN:
            # Transform visible units.
            self.input_visible = K.placeholder(shape=(None, input_shape[1]),
                                               name='input_visible')
            self.transform = K.cast(
                K.less(
                    K.random_uniform(shape=(self.hps['batch_size'],
                                            input_shape[1])),
                    K.relu(
                        K.dot(self.input_visible, self.rbm_weight) +
                        self.hidden_bias)))  #?
            self.transform_func = K.function([self.input_visible],
                                             [self.transform])

            # Transform hidden units.
            self.input_hidden = K.placeholder(shape=(None, self.output_dim),
                                              name='input_hidden')
            self.inv_transform = Ke.multivariate_normal_diag(
                loc=(K.dot(self.input_hidden, K.transpose(self.rbm_weight)) +
                     self.visible_bias),
                scale_diag=np.ones(shape=(self.hps['batch_size'],
                                          input_shape[1]))).sample()
            self.inv_transform_func = K.function([self.input_hidden],
                                                 [self.inv_transform])
        else:
            # TODO
            pass

        # Calculate free energy.
        self.free_energy = -1 * (
            K.squeeze(K.dot(self.input_visible,
                            K.expand_dims(self.visible_bias, axis=-1)), -1) +
            K.sum(K.log(1 + K.exp(K.dot(self.input_visible, self.rbm_weight) +
                                  self.hidden_bias)), axis=-1))
        self.free_energy_func = K.function([self.input_visible],
                                           [self.free_energy])

        super(RBM, self).build(input_shape)
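The free-energy tensor built above matches the standard Bernoulli-RBM formula F(v) = -(v . b_vis + sum_j log(1 + exp((v W + b_hid)_j))); a NumPy mirror for reference (a sketch, using logaddexp as a numerically stable softplus):

import numpy as np

def free_energy(v, W, b_hid, b_vis):
    # log(1 + exp(x)) == logaddexp(0, x), stable for large x.
    return -(v @ b_vis +
             np.sum(np.logaddexp(0.0, v @ W + b_hid), axis=-1))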
Example #14
    def fit(self, V, verbose=1):
        """Train RBM with the data V.
        
        Parameters
        ----------
        V : 2d numpy array
            Visible data (batch size x input_dim).
        verbose : integer
            Verbose mode (default, 1).
        """
        num_step = V.shape[0] // self.hps['batch_size']
        if V.shape[0] % self.hps['batch_size'] != 0:
            num_step += 1  # one extra step for the final partial batch

        for k in range(self.hps['epochs']):
            if verbose == 1:
                print(k + 1, '/', self.hps['epochs'], ' epochs', end='\r')

            if self.mode == MODE_VISIBLE_BERNOULLI:
                # Contrastive divergence.
                v_pos = self.input_visible
                h_pos = self.transform
                v_neg = K.cast(K.less(
                    K.random_uniform(shape=(self.hps['batch_size'],
                                            V.shape[1])),
                    K.sigmoid(
                        K.dot(h_pos, K.transpose(self.rbm_weight)) +
                        self.visible_bias)),
                               dtype=np.float32)
                h_neg = K.sigmoid(
                    K.dot(v_neg, self.rbm_weight) + self.hidden_bias)
                update = K.transpose(K.transpose(K.dot(K.transpose(v_pos), h_pos)) \
                                     - K.dot(K.transpose(h_neg), v_neg))
                self.rbm_weight_update_func = K.function(
                    [self.input_visible],
                    [K.update_add(self.rbm_weight, self.hps['lr'] * update)])
                self.hidden_bias_update_func = K.function(
                    [self.input_visible],
                    [K.update_add(self.hidden_bias,
                                  self.hps['lr'] * (K.sum(h_pos, axis=0) -
                                                    K.sum(h_neg, axis=0)))])
                self.visible_bias_update_func = K.function(
                    [self.input_visible],
                    [K.update_add(self.visible_bias,
                                  self.hps['lr'] * (K.sum(v_pos, axis=0) -
                                                    K.sum(v_neg, axis=0)))])

                # Create the first visible nodes sampling object.
                self.sample_first_visible = K.function([self.input_visible],
                                                       [v_neg])
            elif self.mode == MODE_VISIBLE_GAUSSIAN:
                # Contrastive divergence.
                v_pos = self.input_visible
                h_pos = self.transform
                v_neg = Ke.multivariate_normal_diag(
                    loc=(K.dot(h_pos, K.transpose(self.rbm_weight)) +
                         self.visible_bias),
                    scale_diag=np.ones(shape=(self.hps['batch_size'],
                                              V.shape[1]))).sample()
                h_neg = K.sigmoid(
                    K.dot(v_neg, self.rbm_weight) + self.hidden_bias)
                update = K.transpose(K.transpose(K.dot(K.transpose(v_pos), h_pos)) \
                                     - K.dot(K.transpose(h_neg), v_neg))
                self.rbm_weight_update_func = K.function(
                    [self.input_visible],
                    [K.update_add(self.rbm_weight, self.hps['lr'] * update)])
                self.hidden_bias_update_func = K.function(
                    [self.input_visible],
                    [K.update_add(self.hidden_bias,
                                  self.hps['lr'] * (K.sum(h_pos, axis=0) -
                                                    K.sum(h_neg, axis=0)))])
                self.visible_bias_update_func = K.function(
                    [self.input_visible],
                    [K.update_add(self.visible_bias,
                                  self.hps['lr'] * (K.sum(v_pos, axis=0) -
                                                    K.sum(v_neg, axis=0)))])

                # Create the first visible nodes sampling object.
                self.sample_first_visible = K.function([self.input_visible],
                                                       [v_neg])
            else:
                pass

            for i in range(num_step):
                if i == (num_step - 1):
                    if self.mode == MODE_VISIBLE_BERNOULLI:
                        # Contrastive divergence.
                        v_pos = self.input_visible
                        h_pos = self.transform
                        v_neg = K.cast(K.less(
                            K.random_uniform(shape=(
                                V.shape[0] - i * self.hps['batch_size'],
                                V.shape[1])),
                            K.sigmoid(
                                K.dot(h_pos, K.transpose(self.rbm_weight)) +
                                self.visible_bias)),
                                       dtype=np.float32)
                        h_neg = K.sigmoid(
                            K.dot(v_neg, self.rbm_weight) + self.hidden_bias)
                        update = K.transpose(K.transpose(K.dot(K.transpose(v_pos), h_pos)) \
                                             - K.dot(K.transpose(h_neg), v_neg))
                        self.rbm_weight_update_func = K.function(
                            [self.input_visible], [
                                K.update_add(self.rbm_weight,
                                             self.hps['lr'] * update)
                            ])
                        self.hidden_bias_update_func = K.function(
                            [self.input_visible],
                            [K.update_add(self.hidden_bias,
                                          self.hps['lr'] * (K.sum(h_pos, axis=0) -
                                                            K.sum(h_neg, axis=0)))])
                        self.visible_bias_update_func = K.function(
                            [self.input_visible],
                            [K.update_add(self.visible_bias,
                                          self.hps['lr'] * (K.sum(v_pos, axis=0) -
                                                            K.sum(v_neg, axis=0)))])

                        # Create the first visible nodes sampling object.
                        self.sample_first_visible = K.function(
                            [self.input_visible], [v_neg])
                    elif self.mode == MODE_VISIBLE_GAUSSIAN:
                        # Contrastive divergence.
                        v_pos = self.input_visible
                        h_pos = self.transform
                        v_neg = Ke.multivariate_normal_diag(
                            loc=(K.dot(h_pos, K.transpose(self.rbm_weight)) +
                                 self.visible_bias),
                            scale_diag=np.ones(shape=(
                                V.shape[0] - i * self.hps['batch_size'],
                                V.shape[1]))).sample()
                        h_neg = K.sigmoid(
                            K.dot(v_neg, self.rbm_weight) + self.hidden_bias)
                        update = K.transpose(K.transpose(K.dot(K.transpose(v_pos), h_pos)) \
                                             - K.dot(K.transpose(h_neg), v_neg))
                        self.rbm_weight_update_func = K.function(
                            [self.input_visible], [
                                K.update_add(self.rbm_weight,
                                             self.hps['lr'] * update)
                            ])
                        self.hidden_bias_update_func = K.function(
                            [self.input_visible],
                            [K.update_add(self.hidden_bias,
                                          self.hps['lr'] * (K.sum(h_pos, axis=0) -
                                                            K.sum(h_neg, axis=0)))])
                        self.visible_bias_update_func = K.function(
                            [self.input_visible],
                            [K.update_add(self.visible_bias,
                                          self.hps['lr'] * (K.sum(v_pos, axis=0) -
                                                            K.sum(v_neg, axis=0)))])

                        # Create the first visible nodes sampling object.
                        self.sample_first_visible = K.function(
                            [self.input_visible], [v_neg])
                    else:
                        pass

                    V_batch = [V[int(i * self.hps['batch_size']):V.shape[0]]]

                    # Train.
                    self.rbm_weight_update_func(V_batch)
                    self.hidden_bias_update_func(V_batch)
                    self.visible_bias_update_func(V_batch)
                else:
                    V_batch = [
                        V[int(i * self.hps['batch_size']):int(
                            (i + 1) * self.hps['batch_size'])]
                    ]

                    # Train.
                    self.rbm_weight_update_func(V_batch)
                    self.hidden_bias_update_func(V_batch)
                    self.visible_bias_update_func(V_batch)

                # Calculate a training score by each step.
                # Free energy of the input visible nodes.
                fe = self.cal_free_energy(V_batch)

                # Free energy of the first sampled visible nodes.
                V_p_batch = self.sample_first_visible(V_batch)
                fe_p = self.cal_free_energy(V_p_batch)

                score = np.mean(np.abs(fe[0] - fe_p[0]))  # Scale?
                print('\n{0:d}/{1:d}, score: {2:f}'.format(
                    i + 1, num_step, score))
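A hypothetical training call for the method above (the layer instance, its hps dictionary, and the data dimensions are all assumptions for illustration):

import numpy as np

# Assumes an RBM layer built on 784-dim input with
# hps = {'batch_size': 64, 'epochs': 5, 'lr': 1e-3}.
V = np.random.rand(1024, 784).astype('float32')
rbm.fit(V, verbose=1)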
Example #15
def yolo_loss(yolo_output,
              true_boxes,
              detectors_mask,
              matching_true_boxes,
              anchors,
              num_classes,
              rescore_confidence=False,
              print_loss=False):
    """YOLO localization loss function.

    Parameters
    ----------
    yolo_output : tf.Tensor
        Final convolutional layer features.
    true_boxes : tf.Tensor
        Ground truth boxes tensor with shape [batch, num_true_boxes, 5]
        containing box x_center, y_center, width, height, and class.
    detectors_mask : np.ndarray
        0/1 mask for detector positions where there is a matching ground truth.
    matching_true_boxes : np.ndarray
        Corresponding ground truth boxes for positive detector positions.
        Already adjusted for conv height and width.
    anchors : np.ndarray
        Anchor boxes for model.
    num_classes : int
        Number of object classes.
    rescore_confidence : bool, default=False
        If true then set confidence target to IOU of best predicted box with
        the closest matching ground truth box.
    print_loss : bool, default=False
        If True then print the loss components.

    Returns
    -------
    mean_loss : float
        Mean localization loss across the minibatch.
    """

    num_anchors = len(anchors)
    object_scale = 5
    no_object_scale, class_scale, coordinates_scale = 1, 1, 1
    pred_xy, pred_wh, pred_confidence, pred_class_prob = yolo_head(
        yolo_output, anchors, num_classes)

    # Unadjusted box predictions for loss.
    # TODO: Remove extra computation shared with yolo_head.
    yolo_output_shape = K.shape(yolo_output)
    feats = K.reshape(yolo_output, [
        -1, yolo_output_shape[1], yolo_output_shape[2], num_anchors,
        num_classes + 5
    ])
    pred_boxes = K.concatenate((K.sigmoid(feats[..., 0:2]), feats[..., 2:4]),
                               axis=-1)

    # TODO: Adjust predictions by image width/height for non-square images?
    # IOUs may be off due to different aspect ratio.

    # Expand pred x,y,w,h to allow comparison with ground truth.
    # batch, conv_height, conv_width, num_anchors, num_true_boxes, box_params
    pred_xy = K.expand_dims(pred_xy, 4)
    pred_wh = K.expand_dims(pred_wh, 4)

    pred_wh_half = pred_wh / 2.
    pred_mins = pred_xy - pred_wh_half
    pred_maxes = pred_xy + pred_wh_half

    true_boxes_shape = K.shape(true_boxes)

    # batch, conv_height, conv_width, num_anchors, num_true_boxes, box_params
    true_boxes = K.reshape(true_boxes, [
        true_boxes_shape[0], 1, 1, 1, true_boxes_shape[1], true_boxes_shape[2]
    ])
    true_xy = true_boxes[..., 0:2]
    true_wh = true_boxes[..., 2:4]

    # Find IOU of each predicted box with each ground truth box.
    true_wh_half = true_wh / 2.
    true_mins = true_xy - true_wh_half
    true_maxes = true_xy + true_wh_half

    intersect_mins = K.maximum(pred_mins, true_mins)
    intersect_maxes = K.minimum(pred_maxes, true_maxes)
    intersect_wh = K.maximum(intersect_maxes - intersect_mins, 0.)
    intersect_areas = intersect_wh[..., 0] * intersect_wh[..., 1]

    pred_areas = pred_wh[..., 0] * pred_wh[..., 1]
    true_areas = true_wh[..., 0] * true_wh[..., 1]

    union_areas = pred_areas + true_areas - intersect_areas
    iou_scores = intersect_areas / union_areas

    # Best IOUs for each location.
    best_ious = K.max(iou_scores, axis=4)  # Best IOU scores.
    best_ious = K.expand_dims(best_ious)

    # A detector has found an object if IOU > thresh for some true box.
    object_detections = K.cast(best_ious > 0.6, K.dtype(best_ious))

    # TODO: Darknet region training includes extra coordinate loss for early
    # TODO: training steps to encourage predictions to match anchor priors.

    # Determine confidence weights from object and no_object weights.
    # NOTE: YOLO does not use binary cross-entropy here.
    no_object_weights = (no_object_scale * (1 - object_detections) *
                         (1 - detectors_mask))
    no_objects_loss = no_object_weights * K.square(-pred_confidence)

    if rescore_confidence:
        objects_loss = (object_scale * detectors_mask *
                        K.square(best_ious - pred_confidence))
    else:
        objects_loss = (object_scale * detectors_mask *
                        K.square(1 - pred_confidence))

    confidence_loss = objects_loss + no_objects_loss

    # Classification loss for matching detections.
    # NOTE: YOLO does not use categorical cross-entropy loss here.
    matching_classes = K.cast(matching_true_boxes[..., 4], 'int32')
    matching_classes = K.one_hot(matching_classes, num_classes)
    classification_loss = (class_scale * detectors_mask *
                           K.square(matching_classes - pred_class_prob))

    # Coordinate loss for matching detection boxes.
    matching_boxes = matching_true_boxes[..., 0:4]
    coordinates_loss = (coordinates_scale * detectors_mask *
                        K.square(matching_boxes - pred_boxes))

    confidence_loss_sum = K.sum(confidence_loss)
    classification_loss_sum = K.sum(classification_loss)
    coordinates_loss_sum = K.sum(coordinates_loss)
    total_loss = 0.5 * (confidence_loss_sum + classification_loss_sum +
                        coordinates_loss_sum)

    if print_loss:
        # TODO: printing Tensor values. Maybe use eval function or session?
        print(
            'yolo_loss: {}, conf_loss: {}, class_loss: {}, box_coord_loss: {}'.
            format(total_loss, confidence_loss_sum, classification_loss_sum,
                   coordinates_loss_sum))

    return total_loss
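For reference, the IOU computation at the heart of this loss, restated for a single pair of center-format boxes (a NumPy sketch, not part of the original function):

import numpy as np

def iou(box_a, box_b):
    # Boxes are (x_center, y_center, width, height).
    (xa, ya, wa, ha), (xb, yb, wb, hb) = box_a, box_b
    mins = np.maximum([xa - wa / 2, ya - ha / 2], [xb - wb / 2, yb - hb / 2])
    maxes = np.minimum([xa + wa / 2, ya + ha / 2], [xb + wb / 2, yb + hb / 2])
    inter_wh = np.maximum(maxes - mins, 0.0)
    inter = inter_wh[0] * inter_wh[1]
    return inter / (wa * ha + wb * hb - inter)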
Example #16
def yolo_head(feats, anchors, num_classes):
    """Convert final layer features to bounding box parameters.

    Parameters
    ----------
    feats : tf.Tensor
        Final convolutional layer features.
    anchors : np.array, list
        Anchor box widths and heights.
    num_classes : int
        Number of target classes.

    Returns
    -------
    box_xy: tf.Tensor
        (x, y) box predictions adjusted by spatial location in conv layer.
    box_wh: tf.Tensor
        (w, h) box predictions adjusted by anchors and conv spatial resolution.
    box_conf: tf.Tensor
        Probability estimate for whether each box contains any object.
    box_class_pred: tf.Tensor
        Probability distribution estimate for each box over class labels.

    """

    num_anchors = len(anchors)

    # Reshape to batch, height, width, num_anchors, box_params.
    anchors_tensor = K.reshape(K.variable(anchors), [1, 1, 1, num_anchors, 2])

    # Static implementation for fixed models.
    # TODO: Remove or add option for static implementation.
    # _, conv_height, conv_width, _ = K.int_shape(feats)
    # conv_dims = K.variable([conv_width, conv_height])

    # Dynamic implementation of conv dims for fully convolutional model.
    conv_dims = K.shape(feats)[1:3]  # assuming channels last

    # In YOLO the height index is the inner most iteration.
    conv_height_index = K.arange(0, stop=conv_dims[0])
    conv_width_index = K.arange(0, stop=conv_dims[1])
    conv_height_index = K.tile(conv_height_index, [conv_dims[1]])

    # TODO: Repeat_elements and tf.split doesn't support dynamic splits.
    # conv_width_index = K.repeat_elements(conv_width_index, conv_dims[1], axis=0)
    conv_width_index = K.tile(K.expand_dims(conv_width_index, 0),
                              [conv_dims[0], 1])
    conv_width_index = K.flatten(K.transpose(conv_width_index))
    conv_index = K.transpose(K.stack([conv_height_index, conv_width_index]))
    conv_index = K.reshape(conv_index, [1, conv_dims[0], conv_dims[1], 1, 2])
    conv_index = K.cast(conv_index, K.dtype(feats))

    feats = K.reshape(
        feats, [-1, conv_dims[0], conv_dims[1], num_anchors, num_classes + 5])
    conv_dims = K.cast(K.reshape(conv_dims, [1, 1, 1, 1, 2]), K.dtype(feats))

    # Static generation of conv_index:
    # conv_index = np.array([_ for _ in np.ndindex(conv_width, conv_height)])
    # conv_index = conv_index[:, [1, 0]]  # swap columns for YOLO ordering.
    # conv_index = K.variable(
    #     conv_index.reshape(1, conv_height, conv_width, 1, 2))
    # feats = Reshape(
    #     (conv_dims[0], conv_dims[1], num_anchors, num_classes + 5))(feats)

    box_confidence = K.sigmoid(feats[..., 4:5])
    box_xy = K.sigmoid(feats[..., :2])
    box_wh = K.exp(feats[..., 2:4])
    box_class_probs = K.softmax(feats[..., 5:])

    # Adjust predictions to each spatial grid point and anchor size.
    # Note: YOLO iterates over height index before width index.
    box_xy = (box_xy + conv_index) / conv_dims
    box_wh = box_wh * anchors_tensor / conv_dims

    return box_confidence, box_xy, box_wh, box_class_probs
Example #17
def line_loss(y_true, y_pred):
    return -kb.mean(kb.log(kb.sigmoid(y_true * y_pred)))
Example #18
def line_loss(y_true, y_pred):
    return -K.mean(K.log(K.sigmoid(y_true * y_pred)))
    args = parser.parse_args()
    dataloader = Dataloader(args)

    p = PointEmbedding(args)
    p_model = p.build()
    loss_func = p.custom_loss()

    x1 = Input((28, 28))
    x2 = Input((28, 28))
    x1_r = K.reshape(x1, (-1, 28, 28, 1))
    x2_r = K.reshape(x2, (-1, 28, 28, 1))
    pair1 = K.concatenate([x1_r, x2_r], axis=2)

    x3 = Input((28, 28))
    x4 = Input((28, 28))
    x3_r = K.reshape(x3, (-1, 28, 28, 1))
    x4_r = K.reshape(x4, (-1, 28, 28, 1))
    pair2 = K.concatenate([x3_r, x4_r], axis=2)

    output = Conv2D(1, (5, 5), (2, 2))(pair1)
    output = MaxPooling2D((2, 2), (1, 1))(output)
    output = Conv2D(1, (5, 5), (2, 2))(output)

    output = MaxPooling2D((2, 2), (1, 1))(output)
    output = Flatten()(output)
    output = Dense(2)(output)
    print(K.square(output))
    print(K.sum(K.square(output), axis=-1))
    print(K.sigmoid(K.sum(K.square(output), axis=-1)))