Example no. 1
def yolo_head(feats, anchors, num_classes, input_shape):
    """Convert final layer features to bounding box parameters."""
    num_anchors = len(anchors)
    # Reshape to batch, height, width, num_anchors, box_params.
    anchors_tensor = K.reshape(K.constant(anchors), [1, 1, 1, num_anchors, 2])

    grid_shape = K.shape(feats)[1:3] # height, width
    grid_y = K.tile(K.reshape(K.arange(0, stop=grid_shape[0]), [-1, 1, 1, 1]),
        [1, grid_shape[1], 1, 1])
    grid_x = K.tile(K.reshape(K.arange(0, stop=grid_shape[1]), [1, -1, 1, 1]),
        [grid_shape[0], 1, 1, 1])
    grid = K.concatenate([grid_x, grid_y])
    grid = K.cast(grid, K.dtype(feats))

    feats = K.reshape(
        feats, [-1, grid_shape[0], grid_shape[1], num_anchors, num_classes + 5])

    box_xy = K.sigmoid(feats[..., :2])
    box_wh = K.exp(feats[..., 2:4])
    box_confidence = K.sigmoid(feats[..., 4:5])
    box_class_probs = K.sigmoid(feats[..., 5:])

    # Adjust predictions to each spatial grid point and anchor size.
    box_xy = (box_xy + grid) / K.cast(grid_shape[::-1], K.dtype(feats))
    box_wh = box_wh * anchors_tensor / K.cast(input_shape[::-1], K.dtype(feats))

    return box_xy, box_wh, box_confidence, box_class_probs
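
The decoding above can be traced for a single cell in plain NumPy. A minimal sketch with hypothetical numbers (13x13 grid, one anchor, 416x416 input; none of these values come from the snippet):

import numpy as np

sigmoid = lambda v: 1.0 / (1.0 + np.exp(-v))

t_xy = np.array([0.2, -0.1])    # raw x,y offsets for one cell (toy values)
t_wh = np.array([0.3, 0.5])     # raw w,h log-scales (toy values)
cell = np.array([4, 7])         # (x, y) index of the grid cell
anchor = np.array([81., 82.])   # anchor width, height in pixels

box_xy = (sigmoid(t_xy) + cell) / np.array([13., 13.])   # center, normalized to [0, 1]
box_wh = np.exp(t_wh) * anchor / np.array([416., 416.])  # size, normalized to [0, 1]
print(box_xy, box_wh)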
Example no. 2
    def step(self, x, states):
        r_tm1, V_tm1, s_tm1, time = states[:4]
        h_tm1 = states[4:]

        op_t, h_t = _update_controller(self, T.concatenate([x, r_tm1], axis=-1),
                                       h_tm1)

        d_t = K.sigmoid(K.dot(op_t, self.W_d) + self.b_d)
        u_t = K.sigmoid(K.dot(op_t, self.W_u) + self.b_u)
        v_t = K.tanh(K.dot(op_t, self.W_v) + self.b_v)
        o_t = K.tanh(K.dot(op_t, self.W_o) + self.b_o)

        time = time + 1
        V_t, s_t, r_t = _update_neural_stack(self, V_tm1, s_tm1, d_t[::, 0],
                                             u_t[::, 0], v_t, time[0], stack=self.stack)

        return o_t, [r_t, V_t, s_t, time] + h_t
    def call(self, x):
        assert(K.backend() == 'tensorflow')
        temp = K.permute_dimensions(x, (0, 2, 1))
        for i in range(0, self.attention_depth):
            temp = K.sigmoid(K.dot(temp, self.Ws[i]) + self.bs[i])
        temp = K.permute_dimensions(temp, (0, 2, 1))
        estimated_weight = K.squeeze(K.dot(temp, K.expand_dims(self.Wf, -1)), -1)
        biased_weight = estimated_weight + self.bias
        non_linear_weight = K.tanh(biased_weight)

        # For each hidden state, calculate how much it should contribute
        # to the context vector. This is the main part of attention.
        # The weights are turned into "probabilities" with a softmax:
        # exp(x) / sum(exp(xi)).
        prob = K.exp(non_linear_weight)
        # Compute the total sum for each batch.
        total_sum = K.sum(prob, axis=1, keepdims=True)
        prob /= K.cast(total_sum, K.floatx())

        # Enable this if you want access to internal probabilities.
        # Should only be used for testing that Attention works as expected.
        # return prob

        # Multiply each hidden value by the corresponding probability.
        prob = K.expand_dims(prob, -1)
        new_hidden_values = x * prob
        return K.sum(new_hidden_values, axis=1)
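
The exp-and-normalize in call() is exactly a softmax over the time axis. A quick NumPy check on toy data (names here are hypothetical, not from the snippet):

import numpy as np

def softmax(v, axis=1):
    e = np.exp(v - v.max(axis=axis, keepdims=True))  # shift for numerical stability
    return e / e.sum(axis=axis, keepdims=True)

w = np.random.randn(2, 5)                                # (batch, time) scores
prob = np.exp(w) / np.exp(w).sum(axis=1, keepdims=True)  # as in call()
assert np.allclose(prob, softmax(w))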
Example no. 4
    def call(self, inputs, **kwargs):
        assert isinstance(inputs, list) and len(inputs) == 3
        first, second, features = inputs[0], inputs[1], inputs[2]
        if not self.from_logits:
            first = kb.clip(first, 1e-10, 1.0)
            second = kb.clip(second, 1e-10, 1.0)
            first_, second_ = kb.log(first), kb.log(second)
        else:
            first_, second_ = first, second
        # embedded_features.shape = (M, T, 1)
        if self.use_intermediate_layer:
            features = kb.dot(features, self.first_kernel)
            features = kb.bias_add(features, self.first_bias, data_format="channels_last")
            features = self.intermediate_activation(features)
        embedded_features = kb.dot(features, self.features_kernel)
        embedded_features = kb.bias_add(
            embedded_features, self.features_bias, data_format="channels_last")
        if self.use_dimension_bias:
            tiling_shape = [1] * (kb.ndim(first)-1) + [kb.shape(first)[-1]]
            embedded_features = kb.tile(embedded_features, tiling_shape)
            embedded_features = kb.bias_add(
                embedded_features, self.dimensions_bias, data_format="channels_last")
        sigma = kb.sigmoid(embedded_features)

        result = weighted_sum(first_, second_, sigma,
                              self.first_threshold, self.second_threshold)
        probs = kb.softmax(result)
        if self.return_logits:
            return [probs, result]
        return probs
Example no. 5
	def step(self, x, states):
		M = states[0]  # (nb_samples, nb_slots, memory_size)
		h = states[1]  # (nb_samples, memory_size)
		w = states[2]  # (nb_samples, nb_slots)
		#------Memory read--------#
		k = self.W_k(h)  # (nb_samples, memory_size)
		w_hat = T.batched_tensordot(M, k, axes=[(2), (1)])  # (nb_samples, nb_slots)
		beta = K.sigmoid(self.W_b(h))  # (nb_samples, 1)
		beta = K.repeat(beta, self.nb_slots)  # (nb_samples, nb_slots, 1)
		beta = K.squeeze(beta, 2)  # (nb_samples, nb_slots)
		w_hat = softmax(w_hat * beta)  # (nb_samples, nb_slots)
		g = sigmoid(self.W_hg(h))  # (nb_samples, 1)
		g = K.repeat(g, self.nb_slots)  # (nb_samples, nb_slots, 1)
		g = K.squeeze(g, 2)  # (nb_samples, nb_slots)
		w = (1 - g) * w + g * w_hat  # (nb_samples, nb_slots)
		c = T.batched_tensordot(w, M, axes=[(1), (1)])
		h = tanh(self.W_ih(x) + self.W_c(c))
		y = self.W_ho(h)
		#---------Memory write---------#
		v = self.W_v(h)  # (nb_samples, memory_size)
		v = K.repeat(v, 1)
		e = sigmoid(self.W_he(h))  # (nb_samples, nb_slots)
		f = 1 - w * e  # (nb_samples, nb_slots)
		f = K.repeat(f, self.memory_size)  # (nb_samples, memory_size, nb_slots)
		f = K.permute_dimensions(f, (0, 2, 1))  # (nb_samples, nb_slots, memory_size)
		u = w  # (nb_samples, nb_slots)
		u = K.repeat(u, 1)
		uv = T.batched_tensordot(u, v, axes=[(1), (1)])
		M = M * f + uv
		return y, [M, h, w]
def yolo_v1_loss(y_true, y_pred):
    # y_pred is a (batch, 40, 7) tensor; y_true has the same layout (40x7 per sample).

    truth_conf_tensor = K.expand_dims(y_true[:, :, 0], 2)
    truth_xy_tensor = y_true[:, :, 1:3]
    truth_wh_tensor = y_true[:, :, 3:5]
    truth_m_tensor = K.expand_dims(y_true[:, :, 5], 2)
    truth_v_tensor = K.expand_dims(y_true[:, :, 6], 2)

    pred_conf_tensor = K.expand_dims(y_pred[:, :, 0], 2)
    pred_xy_tensor = y_pred[:, :, 1:3]
    pred_wh_tensor = y_pred[:, :, 3:5]
    pred_m_tensor = K.expand_dims(y_pred[:, :, 5], 2)
    pred_v_tensor = K.expand_dims(y_pred[:, :, 6], 2)

    truth_xy_tensor = tf.Print(truth_xy_tensor, [truth_xy_tensor[:, 14:20, 0]], message='truth_xy', summarize=30)
    pred_xy_tensor = tf.Print(pred_xy_tensor, [pred_xy_tensor[:, 14:20, 0]], message='pred_xy', summarize=30)

    tens = K.greater(K.sigmoid(truth_conf_tensor), 0.5)
    tens_2d = K.concatenate([tens,tens], axis=-1)

    conf_loss = yolo_conf_loss(truth_conf_tensor, pred_conf_tensor,tens)
    xy_loss = yoloxyloss(truth_xy_tensor,pred_xy_tensor,tens_2d)
    wh_loss = yolo_wh_loss(truth_wh_tensor,pred_wh_tensor,tens_2d)
    m_loss = yolo_regressor_loss(truth_m_tensor,pred_m_tensor,tens)
    v_loss = yolo_regressor_loss(truth_v_tensor,pred_v_tensor,tens)

    loss = 2.0 * conf_loss + 0.25 * xy_loss + 0.25 * wh_loss + 1.5 * m_loss + 1.25 * v_loss # loss v1
    #loss = 2.0 * conf_loss + 0.1 * xy_loss + 1.0 * wh_loss + 5.0 * m_loss + 2.5 * v_loss  # loss v2


    return loss
Example no. 7
 def step(self, x, states):
     r_tm1, V_tm1, s_tm1, time = states[:4]
     h_tm1 = states[4:]

     def print_name_shape(name, x):
         # Debug helper: print a tensor's shape at run time while contributing 0 to the value.
         return T.cast(K.sum(theano.printing.Print(name)(x.shape)) * 0, "float32")

     r_tm1 = r_tm1 + print_name_shape("out\nr_tm1", r_tm1) + \
                     print_name_shape("V_tm1", V_tm1) + \
                     print_name_shape("s_tm1", s_tm1) + \
                     print_name_shape("x", x) + \
                     print_name_shape("h_tm1_0", h_tm1[0]) + \
                     print_name_shape("h_tm1_1", h_tm1[1])

     op_t, h_t = self._update_controller(T.concatenate([x, r_tm1], axis=-1),
                                         h_tm1)

     op_t = op_t + print_name_shape("afterop_t", op_t)
     ao = K.dot(op_t, self.W_d)
     ao = ao + print_name_shape("ao", ao)
     d_t = K.sigmoid(ao + self.b_d) + print_name_shape("afterop2_t", op_t)
     u_t = K.sigmoid(K.dot(op_t, self.W_u) + self.b_u) + print_name_shape("d_t", op_t)
     v_t = K.tanh(K.dot(op_t, self.W_v) + self.b_v) + print_name_shape("u_t", u_t)
     o_t = K.tanh(K.dot(op_t, self.W_o) + self.b_o) + print_name_shape("v_t", v_t)

     o_t = o_t + print_name_shape("afterbulk_t", o_t)

     time = time + 1
     V_t, s_t, r_t = _update_neural_stack(self, V_tm1, s_tm1, d_t[::, 0],
                                          u_t[::, 0], v_t, time[0], stack=self.stack)

     V_t = V_t + print_name_shape("o_t", o_t) + \
                 print_name_shape("r_t", r_t) + \
                 print_name_shape("V_t", V_t) + \
                 print_name_shape("s_t", s_t)

     return o_t, [r_t, V_t, s_t, time] + h_t
Example no. 8
def sigmoid(x):
    """
    Sigmoid activation function.

    >>> sigmoid(0)
    0.5
    """
    return K.eval(K.sigmoid(K.variable(x))).tolist()
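
For reference, the same activation in plain NumPy, written in the numerically stable form (a sketch, independent of any Keras backend):

import numpy as np

def sigmoid_np(x):
    """Stable sigmoid; matches the doctest above.

    >>> sigmoid_np(0)
    0.5
    """
    x = np.asarray(x, dtype=np.float64)
    e = np.exp(-np.abs(x))  # never overflows
    out = np.where(x >= 0, 1.0 / (1.0 + e), e / (1.0 + e))
    return out.tolist() if out.ndim else float(out)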
Example no. 9
File: stack.py Project: dytmas/seya
    def step(self, x, states):
        r_tm1, V_tm1, s_tm1, time = states[:4]
        h_tm1 = states[4:]

        op_t, h_t = _update_controller(self, T.concatenate([x, r_tm1], axis=-1),
                                       h_tm1)

        d_t = K.sigmoid(K.dot(op_t, self.W_d) + self.b_d)
        u_t = K.sigmoid(K.dot(op_t, self.W_u) + self.b_u)
        v_t = K.tanh(K.dot(op_t, self.W_v) + self.b_v)
        o_t = K.tanh(K.dot(op_t, self.W_o) + self.b_o)

        time = time + 1
        V_t, s_t, r_t = _update_neural_stack(self, V_tm1, s_tm1, d_t[::, 0],
                                             u_t[::, 0], v_t, time[0], stack=self.stack)

        return o_t, [r_t, V_t, s_t, time] + h_t
Example no. 10
    def call(self, x, mask=None):
        N_DECISION = (2 ** (self.n_depth)) - 1  # Number of decision nodes
        N_LEAF  = 2 ** (self.n_depth + 1)  # Number of leaf nodes

        flat_decision_p_e = []
        leaf_p_e = []
        for w_d, w_l in zip(self.w_d_ensemble, self.w_l_ensemble):

            decision_p = K.sigmoid((K.dot(x, w_d)))
            leaf_p = K.softmax(w_l)

            decision_p_comp = 1 - decision_p

            decision_p_pack = K.concatenate([decision_p, decision_p_comp])

            flat_decision_p_e.append(decision_p_pack)
            leaf_p_e.append(leaf_p)

        #Construct tiling pattern for decision probability matrix
        #Could be done in TF, but I think it's better statically
        tiling_pattern = np.zeros((N_LEAF, self.n_depth), dtype=np.int32)
        comp_offset = N_DECISION
        dec_idx = 0
        for n in range(self.n_depth):  # xrange -> range for Python 3
            j = 0
            for depth_idx in range(2**n):
                repeat_times = 2 ** (self.n_depth - n)
                for _ in range(repeat_times):
                    tiling_pattern[j][n] = dec_idx
                    j = j + 1

                for _ in range(repeat_times):
                    tiling_pattern[j][n] = comp_offset + dec_idx
                    j = j + 1

                dec_idx = dec_idx + 1

        flat_pattern = tiling_pattern.flatten()

        # iterate over each tree
        tree_ret = None
        for flat_decision_p, leaf_p in zip(flat_decision_p_e, leaf_p_e):
            flat_mu = tf.transpose(tf.gather(tf.transpose(flat_decision_p), flat_pattern))
            
            batch_size = tf.shape(flat_decision_p)[0]
            shape = tf.stack([batch_size, N_LEAF, self.n_depth])  # tf.pack was renamed tf.stack

            mu = K.reshape(flat_mu, shape)
            leaf_prob = K.prod(mu, [2])
            prob_label = K.dot(leaf_prob, leaf_p)

            if tree_ret is None:
              tree_ret = prob_label
            else:
              tree_ret = tree_ret + prob_label

        return tree_ret/self.n_trees
def pair_loss(y_true, y_pred):
    y_true = tf.cast(y_true, tf.int32)
    parts = tf.dynamic_partition(y_pred, y_true, 2)
    y_pos = parts[1]
    y_neg = parts[0]
    y_pos = tf.expand_dims(y_pos, 0)
    y_neg = tf.expand_dims(y_neg, -1)
    out = K.sigmoid(y_neg - y_pos)
    return K.mean(out)
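
On a toy batch the mechanics are easy to see: the loss averages sigmoid(y_neg - y_pos) over every negative/positive score pair, so it falls below 0.5 once positives outscore negatives. A sketch assuming TensorFlow 2.x (all values hypothetical):

import tensorflow as tf

y_true = tf.constant([1, 0, 1, 0])           # 1 = positive, 0 = negative
y_pred = tf.constant([2.0, -1.0, 1.5, 0.5])  # model scores

y_pos = tf.expand_dims(tf.boolean_mask(y_pred, y_true == 1), 0)   # (1, P)
y_neg = tf.expand_dims(tf.boolean_mask(y_pred, y_true == 0), -1)  # (N, 1)
loss = tf.reduce_mean(tf.sigmoid(y_neg - y_pos))  # broadcasts to (N, P) pairs
print(float(loss))  # well below 0.5 here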
Example no. 12
    def triplet_loss(self, y_true, y_pred):
        y_pred = K.sigmoid(y_pred)

        p_plus = K.sum(y_true * y_pred, axis=1, keepdims=True)
        p_gaps = y_pred - p_plus + self.margin

        L = K.maximum(0., p_gaps)  # float literal avoids an int/float dtype mismatch
        # return T.max(L, axis=1)
        return K.sum(L, axis=1)
def bpr_triplet_loss(X):

    positive_item_latent, negative_item_latent, user_latent = X

    # BPR loss
    loss = 1.0 - K.sigmoid(
        K.sum(user_latent * positive_item_latent, axis=-1, keepdims=True) -
        K.sum(user_latent * negative_item_latent, axis=-1, keepdims=True))

    return loss
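
Numerically, the loss approaches 0 as the positive interaction outscores the negative one. A NumPy sketch with random toy embeddings (all names hypothetical):

import numpy as np

rng = np.random.default_rng(0)
user = rng.normal(size=(4, 8))   # user latent vectors (batch, dim)
pos = rng.normal(size=(4, 8))    # positive item latent vectors
neg = rng.normal(size=(4, 8))    # negative item latent vectors

diff = (user * pos).sum(-1, keepdims=True) - (user * neg).sum(-1, keepdims=True)
loss = 1.0 - 1.0 / (1.0 + np.exp(-diff))  # 1 - sigmoid(pos_score - neg_score)
print(loss.ravel())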
    def get_gate(self, state_j, key_j, inputs):
        """
        Implements the gate (scalar for each block). Equation 2:

        g_j <- \sigma(s_t^T h_j + s_t^T w_j)

        """
        a = tf.reduce_sum(inputs * state_j, axis=1)
        b = tf.reduce_sum(inputs * key_j, axis=1)
        return K.sigmoid(a + b)
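
A worked instance of Equation 2 in NumPy (toy vectors; inputs plays the role of s_t, and h_j, w_j are one block's state and key):

import numpy as np

s_t = np.array([[0.5, -1.0, 2.0]])    # current input encoding (batch=1, dim=3)
h_j = np.array([[1.0, 0.0, 0.5]])     # block state
w_j = np.array([[0.0, 1.0, 1.0]])     # block key

a = (s_t * h_j).sum(axis=1)           # s_t . h_j = 1.5
b = (s_t * w_j).sum(axis=1)           # s_t . w_j = 1.0
g_j = 1.0 / (1.0 + np.exp(-(a + b)))  # sigmoid(2.5) ≈ 0.924
print(g_j)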
def bpr_triplet_loss(X):

    user_latent, item_latent = X.values()
    positive_item_latent, negative_item_latent = item_latent.values()

    # BPR loss
    loss = - K.sigmoid(K.sum(user_latent * positive_item_latent, axis=-1, keepdims=True)
                       - K.sum(user_latent * negative_item_latent, axis=-1, keepdims=True))

    return loss
def yolo_conf_loss(y_true, y_pred, t):
    real_y_true = tf.where(t, y_true, K.zeros_like(y_true))
    pobj = K.sigmoid(y_pred)
    lo = K.square(real_y_true - pobj)
    value_if_true = 5.0 * (lo)
    value_if_false = 0.05 * (lo)
    loss1 = tf.where(t, value_if_true, value_if_false)

    loss = K.mean(loss1)
    return loss
Example no. 17
	def __call__(self, loss):
		if not hasattr(self, 'layer'):
			raise Exception('Need to call `set_layer` on '
							'ActivityRegularizer instance '
							'before calling the instance.')
		regularized_loss = loss
		for i in range(len(self.layer.inbound_nodes)):
			output = K.sigmoid(0.1 * self.layer.get_output_at(i))
			#output = self.layer.get_output_at(i)
			p_hat = K.mean(K.abs(output))
			regularized_loss += self.l * kl_divergence(self.p, p_hat)
		return K.in_train_phase(regularized_loss, loss)
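
The kl_divergence helper is not shown in this snippet. For a sparsity regularizer of this form it is conventionally the KL divergence between Bernoulli distributions with means p and p_hat; a NumPy sketch under that assumption:

import numpy as np

def kl_divergence(p, p_hat):
    # KL(Bernoulli(p) || Bernoulli(p_hat)); clip p_hat away from 0 and 1 in practice.
    return p * np.log(p / p_hat) + (1 - p) * np.log((1 - p) / (1 - p_hat))

print(kl_divergence(0.05, 0.2))  # penalty grows as p_hat drifts from the target p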
Example no. 18
    def step(self, x, states):
        h, [h, c] = self.layer.step(x, states)
        attention = states[4]

        m = self.attn_activation(K.dot(h, self.U_a) * attention + self.b_a)
        s = K.sigmoid(K.dot(m, self.U_s) + self.b_s)

        if self.single_attention_param:
            h = h * K.repeat_elements(s, self.layer.output_dim, axis=1)
        else:
            h = h * s

        return h, [h, c]
Example no. 19
    def sample_h_given_x(self, x):
        """
        Draw sample from p(h|x).

        For Bernoulli RBM the conditional probability distribution can be derived to be 
           p(h_j=1|x) = sigmoid(x^T W[:,j] + bh_j).
        """
        h_pre = K.dot(x, self.W) + self.bh          # pre-sigmoid (used in cross-entropy error calculation for better numerical stability)
        h_sigm = K.sigmoid(h_pre)              # mean of Bernoulli distribution ('p', prob. of variable taking value 1), sometimes called mean-field value
        h_samp = random_binomial(shape=h_sigm.shape, n=1, p=h_sigm)
                                                    # random sample
                                                    #   \hat{h} = 1,      if p(h=1|x) > uniform(0, 1)
                                                    #             0,      otherwise
        # pre and sigm are returned to compute cross-entropy
        return h_samp, h_pre, h_sigm
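
The same sampling step in plain NumPy (toy shapes; the Bernoulli draw mirrors what random_binomial does):

import numpy as np

rng = np.random.default_rng(42)
x = rng.integers(0, 2, size=(4, 6)).astype(float)  # visible units
W = rng.normal(scale=0.1, size=(6, 3))             # weights
bh = np.zeros(3)                                   # hidden biases

h_pre = x @ W + bh                                 # pre-sigmoid activation
h_sigm = 1.0 / (1.0 + np.exp(-h_pre))              # p(h_j = 1 | x)
h_samp = (rng.uniform(size=h_sigm.shape) < h_sigm).astype(float)  # Bernoulli sample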
Example no. 20
    def step(self, x, states):
        h, [h, c] = super(AttentionLSTM, self).step(x, states)
        attention = states[4]

        m = self.attn_activation(K.dot(h, self.U_a) * attention + self.b_a)
        # Intuitively a sigmoid makes more sense here; an exponential was giving
        # NaN problems, likely because the gradients blow up.
        s = K.sigmoid(K.dot(m, self.U_s) + self.b_s)

        if self.single_attention_param:
            h = h * K.repeat_elements(s, self.output_dim, axis=1)
        else:
            h = h * s

        return h, [h, c]
Example no. 21
    def sample_x_given_h(self, h):
        """
        Draw sample from p(x|h).

        For Bernoulli RBM the conditional probability distribution can be derived to be 
           p(x_i=1|h) = sigmoid(W[i,:] h + bx_i).
        """

        x_pre = K.dot(h, self.W.T) + self.bx        # pre-sigmoid (used in cross-entropy error calculation for better numerical stability)
        x_sigm = K.sigmoid(x_pre)              # mean of Bernoulli distribution ('p', prob. of variable taking value 1), sometimes called mean-field value
        x_samp = random_binomial(shape=x_sigm.shape, n=1, p=x_sigm)
                                                    # random sample
                                                    #   \hat{x} = 1,      if p(x=1|h) > uniform(0, 1)
                                                    #             0,      otherwise
        # pre and sigm are returned to compute cross-entropy
        return x_samp, x_pre, x_sigm
Example no. 22
    def call(self, inputs, mask=None):
        if not isinstance(inputs, list) or len(inputs) <= 1:
            raise TypeError('Attention must be called on a list of tensors '
                            '(at least 2). Got: ' + str(inputs))
        # (None(batch), MaxLen(time), spec_dim, embed_dim)
        mix_embed_l = inputs[0]
        # (None(batch), embed_dim)
        spk_embed_l = inputs[1]
        energy = None
        if self.mode == 'dot':
            # (batch, time, spec_dim, embed_dim) batch_dot(3,1) (batch, embed_dim) = (batch, time, spec_dim)
            energy = K.batch_dot(mix_embed_l, spk_embed_l, axes=(3, 1))
        elif self.mode == 'align':
            # (batch, time, spec_dim, embed_dim) dot (embed_dim, align_hidden)
            # -> (batch, time, spec_dim, align_hidden)
            hUa = K.dot(mix_embed_l, self.U_align)
            # (batch, embed_dim) dot (embed_dim, align_hidden)
            # -> (batch, align_hidden)
            sWa = K.dot(spk_embed_l, self.W_align)
            sWa = sWa.dimshuffle(0, 'x', 'x', 1)
            # -> (batch, time, spec_dim, align_hidden)
            tanh_sWahUa = K.tanh(sWa+hUa)
            # -> (batch, time, spec_dim, align_hidden) dot (align_hidden, 1)
            # -> (batch, time, spec_dim, 1)
            energy = K.dot(tanh_sWahUa, self.v_align)
            # -> (batch, time, spec_dim)
            energy = K.reshape(energy, (-1, self.time_step, self.spec_dim))
        else:
            raise ValueError('Unknown merge mode.')

        if self.nonlinearity == 'sigmoid':
            alpha = K.sigmoid(energy)
        elif self.nonlinearity == 'linear':
            alpha = energy
        else:
            raise Exception('Unknown nonlinearity mode for attention:'+self.nonlinearity)
        # (batch, time, spec_dim)
        return alpha
Example no. 23
    def build(self, input_shape=None):
        self.input_spec = InputSpec(shape=input_shape)
        if not self.layer.built:
            self.layer.build(input_shape)
            self.layer.built = True
        super(ConcreteDropout, self).build()  # this is very weird.. we must call super before we add new losses

        # initialise p
        self.p_logit = self.layer.add_weight(name='p_logit',
                                            shape=(1,),
                                            initializer=initializers.RandomUniform(self.init_min, self.init_max),
                                            trainable=True)
        self.p = K.sigmoid(self.p_logit[0])

        # initialise regulariser / prior KL term
        input_dim = np.prod(input_shape[1:])  # we drop only last dim
        weight = self.layer.kernel
        kernel_regularizer = self.weight_regularizer * K.sum(K.square(weight)) / (1. - self.p)
        dropout_regularizer = self.p * K.log(self.p)
        dropout_regularizer += (1. - self.p) * K.log(1. - self.p)
        dropout_regularizer *= self.dropout_regularizer * input_dim
        regularizer = K.sum(kernel_regularizer + dropout_regularizer)
        self.layer.add_loss(regularizer)
Example no. 24
    def concrete_dropout(self, x):
        '''
        Concrete dropout - used at training time (gradients can be propagated)
        :param x: input
        :return:  approx. dropped out input
        '''
        eps = K.cast_to_floatx(K.epsilon())
        temp = 0.1

        unif_noise = K.random_uniform(shape=K.shape(x))
        drop_prob = (
            K.log(self.p + eps)
            - K.log(1. - self.p + eps)
            + K.log(unif_noise + eps)
            - K.log(1. - unif_noise + eps)
        )
        drop_prob = K.sigmoid(drop_prob / temp)
        random_tensor = 1. - drop_prob

        retain_prob = 1. - self.p
        x *= random_tensor
        x /= retain_prob
        return x
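
The Concrete relaxation can be sanity-checked numerically: as temp falls, drop_prob approaches a hard Bernoulli mask whose mean is p. A NumPy sketch with the same temp and a comparable eps (toy p):

import numpy as np

p, temp, eps = 0.3, 0.1, 1e-7
unif = np.random.default_rng(1).uniform(size=100000)

logit = (np.log(p + eps) - np.log(1. - p + eps)
         + np.log(unif + eps) - np.log(1. - unif + eps))
drop_prob = 1.0 / (1.0 + np.exp(-logit / temp))  # sigmoid(logit / temp)
print(drop_prob.mean())  # ≈ 0.3, i.e. on average ≈ p of the units are (softly) dropped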
Example no. 25
 def attention_step(self, x, args):
     ''' Attention step function
     #Arguments
         args: [h_,
                 context, projected_context, 
                 W_h_prj, 
                 w_prj_att, b_att,
                 W_x_h, U_h_h, W_ctx_h, b_h
                 W_x_p, W_h_p, W_ctx_p, b_p].
             h_: (batch_size, dim_hidden)
             context: (batch_size, nb_context, dim_context)
             projected_context: (batch_size, nb_context, dim_projected_context)
                 projected_context = context dot W_ctx_prj + b_ctx_prj
                 calculated before step.
             W_h_prj: (dim_hidden, dim_projected_context)
             w_prj_att: (dim_projected_context, 1)
             b_att: (1,)
             W_x_h: (dim_embedding, dim_hidden)
             U_h_h: (dim_hidden, dim_hidden)
             W_ctx_h: (dim_context, dim_hidden)
             b_h: (dim_hidden,)                
     '''
     assert len(args) == 1 + len(self.contexts) + len(self.params)
     [h_, context, projected_context, 
      W_h_prj, 
      w_prj_att, b_att,
      W_x_h, U_h_h, W_ctx_h, b_h] = args
     
     projected = K.expand_dims(K.dot(h_, W_h_prj), 1) + projected_context
     e = K.dot(K.tanh(projected), w_prj_att) + b_att
     alpha = K.softmax(K.flatten(e))
     weighted_context = K.sum((context * K.expand_dims(alpha)), 1)
     pre_act = K.dot(x, W_x_h) + K.dot(h_, U_h_h) + K.dot(weighted_context, W_ctx_h) + b_h
     h = K.sigmoid(pre_act)
     
 
     return h, [alpha, weighted_context], [h]
Example no. 26
 def call(self, inputs):
     return inputs * K.sigmoid(self.beta * inputs)
Example no. 27
def logsigmoid(x):
    x = K.sigmoid(x)
    x = K.log(x)
    return x
def scaled_sigmoid(x):
    """ Sigmoid scaled by 2 pi """
    return K.tf.constant(2*np.pi) * K.sigmoid(x)
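
K.tf was a shortcut to the TensorFlow module in older multi-backend Keras; with tf.keras the same activation can be written directly (a sketch, assuming TensorFlow 2.x; the function name here is hypothetical):

import numpy as np
import tensorflow as tf

def scaled_sigmoid_tf2(x):
    """Sigmoid scaled to (0, 2*pi), e.g. for predicting angles."""
    return 2.0 * np.pi * tf.sigmoid(x)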
Example no. 29
 def calc_score(context_word, context_word_sense, W_g, W_s):
     return K.sigmoid(K.dot(get_vector(word, word_sense, W_g, W_s),
                            get_vector(context_word, context_word_sense, W_g, W_s)))
Example no. 30
def Margin_Loss(y_true, y_pred):
    score_best = y_pred[0]
    score_predict = y_pred[1]
    loss = K.maximum(0.0, 1.0 - K.sigmoid(score_best - score_predict))
    return K.mean(loss) + 0 * y_true  # reference y_true so Keras accepts the loss signature
 def swish(x):
     return x * backend.sigmoid(x)
Example no. 32
def swish(x):
    return (K.sigmoid(x) * x)
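
swish(x) = x * sigmoid(x) is the same function TensorFlow later shipped as tf.nn.silu; a quick equivalence check (assuming TensorFlow 2.4+):

import tensorflow as tf

x = tf.linspace(-5.0, 5.0, 11)
manual = x * tf.sigmoid(x)   # swish as defined above
builtin = tf.nn.silu(x)      # built-in SiLU / Swish
assert bool(tf.reduce_all(tf.abs(manual - builtin) < 1e-6))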
Example no. 33
    grid_shape = K.shape(feats)[1:3] # height, width
    grid_y = K.tile(K.reshape(K.arange(0, stop=grid_shape[0]), [-1, 1, 1, 1]),
        [1, grid_shape[1], 1, 1])
    grid_x = K.tile(K.reshape(K.arange(0, stop=grid_shape[1]), [1, -1, 1, 1]),
        [grid_shape[0], 1, 1, 1])
    grid = K.concatenate([grid_x, grid_y])  # grid holds each cell's offset from the top-left corner
    '''
    The cell at (0,0) is offset by zero in both X and Y from the top-left corner; the
    cell at (1,0) is offset by one cell along Y and none along X, and so on. grid thus
    records every box's offset from the top-left corner, and adding grid to each cell
    yields each box's coordinates relative to its cell.
    '''
    grid = K.cast(grid, K.dtype(feats))
    # Reshape the raw model output; its original shape is
    # [batchsize, grid_shape[0], grid_shape[1], num_anchors*(num_classes + 5)].
    feats = K.reshape(
        feats, [-1, grid_shape[0], grid_shape[1], num_anchors, num_classes + 5])

    box_xy = (K.sigmoid(feats[..., :2]) + grid) / K.cast(grid_shape[::-1], K.dtype(feats))  # b_x = c_x + sigmoid(t_x)
    box_wh = K.exp(feats[..., 2:4]) * anchors_tensor / K.cast(input_shape[::-1], K.dtype(feats))  # decoded w,h (formulas from the YOLOv2 paper)
    box_confidence = K.sigmoid(feats[..., 4:5])  # confidence that the box contains an object
    box_class_probs = K.sigmoid(feats[..., 5:])  # per-class probabilities

    if calc_loss == True:
        return grid, feats, box_xy, box_wh
    return box_xy, box_wh, box_confidence, box_class_probs

# Recover the real box coordinates, converted according to the image's actual size.
def yolo_correct_boxes(box_xy, box_wh, input_shape, image_shape):
    box_yx = box_xy[..., ::-1]  # swap the x,y columns to y,x
    box_hw = box_wh[..., ::-1]  # swap the w,h columns to h,w
    input_shape = K.cast(input_shape, K.dtype(box_yx))
    image_shape = K.cast(image_shape, K.dtype(box_yx))
    new_shape = K.round(image_shape * K.min(input_shape/image_shape))
def sigmoid_neg(x):
    return K.sigmoid(x) - 0.5
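
sigmoid_neg is a zero-centered sigmoid; by the identity tanh(x/2) = 2*sigmoid(x) - 1 it equals tanh(x/2)/2. A quick NumPy check:

import numpy as np

x = np.linspace(-4, 4, 9)
lhs = 1.0 / (1.0 + np.exp(-x)) - 0.5  # sigmoid(x) - 0.5
rhs = np.tanh(x / 2.0) / 2.0          # tanh(x/2) / 2
assert np.allclose(lhs, rhs)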
Example no. 35
def swish_activation(x):
    return (K.sigmoid(x) * x)
def swish(x):
    return (K.sigmoid(x) * x)
Example no. 37
    def decode(yolo_outputs):
        num_scales = len(yolo_outputs)
        anchor_mask = [[6, 7, 8], [3, 4, 5], [0, 1, 2]
                       ] if num_scales == 3 else [[3, 4, 5], [1, 2, 3]]

        b_min_max_list = []
        box_confidence_list = []
        class_probs_list = []

        for scale_idx in np.arange(num_scales):  # was hard-coded to 3; honor the actual scale count
            anchors = all_anchors[anchor_mask[scale_idx]]
            output = yolo_outputs[scale_idx]
            num_anchors = len(anchors)

            batch_size = K.shape(output)[0]
            grid_shape = K.shape(output)[1:3]
            grid_height = grid_shape[0]  # height
            grid_width = grid_shape[1]  # width

            # reshape to tensor of dimensions batch_size, grid_height, grid_width, num_anchors, 5 + num_classes
            # the five box parameters are:
            #   t_x, t_y determine the center point of the box
            #   t_w, t_h determine the width and height of the box
            #   the box confidence indicates the confidence that box contains an object and box is accurate
            output = K.reshape(
                output,
                [-1, grid_height, grid_width, num_anchors, 5 + num_classes])

            # compute b_x, b_y for each cell and each anchor
            c_x = K.tile(K.reshape(K.arange(grid_width), [1, -1, 1, 1]),
                         [grid_height, 1, num_anchors, 1])
            c_y = K.tile(K.reshape(K.arange(grid_height), [-1, 1, 1, 1]),
                         [1, grid_width, num_anchors, 1])
            c_xy = K.concatenate([c_x, c_y])
            c_xy = K.cast(c_xy, K.dtype(output))
            b_xy = (K.sigmoid(output[..., :2]) + c_xy) / K.cast(
                grid_shape[::-1], K.dtype(output))

            # compute b_w and b_h for each cell and each anchor
            p_wh = K.tile(
                K.reshape(K.constant(anchors), [1, 1, num_anchors, 2]),
                [grid_height, grid_width, 1, 1])
            b_wh = p_wh * K.exp(output[..., 2:4]) / K.cast(
                input_shape[::-1], K.dtype(output))

            b_min_max = K.reshape(convert_box_params(
                b_xy, b_wh), [batch_size, -1, 4])  # y_min, x_min, y_max, x_max

            # compute box confidence for each cell and each anchor
            box_confidence = K.reshape(K.sigmoid(output[..., 4]),
                                       [batch_size, -1])

            # compute class probabilities for each cell and each anchor
            class_probs = K.reshape(K.sigmoid(output[..., 5:]),
                                    [batch_size, -1, num_classes])

            b_min_max_list.append(b_min_max)
            box_confidence_list.append(box_confidence)
            class_probs_list.append(class_probs)

        return [
            K.concatenate(b_min_max_list, axis=1),
            K.concatenate(box_confidence_list, axis=1),
            K.concatenate(class_probs_list, axis=1)
        ]
Example no. 38
 def call(self, inputs):
     a = K.cast(self.a, dtype=K.dtype(inputs))
     P = (K.sigmoid(a * (K.mean(inputs, axis=(1, 2)) - self.b)) - K.sigmoid(
         -a * self.b)) / (K.sigmoid(a *
                                    (1. - self.b)) - K.sigmoid(-a * self.b))
     return P
Example no. 39
File: demo.py Project: sanixa/align
 def call(self, inputs, mode='positive'):
     if mode == 'positive':
         scale = self.tau + (1 - self.tau) * K.sigmoid(self.scale)
     else:
         scale = (1 - self.tau) * K.sigmoid(-self.scale)
     return inputs * K.sqrt(scale)
Example no. 40
def yolo_loss(args,
              anchors,
              num_classes,
              rescore_confidence=False,
              print_loss=False):
    """YOLO localization loss function.

    Parameters
    ----------
    yolo_output : tensor
        Final convolutional layer features.

    true_boxes : tensor
        Ground truth boxes tensor with shape [batch, num_true_boxes, 5]
        containing box x_center, y_center, width, height, and class.

    detectors_mask : array
        0/1 mask for detector positions where there is a matching ground truth.

    matching_true_boxes : array
        Corresponding ground truth boxes for positive detector positions.
        Already adjusted for conv height and width.

    anchors : tensor
        Anchor boxes for model.

    num_classes : int
        Number of object classes.

    rescore_confidence : bool, default=False
        If true then set confidence target to IOU of best predicted box with
        the closest matching ground truth box.

    print_loss : bool, default=False
        If True then use a tf.Print() to print the loss components.

    Returns
    -------
    mean_loss : float
        mean localization loss across minibatch
    """
    (yolo_output, true_boxes, detectors_mask, matching_true_boxes) = args
    num_anchors = len(anchors)
    object_scale = 5
    no_object_scale = 1
    class_scale = 1
    coordinates_scale = 1
    pred_xy, pred_wh, pred_confidence, pred_class_prob = yolo_head(
        yolo_output, anchors, num_classes)

    # Unadjusted box predictions for loss.
    # TODO: Remove extra computation shared with yolo_head.
    yolo_output_shape = K.shape(yolo_output)
    feats = K.reshape(yolo_output, [
        -1, yolo_output_shape[1], yolo_output_shape[2], num_anchors,
        num_classes + 5
    ])
    pred_boxes = K.concatenate((K.sigmoid(feats[..., 0:2]), feats[..., 2:4]),
                               axis=-1)

    # TODO: Adjust predictions by image width/height for non-square images?
    # IOUs may be off due to different aspect ratio.

    # Expand pred x,y,w,h to allow comparison with ground truth.
    # batch, conv_height, conv_width, num_anchors, num_true_boxes, box_params
    pred_xy = K.expand_dims(pred_xy, 4)
    pred_wh = K.expand_dims(pred_wh, 4)

    pred_wh_half = pred_wh / 2.
    pred_mins = pred_xy - pred_wh_half
    pred_maxes = pred_xy + pred_wh_half

    true_boxes_shape = K.shape(true_boxes)

    # batch, conv_height, conv_width, num_anchors, num_true_boxes, box_params
    true_boxes = K.reshape(true_boxes, [
        true_boxes_shape[0], 1, 1, 1, true_boxes_shape[1], true_boxes_shape[2]
    ])
    true_xy = true_boxes[..., 0:2]
    true_wh = true_boxes[..., 2:4]

    # Find IOU of each predicted box with each ground truth box.
    true_wh_half = true_wh / 2.
    true_mins = true_xy - true_wh_half
    true_maxes = true_xy + true_wh_half

    intersect_mins = K.maximum(pred_mins, true_mins)
    intersect_maxes = K.minimum(pred_maxes, true_maxes)
    intersect_wh = K.maximum(intersect_maxes - intersect_mins, 0.)
    intersect_areas = intersect_wh[..., 0] * intersect_wh[..., 1]

    pred_areas = pred_wh[..., 0] * pred_wh[..., 1]
    true_areas = true_wh[..., 0] * true_wh[..., 1]

    union_areas = pred_areas + true_areas - intersect_areas
    iou_scores = intersect_areas / union_areas

    # Best IOUs for each location.
    best_ious = K.max(iou_scores, axis=4)  # Best IOU scores.
    best_ious = K.expand_dims(best_ious)

    # A detector has found an object if IOU > thresh for some true box.
    object_detections = K.cast(best_ious > 0.6, K.dtype(best_ious))

    # TODO: Darknet region training includes extra coordinate loss for early
    # training steps to encourage predictions to match anchor priors.

    # Determine confidence weights from object and no_object weights.
    # NOTE: YOLO does not use binary cross-entropy here.
    no_object_weights = (no_object_scale * (1 - object_detections) *
                         (1 - detectors_mask))
    no_objects_loss = no_object_weights * K.square(-pred_confidence)

    if rescore_confidence:
        objects_loss = (object_scale * detectors_mask *
                        K.square(best_ious - pred_confidence))
    else:
        objects_loss = (object_scale * detectors_mask *
                        K.square(1 - pred_confidence))
    confidence_loss = objects_loss + no_objects_loss

    # Classification loss for matching detections.
    # NOTE: YOLO does not use categorical cross-entropy loss here.
    matching_classes = K.cast(matching_true_boxes[..., 4], 'int32')
    matching_classes = K.one_hot(matching_classes, num_classes)
    classification_loss = (class_scale * detectors_mask *
                           K.square(matching_classes - pred_class_prob))

    # Coordinate loss for matching detection boxes.
    matching_boxes = matching_true_boxes[..., 0:4]
    coordinates_loss = (coordinates_scale * detectors_mask *
                        K.square(matching_boxes - pred_boxes))

    confidence_loss_sum = K.sum(confidence_loss)
    classification_loss_sum = K.sum(classification_loss)
    coordinates_loss_sum = K.sum(coordinates_loss)
    total_loss = 0.5 * (confidence_loss_sum + classification_loss_sum +
                        coordinates_loss_sum)
    if print_loss:
        total_loss = tf.Print(
            total_loss, [
                total_loss, confidence_loss_sum, classification_loss_sum,
                coordinates_loss_sum
            ],
            message='yolo_loss, conf_loss, class_loss, box_coord_loss:')

    return total_loss
Example no. 41
    def call(self, x, mask=None):
        #~~~~~~~~~~~Global Attention
        if self.attention_type == 'global':

            #eij = K.tanh(K.dot(x, self.W)) #worked but this is self
            eij = K.dot(x, self.W)

            if self.bias:
                eij += self.b
            eij = dot_product(eij, self.u)
            ai = K.exp(eij)

            weights = ai / K.cast(K.sum(ai, axis=1, keepdims=True),
                                  K.floatx())  #+ K.epsilon()

            weights = K.expand_dims(weights)
            weighted_input = x * weights

            if self.return_coefficients:
                return [K.sum(weighted_input, axis=1), weights]
            else:
                return K.sum(weighted_input, axis=1)

#~~~~~~~~~~~Local Attention

        elif self.attention_type == 'local':

            eij = K.dot(x, self.W)

            if self.bias:
                eij += self.b
            eij = dot_product(eij, self.u)

            D = 4
            pt = 5
            ii = np.array(range(np.shape(x)[-1]), dtype='float32')
            ai = K.exp(eij + (-1.0 * (ii - pt)**2 / (D * D / 2.0)))

            ai_local = K.exp(eij)
            ai_local = ai_local[:, pt - D:pt + D]
            weights = ai / K.cast(K.sum(ai_local, axis=1, keepdims=True),
                                  K.floatx())

            weights = K.expand_dims(weights)
            weighted_input = x * weights

            if self.return_coefficients:
                return [K.sum(weighted_input, axis=1), weights]
            else:
                return K.sum(weighted_input, axis=1)

########################################

        elif self.attention_type == 'localFULL':

            eij = K.dot(x, self.W)

            if self.bias:
                eij += self.b
            eij = dot_product(eij, self.u)

            D = 3  # local atention window size user defined

            pt = int(np.shape(x)[-1]) * K.eye(int(np.shape(x)[-1]))  #Tx
            sig = K.sigmoid(K.tanh(eij))  #sigmoid part
            pt = pt * sig  #Tx*sigmoid

            print(K.eval(pt))

            ii = np.array(range(np.shape(x)[-1]), dtype='float32')
            ai = K.exp(eij + (-1.0 * (ii - pt)**2 / (D * D / 2.0)))

            ai_local = K.exp(eij)
            ai_local = K.dot(ai_local, K.reshape(pt, (-1, -1)))

            weights = ai / K.cast(K.sum(ai_local, axis=1, keepdims=True),
                                  K.floatx())

            weights = K.expand_dims(weights)
            weighted_input = x * weights

            if self.return_coefficients:
                return [K.sum(weighted_input, axis=1), weights]
            else:
                return K.sum(weighted_input, axis=1)

#~~~~~~~~~~~Self Attention
        elif self.attention_type == 'self':

            eij = K.tanh(K.dot(x, self.W))

            if self.bias:
                eij += self.b
            eij = dot_product(eij, self.u)
            ai = K.exp(eij)

            weights = ai / K.cast(K.sum(ai, axis=1, keepdims=True), K.floatx())

            weights = K.expand_dims(weights)
            weighted_input = x * weights

            if self.return_coefficients:
                return [K.sum(weighted_input, axis=1), weights]
            else:
                return K.sum(weighted_input, axis=1)

        else:
            pass
Example no. 42
    def IoU(self, y_true, y_pred):
        '''IoU metric'''
        (yolo_output, true_boxes, detectors_mask,
         matching_true_boxes) = self.args

        num_anchors = len(self.anchors)

        # pred_*.shape = (n_images,13,13,n_boxes,1 or 2) 1 = conf/class 2 = xy or wh
        pred_xy, pred_wh, pred_confidence, pred_class_prob = yolo_head(
            yolo_output, self.anchors, self.num_classes)

        #pred_xy = tf.Print(pred_xy,[tf.shape(pred_xy)[:]],message='PRED XY')

        # Unadjusted box predictions for loss.
        # TODO: Remove extra computation shared with yolo_head.
        yolo_output_shape = K.shape(yolo_output)
        feats = K.reshape(yolo_output, [
            -1, yolo_output_shape[1], yolo_output_shape[2], num_anchors,
            self.num_classes + 5
        ])
        pred_boxes = K.concatenate(
            (K.sigmoid(feats[..., 0:2]), feats[..., 2:4]), axis=-1)

        # TODO: Adjust predictions by image width/height for non-square images?
        # IOUs may be off due to different aspect ratio.

        # Expand pred x,y,w,h to allow comparison with ground truth.
        # batch, conv_height, conv_width, num_anchors, num_true_boxes, box_params
        pred_xy = K.expand_dims(pred_xy, 4)
        pred_wh = K.expand_dims(pred_wh, 4)

        pred_wh_half = pred_wh / 2.
        pred_mins = pred_xy - pred_wh_half
        pred_maxes = pred_xy + pred_wh_half

        true_boxes_shape = K.shape(true_boxes)

        # batch, conv_height, conv_width, num_anchors, num_true_boxes, box_params
        true_boxes = K.reshape(true_boxes, [
            true_boxes_shape[0], 1, 1, 1, true_boxes_shape[1],
            true_boxes_shape[2]
        ])
        true_xy = true_boxes[..., 0:2]
        true_wh = true_boxes[..., 2:4]

        # Find IOU of each predicted box with each ground truth box.
        true_wh_half = true_wh / 2.
        true_mins = true_xy - true_wh_half
        true_maxes = true_xy + true_wh_half

        intersect_mins = K.maximum(pred_mins, true_mins)
        intersect_maxes = K.minimum(pred_maxes, true_maxes)
        intersect_wh = K.maximum(intersect_maxes - intersect_mins, 0.)
        intersect_areas = intersect_wh[..., 0] * intersect_wh[..., 1]

        pred_areas = pred_wh[..., 0] * pred_wh[..., 1]
        true_areas = true_wh[..., 0] * true_wh[..., 1]

        union_areas = pred_areas + true_areas - intersect_areas
        iou_scores = intersect_areas / union_areas
        #iou_scores = tf.Print(iou_scores,[tf.shape(iou_scores)[:]],message='IOU SCORES')

        # Best IOUs for each location.
        best_ious = K.max(iou_scores, axis=4)  # Best IOU scores.
        best_ious = K.expand_dims(best_ious)

        #best_ious = tf.Print(best_ious,[tf.shape(best_ious)],message='BEST IOU SCORE')

        # A detector has found an object if IOU > thresh for some true box.
        object_detections = K.cast(best_ious > 0.6, K.dtype(best_ious))
        #object_detections = tf.Print(object_detections,[tf.shape(object_detections)],message = 'OBJECT DETECTION')

        total_IoU = K.sum(object_detections)

        return total_IoU
Example no. 43
def yolo_loss(args,
              anchors,
              num_classes,
              rescore_confidence=False,
              print_loss=False):
    """YOLO localization loss function.

    Parameters
    ----------
    yolo_output : tensor
        Final convolutional layer features.

    true_boxes : tensor
        Ground truth boxes tensor with shape [batch, num_true_boxes, 5]
        containing box x_center, y_center, width, height, and class.

    detectors_mask : array
        0/1 mask for detector positions where there is a matching ground truth.

    matching_true_boxes : array
        Corresponding ground truth boxes for positive detector positions.
        Already adjusted for conv height and width.

    anchors : tensor
        Anchor boxes for model.

    num_classes : int
        Number of object classes.

    rescore_confidence : bool, default=False
        If true then set confidence target to IOU of best predicted box with
        the closest matching ground truth box.

    print_loss : bool, default=False
        If True then use a tf.Print() to print the loss components.

    Returns
    -------
    mean_loss : float
        mean localization loss across minibatch
    """
    (yolo_output, true_boxes, detectors_mask, matching_true_boxes) = args
    num_anchors = len(anchors)
    object_scale = 5
    no_object_scale = 1
    class_scale = 1
    coordinates_scale = 1
    pred_xy, pred_wh, pred_confidence, pred_class_prob = yolo_head(
        yolo_output, anchors, num_classes)

    # Unadjusted box predictions for loss.
    # TODO: Remove extra computation shared with yolo_head.
    yolo_output_shape = K.shape(yolo_output)
    feats = K.reshape(yolo_output, [
        -1, yolo_output_shape[1], yolo_output_shape[2], num_anchors,
        num_classes + 5
    ])
    pred_boxes = K.concatenate(
        (K.sigmoid(feats[..., 0:2]), feats[..., 2:4]), axis=-1)

    # TODO: Adjust predictions by image width/height for non-square images?
    # IOUs may be off due to different aspect ratio.

    # Expand pred x,y,w,h to allow comparison with ground truth.
    # batch, conv_height, conv_width, num_anchors, num_true_boxes, box_params
    pred_xy = K.expand_dims(pred_xy, 4)
    pred_wh = K.expand_dims(pred_wh, 4)

    pred_wh_half = pred_wh / 2.
    pred_mins = pred_xy - pred_wh_half
    pred_maxes = pred_xy + pred_wh_half

    true_boxes_shape = K.shape(true_boxes)

    # batch, conv_height, conv_width, num_anchors, num_true_boxes, box_params
    true_boxes = K.reshape(true_boxes, [
        true_boxes_shape[0], 1, 1, 1, true_boxes_shape[1], true_boxes_shape[2]
    ])
    true_xy = true_boxes[..., 0:2]
    true_wh = true_boxes[..., 2:4]

    # Find IOU of each predicted box with each ground truth box.
    true_wh_half = true_wh / 2.
    true_mins = true_xy - true_wh_half
    true_maxes = true_xy + true_wh_half

    intersect_mins = K.maximum(pred_mins, true_mins)
    intersect_maxes = K.minimum(pred_maxes, true_maxes)
    intersect_wh = K.maximum(intersect_maxes - intersect_mins, 0.)
    intersect_areas = intersect_wh[..., 0] * intersect_wh[..., 1]

    pred_areas = pred_wh[..., 0] * pred_wh[..., 1]
    true_areas = true_wh[..., 0] * true_wh[..., 1]

    union_areas = pred_areas + true_areas - intersect_areas
    iou_scores = intersect_areas / union_areas

    # Best IOUs for each location.
    best_ious = K.max(iou_scores, axis=4)  # Best IOU scores.
    best_ious = K.expand_dims(best_ious)

    # A detector has found an object if IOU > thresh for some true box.
    object_detections = K.cast(best_ious > 0.6, K.dtype(best_ious))

    # TODO: Darknet region training includes extra coordinate loss for early
    # training steps to encourage predictions to match anchor priors.

    # Determine confidence weights from object and no_object weights.
    # NOTE: YOLO does not use binary cross-entropy here.
    no_object_weights = (no_object_scale * (1 - object_detections) *
                         (1 - detectors_mask))
    no_objects_loss = no_object_weights * K.square(-pred_confidence)

    if rescore_confidence:
        objects_loss = (object_scale * detectors_mask *
                        K.square(best_ious - pred_confidence))
    else:
        objects_loss = (object_scale * detectors_mask *
                        K.square(1 - pred_confidence))
    confidence_loss = objects_loss + no_objects_loss

    # Classification loss for matching detections.
    # NOTE: YOLO does not use categorical cross-entropy loss here.
    matching_classes = K.cast(matching_true_boxes[..., 4], 'int32')
    matching_classes = K.one_hot(matching_classes, num_classes)
    classification_loss = (class_scale * detectors_mask *
                           K.square(matching_classes - pred_class_prob))

    # Coordinate loss for matching detection boxes.
    matching_boxes = matching_true_boxes[..., 0:4]
    coordinates_loss = (coordinates_scale * detectors_mask *
                        K.square(matching_boxes - pred_boxes))

    confidence_loss_sum = K.sum(confidence_loss)
    classification_loss_sum = K.sum(classification_loss)
    coordinates_loss_sum = K.sum(coordinates_loss)
    total_loss = 0.5 * (
        confidence_loss_sum + classification_loss_sum + coordinates_loss_sum)
    if print_loss:
        total_loss = tf.Print(
            total_loss, [
                total_loss, confidence_loss_sum, classification_loss_sum,
                coordinates_loss_sum
            ],
            message='yolo_loss, conf_loss, class_loss, box_coord_loss:')

    return total_loss
Example no. 44
def yolo_head(feats, anchors, num_classes):
    """Convert final layer features to bounding box parameters.

    Parameters
    ----------
    feats : tensor
        Final convolutional layer features.
    anchors : array-like
        Anchor box widths and heights.
    num_classes : int
        Number of target classes.

    Returns
    -------
    box_xy : tensor
        x, y box predictions adjusted by spatial location in conv layer.
    box_wh : tensor
        w, h box predictions adjusted by anchors and conv spatial resolution.
    box_conf : tensor
        Probability estimate for whether each box contains any object.
    box_class_pred : tensor
        Probability distribution estimate for each box over class labels.
    """
    num_anchors = len(anchors)
    # Reshape to batch, height, width, num_anchors, box_params.
    anchors_tensor = K.reshape(K.variable(anchors), [1, 1, 1, num_anchors, 2])

    # Static implementation for fixed models.
    # TODO: Remove or add option for static implementation.
    # _, conv_height, conv_width, _ = K.int_shape(feats)
    # conv_dims = K.variable([conv_width, conv_height])

    # Dynamic implementation of conv dims for fully convolutional model.
    conv_dims = K.shape(feats)[1:3]  # assuming channels last
    # In YOLO the height index is the innermost iteration.
    conv_height_index = K.arange(0, stop=conv_dims[0])
    conv_width_index = K.arange(0, stop=conv_dims[1])
    conv_height_index = K.tile(conv_height_index, [conv_dims[1]])
    # conv_height_index is the top-left cell's height coordinate for a given feats

    # TODO: Repeat_elements and tf.split doesn't support dynamic splits.
    # conv_width_index = K.repeat_elements(conv_width_index, conv_dims[1], axis=0)

    conv_width_index = K.tile(K.expand_dims(conv_width_index, 0),
                              [conv_dims[0], 1])
    conv_width_index = K.flatten(K.transpose(conv_width_index))
    # conv_width_index = K.tile(conv_width_index, [conv_dims[0]])

    conv_index = K.transpose(K.stack([conv_height_index, conv_width_index]))
    conv_index = K.reshape(conv_index, [1, conv_dims[0], conv_dims[1], 1, 2])
    conv_index = K.cast(conv_index, K.dtype(feats))
    feats = K.reshape(
        feats, [-1, conv_dims[0], conv_dims[1], num_anchors, num_classes + 5])
    conv_dims = K.cast(K.reshape(conv_dims, [1, 1, 1, 1, 2]), K.dtype(feats))

    # Static generation of conv_index:
    # conv_index = np.array([_ for _ in np.ndindex(conv_width, conv_height)])
    # conv_index = conv_index[:, [1, 0]]  # swap columns for YOLO ordering.
    # conv_index = K.variable(
    #     conv_index.reshape(1, conv_height, conv_width, 1, 2))
    # feats = Reshape(
    #     (conv_dims[0], conv_dims[1], num_anchors, num_classes + 5))(feats)

    box_xy = K.sigmoid(feats[..., :2])
    box_wh = K.exp(feats[..., 2:4])
    box_confidence = K.sigmoid(feats[..., 4:5])
    box_class_probs = K.softmax(feats[..., 5:])

    # Adjust predictions to each spatial grid point and anchor size.
    # Note: YOLO iterates over height index before width index.
    # These give positions relative to the whole image.
    box_xy = (box_xy + conv_index) / conv_dims
    box_wh = box_wh * anchors_tensor / conv_dims

    return box_xy, box_wh, box_confidence, box_class_probs, feats
Example no. 45
def cross_entropy_loss(y_true, y_pred):
    return -K.mean(
        K.log(K.sigmoid(K.clip(K.sum(y_pred, axis=1) * y_true, -6, 6))))
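Spelled out, this is -mean(log sigmoid(clip(s * y, -6, 6))) with s the summed prediction and y_true presumably a ±1 label; the clip keeps log(sigmoid) finite. A toy NumPy check (hypothetical values):

import numpy as np

def xent(scores, y):
    z = np.clip(scores * y, -6, 6)
    return -np.mean(np.log(1.0 / (1.0 + np.exp(-z))))

print(xent(np.array([3.0, -2.0]), np.array([1.0, -1.0])))  # small: both correct
print(xent(np.array([3.0, -2.0]), np.array([-1.0, 1.0])))  # large: both wrong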
Example no. 46
def swish(x, beta = 1):
    return (x * sigmoid(beta * x))
Example no. 47
    # Reshape to batch, height, width, num_anchors, box_params.
    anchors_tensor = K.reshape(K.constant(anchors), [1, 1, 1, num_anchors, 2])

    grid_shape = K.shape(feats)[1:3] # height, width
    grid_y = K.tile(K.reshape(K.arange(0, stop=grid_shape[0]), [-1, 1, 1, 1]),
        [1, grid_shape[1], 1, 1])
    grid_x = K.tile(K.reshape(K.arange(0, stop=grid_shape[1]), [1, -1, 1, 1]),
        [grid_shape[0], 1, 1, 1])
    grid = K.concatenate([grid_x, grid_y])
    grid = K.cast(grid, K.dtype(feats))

    feats = K.reshape(
        feats, [-1, grid_shape[0], grid_shape[1], num_anchors, num_classes + 5+3+3*BIN])

    # Adjust predictions to each spatial grid point and anchor size.
    box_xy = (K.sigmoid(feats[..., :2]) + grid) / K.cast(grid_shape[::-1], K.dtype(feats))
    box_wh = K.exp(feats[..., 2:4]) * anchors_tensor / K.cast(input_shape[::-1], K.dtype(feats))
    box_confidence = K.sigmoid(feats[..., 4])
    # 3D dimensions: left linear on purpose (no exp / no sigmoid applied)
    box_dim = feats[..., 5:8]
    box_3d_conf = K.sigmoid(feats[..., 8:10])
    box_3d_cossin = K.l2_normalize(feats[..., 10:14])
    box_class_probs = K.sigmoid(feats[..., 14:])

    if calc_loss == True:
        return grid, feats, box_xy, box_wh, box_dim, box_3d_conf, box_3d_cossin
    return box_xy, box_wh, box_confidence, box_dim, box_3d_conf, box_3d_cossin, box_class_probs
Example no. 48
 def score_loss(y_true, y_pred):
     preds = K.flatten(K.sigmoid(y_pred - 1))
     trues = K.flatten(y_true)
     return K.square(preds - trues)
Example no. 49
def yolo_head(feats, anchors, num_classes):
    """Convert final layer features to bounding box parameters.

    Parameters
    ----------
    feats : tensor
        Final convolutional layer features.
    anchors : array-like
        Anchor box widths and heights.
    num_classes : int
        Number of target classes.

    Returns
    -------
    box_xy : tensor
        x, y box predictions adjusted by spatial location in conv layer.
    box_wh : tensor
        w, h box predictions adjusted by anchors and conv spatial resolution.
    box_conf : tensor
        Probability estimate for whether each box contains any object.
    box_class_pred : tensor
        Probability distribution estimate for each box over class labels.
    """
    num_anchors = len(anchors)
    # Reshape to batch, height, width, num_anchors, box_params.
    anchors_tensor = K.reshape(K.variable(anchors), [1, 1, 1, num_anchors, 2])

    # Static implementation for fixed models.
    # TODO: Remove or add option for static implementation.
    # _, conv_height, conv_width, _ = K.int_shape(feats)
    # conv_dims = K.variable([conv_width, conv_height])

    # Dynamic implementation of conv dims for fully convolutional model.
    conv_dims = K.shape(feats)[1:3]  # assuming channels last
    # In YOLO the height index is the innermost iteration.
    conv_height_index = K.arange(0, stop=conv_dims[0])
    conv_width_index = K.arange(0, stop=conv_dims[1])
    conv_height_index = K.tile(conv_height_index, [conv_dims[1]])

    # TODO: Repeat_elements and tf.split doesn't support dynamic splits.
    # conv_width_index = K.repeat_elements(conv_width_index, conv_dims[1], axis=0)
    conv_width_index = K.tile(
        K.expand_dims(conv_width_index, 0), [conv_dims[0], 1])
    conv_width_index = K.flatten(K.transpose(conv_width_index))
    conv_index = K.transpose(K.stack([conv_height_index, conv_width_index]))
    conv_index = K.reshape(conv_index, [1, conv_dims[0], conv_dims[1], 1, 2])
    conv_index = K.cast(conv_index, K.dtype(feats))

    feats = K.reshape(
        feats, [-1, conv_dims[0], conv_dims[1], num_anchors, num_classes + 5])
    conv_dims = K.cast(K.reshape(conv_dims, [1, 1, 1, 1, 2]), K.dtype(feats))

    # Static generation of conv_index:
    # conv_index = np.array([_ for _ in np.ndindex(conv_width, conv_height)])
    # conv_index = conv_index[:, [1, 0]]  # swap columns for YOLO ordering.
    # conv_index = K.variable(
    #     conv_index.reshape(1, conv_height, conv_width, 1, 2))
    # feats = Reshape(
    #     (conv_dims[0], conv_dims[1], num_anchors, num_classes + 5))(feats)

    box_xy = K.sigmoid(feats[..., :2])
    box_wh = K.exp(feats[..., 2:4])
    box_confidence = K.sigmoid(feats[..., 4:5])
    box_class_probs = K.softmax(feats[..., 5:])

    # Adjust predictions to each spatial grid point and anchor size.
    # Note: YOLO iterates over height index before width index.
    box_xy = (box_xy + conv_index) / conv_dims
    box_wh = box_wh * anchors_tensor / conv_dims

    return box_xy, box_wh, box_confidence, box_class_probs
Example no. 50
    def classification_loss(self, y_true, y_pred):
        '''Classification loss metric'''
        (yolo_output, true_boxes, detectors_mask,
         matching_true_boxes) = self.args

        num_anchors = len(self.anchors)
        object_scale = LAMBDA_OBJ
        no_object_scale = LAMBDA_NOOBJ
        class_scale = LAMBDA_CLASS
        coordinates_scale = LAMBDA_COORD
        pred_xy, pred_wh, pred_confidence, pred_class_prob = yolo_head(
            yolo_output, self.anchors, self.num_classes)

        # Unadjusted box predictions for loss.
        # TODO: Remove extra computation shared with yolo_head.
        yolo_output_shape = K.shape(yolo_output)
        feats = K.reshape(yolo_output, [
            -1, yolo_output_shape[1], yolo_output_shape[2], num_anchors,
            self.num_classes + 5
        ])
        pred_boxes = K.concatenate(
            (K.sigmoid(feats[..., 0:2]), feats[..., 2:4]), axis=-1)

        # TODO: Adjust predictions by image width/height for non-square images?
        # IOUs may be off due to different aspect ratio.

        # Expand pred x,y,w,h to allow comparison with ground truth.
        # batch, conv_height, conv_width, num_anchors, num_true_boxes, box_params
        pred_xy = K.expand_dims(pred_xy, 4)
        pred_wh = K.expand_dims(pred_wh, 4)

        pred_wh_half = pred_wh / 2.
        pred_mins = pred_xy - pred_wh_half
        pred_maxes = pred_xy + pred_wh_half

        true_boxes_shape = K.shape(true_boxes)

        # batch, conv_height, conv_width, num_anchors, num_true_boxes, box_params
        true_boxes = K.reshape(true_boxes, [
            true_boxes_shape[0], 1, 1, 1, true_boxes_shape[1],
            true_boxes_shape[2]
        ])
        true_xy = true_boxes[..., 0:2]
        true_wh = true_boxes[..., 2:4]

        # Find IOU of each predicted box with each ground truth box.
        true_wh_half = true_wh / 2.
        true_mins = true_xy - true_wh_half
        true_maxes = true_xy + true_wh_half

        intersect_mins = K.maximum(pred_mins, true_mins)
        intersect_maxes = K.minimum(pred_maxes, true_maxes)
        intersect_wh = K.maximum(intersect_maxes - intersect_mins, 0.)
        intersect_areas = intersect_wh[..., 0] * intersect_wh[..., 1]

        pred_areas = pred_wh[..., 0] * pred_wh[..., 1]
        true_areas = true_wh[..., 0] * true_wh[..., 1]

        union_areas = pred_areas + true_areas - intersect_areas
        iou_scores = intersect_areas / union_areas

        # Best IOUs for each location.
        best_ious = K.max(iou_scores, axis=4)  # Best IOU scores.
        best_ious = K.expand_dims(best_ious)

        # A detector has found an object if IOU > thresh for some true box.
        object_detections = K.cast(best_ious > 0.6, K.dtype(best_ious))

        # TODO: Darknet region training includes extra coordinate loss for early
        # training steps to encourage predictions to match anchor priors.

        # Determine confidence weights from object and no_object weights.
        # NOTE: YOLO does not use binary cross-entropy here.
        no_object_weights = (no_object_scale * (1 - object_detections) *
                             (1 - detectors_mask))
        no_objects_loss = no_object_weights * K.square(-pred_confidence)

        if self.rescore_confidence:
            objects_loss = (object_scale * detectors_mask *
                            K.square(best_ious - pred_confidence))
        else:
            objects_loss = (object_scale * detectors_mask *
                            K.square(1 - pred_confidence))
        confidence_loss = objects_loss + no_objects_loss

        # Classification loss for matching detections.
        # NOTE: YOLO does not use categorical cross-entropy loss here.
        matching_classes = K.cast(matching_true_boxes[..., 4], 'int32')
        matching_classes = K.one_hot(matching_classes, self.num_classes)

        classification_loss = (class_scale * detectors_mask *
                               K.square(matching_classes - pred_class_prob))

        classification_loss_sum = K.sum(classification_loss)

        return classification_loss_sum
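The IOU block above relies on broadcasting: the predictions carry a singleton num_true_boxes axis and the reshaped ground truth carries singleton batch-spatial/anchor axes, so the elementwise min/max compares every predicted box with every true box at once. Below is a minimal single-pair NumPy sketch of the same corner-based IOU, with made-up (x, y, w, h) boxes purely for illustration.

import numpy as np

def iou_single(box_a, box_b):
    """Corner-based IOU for two (x, y, w, h) boxes, mirroring the
    min/max logic in the loss above for a single pair."""
    (ax, ay, aw, ah), (bx, by, bw, bh) = box_a, box_b
    a_min = np.array([ax - aw / 2., ay - ah / 2.])
    a_max = np.array([ax + aw / 2., ay + ah / 2.])
    b_min = np.array([bx - bw / 2., by - bh / 2.])
    b_max = np.array([bx + bw / 2., by + bh / 2.])
    intersect_wh = np.maximum(np.minimum(a_max, b_max) - np.maximum(a_min, b_min), 0.)
    intersect = intersect_wh[0] * intersect_wh[1]
    union = aw * ah + bw * bh - intersect
    return intersect / union

# Two unit boxes whose centers are half a box apart overlap by half a box:
print(iou_single((0.5, 0.5, 1., 1.), (1.0, 0.5, 1., 1.)))  # -> 0.333...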
Example n. 51
def output_lambda(x,
                  init_alpha=1.0,
                  max_beta_value=5.0,
                  scalefactor=None,
                  alpha_kernel_scalefactor=None):
    """Elementwise (Lambda) computation of alpha and regularized beta.

        - Alpha:

            (activation)
            Exponential units seem to give faster training than the
            original paper's softplus units, which makes sense given the
            logarithmic effect of changes in alpha.
            (initialization)
            To get faster training and fewer exploding gradients,
            initialize alpha to be around its scale when beta is around 1.0,
            i.e. approximately the expected value/mean of the training TTE.
            Because we're lazy we want the correct scale of output built
            into the model, so initialize implicitly:
            multiply the assumed exp(0) = 1 by the scale factor `init_alpha`.

        - Beta:

            (activation)
            We want slow changes when beta -> 0, so softplus made sense in the
            original paper, but a sigmoid gives a similar effect and has
            other nice properties.
            (regularization) Use `max_beta_value` to implicitly regularize the model.
            (initialization) Fixed so that beta begins moving slowly around 1.0.

        - Usage
            .. code-block:: python

                model.add(TimeDistributed(Dense(2)))
                model.add(Lambda(wtte.output_lambda,
                                 arguments={"init_alpha": init_alpha,
                                            "max_beta_value": 2.0}))


        :param x: tensor whose last dimension has length 2, with x[..., 0] = alpha and x[..., 1] = beta
        :param init_alpha: initial value of `alpha`. Default value is 1.0.
        :param max_beta_value: maximum beta value. Default value is 5.0.
        :param scalefactor: factor used to scale both raw outputs before activation. Default is `None`.
        :param alpha_kernel_scalefactor: deprecated; use `scalefactor` instead. Default is `None`.
        :type x: Array
        :type init_alpha: Float
        :type max_beta_value: Float
        :type scalefactor: Float
        :return x: A positive `Tensor` of the same shape as the input
        :rtype: Array

    """
    if max_beta_value is None or max_beta_value > 3:
        if K.epsilon() > 1e-07 and K.backend() == 'tensorflow':
            # TODO need to think this through lol
            message = "\
            Using tensorflow backend and allowing high `max_beta_value` may lead to\n\
            gradient NaN during training unless `K.epsilon()` is small.\n\
            Call `keras.backend.set_epsilon(1e-08)` to lower epsilon \
            "

            warnings.warn(message)
    if alpha_kernel_scalefactor is not None:
        message = "`alpha_kernel_scalefactor` deprecated in favor of `scalefactor` scaling both.\n Setting `scalefactor = alpha_kernel_scalefactor`"
        warnings.warn(message)
        scalefactor = alpha_kernel_scalefactor

    a, b = _keras_unstack_hack(x)

    if scalefactor is not None:
        # Scaling is applied after unstacking due to a Theano bug.
        a, b = scalefactor * a, scalefactor * b

    # Implicitly initialize alpha:
    a = init_alpha * K.exp(a)

    if max_beta_value is None:
        # No cap on beta: the sigmoid scaling below needs a finite
        # `max_beta_value`, so fall back to the original paper's softplus
        # (assumed behavior; the original code would crash on `None` here).
        b = K.softplus(b)
    else:
        if max_beta_value > 1.05:  # some value >> 1.0
            # Shift to start around 1.0, assuming input is around 0.0:
            # max_beta_value * sigmoid(-log(max_beta_value - 1.0)) == 1.0
            _shift = np.log(max_beta_value - 1.0)
            b = b - _shift

        b = max_beta_value * K.sigmoid(b)

    x = K.stack([a, b], axis=-1)

    return x
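As a quick sanity check of the shift above (a standalone sketch, not part of the library): with the raw network output around 0.0 and, say, max_beta_value = 5.0, the shifted, scaled sigmoid starts beta at exactly 1.0, which is the "begin moving slowly around 1.0" initialization the docstring describes.

import numpy as np

max_beta_value = 5.0
_shift = np.log(max_beta_value - 1.0)  # the same shift used in output_lambda

# sigmoid(0 - _shift) == 1 / (1 + (max_beta_value - 1)) == 1 / max_beta_value
beta_at_zero = max_beta_value / (1.0 + np.exp(_shift))
print(beta_at_zero)  # -> 1.0
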
def swish(x):
    return x * K.sigmoid(x)
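
Since swish is an ordinary callable, it can be passed directly as a Keras activation. A usage sketch with arbitrary, illustration-only layer sizes:

from keras.models import Sequential
from keras.layers import Dense

model = Sequential([
    Dense(64, activation=swish, input_shape=(10,)),  # arbitrary sizes
    Dense(1),
])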