def yolo_head(feats, anchors, num_classes, input_shape):
    """Convert final layer features to bounding box parameters."""
    num_anchors = len(anchors)
    # Reshape to batch, height, width, num_anchors, box_params.
    anchors_tensor = K.reshape(K.constant(anchors), [1, 1, 1, num_anchors, 2])

    grid_shape = K.shape(feats)[1:3]  # height, width
    grid_y = K.tile(K.reshape(K.arange(0, stop=grid_shape[0]), [-1, 1, 1, 1]),
                    [1, grid_shape[1], 1, 1])
    grid_x = K.tile(K.reshape(K.arange(0, stop=grid_shape[1]), [1, -1, 1, 1]),
                    [grid_shape[0], 1, 1, 1])
    grid = K.concatenate([grid_x, grid_y])
    grid = K.cast(grid, K.dtype(feats))

    feats = K.reshape(
        feats, [-1, grid_shape[0], grid_shape[1], num_anchors, num_classes + 5])

    box_xy = K.sigmoid(feats[..., :2])
    box_wh = K.exp(feats[..., 2:4])
    box_confidence = K.sigmoid(feats[..., 4:5])
    box_class_probs = K.sigmoid(feats[..., 5:])

    # Adjust predictions to each spatial grid point and anchor size.
    box_xy = (box_xy + grid) / K.cast(grid_shape[::-1], K.dtype(feats))
    box_wh = box_wh * anchors_tensor / K.cast(input_shape[::-1], K.dtype(feats))

    return box_xy, box_wh, box_confidence, box_class_probs
def step(self, x, states):
    r_tm1, V_tm1, s_tm1, time = states[:4]
    h_tm1 = states[4:]

    op_t, h_t = _update_controller(self, T.concatenate([x, r_tm1], axis=-1),
                                   h_tm1)

    d_t = K.sigmoid(K.dot(op_t, self.W_d) + self.b_d)
    u_t = K.sigmoid(K.dot(op_t, self.W_u) + self.b_u)
    v_t = K.tanh(K.dot(op_t, self.W_v) + self.b_v)
    o_t = K.tanh(K.dot(op_t, self.W_o) + self.b_o)

    time = time + 1
    V_t, s_t, r_t = _update_neural_stack(self, V_tm1, s_tm1, d_t[:, 0],
                                         u_t[:, 0], v_t, time[0],
                                         stack=self.stack)

    return o_t, [r_t, V_t, s_t, time] + h_t
def call(self, x):
    assert K.backend() == 'tensorflow'
    temp = K.permute_dimensions(x, (0, 2, 1))
    for i in range(0, self.attention_depth):
        temp = K.sigmoid(K.dot(temp, self.Ws[i]) + self.bs[i])
    temp = K.permute_dimensions(temp, (0, 2, 1))
    estimated_weight = K.squeeze(K.dot(temp, K.expand_dims(self.Wf, -1)), -1)
    biased_weight = estimated_weight + self.bias
    non_linear_weight = K.tanh(biased_weight)

    # For each hidden state, calculate how much it should contribute to the
    # context vector. This is the main part of attention. To convert the
    # weights to "probabilities", use a softmax-style normalization:
    # exp(x) / sum(exp(xi)).
    prob = K.exp(non_linear_weight)
    # Compute the total sum for each batch.
    total_sum = K.sum(prob, axis=1, keepdims=True)
    prob /= K.cast(total_sum, K.floatx())

    # Enable this if you want access to internal probabilities.
    # Should only be used for testing that Attention works as expected.
    # return prob

    # Multiply each hidden value by the corresponding probability.
    prob = K.expand_dims(prob, -1)
    new_hidden_values = x * prob
    return K.sum(new_hidden_values, axis=1)
def call(self, inputs, **kwargs):
    assert isinstance(inputs, list) and len(inputs) == 3
    first, second, features = inputs[0], inputs[1], inputs[2]
    if not self.from_logits:
        first = kb.clip(first, 1e-10, 1.0)
        second = kb.clip(second, 1e-10, 1.0)
        first_, second_ = kb.log(first), kb.log(second)
    else:
        first_, second_ = first, second
    # embedded_features.shape = (M, T, 1)
    if self.use_intermediate_layer:
        features = kb.dot(features, self.first_kernel)
        features = kb.bias_add(features, self.first_bias,
                               data_format="channels_last")
        features = self.intermediate_activation(features)
    embedded_features = kb.dot(features, self.features_kernel)
    embedded_features = kb.bias_add(
        embedded_features, self.features_bias, data_format="channels_last")
    if self.use_dimension_bias:
        tiling_shape = [1] * (kb.ndim(first) - 1) + [kb.shape(first)[-1]]
        embedded_features = kb.tile(embedded_features, tiling_shape)
        embedded_features = kb.bias_add(
            embedded_features, self.dimensions_bias,
            data_format="channels_last")
    sigma = kb.sigmoid(embedded_features)

    result = weighted_sum(first_, second_, sigma,
                          self.first_threshold, self.second_threshold)
    probs = kb.softmax(result)
    if self.return_logits:
        return [probs, result]
    return probs
def step(self, x, states):
    M = states[0]  # (nb_samples, nb_slots, memory_size)
    h = states[1]  # (nb_samples, memory_size)
    w = states[2]  # (nb_samples, nb_slots)

    # ------ Memory read ------
    k = self.W_k(h)  # (nb_samples, memory_size)
    w_hat = T.batched_tensordot(M, k, axes=[(2), (1)])  # (nb_samples, nb_slots)
    beta = K.sigmoid(self.W_b(h))  # (nb_samples, 1)
    beta = K.repeat(beta, self.nb_slots)  # (nb_samples, nb_slots, 1)
    beta = K.squeeze(beta, 2)  # (nb_samples, nb_slots)
    w_hat = softmax(w_hat * beta)  # (nb_samples, nb_slots)
    g = sigmoid(self.W_hg(h))  # (nb_samples, 1)
    g = K.repeat(g, self.nb_slots)  # (nb_samples, nb_slots, 1)
    g = K.squeeze(g, 2)  # (nb_samples, nb_slots)
    w = (1 - g) * w + g * w_hat  # (nb_samples, nb_slots)
    c = T.batched_tensordot(w, M, axes=[(1), (1)])
    h = tanh(self.W_ih(x) + self.W_c(c))
    y = self.W_ho(h)

    # ------ Memory write ------
    v = self.W_v(h)  # (nb_samples, memory_size)
    v = K.repeat(v, 1)
    e = sigmoid(self.W_he(h))  # (nb_samples, nb_slots)
    f = 1 - w * e  # (nb_samples, nb_slots)
    f = K.repeat(f, self.memory_size)  # (nb_samples, memory_size, nb_slots)
    f = K.permute_dimensions(f, (0, 2, 1))  # (nb_samples, nb_slots, memory_size)
    u = w  # (nb_samples, nb_slots)
    u = K.repeat(u, 1)
    uv = T.batched_tensordot(u, v, axes=[(1), (1)])
    M = M * f + uv
    return y, [M, h, w]
def yolo_v1_loss(y_true, y_pred):
    # y_pred is a (batch, 40, 7) tensor; y_true is a (40, 7) tensor per sample.
    truth_conf_tensor = K.expand_dims(y_true[:, :, 0], 2)
    truth_xy_tensor = y_true[:, :, 1:3]
    truth_wh_tensor = y_true[:, :, 3:5]
    truth_m_tensor = K.expand_dims(y_true[:, :, 5], 2)
    truth_v_tensor = K.expand_dims(y_true[:, :, 6], 2)

    pred_conf_tensor = K.expand_dims(y_pred[:, :, 0], 2)
    pred_xy_tensor = y_pred[:, :, 1:3]
    pred_wh_tensor = y_pred[:, :, 3:5]
    pred_m_tensor = K.expand_dims(y_pred[:, :, 5], 2)
    pred_v_tensor = K.expand_dims(y_pred[:, :, 6], 2)

    truth_xy_tensor = tf.Print(truth_xy_tensor, [truth_xy_tensor[:, 14:20, 0]],
                               message='truth_xy', summarize=30)
    pred_xy_tensor = tf.Print(pred_xy_tensor, [pred_xy_tensor[:, 14:20, 0]],
                              message='pred_xy', summarize=30)

    tens = K.greater(K.sigmoid(truth_conf_tensor), 0.5)
    tens_2d = K.concatenate([tens, tens], axis=-1)

    conf_loss = yolo_conf_loss(truth_conf_tensor, pred_conf_tensor, tens)
    xy_loss = yoloxyloss(truth_xy_tensor, pred_xy_tensor, tens_2d)
    wh_loss = yolo_wh_loss(truth_wh_tensor, pred_wh_tensor, tens_2d)
    m_loss = yolo_regressor_loss(truth_m_tensor, pred_m_tensor, tens)
    v_loss = yolo_regressor_loss(truth_v_tensor, pred_v_tensor, tens)

    loss = (2.0 * conf_loss + 0.25 * xy_loss + 0.25 * wh_loss
            + 1.5 * m_loss + 1.25 * v_loss)  # loss v1
    # loss = (2.0 * conf_loss + 0.1 * xy_loss + 1.0 * wh_loss
    #         + 5.0 * m_loss + 2.5 * v_loss)  # loss v2
    return loss
def step(self, x, states):
    r_tm1, V_tm1, s_tm1, time = states[:4]
    h_tm1 = states[4:]

    def print_name_shape(name, x):
        # Debug helper: prints the shape under `name` and evaluates to zero,
        # so it can be added to any tensor without changing its value.
        return T.cast(K.sum(theano.printing.Print(name)(x.shape)) * 0,
                      "float32")

    r_tm1 = r_tm1 + print_name_shape("out\nr_tm1", r_tm1) + \
        print_name_shape("V_tm1", V_tm1) + \
        print_name_shape("s_tm1", s_tm1) + \
        print_name_shape("x", x) + \
        print_name_shape("h_tm1_0", h_tm1[0]) + \
        print_name_shape("h_tm1_1", h_tm1[1])

    op_t, h_t = self._update_controller(T.concatenate([x, r_tm1], axis=-1),
                                        h_tm1)

    op_t = op_t + print_name_shape("afterop_t", op_t)

    ao = K.dot(op_t, self.W_d)
    ao = ao + print_name_shape("ao", ao)
    d_t = K.sigmoid(ao + self.b_d) + print_name_shape("afterop2_t", op_t)
    u_t = K.sigmoid(K.dot(op_t, self.W_u) + self.b_u) + \
        print_name_shape("d_t", op_t)
    v_t = K.tanh(K.dot(op_t, self.W_v) + self.b_v) + \
        print_name_shape("u_t", u_t)
    o_t = K.tanh(K.dot(op_t, self.W_o) + self.b_o) + \
        print_name_shape("v_t", v_t)
    o_t = o_t + print_name_shape("afterbulk_t", o_t)

    time = time + 1
    V_t, s_t, r_t = _update_neural_stack(self, V_tm1, s_tm1, d_t[:, 0],
                                         u_t[:, 0], v_t, time[0],
                                         stack=self.stack)

    V_t = V_t + print_name_shape("o_t", o_t) + \
        print_name_shape("r_t", r_t) + \
        print_name_shape("V_t", V_t) + \
        print_name_shape("s_t", s_t)

    return o_t, [r_t, V_t, s_t, time] + h_t
def sigmoid(x):
    """Sigmoid activation function.

    >>> sigmoid(0)
    0.5
    """
    return K.eval(K.sigmoid(K.variable(x))).tolist()
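# A quick numerical cross-check of the helper above, assuming only NumPy is
# available (the `K.eval` round-trip is exact but heavyweight for scalars).
# This is an illustrative sketch, not part of the original module.
import numpy as np

def sigmoid_np(x):
    """Reference sigmoid: 1 / (1 + exp(-x))."""
    return 1.0 / (1.0 + np.exp(-x))

assert sigmoid_np(0) == 0.5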
def step(self, x, states):
    r_tm1, V_tm1, s_tm1, time = states[:4]
    h_tm1 = states[4:]

    op_t, h_t = _update_controller(self, T.concatenate([x, r_tm1], axis=-1),
                                   h_tm1)

    d_t = K.sigmoid(K.dot(op_t, self.W_d) + self.b_d)
    u_t = K.sigmoid(K.dot(op_t, self.W_u) + self.b_u)
    v_t = K.tanh(K.dot(op_t, self.W_v) + self.b_v)
    o_t = K.tanh(K.dot(op_t, self.W_o) + self.b_o)

    time = time + 1
    V_t, s_t, r_t = _update_neural_stack(self, V_tm1, s_tm1, d_t[:, 0],
                                         u_t[:, 0], v_t, time[0],
                                         stack=self.stack)

    return o_t, [r_t, V_t, s_t, time] + h_t
def call(self, x, mask=None):
    N_DECISION = (2 ** self.n_depth) - 1   # Number of decision nodes
    N_LEAF = 2 ** (self.n_depth + 1)       # Number of leaf nodes

    flat_decision_p_e = []
    leaf_p_e = []
    for w_d, w_l in zip(self.w_d_ensemble, self.w_l_ensemble):
        decision_p = K.sigmoid(K.dot(x, w_d))
        leaf_p = K.softmax(w_l)

        decision_p_comp = 1 - decision_p
        decision_p_pack = K.concatenate([decision_p, decision_p_comp])

        flat_decision_p_e.append(decision_p_pack)
        leaf_p_e.append(leaf_p)

    # Construct the tiling pattern for the decision probability matrix.
    # Could be done in TF, but it's better done statically.
    tiling_pattern = np.zeros((N_LEAF, self.n_depth), dtype=np.int32)
    comp_offset = N_DECISION
    dec_idx = 0
    for n in range(self.n_depth):  # `xrange` in the Python 2 original
        j = 0
        for depth_idx in range(2 ** n):
            repeat_times = 2 ** (self.n_depth - n)
            for _ in range(repeat_times):
                tiling_pattern[j][n] = dec_idx
                j = j + 1
            for _ in range(repeat_times):
                tiling_pattern[j][n] = comp_offset + dec_idx
                j = j + 1
            dec_idx = dec_idx + 1
    flat_pattern = tiling_pattern.flatten()

    # Iterate over each tree.
    tree_ret = None
    for flat_decision_p, leaf_p in zip(flat_decision_p_e, leaf_p_e):
        flat_mu = tf.transpose(tf.gather(tf.transpose(flat_decision_p),
                                         flat_pattern))
        batch_size = tf.shape(flat_decision_p)[0]
        shape = tf.stack([batch_size, N_LEAF, self.n_depth])  # `tf.pack` pre-TF1.0
        mu = K.reshape(flat_mu, shape)
        leaf_prob = K.prod(mu, [2])
        prob_label = K.dot(leaf_prob, leaf_p)
        if tree_ret is None:
            tree_ret = prob_label
        else:
            tree_ret = tree_ret + prob_label
    return tree_ret / self.n_trees
def pair_loss(y_true, y_pred):
    y_true = tf.cast(y_true, tf.int32)
    parts = tf.dynamic_partition(y_pred, y_true, 2)
    y_pos = parts[1]
    y_neg = parts[0]
    y_pos = tf.expand_dims(y_pos, 0)
    y_neg = tf.expand_dims(y_neg, -1)
    out = K.sigmoid(y_neg - y_pos)
    return K.mean(out)
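# A quick sanity sketch (not from the original source) of how the
# `tf.dynamic_partition` call in `pair_loss` routes scores: labels of 1 go
# to the positive partition, labels of 0 to the negative one.
import tensorflow as tf

scores = tf.constant([0.9, 0.2, 0.7, 0.1])
labels = tf.constant([1, 0, 1, 0])
neg, pos = tf.dynamic_partition(scores, labels, 2)
# neg -> [0.2, 0.1], pos -> [0.9, 0.7]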
def triplet_loss(self, y_true, y_pred):
    y_pred = K.sigmoid(y_pred)
    p_plus = K.sum(y_true * y_pred, axis=1, keepdims=True)
    p_gaps = y_pred - p_plus + self.margin
    L = K.maximum(0.0, p_gaps)  # float literal so dtypes match under TF
    # return T.max(L, axis=1)
    return K.sum(L, axis=1)
def bpr_triplet_loss(X):
    positive_item_latent, negative_item_latent, user_latent = X

    # BPR loss
    loss = 1.0 - K.sigmoid(
        K.sum(user_latent * positive_item_latent, axis=-1, keepdims=True) -
        K.sum(user_latent * negative_item_latent, axis=-1, keepdims=True))

    return loss
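# A minimal sketch of how a merge loss like `bpr_triplet_loss` is usually
# wired up with a Keras `Lambda` layer. The input names, latent size, and
# the identity-style training loss below are illustrative assumptions, not
# taken from the original model.
from keras.layers import Input, Lambda
from keras.models import Model

latent_dim = 32
positive_item = Input(shape=(latent_dim,))
negative_item = Input(shape=(latent_dim,))
user = Input(shape=(latent_dim,))

triplet_loss_out = Lambda(bpr_triplet_loss, output_shape=(1,))(
    [positive_item, negative_item, user])
model = Model(inputs=[positive_item, negative_item, user],
              outputs=triplet_loss_out)
# The model's output already *is* the loss, so train against a dummy target:
model.compile(optimizer='adam', loss=lambda y_true, y_pred: y_pred)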
def get_gate(self, state_j, key_j, inputs):
    r"""
    Implements the gate (scalar for each block).

    Equation 2: g_j <- \sigma(s_t^T h_j + s_t^T w_j)
    """
    a = tf.reduce_sum(inputs * state_j, axis=1)
    b = tf.reduce_sum(inputs * key_j, axis=1)
    return K.sigmoid(a + b)
def bpr_triplet_loss(X):
    user_latent, item_latent = X.values()
    positive_item_latent, negative_item_latent = item_latent.values()

    # BPR loss
    loss = -K.sigmoid(
        K.sum(user_latent * positive_item_latent, axis=-1, keepdims=True) -
        K.sum(user_latent * negative_item_latent, axis=-1, keepdims=True))

    return loss
def yolo_conf_loss(y_true, y_pred, t):
    real_y_true = tf.where(t, y_true, K.zeros_like(y_true))
    pobj = K.sigmoid(y_pred)
    lo = K.square(real_y_true - pobj)
    value_if_true = 5.0 * lo
    value_if_false = 0.05 * lo
    loss1 = tf.where(t, value_if_true, value_if_false)
    loss = K.mean(loss1)
    return loss
def __call__(self, loss):
    if not hasattr(self, 'layer'):
        raise Exception('Need to call `set_layer` on '
                        'ActivityRegularizer instance '
                        'before calling the instance.')
    regularized_loss = loss
    for i in range(len(self.layer.inbound_nodes)):
        output = K.sigmoid(0.1 * self.layer.get_output_at(i))
        # output = self.layer.get_output_at(i)
        p_hat = K.mean(K.abs(output))
        regularized_loss += self.l * kl_divergence(self.p, p_hat)
    return K.in_train_phase(regularized_loss, loss)
def step(self, x, states):
    h, [h, c] = self.layer.step(x, states)
    attention = states[4]

    m = self.attn_activation(K.dot(h, self.U_a) * attention + self.b_a)
    s = K.sigmoid(K.dot(m, self.U_s) + self.b_s)

    if self.single_attention_param:
        h = h * K.repeat_elements(s, self.layer.output_dim, axis=1)
    else:
        h = h * s

    return h, [h, c]
def sample_h_given_x(self, x):
    """
    Draw a sample from p(h|x).

    For a Bernoulli RBM the conditional probability distribution can be
    derived to be p(h_j=1|x) = sigmoid(x^T W[:,j] + bh_j).
    """
    # Pre-sigmoid activation (used in the cross-entropy error calculation
    # for better numerical stability).
    h_pre = K.dot(x, self.W) + self.bh
    # Mean of the Bernoulli distribution ('p', probability of the variable
    # taking value 1), sometimes called the mean-field value.
    h_sigm = K.sigmoid(h_pre)
    # Random sample:
    #   \hat{h} = 1 if p(h=1|x) > uniform(0, 1), else 0.
    h_samp = random_binomial(shape=h_sigm.shape, n=1, p=h_sigm)
    # pre and sigm are returned to compute the cross-entropy.
    return h_samp, h_pre, h_sigm
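# `random_binomial` above is a free helper, not a Keras backend function. A
# minimal version under that assumption: sample h = 1 wherever a uniform
# draw falls below the activation probability.
def random_binomial(shape, n=1, p=0.5):
    # n is fixed at 1 here (a single Bernoulli trial per unit).
    return K.cast(K.random_uniform(shape) < p, K.floatx())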
def step(self, x, states):
    h, [h, c] = super(AttentionLSTM, self).step(x, states)
    attention = states[4]

    m = self.attn_activation(K.dot(h, self.U_a) * attention + self.b_a)
    # Intuitively it makes more sense to use a sigmoid here; the exponential
    # function was causing NaN problems, presumably from exploding gradients.
    s = K.sigmoid(K.dot(m, self.U_s) + self.b_s)

    if self.single_attention_param:
        h = h * K.repeat_elements(s, self.output_dim, axis=1)
    else:
        h = h * s

    return h, [h, c]
def sample_x_given_h(self, h):
    """
    Draw a sample from p(x|h).

    For a Bernoulli RBM the conditional probability distribution can be
    derived to be p(x_i=1|h) = sigmoid(W[i,:] h + bx_i).
    """
    # Pre-sigmoid activation (used in the cross-entropy error calculation
    # for better numerical stability).
    x_pre = K.dot(h, self.W.T) + self.bx
    # Mean of the Bernoulli distribution ('p', probability of the variable
    # taking value 1), sometimes called the mean-field value.
    x_sigm = K.sigmoid(x_pre)
    # Random sample:
    #   \hat{x} = 1 if p(x=1|h) > uniform(0, 1), else 0.
    x_samp = random_binomial(shape=x_sigm.shape, n=1, p=x_sigm)
    # pre and sigm are returned to compute the cross-entropy.
    return x_samp, x_pre, x_sigm
def call(self, inputs, mask=None):
    if not isinstance(inputs, list) or len(inputs) <= 1:
        raise TypeError('Attention must be called on a list of tensors '
                        '(at least 2). Got: ' + str(inputs))
    # (None(batch), MaxLen(time), spec_dim, embed_dim)
    mix_embed_l = inputs[0]
    # (None(batch), embed_dim)
    spk_embed_l = inputs[1]
    energy = None
    if self.mode == 'dot':
        # (batch, time, spec_dim, embed_dim) batch_dot(3,1) (batch, embed_dim)
        # -> (batch, time, spec_dim)
        energy = K.batch_dot(mix_embed_l, spk_embed_l, axes=(3, 1))
    elif self.mode == 'align':
        # (batch, time, spec_dim, embed_dim) dot (embed_dim, align_hidden)
        # -> (batch, time, spec_dim, align_hidden)
        hUa = K.dot(mix_embed_l, self.U_align)
        # (batch, embed_dim) dot (embed_dim, align_hidden)
        # -> (batch, align_hidden)
        sWa = K.dot(spk_embed_l, self.W_align)
        # -> (batch, time, spec_dim, align_hidden); Theano-style broadcast
        sWa = sWa.dimshuffle(0, 'x', 'x', 1)
        tanh_sWahUa = K.tanh(sWa + hUa)
        # (batch, time, spec_dim, align_hidden) dot (align_hidden, 1)
        # -> (batch, time, spec_dim, 1)
        energy = K.dot(tanh_sWahUa, self.v_align)
        # -> (batch, time, spec_dim)
        energy = K.reshape(energy, (-1, self.time_step, self.spec_dim))
    else:
        raise ValueError('Unknown merge mode.')

    if self.nonlinearity == 'sigmoid':
        alpha = K.sigmoid(energy)
    elif self.nonlinearity == 'linear':
        alpha = energy
    else:
        raise Exception('Unknown nonlinearity mode for attention: '
                        + self.nonlinearity)
    # (batch, time, spec_dim)
    return alpha
def build(self, input_shape=None):
    self.input_spec = InputSpec(shape=input_shape)
    if not self.layer.built:
        self.layer.build(input_shape)
        self.layer.built = True
    # Awkward but necessary: super().build() must run before we add new losses.
    super(ConcreteDropout, self).build()

    # Initialise p.
    self.p_logit = self.layer.add_weight(
        name='p_logit', shape=(1,),
        initializer=initializers.RandomUniform(self.init_min, self.init_max),
        trainable=True)
    self.p = K.sigmoid(self.p_logit[0])

    # Initialise the regulariser / prior KL term.
    input_dim = np.prod(input_shape[1:])  # we drop only the last dim
    weight = self.layer.kernel
    kernel_regularizer = (self.weight_regularizer * K.sum(K.square(weight))
                          / (1. - self.p))
    dropout_regularizer = self.p * K.log(self.p)
    dropout_regularizer += (1. - self.p) * K.log(1. - self.p)
    dropout_regularizer *= self.dropout_regularizer * input_dim
    regularizer = K.sum(kernel_regularizer + dropout_regularizer)
    self.layer.add_loss(regularizer)
def concrete_dropout(self, x):
    '''
    Concrete dropout - used at training time (gradients can be propagated).

    :param x: input
    :return: approx. dropped-out input
    '''
    eps = K.cast_to_floatx(K.epsilon())
    temp = 0.1

    unif_noise = K.random_uniform(shape=K.shape(x))
    drop_prob = (
        K.log(self.p + eps)
        - K.log(1. - self.p + eps)
        + K.log(unif_noise + eps)
        - K.log(1. - unif_noise + eps)
    )
    drop_prob = K.sigmoid(drop_prob / temp)
    random_tensor = 1. - drop_prob

    retain_prob = 1. - self.p
    x *= random_tensor
    x /= retain_prob
    return x
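# The Concrete relaxation above can be sanity-checked outside the layer.
# A standalone NumPy sketch (the temperature and dropout rate are
# illustrative values, mirroring the tensor math in `concrete_dropout`):
import numpy as np

def concrete_drop_prob(p, u, temp=0.1, eps=1e-7):
    logit = (np.log(p + eps) - np.log(1. - p + eps)
             + np.log(u + eps) - np.log(1. - u + eps))
    return 1. / (1. + np.exp(-logit / temp))

u = np.random.uniform(size=100000)
# As temp -> 0 this approaches hard Bernoulli(p) dropout, so the mean
# drop probability should sit near p:
print(concrete_drop_prob(0.3, u).mean())  # ~0.3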
def attention_step(self, x, args):
    '''
    Attention step function.

    # Arguments
        args: [h_, context, projected_context,
               W_h_prj, w_prj_att, b_att,
               W_x_h, U_h_h, W_ctx_h, b_h,
               W_x_p, W_h_p, W_ctx_p, b_p]
        h_: (batch_size, dim_hidden)
        context: (batch_size, nb_context, dim_context)
        projected_context: (batch_size, nb_context, dim_projected_context),
            projected_context = context dot W_ctx_prj + b_ctx_prj,
            calculated before the step.
        W_h_prj: (dim_hidden, dim_projected_context)
        w_prj_att: (dim_projected_context, 1)
        b_att: (1,)
        W_x_h: (dim_embedding, dim_hidden)
        U_h_h: (dim_hidden, dim_hidden)
        W_ctx_h: (dim_context, dim_hidden)
        b_h: (dim_hidden,)
    '''
    assert len(args) == 1 + len(self.contexts) + len(self.params)
    [h_, context, projected_context,
     W_h_prj, w_prj_att, b_att,
     W_x_h, U_h_h, W_ctx_h, b_h] = args

    projected = K.expand_dims(K.dot(h_, W_h_prj), 1) + projected_context
    e = K.dot(K.tanh(projected), w_prj_att) + b_att
    alpha = K.softmax(K.flatten(e))
    weighted_context = K.sum(context * K.expand_dims(alpha), 1)
    pre_act = (K.dot(x, W_x_h) + K.dot(h_, U_h_h)
               + K.dot(weighted_context, W_ctx_h) + b_h)
    h = K.sigmoid(pre_act)
    return h, [alpha, weighted_context], [h]
def call(self, inputs):
    return inputs * K.sigmoid(self.beta * inputs)
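# Context sketch for the `call` above: `self.beta` would typically be
# created in the layer's `build` as a trainable scalar. This is an
# assumption about the surrounding class, not code from the original.
from keras.layers import Layer

class Swish(Layer):
    def build(self, input_shape):
        # One learnable slope shared across all units.
        self.beta = self.add_weight(name='beta', shape=(1,),
                                    initializer='ones', trainable=True)
        super(Swish, self).build(input_shape)

    def call(self, inputs):
        return inputs * K.sigmoid(self.beta * inputs)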
def logsigmoid(x):
    x = K.sigmoid(x)
    x = K.log(x)
    return x
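# Computing log(sigmoid(x)) in two steps as above can underflow to log(0)
# for large negative x. A numerically safer equivalent, assuming the
# backend exposes `softplus` (log sigmoid(x) == -softplus(-x)):
def logsigmoid_stable(x):
    return -K.softplus(-x)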
def scaled_sigmoid(x):
    """Sigmoid scaled by 2*pi."""
    return K.tf.constant(2 * np.pi) * K.sigmoid(x)
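# Usage sketch (an assumption about intent): scaling the sigmoid by 2*pi
# makes it a natural output activation for predicting an angle in
# (0, 2*pi). A quick numeric check: sigmoid(0) = 0.5, so the output is pi.
print(K.eval(scaled_sigmoid(K.constant([0.0]))))  # -> [~3.14159]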
def calc_score(context_word, context_word_sense, W_g, W_s):
    # Note: `word` and `word_sense` are taken from the enclosing scope.
    return K.sigmoid(K.dot(get_vector(word, word_sense, W_g, W_s),
                           get_vector(context_word, context_word_sense,
                                      W_g, W_s)))
def Margin_Loss(y_true, y_pred):
    score_best = y_pred[0]
    score_predict = y_pred[1]
    loss = K.maximum(0.0, 1.0 - K.sigmoid(score_best - score_predict))
    # The `0 * y_true` term keeps y_true in the graph without affecting the loss.
    return K.mean(loss) + 0 * y_true
def swish(x):
    return x * backend.sigmoid(x)
def swish(x):
    return K.sigmoid(x) * x
    grid_shape = K.shape(feats)[1:3]  # height, width
    grid_y = K.tile(K.reshape(K.arange(0, stop=grid_shape[0]), [-1, 1, 1, 1]),
                    [1, grid_shape[1], 1, 1])
    grid_x = K.tile(K.reshape(K.arange(0, stop=grid_shape[1]), [1, -1, 1, 1]),
                    [grid_shape[0], 1, 1, 1])
    grid = K.concatenate([grid_x, grid_y])
    # `grid` holds each cell's offset from the top-left corner.
    '''
    The cell at (0, 0) has zero offset from the top-left corner in both the X
    and Y directions; the cell at (1, 0) is offset by one cell along Y and
    none along X, and so on. `grid` thus records each box's offset from the
    top-left corner; adding `grid` to each prediction gives the box
    coordinates relative to its cell.
    '''
    grid = K.cast(grid, K.dtype(feats))

    # Reshape the raw model output; its original shape is
    # [batch_size, grid_shape[0], grid_shape[1], num_anchors * (num_classes + 5)].
    feats = K.reshape(
        feats, [-1, grid_shape[0], grid_shape[1], num_anchors, num_classes + 5])

    # Compute bx = cx + tx (and likewise for y).
    box_xy = (K.sigmoid(feats[..., :2]) + grid) / K.cast(grid_shape[::-1],
                                                         K.dtype(feats))
    # Convert w, h; these formulas come from the YOLOv2 paper.
    box_wh = K.exp(feats[..., 2:4]) * anchors_tensor / K.cast(
        input_shape[::-1], K.dtype(feats))
    box_confidence = K.sigmoid(feats[..., 4:5])  # objectness confidence
    box_class_probs = K.sigmoid(feats[..., 5:])  # per-class probabilities

    if calc_loss == True:
        return grid, feats, box_xy, box_wh
    return box_xy, box_wh, box_confidence, box_class_probs


# Recover the true box coordinates, converted to the actual image size.
def yolo_correct_boxes(box_xy, box_wh, input_shape, image_shape):
    box_yx = box_xy[..., ::-1]  # swap the x, y columns to y, x
    box_hw = box_wh[..., ::-1]  # swap the w, h columns to h, w
    input_shape = K.cast(input_shape, K.dtype(box_yx))
    image_shape = K.cast(image_shape, K.dtype(box_yx))
    new_shape = K.round(image_shape * K.min(input_shape / image_shape))
def sigmoid_neg(x):
    return K.sigmoid(x) - 0.5
def swish_activation(x):
    return K.sigmoid(x) * x
def decode(yolo_outputs):
    num_scales = len(yolo_outputs)
    anchor_mask = [[6, 7, 8], [3, 4, 5], [0, 1, 2]] if num_scales == 3 \
        else [[3, 4, 5], [1, 2, 3]]

    b_min_max_list = []
    box_confidence_list = []
    class_probs_list = []

    for scale_idx in np.arange(num_scales):  # was hard-coded to 3 scales
        anchors = all_anchors[anchor_mask[scale_idx]]
        output = yolo_outputs[scale_idx]
        num_anchors = len(anchors)
        batch_size = K.shape(output)[0]
        grid_shape = K.shape(output)[1:3]
        grid_height = grid_shape[0]
        grid_width = grid_shape[1]

        # Reshape to a tensor of dimensions
        # (batch_size, grid_height, grid_width, num_anchors, 5 + num_classes).
        # The five box parameters are:
        #   t_x, t_y  determine the center point of the box;
        #   t_w, t_h  determine the width and height of the box;
        #   the box confidence indicates the confidence that the box
        #   contains an object and that the box is accurate.
        output = K.reshape(
            output, [-1, grid_height, grid_width, num_anchors, 5 + num_classes])

        # Compute b_x, b_y for each cell and each anchor.
        c_x = K.tile(K.reshape(K.arange(grid_width), [1, -1, 1, 1]),
                     [grid_height, 1, num_anchors, 1])
        c_y = K.tile(K.reshape(K.arange(grid_height), [-1, 1, 1, 1]),
                     [1, grid_width, num_anchors, 1])
        c_xy = K.concatenate([c_x, c_y])
        c_xy = K.cast(c_xy, K.dtype(output))
        b_xy = (K.sigmoid(output[..., :2]) + c_xy) / K.cast(
            grid_shape[::-1], K.dtype(output))

        # Compute b_w and b_h for each cell and each anchor.
        p_wh = K.tile(K.reshape(K.constant(anchors), [1, 1, num_anchors, 2]),
                      [grid_height, grid_width, 1, 1])
        b_wh = p_wh * K.exp(output[..., 2:4]) / K.cast(
            input_shape[::-1], K.dtype(output))

        # y_min, x_min, y_max, x_max
        b_min_max = K.reshape(convert_box_params(b_xy, b_wh),
                              [batch_size, -1, 4])

        # Compute the box confidence for each cell and each anchor.
        box_confidence = K.reshape(K.sigmoid(output[..., 4]),
                                   [batch_size, -1])

        # Compute class probabilities for each cell and each anchor.
        class_probs = K.reshape(K.sigmoid(output[..., 5:]),
                                [batch_size, -1, num_classes])

        b_min_max_list.append(b_min_max)
        box_confidence_list.append(box_confidence)
        class_probs_list.append(class_probs)

    return [
        K.concatenate(b_min_max_list, axis=1),
        K.concatenate(box_confidence_list, axis=1),
        K.concatenate(class_probs_list, axis=1)
    ]
def call(self, inputs):
    a = K.cast(self.a, dtype=K.dtype(inputs))
    P = (K.sigmoid(a * (K.mean(inputs, axis=(1, 2)) - self.b))
         - K.sigmoid(-a * self.b)) / (K.sigmoid(a * (1. - self.b))
                                      - K.sigmoid(-a * self.b))
    return P
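# The expression above is a sigmoid rescaled so that a channel mean of 0
# maps exactly to 0 and a mean of 1 maps exactly to 1. A NumPy sketch with
# illustrative values for the parameters a and b:
import numpy as np

def soft_gate(m, a=10.0, b=0.5):
    s = lambda z: 1.0 / (1.0 + np.exp(-z))
    return (s(a * (m - b)) - s(-a * b)) / (s(a * (1.0 - b)) - s(-a * b))

print(soft_gate(0.0), soft_gate(1.0))  # -> 0.0 1.0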
def call(self, inputs, mode='positive'):
    if mode == 'positive':
        scale = self.tau + (1 - self.tau) * K.sigmoid(self.scale)
    else:
        scale = (1 - self.tau) * K.sigmoid(-self.scale)
    return inputs * K.sqrt(scale)
def yolo_loss(args,
              anchors,
              num_classes,
              rescore_confidence=False,
              print_loss=False):
    """YOLO localization loss function.

    Parameters
    ----------
    yolo_output : tensor
        Final convolutional layer features.
    true_boxes : tensor
        Ground truth boxes tensor with shape [batch, num_true_boxes, 5]
        containing box x_center, y_center, width, height, and class.
    detectors_mask : array
        0/1 mask for detector positions where there is a matching ground
        truth.
    matching_true_boxes : array
        Corresponding ground truth boxes for positive detector positions.
        Already adjusted for conv height and width.
    anchors : tensor
        Anchor boxes for model.
    num_classes : int
        Number of object classes.
    rescore_confidence : bool, default=False
        If true then set confidence target to IOU of best predicted box with
        the closest matching ground truth box.
    print_loss : bool, default=False
        If True then use a tf.Print() to print the loss components.

    Returns
    -------
    mean_loss : float
        mean localization loss across minibatch
    """
    (yolo_output, true_boxes, detectors_mask, matching_true_boxes) = args
    num_anchors = len(anchors)
    object_scale = 5
    no_object_scale = 1
    class_scale = 1
    coordinates_scale = 1
    pred_xy, pred_wh, pred_confidence, pred_class_prob = yolo_head(
        yolo_output, anchors, num_classes)

    # Unadjusted box predictions for loss.
    # TODO: Remove extra computation shared with yolo_head.
    yolo_output_shape = K.shape(yolo_output)
    feats = K.reshape(yolo_output, [
        -1, yolo_output_shape[1], yolo_output_shape[2], num_anchors,
        num_classes + 5
    ])
    pred_boxes = K.concatenate(
        (K.sigmoid(feats[..., 0:2]), feats[..., 2:4]), axis=-1)

    # TODO: Adjust predictions by image width/height for non-square images?
    # IOUs may be off due to different aspect ratio.

    # Expand pred x,y,w,h to allow comparison with ground truth.
    # batch, conv_height, conv_width, num_anchors, num_true_boxes, box_params
    pred_xy = K.expand_dims(pred_xy, 4)
    pred_wh = K.expand_dims(pred_wh, 4)

    pred_wh_half = pred_wh / 2.
    pred_mins = pred_xy - pred_wh_half
    pred_maxes = pred_xy + pred_wh_half

    true_boxes_shape = K.shape(true_boxes)

    # batch, conv_height, conv_width, num_anchors, num_true_boxes, box_params
    true_boxes = K.reshape(true_boxes, [
        true_boxes_shape[0], 1, 1, 1, true_boxes_shape[1], true_boxes_shape[2]
    ])
    true_xy = true_boxes[..., 0:2]
    true_wh = true_boxes[..., 2:4]

    # Find IOU of each predicted box with each ground truth box.
    true_wh_half = true_wh / 2.
    true_mins = true_xy - true_wh_half
    true_maxes = true_xy + true_wh_half

    intersect_mins = K.maximum(pred_mins, true_mins)
    intersect_maxes = K.minimum(pred_maxes, true_maxes)
    intersect_wh = K.maximum(intersect_maxes - intersect_mins, 0.)
    intersect_areas = intersect_wh[..., 0] * intersect_wh[..., 1]

    pred_areas = pred_wh[..., 0] * pred_wh[..., 1]
    true_areas = true_wh[..., 0] * true_wh[..., 1]

    union_areas = pred_areas + true_areas - intersect_areas
    iou_scores = intersect_areas / union_areas

    # Best IOUs for each location.
    best_ious = K.max(iou_scores, axis=4)  # Best IOU scores.
    best_ious = K.expand_dims(best_ious)

    # A detector has found an object if IOU > thresh for some true box.
    object_detections = K.cast(best_ious > 0.6, K.dtype(best_ious))

    # TODO: Darknet region training includes extra coordinate loss for early
    # training steps to encourage predictions to match anchor priors.

    # Determine confidence weights from object and no_object weights.
    # NOTE: YOLO does not use binary cross-entropy here.
    no_object_weights = (no_object_scale * (1 - object_detections) *
                         (1 - detectors_mask))
    no_objects_loss = no_object_weights * K.square(-pred_confidence)

    if rescore_confidence:
        objects_loss = (object_scale * detectors_mask *
                        K.square(best_ious - pred_confidence))
    else:
        objects_loss = (object_scale * detectors_mask *
                        K.square(1 - pred_confidence))
    confidence_loss = objects_loss + no_objects_loss

    # Classification loss for matching detections.
    # NOTE: YOLO does not use categorical cross-entropy loss here.
    matching_classes = K.cast(matching_true_boxes[..., 4], 'int32')
    matching_classes = K.one_hot(matching_classes, num_classes)
    classification_loss = (class_scale * detectors_mask *
                           K.square(matching_classes - pred_class_prob))

    # Coordinate loss for matching detection boxes.
    matching_boxes = matching_true_boxes[..., 0:4]
    coordinates_loss = (coordinates_scale * detectors_mask *
                        K.square(matching_boxes - pred_boxes))

    confidence_loss_sum = K.sum(confidence_loss)
    classification_loss_sum = K.sum(classification_loss)
    coordinates_loss_sum = K.sum(coordinates_loss)
    total_loss = 0.5 * (
        confidence_loss_sum + classification_loss_sum + coordinates_loss_sum)
    if print_loss:
        total_loss = tf.Print(
            total_loss, [
                total_loss, confidence_loss_sum, classification_loss_sum,
                coordinates_loss_sum
            ],
            message='yolo_loss, conf_loss, class_loss, box_coord_loss:')

    return total_loss
def call(self, x, mask=None):
    # ~~~~~~~~~~~ Global Attention
    if self.attention_type == 'global':
        # eij = K.tanh(K.dot(x, self.W))  # worked, but this is self-attention
        eij = K.dot(x, self.W)
        if self.bias:
            eij += self.b
        eij = dot_product(eij, self.u)
        ai = K.exp(eij)
        weights = ai / K.cast(K.sum(ai, axis=1, keepdims=True),
                              K.floatx())  # + K.epsilon()
        weights = K.expand_dims(weights)
        weighted_input = x * weights
        if self.return_coefficients:
            return [K.sum(weighted_input, axis=1), weights]
        else:
            return K.sum(weighted_input, axis=1)

    # ~~~~~~~~~~~ Local Attention
    elif self.attention_type == 'local':
        eij = K.dot(x, self.W)
        if self.bias:
            eij += self.b
        eij = dot_product(eij, self.u)
        D = 4
        pt = 5
        ii = np.array(range(np.shape(x)[-1]), dtype='float32')
        ai = K.exp(eij + (-1.0 * (ii - pt) ** 2 / (D * D / 2.0)))
        ai_local = K.exp(eij)
        ai_local = ai_local[:, pt - D:pt + D]
        weights = ai / K.cast(K.sum(ai_local, axis=1, keepdims=True),
                              K.floatx())
        weights = K.expand_dims(weights)
        weighted_input = x * weights
        if self.return_coefficients:
            return [K.sum(weighted_input, axis=1), weights]
        else:
            return K.sum(weighted_input, axis=1)

    ########################################
    elif self.attention_type == 'localFULL':
        eij = K.dot(x, self.W)
        if self.bias:
            eij += self.b
        eij = dot_product(eij, self.u)
        D = 3  # local attention window size, user defined
        pt = int(np.shape(x)[-1]) * K.eye(int(np.shape(x)[-1]))  # Tx
        sig = K.sigmoid(K.tanh(eij))  # sigmoid part
        pt = pt * sig  # Tx * sigmoid
        print(K.eval(pt))  # debug output; only evaluates outside graph mode
        ii = np.array(range(np.shape(x)[-1]), dtype='float32')
        ai = K.exp(eij + (-1.0 * (ii - pt) ** 2 / (D * D / 2.0)))
        ai_local = K.exp(eij)
        ai_local = K.dot(ai_local, K.reshape(pt, (-1, -1)))
        weights = ai / K.cast(K.sum(ai_local, axis=1, keepdims=True),
                              K.floatx())
        weights = K.expand_dims(weights)
        weighted_input = x * weights
        if self.return_coefficients:
            return [K.sum(weighted_input, axis=1), weights]
        else:
            return K.sum(weighted_input, axis=1)

    # ~~~~~~~~~~~ Self Attention
    elif self.attention_type == 'self':
        eij = K.tanh(K.dot(x, self.W))
        if self.bias:
            eij += self.b
        eij = dot_product(eij, self.u)
        ai = K.exp(eij)
        weights = ai / K.cast(K.sum(ai, axis=1, keepdims=True), K.floatx())
        weights = K.expand_dims(weights)
        weighted_input = x * weights
        if self.return_coefficients:
            return [K.sum(weighted_input, axis=1), weights]
        else:
            return K.sum(weighted_input, axis=1)
    else:
        pass
def IoU(self, y_true, y_pred):
    '''IoU metric'''
    (yolo_output, true_boxes, detectors_mask, matching_true_boxes) = self.args
    num_anchors = len(self.anchors)
    # pred_*.shape = (n_images, 13, 13, n_boxes, 1 or 2);
    # 1 = conf/class, 2 = xy or wh
    pred_xy, pred_wh, pred_confidence, pred_class_prob = yolo_head(
        yolo_output, self.anchors, self.num_classes)

    # Unadjusted box predictions for loss.
    # TODO: Remove extra computation shared with yolo_head.
    yolo_output_shape = K.shape(yolo_output)
    feats = K.reshape(yolo_output, [
        -1, yolo_output_shape[1], yolo_output_shape[2], num_anchors,
        self.num_classes + 5
    ])
    pred_boxes = K.concatenate(
        (K.sigmoid(feats[..., 0:2]), feats[..., 2:4]), axis=-1)

    # TODO: Adjust predictions by image width/height for non-square images?
    # IOUs may be off due to different aspect ratio.

    # Expand pred x,y,w,h to allow comparison with ground truth.
    # batch, conv_height, conv_width, num_anchors, num_true_boxes, box_params
    pred_xy = K.expand_dims(pred_xy, 4)
    pred_wh = K.expand_dims(pred_wh, 4)

    pred_wh_half = pred_wh / 2.
    pred_mins = pred_xy - pred_wh_half
    pred_maxes = pred_xy + pred_wh_half

    true_boxes_shape = K.shape(true_boxes)

    # batch, conv_height, conv_width, num_anchors, num_true_boxes, box_params
    true_boxes = K.reshape(true_boxes, [
        true_boxes_shape[0], 1, 1, 1, true_boxes_shape[1], true_boxes_shape[2]
    ])
    true_xy = true_boxes[..., 0:2]
    true_wh = true_boxes[..., 2:4]

    # Find IOU of each predicted box with each ground truth box.
    true_wh_half = true_wh / 2.
    true_mins = true_xy - true_wh_half
    true_maxes = true_xy + true_wh_half

    intersect_mins = K.maximum(pred_mins, true_mins)
    intersect_maxes = K.minimum(pred_maxes, true_maxes)
    intersect_wh = K.maximum(intersect_maxes - intersect_mins, 0.)
    intersect_areas = intersect_wh[..., 0] * intersect_wh[..., 1]

    pred_areas = pred_wh[..., 0] * pred_wh[..., 1]
    true_areas = true_wh[..., 0] * true_wh[..., 1]

    union_areas = pred_areas + true_areas - intersect_areas
    iou_scores = intersect_areas / union_areas

    # Best IOUs for each location.
    best_ious = K.max(iou_scores, axis=4)  # Best IOU scores.
    best_ious = K.expand_dims(best_ious)

    # A detector has found an object if IOU > thresh for some true box.
    object_detections = K.cast(best_ious > 0.6, K.dtype(best_ious))

    total_IoU = K.sum(object_detections)

    return total_IoU
def yolo_head(feats, anchors, num_classes):
    """Convert final layer features to bounding box parameters.

    Parameters
    ----------
    feats : tensor
        Final convolutional layer features.
    anchors : array-like
        Anchor box widths and heights.
    num_classes : int
        Number of target classes.

    Returns
    -------
    box_xy : tensor
        x, y box predictions adjusted by spatial location in conv layer.
    box_wh : tensor
        w, h box predictions adjusted by anchors and conv spatial resolution.
    box_conf : tensor
        Probability estimate for whether each box contains any object.
    box_class_pred : tensor
        Probability distribution estimate for each box over class labels.
    """
    num_anchors = len(anchors)
    # Reshape to batch, height, width, num_anchors, box_params.
    anchors_tensor = K.reshape(K.variable(anchors), [1, 1, 1, num_anchors, 2])

    # Static implementation for fixed models.
    # TODO: Remove or add option for static implementation.
    # _, conv_height, conv_width, _ = K.int_shape(feats)
    # conv_dims = K.variable([conv_width, conv_height])

    # Dynamic implementation of conv dims for fully convolutional model.
    conv_dims = K.shape(feats)[1:3]  # assuming channels last
    # In YOLO the height index is the innermost iteration.
    conv_height_index = K.arange(0, stop=conv_dims[0])
    conv_width_index = K.arange(0, stop=conv_dims[1])
    # conv_height_index holds the top-left cell's height coordinate for each
    # position in the feature map.
    conv_height_index = K.tile(conv_height_index, [conv_dims[1]])

    # TODO: Repeat_elements and tf.split doesn't support dynamic splits.
    # conv_width_index = K.repeat_elements(conv_width_index, conv_dims[1], axis=0)
    conv_width_index = K.tile(K.expand_dims(conv_width_index, 0),
                              [conv_dims[0], 1])
    conv_width_index = K.flatten(K.transpose(conv_width_index))
    # conv_width_index = K.tile(conv_width_index, [conv_dims[0]])
    conv_index = K.transpose(K.stack([conv_height_index, conv_width_index]))
    conv_index = K.reshape(conv_index, [1, conv_dims[0], conv_dims[1], 1, 2])
    conv_index = K.cast(conv_index, K.dtype(feats))

    feats = K.reshape(
        feats, [-1, conv_dims[0], conv_dims[1], num_anchors, num_classes + 5])
    conv_dims = K.cast(K.reshape(conv_dims, [1, 1, 1, 1, 2]), K.dtype(feats))

    # Static generation of conv_index:
    # conv_index = np.array([_ for _ in np.ndindex(conv_width, conv_height)])
    # conv_index = conv_index[:, [1, 0]]  # swap columns for YOLO ordering.
    # conv_index = K.variable(
    #     conv_index.reshape(1, conv_height, conv_width, 1, 2))
    # feats = Reshape(
    #     (conv_dims[0], conv_dims[1], num_anchors, num_classes + 5))(feats)

    box_xy = K.sigmoid(feats[..., :2])
    box_wh = K.exp(feats[..., 2:4])
    box_confidence = K.sigmoid(feats[..., 4:5])
    box_class_probs = K.softmax(feats[..., 5:])

    # Adjust predictions to each spatial grid point and anchor size.
    # Note: YOLO iterates over height index before width index.
    # These are positions relative to the whole image.
    box_xy = (box_xy + conv_index) / conv_dims
    box_wh = box_wh * anchors_tensor / conv_dims

    return box_xy, box_wh, box_confidence, box_class_probs, feats
def cross_entropy_loss(y_true, y_pred):
    # print(np.shape(y_true), np.shape(y_pred))
    return -K.mean(
        K.log(K.sigmoid(K.clip(K.sum(y_pred, axis=1) * y_true, -6, 6))))
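# An equivalent view of the loss above: -log(sigmoid(z)) == softplus(-z),
# so the same objective can be written without the explicit log/sigmoid
# pair. Sketch only; note that the original's [-6, 6] clip also bounds the
# logits and hence the gradients, which this version does not replicate.
def cross_entropy_loss_softplus(y_true, y_pred):
    z = K.sum(y_pred, axis=1) * y_true
    return K.mean(K.softplus(-z))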
def swish(x, beta=1):
    return x * sigmoid(beta * x)
    # Reshape to batch, height, width, num_anchors, box_params.
    anchors_tensor = K.reshape(K.constant(anchors), [1, 1, 1, num_anchors, 2])

    grid_shape = K.shape(feats)[1:3]  # height, width
    grid_y = K.tile(K.reshape(K.arange(0, stop=grid_shape[0]), [-1, 1, 1, 1]),
                    [1, grid_shape[1], 1, 1])
    grid_x = K.tile(K.reshape(K.arange(0, stop=grid_shape[1]), [1, -1, 1, 1]),
                    [grid_shape[0], 1, 1, 1])
    grid = K.concatenate([grid_x, grid_y])
    grid = K.cast(grid, K.dtype(feats))

    feats = K.reshape(
        feats, [-1, grid_shape[0], grid_shape[1], num_anchors,
                num_classes + 5 + 3 + 3 * BIN])

    # Adjust predictions to each spatial grid point and anchor size.
    box_xy = (K.sigmoid(feats[..., :2]) + grid) / K.cast(grid_shape[::-1],
                                                         K.dtype(feats))
    box_wh = K.exp(feats[..., 2:4]) * anchors_tensor / K.cast(
        input_shape[::-1], K.dtype(feats))
    box_confidence = K.sigmoid(feats[..., 4])

    # 3D dimensions, no nonlinearity (earlier variants used exp or sigmoid):
    # box_dim = K.exp(feats[..., 5:8])
    # box_dim = K.sigmoid(feats[..., 5:8])
    box_dim = feats[..., 5:8]

    box_3d_conf = K.sigmoid(feats[..., 8:10])
    # box_3d_cossin = K.l2_normalize(K.reshape(feats[..., 10:14],
    #                                          [-1, -1, -1, BIN, 2]))
    box_3d_cossin = K.l2_normalize(feats[..., 10:14])
    box_class_probs = K.sigmoid(feats[..., 14:])

    if calc_loss == True:
        return grid, feats, box_xy, box_wh, box_dim, box_3d_conf, box_3d_cossin
    return (box_xy, box_wh, box_confidence, box_dim, box_3d_conf,
            box_3d_cossin, box_class_probs)
def score_loss(y_true, y_pred):
    preds = K.flatten(K.sigmoid(y_pred - 1))
    trues = K.flatten(y_true)
    return K.square(preds - trues)
def yolo_head(feats, anchors, num_classes):
    """Convert final layer features to bounding box parameters.

    Parameters
    ----------
    feats : tensor
        Final convolutional layer features.
    anchors : array-like
        Anchor box widths and heights.
    num_classes : int
        Number of target classes.

    Returns
    -------
    box_xy : tensor
        x, y box predictions adjusted by spatial location in conv layer.
    box_wh : tensor
        w, h box predictions adjusted by anchors and conv spatial resolution.
    box_conf : tensor
        Probability estimate for whether each box contains any object.
    box_class_pred : tensor
        Probability distribution estimate for each box over class labels.
    """
    num_anchors = len(anchors)
    # Reshape to batch, height, width, num_anchors, box_params.
    anchors_tensor = K.reshape(K.variable(anchors), [1, 1, 1, num_anchors, 2])

    # Static implementation for fixed models.
    # TODO: Remove or add option for static implementation.
    # _, conv_height, conv_width, _ = K.int_shape(feats)
    # conv_dims = K.variable([conv_width, conv_height])

    # Dynamic implementation of conv dims for fully convolutional model.
    conv_dims = K.shape(feats)[1:3]  # assuming channels last
    # In YOLO the height index is the innermost iteration.
    conv_height_index = K.arange(0, stop=conv_dims[0])
    conv_width_index = K.arange(0, stop=conv_dims[1])
    conv_height_index = K.tile(conv_height_index, [conv_dims[1]])

    # TODO: Repeat_elements and tf.split doesn't support dynamic splits.
    # conv_width_index = K.repeat_elements(conv_width_index, conv_dims[1], axis=0)
    conv_width_index = K.tile(K.expand_dims(conv_width_index, 0),
                              [conv_dims[0], 1])
    conv_width_index = K.flatten(K.transpose(conv_width_index))
    conv_index = K.transpose(K.stack([conv_height_index, conv_width_index]))
    conv_index = K.reshape(conv_index, [1, conv_dims[0], conv_dims[1], 1, 2])
    conv_index = K.cast(conv_index, K.dtype(feats))

    feats = K.reshape(
        feats, [-1, conv_dims[0], conv_dims[1], num_anchors, num_classes + 5])
    conv_dims = K.cast(K.reshape(conv_dims, [1, 1, 1, 1, 2]), K.dtype(feats))

    # Static generation of conv_index:
    # conv_index = np.array([_ for _ in np.ndindex(conv_width, conv_height)])
    # conv_index = conv_index[:, [1, 0]]  # swap columns for YOLO ordering.
    # conv_index = K.variable(
    #     conv_index.reshape(1, conv_height, conv_width, 1, 2))
    # feats = Reshape(
    #     (conv_dims[0], conv_dims[1], num_anchors, num_classes + 5))(feats)

    box_xy = K.sigmoid(feats[..., :2])
    box_wh = K.exp(feats[..., 2:4])
    box_confidence = K.sigmoid(feats[..., 4:5])
    box_class_probs = K.softmax(feats[..., 5:])

    # Adjust predictions to each spatial grid point and anchor size.
    # Note: YOLO iterates over height index before width index.
    box_xy = (box_xy + conv_index) / conv_dims
    box_wh = box_wh * anchors_tensor / conv_dims

    return box_xy, box_wh, box_confidence, box_class_probs
def classification_loss(self, y_true, y_pred):
    '''Classification loss metric'''
    (yolo_output, true_boxes, detectors_mask, matching_true_boxes) = self.args
    num_anchors = len(self.anchors)
    object_scale = LAMBDA_OBJ
    no_object_scale = LAMBDA_NOOBJ
    class_scale = LAMBDA_CLASS
    coordinates_scale = LAMBDA_COORD

    pred_xy, pred_wh, pred_confidence, pred_class_prob = yolo_head(
        yolo_output, self.anchors, self.num_classes)

    # Unadjusted box predictions for loss.
    # TODO: Remove extra computation shared with yolo_head.
    yolo_output_shape = K.shape(yolo_output)
    feats = K.reshape(yolo_output, [
        -1, yolo_output_shape[1], yolo_output_shape[2], num_anchors,
        self.num_classes + 5
    ])
    pred_boxes = K.concatenate(
        (K.sigmoid(feats[..., 0:2]), feats[..., 2:4]), axis=-1)

    # TODO: Adjust predictions by image width/height for non-square images?
    # IOUs may be off due to different aspect ratio.

    # Expand pred x,y,w,h to allow comparison with ground truth.
    # batch, conv_height, conv_width, num_anchors, num_true_boxes, box_params
    pred_xy = K.expand_dims(pred_xy, 4)
    pred_wh = K.expand_dims(pred_wh, 4)

    pred_wh_half = pred_wh / 2.
    pred_mins = pred_xy - pred_wh_half
    pred_maxes = pred_xy + pred_wh_half

    true_boxes_shape = K.shape(true_boxes)

    # batch, conv_height, conv_width, num_anchors, num_true_boxes, box_params
    true_boxes = K.reshape(true_boxes, [
        true_boxes_shape[0], 1, 1, 1, true_boxes_shape[1], true_boxes_shape[2]
    ])
    true_xy = true_boxes[..., 0:2]
    true_wh = true_boxes[..., 2:4]

    # Find IOU of each predicted box with each ground truth box.
    true_wh_half = true_wh / 2.
    true_mins = true_xy - true_wh_half
    true_maxes = true_xy + true_wh_half

    intersect_mins = K.maximum(pred_mins, true_mins)
    intersect_maxes = K.minimum(pred_maxes, true_maxes)
    intersect_wh = K.maximum(intersect_maxes - intersect_mins, 0.)
    intersect_areas = intersect_wh[..., 0] * intersect_wh[..., 1]

    pred_areas = pred_wh[..., 0] * pred_wh[..., 1]
    true_areas = true_wh[..., 0] * true_wh[..., 1]

    union_areas = pred_areas + true_areas - intersect_areas
    iou_scores = intersect_areas / union_areas

    # Best IOUs for each location.
    best_ious = K.max(iou_scores, axis=4)  # Best IOU scores.
    best_ious = K.expand_dims(best_ious)

    # A detector has found an object if IOU > thresh for some true box.
    object_detections = K.cast(best_ious > 0.6, K.dtype(best_ious))

    # TODO: Darknet region training includes extra coordinate loss for early
    # training steps to encourage predictions to match anchor priors.

    # Determine confidence weights from object and no_object weights.
    # NOTE: YOLO does not use binary cross-entropy here.
    no_object_weights = (no_object_scale * (1 - object_detections) *
                         (1 - detectors_mask))
    no_objects_loss = no_object_weights * K.square(-pred_confidence)

    if self.rescore_confidence:
        objects_loss = (object_scale * detectors_mask *
                        K.square(best_ious - pred_confidence))
    else:
        objects_loss = (object_scale * detectors_mask *
                        K.square(1 - pred_confidence))
    confidence_loss = objects_loss + no_objects_loss

    # Classification loss for matching detections.
    # NOTE: YOLO does not use categorical cross-entropy loss here.
    matching_classes = K.cast(matching_true_boxes[..., 4], 'int32')
    matching_classes = K.one_hot(matching_classes, self.num_classes)
    classification_loss = (class_scale * detectors_mask *
                           K.square(matching_classes - pred_class_prob))

    classification_loss_sum = K.sum(classification_loss)

    return classification_loss_sum
def output_lambda(x, init_alpha=1.0, max_beta_value=5.0,
                  scalefactor=None, alpha_kernel_scalefactor=None):
    """Elementwise (Lambda) computation of alpha and regularized beta.

    - Alpha:
        (activation) Exponential units seem to give faster training than
        the original paper's softplus units. This makes sense given the
        logarithmic effect of a change in alpha.

        (initialization) To get faster training and fewer exploding
        gradients, initialize alpha to be around its scale when beta is
        around 1.0, approximately the expected value/mean of the training
        tte. Since we want the correct output scale built into the model,
        we initialize implicitly: multiply the assumed exp(0) = 1 by the
        scale factor `init_alpha`.

    - Beta:
        (activation) We want slow changes when beta -> 0, so softplus made
        sense in the original paper, but we get a similar effect with
        sigmoid, which also has nice features.

        (regularization) Use max_beta_value to implicitly regularize the
        model.

        (initialization) Fixed to begin moving slowly around 1.0.

    - Usage
        .. code-block:: python

            model.add(TimeDistributed(Dense(2)))
            model.add(Lambda(wtte.output_lambda,
                             arguments={"init_alpha": init_alpha,
                                        "max_beta_value": 2.0}))

    :param x: tensor with last dimension having length 2 with
        x[..., 0] = alpha, x[..., 1] = beta
    :param init_alpha: initial value of `alpha`. Default value is 1.0.
    :param max_beta_value: maximum beta value. Default value is 5.0.
    :param scalefactor: scaling factor applied to both alpha and beta.
        Default is `None`.
    :type x: Array
    :type init_alpha: Float
    :type max_beta_value: Float
    :type scalefactor: Float
    :return x: A positive `Tensor` of same shape as input
    :rtype: Array
    """
    if max_beta_value is None or max_beta_value > 3:
        if K.epsilon() > 1e-07 and K.backend() == 'tensorflow':
            # TODO: need to think this through.
            message = ("Using tensorflow backend and allowing high "
                       "`max_beta_value` may lead to gradient NaN during "
                       "training unless `K.epsilon()` is small. Call "
                       "`keras.backend.set_epsilon(1e-08)` to lower epsilon.")
            warnings.warn(message)

    if alpha_kernel_scalefactor is not None:
        message = ("`alpha_kernel_scalefactor` deprecated in favor of "
                   "`scalefactor` scaling both. Setting "
                   "`scalefactor = alpha_kernel_scalefactor`")
        warnings.warn(message)
        scalefactor = alpha_kernel_scalefactor

    a, b = _keras_unstack_hack(x)
    if scalefactor is not None:
        # Done after unstacking due to a theano bug.
        a, b = scalefactor * a, scalefactor * b

    # Implicitly initialize alpha:
    a = init_alpha * K.exp(a)

    if max_beta_value > 1.05:  # some value >> 1.0
        # Shift to start around 1.0, assuming the input is around 0.0.
        _shift = np.log(max_beta_value - 1.0)
        b = b - _shift
    b = max_beta_value * K.sigmoid(b)

    x = K.stack([a, b], axis=-1)

    return x
def swish(x):
    return x * K.sigmoid(x)
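# A common way to expose a custom activation like `swish` by name in layer
# constructors (sketch; assumes a Keras version that provides
# `get_custom_objects`):
from keras.layers import Activation
from keras.utils.generic_utils import get_custom_objects

get_custom_objects().update({'swish': Activation(swish)})
# Afterwards layers can reference it as a string: Dense(64, activation='swish')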