def loss_units(x):
    # normalize by the largest magnitude and zero-out negligible entries
    t = x / K.max(K.abs(x))
    x = K.switch(K.less(t, K.epsilon()), K.zeros_like(x), x)
    m = K.sum(K.cast(K.greater(x, 0.), K.floatx()))
    sum_x = K.sum(x)
    moving_units = K.switch(K.less_equal(m, self.units),
                            m, (1. - self.moving_decay) * self.moving_units)
    epsilon_minus = 0.
    epsilon_plus = K.switch(K.less_equal(m, self.units), self.moving_units, 0.)
    return K.relu(moving_units - sum_x - epsilon_minus) \
        + K.relu(sum_x - moving_units - epsilon_plus)
def _get_update_list(self, kernel):
    update_list = super(E2EFSRanking, self)._get_update_list(kernel)
    update_list += [
        (self.moving_factor,
         K.switch(K.less_equal(self.moving_T, self.warmup_T),
                  self.start_alpha,
                  K.minimum(self.alpha_M,
                            self.start_alpha + (1. - self.start_alpha) *
                            (self.moving_T - self.warmup_T) / self.T))),
        (self.moving_T, self.moving_T + 1),
        (self.moving_units,
         K.switch(K.less_equal(self.moving_T, self.warmup_T),
                  K.cast_to_floatx((1. - self.start_alpha) *
                                   np.prod(K.int_shape(kernel))),
                  K.maximum(self.alpha_M,
                            np.prod(K.int_shape(kernel)) *
                            K.pow(K.cast_to_floatx(
                                      1. / np.prod(K.int_shape(kernel))),
                                  self.speedup *
                                  (self.moving_T - self.warmup_T) / self.T)))),
        # alternative schedule:
        # K.maximum(1., (self.T - self.start_alpha - self.speedup *
        #                (self.moving_T - self.warmup_T)) *
        #           np.prod(K.int_shape(kernel)) / self.T))),
    ]
    return update_list
def rpn_loss_regr_fixed_num(y_true, y_pred):
    if K.image_data_format() == 'channels_first':
        x = y_true[:, 4 * num_anchors:, :, :] - y_pred
        x_abs = K.abs(x)
        # cast the boolean mask to float so it can be used in arithmetic
        # (the original branch was missing this cast)
        x_bool = K.cast(K.less_equal(x_abs, 1.0), tf.float32)
        return lambda_rpn_regr * K.sum(
            y_true[:, :4 * num_anchors, :, :] *
            (x_bool * (0.5 * x * x) + (1 - x_bool) * (x_abs - 0.5))) / K.sum(
                epsilon + y_true[:, :4 * num_anchors, :, :])
    else:
        x = y_true[:, :, :, 4 * num_anchors:] - y_pred
        x_abs = K.abs(x)
        x_bool = K.cast(K.less_equal(x_abs, 1.0), tf.float32)
        return lambda_rpn_regr * K.sum(
            y_true[:, :, :, :4 * num_anchors] *
            (x_bool * (0.5 * x * x) + (1 - x_bool) * (x_abs - 0.5))) / K.sum(
                epsilon + y_true[:, :, :, :4 * num_anchors])
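# A quick sanity check of the smooth-L1 piece used above, written as a
# standalone sketch (assumes TensorFlow 2.x eager execution; the
# num_anchors/lambda_rpn_regr/epsilon closures are intentionally left out):
from tensorflow.keras import backend as K

def smooth_l1(x):
    # same piecewise rule as the x_bool mask above:
    # 0.5 * x^2 where |x| <= 1, |x| - 0.5 elsewhere
    x_abs = K.abs(x)
    x_bool = K.cast(K.less_equal(x_abs, 1.0), 'float32')
    return x_bool * (0.5 * x * x) + (1 - x_bool) * (x_abs - 0.5)

print(K.eval(smooth_l1(K.constant([-2.0, -0.5, 0.0, 0.5, 2.0]))))
# -> [1.5   0.125 0.    0.125 1.5  ]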
def masked():
    # pick cval
    beta = K.sigmoid(self.beta)
    cval = self.min_value * beta + self.max_value * (1 - beta)
    # determine a mask
    ratio = K.sigmoid(self.ratio)
    size = K.random_uniform([], maxval=0.2, dtype='float32')
    offset = K.random_uniform([], maxval=1 - size, dtype='float32')
    '''
    ratio = K.concatenate([self.ratio, [0.]])
    ratio = ratio + K.random_normal([3,], dtype='float32')
    ratio = K.softmax(ratio)
    '''
    mask = K.arange(0., 1., 1 / freq, dtype='float32')
    ge = K.cast(K.greater_equal(mask, offset), dtype='float32')
    le = K.cast(K.less_equal(mask, size + offset), dtype='float32')
    mask = 1 - ge * le
    mask = K.reshape(mask, broadcast_shape)
    outputs = inputs * mask + cval * (1 - mask)
    return outputs
def get_updates(self, loss, params):
    grads = self.get_gradients(loss, params)
    self.updates = [K.update_add(self.iterations, 1)]
    lr = self.lr
    adam_lr = self.adam_lr
    if self.initial_decay > 0:
        lr = lr * (1. / (1. + self.decay * K.cast(self.iterations,
                                                  K.dtype(self.decay))))
        adam_lr = adam_lr * (1. / (1. + self.decay * K.cast(
            self.iterations, K.dtype(self.decay))))

    t = K.cast(self.iterations, K.floatx()) + 1
    adam_lr_t = adam_lr * (K.sqrt(1. - K.pow(self.beta_2, t)) /
                           (1. - K.pow(self.beta_1, t)))

    # momentum
    shapes = [K.int_shape(p) for p in params]
    moments = [K.zeros(shape) for shape in shapes]
    if self.amsgrad:
        vhats = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
    else:
        vhats = [K.zeros(1) for _ in params]
    self.ms = K.zeros(K.int_shape(params[0]), dtype=K.dtype(params[0]))
    self.vs = K.zeros(K.int_shape(params[0]), dtype=K.dtype(params[0]))
    self.weights = [self.iterations] + moments + vhats + [self.ms] + [self.vs]

    for i, (p, g, m, vhat) in enumerate(zip(params, grads, moments, vhats)):
        v = self.momentum * m - lr * g  # velocity
        self.updates.append(K.update(m, v))

        if self.nesterov:
            new_p = p + self.momentum * v - lr * g
        else:
            new_p = p + v

        if i == 0 and self.e2efs_layer is not None:
            # number of positive weights left in the E2E-FS kernel
            nnz = K.sum(K.cast(K.greater(p, 0.), K.floatx()))
            m_t = (self.beta_1 * self.ms) + (1. - self.beta_1) * g
            v_t = (self.beta_2 * self.vs) + (1. - self.beta_2) * K.square(g)
            if self.amsgrad:
                vhat_t = K.maximum(vhat, v_t)
                p_t = p - adam_lr_t * m_t / (K.sqrt(vhat_t) + K.epsilon())
                self.updates.append(K.update(vhat, vhat_t))
            else:
                p_t = p - adam_lr_t * m_t / (K.sqrt(v_t) + K.epsilon())
            self.updates.append(K.update(self.ms, m_t))
            self.updates.append(K.update(self.vs, v_t))
            # Adam update while nnz > units, SGD momentum update afterwards
            new_p = K.switch(K.less_equal(nnz, self.e2efs_layer.units),
                             new_p, p_t)

        # Apply constraints.
        if getattr(p, 'constraint', None) is not None:
            new_p = p.constraint(new_p)

        self.updates.append(K.update(p, new_p))
    return self.updates
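# The i == 0 branch above gates between two candidate updates with K.switch:
# the Adam-style step while the E2E-FS kernel still has more positive weights
# than the target number of units, the plain SGD step once nnz <= units.
# A minimal sketch of that gating with hypothetical values (assumes the
# surrounding `K` backend import):
nnz = K.constant(10.)
units = K.constant(5.)
sgd_step = K.constant([1., 1.])
adam_step = K.constant([2., 2.])
new_p = K.switch(K.less_equal(nnz, units), sgd_step, adam_step)
print(K.eval(new_p))  # [2. 2.]: nnz > units, so the Adam step is taken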
def class_loss_reg_fixed_num(y_true, y_pred):
    x = y_true[:, :, 4 * num_classes:] - y_pred
    valid = y_true[:, :, :4 * num_classes]
    x_abs = bk.abs(x)
    x_bool = bk.cast(bk.less_equal(x_abs, 1.0), 'float32')
    loss = x_bool * (0.5 * x * x) + (1 - x_bool) * (x_abs - 0.5)
    return lambda_cls_reg * bk.sum(valid * loss) / bk.sum(epsilon + valid)
def class_loss_regr_fixed_num(y_true, y_pred):
    x = y_true[:, :, 4 * num_classes:] - y_pred
    x_abs = K.abs(x)
    x_bool = K.cast(K.less_equal(x_abs, 1.0), 'float32')
    return lambda_cls_regr * K.sum(
        y_true[:, :, :4 * num_classes] *
        (x_bool * (0.5 * x * x) + (1 - x_bool) * (x_abs - 0.5))) / K.sum(
            epsilon + y_true[:, :, :4 * num_classes])
def call(self, inputs):
    # step 1: adaptive average
    # from (batch, rows, n_features) to (batch, n_features, rows)
    inputs = self.transpose(inputs)
    avg = K.mean(inputs, axis=2)
    adaptive_avg = self.mean_layer(avg)
    adaptive_avg = K.reshape(adaptive_avg, (-1, self.n_features, 1))
    inputs -= adaptive_avg

    # step 2: adaptive scaling
    std = K.mean(inputs ** 2, axis=2)
    std = K.sqrt(std + self.eps)
    adaptive_std = self.scaling_layer(std)
    # clamp the learned scale to at least 1.0
    fn = lambda elem: K.switch(K.less_equal(elem, 1.0),
                               K.ones_like(elem), elem)
    adaptive_std = K.map_fn(fn, adaptive_std)
    adaptive_std = K.reshape(adaptive_std, (-1, self.n_features, 1))
    inputs /= adaptive_std

    # step 3: gating
    avg = K.mean(inputs, axis=2)
    gate = self.gating_layer(avg)
    gate = K.reshape(gate, (-1, self.n_features, 1))
    inputs *= gate

    # from (batch, n_features, rows) back to (batch, rows, n_features)
    inputs = self.transpose(inputs)
    return inputs
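# The K.map_fn/K.switch clamp above maps every element e to 1.0 when
# e <= 1.0 and leaves it unchanged otherwise, which is exactly
# K.maximum(e, 1.0); a minimal equivalence check with toy values
# (assumes the surrounding `K` backend import):
adaptive_std = K.constant([[0.3, 2.5], [1.0, 0.9]])
print(K.eval(K.maximum(adaptive_std, 1.0)))  # [[1.  2.5] [1.  1. ]]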
def _init_cel(self, A_graph, b_graph, c_graph, y):
    # Sanity check
    y = tf.check_numerics(y, 'Problem with input y')

    # Find intersection points between Ax - b and the line joining c and y
    Ac = tf.reduce_sum(A_graph * tf.expand_dims(c_graph, axis=-2), axis=-1)
    bMinusAc = b_graph - Ac
    yMinusc = y - c_graph
    ADotyMinusc = tf.reduce_sum(A_graph * tf.expand_dims(yMinusc, -2), axis=-1)
    intersection_alphas = bMinusAc / (ADotyMinusc + K.epsilon())

    # Enforce intersection_alpha > 0 because the point must lie on the
    # ray from c to y
    less_equal_0 = K.less_equal(intersection_alphas,
                                K.zeros_like(intersection_alphas))
    candidate_alpha = K.switch(
        less_equal_0,
        K.ones_like(intersection_alphas) * tf.constant(np.inf, dtype='float32'),
        intersection_alphas)

    # Take the intersection point closest to the interior point as the
    # projection point
    intersection_alpha = K.min(candidate_alpha, axis=-1, keepdims=True)

    # If y is an interior point, y itself is the projection point
    is_interior_point = K.greater_equal(intersection_alpha,
                                        K.ones_like(intersection_alpha))
    alpha = K.switch(is_interior_point,
                     K.ones_like(intersection_alpha),
                     intersection_alpha)

    # Return z = alpha * y + (1 - alpha) * c
    z = alpha * y + ((1 - alpha) * c_graph)
    return z
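# The intersection algebra above solves a_i . (c + alpha * (y - c)) = b_i
# for alpha, giving alpha_i = (b_i - a_i . c) / (a_i . (y - c)). A standalone
# numeric check with one halfspace x <= 1, interior point c = 0 and query
# y = 2 (hypothetical toy values, NumPy only):
import numpy as np

A, b, c, y = np.array([[1.]]), np.array([1.]), np.array([0.]), np.array([2.])
alpha = (b - A @ c) / (A @ (y - c))   # -> 0.5
alpha = np.minimum(alpha, 1.0)        # interior points keep alpha = 1
z = alpha * y + (1 - alpha) * c
print(alpha, z)                       # [0.5] [1.]: y projects onto x = 1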
def huber_loss(y, y_pred, delta: float = 1.0):
    """
    Return the Huber loss between tensors.

    Reference:
        https://en.wikipedia.org/wiki/Huber_loss
        https://web.stanford.edu/class/cs20si/2017/lectures/slides_03.pdf
        https://keras.io/backend/

    Args:
        y: ground truth y labels
        y_pred: predicted y labels
        delta: the separating constant between MSE and MAE

    Returns:
        the element-wise Huber loss between the ground truth and predicted
        labels (Keras reduces it to a scalar when used as a model loss)
    """
    # calculate the residuals
    residual = K.abs(y_pred - y)
    # determine the result of the logical comparison to delta
    condition = K.less_equal(residual, delta)
    # calculate the two possible returns (MSE and MAE)
    then_this = 0.5 * K.square(residual)
    else_this = delta * residual - 0.5 * K.square(delta)
    # use the condition to determine the resulting tensor
    return K.switch(condition, then_this, else_this)
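# A minimal usage sketch for huber_loss, assuming TensorFlow 2.x with
# `from tensorflow.keras import backend as K`; the values below just
# spot-check the two branches of the piecewise loss:
y = K.constant([0.0, 0.0, 0.0])
y_pred = K.constant([0.5, 1.0, 3.0])
print(K.eval(huber_loss(y, y_pred)))  # [0.125 0.5   2.5  ]
# it can also be passed directly to Keras:
# model.compile(optimizer='adam', loss=huber_loss)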
def rpn_loss_reg_fixed_num(y_true, y_pred):
    diff = y_true[:, :, :, :, 4 * num_anchors:] - y_pred
    valid = y_true[:, :, :, :, :4 * num_anchors]
    x_abs = bk.abs(diff)
    x_bool = bk.cast(bk.less_equal(x_abs, 1.0), tf.float32)
    loss = x_bool * (0.5 * diff * diff) + (1 - x_bool) * (x_abs - 0.5)
    return lambda_rpn_reg * bk.sum(valid * loss) / bk.sum(epsilon + valid)
def class_loss_regr_fixed_num(y_true, y_pred):
    y_pred = tf.cast(y_pred, tf.float32)
    y_true = tf.cast(y_true, tf.float32)
    x = y_true[:, :, 4 * num_classes:] - y_pred
    x_abs = K.abs(x)
    x_bool = K.cast(K.less_equal(x_abs, 1.0), 'float32')
    return lambda_cls_regr * K.sum(
        y_true[:, :, :4 * num_classes] *
        (x_bool * (0.5 * x * x) + (1 - x_bool) * (x_abs - 0.5))) / K.sum(
            epsilon + y_true[:, :, :4 * num_classes])
def rpn_loss_regr_tf(y_true, y_pred):
    x = y_true[:, :, :, 4 * num_anchors:] - y_pred
    x_abs = K.abs(x)
    x_bool = K.cast(K.less_equal(x_abs, 1.0), tf.float32)
    return lambda_rpn_regr * K.sum(
        y_true[:, :, :, :4 * num_anchors] *
        (x_bool * (0.5 * x * x) + (1 - x_bool) * (x_abs - 0.5))) / K.sum(
            epsilon + y_true[:, :, :, :4 * num_anchors])
def call(self, inputs, **kwargs):
    input_shape = K.int_shape(inputs)
    sequence_length, d_model = input_shape[-2:]
    # output of the "sigmoid halting unit" (not the probability yet)
    halting = K.sigmoid(
        K.reshape(
            K.bias_add(K.dot(K.reshape(inputs, [-1, d_model]),
                             self.halting_kernel),
                       self.halting_biases,
                       data_format='channels_last'),
            [-1, sequence_length]))
    if self.zeros_like_halting is None:
        self.initialize_control_tensors(halting)
    # useful flags
    step_is_active = K.greater(self.halt_budget, 0)
    no_further_steps = K.less_equal(self.halt_budget - halting, 0)
    # halting probability is equal to
    # a. halting output if this isn't the last step (we have some budget)
    # b. to remainder if it is,
    # c. and zero for the steps that shouldn't be executed at all
    #    (out of budget for them)
    halting_prob = K.switch(
        step_is_active,
        K.switch(no_further_steps, self.remainder, halting),
        self.zeros_like_halting)
    self.active_steps += K.switch(step_is_active,
                                  self.ones_like_halting,
                                  self.zeros_like_halting)
    # We don't know which step is the last, so we keep updating
    # expression for the loss with each call of the layer
    self.ponder_cost = (self.time_penalty_t *
                        K.mean(self.remainder + self.active_steps))
    # Updating "the remaining probability" and the halt budget
    self.remainder = K.switch(no_further_steps,
                              self.remainder,
                              self.remainder - halting)
    self.halt_budget -= halting  # OK to become negative
    # If none of the inputs are active at this step, then instead
    # of zeroing them out by multiplying to all-zeroes halting_prob,
    # we can simply use a constant tensor of zeroes, which means that
    # we won't even calculate the output of those steps, saving
    # some real computational time.
    if self.zeros_like_input is None:
        self.zeros_like_input = K.zeros_like(inputs,
                                             name='zeros_like_input')
    # just because K.any(step_is_active) doesn't work in PlaidML
    any_step_is_active = K.greater(
        K.sum(K.cast(step_is_active, 'int32')), 0)
    step_weighted_output = K.switch(
        any_step_is_active,
        K.expand_dims(halting_prob, -1) * inputs,
        self.zeros_like_input)
    if self.weighted_output is None:
        self.weighted_output = step_weighted_output
    else:
        self.weighted_output += step_weighted_output
    return [inputs, self.weighted_output]
def rpn_reg_loss(y_true, y_pred):
    """
    Calculate the RPN regression loss.
    :param y_true: (1, width, height, anchor_number * 4 + anchor_number * 4)
    :param y_pred: (1, width, height, anchor_number * 4)
    :return: the regression loss.
    """
    x = y_true[:, :, :, 4 * cfg.TRAIN.ANCHOR_NUM:] - y_pred
    x_abs = K.abs(x)
    x_bool = K.cast(K.less_equal(x_abs, 1.0), 'float32')
    return K.sum(y_true[:, :, :, :4 * cfg.TRAIN.ANCHOR_NUM] *
                 (x_bool * (0.5 * x * x) + (1 - x_bool) * (x_abs - 0.5))) \
        / (K.sum(1e-4 + y_true[:, :, :, :4 * cfg.TRAIN.ANCHOR_NUM]) * 0.25)
def rpn_loss_regr_fixed_num(y_true, y_pred):
    # x is the difference between the true value and the predicted value
    x = y_true[:, :, :, 4 * num_anchors:] - y_pred
    # absolute value of x
    x_abs = K.abs(x)
    # if x_abs <= 1.0, x_bool = 1
    x_bool = K.cast(K.less_equal(x_abs, 1.0), tf.float32)
    return lambda_rpn_regr * K.sum(
        y_true[:, :, :, :4 * num_anchors] *
        (x_bool * (0.5 * x * x) + (1 - x_bool) * (x_abs - 0.5))) / K.sum(
            epsilon + y_true[:, :, :, :4 * num_anchors])
def class_loss_regr_fixed_num(y_true, y_pred):
    # subtract the prediction matrix from the ground-truth matrix
    x = y_true[:, :, 4 * num_classes:] - y_pred
    x_abs = K.abs(x)
    # 1.0 where x_abs <= 1.0, 0.0 where x_abs > 1.0
    x_bool = K.cast(K.less_equal(x_abs, 1.0), 'float32')
    # smooth L1 regression loss
    return lambda_cls_regr * K.sum(
        y_true[:, :, :4 * num_classes] *
        (x_bool * (0.5 * x * x) + (1 - x_bool) * (x_abs - 0.5))) / K.sum(
            epsilon + y_true[:, :, :4 * num_classes])
def reg_loss(y_true, y_pred):
    """
    Calculate the regression loss for the classifier.
    y_true is [1, rois_num, 8 * num_classes] (selector flags + targets)
    y_pred is [1, rois_num, 4 * num_classes]
    """
    num_classes = cfg.NUM_CLASSES - 1
    x = y_true[:, :, 4 * num_classes:] - y_pred
    x_abs = backend.abs(x)
    x_bool = backend.cast(backend.less_equal(x_abs, 1.0), 'float32')
    return backend.sum(y_true[:, :, :4 * num_classes] *
                       (x_bool * (0.5 * x * x) +
                        (1 - x_bool) * (x_abs - 0.5))) / (
        backend.sum(1e-4 + y_true[:, :, :4 * num_classes]) * 0.25)
def rpn_loss_regr_fixed_num(y_true, y_pred):
    # the RPN outputs a (0, width, height, 4 * anchors) tensor
    x = y_true[:, :, :, 4 * num_anchors:] - y_pred
    x_abs = K.abs(x)
    x_bool = K.cast(K.less_equal(x_abs, 1.0), tf.float32)
    return lambda_rpn_regr * K.sum(
        y_true[:, :, :, :4 * num_anchors] *
        (x_bool * (0.5 * x * x) + (1 - x_bool) * (x_abs - 0.5))) / K.sum(
            epsilon + y_true[:, :, :, :4 * num_anchors])
def rpn_loss_regr_fixed_num(y_true, y_pred):
    # subtract the prediction tensor from the ground-truth tensor
    x = y_true[:, :, :, 4 * num_anchors:] - y_pred
    x_abs = K.abs(x)
    # 1.0 where x_abs <= 1.0, 0.0 where x_abs > 1.0
    x_bool = K.cast(K.less_equal(x_abs, 1.0), tf.float32)
    # smooth L1 regression loss
    return lambda_rpn_regr * K.sum(
        y_true[:, :, :, :4 * num_anchors] *
        (x_bool * (0.5 * x * x) + (1 - x_bool) * (x_abs - 0.5))) / K.sum(
            epsilon + y_true[:, :, :, :4 * num_anchors])
def discretize_with_histogram(tensor, bins):
    _min = K.min(tensor)
    _max = K.max(tensor)
    _len_shape = len(tensor.shape)
    _bins = K.cast(tf.range(bins), dtype=K.floatx())
    _range = tf.linspace(_min, _max, bins + 1)
    for _ in range(_len_shape):
        _bins = K.expand_dims(_bins, axis=-1)
        _range = K.expand_dims(_range, axis=-1)
    _cond1 = K.greater_equal(tensor, _range[:-1])
    _cond2 = K.less(tensor, _range[1:])
    _cond3 = K.less_equal(tensor, _range[1:])
    # the last bin is closed on the right so that _max falls into it
    _cond4 = K.concatenate((_cond2[:-1], _cond3[-1:]), axis=0)
    _all_cond = K.cast(K.all(K.stack((_cond1, _cond4), axis=0), axis=0),
                       dtype=K.floatx())
    _axis = tuple([i + 1 for i in range(_len_shape)])
    _discrete = K.sum(_all_cond * _bins, axis=0)
    _histogram = tf.count_nonzero(_all_cond, axis=_axis)
    return _discrete, _histogram
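# A toy call of discretize_with_histogram (note: tf.count_nonzero is the
# TF 1.x spelling; under TensorFlow 2.x it would be tf.math.count_nonzero):
x = K.constant([0.1, 0.4, 0.5, 0.9])
discrete, histogram = discretize_with_histogram(x, bins=2)
print(K.eval(discrete))   # bin index per element: [0. 0. 1. 1.]
print(K.eval(histogram))  # element count per bin:  [2 2]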
def _get_update_list(self, kernel):
    super(E2EFSRanking, self)._get_update_list(kernel)
    self.moving_factor.assign(
        K.switch(
            K.less(self.moving_T, self.warmup_T),
            self.start_alpha,
            K.minimum(
                self.alpha_M,
                self.start_alpha + (1. - self.start_alpha) *
                (self.moving_T - self.warmup_T) / self.T)))
    self.moving_T.assign_add(1.)
    self.moving_units.assign(
        K.switch(
            K.less_equal(self.moving_T, self.warmup_T),
            K.cast_to_floatx(
                (1. - self.start_alpha) * np.prod(K.int_shape(kernel))),
            K.maximum(
                self.alpha_M,
                np.prod(K.int_shape(kernel)) * K.pow(
                    K.cast_to_floatx(1. / np.prod(K.int_shape(kernel))),
                    self.speedup * (self.moving_T - self.warmup_T) / self.T))))
def accuracy(self, y_true, y_pred):
    y_pred = K.cast(y_pred, dtype=K.floatx())
    output_dimensions = tf.shape(y_pred)[2]
    upper_bound = K.cast(y_true[:, :, :output_dimensions], dtype=K.floatx())
    mask = K.cast(K.greater_equal(upper_bound, self._tf_zero),
                  dtype=K.floatx())
    accuracy_mask = K.cast(y_true[:, :, 2 * output_dimensions:],
                           dtype=K.floatx())
    # because the accuracy mask is originally also padded with -1, we mask it
    accuracy_mask = mask * accuracy_mask
    error_with_slack = K.abs(y_pred - upper_bound) - self._slack
    error_with_slack = K.cast(K.less_equal(error_with_slack, self._tf_zero),
                              dtype=K.floatx())
    # number of correctly predicted sequences divided by the sequence count
    return K.sum(accuracy_mask * error_with_slack) / K.sum(accuracy_mask)
def class_loss_regr_fixed_num(y_true, y_pred):
    """
    Compute the classifier regression loss.
    :param y_true: ground truth, [batch_size, num_rois, num_classes * 8]
    :param y_pred: predictions, [batch_size, num_rois, num_classes * 4]
    :return: classifier regr_loss
    """
    regr_loss = 0
    batch_size = len(y_true)
    for i in range(batch_size):
        # take the second half of y_true and subtract y_pred
        x = y_true[i, :, 4 * num_classes:] - y_pred[i, :, :]
        x_abs = backend.abs(x)  # absolute value of the difference
        # 1 where the absolute difference is <= 1
        x_bool = backend.cast(backend.less_equal(x_abs, 1.0), 'float32')
        # 1. smooth L1: 0.5 * x^2 where |x| <= 1, |x| - 0.5 otherwise
        # 2. multiply by the flags marking which entries contribute
        # 3. sum and divide by the number of contributing entries (mean)
        loss = 4 * backend.sum(
            y_true[i, :, :4 * num_classes] *
            (x_bool * (0.5 * x * x) +
             (1 - x_bool) * (x_abs - 0.5))) / backend.sum(
                epsilon + y_true[i, :, :4 * num_classes])
        regr_loss += loss
    return regr_loss / backend.constant(batch_size)
def call(self, x):
    return K.less_equal(x, K.constant(0))
def call(self, x):
    a, b = x
    return K.less_equal(a, b)
def softmax_activation(mem):
    """Softmax activation."""
    return k.cast(
        k.less_equal(k.random_uniform(k.shape(mem)), k.softmax(mem)),
        k.floatx())
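# Read as a stochastic spiking rule: each position emits a spike (1.0) with
# probability equal to its softmax value. A toy draw (assumes the surrounding
# `k` backend alias, e.g. `from tensorflow.keras import backend as k`):
mem = k.constant([[2.0, 0.5, -1.0]])
print(k.eval(softmax_activation(mem)))  # a likely draw: [[1. 0. 0.]]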