def loss_2nd(y_true, y_pred):
    """Weighted squared-error loss.

    Entries where ``y_true`` is non-zero are scaled by ``beta`` before
    squaring; every other entry keeps a weight of 1. ``beta`` is not a
    parameter of this function and is resolved from the enclosing scope.

    Returns the mean of the per-row (last axis) sums of the weighted
    squared errors.
    """
    unit_weights = K.ones_like(y_true)
    # Tensor of the same shape as y_true, filled with beta.
    beta_weights = tf.fill(tf.shape(unit_weights), beta)
    is_nonzero = tf.not_equal(y_true, 0)
    weights = tf.where(is_nonzero, beta_weights, unit_weights)

    weighted_sq_err = K.square((y_true - y_pred) * weights)
    row_totals = K.sum(weighted_sq_err, axis=-1)
    return K.mean(row_totals)
def fp_score(y_true, y_pred, threshold=0.1):
    """Count false positives.

    An element counts when ``y_true`` differs from 1 and ``y_pred`` is
    strictly greater than ``threshold``. Both tensors are flattened first.

    Returns an int32 scalar tensor.
    """
    def _as_bool_column(tensor):
        # Flatten, add a trailing axis, and reinterpret as boolean.
        return K.cast(K.expand_dims(K.flatten(tensor)), 'bool')

    # |y_true - 1| is non-zero (truthy) exactly where y_true != 1.
    not_positive = _as_bool_column(K.abs(y_true - K.ones_like(y_true)))
    predicted_positive = _as_bool_column(
        K.greater(y_pred, K.constant(threshold)))
    # Constant all-true column, kept from the original formulation.
    always_true = K.cast(
        K.ones_like(K.expand_dims(K.flatten(y_pred))), 'bool')

    columns = K.concatenate(
        [not_positive, predicted_positive, always_true], axis=-1)
    row_is_fp = K.all(columns, axis=1)
    return K.sum(K.cast(row_is_fp, 'int32'))
def call(self, y):
    """Pull ``y`` towards ``self.c_graph`` until the constraints are met.

    For every constraint row the ratio ``(b - A.c) / (A.(y - c))`` is
    computed. Ratios outside ``[0, 1]`` are discarded, the smallest
    remaining ratio becomes ``alpha`` (or 1 when none remain), and the
    result is ``alpha * y + (1 - alpha) * c``.

    Raises:
        ValueError: if ``y`` is a list (the layer takes a single input).
    """
    if isinstance(y, list):
        raise ValueError('TSG layer has only 1 input')
    y = check_numerics(y, 'Problem with input y')

    centre = self.c_graph
    # b - A.c
    slack = self.b_graph - tensordot(self.A_graph, centre, 1)
    # A.(y - c)
    offset = y - centre
    directional = K.sum(self.A_graph * expand_dims(offset, -2), axis=2)
    # Element-wise ratio; epsilon is added to the denominator as in the
    # original to avoid an exact zero divisor.
    ratios = slack / (directional + K.epsilon())

    # Ratios > 1 or < 0 do not lie between c and y. Replace them with the
    # sentinel value 2 so the minimum below ignores them.
    sentinel = K.ones_like(ratios) + 1
    too_large = K.greater(ratios, K.ones_like(ratios))
    candidates = K.switch(too_large, sentinel, ratios)
    negative = K.less(candidates, K.zeros_like(ratios))
    candidates = K.switch(negative, sentinel, candidates)

    alpha = K.min(candidates, axis=-1, keepdims=True)
    # A minimum above 1 means every candidate was discarded: keep y itself.
    no_candidate = K.greater(alpha, K.ones_like(alpha))
    alpha = K.switch(no_candidate, K.ones_like(alpha), alpha)

    return alpha * y + ((1 - alpha) * centre)
def weighted_bce_dice_loss(y_true, y_pred):
    """Border-weighted BCE plus (1 - weighted Dice).

    A per-pixel weight map is derived from ``y_true``: pixels whose local
    average (average pooling with a size-dependent kernel) lies strictly
    between 0.005 and 0.995 get weight 3, all others weight 1. The map is
    then rescaled so its sum equals the number of elements.

    Raises:
        ValueError: if ``K.int_shape(y_pred)[1]`` is not one of
            128, 256, 512 or 1024.
    """
    y_true = K.cast(y_true, 'float32')
    y_pred = K.cast(y_pred, 'float32')

    # Odd kernel sizes keep the 'same'-padded pooling output aligned with
    # the input.
    kernel_for_side = {128: 11, 256: 21, 512: 21, 1024: 41}
    side = K.int_shape(y_pred)[1]
    if side not in kernel_for_side:
        raise ValueError('Unexpected image size')
    kernel_size = kernel_for_side[side]

    local_mean = K.pool2d(y_true,
                          pool_size=(kernel_size, kernel_size),
                          strides=(1, 1),
                          padding='same',
                          pool_mode='avg')
    above_low = K.cast(K.greater(local_mean, 0.005), 'float32')
    below_high = K.cast(K.less(local_mean, 0.995), 'float32')
    border = above_low * below_high

    uniform = K.ones_like(local_mean)
    total_before = K.sum(uniform)
    boosted = uniform + border * 2
    total_after = K.sum(boosted)
    # Rescale so the weights sum to the same total as the uniform map.
    weight = boosted * (total_before / total_after)

    bce_term = weighted_bce_loss(y_true, y_pred, weight)
    dice_term = 1 - weighted_dice_coeff(y_true, y_pred, weight)
    return bce_term + dice_term
def build_predictor(self, predict_activation=None):
    """Assemble the predictor network and store it in ``self.predictor_``.

    The layers in ``self.predictorLayers_`` are applied in order to the
    (edited) decoder inputs. Two Dense heads with ``self.predDim_`` units
    follow: the prediction itself and ``log_var_y``. When
    ``self.learnUncertainty_`` is false, ``log_var_y`` is replaced by the
    constant ``log(self.predVar_)``.

    Args:
        predict_activation: activation function for the prediction head.

    Raises:
        ValueError: if ``self.predictorLayers_`` is empty.
    """
    if not len(self.predictorLayers_):
        raise ValueError("Must add at least one predictor hidden layer")

    pred_in = self._build_decoder_inputs()
    hidden = self._edit_decoder_inputs(pred_in)
    for layer in self.predictorLayers_:
        hidden = layer(hidden)

    prediction_head = Dense(units=self.predDim_, activation=predict_activation)
    y_pred = prediction_head(hidden)
    log_var_y = Dense(self.predDim_, name='log_var_y')(hidden)

    if not self.learnUncertainty_:
        # `0 * lv` keeps the tensor connected to the graph while the value
        # is fixed to log(predVar_).
        log_var_y = Lambda(
            lambda lv: 0 * lv + K.ones_like(lv) * K.log(K.variable(self.predVar_))
        )(log_var_y)

    self.predictor_ = Model(inputs=pred_in,
                            outputs=[y_pred, log_var_y],
                            name='predictor')
def focal(y_true, y_pred, alpha=0.25, gamma=2.0, axis=None):
    """Focal loss (https://arxiv.org/abs/1708.02002).

    Args:
        y_true: Tensor of target data with shape (B, N, num_classes).
        y_pred: Tensor of predicted data with shape (B, N, num_classes).
        alpha: Weight applied where ``y_true == 1``; ``1 - alpha`` is used
            elsewhere.
        gamma: Exponent applied to the focal weight.
        axis: Axis to sum over. Defaults to 1 for ``channels_first`` data
            and to the last axis of ``y_pred`` otherwise.

    Returns:
        The focal loss of y_pred w.r.t. y_true, summed over ``axis``.
    """
    if axis is None:
        if K.image_data_format() == 'channels_first':
            axis = 1
        else:
            axis = K.ndim(y_pred) - 1

    is_positive = K.equal(y_true, 1)

    alpha_map = K.ones_like(y_true) * alpha
    alpha_map = tf.where(is_positive, alpha_map, 1 - alpha_map)

    # Poorly classified examples (low probability on the true label)
    # receive the larger modulating factor.
    modulator = tf.where(is_positive, 1 - y_pred, y_pred)
    weights = alpha_map * modulator**gamma

    per_element = weights * K.binary_crossentropy(y_true, y_pred)
    return K.sum(per_element, axis=axis)
def call(self, inputs, output_shape=None):
    """Scatter ``inputs[0]`` into a larger tensor at the positions in ``inputs[1]``.

    Args:
        inputs: pair ``(updates, mask)``. ``mask`` is cast to int32 and
            decoded as flat indices into the output (presumably argmax
            indices from a matching pooling layer; verify against caller).
        output_shape: optional 4-element output shape. When omitted, axes
            1 and 2 of ``updates`` are multiplied by ``self.size[0]`` and
            ``self.size[1]``.

    Returns:
        Result of ``tf.scatter_nd`` with shape ``output_shape``.
    """
    values_in, flat_idx = inputs[0], inputs[1]
    flat_idx = tf.cast(flat_idx, 'int32')
    in_shape = tf.shape(values_in, out_type='int32')

    if output_shape is None:
        output_shape = (in_shape[0],
                        in_shape[1] * self.size[0],
                        in_shape[2] * self.size[1],
                        in_shape[3])

    # Build one index tensor per output axis, each shaped like flat_idx.
    ones = K.ones_like(flat_idx, dtype='int32')

    batch_dims = K.concatenate([[in_shape[0]], [1], [1], [1]], axis=0)
    batch_ids = K.reshape(tf.range(output_shape[0], dtype='int32'),
                          shape=batch_dims)
    b = ones * batch_ids

    y = flat_idx // (output_shape[2] * output_shape[3])
    x = (flat_idx // output_shape[3]) % output_shape[2]

    channel_ids = tf.range(output_shape[3], dtype='int32')
    f = ones * channel_ids

    # Flatten to (num_updates, 4) indices and (num_updates,) values.
    n_updates = tf.size(values_in)
    stacked = K.reshape(K.stack([b, y, x, f]), [4, n_updates])
    indices = K.transpose(stacked)
    flat_values = K.reshape(values_in, [n_updates])

    return tf.scatter_nd(indices, flat_values, output_shape)
def get_updates(self, loss, params):
    """Build the list of update ops for one optimization step.

    First and second moment estimates are maintained per parameter
    (Adam-style). The per-element step size ``step_size / denom`` is
    clipped between ``lower_bound`` and ``upper_bound``, both of which
    depend on the step counter ``t``, before being multiplied by the
    first-moment estimate.

    Args:
        loss: loss tensor passed to ``self.get_gradients``.
        params: list of variables to update.

    Returns:
        ``self.updates``: the iteration increment followed by the moment,
        (optional) vhat and parameter update ops.
    """
    grads = self.get_gradients(loss, params)
    self.updates = [K.update_add(self.iterations, 1)]

    lr = self.lr
    if self.initial_decay > 0:
        # Inverse-time decay of the learning rate.
        lr = lr * (1. / (1. + self.decay * K.cast(self.iterations,
                                                  K.dtype(self.decay))))

    # 1-based step counter as a float.
    t = K.cast(self.iterations, K.floatx()) + 1

    # Applies bounds on actual learning rate
    step_size = lr * (K.sqrt(1. - K.pow(self.beta_2, t)) /
                      (1. - K.pow(self.beta_1, t)))

    # Scale final_lr by the ratio of the current lr to the base lr.
    final_lr = self.final_lr * lr / self.base_lr
    lower_bound = final_lr * (1. - 1. / (self.gamma * t + 1.))
    upper_bound = final_lr * (1. + 1. / (self.gamma * t))

    # Zero-initialised first/second moment accumulators, one per parameter.
    ms = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
    vs = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
    if self.amsbound:
        vhats = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
    else:
        # Placeholders so the zip below still yields one vhat per parameter.
        vhats = [K.zeros(1) for _ in params]
    self.weights = [self.iterations] + ms + vs + vhats

    for p, g, m, v, vhat in zip(params, grads, ms, vs, vhats):
        # apply weight decay
        if self.weight_decay != 0.:
            g += self.weight_decay * K.stop_gradient(p)

        m_t = (self.beta_1 * m) + (1. - self.beta_1) * g
        v_t = (self.beta_2 * v) + (1. - self.beta_2) * K.square(g)

        if self.amsbound:
            # Use the running maximum of the second moment in the denominator.
            vhat_t = K.maximum(vhat, v_t)
            denom = (K.sqrt(vhat_t) + self.epsilon)
            self.updates.append(K.update(vhat, vhat_t))
        else:
            denom = (K.sqrt(v_t) + self.epsilon)

        # Compute the bounds
        step_size_p = step_size * K.ones_like(denom)
        step_size_p_bound = step_size_p / denom
        # Clip the per-element step size into [lower_bound, upper_bound].
        bounded_lr_t = m_t * K.minimum(
            K.maximum(step_size_p_bound, lower_bound), upper_bound)

        p_t = p - bounded_lr_t

        self.updates.append(K.update(m, m_t))
        self.updates.append(K.update(v, v_t))
        new_p = p_t

        # Apply constraints.
        if getattr(p, 'constraint', None) is not None:
            new_p = p.constraint(new_p)

        self.updates.append(K.update(p, new_p))
    return self.updates
def focal_loss(y_true, y_pred):
    """Focal loss summed over axis 1.

    ``alpha`` and ``gamma`` are not parameters of this function; they are
    resolved from the enclosing scope.

    Fix: ``p_t`` was previously built from ``alpha_factor`` (a copy of the
    ``alpha_t`` line), so the clipped prediction was never used and the
    loss did not depend on ``y_pred`` at all. ``p_t`` is now the predicted
    probability of the true class.

    Args:
        y_true: target tensor; entries equal to 1 are treated as positives.
        y_pred: predicted probabilities, same shape as ``y_true``.

    Returns:
        Loss tensor reduced by summation over axis 1.
    """
    # Clip so that log(p_t) below can never see 0.
    epsilon = K.epsilon()
    y_pred = K.clip(y_pred, epsilon, 1.0 - epsilon)

    is_positive = K.equal(y_true, 1)

    # p_t: probability the model assigns to the true class.
    p_t = tf.where(is_positive, y_pred, 1 - y_pred)

    # alpha_t: alpha for positives, 1 - alpha otherwise.
    alpha_factor = K.ones_like(y_true) * alpha
    alpha_t = tf.where(is_positive, alpha_factor, 1 - alpha_factor)

    cross_entropy = -K.log(p_t)
    weight = alpha_t * K.pow((1 - p_t), gamma)

    loss = weight * cross_entropy
    return K.sum(loss, axis=1)
def call(self, inputs, mask=None, training=None):
    """Multi-head attention over memories + inputs with a relative term.

    Args:
        inputs: list of five tensors
            ``[inputs, relatives, memories, bias_context, bias_relative]``.
        mask: optional list; only ``mask[0]`` (the mask for the current
            inputs) is used. Memory positions are always treated as valid.
        training: forwarded to the attention dropout layer, if any.

    Returns:
        Tensor ``w_o``; per the inline shape notes, (batch, seq_len, units).
    """
    inputs, relatives, memories, bias_context, bias_relative = inputs
    full = K.concatenate([memories, inputs], axis=1)  # (batch, prev_len + seq_len, units)
    w_q = K.dot(inputs, self.kernel_q)  # (batch, seq_len, units)
    w_kv = K.dot(full, self.kernel_kv)  # (batch, prev_len + seq_len, units * 2)
    w_r = K.dot(relatives, self.kernel_r)  # (batch, prev_len + seq_len, units)
    if self.use_bias:
        w_q = K.bias_add(w_q, self.bias_q)
        w_kv = K.bias_add(w_kv, self.bias_kv)
        w_r = K.bias_add(w_r, self.bias_r)
    if self.activation is not None:
        w_q = self.activation(w_q)
        w_kv = self.activation(w_kv)
        w_r = self.activation(w_r)

    # Keys and values share one projection; split it along the last axis.
    w_k = w_kv[:, :, :self.units]  # (batch, prev_len + seq_len, units)
    w_v = w_kv[:, :, self.units:]  # (batch, prev_len + seq_len, units)

    # Content-based score: (query + context bias) . key
    w_qc = K.bias_add(w_q, bias_context)
    w_qc = self._reshape_to_batches(w_qc)  # (batch * n_head, seq_len, units_head)
    w_k = self._reshape_to_batches(w_k)  # (batch * n_head, prev_len + seq_len, units_head)
    a_context = K.batch_dot(w_qc, w_k, axes=2)  # (batch * n_head, seq_len, prev_len + seq_len)

    # Position-based score: (query + relative bias) . relative projection
    w_qr = K.bias_add(w_q, bias_relative)
    w_qr = self._reshape_to_batches(w_qr)  # (batch * n_head, seq_len, units_head)
    w_r = self._reshape_to_batches(w_r)  # (batch * n_head, prev_len + seq_len, units_head)
    a_relative = K.batch_dot(w_qr, w_r, axes=2)  # (batch * n_head, seq_len, prev_len + seq_len)
    a_relative = self._relative_shift(a_relative)  # (batch * n_head, seq_len, prev_len + seq_len)

    att = (a_context + a_relative) / K.sqrt(K.constant(self.units_head, dtype=K.floatx()))
    # Manual softmax: subtract the row max before exponentiating, zero out
    # disallowed positions, then normalise further below.
    exp = K.exp(att - K.max(att, axis=-1, keepdims=True))

    q_len, k_len = K.shape(w_q)[1], K.shape(w_k)[1]
    indices = K.expand_dims(K.arange(0, k_len), axis=0)
    upper = K.expand_dims(K.arange(k_len - q_len, k_len), axis=-1)
    # Query i may only attend to key positions <= (k_len - q_len) + i.
    exp *= K.expand_dims(K.cast(indices <= upper, K.floatx()), axis=0)
    if mask is not None and mask[0] is not None:
        mask = K.cast(mask[0], K.floatx())
        # Prepend ones so every memory position stays unmasked.
        mask = K.concatenate([K.ones_like(memories[:, :, 0]), mask], axis=1)
        exp *= K.expand_dims(self._reshape_mask(mask), axis=1)

    att = exp / K.sum(exp, axis=-1, keepdims=True)
    if self.att_drop_layer is not None:
        att = self.att_drop_layer(att, training=training)
    w_v = self._reshape_to_batches(w_v)  # (batch * n_head, prev_len + seq_len, units_head)
    w_o = K.batch_dot(att, w_v)  # (batch * n_head, seq_len, units_head)

    w_o = self._reshape_from_batches(w_o)  # (batch, seq_len, units)
    w_o = K.dot(w_o, self.kernel_o)  # (batch, seq_len, units)
    if self.use_bias:
        w_o = K.bias_add(w_o, self.bias_o)
    if self.activation is not None:
        w_o = self.activation(w_o)

    # Add shape information to tensor when using `tf.keras`
    input_shape = K.int_shape(inputs)
    if input_shape[1] is not None:
        w_o = K.reshape(w_o, (-1,) + input_shape[1:])
    return w_o
def mycrossentropy(y_true, y_pred, e=0.1):
    """Categorical cross-entropy blended with a uniform-target term.

    Returns ``(1 - e) * CE(y_true, y_pred) + e * CE(uniform, y_pred)``
    where ``uniform`` is ``1 / nb_classes`` everywhere. ``nb_classes`` is
    resolved from the enclosing scope.
    """
    uniform_target = K.ones_like(y_pred) / nb_classes
    target_loss = K.categorical_crossentropy(y_true, y_pred)
    uniform_loss = K.categorical_crossentropy(uniform_target, y_pred)
    return (1 - e) * target_loss + e * uniform_loss
def inverse_root_via_eigenvalues(m):
    """Inverse matrix square root of ``m`` via its eigendecomposition.

    Computes ``V diag(1 / sqrt(ev)) V^T`` using ``tf.linalg.eigh``.
    Eigen-directions whose eigenvalue is not greater than 1e-8 are dropped
    (they contribute zero) for numerical stability.

    Fix: the original overwrote ``ev`` with its clipped version *before*
    testing ``ev > epsilon`` for the eigenvector mask, so that test was
    always true, the masking was a no-op, and tiny eigenvalues contributed
    with weight 1 instead of 0.

    Args:
        m: matrix passed to ``tf.linalg.eigh`` (must be valid input for it).

    Returns:
        Tensor with the same shape as ``m``.
    """
    eigenvalues, eigenvectors = tf.linalg.eigh(m)
    epsilon = 1e-8

    # Decide which directions to keep before touching the eigenvalues.
    keep = eigenvalues > epsilon
    # Substitute 1 for rejected eigenvalues so sqrt/reciprocal stay finite;
    # their contribution is zeroed on the next line.
    safe_eigenvalues = tf.where(keep, eigenvalues, K.ones_like(eigenvalues))
    inv_root = tf.where(keep,
                        tf.math.reciprocal(tf.math.sqrt(safe_eigenvalues)),
                        K.zeros_like(eigenvalues))

    # Scale column j of V by inv_root[j]; equivalent to V @ diag(inv_root).
    scaled = eigenvectors * tf.expand_dims(inv_root, axis=-2)
    return tf.matmul(scaled, eigenvectors, transpose_b=True)
def loss_2nd(y_true, y_pred):
    """Weighted squared-error loss.

    Entries where ``y_true`` is non-zero are scaled by ``beta`` before
    squaring; every other entry keeps a weight of 1. ``beta`` is resolved
    from the enclosing scope.

    Fix: the original built the weights with boolean-mask item assignment
    (``b_[y_true != 0] = beta``), which backend tensors do not support.
    The weights are now built with tensor arithmetic. The leftover debug
    ``print(y_true)`` was removed.

    Returns the mean of the per-row (last axis) sums of the weighted
    squared errors.
    """
    # 1.0 where y_true != 0, 0.0 elsewhere, in y_true's dtype.
    nonzero = K.cast(K.not_equal(y_true, 0), K.dtype(y_true))
    # Exactly beta on non-zero entries and exactly 1 on the rest.
    weights = nonzero * beta + (1 - nonzero)

    weighted_sq_err = K.square((y_true - y_pred) * weights)
    row_totals = K.sum(weighted_sq_err, axis=-1)
    return K.mean(row_totals)
def myCrossEntropy(y_true, y_pred, e=0.3):
    """Sparse cross-entropy mixed with fixed per-class cross-entropy terms.

    The result is ``w_true * CE(y_true, y_pred)`` plus, for each class
    ``k`` in 0..9, ``w_k * CE(k, y_pred)``. The parameter ``e`` is not
    used by the body.

    NOTE(review): the weights subtracted from 100 and the weights applied
    to the class terms differ in two places (1.554 vs 1.553 and 10.431 vs
    10.421). Both lists are kept exactly as in the original; confirm which
    values are intended.
    """
    applied_weights = (5.765, 1.359, 1.000, 1.348, 1.553,
                       1.995, 3.042, 6.347, 10.421, 17.632)
    subtracted_weights = (5.765, 1.359, 1.000, 1.348, 1.554,
                          1.995, 3.042, 6.347, 10.431, 17.632)

    # Left-to-right subtraction, matching the original expression.
    true_weight = 100.0
    for w in subtracted_weights:
        true_weight -= w

    true_loss = K.sparse_categorical_crossentropy(y_true, y_pred)

    # Cross-entropy against a constant label k, for k = 0..9.
    class_losses = [
        K.sparse_categorical_crossentropy(K.zeros_like(y_true), y_pred),
        K.sparse_categorical_crossentropy(K.ones_like(y_true), y_pred),
    ]
    for label in range(2, 10):
        constant_target = K.ones_like(y_true) * label
        class_losses.append(
            K.sparse_categorical_crossentropy(constant_target, y_pred))

    total = true_weight * true_loss
    for w, class_loss in zip(applied_weights, class_losses):
        total = total + w * class_loss
    return total
def YOLOEval(yolo_outputs,
             anchors,
             num_classes,
             image_shape,
             max_boxes=20,
             score_threshold=.6,
             iou_threshold=.5):
    '''Returns evaluated filtered boxes based on given input.

    Boxes and scores from every output layer are concatenated, thresholded
    at ``score_threshold`` and passed through per-class non-max
    suppression (at most ``max_boxes`` boxes per class).

    Returns:
        ``(boxes, scores, classes)`` concatenated over all classes.
    '''
    layer_count = len(yolo_outputs)
    if layer_count == 3:
        anchor_mask = [[6, 7, 8], [3, 4, 5], [0, 1, 2]]
    else:
        anchor_mask = [[3, 4, 5], [1, 2, 3]]
    # Spatial size of the first output times 32 (presumably the stride of
    # that output; verify against the model definition).
    input_shape = K.shape(yolo_outputs[0])[1:3] * 32

    layer_boxes, layer_scores = [], []
    for idx, layer_output in enumerate(yolo_outputs):
        found_boxes, found_scores = YOLOBoxesAndScores(
            layer_output, anchors[anchor_mask[idx]], num_classes,
            input_shape, image_shape)
        layer_boxes.append(found_boxes)
        layer_scores.append(found_scores)
    boxes = K.concatenate(layer_boxes, axis=0)
    box_scores = K.concatenate(layer_scores, axis=0)

    mask = box_scores >= score_threshold
    max_boxes_tensor = K.constant(max_boxes, dtype='int32')

    def _select_for_class(class_id):
        # Threshold, then NMS, for a single class column.
        class_mask = mask[:, class_id]
        kept_boxes = tf.boolean_mask(boxes, class_mask)
        kept_scores = tf.boolean_mask(box_scores[:, class_id], class_mask)
        nms_index = tf.image.non_max_suppression(
            kept_boxes, kept_scores, max_boxes_tensor,
            iou_threshold=iou_threshold)
        kept_boxes = K.gather(kept_boxes, nms_index)
        kept_scores = K.gather(kept_scores, nms_index)
        class_ids = K.ones_like(kept_scores, dtype='int32') * class_id
        return kept_boxes, kept_scores, class_ids

    boxes_, scores_, classes_ = [], [], []
    for class_id in range(num_classes):
        kept_boxes, kept_scores, class_ids = _select_for_class(class_id)
        boxes_.append(kept_boxes)
        scores_.append(kept_scores)
        classes_.append(class_ids)

    boxes_ = K.concatenate(boxes_, axis=0)
    scores_ = K.concatenate(scores_, axis=0)
    classes_ = K.concatenate(classes_, axis=0)
    return boxes_, scores_, classes_
def custom_loss(y_true, y_pred, loss_weights = loss_weights):
    """Weighted sum of classification, box and landmark losses.

    Column 0 of ``y_true`` is a label that also selects which terms apply:
      * classification: rows whose label is not negative; only the
        ``SAMPLE_KEEP_RATIO`` fraction with the largest loss is averaged;
      * box regression (columns 1:5): rows with ``|label| == 1``;
      * landmark regression (columns 5:17): rows with ``label == -2``.
    A term with no valid rows contributes 0.

    Returns:
        ``classify * w[0] + roi * w[1] + pts * w[2]`` with ``w = loss_weights``.
    """
    zero_index = K.zeros_like(y_true[:, 0])
    ones_index = K.ones_like(y_true[:, 0])

    def _mean_of_largest(per_sample, valid, keep_num, mask_dtype):
        # Zero out invalid rows, keep the `keep_num` largest values and
        # average them; fall back to 0 when nothing is kept.
        masked = per_sample * tf.cast(valid, mask_dtype)
        largest, _ = tf.nn.top_k(masked, k = keep_num)
        return tf.where(K.equal(keep_num, 0),
                        tf.constant(0, dtype = tf.float32),
                        K.mean(largest))

    # --- Classifier -----------------------------------------------------
    labels = y_true[:, 0]
    class_preds = y_pred[:, 0]
    cross_entropy = (-labels * K.log(class_preds)
                     - (1 - labels) * K.log(1 - class_preds))
    classify_valid = tf.where(K.less(y_true[:, 0], 0), zero_index, ones_index)
    classify_keep = K.cast(
        tf.cast(tf.reduce_sum(classify_valid), tf.float32) * SAMPLE_KEEP_RATIO,
        dtype = tf.int32)
    classify_loss = _mean_of_largest(
        cross_entropy, classify_valid, classify_keep, cross_entropy.dtype)

    # --- Bounding box regressor (per-row sum of squared errors) ---------
    roi_error = K.sum(K.square(y_true[:, 1: 5] - y_pred[:, 1: 5]), axis = 1)
    roi_valid = tf.where(K.equal(K.abs(y_true[:, 0]), 1), ones_index, zero_index)
    roi_keep = K.cast(tf.reduce_sum(roi_valid), dtype = tf.int32)
    roi_loss = _mean_of_largest(roi_error, roi_valid, roi_keep, roi_error.dtype)

    # --- Landmark regressor (per-row sum of squared errors) -------------
    pts_error = K.sum(K.square(y_true[:, 5: 17] - y_pred[:, 5: 17]), axis = 1)
    pts_valid = tf.where(K.equal(y_true[:, 0], -2), ones_index, zero_index)
    pts_keep = K.cast(tf.reduce_sum(pts_valid), dtype = tf.int32)
    pts_loss = _mean_of_largest(pts_error, pts_valid, pts_keep, tf.float32)

    loss = (classify_loss * loss_weights[0]
            + roi_loss * loss_weights[1]
            + pts_loss * loss_weights[2])
    return loss
def _generate_dropout_mask(self, inputs, training=None):
    """Create the four input-dropout masks and store them on the instance.

    Sets ``self._dropout_mask`` to a list of four masks shaped like
    ``inputs[:, 0, :]``, or to ``None`` when ``self.dropout`` is not
    strictly between 0 and 1.
    """
    if not 0 < self.dropout < 1:
        self._dropout_mask = None
        return

    # Template of ones with the shape of a single timestep.
    first_step = K.squeeze(inputs[:, 0:1, :], axis=1)
    ones = K.ones_like(first_step)

    def dropped_inputs():
        return K.dropout(ones, self.dropout)

    masks = []
    for _ in range(4):
        # Dropout in the training phase, plain ones otherwise.
        masks.append(K.in_train_phase(dropped_inputs, ones, training=training))
    self._dropout_mask = masks
def initialize_control_tensors(self, halting):
    """
    Set up the constant and step-tracking tensors, all shaped like
    `halting`. Intended to run on the first call of the layer only,
    since later calls are expected to use inputs of identical shape.
    """
    zeros = K.zeros_like(halting, name='zeros_like_halting')
    ones = K.ones_like(halting, name='ones_like_halting')

    self.zeros_like_halting = zeros
    self.ones_like_halting = ones
    # Trackers start at one (remainder) and zero (active_steps).
    self.remainder = ones
    self.active_steps = zeros
    self.halt_budget = ones - self.halt_epsilon
def yolo_eval(
        yolo_outputs,  # boxes are filtered through NMS below
        anchors,
        num_classes,
        image_shape,
        max_boxes=50,
        score_threshold=.6,
        iou_threshold=.5):
    """Evaluate YOLO model on given input and return filtered boxes.

    Per-layer boxes and scores are concatenated, thresholded at
    ``score_threshold`` and reduced with per-class non-max suppression
    (at most ``max_boxes`` boxes per class).

    Returns:
        Tuple ``(boxes, scores, classes)`` concatenated over all classes.
    """
    three_layer_mask = [[6, 7, 8], [3, 4, 5], [0, 1, 2]]
    fallback_mask = [[3, 4, 5], [1, 2, 3]]  # default setting
    num_layers = len(yolo_outputs)
    anchor_mask = three_layer_mask if num_layers == 3 else fallback_mask

    input_shape = K.shape(yolo_outputs[0])[1:3] * 32

    boxes, box_scores = [], []
    layer = 0
    while layer < num_layers:
        layer_boxes, layer_scores = yolo_boxes_and_scores(
            yolo_outputs[layer], anchors[anchor_mask[layer]], num_classes,
            input_shape, image_shape)
        boxes.append(layer_boxes)
        box_scores.append(layer_scores)
        layer += 1
    boxes = K.concatenate(boxes, axis=0)
    box_scores = K.concatenate(box_scores, axis=0)

    above_threshold = box_scores >= score_threshold
    nms_limit = K.constant(max_boxes, dtype='int32')

    picked = []
    for class_id in range(num_classes):
        # TODO: use keras backend instead of tf.
        selector = above_threshold[:, class_id]
        candidates = tf.boolean_mask(boxes, selector)
        candidate_scores = tf.boolean_mask(box_scores[:, class_id], selector)
        keep = tf.image.non_max_suppression(candidates,
                                            candidate_scores,
                                            nms_limit,
                                            iou_threshold=iou_threshold)
        candidates = K.gather(candidates, keep)
        candidate_scores = K.gather(candidate_scores, keep)
        class_ids = K.ones_like(candidate_scores, 'int32') * class_id
        picked.append((candidates, candidate_scores, class_ids))

    boxes_ = K.concatenate([item[0] for item in picked], axis=0)
    scores_ = K.concatenate([item[1] for item in picked], axis=0)
    classes_ = K.concatenate([item[2] for item in picked], axis=0)
    return boxes_, scores_, classes_
def _time_distributed_dense(x,
                            w,
                            b=None,
                            dropout=None,
                            input_dim=None,
                            output_dim=None,
                            timesteps=None,
                            training=None):
    """Apply `y . w + b` for every temporal slice y of x.

    # Arguments
        x: input tensor.
        w: weight matrix.
        b: optional bias vector.
        dropout: optional dropout rate; when strictly between 0 and 1 the
            same dropout mask is applied to every temporal slice.
        input_dim: integer; optional dimensionality of the input.
        output_dim: integer; optional dimensionality of the output.
        timesteps: integer; optional number of timesteps.
        training: training phase tensor or boolean.

    # Returns
        Output tensor.
    """
    # Fall back to (possibly symbolic) shapes for anything not supplied.
    input_dim = input_dim or K.shape(x)[2]
    timesteps = timesteps or K.shape(x)[1]
    output_dim = output_dim or K.int_shape(w)[1]

    use_dropout = dropout is not None and 0. < dropout < 1.
    if use_dropout:
        # One mask shaped like a single timestep, repeated along time.
        template = K.ones_like(K.reshape(x[:, 0, :], (-1, input_dim)))
        step_mask = K.dropout(template, dropout)
        time_mask = K.repeat(step_mask, timesteps)
        x = K.in_train_phase(x * time_mask, x, training=training)

    # Merge batch and time axes, apply the dense transform, then split back.
    flat = K.reshape(x, (-1, input_dim))
    flat = K.dot(flat, w)
    if b is not None:
        flat = K.bias_add(flat, b)

    if K.backend() == 'tensorflow':
        out = K.reshape(flat, K.stack([-1, timesteps, output_dim]))
        out.set_shape([None, None, output_dim])
    else:
        out = K.reshape(flat, (-1, timesteps, output_dim))
    return out
def _generate_recurrent_dropout_mask(self, inputs, training=None):
    """Create the four recurrent-dropout masks and store them on the instance.

    Sets ``self._recurrent_dropout_mask`` to a list of four masks of shape
    ``(batch, self.units)``, or to ``None`` when ``self.recurrent_dropout``
    is not strictly between 0 and 1.

    Fix: the masks were previously sampled with ``self.dropout`` (the
    input-dropout rate) instead of ``self.recurrent_dropout``, so the
    recurrent connections were dropped at the wrong rate.
    """
    if 0 < self.recurrent_dropout < 1:
        # (batch, 1) column of ones tiled out to (batch, units).
        ones = K.ones_like(K.reshape(inputs[:, 0, 0], (-1, 1)))
        ones = K.tile(ones, (1, self.units))

        def dropped_inputs():
            return K.dropout(ones, self.recurrent_dropout)

        self._recurrent_dropout_mask = [
            K.in_train_phase(dropped_inputs, ones, training=training)
            for _ in range(4)
        ]
    else:
        self._recurrent_dropout_mask = None
def call(self, x, **kwargs):
    """Add or concatenate a cos/sin position embedding to ``x``.

    ``x`` is indexed as ``x[:, :, 0]`` and concatenated along axis 2, so
    it is expected to be rank 3. In ``'sum'`` mode (or when ``self.size``
    is unset) the embedding width is taken from the last axis of ``x``.

    Change: the unused ``batch_size`` / ``seq_len`` locals (and the
    ``K.shape`` ops behind them) were removed.

    Returns:
        ``position_ij + x`` for mode ``'sum'``, the concatenation
        ``[position_ij, x]`` along axis 2 for mode ``'concat'``, and
        ``None`` for any other mode.
    """
    if (self.size is None) or (self.mode == 'sum'):
        self.size = int(x.shape[-1])

    position_j = 1. / K.pow(
        10000., 2 * K.arange(self.size / 2, dtype='float32') / self.size)
    position_j = K.expand_dims(position_j, 0)

    # K.arange does not support variable lengths, so position indices
    # 0..len-1 are generated with a cumulative sum over ones instead.
    position_i = K.cumsum(K.ones_like(x[:, :, 0]), 1) - 1
    position_i = K.expand_dims(position_i, 2)

    position_ij = K.dot(position_i, position_j)
    position_ij = K.concatenate(
        [K.cos(position_ij), K.sin(position_ij)], 2)

    if self.mode == 'sum':
        return position_ij + x
    elif self.mode == 'concat':
        return K.concatenate([position_ij, x], 2)
def call(self, x, mask=None):
    """Add or concatenate a cos/sin position embedding to ``x``.

    ``x`` is indexed as ``x[:, :, 0]`` and concatenated along axis 2, so
    it is expected to be rank 3. In ``'sum'`` mode (or when ``self.size``
    is unset) the embedding width is taken from the last axis of ``x``.
    ``mask`` is accepted but not used by the body.

    Change: ``self.size == None`` was replaced by the identity test
    ``self.size is None``.

    Returns:
        ``x + outputs`` for mode ``'sum'`` (with ``outputs`` multiplied by
        ``sqrt(self.size)`` first when ``self.scale`` is truthy), the
        concatenation ``[outputs, x]`` along axis 2 for mode ``'concat'``,
        and ``None`` for any other mode.
    """
    if (self.size is None) or (self.mode == 'sum'):
        self.size = int(x.shape[-1])

    position_j = 1. / K.pow(
        10000., 2 * K.arange(self.size / 2, dtype='float32') / self.size)
    position_j = K.expand_dims(position_j, 0)

    # Position indices 0..len-1 via a cumulative sum over ones.
    position_i = tf.cumsum(K.ones_like(x[:, :, 0]), 1) - 1
    position_i = K.expand_dims(position_i, 2)

    position_ij = K.dot(position_i, position_j)
    outputs = K.concatenate([K.cos(position_ij), K.sin(position_ij)], 2)

    if self.mode == 'sum':
        if self.scale:
            outputs = outputs * self.size**0.5
        return x + outputs
    elif self.mode == 'concat':
        return K.concatenate([outputs, x], 2)
def contingency_table(y, z):
    """Return ``(tp, tn, fp, fn)`` counts for ``y`` against ``z``.

    Both tensors are rounded first. Elements that are neither 0 nor 1
    after rounding match none of the four cells and are therefore
    ignored.
    """
    y = K.cast(K.round(y), K.floatx())
    z = K.cast(K.round(z), K.floatx())

    def _count_overlap(first, second):
        # Number of positions where both boolean tensors are true.
        return K.sum(K.cast(first, K.floatx()) * K.cast(second, K.floatx()))

    ones, zeros = K.ones_like(y), K.zeros_like(y)
    y_is_one, y_is_zero = K.equal(y, ones), K.equal(y, zeros)
    z_is_one, z_is_zero = K.equal(z, ones), K.equal(z, zeros)

    tp = _count_overlap(y_is_one, z_is_one)
    tn = _count_overlap(y_is_zero, z_is_zero)
    fp = _count_overlap(y_is_zero, z_is_one)
    fn = _count_overlap(y_is_one, z_is_zero)
    return (tp, tn, fp, fn)
def precision(y_true, y_pred):
    """Precision-style metric computed against an all-ones target.

    NOTE(review): ``y_true`` is replaced by ones before use, so only its
    shape matters and the numerator equals the number of predicted
    positives. This mirrors the original behaviour; confirm it is
    intentional.
    """
    all_ones = K.ones_like(y_true)
    hits = K.sum(K.round(K.clip(all_ones * y_pred, 0, 1)))
    predicted = K.sum(K.round(K.clip(y_pred, 0, 1)))
    # Epsilon guards against division by zero when nothing is predicted.
    return hits / (predicted + K.epsilon())
def recall(y_true, y_pred):
    """Recall-style metric computed against an all-ones target.

    NOTE(review): ``y_true`` is replaced by ones before use, so only its
    shape matters and the denominator is the number of elements. This
    mirrors the original behaviour; confirm it is intentional.
    """
    all_ones = K.ones_like(y_true)
    hits = K.sum(K.round(K.clip(all_ones * y_pred, 0, 1)))
    positives = K.sum(K.round(K.clip(all_ones, 0, 1)))
    # Epsilon guards against division by zero.
    return hits / (positives + K.epsilon())
def call(self, inputs, states, training=None):
    """Run one step of the convolutional LSTM cell.

    Args:
        inputs: input tensor for the current step.
        states: ``[h_tm1, c_tm1]``, the previous memory and carry states.
        training: forwarded to the dropout mask generator.

    Returns:
        ``(h, [h, c])``: the new output and the new state list.
    """
    dropout_on = 0 < self.dropout < 1
    recurrent_dropout_on = 0 < self.recurrent_dropout < 1

    # Masks are created lazily and cached on the instance.
    if dropout_on and self._dropout_mask is None:
        self._dropout_mask = _generate_dropout_mask(
            K.ones_like(inputs), self.dropout, training=training, count=4)
    if recurrent_dropout_on and self._recurrent_dropout_mask is None:
        self._recurrent_dropout_mask = _generate_dropout_mask(
            K.ones_like(states[1]),
            self.recurrent_dropout,
            training=training,
            count=4)

    input_masks = self._dropout_mask
    recurrent_masks = self._recurrent_dropout_mask

    h_tm1 = states[0]  # previous memory state
    c_tm1 = states[1]  # previous carry state

    # One (optionally masked) copy of the input / previous state per gate,
    # in the order i, f, c, o.
    if 0 < self.dropout < 1.:
        inputs_i, inputs_f, inputs_c, inputs_o = [
            inputs * input_masks[k] for k in range(4)]
    else:
        inputs_i = inputs_f = inputs_c = inputs_o = inputs

    if 0 < self.recurrent_dropout < 1.:
        h_tm1_i, h_tm1_f, h_tm1_c, h_tm1_o = [
            h_tm1 * recurrent_masks[k] for k in range(4)]
    else:
        h_tm1_i = h_tm1_f = h_tm1_c = h_tm1_o = h_tm1

    x_i = self.input_conv(inputs_i, self.kernel_i, self.bias_i,
                          padding=self.padding)
    x_f = self.input_conv(inputs_f, self.kernel_f, self.bias_f,
                          padding=self.padding)
    x_c = self.input_conv(inputs_c, self.kernel_c, self.bias_c,
                          padding=self.padding)
    x_o = self.input_conv(inputs_o, self.kernel_o, self.bias_o,
                          padding=self.padding)

    h_i = self.recurrent_conv(h_tm1_i, self.recurrent_kernel_i)
    h_f = self.recurrent_conv(h_tm1_f, self.recurrent_kernel_f)
    h_c = self.recurrent_conv(h_tm1_c, self.recurrent_kernel_c)
    h_o = self.recurrent_conv(h_tm1_o, self.recurrent_kernel_o)

    input_gate = self.recurrent_activation(x_i + h_i)
    forget_gate = self.recurrent_activation(x_f + h_f)
    c = forget_gate * c_tm1 + input_gate * self.activation(x_c + h_c)
    output_gate = self.recurrent_activation(x_o + h_o)
    h = output_gate * self.activation(c)

    # Flag the output as phase-dependent when any dropout is configured
    # and no explicit training flag was given.
    if 0 < self.dropout + self.recurrent_dropout and training is None:
        h._uses_learning_phase = True

    return h, [h, c]
def call(self, inputs, states, training=None):
    """Run one step of the time-gated LSTM cell.

    Fixes relative to the previous version:
      * the recurrent mask guard tested ``self._dropout_mask`` instead of
        ``self._recurrent_dropout_mask``, so the recurrent mask was never
        created once the input mask existed;
      * the input mask was generated with ``count=4`` although five masks
        (i, f, c, o, t) are indexed, which raised ``IndexError`` on
        ``dp_mask[4]``;
      * the learning-phase flag was written to ``_use_learning_phase``
        rather than ``_uses_learning_phase``.

    Args:
        inputs: pair ``(inputs_x, input_t)``: the feature input and the
            time input for the current step.
        states: ``[h_tm1, c_tm1]``.
        training: forwarded to the dropout mask generator.

    Returns:
        ``(h, [h, c])``: the new output and the new state list.
    """
    inputs_x, input_t = inputs

    if 0 < self.dropout < 1 and self._dropout_mask is None:
        # Five masks: one each for the i, f, c, o and t projections.
        self._dropout_mask = _generate_dropout_mask(K.ones_like(inputs_x),
                                                    self.dropout,
                                                    training=training,
                                                    count=5)
    if (0 < self.recurrent_dropout < 1
            and self._recurrent_dropout_mask is None):
        self._recurrent_dropout_mask = _generate_dropout_mask(
            K.ones_like(states[0]),
            self.recurrent_dropout,
            training=training,
            count=4)

    dp_mask = self._dropout_mask
    rec_dp_mask = self._recurrent_dropout_mask

    h_tm1 = states[0]  # h_(t-1)
    c_tm1 = states[1]  # c_(t-1)

    if 0 < self.dropout < 1:
        inputs_i = inputs_x * dp_mask[0]
        inputs_f = inputs_x * dp_mask[1]
        inputs_c = inputs_x * dp_mask[2]
        inputs_o = inputs_x * dp_mask[3]
        inputs_t = inputs_x * dp_mask[4]
    else:
        inputs_i = inputs_x
        inputs_f = inputs_x
        inputs_c = inputs_x
        inputs_o = inputs_x
        inputs_t = inputs_x

    # Everything that depends on x.
    x_i = K.dot(inputs_i, self.kernel_i)
    x_f = K.dot(inputs_f, self.kernel_f)
    x_c = K.dot(inputs_c, self.kernel_c)
    x_o = K.dot(inputs_o, self.kernel_o)
    x_t = K.dot(inputs_t, self.kernel_t)
    if self.use_bias:
        x_i = K.bias_add(x_i, self.bias_i)
        x_f = K.bias_add(x_f, self.bias_f)
        x_c = K.bias_add(x_c, self.bias_c)
        x_o = K.bias_add(x_o, self.bias_o)
        x_t = K.bias_add(x_t, self.bias_t)

    if 0 < self.recurrent_dropout < 1:
        h_tm1_i = h_tm1 * rec_dp_mask[0]
        h_tm1_f = h_tm1 * rec_dp_mask[1]
        h_tm1_c = h_tm1 * rec_dp_mask[2]
        h_tm1_o = h_tm1 * rec_dp_mask[3]
    else:
        h_tm1_i = h_tm1
        h_tm1_f = h_tm1
        h_tm1_c = h_tm1
        h_tm1_o = h_tm1

    # Gate computations.
    i = self.recurrent_activation(x_i +
                                  K.dot(h_tm1_i, self.recurrent_kernel_i))
    f = self.recurrent_activation(x_f +
                                  K.dot(h_tm1_f, self.recurrent_kernel_f))
    # Time gate: combines the x projection with the activated time input.
    t = self.recurrent_activation(
        x_t + self.recurrent_activation(K.dot(input_t, self.kernel_time_t)))
    # NOTE(review): `recurrent_kernal_c` is spelled differently from the
    # other recurrent kernels; it is left unchanged because the attribute
    # definition is not visible here. Confirm against the layer's build().
    c = f * c_tm1 + i * self.activation(
        x_c + K.dot(h_tm1_c, self.recurrent_kernal_c)) * t
    o = self.recurrent_activation(x_o +
                                  K.dot(h_tm1_o, self.recurrent_kernel_o) +
                                  K.dot(input_t, self.kernel_time_o))

    h = self.activation(c) * o

    if 0 < self.dropout + self.recurrent_dropout:
        if training is None:
            h._uses_learning_phase = True
    return h, [h, c]
def _attend_over_memory(self, inputs, count, ns, position, memory, ws):
    """Attend over memory plus the new input, then merge two memory slots.

    Steps, as performed by the body:
      1. project ``inputs`` with ``ws["input_kernel"]`` / ``ws["input_bias"]``;
      2. append the projection to ``memory`` (with a matching ``ns`` entry
         of one and ``count`` as its position) and run
         ``self._attention_layer``;
      3. apply a residual + normalisation + two-layer MLP block to the
         attention output;
      4. pick the slot with the highest neighbour score and combine it
         with its right-hand neighbour, so the returned memory has one
         slot fewer than the extended memory.

    Returns:
        ``(context_layer, next_ns, next_position, next_memory)``.
    """
    projected = K.dot(inputs, ws["input_kernel"])
    projected = K.bias_add(projected, ws["input_bias"])

    rp_out, rp_left, rp_right = self._make_relative_position_table(
        count, position)
    rpe_out = self._make_relative_position_embedding(rp_out, ws)
    rpe_left = self._make_relative_position_embedding(rp_left, ws)
    rpe_right = self._make_relative_position_embedding(rp_right, ws)

    # Extend memory, slot sizes and slot positions with the new input.
    memory_ext = K.concatenate(
        [memory, K.expand_dims(projected, axis=1)], axis=1)
    ns_ext = K.concatenate([ns, K.ones_like(count)])
    position_ext = K.concatenate([position, count])

    attended, neighbor_score = self._attention_layer(
        memory_ext, ns_ext, rpe_out, rpe_left, rpe_right, ws)

    beta1, beta2 = array_ops.split(ws["layer_norm_beta"], 2, axis=0)
    mlp_b1, mlp_b2 = array_ops.split(ws["mlp_bias"], 2, axis=0)
    gamma = ws["layer_norm_gamma"]
    mlp_kernel = ws["mlp_kernel"]

    def _center_and_normalize(tensor):
        # Subtract the last-axis mean, then L2-normalise along that axis.
        centered = tensor - K.mean(tensor, axis=-1, keepdims=True)
        return K.l2_normalize(centered, axis=-1)

    # Residual connection + first normalisation (first half of gamma/beta).
    hidden = _center_and_normalize(projected + attended)
    hidden = hidden * gamma[:, :self.units]
    hidden = K.bias_add(hidden, beta1)

    # Two-layer MLP; both layers share one kernel tensor, split by column.
    mlp_out = K.dot(hidden, mlp_kernel[:, :self.units])
    mlp_out = K.bias_add(mlp_out, mlp_b1)
    mlp_out = self.mlp_activation(mlp_out)
    mlp_out = K.dot(mlp_out, mlp_kernel[:, self.units:])
    mlp_out = K.bias_add(mlp_out, mlp_b2)

    # Residual connection + second normalisation (second half of gamma/beta).
    hidden = _center_and_normalize(hidden + mlp_out)
    hidden = hidden * gamma[:, self.units:]
    hidden = K.bias_add(hidden, beta2)

    # inductive bias for neighbor score, which encourage old memories
    # being compressed
    neighbor_score = neighbor_score + K.exp((count - position) / 10000.0)
    merge_at = K.expand_dims(K.cast(K.argmax(neighbor_score), tf.float32))

    # Weight memory and positions by slot size so merged slots can be
    # re-averaged by the combined size below.
    weighted_memory = memory_ext * K.expand_dims(ns_ext)
    weighted_position = position_ext * ns_ext

    slot_index = ws["range"]
    # `left` selects slots up to and including merge_at from the
    # unshifted view, `right` selects slots from merge_at onwards in the
    # view shifted by one; both are 1 at merge_at, so that slot absorbs
    # its right-hand neighbour.
    left = K.cast(slot_index <= merge_at, tf.float32)
    right = K.cast(slot_index >= merge_at, tf.float32)

    next_ns = ns_ext[:, :-1] * left + ns_ext[:, 1:] * right
    next_position = (weighted_position[:, :-1] * left +
                     weighted_position[:, 1:] * right) / K.maximum(
                         1.0, next_ns)
    next_memory = (weighted_memory[:, :-1, :] * K.expand_dims(left) +
                   weighted_memory[:, 1:, :] * K.expand_dims(right)
                   ) / K.maximum(1.0, K.expand_dims(next_ns))

    return hidden, next_ns, next_position, next_memory
def variable_repeat(x):
    """Repeat ``x[1]`` along the step axis of ``x[0]``.

    Per the original comments, ``x[0]`` is (batch, steps, ...) and
    ``x[1]`` is (batch, latent_dim); the result is a batch dot product of
    a (batch, steps, 1) tensor of ones with a (batch, 1, latent_dim)
    tensor.
    """
    sequence, latent = x[0], x[1]
    # Ones shaped (batch, steps, 1).
    ones_per_step = K.ones_like(sequence[:, :, :1])
    # Latent variables shaped (batch, 1, latent_dim).
    latent_row = K.expand_dims(latent, axis=1)
    return K.batch_dot(ones_per_step, latent_row)