def call(self, inputs):
    """
    Apply the graph convolution to a single full-batch graph.

    Args:
        inputs (list): the node feature tensor followed by at least one
            adjacency tensor. The features must be rank 3 with a static batch
            dimension of exactly one (1 x N x F). Only the first adjacency
            tensor is used; it is multiplied directly against the squeezed
            (N x F) features.

    Returns:
        Keras Tensor holding ``activation(A . X . kernel [+ bias])``. The
        batch dimension removed from the features is not added back here.

    Raises:
        ValueError: if the static batch dimension of the features is not one.
    """
    node_feats, *adjacencies = inputs

    # Unpacking into three names also enforces that the features are rank 3.
    batch_size, _, _ = K.int_shape(node_feats)
    if batch_size != 1:
        raise ValueError(
            "Currently full-batch methods only support a batch dimension of one"
        )

    # Drop the singleton batch axis so plain matrix products can be used.
    node_feats = K.squeeze(node_feats, 0)

    # GCN propagation: aggregate over the adjacency, then project.
    adjacency = adjacencies[0]
    aggregated = K.dot(adjacency, node_feats)
    projected = K.dot(aggregated, self.kernel)

    # The bias is optional.
    if self.bias is not None:
        projected = projected + self.bias

    return self.activation(projected)
def __call__(self, y_true: tf.Tensor, y_pred: tf.Tensor) -> tf.Tensor:
    """ Compute the Gradient Magnitude Similarity Deviation loss.

    Both inputs are passed through ``self._scharr_edges`` and a per-pixel
    similarity map is built from the two edge maps. The loss is the standard
    deviation of that map over axes 1, 2 and 3.

    Parameters
    ----------
    y_true: :class:`tf.Tensor`
        The ground truth value
    y_pred: :class:`tf.Tensor`
        The predicted value

    Returns
    -------
    :class:`tf.Tensor`
        The loss value, with the trailing singleton axis removed
    """
    # Small constant added to numerator and denominator to keep the ratio
    # finite where both edge maps are close to zero.
    stabiliser = 0.0025

    edges_true = self._scharr_edges(y_true, True)
    edges_pred = self._scharr_edges(y_pred, True)

    numerator = 2.0 * edges_true * edges_pred
    denominator = K.square(edges_true) + K.square(edges_pred)
    similarity_map = (numerator + stabiliser) / (denominator + stabiliser)

    deviation = K.std(similarity_map, axis=(1, 2, 3), keepdims=True)
    return K.squeeze(deviation, axis=-1)
def test_RelationalGraphConvolution_dense():
    """Smoke-test RelationalGraphConvolution with dense per-relation adjacency inputs.

    Builds a model with one feature input and one dense adjacency input per
    edge type, runs a single prediction and checks the output shape.
    """
    G, features = create_graph_features()
    n_edge_types = len(G.edge_types)

    # We need to specify the batch shape as one for the GraphConvolutional logic to work
    n_nodes = features.shape[0]
    n_feat = features.shape[1]

    # Input for the node features
    x_t = Input(batch_shape=(1, n_nodes, n_feat))

    # One dense adjacency placeholder per edge type, each carrying a
    # singleton batch axis that is squeezed away before the layer sees it.
    A_placeholders = [
        Input(batch_shape=(1, n_nodes, n_nodes)) for _ in range(n_edge_types)
    ]
    A_in = [Lambda(lambda A: K.squeeze(A, 0))(A_p) for A_p in A_placeholders]

    x_inp_model = [x_t] + A_placeholders
    x_inp_conv = [x_t] + A_in

    out = RelationalGraphConvolution(
        2, num_relationships=n_edge_types)(x_inp_conv)

    # Dense adjacency matrices with a leading batch axis, matching the placeholders.
    As = [np.expand_dims(A.todense(), 0) for A in get_As(G)]
    x = features[None, :, :]

    model = keras.Model(inputs=x_inp_model, outputs=out)
    preds = model.predict([x] + As, batch_size=1)
    assert preds.shape == (1, 3, 2)
def viterbi_decoding(self, X, mask=None):
    """Decode one path per sequence and return it one-hot encoded.

    The input energy is ``activation(X . kernel + bias)``, optionally adjusted
    by ``self.add_boundary_energy``. ``self.recursion`` (called with
    ``return_logZ=False``) yields the back-pointer tables, which are then
    followed from the last step to the first with ``K.rnn``.

    Args:
        X: input tensor; its static length along axis 1 is passed to ``K.rnn``
            as ``input_length``.
        mask: optional mask forwarded to the boundary and recursion helpers.

    Returns:
        ``K.one_hot`` encoding (depth ``self.units``) of the decoded indices.
    """
    import tensorflow as tf

    energy = self.activation(K.dot(X, self.kernel) + self.bias)
    if self.use_boundary:
        energy = self.add_boundary_energy(
            energy, mask, self.left_boundary, self.right_boundary)

    tables = K.cast(self.recursion(energy, mask, return_logZ=False), 'int32')

    # Walk backwards to find the best path. The starting index can be any,
    # as all elements in the last argmin table are the same.
    tables = K.reverse(tables, 1)
    # A matrix (not a vector) state is required by tf `K.rnn`.
    start_state = [K.expand_dims(tables[:, 0, 0])]

    def pick_per_row(params, row_choice):
        # Pair each row number with its chosen column and gather those cells.
        row_count = K.shape(row_choice)[0]
        coords = K.transpose(K.stack([tf.range(row_count), row_choice]))
        return tf.gather_nd(params, coords)

    def step(table_t, carried):
        chosen = pick_per_row(table_t, carried[0][:, 0])
        chosen = K.expand_dims(chosen)
        return chosen, [chosen]

    _, best_paths, _ = K.rnn(step, tables, start_state,
                             input_length=K.int_shape(X)[1],
                             unroll=self.unroll)

    # Undo the time reversal and drop the trailing singleton axis.
    best_paths = K.reverse(best_paths, 1)
    best_paths = K.squeeze(best_paths, 2)
    return K.one_hot(best_paths, self.units)
def mean_iou(y_true, y_pred):
    """Mean intersection-over-union of thresholded predictions.

    Predictions are binarised at 0.5. Intersection and union are summed per
    sample after squeezing axis 3 (which must therefore be a singleton), and
    the epsilon-smoothed ratio is averaged over the batch.
    """
    def _per_sample_sum(tensor):
        # Squeeze axis 3, then reduce axes 2 and 1.
        return K.sum(K.sum(K.squeeze(tensor, axis=3), axis=2), axis=1)

    binarised = K.cast(K.greater(y_pred, .5), dtype='float32')  # .5 is the threshold

    intersection = _per_sample_sum(y_true * binarised)
    union = _per_sample_sum(y_true + binarised) - intersection

    eps = K.epsilon()
    return K.mean((intersection + eps) / (union + eps))
def attRNN():
    """Build, compile and return the attention-RNN audio classifier.

    Pipeline: frozen mel-spectrogram front end -> normalisation -> two
    (5, 1) convolutions with batch norm -> two bidirectional LSTMs ->
    dot-product attention with a query taken from the last time step ->
    dense head ending in a 9-way softmax. ``model.summary()`` is printed as a
    side effect.
    """
    sr = 8000

    inputs = Input((8000, 1), name='input')
    signal = Reshape((1, -1))(inputs)

    mel_layer = Melspectrogram(
        n_dft=1024,
        n_hop=128,
        input_shape=(1, 8000),
        padding='same',
        sr=sr,
        n_mels=80,
        fmin=40.0,
        fmax=sr / 2,
        power_melgram=1.0,
        return_decibel_melgram=True,
        trainable_fb=False,
        trainable_kernel=False,
        name='mel_stft',
    )
    mel_layer.trainable = False
    spec = mel_layer(signal)
    spec = Normalization2D(int_axis=0, name='mel_stft_norm')(spec)

    # Per the original note, Melspectrogram emits
    # (batch_size, melDim, timeSteps, 1); swap to time-major for the LSTMs.
    spec = Permute((2, 1, 3))(spec)

    for n_filters in (10, 1):
        spec = Conv2D(n_filters, (5, 1), activation='relu', padding='same')(spec)
        spec = BatchNormalization()(spec)

    # Drop the trailing singleton channel axis.
    seq = Lambda(lambda q: K.squeeze(q, -1), name='squeeze_last_dim')(spec)

    for _ in range(2):
        seq = Bidirectional(LSTM(64, return_sequences=True))(seq)  # [b_s, seq_len, vec_dim]

    # The query is built from the last time step of the sequence.
    last_step = Lambda(lambda q: q[:, -1])(seq)  # [b_s, vec_dim]
    query = Dense(128)(last_step)

    # Dot-product attention
    att_scores = Dot(axes=[1, 2])([query, seq])
    att_scores = Softmax(name='attSoftmax')(att_scores)  # [b_s, seq_len]

    # Attention-weighted sum of the sequence
    att_vector = Dot(axes=[1, 1])([att_scores, seq])  # [b_s, vec_dim]

    head = Dense(64, activation='relu')(att_vector)
    head = Dense(32)(head)
    output = Dense(9, activation='softmax', name='output')(head)

    model = Model(inputs=[inputs], outputs=[output])
    model.compile(optimizer='adam',
                  loss=['sparse_categorical_crossentropy'],
                  metrics=['sparse_categorical_accuracy'])
    model.summary()
    return model
def _backward_step(gamma_t, states):
    """One backward step: gather from ``gamma_t`` at the previous indices.

    The carried state holds the previous indices with an extra leading axis;
    that axis is squeezed before ``batch_gather`` and restored on the new state.

    Returns:
        tuple ``(y_t, [state])`` in the form expected by an RNN step function.
    """
    previous = K.squeeze(states[0], 0)
    selected = batch_gather(gamma_t, previous)
    next_states = [K.expand_dims(selected, 0)]
    return selected, next_states
def yolo2_loss(args,
               anchors,
               num_classes,
               label_smoothing=0,
               use_crossentropy_loss=False,
               use_crossentropy_obj_loss=False,
               rescore_confidence=False,
               use_giou_loss=False,
               use_diou_loss=False):
    """YOLOv2 loss function.

    Parameters
    ----------
    args : tuple
        ``(yolo_output, y_true)``: the final convolutional layer features and
        the ground truth produced by preprocess_true_boxes. The last axis of
        ``y_true`` is read as box (0:4), object mask (4:5) and class id (5).
    anchors : tensor
        Anchor boxes for model.
    num_classes : int
        Number of object classes.
    label_smoothing : float, default=0
        If truthy, the one-hot class targets are passed through
        ``_smooth_labels`` with this value.
    use_crossentropy_loss : bool, default=False
        Use categorical cross-entropy instead of squared error for the
        classification term.
    use_crossentropy_obj_loss : bool, default=False
        Use binary cross-entropy instead of squared error for the
        confidence term.
    rescore_confidence : bool, default=False
        If true then set confidence target to IOU of best predicted box with
        the closest matching ground truth box.
    use_giou_loss : bool, default=False
        Use ``1 - GIoU`` as the location term (takes priority over DIoU).
    use_diou_loss : bool, default=False
        Use ``1 - DIoU`` as the location term.

    Returns
    -------
    tuple
        ``(total_loss, location_loss_sum, confidence_loss_sum,
        classification_loss_sum)``. ``total_loss`` is half the sum of the
        three batch-averaged terms, with a trailing axis added.
    """
    yolo_output, y_true = args
    num_anchors = len(anchors)

    output_shape = K.shape(yolo_output)
    true_dtype = K.dtype(y_true)
    input_shape = K.cast(output_shape[1:3] * 32, true_dtype)
    grid_shape = K.cast(output_shape[1:3], true_dtype)  # height, width
    batch_size_f = K.cast(output_shape[0], K.dtype(yolo_output))  # batch size, float tensor

    # Relative weights of the individual loss terms.
    object_scale = 5
    no_object_scale = 1
    class_scale = 1
    location_scale = 1

    grid, raw_pred, pred_xy, pred_wh = yolo2_head(
        yolo_output, anchors, num_classes, input_shape, calc_loss=True)
    pred_confidence = K.sigmoid(raw_pred[..., 4:5])
    pred_class_prob = K.softmax(raw_pred[..., 5:])

    object_mask = y_true[..., 4:5]

    # Expand pred x,y,w,h to allow comparison with ground truth.
    # batch, conv_height, conv_width, num_anchors, num_true_boxes, box_params
    pred_boxes = K.expand_dims(K.concatenate([pred_xy, pred_wh]), 4)
    raw_true_boxes = K.expand_dims(y_true[..., 0:4], 4)

    iou_scores = box_iou(pred_boxes, raw_true_boxes)
    iou_scores = K.squeeze(iou_scores, axis=0)

    # Best IOU for each location.
    best_ious = K.expand_dims(K.max(iou_scores, axis=4))

    # A detector has found an object if IOU > thresh for some true box.
    object_detections = K.cast(best_ious > 0.6, K.dtype(best_ious))

    # Determine confidence weights from object and no_object weights.
    # NOTE: YOLOv2 does not use binary cross-entropy. Here we try it.
    no_object_weights = (no_object_scale * (1 - object_detections) *
                         (1 - object_mask))

    if use_crossentropy_obj_loss:
        no_objects_loss = no_object_weights * K.binary_crossentropy(
            K.zeros(K.shape(pred_confidence)),
            pred_confidence,
            from_logits=False)
        if rescore_confidence:
            confidence_target = best_ious
        else:
            confidence_target = K.ones(K.shape(pred_confidence))
        objects_loss = (object_scale * object_mask * K.binary_crossentropy(
            confidence_target, pred_confidence, from_logits=False))
    else:
        no_objects_loss = no_object_weights * K.square(-pred_confidence)
        confidence_target = best_ious if rescore_confidence else 1
        objects_loss = (object_scale * object_mask *
                        K.square(confidence_target - pred_confidence))
    confidence_loss = objects_loss + no_objects_loss

    # Classification loss for matching detections.
    # NOTE: YOLOv2 does not use categorical cross-entropy loss.
    #       Here we try it.
    matching_classes = K.one_hot(K.cast(y_true[..., 5], 'int32'), num_classes)
    if label_smoothing:
        matching_classes = _smooth_labels(matching_classes, label_smoothing)

    if use_crossentropy_loss:
        class_error = K.expand_dims(
            K.categorical_crossentropy(
                matching_classes, pred_class_prob, from_logits=False),
            axis=-1)
    else:
        class_error = K.square(matching_classes - pred_class_prob)
    classification_loss = class_scale * object_mask * class_error

    if use_giou_loss or use_diou_loss:
        # Use (1 - GIoU) or (1 - DIoU) as the location loss; GIoU wins when
        # both flags are set.
        overlap_fn = box_giou if use_giou_loss else box_diou
        overlap = overlap_fn(pred_boxes, raw_true_boxes)
        overlap = K.squeeze(overlap, axis=-1)
        location_loss = location_scale * object_mask * (1 - overlap)
    else:
        # YOLOv2 location loss for matching detection boxes.
        # Darknet transforms the boxes before computing the loss.
        trans_true_xy = y_true[..., :2] * grid_shape[..., ::-1] - grid
        trans_true_wh = K.log(y_true[..., 2:4] / anchors *
                              input_shape[..., ::-1])
        # avoid log(0)=-inf
        trans_true_wh = K.switch(object_mask, trans_true_wh,
                                 K.zeros_like(trans_true_wh))
        trans_true_boxes = K.concatenate([trans_true_xy, trans_true_wh])

        # Unadjusted box predictions for loss.
        trans_pred_boxes = K.concatenate(
            (K.sigmoid(raw_pred[..., 0:2]), raw_pred[..., 2:4]), axis=-1)

        location_loss = (location_scale * object_mask *
                         K.square(trans_true_boxes - trans_pred_boxes))

    confidence_loss_sum = K.sum(confidence_loss) / batch_size_f
    classification_loss_sum = K.sum(classification_loss) / batch_size_f
    location_loss_sum = K.sum(location_loss) / batch_size_f
    total_loss = 0.5 * (confidence_loss_sum + classification_loss_sum +
                        location_loss_sum)

    # Fit for tf 2.0.0 loss shape
    total_loss = K.expand_dims(total_loss, axis=-1)

    return (total_loss, location_loss_sum, confidence_loss_sum,
            classification_loss_sum)
def call(self, inputs, **kwargs):
    """
    Creates the layer as a Keras graph, using a sparse adjacency matrix.

    Notes:
        This does not add self loops to the adjacency matrix.

    Args:
        inputs (list): only the first two items are read here:

            - node features (size 1 x N x F),
            - sparse graph adjacency matrix; its ``indices`` are used as
              (row, column) pairs into an N x N attention matrix,

            where N is the number of nodes in the graph and F is the
            dimensionality of node features.

    Returns:
        Keras tensor with the batch dimension restored at axis 0.

    Raises:
        TypeError: if the adjacency input is not a ``tf.SparseTensor``.
        ValueError: if the static batch dimension of the features is not one.
    """
    node_feats = inputs[0]  # Node features (1 x N x F)
    adjacency = inputs[1]  # Sparse adjacency matrix

    if not isinstance(adjacency, tf.SparseTensor):
        raise TypeError("A is not sparse")

    # Graph edges (E x 2)
    edge_index = adjacency.indices

    batch_dim, n_nodes, _ = K.int_shape(node_feats)
    if batch_dim != 1:
        raise ValueError(
            "Currently full-batch methods only support a batch dimension of one"
        )

    # Remove singleton batch dimension
    node_feats = K.squeeze(node_feats, 0)

    head_outputs = []
    for head in range(self.attn_heads):
        kernel = self.kernels[head]  # W in the paper (F x F')
        attention_kernel = self.attn_kernels[head]  # Attention kernel a in the paper (2F' x 1)

        # Compute inputs to attention network
        projected = K.dot(node_feats, kernel)  # (N x F')

        # Compute feature combinations
        # Note: [[a_1], [a_2]]^T [[Wh_i], [Wh_j]] = [a_1]^T [Wh_i] + [a_2]^T [Wh_j]
        score_self = K.dot(projected, attention_kernel[0])  # (N x 1), [a_1]^T [Wh_i]
        score_neigh = K.dot(projected, attention_kernel[1])  # (N x 1), [a_2]^T [Wh_j]

        # Pick out the scores for the non-zero entries of the matrix only.
        edge_self = tf.gather(K.reshape(score_self, [-1]), edge_index[:, 0], axis=0)
        edge_neigh = tf.gather(K.reshape(score_neigh, [-1]), edge_index[:, 1], axis=0)

        # Add nonlinearity
        edge_scores = LeakyReLU(alpha=0.2)(edge_self + edge_neigh)

        # Apply dropout to features and attention coefficients
        dropped_feats = Dropout(self.in_dropout_rate)(projected)  # (N x F')
        dropped_scores = Dropout(self.attn_dropout_rate)(edge_scores)  # one value per edge

        # Convert to a sparse matrix and normalise to attention
        # coefficients (Eq. 3 of the paper).
        attention = tf.sparse.SparseTensor(
            edge_index, values=dropped_scores, dense_shape=[n_nodes, n_nodes])
        attention = tf.sparse.softmax(attention)  # (N x N)

        # Linear combination with neighbors' features [YT: see Eq. 4]
        aggregated = tf.sparse.sparse_dense_matmul(attention, dropped_feats)  # (N x F')

        if self.use_bias:
            aggregated = K.bias_add(aggregated, self.biases[head])

        # Add output of attention head to final output
        head_outputs.append(aggregated)

    # Aggregate the heads' output according to the reduction method
    if self.attn_heads_reduction == "concat":
        combined = K.concatenate(head_outputs)  # (N x KF')
    else:
        combined = K.mean(K.stack(head_outputs), axis=0)  # (N x F')

    combined = self.activation(combined)

    # Add the batch dimension back; it is always one when this point is reached.
    return K.expand_dims(combined, 0)
def _init_models(self):
    """Build the prediction and training models and the target-sync op.

    Sets ``self.predict_model``, ``self.train_model``, ``self._tau`` and
    ``self._target_model_sync_op``. The bootstrapped target ``G`` is chosen by
    ``self.update_strategy`` ('q_learning', 'double_q_learning' or 'sarsa').

    Raises:
        ValueError: if ``self.update_strategy`` is not one of the above.
    """
    # inputs
    X = keras.Input(name='X', shape=self.INPUT_SHAPE, dtype=tf.uint8)
    R = keras.Input(name='R', shape=(1, ), dtype=tf.float32)
    D = keras.Input(name='D', shape=(1, ), dtype=tf.bool)
    X_next = keras.Input(name='X_next', shape=self.INPUT_SHAPE, dtype=tf.uint8)
    A_next = keras.Input(name='A_next', shape=(1, ), dtype=tf.int32)

    def construct_x_p(frames):
        # frames.shape = (None, 105, 80, 3): three stacked frames on axis 3.
        scaled = tf.cast(frames, tf.float32) / 255.
        x_prev1, x_prev0, x = tf.split(scaled, 3, 3)
        dx = x - x_prev0  # "velocity"
        ddx = (x - x_prev0) - (x_prev0 - x_prev1)  # "acceleration"
        return tf.concat([x, dx, ddx], axis=3)  # shape: (None, 105, 80, 3)

    def layers(variable_scope):
        # A fresh stack of layers whose names are prefixed by the scope.
        def scoped(name):
            return '{}/{}'.format(variable_scope, name)

        return [
            keras.layers.Lambda(construct_x_p, name=scoped('construct_x_p')),
            keras.layers.Conv2D(name=scoped('conv1'), filters=16,
                                kernel_size=8, strides=4, activation='relu'),
            keras.layers.Conv2D(name=scoped('conv2'), filters=32,
                                kernel_size=4, strides=2, activation='relu'),
            keras.layers.Flatten(name=scoped('flatten')),
            keras.layers.Dense(name=scoped('dense1'), units=256,
                               activation='relu'),
            keras.layers.Dense(name=scoped('outputs'), units=self.num_actions,
                               kernel_initializer='zeros'),
        ]

    def forward_pass(tensor, variable_scope):
        # Each call instantiates a new layer stack for the given scope.
        for layer in layers(variable_scope):
            tensor = layer(tensor)
        return tensor

    # predict
    Q = forward_pass(X, variable_scope='primary')

    # bootstrapped target: the bootstrap factor is zero where D is true.
    bootstrap = K.squeeze(1 - tf.cast(D, tf.float32), axis=1)
    Q_next = forward_pass(X_next, variable_scope='target')
    R_flat = K.squeeze(R, axis=1)

    strategy = self.update_strategy
    if strategy == 'q_learning':
        Q_next_proj = K.max(Q_next, axis=1)
    elif strategy == 'double_q_learning':
        # NOTE(review): forward_pass builds a new 'primary' layer stack here
        # rather than reusing the one behind Q - confirm this is intended.
        Q_next_prim = forward_pass(X_next, variable_scope='primary')
        A_next_prim = tf.argmax(Q_next_prim, axis=1)
        Q_next_proj = MaskedLoss.project_onto_actions(Q_next, A_next_prim)
    elif strategy == 'sarsa':
        Q_next_proj = MaskedLoss.project_onto_actions(Q_next, A_next)
    else:
        raise ValueError(
            "bad update_strategy; valid options are: {}".format(
                self.UPDATE_STRATEGIES))
    G = R_flat + bootstrap * self.gamma * Q_next_proj

    # models
    self.predict_model = keras.Model(inputs=X, outputs=Q)
    self.train_model = keras.Model(inputs=[X, R, D, X_next, A_next],
                                   outputs=Q)
    self.train_model.compile(
        loss=MaskedLoss(G, tf.losses.huber_loss),
        optimizer=keras.optimizers.Adam(self.learning_rate))

    # op for syncing target model: w_targ <- w_targ + tau * (w_prim - w_targ)
    self._tau = tf.placeholder(tf.float32, shape=())
    primary_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,
                                     scope='primary')
    target_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,
                                    scope='target')
    sync_updates = (
        K.update(w_targ, w_targ + self._tau * (w_prim - w_targ))
        for w_prim, w_targ in zip(primary_vars, target_vars)
    )
    self._target_model_sync_op = tf.group(*sync_updates)
def din(item_count,
        cate_count,
        deep_field_size,
        deep_feature_size,
        continuous_feature_size,
        emb_size,
        hidden_units=32,
        learning_rate=1,
        learning_rate_decay=0.01,
        drop_rate=0.2,
        deep_layers=(80, 40, 1)):
    """
    Build and compile a DIN model that accepts dense features, sparse
    features, item ids and a sequence of user-interest ids.

    :param item_count: number of items (input size of the item embeddings)
    :param cate_count: number of categories (input size of the category embedding)
    :param deep_field_size: number of sparse feature fields per sample
    :param deep_feature_size: input size of the sparse feature embedding
    :param continuous_feature_size: number of continuous features per sample
    :param emb_size: output size of the sparse feature embedding
    :param hidden_units: number of hidden units; item and category embeddings
        each use ``hidden_units // 2``
    :param learning_rate: SGD learning rate
    :param learning_rate_decay: SGD ``decay`` argument
    :param drop_rate: rate used by every Dropout layer
    :param deep_layers: unit counts of the final dense stack; the last entry
        has no activation
    :return: compiled model with a sigmoid output and binary cross-entropy loss
    """
    def variance_scaling():
        # New initializer instance per call, as for each embedding below.
        return tf.keras.initializers.VarianceScaling(
            scale=1.0, mode="fan_avg", distribution="normal")

    def l2_penalty():
        return tf.keras.regularizers.l2(0.01)

    # Shared by the dense layers.
    initializer = variance_scaling()

    # !!! Input start !!!
    # candidate item
    target_item = keras.layers.Input(shape=(1, ), name='target_item',
                                     dtype="int32")
    # category the candidate item belongs to
    target_cate = keras.layers.Input(shape=(1, ), name='target_cate',
                                     dtype="int32")
    # user history: items
    hist_item_seq = keras.layers.Input(shape=(None, ), name="hist_item_seq",
                                       dtype="int32")
    # user history: categories
    hist_cate_seq = keras.layers.Input(shape=(None, ), name="hist_cate_seq",
                                       dtype="int32")
    # history length
    hist_len = keras.layers.Input(shape=(1, ), name='hist_len', dtype="int32")
    # deep feature index
    deep_feat_index = keras.layers.Input(shape=(deep_field_size, ),
                                         name="deep_feat_index")
    # deep feature value
    deep_feat_value = keras.layers.Input(shape=(deep_field_size, ),
                                         name="deep_feat_value")
    # continuous features
    continuous_feature_value = keras.layers.Input(
        shape=(continuous_feature_size, ), name="continuous_feature_value")
    # !!! Input end !!!

    # !!! deep part start !!!
    # libsvm-style feature processing: embed each index, scale by its value.
    sparse_emb = keras.layers.Embedding(
        deep_feature_size,
        emb_size,
        name='deep_feature_embedding',
        embeddings_initializer=variance_scaling(),
        embeddings_regularizer=l2_penalty())(deep_feat_index)
    sparse_value = keras.layers.Reshape(
        (deep_field_size, 1), name="deep_feat_value_reshape")(deep_feat_value)
    sparse_emb = keras.layers.Multiply(
        name="deep_feature_embedding_multiply")([sparse_emb, sparse_value])
    deep_emb_dense = keras.layers.Reshape(
        (deep_field_size * emb_size, ),
        name="deep_emb_dense_reshape")(sparse_emb)

    # concat continuous feature & sparse features
    deep_dense = keras.layers.Lambda(
        lambda x: K.concatenate([x[0], x[1]], axis=-1))(
            [deep_emb_dense, continuous_feature_value])
    deep_dense = keras.layers.Dense(hidden_units,
                                    activation="sigmoid",
                                    kernel_initializer=initializer,
                                    name="deep_dense")(deep_dense)
    deep_dense = keras.layers.BatchNormalization()(deep_dense)
    deep_dense = keras.layers.Dropout(rate=drop_rate)(deep_dense)
    # !!! deep part end !!!

    # !!! attention part start !!!
    item_emb = keras.layers.Embedding(
        input_dim=item_count,
        output_dim=hidden_units // 2,
        name="item_emb",
        embeddings_initializer=variance_scaling(),
        embeddings_regularizer=l2_penalty())
    cate_emb = keras.layers.Embedding(
        input_dim=cate_count,
        output_dim=hidden_units // 2,
        name="cate_emb",
        embeddings_initializer=variance_scaling(),
        embeddings_regularizer=l2_penalty())
    item_b = keras.layers.Embedding(
        input_dim=item_count,
        output_dim=1,
        name="item_bias",
        embeddings_initializer=keras.initializers.Constant(0.0),
        embeddings_regularizer=l2_penalty())

    # get target bias embedding
    target_item_bias_emb = item_b(target_item)
    target_item_bias_emb = keras.layers.Lambda(
        lambda x: K.squeeze(x, axis=1))(target_item_bias_emb)

    # get target embedding
    target_item_emb = item_emb(target_item)
    target_cate_emb = cate_emb(target_cate)
    i_emb = keras.layers.Lambda(
        lambda x: K.concatenate([x[0], x[1]], axis=-1))(
            [target_item_emb, target_cate_emb])
    i_emb = keras.layers.Lambda(lambda x: K.squeeze(x, axis=1))(i_emb)

    # get history item embedding
    hist_item_emb = item_emb(hist_item_seq)
    hist_cate_emb = cate_emb(hist_cate_seq)
    hist_emb = keras.layers.Lambda(
        lambda x: K.concatenate([x[0], x[1]], axis=-1))(
            [hist_item_emb, hist_cate_emb])

    # build the attention between the click sequence and the candidate
    din_attention = attention([i_emb, hist_emb, hist_len])
    din_attention = keras.layers.Lambda(
        lambda x: tf.reshape(x, [-1, hidden_units]))(din_attention)
    # !!! attention part end !!!

    # !!! concat feature dense start !!!
    din_item = keras.layers.Lambda(
        lambda x: K.concatenate([x[0], x[1], x[2]], axis=-1))(
            [i_emb, din_attention, deep_dense])
    final_index = len(deep_layers) - 1
    for i, units in enumerate(deep_layers):
        # Every layer but the last uses a sigmoid activation.
        activation = None if i == final_index else "sigmoid"
        din_item = keras.layers.Dense(
            units,
            kernel_initializer=initializer,
            kernel_regularizer=l2_penalty(),
            activation=activation,
            name="concat_dense_{0}".format(i))(din_item)
        din_item = keras.layers.BatchNormalization()(din_item)
        din_item = keras.layers.Dropout(rate=drop_rate)(din_item)
    # !!! concat feature dense end !!!

    logits = keras.layers.Add()([din_item, target_item_bias_emb])
    output = keras.layers.Activation('sigmoid')(logits)

    model = keras.models.Model(inputs=[
        hist_item_seq, hist_cate_seq, target_item, target_cate, hist_len,
        deep_feat_index, deep_feat_value, continuous_feature_value
    ],
                               outputs=output,
                               name="model")
    model.compile(optimizer=keras.optimizers.SGD(learning_rate=learning_rate,
                                                 decay=learning_rate_decay),
                  loss="binary_crossentropy")
    return model
# Continuation of the CRNN feature extractor; `conv_3`, `inputs` and
# `char_list` are defined earlier in the file.
conv_4 = Conv2D(256, (3, 3), activation='relu', padding='same')(conv_3)
# Pool with a (2, 1) window.
pool_4 = MaxPool2D(pool_size=(2, 1))(conv_4)

conv_5 = Conv2D(512, (3, 3), activation='relu', padding='same')(pool_4)
# Batch normalization after each of the two 512-filter convolutions.
batch_norm_5 = BatchNormalization()(conv_5)

conv_6 = Conv2D(512, (3, 3), activation='relu', padding='same')(batch_norm_5)
batch_norm_6 = BatchNormalization()(conv_6)
pool_6 = MaxPool2D(pool_size=(2, 1))(batch_norm_6)

conv_7 = Conv2D(512, (2, 2), activation='relu')(pool_6)

# Remove axis 1 (which must be a singleton here) so the recurrent layers
# receive a 3-D tensor.
squeezed = Lambda(lambda feature_map: K.squeeze(feature_map, 1))(conv_7)

# Two stacked bidirectional LSTM layers with units=128.
blstm_1 = Bidirectional(
    LSTM(128, return_sequences=True, dropout=0.2))(squeezed)
blstm_2 = Bidirectional(
    LSTM(128, return_sequences=True, dropout=0.2))(blstm_1)

# One output class per character plus one extra class.
outputs = Dense(len(char_list) + 1, activation='softmax')(blstm_2)

# Model to be used at test time.
act_model = Model(inputs, outputs)
act_model.load_weights('weights/crnn_model.h5')

# Load the sample image and convert it from BGR to grayscale.
img = cv2.cvtColor(cv2.imread("t1.jpg"), cv2.COLOR_BGR2GRAY)
def similarity(self, context, query):
    """Score context/query pairs from ``[context, query, context * query]``.

    The three tensors are concatenated on the last axis, projected with
    ``self.W`` and the trailing singleton axis is squeezed, then ``self.b``
    is added.
    """
    elementwise = context * query
    joint = K.concatenate([context, query, elementwise], axis=-1)
    scores = K.squeeze(K.dot(joint, self.W), axis=-1)
    return keras.activations.linear(scores + self.b)
def squeeze_wrapper(self, tensor):
    """Return ``tensor`` with axis 1 squeezed out."""
    squeezed = squeeze(tensor, axis=1)
    return squeezed
def fpn_classifier_graph(rois,
                         feature_maps,
                         image_meta,
                         pool_size,
                         num_classes,
                         train_bn=True,
                         fc_layers_size=1024):
    """Builds the computation graph of the feature pyramid network classifier
    and regressor heads.

    rois: [batch, num_rois, (y1, x1, y2, x2)] Proposal boxes in normalized
          coordinates.
    feature_maps: List of feature maps from different layers of the pyramid,
                  [P2, P3, P4, P5]. Each has a different resolution.
    image_meta: [batch, (meta data)] Image details. See compose_image_meta()
    pool_size: The width of the square feature map generated from ROI Pooling.
    num_classes: number of classes, which determines the depth of the results
    train_bn: Boolean. Train or freeze Batch Norm layers
    fc_layers_size: Size of the 2 FC layers

    Returns:
        logits: [batch, num_rois, NUM_CLASSES] classifier logits (before softmax)
        probs: [batch, num_rois, NUM_CLASSES] classifier probabilities
        bbox_deltas: [batch, num_rois, NUM_CLASSES, (dy, dx, log(dh), log(dw))]
                     Deltas to apply to proposal boxes
    """
    # ROI Pooling
    # Shape: [batch, num_rois, POOL_SIZE, POOL_SIZE, channels]
    x = PyramidROIAlign([pool_size, pool_size],
                        name="roi_align_classifier")([rois, image_meta] +
                                                     feature_maps)

    # Two FC layers of size fc_layers_size (implemented with Conv2D for
    # consistency). The first uses a pool_size x pool_size "valid" kernel.
    x = TimeDistributed(Conv2D(fc_layers_size, (pool_size, pool_size),
                               padding="valid"),
                        name="mrcnn_class_conv1")(x)
    x = TimeDistributed(BatchNormalization(),
                        name='mrcnn_class_bn1')(x, training=train_bn)
    x = Activation('relu')(x)
    x = TimeDistributed(Conv2D(fc_layers_size, (1, 1)),
                        name="mrcnn_class_conv2")(x)
    x = TimeDistributed(BatchNormalization(),
                        name='mrcnn_class_bn2')(x, training=train_bn)
    x = Activation('relu')(x)

    # Squeeze axes 3 and 2 (higher axis first so the indices stay valid).
    shared = Lambda(lambda t: K.squeeze(K.squeeze(t, 3), 2),
                    name="pool_squeeze")(x)

    # Classifier head
    mrcnn_class_logits = TimeDistributed(Dense(num_classes),
                                         name='mrcnn_class_logits')(shared)
    mrcnn_probs = TimeDistributed(Activation("softmax"),
                                  name="mrcnn_class")(mrcnn_class_logits)

    # BBox head
    # [batch, num_rois, NUM_CLASSES * (dy, dx, log(dh), log(dw))]
    x = TimeDistributed(Dense(num_classes * 4, activation='linear'),
                        name='mrcnn_bbox_fc')(shared)
    # Reshape to [batch, num_rois, NUM_CLASSES, (dy, dx, log(dh), log(dw))]
    # TODO: Reshape was -> (s[1], num_classes, 4), with s = K.int_shape(x)
    mrcnn_bbox = Reshape((-1, num_classes, 4), name="mrcnn_bbox")(x)

    return mrcnn_class_logits, mrcnn_probs, mrcnn_bbox
def from_transcoder_output_tavuk(x):
    """Extract entries 10:13 of the last axis at position ``tavuk_pos`` on axis 2.

    The slice keeps axis 2 as a singleton, which is then squeezed away.
    """
    window = x[:, :, tavuk_pos:tavuk_pos + 1, 10:13]
    return K.squeeze(window, axis=2)
def GhostNet(input_shape=(224, 224, 3),
             include_top=True,
             classes=0,
             width=1.3,
             strides=2,
             name="GhostNet"):
    """Build a GhostNet model.

    Args:
        input_shape: shape of the input image tensor (without batch axis).
        include_top: when true, append global pooling, a 1280-filter 1x1
            convolution and a ``classes``-way softmax classifier.
        classes: number of filters in the final classification convolution.
        width: multiplier applied to every channel count before it is rounded
            with ``_make_divisible(..., 4)``.
        strides: stride of the stem convolution only.
        name: model name.

    Returns:
        The assembled ``Model``.
    """
    inputs = Input(shape=input_shape)

    stem_channels = _make_divisible(16 * width, 4)
    nn = Conv2D(stem_channels, (3, 3),
                strides=strides,
                padding='same',
                use_bias=False,
                kernel_initializer=CONV_KERNEL_INITIALIZER)(inputs)  # padding=1
    nn = BatchNormalization(axis=-1)(nn)
    nn = activation(nn)

    # Per-bottleneck configuration; the five lists are read in lockstep.
    dw_kernels = [3, 3, 3, 5, 5, 3, 3, 3, 3, 3, 3, 5, 5, 5, 5, 5]
    expansions = [
        16, 48, 72, 72, 120, 240, 200, 184, 184, 480, 672, 672, 960, 960, 960,
        512
    ]
    out_channels = [
        16, 24, 24, 40, 40, 80, 80, 80, 80, 112, 112, 160, 160, 160, 160, 160
    ]
    se_ratios = [
        0, 0, 0, 0.25, 0.25, 0, 0, 0, 0, 0.25, 0.25, 0.25, 0, 0.25, 0, 0.25
    ]
    block_strides = [1, 2, 1, 2, 1, 2, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1]

    previous_out = stem_channels
    for dw_kernel, stride, expansion, block_out, se_ratio in zip(
            dw_kernels, block_strides, expansions, out_channels, se_ratios):
        block_out = _make_divisible(block_out * width, 4)
        expansion = _make_divisible(expansion * width, 4)
        # The shortcut flag is off only when both channel count and
        # resolution are unchanged.
        shortcut = not (block_out == previous_out and stride == 1)
        nn = ghost_bottleneck(nn, dw_kernel, stride, expansion, block_out,
                              se_ratio, shortcut)
        previous_out = block_out

    head_channels = _make_divisible(expansions[-1] * width, 4)
    nn = Conv2D(head_channels, (1, 1),
                strides=(1, 1),
                padding='valid',
                use_bias=False,
                kernel_initializer=CONV_KERNEL_INITIALIZER)(nn)  # padding=0
    nn = BatchNormalization(axis=-1)(nn)
    nn = activation(nn)

    if include_top:
        nn = GlobalAveragePooling2D()(nn)
        # Back to a 1x1 spatial map so the classifier can use 1x1 convolutions.
        nn = Reshape((1, 1, int(nn.shape[1])))(nn)
        nn = Conv2D(1280, (1, 1),
                    strides=(1, 1),
                    padding='same',
                    use_bias=False,
                    kernel_initializer=CONV_KERNEL_INITIALIZER)(nn)
        nn = BatchNormalization(axis=-1)(nn)
        nn = activation(nn)
        nn = Conv2D(classes, (1, 1),
                    strides=(1, 1),
                    padding='same',
                    use_bias=False,
                    kernel_initializer=CONV_KERNEL_INITIALIZER)(nn)
        nn = K.squeeze(nn, 1)
        nn = Activation('softmax')(nn)

    return Model(inputs=inputs, outputs=nn, name=name)
def call(self, x):
    """Attention pooling over axis 1 of ``x``.

    A score per step is computed as ``tanh(x . W + b)`` with its trailing
    singleton axis squeezed. The scores are softmax-normalised and used to
    weight ``x`` before summing over axis 1.
    """
    scores = K.tanh(K.dot(x, self.W) + self.b)
    scores = K.squeeze(scores, axis=-1)
    weights = K.expand_dims(K.softmax(scores), axis=-1)
    weighted = x * weights
    return K.sum(weighted, axis=1)
def call(self, inputs):
    """
    Creates the layer as a Keras graph.

    Note that the inputs are tensors with a batch dimension of 1:
    Keras requires this batch dimension, and for full-batch methods
    we only have a single "batch".

    There are two inputs required, the node features,
    and the graph adjacency matrix

    Notes:
        This does not add self loops to the adjacency matrix.

    Args:
        inputs (list): only the first two items are read here:

            - node features (size 1 x N x F),
            - graph adjacency matrix (size N x N),

            where N is the number of nodes in the graph and F is the
            dimensionality of node features.

    Returns:
        Keras tensor with the batch dimension restored at axis 0.

    Raises:
        ValueError: if the static batch dimension of the features is not one.
    """
    X = inputs[0]  # Node features (1 x N x F)
    A = inputs[1]  # Adjacency matrix (N x N)
    N = K.int_shape(A)[-1]

    # Unpacking into three names also enforces that the features are rank 3.
    batch_dim, _, _ = K.int_shape(X)
    if batch_dim != 1:
        raise ValueError(
            "Currently full-batch methods only support a batch dimension of one"
        )

    # Remove singleton batch dimension
    X = K.squeeze(X, 0)

    outputs = []
    for head in range(self.attn_heads):
        kernel = self.kernels[head]  # W in the paper (F x F')
        attention_kernel = self.attn_kernels[
            head]  # Attention kernel a in the paper (2F' x 1)

        # Compute inputs to attention network
        features = K.dot(X, kernel)  # (N x F')

        # Compute feature combinations
        # Note: [[a_1], [a_2]]^T [[Wh_i], [Wh_j]] = [a_1]^T [Wh_i] + [a_2]^T [Wh_j]
        attn_for_self = K.dot(
            features, attention_kernel[0])  # (N x 1), [a_1]^T [Wh_i]
        attn_for_neighs = K.dot(
            features, attention_kernel[1])  # (N x 1), [a_2]^T [Wh_j]

        # Attention head a(Wh_i, Wh_j) = a^T [[Wh_i], [Wh_j]]
        dense = attn_for_self + K.transpose(
            attn_for_neighs)  # (N x N) via broadcasting

        # Add nonlinearity
        dense = LeakyReLU(alpha=0.2)(dense)

        # Mask values before activation (Vaswani et al., 2017)
        # YT: this only works for 'binary' A, not for 'weighted' A!
        # YT: if A does not have self-loops, the node itself will be masked,
        #     so A should have self-loops
        if not self.saliency_map_support:
            mask = -10e9 * (1.0 - A)
            dense += mask
            dense = K.softmax(dense)  # (N x N), Eq. 3 of the paper
        else:
            # GAT with support for saliency calculations. The row-max-shifted
            # exponential is shared by both terms, so build it once.
            shifted_exp = K.exp(dense - K.max(dense, axis=1, keepdims=True))
            W = (self.delta * A) * shifted_exp * (
                1 - self.non_exist_edge) + self.non_exist_edge * (
                    A + self.delta * (tf.ones((N, N)) - A) + tf.eye(N)
                ) * shifted_exp
            dense = W / K.sum(W, axis=1, keepdims=True)

        # Apply dropout to features and attention coefficients
        dropout_feat = Dropout(self.in_dropout_rate)(features)  # (N x F')
        dropout_attn = Dropout(self.attn_dropout_rate)(dense)  # (N x N)

        # Linear combination with neighbors' features [YT: see Eq. 4]
        node_features = K.dot(dropout_attn, dropout_feat)  # (N x F')

        if self.use_bias:
            node_features = K.bias_add(node_features, self.biases[head])

        # Add output of attention head to final output
        outputs.append(node_features)

    # Aggregate the heads' output according to the reduction method
    if self.attn_heads_reduction == "concat":
        output = K.concatenate(outputs)  # (N x KF')
    else:
        output = K.mean(K.stack(outputs), axis=0)  # (N x F')

    # Nonlinear activation function
    output = self.activation(output)

    # Add the batch dimension back; it is always one when this point is reached.
    return K.expand_dims(output, 0)
def line_lstm_ctc(input_shape, output_shape, window_width=28, window_stride=14,
                  conv_dim=128, lstm_dim=128):
    """
    Build a line-recognition model: a convolution slid horizontally over the
    image, two stacked LSTM layers, a per-window softmax, and CTC loss/decoding.

    Args:
        input_shape: (image_height, image_width) of the input line image.
        output_shape: (output_length, num_classes) of the label sequence.
        window_width: width in pixels of the horizontal window.
        window_stride: horizontal step in pixels between windows.
        conv_dim: number of convolution filters (feature length per window).
        lstm_dim: number of units in each LSTM layer.

    Returns:
        A Keras model with inputs [image, y_true, input_length, label_length]
        and outputs [ctc_loss, ctc_decoded].

    Raises:
        ValueError: if the window settings produce fewer windows than
            ``output_length``.
    """
    image_height, image_width = input_shape
    output_length, num_classes = output_shape

    num_windows = int((image_width - window_width) / window_stride) + 1
    if num_windows < output_length:
        raise ValueError(f'Window width/stride need to generate at least {output_length} windows (currently {num_windows})')

    image_input = Input(shape=input_shape, name='image')
    y_true = Input(shape=(output_length,), name='y_true')
    input_length = Input(shape=(1,), name='input_length')
    label_length = Input(shape=(1,), name='label_length')

    # More than one local device is taken to mean that a GPU is available
    gpu_present = len(device_lib.list_local_devices()) > 1
    lstm_fn = CuDNNLSTM if gpu_present else LSTM

    # Note that lstms expect a input of shape (num_batch_size, num_timesteps, feature_length).
    image_reshaped = Reshape((image_height, image_width, 1))(image_input)
    # (image_height, image_width, 1)

    # Slide a conv filter stack over the image in the horizontal direction.
    # The kernel spans the full image height, so the height axis collapses to 1.
    conv = Conv2D(conv_dim, (image_height, window_width), (1, window_stride), activation='relu')(image_reshaped)
    # (1, num_windows, conv_dim)

    conv_squeezed = Lambda(lambda x: K.squeeze(x, 1))(conv)
    # (num_windows, conv_dim)

    lstm_output = lstm_fn(lstm_dim, return_sequences=True)(conv_squeezed)
    # (num_windows, lstm_dim)

    # Fixed: this result used to be bound to the misspelt name `lstm_outpu2t`,
    # so the softmax layer below raised NameError on `lstm_output2`.
    lstm_output2 = lstm_fn(lstm_dim, return_sequences=True)(lstm_output)
    # (num_windows, lstm_dim)

    softmax_output = Dense(num_classes, activation='softmax', name='softmax_output')(lstm_output2)
    # (num_windows, num_classes)

    # Scale the per-sample input length by the number of windows
    input_length_processed = Lambda(
        lambda x, num_windows=None: x * num_windows,
        arguments={'num_windows': num_windows}
    )(input_length)

    ctc_loss_output = Lambda(
        lambda x: K.ctc_batch_cost(x[0], x[1], x[2], x[3]),
        name='ctc_loss'
    )([y_true, softmax_output, input_length_processed, label_length])

    ctc_decoded_output = Lambda(
        lambda x: ctc_decode(x[0], x[1], output_length),
        name='ctc_decoded'
    )([softmax_output, input_length_processed])

    model = KerasModel(
        inputs=[image_input, y_true, input_length, label_length],
        outputs=[ctc_loss_output, ctc_decoded_output]
    )
    return model
def caps_batch_dot(x, y):
    """
    Batched matrix product of `x` and `y`.

    A new axis is inserted at position 2 of `x`. When axis 3 of the expanded
    `x` has a statically known size, the last two axes of `y` are swapped
    before multiplying. The inserted axis is squeezed out of the product.
    """
    lhs = K.expand_dims(x, 2)
    rhs = y
    if K.int_shape(lhs)[3] is not None:
        rhs = K.permute_dimensions(rhs, (0, 1, 3, 2))
    return K.squeeze(tf.matmul(lhs, rhs), 2)
def call(self, inputs, return_attention=False):
    """
    Multi-head self-attention restricted to a band of relative distances.

    `inputs` serves as query, key and value. Scores are kept only for
    diagonals within ``max_distance`` of the main diagonal, a resized
    distance encoding is added, and the softmaxed scores are gated by the
    sigmoid of the value embedding.

    Args:
        inputs: tensor whose axis 1 is the data length.
        return_attention: when true, also return the attention tensor.

    Returns:
        In 'global' mode a tensor of shape (?, 2 * max_d + 1, num_head),
        otherwise (?, data_length, output_dim * num_head). With
        `return_attention`, a tuple of that output and the attention of shape
        (?, data_length, 2 * max_d + 1, num_head).
    """
    seq_len = inputs.shape[1]
    band_width = 2 * self.max_distance + 1

    def split_heads(tensor):
        # (?, data_length, num_head * head_dim)
        #   -> (num_head, ?, data_length, head_dim)
        pieces = tf.split(tensor[None, ...], self.num_head, axis=3)
        return tf.concat(pieces, axis=0)

    # embedding (layers are called in query, key, value order)
    q_emb = self.query_embedding(inputs)  # (?, data_length, num_head * head_dim)
    k_emb = self.key_embedding(inputs)  # (?, data_length, num_head * head_dim)
    gate = K.sigmoid(self.value_embedding(inputs))  # (?, data_length, num_head)

    heads_q = split_heads(q_emb)
    heads_k = split_heads(k_emb)
    heads_gate = K.permute_dimensions(gate, (2, 0, 1))  # (num_head, ?, data_length)

    # calculate distance attention
    scores = tf.matmul(
        heads_q + self.u_pe, heads_k,
        transpose_b=True)  # (num_head, ?, data_length, data_length)

    # distance padding: keep only the diagonals inside the distance band
    scores = tf.linalg.diag_part(
        scores, k=(-self.max_distance, self.max_distance))
    scores = K.permute_dimensions(scores, (0, 1, 3, 2))  # transpose
    scores = K.reverse(scores, axes=(-2, -1))
    # (num_head, ?, data_length, 2 * max_d + 1)

    # relative positional encoding
    distance_pe = tf.image.resize(
        self.distance_pe, [self.head_dim, band_width],
        'bilinear')  # (num_head, head_dim, 2 * max_d + 1, 1)
    distance_pe = K.expand_dims(distance_pe, axis=1)
    distance_pe = K.squeeze(
        distance_pe, axis=-1)  # (num_head, 1, head_dim, 2 * max_d + 1)

    positional = tf.matmul(heads_q + self.v_pe, distance_pe)
    scores = scores + positional  # (num_head, ?, data_length, 2 * max_d + 1)
    scores = scores * (float(self.head_dim)**-0.5)
    scores = tf.keras.layers.Softmax()(scores)

    attention = scores * heads_gate[..., None]

    if self.distance_norm:
        # (num_head, ?, data_length, 2 * max_d + 1)
        #   -> (?, num_head * data_length, 2 * max_d + 1)
        attention = K.permute_dimensions(attention, (1, 0, 2, 3))
        attention = K.reshape(
            attention, (-1, self.num_head * seq_len, band_width))

        attention = DistanceNorm()(attention)

        # (?, num_head * data_length, 2 * max_d + 1)
        #   -> (num_head, ?, data_length, 2 * max_d + 1)
        attention = K.reshape(
            attention, (-1, self.num_head, seq_len, band_width))
        attention = K.permute_dimensions(attention, (1, 0, 2, 3))

    if self.mode == 'global':
        result = K.sum(attention, axis=2)  # (num_head, ?, 2 * max_d + 1)
        result = K.permute_dimensions(
            result, (1, 2, 0))  # (?, 2 * max_d + 1, num_head)
    else:
        result = self.output_embedding(
            attention)  # (num_head, ?, data_length, output_dim)
        result = K.permute_dimensions(
            result, (1, 2, 3, 0))  # (?, data_length, output_dim, num_head)
        result = K.reshape(
            result,
            (-1, seq_len, self.output_dim * self.num_head)
        )  # (?, data_length, output_dim * num_head)

    if not return_attention:
        return result

    attention_out = K.permute_dimensions(
        attention, (1, 2, 3, 0))  # (?, data_length, 2 * max_d + 1, num_head)
    return result, attention_out
def ConvAttRNNSpeechModel(nCategories, samplingrate=16000, inputLength=16000, rnn_func=L.LSTM, bigger_blocks=False, more_blocks=False, blocks_layers=[20, 40, 80, 160, 320]):
    """
    Build a speech classifier: mel spectrogram front end, a stack of
    Conv2D/BatchNorm blocks, two bidirectional RNN layers, dot-product
    attention queried by the last time step, and a dense softmax head.

    Args:
        nCategories: number of output classes.
        samplingrate: sampling rate passed to the mel spectrogram layer.
        inputLength: number of samples in each input waveform.
        rnn_func: recurrent layer class wrapped by ``Bidirectional``.
        bigger_blocks: add a second Conv2D/BatchNorm pair to the pooled blocks.
        more_blocks: insert an extra pooled block and widen the final block.
        blocks_layers: filter counts; indices 0 to 3 are read.

    Returns:
        The (uncompiled) Keras model.
    """
    def conv_bn(tensor, filters, kernel_size):
        # Conv2D (relu, same padding) followed by batch normalisation
        tensor = L.Conv2D(filters, kernel_size, activation='relu',
                          padding='same')(tensor)
        return L.BatchNormalization()(tensor)

    inputs = L.Input((inputLength, ), name='input')

    net = L.Reshape((1, -1))(inputs)

    # Frozen mel spectrogram front end
    mel_layer = Melspectrogram(n_dft=1024,
                               n_hop=128,
                               input_shape=(1, inputLength),
                               padding='same',
                               sr=samplingrate,
                               n_mels=80,
                               fmin=40.0,
                               fmax=samplingrate / 2,
                               power_melgram=1.0,
                               return_decibel_melgram=True,
                               trainable_fb=False,
                               trainable_kernel=False,
                               name='mel_stft')
    mel_layer.trainable = False
    net = mel_layer(net)

    net = Normalization2D(int_axis=0, name='mel_stft_norm')(net)

    # note that Melspectrogram puts the sequence in shape (batch_size, melDim, timeSteps, 1)
    # we would rather have it the other way around for LSTMs
    net = L.Permute((2, 1, 3))(net)

    # Block 1: pools along the first spatial axis only
    net = conv_bn(net, blocks_layers[0], (5, 1))
    net = L.MaxPooling2D((2, 1))(net)
    net = L.Dropout(0.2)(net)

    # Block 2
    net = conv_bn(net, blocks_layers[1], (3, 3))
    if bigger_blocks:
        net = conv_bn(net, blocks_layers[1], (3, 3))
    net = L.MaxPooling2D((2, 2))(net)
    net = L.Dropout(0.3)(net)

    # Optional extra block; when present the final block uses the next width
    final_channels = blocks_layers[2]
    if more_blocks:
        final_channels = blocks_layers[3]
        net = conv_bn(net, blocks_layers[2], (3, 3))
        if bigger_blocks:
            net = conv_bn(net, blocks_layers[2], (3, 3))
        net = L.MaxPooling2D((2, 2))(net)
        net = L.Dropout(0.3)(net)

    # Final block, then reduce to a single channel so it can be squeezed away
    net = conv_bn(net, final_channels, (3, 3))
    net = L.Conv2D(1, (5, 1), activation='relu', padding='same')(net)
    net = L.Lambda(lambda q: K.squeeze(q, -1), name='squeeze_last_dim')(net)

    seq = L.Bidirectional(rnn_func(64, return_sequences=True))(
        net)  # [b_s, seq_len, vec_dim]
    seq = L.Dropout(0.2)(seq)
    seq = L.Bidirectional(rnn_func(64, return_sequences=True))(
        seq)  # [b_s, seq_len, vec_dim]
    seq = L.Dropout(0.2)(seq)

    # The query is built from the last time step of the sequence
    last_step = L.Lambda(lambda q: q[:, -1])(seq)  # [b_s, vec_dim]
    query = L.Dense(128)(last_step)

    # dot product attention
    att_scores = L.Dot(axes=[1, 2])([query, seq])
    att_scores = L.Softmax(name='attSoftmax')(att_scores)  # [b_s, seq_len]

    # rescale sequence
    att_vector = L.Dot(axes=[1, 1])([att_scores, seq])  # [b_s, vec_dim]

    head = L.Dense(64, activation='relu')(att_vector)
    head = L.Dropout(0.2)(head)
    head = L.Dense(32)(head)
    head = L.Dropout(0.2)(head)

    output = L.Dense(nCategories, activation='softmax', name='output')(head)

    return Model(inputs=[inputs], outputs=[output])
def policy_loss_with_metrics(self, Adv, A):
    """
    This method constructs the policy loss as a scalar-valued Tensor,
    together with a dictionary of metrics (also scalars).

    This method may be overridden to construct a custom policy loss and/or
    to change the accompanying metrics.

    In both supported strategies the loss is the negated objective, and the
    objective includes the entropy term ``entropy_beta * entropy``, so a
    positive ``entropy_beta`` rewards higher entropy.

    Parameters
    ----------
    Adv : 1d Tensor, shape: [batch_size]

        A batch of advantages. A 2d input is squeezed along axis 1.

    A : nd Tensor, shape: [batch_size, ...]

        A batch of actions taken under the behavior policy.

    Returns
    -------
    loss, metrics : (Tensor, dict of Tensors)

        The policy loss along with some metrics, which is a dict of type
        ``{name <str>: metric <Tensor>}``. The loss and each of the metrics
        (dict values) are scalar Tensors, i.e. Tensors with ``ndim=0``.

        The ``loss`` is passed to a keras Model using
        ``train_model.add_loss(loss)``. Similarly, each metric in the
        metric dict is passed to the model using
        ``train_model.add_metric(metric, name=name, aggregation='mean')``.

    Raises
    ------
    NotImplementedError
        If ``update_strategy`` is ``'cross_entropy'``.
    ValueError
        If ``update_strategy`` is not a known strategy.

    """
    # The advantages are treated as constants
    Adv = K.stop_gradient(Adv)
    if K.ndim(Adv) == 2:
        Adv = K.squeeze(Adv, axis=1)
    check_tensor(Adv, ndim=1)

    if self.update_strategy == 'vanilla':

        log_pi = self.dist.log_proba(A)
        check_tensor(log_pi, same_as=Adv)

        entropy = K.mean(self.dist.entropy())

        # flip sign to get loss from objective
        # Fixed: the entropy term used to be added to the loss
        # (`-mean(Adv * log_pi) + beta * entropy`), which penalised entropy
        # and disagreed with the 'ppo' branch below.
        loss = -(K.mean(Adv * log_pi) + self.entropy_beta * entropy)

        # no metrics related to behavior_dist since its not used in loss
        metrics = {'policy/entropy': entropy}

    elif self.update_strategy == 'ppo':

        log_pi = self.dist.log_proba(A)
        log_pi_old = K.stop_gradient(self.target_dist.log_proba(A))
        check_tensor(log_pi, same_as=Adv)
        check_tensor(log_pi_old, same_as=Adv)

        # Clipped surrogate objective
        eps = self.ppo_clip_eps
        ratio = K.exp(log_pi - log_pi_old)
        ratio_clip = K.clip(ratio, 1 - eps, 1 + eps)

        clip_objective = K.mean(K.minimum(Adv * ratio, Adv * ratio_clip))
        entropy = K.mean(self.dist.entropy())
        kl_div = K.mean(self.target_dist.kl_divergence(self.dist))

        # flip sign to get loss from objective
        loss = -(clip_objective + self.entropy_beta * entropy)
        metrics = {'policy/entropy': entropy, 'policy/kl_div': kl_div}

    elif self.update_strategy == 'cross_entropy':
        raise NotImplementedError('cross_entropy')

    else:
        raise ValueError("unknown update_strategy '{}'".format(
            self.update_strategy))

    # rename
    loss = tf.identity(loss, name='policy_loss')

    return loss, metrics
def build_model(input_size, d_model, learning_rate=1e-3):
    """
    Build and compile a convolutional + bidirectional LSTM model trained
    with ``ctc_loss_lambda_func``.

    The model summary is printed as a side effect.

    Args:
        input_size: shape passed to the Keras ``Input`` layer.
        d_model: accepted but not used by this function.
        learning_rate: learning rate for the Adam optimizer.

    Returns:
        The compiled Keras model.
    """
    def conv3x3(tensor, filters):
        return Conv2D(filters, (3, 3), padding='same')(tensor)

    def pooled_stage(tensor, filters):
        # conv -> 3x3 max pool with stride 3 -> relu
        tensor = conv3x3(tensor, filters)
        tensor = MaxPool2D(pool_size=(3, 3), strides=3)(tensor)
        return Activation('relu')(tensor)

    def residual_stage(tensor, filters):
        # conv/bn/relu, then conv/bn added back onto that first activation
        tensor = conv3x3(tensor, filters)
        tensor = BatchNormalization()(tensor)
        shortcut = Activation('relu')(tensor)
        tensor = conv3x3(shortcut, filters)
        tensor = BatchNormalization()(tensor)
        tensor = Add()([tensor, shortcut])
        return Activation('relu')(tensor)

    inputs = Input(shape=input_size)
    net = tf.keras.layers.experimental.preprocessing.Rescaling(1. / 255)(inputs)

    # Blocks 1 and 2
    net = pooled_stage(net, 64)
    net = pooled_stage(net, 128)

    # Blocks 3 and 4
    net = residual_stage(net, 256)
    net = residual_stage(net, 512)

    # Block 5
    net = conv3x3(net, 1024)
    net = BatchNormalization()(net)
    net = MaxPool2D(pool_size=(3, 1))(net)
    net = Activation('relu')(net)
    net = MaxPool2D(pool_size=(3, 1))(net)

    # Axis 1 is squeezed out before the tensor is fed to the recurrent layers
    sequence = Lambda(lambda t: K.squeeze(t, 1))(net)

    sequence = Bidirectional(
        LSTM(512, return_sequences=True, dropout=0.2))(sequence)
    sequence = Bidirectional(
        LSTM(512, return_sequences=True, dropout=0.2))(sequence)

    outputs = Dense(units=VOCAB_SIZE + 1, activation='softmax')(sequence)

    model = Model(inputs, outputs)
    model.summary()

    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate),
        loss=ctc_loss_lambda_func)
    return model
# Wire a three-headed question-answering model on top of a pretrained BERT:
# an "answerable" sigmoid plus softmax distributions for the span start and end.
# `tokens_input`, `mask_input`, `max_len`, `text_maxlen` and `seed_value` are
# defined outside this fragment.
segment_input = Input(shape=(max_len, ), dtype=tf.int32)
Bert_model = TFBertModel.from_pretrained('bert-base-chinese')
output = Bert_model([tokens_input, mask_input, segment_input])

# Keep the mask entries for positions 1..text_maxlen (position 0 is skipped)
mask = Lambda(lambda x: x[:, 1:text_maxlen + 1])(mask_input)
# NOTE(review): cast_to_floatx is applied to a layer output here — confirm it
# accepts symbolic tensors in the Keras/TF version this file targets.
mask = K.cast_to_floatx(mask)

# Answerable head: reads position 0 of the first BERT output
answer = Lambda(lambda x: x[:, 0, :])(output[0])
answer = Dense(1, activation='sigmoid', name='answerable')(answer)

# Start head: one logit per position in 1..text_maxlen
output_start = Lambda(lambda x: x[:, 1:text_maxlen + 1, :])(output[0])
output_start = Dense(1)(output_start)
output_start = K.squeeze(output_start, axis=-1)
# NOTE(review): the logits are multiplied by the mask before the softmax, so
# (assuming a 0/1 mask) masked positions get logit 0 rather than being
# excluded — confirm this is intended.
output_start = Multiply()([output_start, mask])
output_start = Activation('softmax', name='start')(output_start)

# End head: same as the start head with dropout before the dense layer
output_end = Lambda(lambda x: x[:, 1:text_maxlen + 1, :])(output[0])
output_end = Dropout(0.2, seed=seed_value)(output_end)
output_end = Dense(1)(output_end)
output_end = K.squeeze(output_end, axis=-1)
output_end = Multiply()([output_end, mask])
output_end = Activation('softmax', name='end')(output_end)

model = Model([tokens_input, mask_input, segment_input],
              [answer, output_start, output_end])
def _scharr_edges(cls, image, magnitude):
    """ Filter an image batch with a pair of 5x5 modified Scharr kernels.

    The image is reflect-padded by 2 pixels on each side of its two middle axes and then
    convolved depth-wise with the two kernels.

    Parameters
    ----------
    image: tensor
        Image tensor with shape [batch_size, h, w, d] and type float32.
    magnitude: bool
        ``True`` to return the raw filter responses, ``False`` to return the edge direction

    Returns
    -------
    tensor
        If `magnitude` is ``True``, the depth-wise convolution output with shape
        `[batch_size, h, w, d * 2]`. Otherwise the arctangent of the first kernel's response
        divided by the second kernel's response for each channel, with all size 1 dimensions
        squeezed out.
    """
    static_shape = image.get_shape()
    dynamic_shape = K.shape(image)

    # 5x5 modified Scharr kernel, shape (5, 5, 1, 2): the last axis holds the two filters
    scharr = np.array([[[[0.00070, 0.00070]],
                        [[0.00520, 0.00370]],
                        [[0.03700, 0.00000]],
                        [[0.00520, -0.0037]],
                        [[0.00070, -0.0007]]],
                       [[[0.00370, 0.00520]],
                        [[0.11870, 0.11870]],
                        [[0.25890, 0.00000]],
                        [[0.11870, -0.1187]],
                        [[0.00370, -0.0052]]],
                       [[[0.00000, 0.03700]],
                        [[0.00000, 0.25890]],
                        [[0.00000, 0.00000]],
                        [[0.00000, -0.2589]],
                        [[0.00000, -0.0370]]],
                       [[[-0.0037, 0.00520]],
                        [[-0.1187, 0.11870]],
                        [[-0.2589, 0.00000]],
                        [[-0.1187, -0.1187]],
                        [[-0.0037, -0.0052]]],
                       [[[-0.0007, 0.00070]],
                        [[-0.0052, 0.00370]],
                        [[-0.0370, 0.00000]],
                        [[-0.0052, -0.0037]],
                        [[-0.0007, -0.0007]]]])
    kernel_count = [2]

    # Repeat the kernel pair once per input channel for the depth-wise convolution
    filters = K.tile(K.constant(scharr, dtype='float32'),
                     [1, 1, dynamic_shape[-1], 1])

    # Use depth-wise convolution to calculate edge maps per channel.
    # Output tensor has shape [batch_size, h, w, d * num_kernels].
    padding = [[0, 0], [2, 2], [2, 2], [0, 0]]
    padded_image = tf.pad(image,  # pylint:disable=unexpected-keyword-arg,no-value-for-parameter
                          padding,
                          mode='REFLECT')
    edges = K.depthwise_conv2d(padded_image, filters)

    if magnitude:
        # magnitude of edges -- unified x & y edges don't work well with Neural Networks
        return edges

    # direction of edges
    # Reshape to [batch_size, h, w, d, num_kernels].
    target_shape = K.concatenate([dynamic_shape, kernel_count], axis=0)
    edges = K.reshape(edges, shape=target_shape)
    edges.set_shape(static_shape.concatenate(kernel_count))
    ratio = edges[:, :, :, :, 0] / edges[:, :, :, :, 1]
    return tf.atan(K.squeeze(ratio, axis=None))
def call(self, inputs):
    """
    Compute span-begin probabilities.

    `inputs` is unpacked into two tensors (merged context and modeled
    passage). They are concatenated, passed through `self.dense_1` at every
    time step, and the result is softmaxed after its last axis is squeezed out.
    """
    context, passage = inputs
    joined = K.concatenate([context, passage])
    weights = TimeDistributed(self.dense_1)(joined)
    squeezed = K.squeeze(weights, axis=-1)
    return Softmax()(squeezed)
def dice(y_true, y_pred):
    """
    Mean Dice coefficient over the batch.

    Predictions are binarised at a threshold of 0.5. Axis 3 of both the
    product and the sum of the masks is squeezed out, so it is expected to
    have size 1.
    """
    def per_sample_total(tensor):
        # Drop axis 3, then sum over axis 2 followed by axis 1
        flat = K.squeeze(tensor, axis=3)
        return K.sum(K.sum(flat, axis=2), axis=1)

    binary_pred = K.cast(K.greater(y_pred, .5), dtype='float32')  # .5 is the threshold

    overlap = per_sample_total(y_true * binary_pred)
    total = per_sample_total(y_true + binary_pred)

    # epsilon keeps the ratio defined when both masks are empty
    score = (2*overlap + K.epsilon()) / (total + K.epsilon())
    return K.mean(score)
def call(self, inputs, states, training=None):
    """
    Run one step of an LSTM cell that also attends over `inputs`.

    Attention weights are computed from the previous hidden state and
    `inputs`, a context vector `z_hat` is formed as the weighted sum of
    `inputs` over axis 1, and that context feeds every gate next to the usual
    input and recurrent terms.

    Args:
        inputs: input tensor; it is multiplied element-wise by the attention
            weights and summed over axis 1 (presumably
            (batch, timesteps, input_dim) — TODO confirm against the caller).
        states: list whose first two items are the previous memory state and
            the previous carry state.
        training: only inspected at the end, to flag the output as
            learning-phase dependent when it is None and dropout is enabled.

    Returns:
        Tuple ``(h, [h, c])`` of the new output and the new state list.
    """
    # dropout matrices for input units
    dp_mask = self._dropout_mask
    # dropout matrices for recurrent units
    rec_dp_mask = self._recurrent_dropout_mask

    h_tm1 = states[0]  # previous memory state
    c_tm1 = states[1]  # previous carry state

    # alignment model
    # Repeat the previous hidden state timestep_dim times so it can be added
    # to the projected inputs
    h_att = K.repeat(h_tm1, self.timestep_dim)
    att = _time_distributed_dense(inputs, self.attention_weights,
                                  self.attention_bias,
                                  input_dim=self.input_dim,
                                  output_dim=self.units,
                                  timesteps=self.timestep_dim)
    attention_ = self.attention_activation(
        K.dot(h_att, self.attention_recurrent_weights) + att)  # energy
    # NOTE(review): despite its name, attention_recurrent_bias is used as a
    # projection matrix here; axis 2 of the product is then squeezed out.
    attention_ = K.squeeze(K.dot(attention_, self.attention_recurrent_bias),
                           2)  # energy

    # Exponentiate and normalise over axis 1 (a softmax without max shifting)
    alpha = K.exp(attention_)

    # The first input-dropout mask, when present, is applied to the
    # un-normalised attention weights
    if dp_mask is not None:
        alpha *= dp_mask[0]

    alpha /= K.sum(alpha, axis=1, keepdims=True)
    # Repeat input_dim times and swap the last two axes so the weights can be
    # multiplied element-wise with `inputs`
    alpha_r = K.repeat(alpha, self.input_dim)
    alpha_r = K.permute_dimensions(alpha_r, (0, 2, 1))

    # make context vector (soft attention after Bahdanau et al.)
    z_hat = inputs * alpha_r
    # context_sequence is not read again in this method
    context_sequence = z_hat
    z_hat = K.sum(z_hat, axis=1)

    if self.implementation == 1:
        # One kernel per gate (i, f, c, o), each with its own dropout mask
        if 0 < self.dropout < 1.:
            inputs_i = inputs * dp_mask[0]
            inputs_f = inputs * dp_mask[1]
            inputs_c = inputs * dp_mask[2]
            inputs_o = inputs * dp_mask[3]
        else:
            inputs_i = inputs
            inputs_f = inputs
            inputs_c = inputs
            inputs_o = inputs
        x_i = K.dot(inputs_i, self.kernel_i)
        x_f = K.dot(inputs_f, self.kernel_f)
        x_c = K.dot(inputs_c, self.kernel_c)
        x_o = K.dot(inputs_o, self.kernel_o)
        if self.use_bias:
            x_i = K.bias_add(x_i, self.bias_i)
            x_f = K.bias_add(x_f, self.bias_f)
            x_c = K.bias_add(x_c, self.bias_c)
            x_o = K.bias_add(x_o, self.bias_o)

        if 0 < self.recurrent_dropout < 1.:
            h_tm1_i = h_tm1 * rec_dp_mask[0]
            h_tm1_f = h_tm1 * rec_dp_mask[1]
            h_tm1_c = h_tm1 * rec_dp_mask[2]
            h_tm1_o = h_tm1 * rec_dp_mask[3]
        else:
            h_tm1_i = h_tm1
            h_tm1_f = h_tm1
            h_tm1_c = h_tm1
            h_tm1_o = h_tm1
        # Each gate sums its input, recurrent and attention-context terms
        i = self.recurrent_activation(
            x_i + K.dot(h_tm1_i, self.recurrent_kernel_i) +
            K.dot(z_hat, self.attention_i))
        f = self.recurrent_activation(
            x_f + K.dot(h_tm1_f, self.recurrent_kernel_f) +
            K.dot(z_hat, self.attention_f))
        c = f * c_tm1 + i * self.activation(
            x_c + K.dot(h_tm1_c, self.recurrent_kernel_c) +
            K.dot(z_hat, self.attention_c))
        o = self.recurrent_activation(
            x_o + K.dot(h_tm1_o, self.recurrent_kernel_o) +
            K.dot(z_hat, self.attention_o))
    else:
        # Fused kernels: one product for all four gates, sliced afterwards
        if 0. < self.dropout < 1.:
            inputs *= dp_mask[0]
        z = K.dot(inputs, self.kernel)
        if 0. < self.recurrent_dropout < 1.:
            h_tm1 *= rec_dp_mask[0]
        z += K.dot(h_tm1, self.recurrent_kernel)
        z += K.dot(z_hat, self.attention_kernel)
        if self.use_bias:
            z = K.bias_add(z, self.bias)

        # Column slices of width `units`, in gate order i, f, c, o
        z0 = z[:, :self.units]
        z1 = z[:, self.units:2 * self.units]
        z2 = z[:, 2 * self.units:3 * self.units]
        z3 = z[:, 3 * self.units:]

        i = self.recurrent_activation(z0)
        f = self.recurrent_activation(z1)
        c = f * c_tm1 + i * self.activation(z2)
        o = self.recurrent_activation(z3)

    h = o * self.activation(c)
    # With any dropout enabled and no explicit training flag, mark the output
    # as depending on the learning phase
    if 0 < self.dropout + self.recurrent_dropout:
        if training is None:
            h._uses_learning_phase = True
    return h, [h, c]