def build_model(img_input, input_shape=None, classes=1000):
    x = layers.Reshape((input_shape[0], input_shape[1], -1))(img_input)

    # Residual Stack 1
    X = residual_stack(x, 32, "ReStk1", False)  # shape: (1, 512, 32)
    # X = layers.MaxPooling2D(pool_size=(2, 2), strides=(1, 2), padding='valid')(X)

    # Residual Stack 2
    X = residual_stack(X, 32, "ReStk2", True)  # shape: (1, 256, 32)

    # Residual Stack 3
    X = residual_stack(X, 32, "ReStk3", True)  # shape: (1, 128, 32)

    # Residual Stack 4
    X = residual_stack(X, 32, "ReStk4", True)  # shape: (1, 64, 32)

    # Residual Stack 5
    X = residual_stack(X, 32, "ReStk5", True)  # shape: (1, 32, 32)

    # Residual Stack 6
    X = residual_stack(X, 32, "ReStk6", True)  # shape: (1, 16, 32)

    # Fully Connected 1
    X = layers.Flatten()(X)
    X = layers.Dense(128, activation='selu', name="dense1")(X)
    X = layers.AlphaDropout(0.3)(X)

    # Fully Connected 2
    X = layers.Dense(128, activation='selu', name="dense2")(X)
    X = layers.AlphaDropout(0.3)(X)

    # Fully Connected 3 (classifier head)
    X = layers.Dense(classes, name="dense3")(X)
    outputs = layers.Activation('softmax')(X)

    resNet = models.Model(inputs=[img_input], outputs=[outputs])
    return resNet
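# Hedged usage sketch (added for illustration, not from the original source):
# the input shape (1, 512, 2) and the 24-class setup are assumptions chosen to
# match the shape comments above; `residual_stack` must be defined elsewhere
# in this module.
def _example_build_resnet():
    iq_input = layers.Input(shape=(1, 512, 2), name='iq_input')
    model = build_model(iq_input, input_shape=(1, 512, 2), classes=24)
    model.compile(optimizer='adam', loss='categorical_crossentropy',
                  metrics=['accuracy'])
    return model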
def build_model(hc_model, width=1024, depth=2, dropout_rate=0.5, nclasses=4,
                mode='dense', activation='softmax', selu=False,
                mc_dropout=False, l2_reg=1e-4):
    """ PixelNet: define an MLP model over a hypercolumn model given as input

    @article{pixelnet,
      title={Pixel{N}et: {R}epresentation of the pixels, by the pixels, and for the pixels},
      author={Bansal, Aayush and Chen, Xinlei and Russell, Bryan and Gupta, Abhinav and Ramanan, Deva},
      journal={arXiv preprint arXiv:1702.06506},
      year={2017}
    }

    From the paper and their notes on GitHub, the semantic segmentation task
    should work either with a linear classifier plus BatchNorm, or with an MLP
    without BatchNorm.

    activation: activation function for the prediction layer.
        'softmax' for classification, 'linear' for regression.
    """
    x = hc_model.output
    nchannels = tf.shape(x)[-1]
    x = flatten_pixels(nchannels)(x)

    if selu:
        for idx in range(depth):
            x = dense_selu(x, width, name='mlp{}'.format(idx + 1), l2_reg=l2_reg)
            x = layers.AlphaDropout(dropout_rate)(x)
    else:
        for idx in range(depth):
            x = dense_bn(x, width, name='mlp{}'.format(idx + 1), l2_reg=l2_reg)
            # training=True keeps dropout active at inference time for Monte
            # Carlo dropout; None falls back to the standard train-only behavior
            # (passing training=False here would disable dropout entirely).
            x = layers.Dropout(dropout_rate)(x, training=True if mc_dropout else None)

    x = layers.Dense(nclasses, activation=activation, name='predictions')(x)
    x = unflatten_pixels(hc_model.inputs, nclasses=nclasses, mode=mode)(x)

    return models.Model(inputs=hc_model.inputs, outputs=x)
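# Hedged usage sketch (added for illustration; not from the original source).
# `make_hypercolumn_model` is a hypothetical backbone builder: any Keras model
# whose output stacks per-pixel hypercolumn features will do here.
def _example_build_pixelnet():
    hc_model = make_hypercolumn_model()  # hypothetical helper, not defined here
    model = build_model(hc_model, width=1024, depth=2, nclasses=4, selu=True)
    model.compile(optimizer='adam', loss='categorical_crossentropy')
    return model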
def create_cost_module(inputs, adjustable):
    """Implements the cost module of the siamese network.

    :param inputs:      list containing the feature tensor from each siamese head
    :param adjustable:  object of class ProjectVariable
    :return:            a distance tensor, or class scores when the cost module
                        is a neural network
    """

    def subtract(x):
        return x[0] - x[1]

    def divide(x):
        return x[0] / x[1]

    def absolute(x):
        return abs(x[0] - x[1])

    # unused
    def the_shape(shapes):
        shape1, shape2 = shapes
        a_shape = shape1
        return a_shape

    if adjustable.cost_module_type == 'neural_network':
        if adjustable.neural_distance == 'concatenate':
            features = layers.concatenate(inputs)
        elif adjustable.neural_distance == 'add':
            features = layers.add(inputs)
        elif adjustable.neural_distance == 'multiply':
            features = layers.multiply(inputs)
        elif adjustable.neural_distance == 'subtract':
            features = layers.Lambda(subtract)(inputs)
        elif adjustable.neural_distance == 'divide':
            features = layers.Lambda(divide)(inputs)
        elif adjustable.neural_distance == 'absolute':
            features = layers.Lambda(absolute)(inputs)
        else:
            features = None

        dense_layer = layers.Dense(adjustable.neural_distance_layers[0],
                                   name='dense_1',
                                   trainable=adjustable.trainable_cost_module)(features)
        activation = layers.Activation(adjustable.activation_function)(dense_layer)
        # SELU pairs with AlphaDropout; other activations use plain Dropout.
        if adjustable.activation_function == 'selu':
            dropout_layer = layers.AlphaDropout(adjustable.dropout_rate)(activation)
        else:
            dropout_layer = layers.Dropout(adjustable.dropout_rate)(activation)

        dense_layer = layers.Dense(adjustable.neural_distance_layers[1],
                                   name='dense_2',
                                   trainable=adjustable.trainable_cost_module)(dropout_layer)
        activation = layers.Activation(adjustable.activation_function)(dense_layer)
        if adjustable.activation_function == 'selu':
            dropout_layer = layers.AlphaDropout(adjustable.dropout_rate)(activation)
        else:
            dropout_layer = layers.Dropout(adjustable.dropout_rate)(activation)

        output_layer = layers.Dense(pc.NUM_CLASSES, name='output')(dropout_layer)
        softmax = layers.Activation('softmax')(output_layer)

        if adjustable.weights_name is not None:
            # NOTE: `softmax` is a Keras tensor and has no `load_weights`; the
            # weights at os.path.join(pc.SAVE_LOCATION_MODEL_WEIGHTS,
            # adjustable.weights_name) must be loaded (with by_name=True) on
            # the assembled siamese model instead.
            pass

        return softmax

    elif adjustable.cost_module_type == 'euclidean':
        distance = layers.Lambda(euclidean_distance)(inputs)
        return distance

    elif adjustable.cost_module_type == 'euclidean_fc':
        distance = layers.Lambda(euclidean_distance,
                                 output_shape=eucl_dist_output_shape)(inputs)
        dense_layer = layers.Dense(1, name='dense_1')(distance)
        activation = layers.Activation(adjustable.activation_function)(dense_layer)
        output_layer = layers.Dense(pc.NUM_CLASSES, name='output')(activation)
        softmax = layers.Activation('softmax')(output_layer)
        return softmax

    elif adjustable.cost_module_type == 'cosine':
        distance = layers.Lambda(cosine_distance_normalized)(inputs)
        return distance
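# Hedged usage sketch (illustrative only; `adjustable` field names follow the
# attributes referenced above, and the two head outputs are assumed to be
# feature tensors produced by the siamese branches).
def _example_cost_module(head_a_output, head_b_output, adjustable):
    adjustable.cost_module_type = 'euclidean'
    return create_cost_module([head_a_output, head_b_output], adjustable)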
def build_inter_coattention_cnn_model(num_feature_channels1, num_feature_channels2,
                                      num_features1, num_features2, feature_dim1,
                                      output_dim, num_filters, filter_sizes,
                                      atten_dim, model_dim, mlp_dim, mlp_depth=1,
                                      drop_out=0.5, pooling='max', padding='valid',
                                      return_customized_layers=False):
    """
    Create A Multi-Layer Perceptron Model with Coattention Mechanism.

    inputs:
        embeddings: [batch, num_embed_feature, embed_dims] * 3  ## pronoun, A, B
        positional_features: [batch, num_pos_feature] * 2  ## pronoun-A, pronoun-B
    outputs:
        [batch, num_classes]  # in our case there should be 3 output classes: A, B, None

    :param output_dim: the output dimension size
    :param atten_dim: the dimension size of the co-attention layer
    :param model_dim: rnn dimension size
    :param mlp_dim: the dimension size of fully connected layer
    :param mlp_depth: the depth of fully connected layers
    :param drop_out: dropout rate of fully connected layers
    :param return_customized_layers: boolean, default=False
        If True, return model and customized object dictionary, otherwise return model only
    :return: keras model
    """

    def _mlp_channel1(feature_dropout_layer, x):
        # x = feature_dropout_layer(x)
        return x

    def _mlp_channel2(feature_map_layer, x):
        x = feature_map_layer(x)
        return x

    # inputs
    inputs1 = list()
    for fi in range(num_feature_channels1):
        inputs1.append(models.Input(shape=(num_features1, feature_dim1),
                                    dtype='float32', name='input1_' + str(fi)))

    inputs2 = list()
    for fi in range(num_feature_channels2):
        inputs2.append(models.Input(shape=(num_features2,),
                                    dtype='float32', name='input2_' + str(fi)))

    # define feature map layers
    # MLP Layers
    feature_dropout_layer1 = layers.TimeDistributed(
        layers.Dropout(rate=drop_out, name="input_dropout_layer"))
    feature_map_layer2 = layers.Dense(feature_dim1, name="feature_map_layer2",
                                      activation="relu")

    x1 = [_mlp_channel1(feature_dropout_layer1, input_) for input_ in inputs1]
    x2 = [_mlp_channel2(feature_map_layer2, input_) for input_ in inputs2]

    # Form mention-pair embeddings
    reshape_layer = layers.Reshape((1, feature_dim1), name="reshape_layer")
    x2 = [reshape_layer(x2_) for x2_ in x2]
    pair1 = layers.Concatenate(axis=1, name="concate_pair1_layer")([x1[0], x1[1], x2[0]])
    pair2 = layers.Concatenate(axis=1, name="concate_pair2_layer")([x1[0], x1[2], x2[1]])

    coatten_layer = RemappedCoAttentionWeight(atten_dim, name="coattention_weights_layer")
    featnorm_layer1 = FeatureNormalization(name="normalized_coattention_weights_layer1", axis=1)
    featnorm_layer2 = FeatureNormalization(name="normalized_coattention_weights_layer2", axis=2)
    focus_layer1 = layers.Dot((1, 1), name="focus_layer1")
    focus_layer2 = layers.Dot((2, 1), name="focus_layer2")
    pair_layer1 = layers.Concatenate(axis=-1, name="pair_layer1")
    pair_layer2 = layers.Concatenate(axis=-1, name="pair_layer2")

    # attention
    attens = coatten_layer([pair1, pair2])
    attens1 = featnorm_layer1(attens)
    attens2 = featnorm_layer2(attens)

    # compare
    focus1 = focus_layer1([attens1, pair1])
    focus2 = focus_layer2([attens2, pair2])
    pair1 = pair_layer1([pair1, focus2])
    pair2 = pair_layer2([pair2, focus1])

    x = layers.Concatenate(axis=1, name="concate_layer")([pair1, pair2])
    x = layers.TimeDistributed(layers.Dropout(rate=drop_out, name="pair_dropout_layer"))(x)
    x = layers.TimeDistributed(layers.Dense(mlp_dim, name="pair_feature_map_layer",
                                            activation="relu"))(x)
    x = layers.Flatten(name="pair_feature_flatten_layer1")(x)

    # pooled_outputs = []
    # for i in range(len(filter_sizes)):
    #     conv = layers.Conv1D(num_filters[i], kernel_size=filter_sizes[i],
    #                          padding=padding, activation='relu')(x)
    #     if pooling == 'max':
    #         conv = layers.GlobalMaxPooling1D(name='global_pooling_layer' + str(i))(conv)
    #     else:
    #         conv = layers.GlobalAveragePooling1D(name='global_pooling_layer' + str(i))(conv)
    #     pooled_outputs.append(conv)
    #
    # if len(pooled_outputs) > 1:
    #     x = layers.Concatenate(name='concated_layer')(pooled_outputs)
    # else:
    #     x = conv

    # MLP Layers
    x = layers.BatchNormalization(name='batch_norm_layer')(x)
    x = layers.Dropout(rate=drop_out, name="dropout_layer")(x)
    for i in range(mlp_depth - 1):
        x = layers.Dense(mlp_dim, activation='selu', kernel_initializer='lecun_normal',
                         name='selu_layer' + str(i))(x)
        x = layers.AlphaDropout(drop_out, name='alpha_layer' + str(i))(x)

    outputs = layers.Dense(output_dim, activation="softmax", name="softmax_layer0")(x)

    model = models.Model(inputs1 + inputs2, outputs)

    if return_customized_layers:
        return model, {'RemappedCoAttentionWeight': RemappedCoAttentionWeight,
                       "FeatureNormalization": FeatureNormalization}

    return model
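# Hedged usage sketch (added for illustration; the dimensions below are
# assumptions for a GAP-style pronoun task: three embedding channels, two
# positional feature vectors, and classes A/B/None). mlp_depth=2 ensures the
# SELU/AlphaDropout block is actually instantiated.
def _example_inter_coattention_model():
    return build_inter_coattention_cnn_model(
        num_feature_channels1=3, num_feature_channels2=2,
        num_features1=5, num_features2=10, feature_dim1=300,
        output_dim=3, num_filters=[64], filter_sizes=[3],
        atten_dim=64, model_dim=128, mlp_dim=128, mlp_depth=2)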
def build_multi_channel_cnn_model(num_feature_channels1, num_feature_channels2,
                                  num_features1, num_features2, feature_dim1,
                                  output_dim, num_filters, filter_sizes,
                                  model_dim, mlp_dim, mlp_depth=1, drop_out=0.5,
                                  pooling='max', padding='valid',
                                  return_customized_layers=False):
    """
    Create A Multi-Channel CNN Model.

    inputs:
        embeddings: [batch, num_embed_feature, embed_dims] * 3  ## pronoun, A, B
        positional_features: [batch, num_pos_feature] * 2  ## pronoun-A, pronoun-B
    outputs:
        [batch, num_classes]  # in our case there should be 3 output classes: A, B, None

    :param output_dim: the output dimension size
    :param num_filters: list of integers
        The number of filters.
    :param filter_sizes: list of integers
        The kernel size.
    :param pooling: str, either 'max' or 'average'
        Pooling method.
    :param padding: One of "valid", "causal" or "same" (case-insensitive).
        Padding method.
    :param model_dim: rnn dimension size
    :param mlp_dim: the dimension size of fully connected layer
    :param mlp_depth: the depth of fully connected layers
    :param drop_out: dropout rate of fully connected layers
    :param return_customized_layers: boolean, default=False
        If True, return model and customized object dictionary, otherwise return model only
    :return: keras model
    """

    def _mlp_channel1(feature_dropout_layer, cnns, pools, concate_layer1, x):
        x = feature_dropout_layer(x)
        pooled_outputs = []
        for i in range(len(cnns)):
            conv = cnns[i](x)
            # each pooling layer already encodes the chosen pooling method
            conv = pools[i](conv)
            pooled_outputs.append(conv)
        if len(cnns) == 1:
            x = conv
        else:
            x = concate_layer1(pooled_outputs)
        return x

    def _mlp_channel2(feature_map_layer, x):
        x = feature_map_layer(x)
        return x

    # inputs
    inputs1 = list()
    for fi in range(num_feature_channels1):
        inputs1.append(models.Input(shape=(num_features1, feature_dim1),
                                    dtype='float32', name='input1_' + str(fi)))

    inputs2 = list()
    for fi in range(num_feature_channels2):
        inputs2.append(models.Input(shape=(num_features2,),
                                    dtype='float32', name='input2_' + str(fi)))

    # define feature map layers
    # CNN Layers
    cnns = []
    pools = []
    feature_dropout_layer1 = layers.TimeDistributed(
        layers.Dropout(rate=drop_out, name="input_dropout_layer"))
    for i in range(len(filter_sizes)):
        cnns.append(layers.Conv1D(num_filters[i], kernel_size=filter_sizes[i],
                                  padding=padding, activation='relu',
                                  name="cc_layer1" + str(i)))
        if pooling == 'max':
            pools.append(layers.GlobalMaxPooling1D(name='global_pooling_layer1' + str(i)))
        else:
            pools.append(layers.GlobalAveragePooling1D(name='global_pooling_layer1' + str(i)))
    concate_layer1 = layers.Concatenate(name='concated_layer')

    feature_map_layer2 = layers.Dense(model_dim, name="feature_map_layer2",
                                      activation="relu")

    x1 = [_mlp_channel1(feature_dropout_layer1, cnns, pools, concate_layer1, input_)
          for input_ in inputs1]
    x2 = [_mlp_channel2(feature_map_layer2, input_) for input_ in inputs2]

    x = layers.Concatenate(axis=1, name="concate_layer")(x1 + x2)

    # MLP Layers
    x = layers.BatchNormalization(name='batch_norm_layer')(x)
    x = layers.Dropout(rate=drop_out, name="dropout_layer")(x)
    for i in range(mlp_depth - 1):
        x = layers.Dense(mlp_dim, activation='selu', kernel_initializer='lecun_normal',
                         name='selu_layer' + str(i))(x)
        x = layers.AlphaDropout(drop_out, name='alpha_layer' + str(i))(x)

    outputs = layers.Dense(output_dim, activation="softmax", name="softmax_layer0")(x)

    model = models.Model(inputs1 + inputs2, outputs)

    if return_customized_layers:
        return model, {}

    return model
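# Hedged usage sketch (illustrative only; all dimensions are assumptions).
# filter_sizes must not exceed num_features1, since the Conv1D layers slide
# over a length-num_features1 sequence.
def _example_multi_channel_cnn_model():
    return build_multi_channel_cnn_model(
        num_feature_channels1=3, num_feature_channels2=2,
        num_features1=5, num_features2=10, feature_dim1=300,
        output_dim=3, num_filters=[64, 64], filter_sizes=[2, 3],
        model_dim=128, mlp_dim=128, mlp_depth=2)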
def build_mlp_model(num_feature_channels1, num_feature_channels2, num_features1,
                    num_features2, feature_dim1, output_dim, model_dim, mlp_dim,
                    mlp_depth=1, drop_out=0.5, return_customized_layers=False):
    """
    Create A Multi-Layer Perceptron Model.

    inputs:
        embeddings: [batch, num_embed_feature, embed_dims] * 3  ## pronoun, A, B
        positional_features: [batch, num_pos_feature] * 2  ## pronoun-A, pronoun-B
    outputs:
        [batch, num_classes]  # in our case there should be 3 output classes: A, B, None

    :param output_dim: the output dimension size
    :param model_dim: rnn dimension size
    :param mlp_dim: the dimension size of fully connected layer
    :param mlp_depth: the depth of fully connected layers
    :param drop_out: dropout rate of fully connected layers
    :param return_customized_layers: boolean, default=False
        If True, return model and customized object dictionary, otherwise return model only
    :return: keras model
    """

    def _mlp_channel1(feature_dropout_layer, feature_map_layer, flatten_layer, x):
        x = feature_dropout_layer(x)
        x = feature_map_layer(x)
        x = flatten_layer(x)
        return x

    def _mlp_channel2(feature_map_layer, x):
        x = feature_map_layer(x)
        return x

    # inputs
    inputs1 = list()
    for fi in range(num_feature_channels1):
        inputs1.append(models.Input(shape=(num_features1, feature_dim1),
                                    dtype='float32', name='input1_' + str(fi)))

    inputs2 = list()
    for fi in range(num_feature_channels2):
        inputs2.append(models.Input(shape=(num_features2,),
                                    dtype='float32', name='input2_' + str(fi)))

    # define feature map layers
    # MLP Layers
    feature_dropout_layer1 = layers.TimeDistributed(
        layers.Dropout(rate=drop_out, name="input_dropout_layer"))
    feature_map_layer1 = layers.TimeDistributed(
        layers.Dense(model_dim, name="feature_map_layer1", activation="relu"))
    flatten_layer1 = layers.Flatten(name="feature_flatten_layer1")
    feature_map_layer2 = layers.Dense(model_dim, name="feature_map_layer2",
                                      activation="relu")

    x1 = [_mlp_channel1(feature_dropout_layer1, feature_map_layer1, flatten_layer1, input_)
          for input_ in inputs1]
    x2 = [_mlp_channel2(feature_map_layer2, input_) for input_ in inputs2]

    x = layers.Concatenate(axis=1, name="concate_layer")(x1 + x2)

    # MLP Layers
    x = layers.BatchNormalization(name='batch_norm_layer')(x)
    x = layers.Dropout(rate=drop_out, name="dropout_layer")(x)
    for i in range(mlp_depth - 1):
        x = layers.Dense(mlp_dim, activation='selu', kernel_initializer='lecun_normal',
                         name='selu_layer' + str(i))(x)
        x = layers.AlphaDropout(drop_out, name='alpha_layer' + str(i))(x)

    outputs = layers.Dense(output_dim, activation="softmax", name="softmax_layer0")(x)

    model = models.Model(inputs1 + inputs2, outputs)

    if return_customized_layers:
        return model, {}

    return model
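# Hedged usage sketch (illustrative only; the dimensions are assumptions
# mirroring the docstring: three embedding channels and two positional
# feature vectors, with three output classes).
def _example_mlp_model():
    return build_mlp_model(
        num_feature_channels1=3, num_feature_channels2=2,
        num_features1=5, num_features2=10, feature_dim1=300,
        output_dim=3, model_dim=128, mlp_dim=128, mlp_depth=2)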
if activationFunc == 'selu':
    myInitializer = "lecun_normal"
elif activationFunc == 'tanh':
    myInitializer = "glorot_uniform"

tensorBoardDir = "../deepNN/%s-pid%d/%s/fold%d" % (dateTime, sysuffix, namePrefix, fold)  # /tensorBoardLog
if not os.path.exists(tensorBoardDir):
    os.makedirs(tensorBoardDir)
checkPtFile = '../deepNN/%s-pid%d/p2v-fold%d.hdf5' % (dateTime, sysuffix, fold)
scriptInputArgs = '../deepNN/%s-pid%d/scriptInputArgs.txt' % (dateTime, sysuffix)
with open(scriptInputArgs, 'w') as textFile:
    print(syspec, file=textFile)

# Protein branch
protTensor = Input(shape=(protTrainIN.shape[1],), name='FastProt')
if activationFunc == 'selu':
    x1 = layers.AlphaDropout(dropoutRate)(protTensor)
else:
    x1 = layers.Dropout(dropoutRate)(protTensor)
x1 = layers.Dense(units=32, activation=activationFunc,
                  kernel_initializer=myInitializer,
                  kernel_regularizer=regularizers.l1_l2(l1=0, l2=0.01))(x1)

# RNA branch
rnaTensor = Input(shape=(rnaTrainIN.shape[1],), name='FastRNA')
if activationFunc == 'selu':
    x2 = layers.AlphaDropout(dropoutRate)(rnaTensor)
else:
    x2 = layers.Dropout(dropoutRate)(rnaTensor)
x2 = layers.Dense(units=32, activation=activationFunc,
                  kernel_initializer=myInitializer,
                  kernel_regularizer=regularizers.l1_l2(l1=0, l2=0.01))(x2)

# Merge the two branches with a dot product over the feature axis
merged = layers.dot([x1, x2], axes=-1)
# merged = kronecker([x1, x2])
# merged = layers.concatenate([x1, x2])
# merged = layers.multiply([x1, x2])
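# Hedged continuation sketch (not from the original script; kept commented out
# since the script continues elsewhere). A typical way to finish this
# two-branch setup is a prediction head on `merged`, trained with TensorBoard
# and checkpoint callbacks pointed at tensorBoardDir and checkPtFile. Assumes
# `models` and `callbacks` are imported from keras and that a label array
# `trainLabels` exists; both are assumptions, not part of this script.
# out = layers.Dense(1, activation='sigmoid', name='prediction')(merged)
# model = models.Model(inputs=[protTensor, rnaTensor], outputs=out)
# model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
# model.fit([protTrainIN, rnaTrainIN], trainLabels,
#           callbacks=[callbacks.TensorBoard(log_dir=tensorBoardDir),
#                      callbacks.ModelCheckpoint(checkPtFile, save_best_only=True)])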
def build_birnn_multifeature_coattention_model(voca_dim, time_steps, num_feature_channels,
                                               num_features, feature_dim, output_dim,
                                               model_dim, atten_dim, mlp_dim,
                                               item_embedding=None, rnn_depth=1,
                                               mlp_depth=1, drop_out=0.5,
                                               rnn_drop_out=0., rnn_state_drop_out=0.,
                                               trainable_embedding=False, gpu=False,
                                               return_customized_layers=False):
    """
    Create A Bidirectional Attention Model.

    :param voca_dim: vocabulary dimension size.
    :param time_steps: the length of input
    :param output_dim: the output dimension size
    :param model_dim: rnn dimension size
    :param mlp_dim: the dimension size of fully connected layer
    :param item_embedding: integer, numpy 2D array, or None (default=None)
        If item_embedding is an integer, connect a randomly initialized embedding matrix
        to the input tensor.
        If item_embedding is a matrix, this matrix will be used as the embedding matrix.
        If item_embedding is None, then connect input tensor to RNN layer directly.
    :param rnn_depth: rnn depth
    :param mlp_depth: the depth of fully connected layers
    :param num_feature_channels: the number of attention channels, this can be used to
        mimic multi-head attention mechanism
    :param drop_out: dropout rate of fully connected layers
    :param rnn_drop_out: dropout rate of rnn layers
    :param rnn_state_drop_out: dropout rate of rnn state tensor
    :param trainable_embedding: boolean
    :param gpu: boolean, default=False
        If True, CuDNNLSTM is used instead of LSTM for RNN layer.
    :param return_customized_layers: boolean, default=False
        If True, return model and customized object dictionary, otherwise return model only
    :return: keras model
    """
    if model_dim % 2 == 1:
        model_dim += 1

    if item_embedding is not None:
        inputs = models.Input(shape=(time_steps,), dtype='int32', name='input0')
        x1 = inputs

        # item embedding
        if isinstance(item_embedding, np.ndarray):
            assert voca_dim == item_embedding.shape[0]
            x1 = layers.Embedding(voca_dim, item_embedding.shape[1],
                                  input_length=time_steps,
                                  weights=[item_embedding, ],
                                  trainable=trainable_embedding,
                                  mask_zero=False, name='embedding_layer0')(x1)
        elif utils.is_integer(item_embedding):
            x1 = layers.Embedding(voca_dim, item_embedding,
                                  input_length=time_steps,
                                  trainable=trainable_embedding,
                                  mask_zero=False, name='embedding_layer0')(x1)
        else:
            raise ValueError("item_embedding must be either integer or numpy matrix")
    else:
        inputs = models.Input(shape=(time_steps, voca_dim), dtype='float32', name='input0')
        x1 = inputs

    inputs1 = list()
    for fi in range(num_feature_channels):
        inputs1.append(models.Input(shape=(num_features, feature_dim),
                                    dtype='float32', name='input1' + str(fi)))

    feature_map_layer = layers.TimeDistributed(
        layers.Dense(model_dim, name="feature_map_layer", activation="sigmoid"),
        name="td_feature_map_layer")
    x2s = list(map(lambda input_: feature_map_layer(input_), inputs1))

    if gpu:
        # rnn encoding
        for i in range(rnn_depth):
            x1 = layers.Bidirectional(
                layers.CuDNNLSTM(int(model_dim / 2), return_sequences=True),
                name='bi_lstm_layer' + str(i))(x1)
            x1 = layers.BatchNormalization(name='rnn_batch_norm_layer' + str(i))(x1)
            x1 = layers.Dropout(rnn_drop_out, name="rnn_dropout_layer" + str(i))(x1)
    else:
        # rnn encoding
        for i in range(rnn_depth):
            x1 = layers.Bidirectional(
                layers.LSTM(int(model_dim / 2), return_sequences=True,
                            dropout=rnn_drop_out,
                            recurrent_dropout=rnn_state_drop_out),
                name='bi_lstm_layer' + str(i))(x1)
            x1 = layers.BatchNormalization(name='rnn_batch_norm_layer' + str(i))(x1)

    coatten_layer = clayers.CoAttentionWeight(name="coattention_weights_layer")
    featnorm_layer1 = clayers.FeatureNormalization(
        name="normalized_coattention_weights_layer1", axis=1)
    featnorm_layer2 = clayers.FeatureNormalization(
        name="normalized_coattention_weights_layer2", axis=2)
    focus_layer1 = layers.Dot((1, 1), name="focus_layer1")
    focus_layer2 = layers.Dot((2, 1), name="focus_layer2")
    pair_layer1 = layers.Concatenate(axis=-1, name="pair_layer1")
    pair_layer2 = layers.Concatenate(axis=-1, name="pair_layer2")
    compare_layer1 = layers.TimeDistributed(
        layers.Dense(model_dim, activation="relu"), name="compare_layer1")
    compare_layer2 = layers.TimeDistributed(
        layers.Dense(model_dim, activation="relu"), name="compare_layer2")
    flatten_layer = layers.Flatten(name="flatten_layer")

    xs = list()
    for x2_ in x2s:
        xs += _coatten_compare_aggregate(coatten_layer, featnorm_layer1, featnorm_layer2,
                                         focus_layer1, focus_layer2,
                                         pair_layer1, pair_layer2,
                                         compare_layer1, compare_layer2,
                                         flatten_layer, x1, x2_)

    x = layers.Concatenate(axis=1, name="concat_feature_layer")(xs)

    # MLP Layers
    for i in range(mlp_depth - 1):
        x = layers.Dense(mlp_dim, activation='selu', kernel_initializer='lecun_normal',
                         name='selu_layer' + str(i))(x)
        x = layers.AlphaDropout(drop_out, name='alpha_layer' + str(i))(x)

    outputs = layers.Dense(output_dim, activation="softmax", name="softmax_layer0")(x)

    model = models.Model([inputs] + inputs1, outputs)

    if return_customized_layers:
        return model, {'CoAttentionWeight': clayers.CoAttentionWeight,
                       "FeatureNormalization": clayers.FeatureNormalization}

    return model
def build_birnn_feature_coattention_cnn_model(voca_dim, time_steps, num_features,
                                              feature_dim, output_dim, model_dim,
                                              mlp_dim, num_filters, filter_sizes,
                                              item_embedding=None, rnn_depth=1,
                                              mlp_depth=1, drop_out=0.5,
                                              rnn_drop_out=0., rnn_state_drop_out=0.,
                                              cnn_drop_out=0.5, pooling='max',
                                              trainable_embedding=False, gpu=False,
                                              return_customized_layers=False):
    """
    Create A Bidirectional Attention Model.

    :param voca_dim: vocabulary dimension size.
    :param time_steps: the length of input
    :param output_dim: the output dimension size
    :param model_dim: rnn dimension size
    :param mlp_dim: the dimension size of fully connected layer
    :param num_filters: the number of filters
    :param filter_sizes: list of integers
        The kernel size.
    :param item_embedding: integer, numpy 2D array, or None (default=None)
        If item_embedding is an integer, connect a randomly initialized embedding matrix
        to the input tensor.
        If item_embedding is a matrix, this matrix will be used as the embedding matrix.
        If item_embedding is None, then connect input tensor to RNN layer directly.
    :param rnn_depth: rnn depth
    :param mlp_depth: the depth of fully connected layers
    :param drop_out: dropout rate of fully connected layers
    :param rnn_drop_out: dropout rate of rnn layers
    :param rnn_state_drop_out: dropout rate of rnn state tensor
    :param cnn_drop_out: dropout rate between the cnn layer and fully connected layers
    :param pooling: str, either 'max' or 'average'
        Pooling method.
    :param trainable_embedding: boolean
    :param gpu: boolean, default=False
        If True, CuDNNLSTM is used instead of LSTM for RNN layer.
    :param return_customized_layers: boolean, default=False
        If True, return model and customized object dictionary, otherwise return model only
    :return: keras model
    """
    if model_dim % 2 == 1:
        model_dim += 1

    if item_embedding is not None:
        inputs = models.Input(shape=(time_steps,), dtype='int32', name='input0')
        x1 = inputs

        # item embedding
        if isinstance(item_embedding, np.ndarray):
            assert voca_dim == item_embedding.shape[0]
            x1 = layers.Embedding(voca_dim, item_embedding.shape[1],
                                  input_length=time_steps,
                                  weights=[item_embedding, ],
                                  trainable=trainable_embedding,
                                  mask_zero=False, name='embedding_layer0')(x1)
        elif utils.is_integer(item_embedding):
            x1 = layers.Embedding(voca_dim, item_embedding,
                                  input_length=time_steps,
                                  trainable=trainable_embedding,
                                  mask_zero=False, name='embedding_layer0')(x1)
        else:
            raise ValueError("item_embedding must be either integer or numpy matrix")
    else:
        inputs = models.Input(shape=(time_steps, voca_dim), dtype='float32', name='input0')
        x1 = inputs

    inputs1 = models.Input(shape=(num_features, feature_dim), dtype='float32', name='input1')
    x2 = layers.Dense(feature_dim, name="feature_map_layer", activation="relu")(inputs1)

    if gpu:
        # rnn encoding
        for i in range(rnn_depth):
            x1 = layers.Bidirectional(
                layers.CuDNNLSTM(int(model_dim / 2), return_sequences=True),
                name='bi_lstm_layer' + str(i))(x1)
            x1 = layers.BatchNormalization(name='rnn_batch_norm_layer' + str(i))(x1)
            x1 = layers.Dropout(rnn_drop_out, name="rnn_dropout_layer" + str(i))(x1)
    else:
        # rnn encoding
        for i in range(rnn_depth):
            x1 = layers.Bidirectional(
                layers.LSTM(int(model_dim / 2), return_sequences=True,
                            dropout=rnn_drop_out,
                            recurrent_dropout=rnn_state_drop_out),
                name='bi_lstm_layer' + str(i))(x1)
            x1 = layers.BatchNormalization(name='rnn_batch_norm_layer' + str(i))(x1)

    # attention
    attens = clayers.CoAttentionWeight(name="coattention_weights_layer")([x1, x2])
    attens1 = clayers.FeatureNormalization(
        name="normalized_coattention_weights_layer1", axis=1)(attens)
    attens2 = clayers.FeatureNormalization(
        name="normalized_coattention_weights_layer2", axis=2)(attens)

    # compare
    focus1 = layers.Dot((1, 1), name="focus_layer1")([attens1, x1])
    focus2 = layers.Dot((2, 1), name="focus_layer2")([attens2, x2])
    pair1 = layers.Concatenate(axis=-1, name="pair_layer1")([x1, focus2])
    pair2 = layers.Concatenate(axis=-1, name="pair_layer2")([x2, focus1])

    x1 = layers.TimeDistributed(layers.Dense(model_dim, activation="relu"),
                                name="compare_layer1")(pair1)
    x2 = layers.TimeDistributed(layers.Dense(model_dim, activation="relu"),
                                name="compare_layer2")(pair2)

    # Multi-Channel CNN for x1
    pooled_outputs = []
    for i in range(len(filter_sizes)):
        conv = layers.Conv1D(num_filters, kernel_size=filter_sizes[i],
                             padding='valid', activation='relu')(x1)
        if pooling == 'max':
            conv = layers.MaxPooling1D(pool_size=time_steps - filter_sizes[i] + 1,
                                       strides=1, padding='valid')(conv)
        else:
            conv = layers.AveragePooling1D(pool_size=time_steps - filter_sizes[i] + 1,
                                           strides=1, padding='valid')(conv)
        pooled_outputs.append(conv)

    x1 = layers.Concatenate(name='concated_layer')(pooled_outputs)
    x1 = layers.Flatten()(x1)
    x1 = layers.Dropout(cnn_drop_out, name='conv_dropout_layer')(x1)
    x1 = layers.BatchNormalization(name="batch_norm_layer")(x1)

    # Average Pool for x2
    x2 = layers.GlobalAveragePooling1D(name="average_pool_layer")(x2)

    x = layers.Concatenate(axis=1, name="concat_deep_feature_layer")([x1, x2])

    # MLP Layers
    for i in range(mlp_depth - 1):
        x = layers.Dense(mlp_dim, activation='selu', kernel_initializer='lecun_normal',
                         name='selu_layer' + str(i))(x)
        x = layers.AlphaDropout(drop_out, name='alpha_layer' + str(i))(x)

    outputs = layers.Dense(output_dim, activation="softmax", name="softmax_layer0")(x)

    # both the token input and the auxiliary feature input feed the graph
    model = models.Model([inputs, inputs1], outputs)

    if return_customized_layers:
        return model, {'CoAttentionWeight': clayers.CoAttentionWeight,
                       "FeatureNormalization": clayers.FeatureNormalization}

    return model
def build_birnn_cnn_model(voca_dim, time_steps, output_dim, rnn_dim, mlp_dim,
                          num_filters, filter_sizes, item_embedding=None,
                          rnn_depth=1, mlp_depth=1, drop_out=0.5,
                          rnn_drop_out=0.5, rnn_state_drop_out=0.5,
                          cnn_drop_out=0.5, pooling='max',
                          trainable_embedding=False, gpu=False,
                          return_customized_layers=False):
    """
    Create A Bidirectional CNN Model.

    :param voca_dim: vocabulary dimension size.
    :param time_steps: the length of input
    :param output_dim: the output dimension size
    :param rnn_dim: rnn dimension size
    :param num_filters: the number of filters
    :param filter_sizes: list of integers
        The kernel size.
    :param mlp_dim: the dimension size of fully connected layer
    :param item_embedding: integer, numpy 2D array, or None (default=None)
        If item_embedding is an integer, connect a randomly initialized embedding matrix
        to the input tensor.
        If item_embedding is a matrix, this matrix will be used as the embedding matrix.
        If item_embedding is None, then connect input tensor to RNN layer directly.
    :param rnn_depth: rnn depth
    :param mlp_depth: the depth of fully connected layers
    :param drop_out: dropout rate of fully connected layers
    :param rnn_drop_out: dropout rate of rnn layers
    :param rnn_state_drop_out: dropout rate of rnn state tensor
    :param cnn_drop_out: dropout rate between the cnn layer and fully connected layers
    :param pooling: str, either 'max' or 'average'
        Pooling method.
    :param trainable_embedding: boolean
    :param gpu: boolean, default=False
        If True, CuDNNLSTM is used instead of LSTM for RNN layer.
    :param return_customized_layers: boolean, default=False
        If True, return model and customized object dictionary, otherwise return model only
    :return: keras model
    """
    if item_embedding is not None:
        inputs = models.Input(shape=(time_steps,), dtype='int32', name='input0')
        x = inputs

        # item embedding
        if isinstance(item_embedding, np.ndarray):
            assert voca_dim == item_embedding.shape[0]
            x = layers.Embedding(voca_dim, item_embedding.shape[1],
                                 input_length=time_steps,
                                 weights=[item_embedding, ],
                                 trainable=trainable_embedding,
                                 mask_zero=False, name='embedding_layer0')(x)
        elif utils.is_integer(item_embedding):
            x = layers.Embedding(voca_dim, item_embedding,
                                 input_length=time_steps,
                                 trainable=trainable_embedding,
                                 mask_zero=False, name='embedding_layer0')(x)
        else:
            raise ValueError("item_embedding must be either integer or numpy matrix")
    else:
        inputs = models.Input(shape=(time_steps, voca_dim), dtype='float32', name='input0')
        x = inputs

    if gpu:
        # rnn encoding
        for i in range(rnn_depth):
            x = layers.Bidirectional(
                layers.CuDNNLSTM(rnn_dim, return_sequences=True),
                name='bi_lstm_layer' + str(i))(x)
            x = layers.BatchNormalization(name='rnn_batch_norm_layer' + str(i))(x)
            x = layers.Dropout(rnn_drop_out, name="rnn_dropout_layer" + str(i))(x)
    else:
        # rnn encoding
        for i in range(rnn_depth):
            x = layers.Bidirectional(
                layers.LSTM(rnn_dim, return_sequences=True,
                            dropout=rnn_drop_out,
                            recurrent_dropout=rnn_state_drop_out),
                name='bi_lstm_layer' + str(i))(x)
            x = layers.BatchNormalization(name='rnn_batch_norm_layer' + str(i))(x)

    pooled_outputs = []
    for i in range(len(filter_sizes)):
        conv = layers.Conv1D(num_filters, kernel_size=filter_sizes[i],
                             padding='valid', activation='relu')(x)
        if pooling == 'max':
            conv = layers.MaxPooling1D(pool_size=time_steps - filter_sizes[i] + 1,
                                       strides=1, padding='valid')(conv)
        else:
            conv = layers.AveragePooling1D(pool_size=time_steps - filter_sizes[i] + 1,
                                           strides=1, padding='valid')(conv)
        pooled_outputs.append(conv)

    x = layers.Concatenate(name='concated_layer')(pooled_outputs)
    x = layers.Flatten()(x)
    x = layers.Dropout(cnn_drop_out, name='conv_dropout_layer')(x)
    x = layers.BatchNormalization(name="batch_norm_layer")(x)

    # MLP Layers
    for i in range(mlp_depth - 1):
        x = layers.Dense(mlp_dim, activation='selu', kernel_initializer='lecun_normal',
                         name='selu_layer' + str(i))(x)
        x = layers.AlphaDropout(drop_out, name='alpha_layer' + str(i))(x)

    outputs = layers.Dense(output_dim, activation="softmax", name="softmax_layer0")(x)

    model = models.Model(inputs, outputs)

    if return_customized_layers:
        return model, dict()

    return model
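# Hedged usage sketch (illustrative only; all dimensions are assumptions).
# Each kernel size gets its own Conv1D branch whose pooled outputs are
# concatenated before the MLP head.
def _example_birnn_cnn_model():
    return build_birnn_cnn_model(
        voca_dim=10000, time_steps=50, output_dim=3, rnn_dim=64,
        mlp_dim=128, num_filters=64, filter_sizes=[2, 3, 4],
        item_embedding=100, mlp_depth=2)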
def build_birnn_attention_model(voca_dim, time_steps, output_dim, rnn_dim, mlp_dim,
                                item_embedding=None, rnn_depth=1, mlp_depth=1,
                                num_att_channel=1, drop_out=0.5,
                                rnn_drop_out=0., rnn_state_drop_out=0.,
                                trainable_embedding=False, gpu=False,
                                return_customized_layers=False):
    """
    Create A Bidirectional Attention Model.

    :param voca_dim: vocabulary dimension size.
    :param time_steps: the length of input
    :param output_dim: the output dimension size
    :param rnn_dim: rnn dimension size
    :param mlp_dim: the dimension size of fully connected layer
    :param item_embedding: integer, numpy 2D array, or None (default=None)
        If item_embedding is an integer, connect a randomly initialized embedding matrix
        to the input tensor.
        If item_embedding is a matrix, this matrix will be used as the embedding matrix.
        If item_embedding is None, then connect input tensor to RNN layer directly.
    :param rnn_depth: rnn depth
    :param mlp_depth: the depth of fully connected layers
    :param num_att_channel: the number of attention channels, this can be used to
        mimic multi-head attention mechanism
    :param drop_out: dropout rate of fully connected layers
    :param rnn_drop_out: dropout rate of rnn layers
    :param rnn_state_drop_out: dropout rate of rnn state tensor
    :param trainable_embedding: boolean
    :param gpu: boolean, default=False
        If True, CuDNNLSTM is used instead of LSTM for RNN layer.
    :param return_customized_layers: boolean, default=False
        If True, return model and customized object dictionary, otherwise return model only
    :return: keras model
    """
    if item_embedding is not None:
        inputs = models.Input(shape=(time_steps,), dtype='int32', name='input0')
        x = inputs

        # item embedding
        if isinstance(item_embedding, np.ndarray):
            assert voca_dim == item_embedding.shape[0]
            x = layers.Embedding(voca_dim, item_embedding.shape[1],
                                 input_length=time_steps,
                                 weights=[item_embedding, ],
                                 trainable=trainable_embedding,
                                 mask_zero=False, name='embedding_layer0')(x)
        elif utils.is_integer(item_embedding):
            x = layers.Embedding(voca_dim, item_embedding,
                                 input_length=time_steps,
                                 trainable=trainable_embedding,
                                 mask_zero=False, name='embedding_layer0')(x)
        else:
            raise ValueError("item_embedding must be either integer or numpy matrix")
    else:
        inputs = models.Input(shape=(time_steps, voca_dim), dtype='float32', name='input0')
        x = inputs

    if gpu:
        # rnn encoding
        for i in range(rnn_depth):
            x = layers.Bidirectional(
                layers.CuDNNLSTM(rnn_dim, return_sequences=True),
                name='bi_lstm_layer' + str(i))(x)
            x = layers.BatchNormalization(name='rnn_batch_norm_layer' + str(i))(x)
            x = layers.Dropout(rnn_drop_out, name="rnn_dropout_layer" + str(i))(x)
    else:
        # rnn encoding
        for i in range(rnn_depth):
            x = layers.Bidirectional(
                layers.LSTM(rnn_dim, return_sequences=True,
                            dropout=rnn_drop_out,
                            recurrent_dropout=rnn_state_drop_out),
                name='bi_lstm_layer' + str(i))(x)
            x = layers.BatchNormalization(name='rnn_batch_norm_layer' + str(i))(x)

    # attention
    attention_heads = []
    x_per = layers.Permute((2, 1), name='permuted_attention_x')(x)
    for h in range(max(1, num_att_channel)):
        attention = clayers.AttentionWeight(name="attention_weights_layer" + str(h))(x)
        xx = layers.Dot([2, 1], name='focus_head' + str(h) + '_layer0')([x_per, attention])
        attention_heads.append(xx)

    if num_att_channel > 1:
        x = layers.Concatenate(name='focus_layer0')(attention_heads)
    else:
        x = attention_heads[0]

    x = layers.BatchNormalization(name='focused_batch_norm_layer')(x)

    # MLP Layers
    for i in range(mlp_depth - 1):
        x = layers.Dense(mlp_dim, activation='selu', kernel_initializer='lecun_normal',
                         name='selu_layer' + str(i))(x)
        x = layers.AlphaDropout(drop_out, name='alpha_layer' + str(i))(x)

    outputs = layers.Dense(output_dim, activation="softmax", name="softmax_layer0")(x)

    model = models.Model(inputs, outputs)

    if return_customized_layers:
        return model, {'AttentionWeight': clayers.AttentionWeight}

    return model
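# Hedged usage sketch (illustrative only; all dimensions are assumptions).
# Setting num_att_channel > 1 concatenates several attention "heads" before
# the MLP layers.
def _example_birnn_attention_model():
    return build_birnn_attention_model(
        voca_dim=10000, time_steps=50, output_dim=3, rnn_dim=64,
        mlp_dim=128, item_embedding=100, num_att_channel=2, mlp_depth=2)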