Ejemplo n.º 1
0
def build_model(img_input,
                input_shape=None,
                classes=1000):
    """Build a residual-stack classifier on top of ``img_input``.

    The input is reshaped to ``(input_shape[0], input_shape[1], -1)``, run
    through six ``residual_stack`` blocks, flattened, and classified by two
    128-unit SELU dense layers (each followed by ``AlphaDropout(0.3)``) and a
    final dense layer with a softmax activation.

    :param img_input: Keras input tensor the model is built on.
    :param input_shape: sequence whose first two entries are used as the
        leading dimensions of the reshape. Required, even though the
        signature defaults it to None.
    :param classes: number of units of the final dense layer.
    :return: ``models.Model`` mapping ``img_input`` to the softmax output.
    :raises ValueError: if ``input_shape`` is None.
    """
    if input_shape is None:
        # Fail early with a clear message instead of a TypeError from
        # subscripting None further down.
        raise ValueError("input_shape must be provided (got None)")

    x = layers.Reshape((input_shape[0], input_shape[1], -1))(img_input)

    # Residual stack 1 is the only one called with False as its last argument.
    # NOTE(review): the original author's shape notes were
    # (1,512,32) -> (1,256,32) -> (1,128,32) -> (1,64,32) -> (1,32,32)
    # -> (1,16,32), which suggests the flag enables a 2x downsampling step;
    # confirm against residual_stack.
    x = residual_stack(x, 32, "ReStk1", False)

    # Residual stacks 2 through 6.
    for stack_no in range(2, 7):
        x = residual_stack(x, 32, "ReStk{}".format(stack_no), True)

    # Fully connected head: two SELU layers followed by the class scores.
    x = layers.Flatten()(x)
    for dense_name in ("dense1", "dense2"):
        x = layers.Dense(128, activation='selu', name=dense_name)(x)
        x = layers.AlphaDropout(0.3)(x)
    x = layers.Dense(classes, name="dense3")(x)

    outputs = layers.Activation('softmax')(x)

    return models.Model(inputs=[img_input], outputs=[outputs])
Ejemplo n.º 2
0
def build_model(hc_model,
                width=1024,
                depth=2,
                dropout_rate=0.5,
                nclasses=4,
                mode='dense',
                activation='softmax',
                selu=False,
                mc_dropout=False,
                l2_reg=1e-4):
    """PixelNet: define an MLP model over a hypercolumn model given as input.

    @article{pixelnet,
      title={Pixel{N}et: {R}epresentation of the pixels, by the pixels, and for the pixels},
      author={Bansal, Aayush
              and Chen, Xinlei
              and Russell, Bryan
              and Gupta, Abhinav
              and Ramanan, Deva},
      Journal={arXiv preprint arXiv:1702.06506},
      year={2017}
    }

    From the paper and their notes on github, it seems like the semantic
    segmentation task should work either with linear classifier + BatchNorm,
    or with MLP without BatchNorm.

    :param hc_model: model whose ``output`` feeds the MLP and whose ``inputs``
        become the inputs of the returned model.
    :param width: width passed to every hidden block.
    :param depth: number of hidden blocks.
    :param dropout_rate: rate of the dropout layer that follows each block.
    :param nclasses: number of units of the prediction layer.
    :param mode: forwarded unchanged to ``unflatten_pixels``.
    :param activation: activation function for the prediction layer;
        'softmax' for classification, 'linear' for regression.
    :param selu: if True use ``dense_selu`` + ``AlphaDropout`` blocks,
        otherwise ``dense_bn`` + ``Dropout`` blocks.
    :param mc_dropout: if True, the ``Dropout`` layers are forced into
        training mode on every call (Monte Carlo dropout). Only the
        non-SELU branch honours this flag.
    :param l2_reg: forwarded to the hidden-block helpers.
    :return: ``models.Model`` from ``hc_model.inputs`` to the un-flattened
        predictions.
    """
    x = hc_model.output
    nchannels = tf.shape(x)[-1]
    x = flatten_pixels(nchannels)(x)

    # training=None lets Keras pick the mode from the current phase (dropout
    # on while fitting, off while predicting). The previous code passed
    # mc_dropout straight through, so the default False pinned the layer to
    # inference mode and dropout was never applied, not even during training.
    dropout_training = True if mc_dropout else None

    for idx in range(depth):
        block_name = 'mlp{}'.format(idx + 1)
        if selu:
            x = dense_selu(x, width, name=block_name, l2_reg=l2_reg)
            # NOTE(review): mc_dropout is not applied to AlphaDropout here,
            # matching the original; confirm whether that is intended.
            x = layers.AlphaDropout(dropout_rate)(x)
        else:
            x = dense_bn(x, width, name=block_name, l2_reg=l2_reg)
            x = layers.Dropout(dropout_rate)(x, training=dropout_training)

    x = layers.Dense(nclasses, activation=activation, name='predictions')(x)

    x = unflatten_pixels(hc_model.inputs, nclasses=nclasses, mode=mode)(x)

    return models.Model(inputs=hc_model.inputs, outputs=x)
Ejemplo n.º 3
0
def create_cost_module(inputs, adjustable):
    """Implements the cost module of the siamese network.

    Depending on ``adjustable.cost_module_type`` the two head features are
    compared by:

    * ``'neural_network'``: merge the features as selected by
      ``adjustable.neural_distance``, then two Dense/Activation/Dropout
      blocks and a softmax layer with ``pc.NUM_CLASSES`` units;
    * ``'euclidean'``: a Lambda layer wrapping ``euclidean_distance``;
    * ``'euclidean_fc'``: the euclidean Lambda followed by ``Dense(1)``, an
      activation and a softmax layer with ``pc.NUM_CLASSES`` units;
    * ``'cosine'``: a Lambda layer wrapping ``cosine_distance_normalized``.

    :param inputs:          list containing feature tensor from each siamese head
    :param adjustable:      object of class ProjectVariable
    :return:                output tensor of the selected cost module, or
                            None when ``cost_module_type`` is not recognised
    :raises ValueError:     if ``cost_module_type`` is 'neural_network' and
                            ``neural_distance`` is not a supported merge mode
    """
    def subtract(x):
        return x[0] - x[1]

    def divide(x):
        return x[0] / x[1]

    def absolute(x):
        return abs(x[0] - x[1])

    cost_type = adjustable.cost_module_type

    if cost_type == 'neural_network':
        merge_mode = adjustable.neural_distance
        if merge_mode == 'concatenate':
            features = layers.concatenate(inputs)
        elif merge_mode == 'add':
            features = layers.add(inputs)
        elif merge_mode == 'multiply':
            features = layers.multiply(inputs)
        elif merge_mode == 'subtract':
            features = layers.Lambda(subtract)(inputs)
        elif merge_mode == 'divide':
            features = layers.Lambda(divide)(inputs)
        elif merge_mode == 'absolute':
            features = layers.Lambda(absolute)(inputs)
        else:
            # Previously None was passed into the first Dense layer, which
            # failed with an error unrelated to the real cause.
            raise ValueError(
                "unknown neural_distance: {!r}".format(merge_mode))

        # Two hidden blocks: Dense -> Activation -> (Alpha)Dropout.
        hidden = features
        for position, layer_name in enumerate(('dense_1', 'dense_2')):
            hidden = layers.Dense(
                adjustable.neural_distance_layers[position],
                name=layer_name,
                trainable=adjustable.trainable_cost_module)(hidden)
            hidden = layers.Activation(
                adjustable.activation_function)(hidden)
            if adjustable.activation_function == 'selu':
                hidden = layers.AlphaDropout(adjustable.dropout_rate)(hidden)
            else:
                hidden = layers.Dropout(adjustable.dropout_rate)(hidden)

        # The layer name 'ouput' (sic) is kept as is: it is a runtime
        # identifier that by-name weight loading would match against.
        output_layer = layers.Dense(pc.NUM_CLASSES, name='ouput')(hidden)
        softmax = layers.Activation('softmax')(output_layer)

        if adjustable.weights_name is not None:
            # NOTE(review): `softmax` is the output of a layer call, not a
            # model; load_weights is a Model method, so this call looks like
            # it will fail when weights_name is set. Loading probably has to
            # happen on the Model assembled by the caller. Left unchanged
            # because the correct fix needs that model.
            softmax.load_weights(os.path.join(pc.SAVE_LOCATION_MODEL_WEIGHTS,
                                              adjustable.weights_name),
                                 by_name=True)

        return softmax

    if cost_type == 'euclidean':
        return layers.Lambda(euclidean_distance)(inputs)

    if cost_type == 'euclidean_fc':
        distance = layers.Lambda(euclidean_distance,
                                 output_shape=eucl_dist_output_shape)(inputs)
        dense_layer = layers.Dense(1, name='dense_1')(distance)
        activation = layers.Activation(
            adjustable.activation_function)(dense_layer)
        output_layer = layers.Dense(pc.NUM_CLASSES, name='ouput')(activation)
        return layers.Activation('softmax')(output_layer)

    if cost_type == 'cosine':
        return layers.Lambda(cosine_distance_normalized)(inputs)

    # Unrecognised cost_module_type: keep the historical behaviour of
    # returning None, now stated explicitly.
    return None
Ejemplo n.º 4
0
def build_inter_coattention_cnn_model(num_feature_channels1,
                                      num_feature_channels2,
                                      num_features1,
                                      num_features2,
                                      feature_dim1,
                                      output_dim,
                                      num_filters,
                                      filter_sizes,
                                      atten_dim,
                                      model_dim,
                                      mlp_dim,
                                      mlp_depth=1,
                                      drop_out=0.5,
                                      pooling='max',
                                      padding='valid',
                                      return_customized_layers=False):
    """
    Assemble a co-attention model over two groups of inputs.

    Two "pairs" are formed by concatenating (along axis 1) the first
    group-1 input with the second (resp. third) group-1 input and the first
    (resp. second) projected group-2 input. A co-attention weight matrix is
    computed between the pairs, each pair is extended with the attended view
    of the other one, and the result goes through a time-distributed dense
    layer, batch normalisation, an optional SELU MLP and a softmax layer.

    inputs (as described by the original author):
        embeddings: [batch, num_embed_feature, embed_dims] * 3 ## pronoun, A, B
        positional_features: [batch, num_pos_feature] * 2 ## pronoun-A, pronoun-B

    outputs:
        [batch, output_dim]

    The body indexes group-1 inputs 0..2 and group-2 inputs 0..1, so at least
    three and two channels respectively are needed.

    :param num_feature_channels1: number of group-1 inputs, each of shape
        (num_features1, feature_dim1)
    :param num_feature_channels2: number of group-2 inputs, each of shape
        (num_features2,)
    :param num_features1: first dimension of every group-1 input
    :param num_features2: dimension of every group-2 input
    :param feature_dim1: last dimension of group-1 inputs; also the size the
        group-2 inputs are projected to
    :param output_dim: the output dimension size
    :param num_filters: not referenced by the current implementation
    :param filter_sizes: not referenced by the current implementation
    :param atten_dim: passed to RemappedCoAttentionWeight
    :param model_dim: not referenced by the current implementation
    :param mlp_dim: the dimension size of fully connected layers
    :param mlp_depth: the depth of fully connected layers; mlp_depth - 1 SELU
        blocks are inserted before the softmax layer
    :param drop_out: dropout rate used throughout
    :param pooling: not referenced by the current implementation
    :param padding: not referenced by the current implementation
    :param return_customized_layers: boolean, default=False
        If True, return model and customized object dictionary, otherwise
        return model only
    :return: keras model
    """
    # Input placeholders for both groups.
    embed_inputs = [
        models.Input(shape=(num_features1, feature_dim1),
                     dtype='float32',
                     name='input1_{}'.format(channel))
        for channel in range(num_feature_channels1)
    ]
    position_inputs = [
        models.Input(shape=(num_features2, ),
                     dtype='float32',
                     name='input2_{}'.format(channel))
        for channel in range(num_feature_channels2)
    ]

    # This wrapper is never applied to any tensor (group-1 inputs are used
    # as-is). It is still instantiated, exactly as before, so that layer
    # construction stays unchanged.
    _unused_input_dropout = layers.TimeDistributed(
        layers.Dropout(rate=drop_out, name="input_dropout_layer"))
    position_projection = layers.Dense(feature_dim1,
                                       name="feature_map_layer2",
                                       activation="relu")

    embeddings = list(embed_inputs)
    projected = [position_projection(tensor) for tensor in position_inputs]

    # Give every projected group-2 vector a length-1 leading axis so it can
    # be concatenated with the group-1 tensors along axis 1.
    to_single_row = layers.Reshape((1, feature_dim1), name="reshape_layer")
    projected = [to_single_row(tensor) for tensor in projected]

    first_pair = layers.Concatenate(axis=1, name="concate_pair1_layer")(
        [embeddings[0], embeddings[1], projected[0]])
    second_pair = layers.Concatenate(axis=1, name="concate_pair2_layer")(
        [embeddings[0], embeddings[2], projected[1]])

    # Co-attention building blocks.
    coattention = RemappedCoAttentionWeight(
        atten_dim, name="coattention_weights_layer")
    normalize_axis1 = FeatureNormalization(
        name="normalized_coattention_weights_layer1", axis=1)
    normalize_axis2 = FeatureNormalization(
        name="normalized_coattention_weights_layer2", axis=2)
    attend_first = layers.Dot((1, 1), name="focus_layer1")
    attend_second = layers.Dot((2, 1), name="focus_layer2")
    join_first = layers.Concatenate(axis=-1, name="pair_layer1")
    join_second = layers.Concatenate(axis=-1, name="pair_layer2")

    # Attention weights, normalised along each of the two axes.
    weights = coattention([first_pair, second_pair])
    weights_axis1 = normalize_axis1(weights)
    weights_axis2 = normalize_axis2(weights)

    # Each pair is extended with the attended summary of the other pair.
    first_summary = attend_first([weights_axis1, first_pair])
    second_summary = attend_second([weights_axis2, second_pair])
    first_enriched = join_first([first_pair, second_summary])
    second_enriched = join_second([second_pair, first_summary])

    hidden = layers.Concatenate(axis=1, name="concate_layer")(
        [first_enriched, second_enriched])
    hidden = layers.TimeDistributed(
        layers.Dropout(rate=drop_out, name="pair_dropout_layer"))(hidden)
    hidden = layers.TimeDistributed(
        layers.Dense(mlp_dim,
                     name="pair_feature_map_layer",
                     activation="relu"))(hidden)
    hidden = layers.Flatten(name="pair_feature_flatten_layer1")(hidden)

    # Classifier head.
    hidden = layers.BatchNormalization(name='batch_norm_layer')(hidden)
    hidden = layers.Dropout(rate=drop_out, name="dropout_layer")(hidden)

    for level in range(mlp_depth - 1):
        suffix = str(level)
        hidden = layers.Dense(mlp_dim,
                              activation='selu',
                              kernel_initializer='lecun_normal',
                              name='selu_layer' + suffix)(hidden)
        hidden = layers.AlphaDropout(drop_out,
                                     name='alpha_layer' + suffix)(hidden)

    outputs = layers.Dense(output_dim,
                           activation="softmax",
                           name="softmax_layer0")(hidden)

    model = models.Model(embed_inputs + position_inputs, outputs)

    if not return_customized_layers:
        return model

    # Custom layer classes needed to deserialise the model later.
    custom_objects = {
        'RemappedCoAttentionWeight': RemappedCoAttentionWeight,
        "FeatureNormalization": FeatureNormalization
    }
    return model, custom_objects
Ejemplo n.º 5
0
def build_multi_channel_cnn_model(num_feature_channels1,
                                  num_feature_channels2,
                                  num_features1,
                                  num_features2,
                                  feature_dim1,
                                  output_dim,
                                  num_filters,
                                  filter_sizes,
                                  model_dim,
                                  mlp_dim,
                                  mlp_depth=1,
                                  drop_out=0.5,
                                  pooling='max',
                                  padding='valid',
                                  return_customized_layers=False):
    """
    Create a multi-channel CNN model.

    Every group-1 input goes through a shared dropout layer and a shared
    bank of Conv1D + global pooling layers (one per entry of filter_sizes);
    every group-2 input goes through a shared dense layer. All resulting
    vectors are concatenated along axis 1 and classified by a batch-normalised
    MLP with a softmax output.

    inputs (as described by the original author):
        embeddings: [batch, num_embed_feature, embed_dims] * 3 ## pronoun, A, B
        positional_features: [batch, num_pos_feature] * 2 ## pronoun-A, pronoun-B

    outputs:
        [batch, output_dim]

    :param num_feature_channels1: number of group-1 inputs, each of shape
        (num_features1, feature_dim1)
    :param num_feature_channels2: number of group-2 inputs, each of shape
        (num_features2,)
    :param num_features1: first dimension of every group-1 input
    :param num_features2: dimension of every group-2 input
    :param feature_dim1: last dimension of every group-1 input
    :param output_dim: the output dimension size
    :param num_filters: list of integers
        The number of filters; indexed in step with filter_sizes.
    :param filter_sizes: list of integers
        The kernel size.
    :param pooling: str, 'max' selects global max pooling, anything else
        selects global average pooling
    :param padding: One of "valid", "causal" or "same" (case-insensitive).
        Padding method.
    :param model_dim: size of the dense layer applied to group-2 inputs
    :param mlp_dim: the dimension size of fully connected layers
    :param mlp_depth: the depth of fully connected layers; mlp_depth - 1 SELU
        blocks are inserted before the softmax layer
    :param drop_out: dropout rate used throughout
    :param return_customized_layers: boolean, default=False
        If True, return model and an (empty) customized object dictionary,
        otherwise return model only
    :return: keras model
    """
    # Input placeholders for both groups.
    conv_inputs = [
        models.Input(shape=(num_features1, feature_dim1),
                     dtype='float32',
                     name='input1_{}'.format(channel))
        for channel in range(num_feature_channels1)
    ]
    dense_inputs = [
        models.Input(shape=(num_features2, ),
                     dtype='float32',
                     name='input2_{}'.format(channel))
        for channel in range(num_feature_channels2)
    ]

    # Shared layers for the group-1 (convolutional) path.
    input_dropout = layers.TimeDistributed(
        layers.Dropout(rate=drop_out, name="input_dropout_layer"))

    if pooling == 'max':
        pool_cls = layers.GlobalMaxPooling1D
    else:
        pool_cls = layers.GlobalAveragePooling1D

    conv_bank = []
    pool_bank = []
    for idx, kernel in enumerate(filter_sizes):
        conv_bank.append(
            layers.Conv1D(num_filters[idx],
                          kernel_size=kernel,
                          padding=padding,
                          activation='relu',
                          name="cc_layer1" + str(idx)))
        pool_bank.append(pool_cls(name='global_pooling_layer1' + str(idx)))
    merge_pooled = layers.Concatenate(name='concated_layer')

    # Shared layer for the group-2 (dense) path.
    dense_projection = layers.Dense(model_dim,
                                    name="feature_map_layer2",
                                    activation="relu")

    def _encode_conv_channel(tensor):
        """Dropout, then every conv/pool pair; merge if more than one."""
        tensor = input_dropout(tensor)
        pooled = [pool(conv(tensor))
                  for conv, pool in zip(conv_bank, pool_bank)]
        # A single filter size needs no concatenation.
        if len(conv_bank) == 1:
            return pooled[0]
        return merge_pooled(pooled)

    conv_features = [_encode_conv_channel(tensor) for tensor in conv_inputs]
    dense_features = [dense_projection(tensor) for tensor in dense_inputs]

    hidden = layers.Concatenate(axis=1, name="concate_layer")(
        conv_features + dense_features)

    # Classifier head.
    hidden = layers.BatchNormalization(name='batch_norm_layer')(hidden)
    hidden = layers.Dropout(rate=drop_out, name="dropout_layer")(hidden)

    for level in range(mlp_depth - 1):
        suffix = str(level)
        hidden = layers.Dense(mlp_dim,
                              activation='selu',
                              kernel_initializer='lecun_normal',
                              name='selu_layer' + suffix)(hidden)
        hidden = layers.AlphaDropout(drop_out,
                                     name='alpha_layer' + suffix)(hidden)

    outputs = layers.Dense(output_dim,
                           activation="softmax",
                           name="softmax_layer0")(hidden)

    model = models.Model(conv_inputs + dense_inputs, outputs)

    # No custom layers are involved, hence the empty dictionary.
    return (model, {}) if return_customized_layers else model
Ejemplo n.º 6
0
def build_mlp_model(num_feature_channels1,
                    num_feature_channels2,
                    num_features1,
                    num_features2,
                    feature_dim1,
                    output_dim,
                    model_dim,
                    mlp_dim,
                    mlp_depth=1,
                    drop_out=0.5,
                    return_customized_layers=False):
    """
    Create a multi-layer perceptron model over two groups of inputs.

    Every group-1 input goes through a shared time-distributed dropout, a
    shared time-distributed dense layer and a flatten layer; every group-2
    input goes through a shared dense layer. All resulting vectors are
    concatenated along axis 1 and classified by a batch-normalised MLP with
    a softmax output.

    inputs (as described by the original author):
        embeddings: [batch, num_embed_feature, embed_dims] * 3 ## pronoun, A, B
        positional_features: [batch, num_pos_feature] * 2 ## pronoun-A, pronoun-B

    outputs:
        [batch, output_dim]

    :param num_feature_channels1: number of group-1 inputs, each of shape
        (num_features1, feature_dim1)
    :param num_feature_channels2: number of group-2 inputs, each of shape
        (num_features2,)
    :param num_features1: first dimension of every group-1 input
    :param num_features2: dimension of every group-2 input
    :param feature_dim1: last dimension of every group-1 input
    :param output_dim: the output dimension size
    :param model_dim: size of the dense layers applied to both input groups
    :param mlp_dim: the dimension size of fully connected layers
    :param mlp_depth: the depth of fully connected layers; mlp_depth - 1 SELU
        blocks are inserted before the softmax layer
    :param drop_out: dropout rate used throughout
    :param return_customized_layers: boolean, default=False
        If True, return model and an (empty) customized object dictionary,
        otherwise return model only
    :return: keras model
    """
    # Input placeholders for both groups.
    sequence_inputs = [
        models.Input(shape=(num_features1, feature_dim1),
                     dtype='float32',
                     name='input1_{}'.format(channel))
        for channel in range(num_feature_channels1)
    ]
    vector_inputs = [
        models.Input(shape=(num_features2, ),
                     dtype='float32',
                     name='input2_{}'.format(channel))
        for channel in range(num_feature_channels2)
    ]

    # Layers shared by all inputs of a group.
    sequence_dropout = layers.TimeDistributed(
        layers.Dropout(rate=drop_out, name="input_dropout_layer"))
    sequence_projection = layers.TimeDistributed(
        layers.Dense(model_dim, name="feature_map_layer1", activation="relu"))
    sequence_flatten = layers.Flatten(name="feature_flatten_layer1")
    vector_projection = layers.Dense(model_dim,
                                     name="feature_map_layer2",
                                     activation="relu")

    # Group 1: dropout -> dense -> flatten.
    sequence_features = []
    for tensor in sequence_inputs:
        tensor = sequence_dropout(tensor)
        tensor = sequence_projection(tensor)
        sequence_features.append(sequence_flatten(tensor))

    # Group 2: a single dense projection.
    vector_features = [vector_projection(tensor) for tensor in vector_inputs]

    hidden = layers.Concatenate(axis=1, name="concate_layer")(
        sequence_features + vector_features)

    # Classifier head.
    hidden = layers.BatchNormalization(name='batch_norm_layer')(hidden)
    hidden = layers.Dropout(rate=drop_out, name="dropout_layer")(hidden)

    for level in range(mlp_depth - 1):
        suffix = str(level)
        hidden = layers.Dense(mlp_dim,
                              activation='selu',
                              kernel_initializer='lecun_normal',
                              name='selu_layer' + suffix)(hidden)
        hidden = layers.AlphaDropout(drop_out,
                                     name='alpha_layer' + suffix)(hidden)

    outputs = layers.Dense(output_dim,
                           activation="softmax",
                           name="softmax_layer0")(hidden)

    model = models.Model(sequence_inputs + vector_inputs, outputs)

    # No custom layers are involved, hence the empty dictionary.
    return (model, {}) if return_customized_layers else model
    if activationFunc=='selu':
        myInitializer="lecun_normal"
    elif activationFunc=='tanh':
        myInitializer="glorot_uniform" 

    tensorBoardDir="../deepNN/%s-pid%d/%s/fold%d"%(dateTime, sysuffix, namePrefix, fold)   #/tensorBoardLog
    if not os.path.exists(tensorBoardDir):
        os.makedirs(tensorBoardDir)
    checkPtFile='../deepNN/%s-pid%d/p2v-fold%d.hdf5'%(dateTime, sysuffix, fold)
    scriptInputArgs='../deepNN/%s-pid%d/scriptInputArgs.txt'%(dateTime, sysuffix)
    with open(scriptInputArgs,'w') as textFile:
        print(syspec, file=textFile)

    protTensor=Input(shape=(protTrainIN.shape[1],), name='FastProt')
    if activationFunc=='selu':
        x1=layers.AlphaDropout(dropoutRate)(protTensor)
    else:
        x1=layers.Dropout(dropoutRate)(protTensor)
    x1=layers.Dense(units=32, activation=activationFunc, kernel_initializer=myInitializer, kernel_regularizer=regularizers.l1_l2(l1=0, l2=0.01))(x1)

    rnaTensor=Input(shape=(rnaTrainIN.shape[1],), name='FastRNA')
    if activationFunc=='selu':
        x2=layers.AlphaDropout(dropoutRate)(rnaTensor)
    else:
        x2=layers.Dropout(dropoutRate)(rnaTensor)
    x2=layers.Dense(units=32, activation=activationFunc, kernel_initializer=myInitializer, kernel_regularizer=regularizers.l1_l2(l1=0, l2=0.01))(x2)   

    merged=layers.dot([x1, x2], -1)    
    #merged=kronecker([x1, x2]) 
    #merged=layers.concatenate([x1, x2]) 
    #merged=layers.multiply([x1, x2]) 
Ejemplo n.º 8
0
def build_birnn_multifeature_coattention_model(voca_dim,
                                               time_steps,
                                               num_feature_channels,
                                               num_features,
                                               feature_dim,
                                               output_dim,
                                               model_dim,
                                               atten_dim,
                                               mlp_dim,
                                               item_embedding=None,
                                               rnn_depth=1,
                                               mlp_depth=1,
                                               drop_out=0.5,
                                               rnn_drop_out=0.,
                                               rnn_state_drop_out=0.,
                                               trainable_embedding=False,
                                               gpu=False,
                                               return_customized_layers=False):
    """
    Create a bidirectional-RNN model with co-attention over feature inputs.

    The main sequence input is (optionally) embedded and encoded by a stack
    of bidirectional LSTMs. Each auxiliary feature input is projected by a
    shared time-distributed dense layer and combined with the encoded
    sequence through ``_coatten_compare_aggregate``. The collected outputs
    are concatenated along axis 1 and classified by an MLP with a softmax
    output.

    :param voca_dim: vocabulary dimension size.
    :param time_steps: the length of input
    :param num_feature_channels: number of auxiliary feature inputs, each of
        shape (num_features, feature_dim)
    :param num_features: first dimension of every auxiliary input
    :param feature_dim: last dimension of every auxiliary input
    :param output_dim: the output dimension size
    :param model_dim: rnn dimension size; incremented by one when odd so it
        can be split between the two LSTM directions
    :param atten_dim: not referenced by the current implementation
    :param mlp_dim: the dimension size of fully connected layer
    :param item_embedding: integer, numpy 2D array, or None (default=None)
        If item_embedding is an integer, connect a randomly initialized
        embedding matrix of that width to the input tensor.
        If item_embedding is a matrix, this matrix will be used as the
        embedding matrix (its first dimension must equal voca_dim).
        If item_embedding is None, connect the input tensor to the RNN layer
        directly.
    :param rnn_depth: rnn depth
    :param mlp_depth: the depth of fully connected layers; mlp_depth - 1 SELU
        blocks are inserted before the softmax layer
    :param drop_out: dropout rate of fully connected layers
    :param rnn_drop_out: dropout rate of rnn layers (applied as a separate
        Dropout layer after batch normalisation when gpu is True)
    :param rnn_state_drop_out: dropout rate of rnn state tensor (only used
        when gpu is False)
    :param trainable_embedding: boolean
    :param gpu: boolean, default=False
        If True, CuDNNLSTM is used instead of LSTM for RNN layer.
    :param return_customized_layers: boolean, default=False
        If True, return model and customized object dictionary, otherwise
        return model only
    :return: keras model
    :raises ValueError: if item_embedding is neither None, a numpy array nor
        an integer
    """
    # Bump odd sizes by one so the width splits evenly across directions.
    model_dim = model_dim + 1 if model_dim % 2 == 1 else model_dim

    if item_embedding is None:
        # Dense per-step vectors are fed to the RNN directly.
        inputs = models.Input(shape=(time_steps, voca_dim),
                              dtype='float32',
                              name='input0')
        x1 = inputs
    else:
        # Integer ids are looked up in an embedding table first.
        inputs = models.Input(shape=(time_steps, ),
                              dtype='int32',
                              name='input0')
        shared_embedding_args = dict(input_length=time_steps,
                                     trainable=trainable_embedding,
                                     mask_zero=False,
                                     name='embedding_layer0')
        if isinstance(item_embedding, np.ndarray):
            assert voca_dim == item_embedding.shape[0]
            embedding = layers.Embedding(voca_dim,
                                         item_embedding.shape[1],
                                         weights=[
                                             item_embedding,
                                         ],
                                         **shared_embedding_args)
        elif utils.is_integer(item_embedding):
            embedding = layers.Embedding(voca_dim, item_embedding,
                                         **shared_embedding_args)
        else:
            raise ValueError(
                "item_embedding must be either integer or numpy matrix")
        x1 = embedding(inputs)

    # Auxiliary feature inputs and their shared projection.
    inputs1 = [
        models.Input(shape=(num_features, feature_dim),
                     dtype='float32',
                     name='input1{}'.format(channel))
        for channel in range(num_feature_channels)
    ]

    feature_map_layer = layers.TimeDistributed(
        layers.Dense(model_dim, name="feature_map_layer",
                     activation="sigmoid"),
        name="td_feature_map_layer")
    x2s = [feature_map_layer(tensor) for tensor in inputs1]

    # RNN encoding: each direction gets half of model_dim.
    for level in range(rnn_depth):
        suffix = str(level)
        if gpu:
            recurrent = layers.CuDNNLSTM(int(model_dim / 2),
                                         return_sequences=True)
        else:
            recurrent = layers.LSTM(int(model_dim / 2),
                                    return_sequences=True,
                                    dropout=rnn_drop_out,
                                    recurrent_dropout=rnn_state_drop_out)
        x1 = layers.Bidirectional(recurrent,
                                  name='bi_lstm_layer' + suffix)(x1)
        x1 = layers.BatchNormalization(name='rnn_batch_norm_layer' +
                                       suffix)(x1)
        if gpu:
            # The CuDNN branch gets an explicit Dropout layer instead of the
            # dropout arguments passed to LSTM in the other branch.
            x1 = layers.Dropout(rnn_drop_out,
                                name="rnn_dropout_layer" + suffix)(x1)

    # Layers shared by every (sequence, feature) co-attention pass.
    coatten_layer = clayers.CoAttentionWeight(name="coattention_weights_layer")
    featnorm_layer1 = clayers.FeatureNormalization(
        name="normalized_coattention_weights_layer1", axis=1)
    featnorm_layer2 = clayers.FeatureNormalization(
        name="normalized_coattention_weights_layer2", axis=2)
    focus_layer1 = layers.Dot((1, 1), name="focus_layer1")
    focus_layer2 = layers.Dot((2, 1), name="focus_layer2")
    pair_layer1 = layers.Concatenate(axis=-1, name="pair_layer1")
    pair_layer2 = layers.Concatenate(axis=-1, name="pair_layer2")

    compare_layer1 = layers.TimeDistributed(
        layers.Dense(model_dim, activation="relu"), name="compare_layer1")
    compare_layer2 = layers.TimeDistributed(
        layers.Dense(model_dim, activation="relu"), name="compare_layer2")
    flatten_layer = layers.Flatten(name="flatten_layer")

    # One co-attention pass per auxiliary input; results are pooled into a
    # single flat list.
    xs = []
    for x2_ in x2s:
        xs.extend(
            _coatten_compare_aggregate(coatten_layer, featnorm_layer1,
                                       featnorm_layer2, focus_layer1,
                                       focus_layer2, pair_layer1,
                                       pair_layer2, compare_layer1,
                                       compare_layer2, flatten_layer, x1,
                                       x2_))

    hidden = layers.Concatenate(axis=1, name="concat_feature_layer")(xs)

    # MLP layers.
    for level in range(mlp_depth - 1):
        suffix = str(level)
        hidden = layers.Dense(mlp_dim,
                              activation='selu',
                              kernel_initializer='lecun_normal',
                              name='selu_layer' + suffix)(hidden)
        hidden = layers.AlphaDropout(drop_out,
                                     name='alpha_layer' + suffix)(hidden)

    outputs = layers.Dense(output_dim,
                           activation="softmax",
                           name="softmax_layer0")(hidden)

    model = models.Model([inputs] + inputs1, outputs)

    if not return_customized_layers:
        return model

    # Custom layer classes needed to deserialise the model later.
    custom_objects = {
        'CoAttentionWeight': clayers.CoAttentionWeight,
        "FeatureNormalization": clayers.FeatureNormalization
    }
    return model, custom_objects
Ejemplo n.º 9
0
def build_birnn_feature_coattention_cnn_model(voca_dim,
                                              time_steps,
                                              num_features,
                                              feature_dim,
                                              output_dim,
                                              model_dim,
                                              mlp_dim,
                                              num_filters,
                                              filter_sizes,
                                              item_embedding=None,
                                              rnn_depth=1,
                                              mlp_depth=1,
                                              drop_out=0.5,
                                              rnn_drop_out=0.,
                                              rnn_state_drop_out=0.,
                                              cnn_drop_out=0.5,
                                              pooling='max',
                                              trainable_embedding=False,
                                              gpu=False,
                                              return_customized_layers=False):
    """
    Create a bidirectional RNN + feature co-attention + multi-channel CNN model.

    The model takes two inputs: ``input0`` (the item sequence) and ``input1``
    (a ``(num_features, feature_dim)`` float tensor).  The sequence is encoded
    by stacked bidirectional LSTMs, the features are mapped by a dense layer,
    both are combined through co-attention, and the attended sequence is then
    summarized by parallel 1D convolutions while the attended features are
    average-pooled.  The two summaries are concatenated and fed to the MLP.

    :param voca_dim: vocabulary dimension size.
    :param time_steps: the length of input
    :param num_features: the number of feature vectors fed to ``input1``
    :param feature_dim: the dimension of each feature vector; also the width
        of the dense feature mapping layer
    :param output_dim: the output dimension size
    :param model_dim: rnn output dimension size
        Odd values are incremented by one, because each LSTM direction gets
        ``model_dim / 2`` units.
    :param mlp_dim: the dimension size of fully connected layer
    :param num_filters: the number of filters of each convolution
    :param filter_sizes: list of integers
        The kernel sizes; one convolution channel is created per entry.
    :param item_embedding: integer, numpy 2D array, or None (default=None)
        If item_embedding is a integer, connect a randomly initialized embedding matrix to the input tensor.
        If item_embedding is a matrix, this matrix will be used as the embedding matrix.
        If item_embedding is None, then connect input tensor to RNN layer directly.
    :param rnn_depth: rnn depth
    :param mlp_depth: the depth of fully connected layers
        ``mlp_depth - 1`` hidden selu layers are created before the softmax
        output layer.
    :param drop_out: dropout rate of fully connected layers
    :param rnn_drop_out: dropout rate of rnn layers
    :param rnn_state_drop_out: dropout rate of rnn state tensor
        Only used when ``gpu`` is False.
    :param cnn_drop_out: dropout rate between the cnn layers and the fully connected layers
    :param pooling: str, either 'max' or 'average'
        Pooling method; any value other than 'max' selects average pooling.
    :param trainable_embedding: boolean
    :param gpu: boolean, default=False
        If True, CuDNNLSTM is used instead of LSTM for RNN layer.
    :param return_customized_layers: boolean, default=False
        If True, return model and customized object dictionary, otherwise return model only
    :return: keras model
    :raises ValueError: if item_embedding is neither None, an integer nor a numpy array
    """
    # Each direction of the bidirectional LSTM gets half of model_dim, so the
    # total width must be even.
    if model_dim % 2 == 1:
        model_dim += 1

    if item_embedding is not None:
        inputs = models.Input(shape=(time_steps, ),
                              dtype='int32',
                              name='input0')
        x1 = inputs

        # item embedding
        if isinstance(item_embedding, np.ndarray):
            assert voca_dim == item_embedding.shape[0]
            x1 = layers.Embedding(voca_dim,
                                  item_embedding.shape[1],
                                  input_length=time_steps,
                                  weights=[
                                      item_embedding,
                                  ],
                                  trainable=trainable_embedding,
                                  mask_zero=False,
                                  name='embedding_layer0')(x1)
        elif utils.is_integer(item_embedding):
            x1 = layers.Embedding(voca_dim,
                                  item_embedding,
                                  input_length=time_steps,
                                  trainable=trainable_embedding,
                                  mask_zero=False,
                                  name='embedding_layer0')(x1)
        else:
            raise ValueError(
                "item_embedding must be either integer or numpy matrix")
    else:
        inputs = models.Input(shape=(time_steps, voca_dim),
                              dtype='float32',
                              name='input0')
        x1 = inputs

    # Second model input: the side features.
    inputs1 = models.Input(shape=(num_features, feature_dim),
                           dtype='float32',
                           name='input1')
    x2 = layers.Dense(feature_dim, name="feature_map_layer",
                      activation="relu")(inputs1)

    half_dim = model_dim // 2
    if gpu:
        # rnn encoding; CuDNNLSTM takes no dropout arguments, so dropout is
        # applied as a separate layer after batch normalization.
        for i in range(rnn_depth):
            x1 = layers.Bidirectional(layers.CuDNNLSTM(half_dim,
                                                       return_sequences=True),
                                      name='bi_lstm_layer' + str(i))(x1)
            x1 = layers.BatchNormalization(name='rnn_batch_norm_layer' +
                                           str(i))(x1)
            x1 = layers.Dropout(rnn_drop_out,
                                name="rnn_dropout_layer" + str(i))(x1)
    else:
        # rnn encoding
        for i in range(rnn_depth):
            x1 = layers.Bidirectional(layers.LSTM(
                half_dim,
                return_sequences=True,
                dropout=rnn_drop_out,
                recurrent_dropout=rnn_state_drop_out),
                                      name='bi_lstm_layer' + str(i))(x1)
            x1 = layers.BatchNormalization(name='rnn_batch_norm_layer' +
                                           str(i))(x1)

    # attention
    attens = clayers.CoAttentionWeight(name="coattention_weights_layer")(
        [x1, x2])

    # The same weights are normalized once along axis 1 and once along axis 2.
    attens1 = clayers.FeatureNormalization(
        name="normalized_coattention_weights_layer1", axis=1)(attens)
    attens2 = clayers.FeatureNormalization(
        name="normalized_coattention_weights_layer2", axis=2)(attens)

    # compare
    focus1 = layers.Dot((1, 1), name="focus_layer1")([attens1, x1])
    focus2 = layers.Dot((2, 1), name="focus_layer2")([attens2, x2])

    pair1 = layers.Concatenate(axis=-1, name="pair_layer1")([x1, focus2])
    pair2 = layers.Concatenate(axis=-1, name="pair_layer2")([x2, focus1])

    x1 = layers.TimeDistributed(layers.Dense(model_dim, activation="relu"),
                                name="compare_layer1")(pair1)
    x2 = layers.TimeDistributed(layers.Dense(model_dim, activation="relu"),
                                name="compare_layer2")(pair2)

    # Multi-Channel CNN for x1
    pooled_outputs = []
    for filter_size in filter_sizes:
        conv = layers.Conv1D(num_filters,
                             kernel_size=filter_size,
                             padding='valid',
                             activation='relu')(x1)
        # A 'valid' convolution leaves time_steps - filter_size + 1 steps;
        # pooling over all of them reduces every channel to a single step.
        pool_size = time_steps - filter_size + 1
        if pooling == 'max':
            conv = layers.MaxPooling1D(pool_size=pool_size,
                                       strides=1,
                                       padding='valid')(conv)
        else:
            conv = layers.AveragePooling1D(pool_size=pool_size,
                                           strides=1,
                                           padding='valid')(conv)
        pooled_outputs.append(conv)

    x1 = layers.Concatenate(name='concated_layer')(pooled_outputs)
    x1 = layers.Flatten()(x1)
    x1 = layers.Dropout(cnn_drop_out, name='conv_dropout_layer')(x1)
    x1 = layers.BatchNormalization(name="batch_norm_layer")(x1)

    # Average Pool for x2
    x2 = layers.GlobalAveragePooling1D(name="average_pool_layer")(x2)

    x = layers.Concatenate(axis=1, name="concat_deep_feature_layer")([x1, x2])

    # MLP Layers
    for i in range(mlp_depth - 1):
        x = layers.Dense(mlp_dim,
                         activation='selu',
                         kernel_initializer='lecun_normal',
                         name='selu_layer' + str(i))(x)
        x = layers.AlphaDropout(drop_out, name='alpha_layer' + str(i))(x)

    outputs = layers.Dense(output_dim,
                           activation="softmax",
                           name="softmax_layer0")(x)

    # Both Input tensors must be declared: the output depends on inputs1
    # through the co-attention branch, so omitting it disconnects the graph.
    model = models.Model([inputs, inputs1], outputs)

    if return_customized_layers:
        return model, {
            'CoAttentionWeight': clayers.CoAttentionWeight,
            "FeatureNormalization": clayers.FeatureNormalization
        }

    return model
Ejemplo n.º 10
0
def build_birnn_cnn_model(voca_dim,
                          time_steps,
                          output_dim,
                          rnn_dim,
                          mlp_dim,
                          num_filters,
                          filter_sizes,
                          item_embedding=None,
                          rnn_depth=1,
                          mlp_depth=1,
                          drop_out=0.5,
                          rnn_drop_out=0.5,
                          rnn_state_drop_out=0.5,
                          cnn_drop_out=0.5,
                          pooling='max',
                          trainable_embedding=False,
                          gpu=False,
                          return_customized_layers=False):
    """
    Build a stacked bidirectional LSTM encoder topped by a multi-channel CNN.

    The (optionally embedded) input sequence goes through ``rnn_depth``
    bidirectional LSTM layers, then through one 1D convolution per entry of
    ``filter_sizes``.  Every convolution output is pooled over its whole
    length, the pooled channels are concatenated and flattened, and the
    result is classified by an MLP ending in a softmax layer.

    :param voca_dim: vocabulary dimension size.
    :param time_steps: the length of input
    :param output_dim: the output dimension size
    :param rnn_dim: number of units of each LSTM direction
    :param mlp_dim: the dimension size of fully connected layer
    :param num_filters: the number of filters of each convolution
    :param filter_sizes: list of integers
        The kernel sizes; one convolution channel per entry.
    :param item_embedding: integer, numpy 2D array, or None (default=None)
        An integer connects a randomly initialized embedding of that width.
        A numpy matrix is used as the initial embedding matrix.
        None feeds the input tensor to the RNN layers directly.
    :param rnn_depth: number of stacked bidirectional LSTM layers
    :param mlp_depth: the depth of fully connected layers
        ``mlp_depth - 1`` selu layers precede the softmax output layer.
    :param drop_out: dropout rate of fully connected layers
    :param rnn_drop_out: dropout rate of rnn layers
    :param rnn_state_drop_out: dropout rate of rnn state tensor
        (not used when ``gpu`` is True)
    :param cnn_drop_out: dropout rate between the cnn layers and the fully connected layers
    :param pooling: str, either 'max' or 'average'
        Pooling method; anything other than 'max' means average pooling.
    :param trainable_embedding: boolean
    :param gpu: boolean, default=False
        If True, CuDNNLSTM is used instead of LSTM for RNN layer.
    :param return_customized_layers: boolean, default=False
        If True, return the model together with an (empty) customized object
        dictionary, otherwise return the model only
    :return: keras model
    """

    # ---- input tensor and optional embedding ----
    if item_embedding is None:
        inputs = models.Input(shape=(time_steps, voca_dim),
                              dtype='float32',
                              name='input0')
        x = inputs
    else:
        inputs = models.Input(shape=(time_steps, ),
                              dtype='int32',
                              name='input0')

        if isinstance(item_embedding, np.ndarray):
            # pre-trained matrix: one row per vocabulary entry
            assert voca_dim == item_embedding.shape[0]
            embedding = layers.Embedding(voca_dim,
                                         item_embedding.shape[1],
                                         input_length=time_steps,
                                         weights=[item_embedding],
                                         trainable=trainable_embedding,
                                         mask_zero=False,
                                         name='embedding_layer0')
        elif utils.is_integer(item_embedding):
            # randomly initialized embedding of the requested width
            embedding = layers.Embedding(voca_dim,
                                         item_embedding,
                                         input_length=time_steps,
                                         trainable=trainable_embedding,
                                         mask_zero=False,
                                         name='embedding_layer0')
        else:
            raise ValueError(
                "item_embedding must be either integer or numpy matrix")
        x = embedding(inputs)

    # ---- rnn encoding ----
    for depth in range(rnn_depth):
        suffix = str(depth)
        if gpu:
            recurrent = layers.CuDNNLSTM(rnn_dim, return_sequences=True)
        else:
            recurrent = layers.LSTM(rnn_dim,
                                    return_sequences=True,
                                    dropout=rnn_drop_out,
                                    recurrent_dropout=rnn_state_drop_out)
        x = layers.Bidirectional(recurrent, name='bi_lstm_layer' + suffix)(x)
        x = layers.BatchNormalization(name='rnn_batch_norm_layer' +
                                      suffix)(x)
        if gpu:
            # the CuDNN variant gets its dropout as a separate layer
            x = layers.Dropout(rnn_drop_out,
                               name="rnn_dropout_layer" + suffix)(x)

    # ---- multi-channel cnn ----
    if pooling == 'max':
        pool_cls = layers.MaxPooling1D
    else:
        pool_cls = layers.AveragePooling1D

    pooled_outputs = []
    for kernel_size in filter_sizes:
        feature_map = layers.Conv1D(num_filters,
                                    kernel_size=kernel_size,
                                    padding='valid',
                                    activation='relu')(x)
        # pool over the full length left by the 'valid' convolution
        window = time_steps - kernel_size + 1
        pooled = pool_cls(pool_size=window, strides=1,
                          padding='valid')(feature_map)
        pooled_outputs.append(pooled)

    x = layers.Concatenate(name='concated_layer')(pooled_outputs)
    x = layers.Flatten()(x)
    x = layers.Dropout(cnn_drop_out, name='conv_dropout_layer')(x)
    x = layers.BatchNormalization(name="batch_norm_layer")(x)

    # ---- mlp head ----
    for level in range(mlp_depth - 1):
        tag = str(level)
        x = layers.Dense(mlp_dim,
                         activation='selu',
                         kernel_initializer='lecun_normal',
                         name='selu_layer' + tag)(x)
        x = layers.AlphaDropout(drop_out, name='alpha_layer' + tag)(x)

    outputs = layers.Dense(output_dim,
                           activation="softmax",
                           name="softmax_layer0")(x)

    model = models.Model(inputs, outputs)

    # this architecture uses no customized layers, hence the empty dict
    return (model, {}) if return_customized_layers else model
Ejemplo n.º 11
0
def build_birnn_attention_model(voca_dim,
                                time_steps,
                                output_dim,
                                rnn_dim,
                                mlp_dim,
                                item_embedding=None,
                                rnn_depth=1,
                                mlp_depth=1,
                                num_att_channel=1,
                                drop_out=0.5,
                                rnn_drop_out=0.,
                                rnn_state_drop_out=0.,
                                trainable_embedding=False,
                                gpu=False,
                                return_customized_layers=False):
    """
    Build a stacked bidirectional LSTM encoder followed by attention heads.

    The (optionally embedded) input sequence is encoded by ``rnn_depth``
    bidirectional LSTM layers.  Each attention head computes weights with
    ``clayers.AttentionWeight`` and combines them with the permuted encoder
    output through a ``Dot`` layer; the heads are concatenated,
    batch-normalized and classified by an MLP ending in a softmax layer.

    :param voca_dim: vocabulary dimension size.
    :param time_steps: the length of input
    :param output_dim: the output dimension size
    :param rnn_dim: number of units of each LSTM direction
    :param mlp_dim: the dimension size of fully connected layer
    :param item_embedding: integer, numpy 2D array, or None (default=None)
        An integer connects a randomly initialized embedding of that width.
        A numpy matrix is used as the initial embedding matrix.
        None feeds the input tensor to the RNN layers directly.
    :param rnn_depth: number of stacked bidirectional LSTM layers
    :param mlp_depth: the depth of fully connected layers
        ``mlp_depth - 1`` selu layers precede the softmax output layer.
    :param num_att_channel: the number of attention channels, this can be
        used to mimic multi-head attention mechanism; at least one head is
        always created
    :param drop_out: dropout rate of fully connected layers
    :param rnn_drop_out: dropout rate of rnn layers
    :param rnn_state_drop_out: dropout rate of rnn state tensor
        (not used when ``gpu`` is True)
    :param trainable_embedding: boolean
    :param gpu: boolean, default=False
        If True, CuDNNLSTM is used instead of LSTM for RNN layer.
    :param return_customized_layers: boolean, default=False
        If True, return model and customized object dictionary, otherwise return model only
    :return: keras model
    """

    # ---- input tensor and optional embedding ----
    if item_embedding is None:
        inputs = models.Input(shape=(time_steps, voca_dim),
                              dtype='float32',
                              name='input0')
        x = inputs
    else:
        inputs = models.Input(shape=(time_steps, ),
                              dtype='int32',
                              name='input0')

        if isinstance(item_embedding, np.ndarray):
            # pre-trained matrix: one row per vocabulary entry
            assert voca_dim == item_embedding.shape[0]
            embedding = layers.Embedding(voca_dim,
                                         item_embedding.shape[1],
                                         input_length=time_steps,
                                         weights=[item_embedding],
                                         trainable=trainable_embedding,
                                         mask_zero=False,
                                         name='embedding_layer0')
        elif utils.is_integer(item_embedding):
            # randomly initialized embedding of the requested width
            embedding = layers.Embedding(voca_dim,
                                         item_embedding,
                                         input_length=time_steps,
                                         trainable=trainable_embedding,
                                         mask_zero=False,
                                         name='embedding_layer0')
        else:
            raise ValueError(
                "item_embedding must be either integer or numpy matrix")
        x = embedding(inputs)

    # ---- rnn encoding ----
    for depth in range(rnn_depth):
        suffix = str(depth)
        if gpu:
            recurrent = layers.CuDNNLSTM(rnn_dim, return_sequences=True)
        else:
            recurrent = layers.LSTM(rnn_dim,
                                    return_sequences=True,
                                    dropout=rnn_drop_out,
                                    recurrent_dropout=rnn_state_drop_out)
        x = layers.Bidirectional(recurrent, name='bi_lstm_layer' + suffix)(x)
        x = layers.BatchNormalization(name='rnn_batch_norm_layer' +
                                      suffix)(x)
        if gpu:
            # the CuDNN variant gets its dropout as a separate layer
            x = layers.Dropout(rnn_drop_out,
                               name="rnn_dropout_layer" + suffix)(x)

    # ---- attention ----
    # the permuted encoder output is shared by every head
    x_per = layers.Permute((2, 1), name='permuted_attention_x')(x)
    head_count = max(1, num_att_channel)

    attention_heads = []
    for head in range(head_count):
        head_id = str(head)
        weights = clayers.AttentionWeight(name="attention_weights_layer" +
                                          head_id)(x)
        focus = layers.Dot([2, 1], name='focus_head' + head_id + '_layer0')
        attention_heads.append(focus([x_per, weights]))

    if num_att_channel > 1:
        x = layers.Concatenate(name='focus_layer0')(attention_heads)
    else:
        # single head: nothing to concatenate
        x = attention_heads[0]

    x = layers.BatchNormalization(name='focused_batch_norm_layer')(x)

    # ---- mlp head ----
    for level in range(mlp_depth - 1):
        tag = str(level)
        x = layers.Dense(mlp_dim,
                         activation='selu',
                         kernel_initializer='lecun_normal',
                         name='selu_layer' + tag)(x)
        x = layers.AlphaDropout(drop_out, name='alpha_layer' + tag)(x)

    outputs = layers.Dense(output_dim,
                           activation="softmax",
                           name="softmax_layer0")(x)

    model = models.Model(inputs, outputs)

    if not return_customized_layers:
        return model
    return model, {'AttentionWeight': clayers.AttentionWeight}