def test_architecture(nb_classes, nb_tokens, maxlen, embed_l2=1E-6, return_attention=False):
    """ Returns a simplified test version of the model architecture.

    # Arguments:
        nb_classes: Number of classes in the dataset (expected to be 90).
        nb_tokens: Number of tokens in the dataset (i.e. vocabulary size).
        maxlen: Maximum length of an input sequence (in tokens).
        embed_l2: L2 regularization for the embedding layer.
        return_attention: If True, the attention weights are returned as an
                          additional output.

    # Returns:
        Model with the given parameters.
    """
    # define embedding layer that turns word tokens into vectors
    # an activation function is used to bound the values of the embedding
    print('Beginning build model')
    model_input = Input(shape=(maxlen,), dtype='int32', name='input_first')

    print('Embedding reg')
    embed_reg = L1L2(l2=embed_l2) if embed_l2 != 0 else None

    print('Beginning embedding layer')
    embed = Embedding(input_dim=nb_tokens,
                      output_dim=256,
                      mask_zero=True,
                      input_length=maxlen,
                      embeddings_regularizer=embed_reg,
                      name='embedding')
    print('embed finished')
    x = embed(model_input)
    print('x finished')
    x = Activation('tanh')(x)
    print('Finished introduction')

    # skip-connection from embedding to output eases gradient-flow and allows access to lower-level features
    # ordering of the way the merge is done is important for consistency with the pretrained model
    print('LSTM building')
    lstm_output = Bidirectional(LSTM(512, return_sequences=True), name="bi_lstm_0")(x)
    x = concatenate([lstm_output, x])

    # if return_attention is True in AttentionWeightedAverage, an additional tensor
    # representing the weight at each timestep is returned
    weights = None
    x = AttentionWeightedAverage(name='attlayer', return_attention=return_attention)(x)
    if return_attention:
        x, weights = x
    print('LSTMs ready')

    outputs = [x]
    if return_attention:
        # add the attention weights to the outputs if required
        outputs.append(weights)

    return Model(inputs=[model_input], outputs=outputs, name="riminder")
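
# A minimal usage sketch (not part of the original module): builds the lighter
# single-BiLSTM test model above and prints its layer summary. The helper name
# and the vocabulary size, sequence length, and class count are illustrative
# placeholders only.
def _example_test_architecture():
    test_model = test_architecture(nb_classes=90, nb_tokens=50000, maxlen=30)
    test_model.summary()
    return test_model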
def deepmoji_architecture(nb_classes, nb_tokens, maxlen, feature_output=False,
                          embed_dropout_rate=0, final_dropout_rate=0,
                          embed_l2=1E-6, return_attention=False):
    """ Returns the DeepMoji architecture uninitialized and
    without using the pretrained model weights.

    # Arguments:
        nb_classes: Number of classes in the dataset.
        nb_tokens: Number of tokens in the dataset (i.e. vocabulary size).
        maxlen: Maximum length of an input sequence (in tokens).
        feature_output: If True the model returns the penultimate
                        feature vector rather than Softmax probabilities
                        (defaults to False).
        embed_dropout_rate: Dropout rate for the embedding layer.
        final_dropout_rate: Dropout rate for the final Softmax layer.
        embed_l2: L2 regularization for the embedding layer.
        return_attention: If True, the attention weights are returned as an
                          additional output.

    # Returns:
        Model with the given parameters.
    """
    # define embedding layer that turns word tokens into vectors
    # an activation function is used to bound the values of the embedding
    model_input = Input(shape=(maxlen,), dtype='int32')
    embed_reg = L1L2(l2=embed_l2) if embed_l2 != 0 else None
    embed = Embedding(input_dim=nb_tokens,
                      output_dim=256,
                      mask_zero=True,
                      input_length=maxlen,
                      embeddings_regularizer=embed_reg,
                      name='embedding')
    x = embed(model_input)
    x = Activation('tanh')(x)

    # entire embedding channels are dropped out instead of the
    # normal Keras embedding dropout, which drops all channels for entire words
    # many of the datasets contain so few words that losing one or more words can alter the emotions completely
    if embed_dropout_rate != 0:
        embed_drop = SpatialDropout1D(embed_dropout_rate, name='embed_drop')
        x = embed_drop(x)

    # skip-connection from embedding to output eases gradient-flow and allows access to lower-level features
    # ordering of the way the merge is done is important for consistency with the pretrained model
    lstm_0_output = Bidirectional(LSTM(512, return_sequences=True), name="bi_lstm_0")(x)
    lstm_1_output = Bidirectional(LSTM(512, return_sequences=True), name="bi_lstm_1")(lstm_0_output)
    x = concatenate([lstm_1_output, lstm_0_output, x])

    # if return_attention is True in AttentionWeightedAverage, an additional tensor
    # representing the weight at each timestep is returned
    weights = None
    x = AttentionWeightedAverage(name='attlayer', return_attention=return_attention)(x)
    if return_attention:
        x, weights = x

    if not feature_output:
        # output class probabilities
        if final_dropout_rate != 0:
            x = Dropout(final_dropout_rate)(x)

        if nb_classes > 2:
            outputs = [Dense(nb_classes, activation='softmax', name='softmax')(x)]
        else:
            outputs = [Dense(1, activation='sigmoid', name='softmax')(x)]
    else:
        # output penultimate feature vector
        outputs = [x]

    if return_attention:
        # add the attention weights to the outputs if required
        outputs.append(weights)

    return Model(inputs=[model_input], outputs=outputs, name="DeepMoji")
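
# A minimal usage sketch (not part of the original module): builds the full
# DeepMoji graph with attention weights exposed, pushes a dummy batch through
# it, and shows how the extra attention output is unpacked. The hyperparameter
# values and random token ids are illustrative placeholders, and numpy is
# assumed to be available in this environment.
if __name__ == '__main__':
    import numpy as np

    model = deepmoji_architecture(nb_classes=64, nb_tokens=50000, maxlen=30,
                                  final_dropout_rate=0.5, return_attention=True)
    model.summary()

    # dummy batch of 4 sequences of 30 token ids (0 is reserved for padding)
    dummy_batch = np.random.randint(1, 50000, size=(4, 30)).astype('int32')
    class_probs, att_weights = model.predict(dummy_batch)
    print(class_probs.shape)   # (4, 64): class probabilities per example
    print(att_weights.shape)   # (4, 30): attention weight per timestep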