Example #1
def test_architecture(nb_classes, nb_tokens, maxlen, embed_l2=1E-6, return_attention=False):
	"""
		Renvoie la structure du modèle

	# Arguments:
		nb_classes: Niombe de classe dans le Data set, a priori 90
		nb_tokens: taille vocabulary 
		maxlen: taille max d'un mot

		embed_l2: L2 regularization for the embedding layerl.

	# Returns:
		Model with the given parameters.
	"""
	# define embedding layer that turns word tokens into vectors
	# an activation function is used to bound the values of the embedding
	print('Beginning to build model')
	model_input = Input(shape=(maxlen,), dtype='int32', name='input_first')
	print('Setting up embedding regularizer')
	embed_reg = L1L2(l2=embed_l2) if embed_l2 != 0 else None
	print('Building embedding layer')
	embed = Embedding(input_dim=nb_tokens,
					  output_dim=256,
					  mask_zero=True,
					  input_length=maxlen,
					  embeddings_regularizer=embed_reg,
					  name='embedding')
	print('Embedding layer built')
	x = embed(model_input)
	print('Embedding applied')
	x = Activation('tanh')(x)
	print('Input block finished')

	# entire embedding channels are dropped out instead of the
	# normal Keras embedding dropout, which drops all channels for entire words
	# many of the datasets contain so few words that losing one or more words can alter the emotions completely

	# skip-connection from embedding to output eases gradient-flow and allows access to lower-level features
	# ordering of the way the merge is done is important for consistency with the pretrained model
	print('LSTM building')
	lstm_output = Bidirectional(LSTM(512, return_sequences=True), name="bi_lstm_0")(x)

	x = concatenate([lstm_output, x])

	# if return_attention is True in AttentionWeightedAverage, an additional tensor
	# representing the weight at each timestep is returned
	x = AttentionWeightedAverage(name='attlayer', return_attention=return_attention)(x)
	print('Attention layer ready')

	# if return_attention is True, the attention layer also returned the
	# per-timestep weights; unpack them so they can be exposed as an output
	weights = None
	if return_attention:
		x, weights = x

	outputs = [x]
	if return_attention:
		# add the attention weights to the outputs if required
		outputs.append(weights)

	return Model(inputs=[model_input], outputs=outputs, name="riminder")
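The function above only defines the architecture; the layer classes it uses are imported elsewhere in the module. A minimal sketch of those imports and a call, assuming a Keras 2.x module layout and that AttentionWeightedAverage is the custom attention layer shipped with the DeepMoji package (both assumptions, not shown in the source):

# Assumed imports (Keras 2.x layout; AttentionWeightedAverage from the DeepMoji package).
from keras.models import Model
from keras.layers import Input, Embedding, Activation, Bidirectional, LSTM, concatenate
from keras.regularizers import L1L2
from deepmoji.attlayer import AttentionWeightedAverage

# Illustrative values; nb_tokens and maxlen are placeholders, not taken from the source.
model = test_architecture(nb_classes=90, nb_tokens=50000, maxlen=30)
model.summary()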
Example #2
def deepmoji_architecture(nb_classes,
                          nb_tokens,
                          maxlen,
                          feature_output=False,
                          embed_dropout_rate=0,
                          final_dropout_rate=0,
                          embed_l2=1E-6,
                          return_attention=False):
    """
    Returns the DeepMoji architecture uninitialized and
    without using the pretrained model weights.

    # Arguments:
        nb_classes: Number of classes in the dataset.
        nb_tokens: Number of tokens in the dataset (i.e. vocabulary size).
        maxlen: Maximum length of a sequence (number of tokens).
        feature_output: If True the model returns the penultimate
                        feature vector rather than Softmax probabilities
                        (defaults to False).
        embed_dropout_rate: Dropout rate for the embedding layer.
        final_dropout_rate: Dropout rate for the final Softmax layer.
        embed_l2: L2 regularization for the embedding layer.
        return_attention: If True, also return the attention weights
                          (defaults to False).

    # Returns:
        Model with the given parameters.
    """
    # define embedding layer that turns word tokens into vectors
    # an activation function is used to bound the values of the embedding
    model_input = Input(shape=(maxlen, ), dtype='int32')
    embed_reg = L1L2(l2=embed_l2) if embed_l2 != 0 else None
    embed = Embedding(input_dim=nb_tokens,
                      output_dim=256,
                      mask_zero=True,
                      input_length=maxlen,
                      embeddings_regularizer=embed_reg,
                      name='embedding')
    x = embed(model_input)
    x = Activation('tanh')(x)

    # entire embedding channels are dropped out instead of the
    # normal Keras embedding dropout, which drops all channels for entire words
    # many of the datasets contain so few words that losing one or more words can alter the emotions completely
    if embed_dropout_rate != 0:
        embed_drop = SpatialDropout1D(embed_dropout_rate, name='embed_drop')
        x = embed_drop(x)

    # skip-connection from embedding to output eases gradient-flow and allows access to lower-level features
    # ordering of the way the merge is done is important for consistency with the pretrained model
    lstm_0_output = Bidirectional(LSTM(512, return_sequences=True),
                                  name="bi_lstm_0")(x)
    lstm_1_output = Bidirectional(LSTM(512, return_sequences=True),
                                  name="bi_lstm_1")(lstm_0_output)
    x = concatenate([lstm_1_output, lstm_0_output, x])

    # if return_attention is True in AttentionWeightedAverage, an additional tensor
    # representing the weight at each timestep is returned
    weights = None
    x = AttentionWeightedAverage(name='attlayer',
                                 return_attention=return_attention)(x)
    if return_attention:
        x, weights = x

    if not feature_output:
        # output class probabilities
        if final_dropout_rate != 0:
            x = Dropout(final_dropout_rate)(x)

        if nb_classes > 2:
            outputs = [
                Dense(nb_classes, activation='softmax', name='softmax')(x)
            ]
        else:
            outputs = [Dense(1, activation='sigmoid', name='softmax')(x)]
    else:
        # output penultimate feature vector
        outputs = [x]

    if return_attention:
        # add the attention weights to the outputs if required
        outputs.append(weights)

    return Model(inputs=[model_input], outputs=outputs, name="DeepMoji")
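For comparison, a short usage sketch of the full architecture; the class count, vocabulary size, sequence length, and compile settings below are illustrative assumptions rather than values from the source:

# Hypothetical configuration; adjust nb_classes/nb_tokens/maxlen to the dataset at hand.
model = deepmoji_architecture(nb_classes=64, nb_tokens=50000, maxlen=30,
                              embed_dropout_rate=0.1, final_dropout_rate=0.5)
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# With return_attention=True the model has two outputs:
# the class probabilities and the per-timestep attention weights.
model_att = deepmoji_architecture(nb_classes=64, nb_tokens=50000, maxlen=30,
                                  return_attention=True)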