def getAttentionModel(model, foldname, lr, lr_decay):
    # Load the weights from the checkpoint following the one recorded for this fold
    load_path = Result(foldname, find=True).df['model_path']
    load_path = load_path.replace(
        load_path[-16:-12],
        str(int(load_path[-16:-12]) + 1).rjust(4, '0'))
    model.load_weights(load_path)

    # Keep handles to the last five layers (GRL, domain/class heads) before popping
    layers = {x.name: x for x in model.layers[-5:]}

    # Strip layers back down to the flatten layer
    while 'flatten' not in model.layers[-1].name:
        model.layers.pop()

    # Insert the attention layer and rebuild the two heads on top of it
    merged = Attention(name='att')(model.layers[-1].output)
    dann_in = layers['grl'](merged)
    dsc = layers['domain_dense'](dann_in)
    dsc = layers['domain'](dsc)
    merged = layers['class_dense'](merged)
    merged = layers['class'](merged)
    model = Model(inputs=model.layers[0].input, outputs=[merged, dsc])

    # Freeze every layer up to (but not including) the flatten layer
    for layer in model.layers:
        if 'flatten' in layer.name:
            break
        else:
            layer.trainable = False

    opt = SGD(lr=lr, decay=lr_decay)
    model.compile(optimizer=opt,
                  loss={'class': 'categorical_crossentropy',
                        'domain': 'categorical_crossentropy'},
                  metrics=['accuracy'])
    return model
def simgnn(parser):
    # Node features and adjacency matrices for the two input graphs
    inputA = Input(shape=(None, 16))
    GinputA = Input(shape=(None, None))
    inputB = Input(shape=(None, 16))
    GinputB = Input(shape=(None, None))

    # GCN and attention layers are shared between the two branches (Siamese setup)
    shared_gcn1 = GraphConv(units=parser.filters_1, step_num=3, activation="relu")
    shared_gcn2 = GraphConv(units=parser.filters_2, step_num=3, activation="relu")
    shared_gcn3 = GraphConv(units=parser.filters_3, step_num=3, activation="relu")
    shared_attention = Attention(parser)

    x = shared_gcn1([inputA, GinputA])
    x = shared_gcn2([x, GinputA])
    x = shared_gcn3([x, GinputA])
    x = shared_attention(x[0])

    y = shared_gcn1([inputB, GinputB])
    y = shared_gcn2([y, GinputB])
    y = shared_gcn3([y, GinputB])
    y = shared_attention(y[0])

    # Neural tensor network compares the two graph embeddings, followed by an MLP
    z = NeuralTensorLayer(output_dim=16, input_dim=16)([x, y])
    z = keras.layers.Dense(16, activation="relu")(z)
    z = keras.layers.Dense(8, activation="relu")(z)
    z = keras.layers.Dense(4, activation="relu")(z)
    z = keras.layers.Dense(1)(z)
    z = keras.activations.sigmoid(z)
    return Model(inputs=[inputA, GinputA, inputB, GinputB], outputs=z)
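# Hypothetical usage sketch (not part of the original code): scoring the similarity
# of a pair of small random graphs. Assumes GraphConv, Attention and
# NeuralTensorLayer from this repo are importable, and that `parser` exposes
# filters_1/2/3; the concrete values below are illustrative only.
import numpy as np
from argparse import Namespace

parser = Namespace(filters_1=64, filters_2=32, filters_3=16)
model = simgnn(parser)

n_a, n_b = 10, 12  # node counts of the two graphs
feat_a = np.random.rand(1, n_a, 16).astype('float32')                   # node features, batch of 1
adj_a = np.random.randint(0, 2, (1, n_a, n_a)).astype('float32')        # adjacency matrix
feat_b = np.random.rand(1, n_b, 16).astype('float32')
adj_b = np.random.randint(0, 2, (1, n_b, n_b)).astype('float32')

similarity = model.predict([feat_a, adj_a, feat_b, adj_b])  # scalar in (0, 1)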
def build_discriminator_att(shape, gpus=2):
    ch = 64
    layer_num = 5
    input_tensor = Input(shape)
    x = Conv2D(ch, (3, 3), strides=(2, 2), padding='same')(input_tensor)  # 112x112
    x = BatchNormalization()(x)
    x = LeakyReLU(alpha=0.2)(x)
    x = residual_block(x, output_channels=ch)
    x = residual_block(x, output_channels=ch * 2, stride=2)
    x = Attention(ch * 2)(x)
    ch = ch * 2
    for i in range(layer_num):
        if i == layer_num - 1:
            x = residual_block(x, output_channels=ch * 2)
        else:
            x = residual_block(x, output_channels=ch * 2, stride=2)
        ch = ch * 2
    x = LeakyReLU(alpha=0.2)(x)
    x = Flatten()(x)
    output = Dense(1, activation='sigmoid')(x)
    model = Model(input_tensor, output)
    return model
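# Hypothetical usage sketch (not part of the original code): scoring a random
# batch with the attention discriminator. The 224x224 input size is inferred from
# the "112x112" comment after the first stride-2 convolution; residual_block and
# the self-attention layer are assumed to come from this repo.
import numpy as np

disc = build_discriminator_att((224, 224, 3))
fake_batch = np.random.rand(2, 224, 224, 3).astype('float32')
scores = disc.predict(fake_batch)  # one real/fake probability per image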
def at_lstm(self):
    input_text = Input(shape=(self.max_len,))
    input_aspect = Input(shape=(1,),)
    if self.use_elmo:
        elmo_embedding = ELMoEmbedding(output_mode=self.config.elmo_output_mode,
                                       idx2word=self.config.idx2token,
                                       mask_zero=True,
                                       hub_url=self.config.elmo_hub_url,
                                       elmo_trainable=self.config.elmo_trainable)
        if self.config.use_elmo_alone:
            text_embed = SpatialDropout1D(0.2)(elmo_embedding(input_text))
        else:
            word_embedding = Embedding(input_dim=self.text_embeddings.shape[0],
                                       output_dim=self.config.word_embed_dim,
                                       weights=[self.text_embeddings],
                                       trainable=self.config.word_embed_trainable,
                                       mask_zero=True)
            text_embed = SpatialDropout1D(0.2)(concatenate(
                [word_embedding(input_text), elmo_embedding(input_text)]))
    else:
        word_embedding = Embedding(input_dim=self.text_embeddings.shape[0],
                                   output_dim=self.config.word_embed_dim,
                                   weights=[self.text_embeddings],
                                   trainable=self.config.word_embed_trainable,
                                   mask_zero=True)
        text_embed = SpatialDropout1D(0.2)(word_embedding(input_text))

    if self.config.aspect_embed_type == 'random':
        asp_embedding = Embedding(input_dim=self.n_aspect,
                                  output_dim=self.config.aspect_embed_dim)
    else:
        asp_embedding = Embedding(input_dim=self.aspect_embeddings.shape[0],
                                  output_dim=self.config.aspect_embed_dim,
                                  trainable=self.config.aspect_embed_trainable)
    aspect_embed = asp_embedding(input_aspect)
    aspect_embed = Flatten()(aspect_embed)  # reshape to 2d
    repeat_aspect = RepeatVector(self.max_len)(aspect_embed)  # repeat aspect for every word in sequence

    hidden_vecs = LSTM(self.config.lstm_units, return_sequences=True)(text_embed)  # hidden vectors output by lstm
    concat = concatenate([hidden_vecs, repeat_aspect],
                         axis=-1)  # mask after concatenate will be same as hidden_out's mask

    # apply attention mechanism
    attend_weight = Attention()(concat)
    attend_weight_expand = Lambda(lambda x: K.expand_dims(x))(attend_weight)
    attend_hidden = multiply([hidden_vecs, attend_weight_expand])
    attend_hidden = Lambda(lambda x: K.sum(x, axis=1))(attend_hidden)
    return Model([input_text, input_aspect], attend_hidden)
def memnet(self):
    n_hop = 9
    input_text = Input(shape=(self.max_len,))
    input_aspect = Input(shape=(1,))
    inputs = [input_text, input_aspect]

    # if self.use_elmo:
    #     elmo_embedding = ELMoEmbedding(output_mode=self.config.elmo_output_mode, idx2word=self.config.idx2token,
    #                                    mask_zero=True, hub_url=self.config.elmo_hub_url,
    #                                    elmo_trainable=self.config.elmo_trainable)
    #     if self.config.use_elmo_alone:
    #         text_embed = SpatialDropout1D(0.2)(elmo_embedding(input_text))
    #     else:
    #         word_embedding = Embedding(input_dim=self.text_embeddings.shape[0],
    #                                    output_dim=self.config.word_embed_dim,
    #                                    weights=[self.text_embeddings], trainable=self.config.word_embed_trainable,
    #                                    mask_zero=True)
    #         text_embed = SpatialDropout1D(0.2)(concatenate([word_embedding(input_text), elmo_embedding(input_text)]))
    # else:
    word_embedding = Embedding(input_dim=self.text_embeddings.shape[0],
                               output_dim=self.config.word_embed_dim,
                               weights=[self.text_embeddings],
                               trainable=self.config.word_embed_trainable,
                               mask_zero=True)
    text_embed = SpatialDropout1D(0.2)(word_embedding(input_text))

    if self.config.use_loc_input:  # location attention
        input_loc = Input(shape=(self.max_len,))
        inputs.append(input_loc)
        input_loc_expand = Lambda(lambda x: K.expand_dims(x))(input_loc)
        text_embed = multiply([text_embed, input_loc_expand])

    if self.config.aspect_embed_type == 'random':
        asp_embedding = Embedding(input_dim=self.n_aspect,
                                  output_dim=self.config.aspect_embed_dim)
    else:
        asp_embedding = Embedding(input_dim=self.aspect_embeddings.shape[0],
                                  output_dim=self.config.aspect_embed_dim,
                                  trainable=self.config.aspect_embed_trainable)
    aspect_embed = asp_embedding(input_aspect)
    aspect_embed = Flatten()(aspect_embed)  # reshape to 2d

    # the parameters of the attention and linear layers are shared across hops
    attention_layer = Attention(use_W=False, use_bias=True)
    linear_layer = Dense(self.config.word_embed_dim)
    # output of each computation layer, representing the text at different levels of abstraction
    computation_layers_out = [aspect_embed]

    for h in range(n_hop):
        # content attention layer
        repeat_out = RepeatVector(self.max_len)(computation_layers_out[-1])
        concat = concatenate([text_embed, repeat_out], axis=-1)
        attend_weight = attention_layer(concat)
        attend_weight_expand = Lambda(lambda x: K.expand_dims(x))(attend_weight)
        content_attend = multiply([text_embed, attend_weight_expand])
        content_attend = Lambda(lambda x: K.sum(x, axis=1))(content_attend)
        # linear layer
        out_linear = linear_layer(computation_layers_out[-1])
        computation_layers_out.append(add([content_attend, out_linear]))

    return Model(inputs, computation_layers_out[-1])
def create_model(args, maxlen, vocab):

    def ortho_reg(weight_matrix):
        # orthogonal regularization for the aspect embedding matrix
        w_n = weight_matrix / K.cast(
            K.epsilon() + K.sqrt(K.sum(K.square(weight_matrix), axis=-1, keepdims=True)),
            K.floatx())
        reg = K.sum(K.square(K.dot(w_n, K.transpose(w_n)) - K.eye(w_n.shape[0].value)))
        return args.ortho_reg * reg

    vocab_size = len(vocab)

    # Inputs
    sentence_input = Input(shape=(maxlen,), dtype='int32', name='sentence_input')
    neg_input = Input(shape=(args.neg_size, maxlen), dtype='int32', name='neg_input')

    # Construct word embedding layer
    word_emb = Embedding(vocab_size, args.emb_dim, mask_zero=True, name='word_emb')

    # Compute sentence representation
    e_w = word_emb(sentence_input)
    y_s = Average()(e_w)
    att_weights = Attention(name='att_weights')([e_w, y_s])
    z_s = WeightedSum()([e_w, att_weights])

    # Compute representations of negative instances
    e_neg = word_emb(neg_input)
    z_n = Average()(e_neg)

    # Reconstruction
    p_t = Dense(args.aspect_size)(z_s)
    p_t = Activation('softmax', name='p_t')(p_t)
    r_s = WeightedAspectEmb(args.aspect_size, args.emb_dim, name='aspect_emb',
                            W_regularizer=ortho_reg)(p_t)

    # Loss
    loss = MaxMargin(name='max_margin')([z_s, z_n, r_s])
    model = Model(inputs=[sentence_input, neg_input], outputs=loss)

    # Word embedding and aspect embedding initialization
    if args.emb_path:
        emb_reader = EmbReader(args.emb_path, emb_dim=args.emb_dim)
        logger.info('Initializing word embedding matrix')
        K.set_value(
            model.get_layer('word_emb').embeddings,
            emb_reader.get_emb_matrix_given_vocab(
                vocab, K.get_value(model.get_layer('word_emb').embeddings)))
        logger.info('Initializing aspect embedding matrix as centroid of kmeans clusters')
        K.set_value(
            model.get_layer('aspect_emb').W,
            emb_reader.get_aspect_matrix(args.aspect_size))

    return model
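# Hypothetical usage sketch (not part of the original code): since the MaxMargin
# layer outputs the loss value itself, the model can be trained against a dummy
# zero target with a pass-through loss. Assumes the custom layers used above
# (Average, Attention, WeightedSum, WeightedAspectEmb, MaxMargin) are importable;
# the args values below are illustrative only.
import numpy as np
from argparse import Namespace

args = Namespace(ortho_reg=0.1, neg_size=5, emb_dim=100, aspect_size=14, emb_path=None)
vocab = {'<pad>': 0, 'the': 1, 'food': 2, 'was': 3, 'great': 4}
maxlen = 20

model = create_model(args, maxlen, vocab)
model.compile(optimizer='adam', loss=lambda y_true, y_pred: y_pred)

sent = np.random.randint(0, len(vocab), (8, maxlen))             # positive sentences
neg = np.random.randint(0, len(vocab), (8, args.neg_size, maxlen))  # negative samples
model.fit([sent, neg], np.zeros((8, 1)), epochs=1)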
def patch_discriminator(shape, gpus=2):
    init = RandomNormal(stddev=0.02)
    in_image = Input(shape=shape)
    d = Conv2D(64, (4, 4), strides=(2, 2), padding='same', kernel_initializer=init)(in_image)
    d = LeakyReLU(alpha=0.2)(d)
    d = Conv2D(128, (4, 4), strides=(2, 2), padding='same', kernel_initializer=init)(d)
    d = LeakyReLU(alpha=0.2)(d)
    d = Conv2D(256, (4, 4), strides=(2, 2), padding='same', kernel_initializer=init)(d)
    d = LeakyReLU(alpha=0.2)(d)
    d = Attention(256)(d)
    d = Conv2D(512, (4, 4), strides=(2, 2), padding='same', kernel_initializer=init)(d)
    d = LeakyReLU(alpha=0.2)(d)
    d = Conv2D(512, (4, 4), padding='same', kernel_initializer=init)(d)
    d = LeakyReLU(alpha=0.2)(d)  # was assigned to an unused `x`, dropping this activation from the graph
    d = Attention(512)(d)        # likewise: the second attention block was previously discarded
    output = Conv2D(1, (4, 4), padding='same', activation='sigmoid', kernel_initializer=init)(d)
    with tf.device('/cpu:0'):
        model = Model(in_image, output)
    model = multi_gpu_model(model, gpus=gpus)  # use the `gpus` argument instead of hard-coding 2
    return model
def model_lstm_atten(embedding_matrix, maxlen, max_features, embed_size):
    inp = Input(shape=(maxlen,))
    x = Embedding(max_features, embed_size, weights=[embedding_matrix], trainable=False)(inp)
    x = Bidirectional(CuDNNLSTM(256, return_sequences=True))(x)
    x = Bidirectional(CuDNNLSTM(128, return_sequences=True))(x)
    x = Attention(maxlen)(x)
    x = Dense(128, activation="relu")(x)
    x = Dense(1, activation="sigmoid")(x)
    model = Model(inputs=inp, outputs=x)
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model
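# Hypothetical usage sketch (not part of the original code): fitting
# model_lstm_atten on random token-id sequences. Assumes the custom Attention
# layer above is importable and a CUDA GPU is available, since CuDNNLSTM only
# runs on GPU.
import numpy as np

maxlen, max_features, embed_size = 70, 50000, 300
embedding_matrix = np.random.rand(max_features, embed_size)

model = model_lstm_atten(embedding_matrix, maxlen, max_features, embed_size)
X = np.random.randint(0, max_features, (32, maxlen))  # 32 padded token-id sequences
y = np.random.randint(0, 2, (32, 1))                  # binary labels
model.fit(X, y, batch_size=8, epochs=1)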
def atae_lstm(self):
    input_text = Input(shape=(self.max_len,))
    input_aspect = Input(shape=(1,),)
    if self.use_elmo:
        elmo_embedding = ELMoEmbedding(output_mode=self.config.elmo_output_mode,
                                       idx2word=self.config.idx2token,
                                       mask_zero=True,
                                       hub_url=self.config.elmo_hub_url,
                                       elmo_trainable=self.config.elmo_trainable)
        if self.config.use_elmo_alone:
            text_embed = SpatialDropout1D(0.2)(elmo_embedding(input_text))
        else:
            word_embedding = Embedding(input_dim=self.text_embeddings.shape[0],
                                       output_dim=self.config.word_embed_dim,
                                       weights=[self.text_embeddings],
                                       trainable=self.config.word_embed_trainable,
                                       mask_zero=True)
            text_embed = SpatialDropout1D(0.2)(concatenate(
                [word_embedding(input_text), elmo_embedding(input_text)]))
    else:
        word_embedding = Embedding(input_dim=self.text_embeddings.shape[0],
                                   output_dim=self.config.word_embed_dim,
                                   weights=[self.text_embeddings],
                                   trainable=self.config.word_embed_trainable,
                                   mask_zero=True)
        text_embed = SpatialDropout1D(0.2)(word_embedding(input_text))

    if self.config.aspect_embed_type == 'random':
        asp_embedding = Embedding(input_dim=self.n_aspect,
                                  output_dim=self.config.aspect_embed_dim)
    else:
        asp_embedding = Embedding(input_dim=self.aspect_embeddings.shape[0],
                                  output_dim=self.config.aspect_embed_dim,
                                  trainable=self.config.aspect_embed_trainable)
    aspect_embed = asp_embedding(input_aspect)
    aspect_embed = Flatten()(aspect_embed)  # reshape to 2d
    repeat_aspect = RepeatVector(self.max_len)(aspect_embed)  # repeat aspect for every word in sequence

    input_concat = concatenate([text_embed, repeat_aspect], axis=-1)
    hidden_vecs, state_h, _ = LSTM(self.config.lstm_units, return_sequences=True,
                                   return_state=True)(input_concat)
    concat = concatenate([hidden_vecs, repeat_aspect], axis=-1)

    # apply attention mechanism
    attend_weight = Attention()(concat)
    attend_weight_expand = Lambda(lambda x: K.expand_dims(x))(attend_weight)
    attend_hidden = multiply([hidden_vecs, attend_weight_expand])
    attend_hidden = Lambda(lambda x: K.sum(x, axis=1))(attend_hidden)

    attend_hidden_dense = Dense(self.config.lstm_units)(attend_hidden)
    last_hidden_dense = Dense(self.config.lstm_units)(state_h)
    final_output = Activation('tanh')(add([attend_hidden_dense, last_hidden_dense]))
    return Model([input_text, input_aspect], final_output)
def atae_lstm_new(self):
    input_content = Input(shape=(self.max_len,))
    input_aspect = Input(shape=(self.aspect_max_len,))
    # Embed each character first, then embed the aspect; repeat the aspect embedding
    # once per character in the content, and concatenate the repeated aspect
    # embedding with each character embedding
    content_embed = Embedding(input_dim=self.max_content_vocab_size,
                              output_dim=self.content_embed_dim)
    aspect_embed = Embedding(input_dim=self.max_content_vocab_size,
                             output_dim=self.aspect_embed_dim)
    content_embedding = content_embed(input_content)
    content_embedding = SpatialDropout1D(0.2)(content_embedding)
    aspect_embedding = aspect_embed(input_aspect)

    # Pool the aspect character vectors: 60x128 => 1x128
    aspect_embedding = AveragePooling1D(pool_size=self.aspect_max_len)(aspect_embedding)
    aspect_flatten = Flatten()(aspect_embedding)
    repeat_aspect_embedding = RepeatVector(self.max_len)(aspect_flatten)

    # Concatenate the repeated aspect with the content characters
    input_concat = concatenate([content_embedding, repeat_aspect_embedding], axis=-1)

    # Run an LSTM on top
    if self.is_cudnn:
        hidden_vecs, state_h, _ = CuDNNLSTM(self.lstm_units,
                                            return_sequences=True,
                                            return_state=True)(input_concat)
    else:
        hidden_vecs, state_h, _ = LSTM(self.lstm_units,
                                       return_sequences=True,
                                       return_state=True)(input_concat)
    concat = concatenate([hidden_vecs, repeat_aspect_embedding], axis=-1)

    # apply attention mechanism
    attend_weight = Attention()(concat)
    attend_weight_expand = Lambda(lambda x: K.expand_dims(x))(attend_weight)
    attend_hidden = multiply([hidden_vecs, attend_weight_expand])
    attend_hidden = Lambda(lambda x: K.sum(x, axis=1))(attend_hidden)

    attend_hidden_dense = Dense(self.lstm_units)(attend_hidden)
    last_hidden_dense = Dense(self.lstm_units)(state_h)
    final_output = Activation('tanh')(add([attend_hidden_dense, last_hidden_dense]))

    dense_layer = Dense(self.dense_units, activation='relu')(final_output)
    output_layer = Dense(self.n_classes, activation='softmax')(dense_layer)
    return Model([input_content, input_aspect], output_layer)
def build_model(args, embeddings, emb_dim, vocab_size, max_len, words):
    if args.wordratings:
        dense_layer = get_word_classification(args.wordratings)
    else:
        dense_layer = Dense(120, activation="relu")

    # Embedding layer
    # embeddings_matrix = np.zeros((vocab_size+1, emb_dim))
    embeddings_matrix = np.random.rand(vocab_size + 1, emb_dim)
    embeddings_matrix[0] *= 0  # index 0 is reserved for padding
    for index, word in enumerate(words, start=1):
        try:
            embedding_vector = embeddings[word]
            embeddings_matrix[index] = embedding_vector
        except KeyError:  # was a bare except; only a missing key should be tolerated here
            print("Not found embedding for: <{0}>".format(word))

    input_layer = Input(shape=(max_len,))
    embedding_layer = Embedding(embeddings_matrix.shape[0],
                                embeddings_matrix.shape[1],
                                weights=[embeddings_matrix],
                                mask_zero=True,
                                trainable=False)(input_layer)
    layer1 = TimeDistributed(dense_layer)(embedding_layer)

    # Recurrent layer. Either LSTM or GRU
    if args.rnn == "LSTM":
        rnn_layer = LSTM(units=64, return_sequences=(args.attention or args.maxpooling))
    else:
        rnn_layer = GRU(units=64, return_sequences=(args.attention or args.maxpooling))
    rnn = Bidirectional(rnn_layer)(layer1)

    # Max Pooling and attention
    if args.maxpooling and args.attention:
        max_pooling = GlobalMaxPooling1DMasked()(rnn)
        con = TimeDistributed(Dense(100))(rnn)
        attention = Attention()(con)
        connection = concatenate([max_pooling, attention])
    elif args.maxpooling:
        max_pooling = GlobalMaxPooling1DMasked()
        connection = max_pooling(rnn)
    elif args.attention:
        con = TimeDistributed(Dense(100))(rnn)
        attention = Attention()
        connection = attention(con)
    else:
        connection = rnn

    connection = Dropout(0.2)(connection)
    valence_output = Dense(1, activation="sigmoid", name="valence_output")(connection)
    arousal_output = Dense(1, activation="sigmoid", name="arousal_output")(connection)

    # Build Model
    model = Model(inputs=[input_layer], outputs=[valence_output, arousal_output])
    return model
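# Hypothetical usage sketch (not part of the original code): building the
# valence/arousal model over a toy vocabulary. The Namespace fields mirror the
# args consumed above; maxpooling is disabled so only the repo's custom Attention
# layer is needed. Losses and optimizer are illustrative choices.
import numpy as np
from argparse import Namespace

words = ["good", "bad", "happy"]
emb_dim, max_len = 50, 30
embeddings = {w: np.random.rand(emb_dim) for w in words}

args = Namespace(wordratings=None, rnn="LSTM", attention=True, maxpooling=False)
model = build_model(args, embeddings, emb_dim, len(words), max_len, words)
model.compile(optimizer="adam",
              loss={"valence_output": "mse", "arousal_output": "mse"})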
class AT_LSTM(nn.Module):
    # define all the layers used in the model
    def __init__(self, embedding_dim, aspect_embedding_dim, hidden_dim, output_dim,
                 n_layers, embed_weights, at=True, ae=False, dropout=0):
        # Constructor
        super().__init__()
        # ATAE ?
        self.ae = ae
        self.at = at
        self.embedding_dim = embedding_dim

        # Embedding layer using GloVe or fastText
        self.embedding = custom_word_embedding(embed_weights)
        # Embedding layer using GloVe for aspects
        self.aspects_embedding = custom_word_embedding(embed_weights)
        # Embedding layer without GloVe
        # self.embedding = nn.Embedding(emb_mat.shape[0], emb_mat.shape[1])

        # LSTM layer and initialization
        if self.ae:
            self.lstm = nn.LSTM(embedding_dim * 2, hidden_dim, num_layers=n_layers,
                                bidirectional=False, dropout=dropout, batch_first=True)
        else:
            self.lstm = nn.LSTM(embedding_dim, hidden_dim, num_layers=n_layers,
                                bidirectional=False, dropout=dropout, batch_first=True)
        for name, param in self.lstm.named_parameters():
            if 'bias' in name:
                nn.init.constant_(param, 0.0)
            elif 'weight' in name:
                nn.init.xavier_normal_(param)

        # Attention layer with initialization
        if self.at:
            self.attention = Attention(aspect_embedding_dim, hidden_dim)
            self.attention.xavier_init()

        # Final dense layer with initialization
        self.fc = nn.Linear(embedding_dim, output_dim)
        nn.init.xavier_normal_(self.fc.weight)

        # activation function
        # self.act = nn.Sigmoid()
        self.act = nn.Softmax(dim=1)

    def forward(self, inp, text_lengths=None):
        text = inp[0].view(inp[0].size()[1], -1)  # Remove the useless 1st axis
        # text = [batch_size, sent_length]
        categories = inp[1].view(inp[1].size()[1]).long()  # categories = [batch_size]

        embedded = self.embedding(text.long())

        # ATAE: concatenate the aspect embedding to every word embedding
        if self.ae:
            embedded_input_aspect = self.aspects_embedding(categories)
            embedded_input_aspect = embedded_input_aspect.view(
                embedded_input_aspect.size()[0], 1, self.embedding_dim)
            embedded_input_aspect = embedded_input_aspect.repeat(1, embedded.size()[1], 1)
            embedded = torch.cat((embedded, embedded_input_aspect), -1)

        # packed sequence
        # packed_embedded = nn.utils.rnn.pack_padded_sequence(embedded, text_lengths, batch_first=True)
        # si = embedded.size()
        # embedded = embedded.view(si[1], si[2], si[3])
        embedded = embedded.float().cuda()
        packed_output, (hidden, cell) = self.lstm(embedded)
        # packed_output = [batch_size, sent_length, hid_dim]
        # hidden = [batch size, num layers * num directions, hid dim]
        # cell = [batch size, num layers * num directions, hid dim]

        embedded_aspects = self.aspects_embedding(categories)
        embedded_aspects = embedded_aspects.float().cuda()
        # embedded_aspects = [batch_size, aspect_embedding_dim]

        if self.at:
            final_hidden = self.attention(embedded, embedded_aspects, packed_output)
        else:
            final_hidden = hidden
        # hidden = [batch size, hid dim * num directions]

        dense_outputs = self.fc(final_hidden)

        # Final activation function
        outputs = self.act(dense_outputs)
        return outputs
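# Hypothetical usage sketch (not part of the original code): instantiating AT_LSTM
# and running one forward pass. Assumes custom_word_embedding accepts a
# (vocab, dim) weight matrix and that the repo's Attention module is importable;
# inputs carry a leading singleton axis, matching the view() calls in forward(),
# and CUDA is required because of the hard-coded .cuda() calls above.
import numpy as np
import torch

vocab_size, embedding_dim, hidden_dim = 1000, 300, 128
embed_weights = np.random.rand(vocab_size, embedding_dim)

model = AT_LSTM(embedding_dim, aspect_embedding_dim=300, hidden_dim=hidden_dim,
                output_dim=3, n_layers=1, embed_weights=embed_weights).cuda()

batch_size, sent_len = 4, 20
text = torch.randint(0, vocab_size, (1, batch_size, sent_len)).cuda()  # [1, batch, sent_len]
aspects = torch.randint(0, vocab_size, (1, batch_size)).cuda()         # [1, batch]
probs = model([text, aspects])  # [batch_size, output_dim] class probabilities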