예제 #1
0
def build_baseline(dataset):
    """Assemble the baseline model from the parsed run options.

    All hyper-parameters are read from ``config.parse_opt()``; ``dataset``
    only supplies the vocabulary size through ``dictionary.ntokens()``.

    Returns:
        A ``BaseModel`` wrapping the word, question and video embeddings,
        the attention module and the classifier.
    """
    opt = config.parse_opt()
    word_dim = 300  # passed to both WordEmbedding and QuestionEmbedding

    w_emb = WordEmbedding(dataset.dictionary.ntokens(), word_dim,
                          opt.EMB_DROPOUT)

    # The question and video encoders receive the same recurrent settings.
    rnn_settings = (opt.NUM_HIDDEN, opt.NUM_LAYER, opt.BIDIRECT,
                    opt.L_RNN_DROPOUT)
    q_emb = QuestionEmbedding(word_dim, *rnn_settings)
    v_emb = VideoEmbedding(opt.C3D_SIZE + opt.RES_SIZE, *rnn_settings)

    v_att = Attention(opt.NUM_HIDDEN, opt.MID_DIM, opt.FC_DROPOUT)
    classifier = SimpleClassifier(opt.NUM_HIDDEN, opt.MID_DIM, 1,
                                  opt.FC_DROPOUT)
    return BaseModel(w_emb, q_emb, v_att, classifier, v_emb)
    def __init__(self, vocab_size, embed_hidden=300, mlp_hidden=512):
        """Create the attention, projection and classifier sub-modules.

        Args:
            vocab_size: stored on the instance and used as the classifier's
                third argument.
            embed_hidden: accepted but not used by this constructor.
            mlp_hidden: width shared by every sub-module built here.
        """
        super(TopDown, self).__init__()

        self.vocab_size = vocab_size

        hid = mlp_hidden
        self.v_att = Attention(hid, hid, hid)
        self.q_net = FCNet([hid, hid])
        self.v_net = FCNet([hid, hid])
        self.classifier = SimpleClassifier(hid, 2 * hid, vocab_size, 0.5)
        self.mlp_hidden = hid
    def __init__(self, embed_hidden=300, mlp_hidden=512):
        """Set up the question LSTM, its projections and the attention module.

        Args:
            embed_hidden: input width of the LSTM and of ``verb_transform``.
            mlp_hidden: hidden width used by every layer built here.
        """
        super(TopDown, self).__init__()

        hid = mlp_hidden
        # The LSTM is bidirectional; lstm_proj accepts 2 * hid inputs,
        # which matches the concatenated forward/backward width.
        self.q_emb = nn.LSTM(embed_hidden, hid, batch_first=True,
                             bidirectional=True)
        self.q_prep = FCNet([hid, hid])
        self.lstm_proj = nn.Linear(2 * hid, hid)
        self.verb_transform = nn.Linear(embed_hidden, hid)
        self.v_att = Attention(hid, hid, hid)
예제 #4
0
파일: decoder.py 프로젝트: ptripathi/NLP
    def __init__(self, vocab_size, embedding_dim, dec_units, batch_sz):
        """Build the decoder's embedding, two GRU layers, output layer and
        attention module.

        Args:
            vocab_size: size of the embedding table and of the output layer.
            embedding_dim: width of the token embeddings.
            dec_units: unit count given to both GRUs and to the attention.
            batch_sz: stored on the instance; not otherwise used here.
        """
        super(Decoder, self).__init__()
        self.batch_sz, self.dec_units = batch_sz, dec_units

        layers = tf.keras.layers
        self.embedding = layers.Embedding(vocab_size, embedding_dim)
        self.gru1 = gru(dec_units)
        self.gru2 = gru(dec_units)
        self.fc = layers.Dense(vocab_size)

        # Attention module sized to the decoder's unit count.
        self.attention = Attention(dec_units)
예제 #5
0
def built_attention_model():
    """Build an attention-based classifier over variable-length id sequences.

    The graph is: embedding -> position embedding -> ``Attention(8, 16)``
    applied with the same tensor in all three input slots -> global average
    pooling -> dropout -> sigmoid output layer with ``cfg.num_classes``
    units. The model summary is printed as a side effect.

    Returns:
        The uncompiled Keras ``Model``.
    """
    S_inputs = Input(shape=(None,), dtype='int32')
    embeddings = Embedding(cfg.max_word, 128)(S_inputs)
    # Adding Position_Embedding slightly improves accuracy.
    embeddings = Position_Embedding()(embeddings)
    O_seq = Attention(8, 16)([embeddings, embeddings, embeddings])
    O_seq = GlobalAveragePooling1D()(O_seq)
    O_seq = Dropout(0.2)(O_seq)
    outputs = Dense(cfg.num_classes, activation='sigmoid')(O_seq)
    model = Model(inputs=S_inputs, outputs=outputs)
    # summary() prints the table itself and returns None; wrapping it in
    # print() only added a stray "None" line to the output.
    model.summary()
    return model
예제 #6
0
def build_baseline0(dataset, num_hid):
    """Wire up the baseline model that uses two identically configured
    ``QuestionEmbedding`` encoders (``q_emb`` and ``c_emb``).

    Args:
        dataset: supplies ``dictionary.ntoken``, ``v_dim`` and
            ``num_ans_candidates``.
        num_hid: hidden width shared by the encoders and projection nets.

    Returns:
        The assembled ``BaseModel``.
    """
    word_dim = 300

    def new_text_encoder():
        # Same settings on every call, but a fresh instance each time.
        return QuestionEmbedding(word_dim, num_hid, 1, False, 0.0)

    w_emb = WordEmbedding(dataset.dictionary.ntoken, word_dim, 0.0)
    q_emb = new_text_encoder()
    c_emb = new_text_encoder()
    v_att = Attention(dataset.v_dim, q_emb.num_hid, num_hid)
    q_net = FCNet([num_hid, num_hid])
    v_net = FCNet([dataset.v_dim, num_hid])
    c_net = FCNet([num_hid, num_hid])

    joint_dim = 2 * num_hid
    classifier = SimpleClassifier(joint_dim, joint_dim,
                                  dataset.num_ans_candidates, 0.5)
    return BaseModel(w_emb, q_emb, c_emb, v_att, q_net, v_net, c_net,
                     classifier)
예제 #7
0
def build_baseline1(dataset, num_hid):
    """Wire up the baseline variant that adds a second word embedding of
    width ``num_hid`` and a single-layer, batch-first LSTM.

    Args:
        dataset: supplies ``dictionary.ntoken``, ``v_dim`` and
            ``num_ans_candidates``.
        num_hid: hidden width shared by the sub-modules.

    Returns:
        The assembled ``BaseModel``.
    """
    vocab_size = dataset.dictionary.ntoken
    word_dim = 300

    w_emb = WordEmbedding(vocab_size, word_dim, 0.0)
    w_emb2 = WordEmbedding(vocab_size, num_hid, 0.0)
    q_emb = QuestionEmbedding(word_dim, num_hid, 1, False, 0.0)
    v_att = Attention(dataset.v_dim, q_emb.num_hid, num_hid)
    q_net = FCNet([num_hid, num_hid])
    v_net = FCNet([dataset.v_dim, num_hid])
    lstm = nn.LSTM(num_hid, num_hid, 1, batch_first=True)
    classifier = SimpleClassifier(num_hid, 2 * num_hid,
                                  dataset.num_ans_candidates, 0.5)

    return BaseModel(w_emb, q_emb, v_att, q_net, v_net, classifier, lstm,
                     w_emb2)
예제 #8
0
 def __init__(self, embedding_matrix):
     """Build the embedding, bidirectional LSTM, attention, GRU cell and
     output layer.

     ``embed_dim``, ``hidden_dim``, ``lstm_layers`` and ``polarities_dim``
     are read from the enclosing scope rather than passed as arguments.

     Args:
         embedding_matrix: pretrained vectors; converted to a float tensor
             and loaded with ``nn.Embedding.from_pretrained``.
     """
     super(RAM, self).__init__()

     pretrained = torch.tensor(embedding_matrix, dtype=torch.float)
     self.embed = nn.Embedding.from_pretrained(pretrained)

     self.bi_lstm = nn.LSTM(embed_dim, hidden_dim, lstm_layers,
                            batch_first=True, bidirectional=True)

     # Every layer after the bidirectional LSTM works on 2 * hidden_dim.
     bi_dim = hidden_dim * 2
     self.attention = Attention(bi_dim, score_function='mlp')
     self.gru_cell = nn.GRUCell(bi_dim, bi_dim)
     self.dense = nn.Linear(bi_dim, polarities_dim)
예제 #9
0
    def build_model(self):
        """Build the embedding -> BiLSTM -> attention -> dense classifier.

        Reads ``maxlen``, ``max_features``, ``emb_dim``, ``class_num`` and
        ``last_activation`` from the instance.

        Returns:
            The uncompiled Keras ``Model``.
        """
        tokens = Input((self.maxlen, ))

        embedded = Embedding(input_dim=self.max_features,
                             output_dim=self.emb_dim,
                             input_length=self.maxlen)(tokens)

        recurrent = LSTM(128, activation='tanh', return_sequences=True)
        encoded = Bidirectional(recurrent)(embedded)
        attended = Attention(self.maxlen)(encoded)

        head = Dense(self.class_num, activation=self.last_activation)
        return Model(tokens, head(attended))
예제 #10
0
파일: model.py 프로젝트: shamnastv/RTER
 def __init__(self, input_dim, hidden_dim, num_layers, dropout, device):
     """Build a bidirectional GRU with two linear projections, dropout and
     an attention module.

     Args:
         input_dim: feature width of the GRU input.
         hidden_dim: GRU hidden width per direction.
         num_layers: number of stacked GRU layers.
         dropout: probability for the standalone ``nn.Dropout`` layer (it
             is not passed to the GRU itself).
         device: stored on the instance; not otherwise used here.
     """
     super(UtteranceGRU, self).__init__()
     self.device = device

     self.gru = nn.GRU(input_dim, hidden_dim, num_layers,
                       batch_first=True, bidirectional=True)

     # Both projections and the attention take the 2 * hidden_dim width
     # produced by concatenating the two GRU directions.
     bi_dim = hidden_dim * 2
     self.linear1 = nn.Linear(bi_dim, hidden_dim)
     self.linear2 = nn.Linear(bi_dim, hidden_dim)
     self.dropout = nn.Dropout(dropout)
     self.attention = Attention(bi_dim)
예제 #11
0
    def __init__(self, units, vocab_size, embedding_dim):
        """Create the embedding, attention, GRU and prediction layers.

        Args:
            units: unit count for both the attention layer and the GRU.
            vocab_size: size of the embedding table and of the output layer.
            embedding_dim: width of the token embeddings.
        """
        super().__init__()

        layers = tf.keras.layers
        self.embedding_layer = layers.Embedding(vocab_size, embedding_dim)
        self.attention_layer = Attention(units)

        # The GRU returns the full output sequence and its final state.
        gru_options = dict(return_sequences=True,
                           return_state=True,
                           recurrent_initializer="glorot_uniform")
        self.gru_layer = layers.GRU(units, **gru_options)
        self.prediction_dense = layers.Dense(vocab_size)
예제 #12
0
 def __init__(self, config, pretrained_embedding=None):
     """Build the self-attention, attention and word-embedding modules.

     Args:
         config: supplies ``nb_head``, ``embedding_dim``, ``attention_dim``
             and ``word_num``.
         pretrained_embedding: optional weights; when given they are loaded
             with ``freeze=False``, otherwise a new embedding table with
             ``padding_idx=0`` is created.
     """
     super(NewsEncoder, self).__init__()
     self.config = config

     embed_dim = config.embedding_dim
     self.multi_head_self_attention = SelfAttention(config.nb_head,
                                                    embed_dim)
     self.attention = Attention(config.attention_dim, embed_dim)

     if pretrained_embedding is not None:
         self.word_embedding = nn.Embedding.from_pretrained(
             pretrained_embedding, freeze=False)
     else:
         self.word_embedding = nn.Embedding(config.word_num,
                                            embed_dim,
                                            padding_idx=0)
예제 #13
0
 def __init__(self, model, params, vocabulary, attention_key_size):
     """Register the attention module and the output parameters on ``model``.

     Args:
         model: parameter container handed to ``Attention`` and
             ``du.add_params``.
         params: supplies ``decoder_state_size``.
         vocabulary: stored on the instance; its length sizes the
             vocabulary weights and biases.
         attention_key_size: key size given to the attention module and
             added to the state-transform input width.
     """
     self.vocabulary = vocabulary

     state_size = params.decoder_state_size
     self.attention_module = Attention(model, state_size,
                                       attention_key_size,
                                       attention_key_size)

     transform_shape = (state_size + attention_key_size, state_size)
     self.state_transform_weights = du.add_params(
         model, transform_shape, "weights-state-transform")

     vocab_len = len(vocabulary)
     self.vocabulary_weights = du.add_params(
         model, (state_size, vocab_len), "weights-vocabulary")
     self.vocabulary_biases = du.add_params(
         model, (vocab_len, ), "biases-vocabulary")
예제 #14
0
    def get_model(self):
        """Build the embedding -> BiCuDNNLSTM -> attention -> dense model.

        Reads ``maxlen``, ``max_features``, ``embedding_dims``,
        ``class_num`` and ``last_activation`` from the instance.

        Returns:
            The uncompiled Keras ``Model``.
        """
        tokens = Input((self.maxlen, ))

        embedded = Embedding(self.max_features,
                             self.embedding_dims,
                             input_length=self.maxlen)(tokens)
        # LSTM or GRU
        recurrent = Bidirectional(CuDNNLSTM(128, return_sequences=True))
        attended = Attention(self.maxlen)(recurrent(embedded))

        output = Dense(self.class_num,
                       activation=self.last_activation)(attended)
        return Model(inputs=tokens, outputs=output)
예제 #15
0
    def get_model(self):
        """Build a two-level (word, then sentence) attention classifier.

        A word-level sub-model (embedding -> BiCuDNNLSTM -> attention) is
        applied to every sentence through ``TimeDistributed``; the results
        go through a second BiCuDNNLSTM and attention before the output
        layer.

        Returns:
            The uncompiled Keras ``Model`` taking an input of shape
            ``(maxlen_sentence, maxlen_word)`` per sample.
        """
        # Word part
        word_tokens = Input(shape=(self.maxlen_word, ))
        word_repr = Embedding(self.max_features,
                              self.embedding_dims,
                              input_length=self.maxlen_word)(word_tokens)
        # LSTM or GRU
        word_repr = Bidirectional(
            CuDNNLSTM(128, return_sequences=True))(word_repr)
        word_repr = Attention(self.maxlen_word)(word_repr)
        model_word = Model(word_tokens, word_repr)

        # Sentence part
        document = Input(shape=(self.maxlen_sentence, self.maxlen_word))
        sentence_repr = TimeDistributed(model_word)(document)
        # LSTM or GRU
        sentence_repr = Bidirectional(
            CuDNNLSTM(128, return_sequences=True))(sentence_repr)
        sentence_repr = Attention(self.maxlen_sentence)(sentence_repr)

        output = Dense(self.class_num,
                       activation=self.last_activation)(sentence_repr)
        return Model(inputs=document, outputs=output)
예제 #16
0
    def __init__(self, vocab_size, embed_hidden=300, mlp_hidden=512):
        """Create the attention module and the two projection networks.

        Args:
            vocab_size: stored on the instance.
            embed_hidden: accepted but not used by this constructor.
            mlp_hidden: width shared by every sub-module built here.
        """
        super(TopDown, self).__init__()

        self.vocab_size = vocab_size

        hid = mlp_hidden
        self.v_att = Attention(hid, hid, hid)
        self.q_net = FCNet([hid, hid])
        self.v_net = FCNet([hid, hid])
예제 #17
0
def embedding_RNN_1_lstm_attention(input_shape):
    """Build a bidirectional LSTM followed by attention.

    Uses ``CuDNNLSTM`` when a GPU is among the local devices and the plain
    ``LSTM`` otherwise.

    Args:
        input_shape: full batch shape passed to ``Input(batch_shape=...)``.

    Returns:
        A tuple ``(x, input, attention)``: the attention layer's output, the
        input tensor, and the attention weights returned by the layer.
    """
    # Look at every reported device instead of only the first one: the CPU
    # is typically listed first even on GPU machines, which made the CuDNN
    # branch unreachable, and an empty list used to raise IndexError.
    has_gpu = any(dev.device_type == 'GPU'
                  for dev in device_lib.list_local_devices())

    inputs = Input(batch_shape=input_shape)

    if has_gpu:
        recurrent = CuDNNLSTM(units=32, return_sequences=True)
    else:
        recurrent = LSTM(units=32, return_sequences=True)
    x = Bidirectional(recurrent)(inputs)

    x, attention = Attention(return_attention=True)(x)

    return x, inputs, attention
예제 #18
0
    def __init__(self, config, pretrained_embedding=None):
        """Build the embeddings, CNNs, attentions and category projections.

        Args:
            config: supplies the vocabulary/category sizes, embedding
                widths, CNN filter count and window size, and the
                attention width.
            pretrained_embedding: optional word weights; when given they
                are loaded with ``freeze=False``, otherwise a new table
                with ``padding_idx=0`` is created.
        """
        super(NewsEncoder, self).__init__()
        self.config = config

        if pretrained_embedding is not None:
            self.word_embedding = nn.Embedding.from_pretrained(
                pretrained_embedding, freeze=False)
        else:
            self.word_embedding = nn.Embedding(config.word_num,
                                               config.embedding_dim,
                                               padding_idx=0)

        # Category and subcategory embeddings share one width.
        cat_dim = config.category_embedding_dim
        self.category_embedding = nn.Embedding(config.category_num,
                                               cat_dim,
                                               padding_idx=0)
        self.subcategory_embedding = nn.Embedding(config.subcategory_num,
                                                  cat_dim,
                                                  padding_idx=0)

        def text_cnn():
            # Single input channel; the kernel spans the full embedding
            # width and is padded by 1 along the first kernel dimension.
            return nn.Conv2d(1,
                             config.num_filters,
                             (config.window_size, config.embedding_dim),
                             padding=(1, 0))

        def filter_attention():
            return Attention(config.attention_dim, config.num_filters)

        def category_projection():
            return nn.Linear(cat_dim, config.num_filters)

        self.title_cnn = text_cnn()
        self.abstract_cnn = text_cnn()
        self.title_attention = filter_attention()
        self.abstract_attention = filter_attention()

        self.category_dense = category_projection()
        self.subcategory_dense = category_projection()

        self.view_attention = filter_attention()
예제 #19
0
파일: model.py 프로젝트: Xenia-W/RNN
 def __init__(self):
     """Create the embedding, GRUs, linear layers and attention module.

     All sizes come from the ``config`` object in the enclosing scope.
     Every layer except the attention module is moved to the GPU with
     ``.cuda()`` at construction time.
     """
     super(Model, self).__init__()

     hidden = config.hidden_dim
     self.embed = nn.Embedding(config.vocab_size, config.embed_dim).cuda()

     # NOTE(review): both GRUs hard-code an input width of 300 instead of
     # reading config.embed_dim - confirm the two always agree.
     self.gru = nn.GRU(300, hidden).cuda()
     self.bi_gru = nn.GRU(300, hidden, bidirectional=True).cuda()

     # Width of a (1, batch, hidden) tensor once flattened.
     flat_dim = 1 * config.batch_size * hidden
     self.output_linear = nn.Linear(flat_dim, 1).cuda()
     # output: (L, B, 2*H) -> (L, B, H)
     self.linear = nn.Linear(hidden * 2, hidden).cuda()
     self.hidden_linear = nn.Linear(flat_dim, flat_dim).cuda()
     self.attention = Attention()
예제 #20
0
def build_caq_newatt(dataset, num_hid):
    """Wire up the ``CAQModel`` with visual attention plus a 4-head
    attention module over half-width features.

    Args:
        dataset: supplies ``dictionary.ntoken``, ``v_dim`` and
            ``num_ans_candidates``; it is also passed on to ``CAQModel``.
        num_hid: base hidden width; several sub-modules use half of it.

    Returns:
        The assembled ``CAQModel``.
    """
    half_hid = num_hid // 2

    w_emb = WordEmbedding(dataset.dictionary.ntoken, 300, 0.0)
    q_emb = QuestionEmbedding(300, num_hid, 1, False, 0.0)
    v_att = Attention(dataset.v_dim, q_emb.num_hid, num_hid)
    q_net = FCNet([q_emb.num_hid, half_hid])
    v_net = FCNet([dataset.v_dim, half_hid])
    updated_query_composer = FCNet([num_hid + half_hid, num_hid])
    neighbour_attention = MultiHeadedAttention(4, half_hid, dropout=0.1)
    Dropout_C = nn.Dropout(0.1)

    # The classifier has one more output than there are answer candidates.
    classifier = SimpleClassifier(half_hid, num_hid * 2,
                                  dataset.num_ans_candidates + 1, 0.5)

    return CAQModel(w_emb, q_emb, v_att, q_net, v_net,
                    updated_query_composer, neighbour_attention, Dropout_C,
                    classifier, dataset)
예제 #21
0
    def __init__(self, h, d_model, dropout=0.1):
        """Create the projections for an ``h``-head attention block.

        Args:
            h: number of heads; must divide ``d_model`` evenly.
            d_model: model width; every linear layer here maps
                ``d_model -> d_model``.
            dropout: probability for the dropout layer.
        """
        super().__init__()

        per_head, leftover = divmod(d_model, h)
        assert leftover == 0

        # We assume d_v always equals d_k
        self.d_k = per_head
        self.h = h

        # Three input projections followed by one output projection.
        projections = [nn.Linear(d_model, d_model) for _ in range(3)]
        self.linear_layers = nn.ModuleList(projections)
        self.output_linear = nn.Linear(d_model, d_model)
        self.attention = Attention()

        self.dropout = nn.Dropout(p=dropout)
예제 #22
0
 def attentionModel(self):
     """Build and compile an LSTM autoencoder with an attention bottleneck.

     The input is encoded by an LSTM, reduced by ``Attention``, repeated
     ``_sequence_long`` times and decoded by a second LSTM. The autoencoder
     is compiled and its summary printed.

     Returns:
         A tuple ``(autoencoder, encoder)`` of Keras models sharing the
         same input; ``encoder`` outputs the first LSTM's sequence.
     """
     seq_len = self._sequence_long
     n_features = self._features

     inputs = Input(shape=(seq_len, n_features))
     encoded = LSTM(self._lstm_neurons,
                    return_sequences=True,
                    activation="tanh")(inputs)

     decoded = Attention()(encoded)
     decoded = RepeatVector(seq_len)(decoded)
     decoded = LSTM(n_features, return_sequences=True)(decoded)

     autoencoder = Model(inputs=inputs, outputs=decoded)
     encoder = Model(inputs=inputs, outputs=encoded)

     autoencoder.compile(loss='categorical_crossentropy',
                         optimizer='adam',
                         metrics=['accuracy'])
     autoencoder.summary()
     return autoencoder, encoder
    def __init__(self, hidden_size, output_size, n_layers=3, dropout_p=0.1):
        """Create the embedding, GRU, output layer and attention module.

        Args:
            hidden_size: embedding width and GRU hidden width.
            output_size: size of the embedding table and of the output layer.
            n_layers: number of stacked GRU layers.
            dropout_p: dropout probability passed to the GRU.
        """
        super(AttentionDecoder, self).__init__()
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.n_layers = n_layers
        self.dropout_p = dropout_p

        # The GRU input and the output layer input are both
        # 2 * hidden_size wide.
        doubled = hidden_size * 2
        self.embedding = nn.Embedding(output_size, hidden_size)
        self.gru = nn.GRU(doubled, hidden_size, n_layers, dropout=dropout_p)
        self.linear = nn.Linear(doubled, output_size)
        self.attention = Attention(hidden_size)
예제 #24
0
def BidLstmMpAtn(inp, max_len, max_features, embed_size, embedding_matrix):
    """Encode ``inp`` with a frozen embedding and two BiCuDNNLSTM layers,
    then concatenate attention and global max pooling.

    Args:
        inp: input tensor of token ids.
        max_len: argument for the ``Attention`` layer.
        max_features: size of the embedding table.
        embed_size: width of the embedding vectors.
        embedding_matrix: weights loaded into the non-trainable embedding.

    Returns:
        The concatenated ``[attention, max-pool]`` tensor.
    """
    frozen_embedding = Embedding(max_features,
                                 embed_size,
                                 weights=[embedding_matrix],
                                 trainable=False)
    x = frozen_embedding(inp)

    # Two identical recurrent stages, each followed by dropout.
    for _ in range(2):
        x = Bidirectional(CuDNNLSTM(50, return_sequences=True))(x)
        x = Dropout(0.25)(x)

    attended = Attention(max_len)(x)
    pooled = GlobalMaxPooling1D()(x)
    return concatenate([attended, pooled])
예제 #25
0
    def __init__(self, vocab_size: int, hidden_dim: int, dropout_rate: float,
                 *args, **kwargs):
        """Create the wrapped attention and feed-forward layers.

        Args:
            vocab_size: stored on the instance.
            hidden_dim: depth of the attention layer and width of the
                feed-forward network.
            dropout_rate: given to the feed-forward network and to both
                ``AddNormalizationWrapper`` instances.
            *args, **kwargs: forwarded to the parent constructor.
        """
        super().__init__(*args, **kwargs)
        self.vocab_size = vocab_size
        self.hidden_dim = hidden_dim

        attention_core = Attention(depth=hidden_dim)
        ffn_core = FeedForwardNetwork(hidden_dim=hidden_dim,
                                      dropout_rate=dropout_rate)

        def wrapped(layer):
            # Both base layers get the same wrapper and dropout rate.
            return AddNormalizationWrapper(layer, dropout_rate)

        self.attention = wrapped(attention_core)
        self.ffn = wrapped(ffn_core)
        self.output_normalization = LayerNormalization()
예제 #26
0
    def get_model(self):
        """Build the embedding -> BiLSTM -> attention -> dense model and
        mark it non-trainable.

        The embedding is initialised from ``self.embedding_matrix``; the
        input and attention layers are named ``"input"`` and
        ``"attention"``.

        Returns:
            The uncompiled Keras ``Model`` with ``trainable`` set to False.
        """
        tokens = Input((self.maxlen, ), name="input")

        embedded = Embedding(self.max_features,
                             self.embedding_dims,
                             input_length=self.maxlen,
                             weights=[self.embedding_matrix])(tokens)
        # LSTM or GRU
        encoded = Bidirectional(LSTM(128, return_sequences=True))(embedded)
        attended = Attention(self.maxlen, name="attention")(encoded)
        output = Dense(self.class_num,
                       activation=self.last_activation)(attended)

        model = Model(inputs=tokens, outputs=output)
        model.trainable = False
        return model
예제 #27
0
    def __init__(self, vocab_size, emb_dim, n_hidden, bidirectional, n_layer,
                 dropout=0.0):
        """Build the encoder LSTM, decoder cells, attention parameters and
        the ``AttentionalDecoder`` that ties them together.

        Args:
            vocab_size: size of the embedding table.
            emb_dim: embedding width; the decoder cells take 2 * emb_dim.
            n_hidden: hidden width of the encoder and decoder.
            bidirectional: whether the encoder LSTM is bidirectional.
            n_layer: number of layers in the encoder and decoder.
            dropout: dropout passed to the encoder LSTM and decoder cells.
        """
        super().__init__()
        n_directions = 2 if bidirectional else 1

        # embedding weight parameter is shared between encoder, decoder,
        # and used as final projection layer to vocab logit
        # and can be initialized with pretrained word vectors
        self._embedding = nn.Embedding(vocab_size, emb_dim, padding_idx=0)
        self._enc_lstm = nn.LSTM(emb_dim,
                                 n_hidden,
                                 n_layer,
                                 bidirectional=bidirectional,
                                 dropout=dropout)

        # Trainable h/c tensors with one row per layer and direction,
        # filled uniformly from [-INIT, INIT].
        state_shape = (n_layer * n_directions, n_hidden)
        self._init_enc_h = nn.Parameter(torch.Tensor(*state_shape))
        self._init_enc_c = nn.Parameter(torch.Tensor(*state_shape))
        for state in (self._init_enc_h, self._init_enc_c):
            init.uniform_(state, -INIT, INIT)

        self._dec_lstm = MultiLayerLSTMCells(2 * emb_dim,
                                             n_hidden,
                                             n_layer,
                                             dropout=dropout)

        # project encoder final states to decoder initial states
        enc_out_dim = n_hidden * n_directions
        self._dec_h = nn.Linear(enc_out_dim, n_hidden, bias=False)
        self._dec_c = nn.Linear(enc_out_dim, n_hidden, bias=False)

        # multiplicative attention
        self._attn_wm = nn.Parameter(torch.Tensor(enc_out_dim, n_hidden))
        self._attn_wq = nn.Parameter(torch.Tensor(n_hidden, n_hidden))
        for weight in (self._attn_wm, self._attn_wq):
            init.xavier_normal_(weight)

        # attention layer
        self._attention = Attention(n_hidden, n_hidden)

        # project decoder output to emb_dim, then
        # apply weight matrix from embedding layer
        self._projection = nn.Sequential(
            nn.Linear(2 * n_hidden, n_hidden),
            nn.Tanh(),
            nn.Linear(n_hidden, emb_dim, bias=False),
        )

        # functional object for easier usage
        self._decoder = AttentionalDecoder(self._embedding, self._dec_lstm,
                                           self._attn_wq, self._attention,
                                           self._projection)
예제 #28
0
파일: CNN-a.py 프로젝트: xypan1232/CNN-Att
def set_cnn_model_attention(input_dim=4, input_length=2701):
    """Build and compile a 1-D CNN with attention over both feature axes.

    The convolution output and its transpose (via ``Reshape``) each pass
    through an ``Attention`` layer; both flattened results are concatenated
    with the flattened convolution output before the dense head. The shape
    of the merged tensor is printed as a side effect.

    Args:
        input_dim: number of channels per input position.
        input_length: length of the input sequence.

    Returns:
        The Keras ``Model``, compiled with categorical cross-entropy and
        rmsprop, ending in a 2-way softmax.
    """
    # NOTE(review): the two attention_reg_* values are assigned but never
    # used in this function.
    attention_reg_x = 0.25
    attention_reg_xr = 1
    attentionhidden_x = 16
    attentionhidden_xr = 8
    nbfilter = 16
    input = Input(shape=(input_length, input_dim))
    x = conv.Convolution1D(nbfilter, 10, border_mode="valid")(input)
    x = Dropout(0.5)(x)
    x = Activation('relu')(x)
    x = conv.MaxPooling1D(pool_length=3)(x)
    # Swap the last two axes so the second attention runs over the other
    # dimension of the feature map.
    x_reshape = core.Reshape((x._keras_shape[2], x._keras_shape[1]))(x)

    x = Dropout(0.5)(x)
    x_reshape = Dropout(0.5)(x_reshape)

    decoder_x = Attention(hidden=attentionhidden_x,
                          activation='linear')  # success
    decoded_x = decoder_x(x)
    output_x = myFlatten(x._keras_shape[2])(decoded_x)

    decoder_xr = Attention(hidden=attentionhidden_xr, activation='linear')
    decoded_xr = decoder_xr(x_reshape)
    output_xr = myFlatten(x_reshape._keras_shape[2])(decoded_xr)

    output = merge([output_x, output_xr, Flatten()(x)], mode='concat')
    #output = BatchNormalization()(output)
    output = Dropout(0.5)(output)
    # Call form of print: identical output on Python 2 for a single
    # argument, and no longer a SyntaxError on Python 3.
    print(output.shape)
    output = Dense(nbfilter * 10, activation="relu")(output)
    output = Dropout(0.5)(output)
    out = Dense(2, activation='softmax')(output)
    #output = BatchNormalization()(output)
    model = Model(input, out)
    model.compile(loss='categorical_crossentropy', optimizer='rmsprop')

    return model
예제 #29
0
def dual_bert():
    """Build and compile a two-tower BERT classifier with a BiLSTM and
    attention head.

    Two separate ``bert-base-uncased`` models encode two inputs; their
    first outputs are concatenated, passed through a bidirectional LSTM
    and an ``Attention`` layer, then a dense head with 5 softmax outputs.

    Returns:
        The compiled Keras ``Model`` taking
        ``[id1, mask1, atn1, id2, mask2, atn2]`` as inputs.
    """
    set_seed(33)

    opt = Adam(learning_rate=2e-5)

    def token_input():
        # Every model input is an int32 vector of MAX_SEQUENCE_LENGTH.
        return Input((MAX_SEQUENCE_LENGTH, ), dtype=tf.int32)

    # Creation order is ids, then masks, then the third input of each
    # tower. NOTE: atn1/atn2 are fed to BERT as token_type_ids.
    id1, id2 = token_input(), token_input()
    mask1, mask2 = token_input(), token_input()
    atn1, atn2 = token_input(), token_input()

    config = BertConfig()
    config.output_hidden_states = False  # Set to True to obtain hidden states
    bert_model1 = TFBertModel.from_pretrained('bert-base-uncased',
                                              config=config)
    bert_model2 = TFBertModel.from_pretrained('bert-base-uncased',
                                              config=config)

    embedding1 = bert_model1(id1, attention_mask=mask1, token_type_ids=atn1)[0]
    embedding2 = bert_model2(id2, attention_mask=mask2, token_type_ids=atn2)[0]
    x = Concatenate()([embedding1, embedding2])

    # Unidirectional LSTM with he_normal weight initialisation;
    # return_sequences=True yields an output for every token (False would
    # return only the last one).
    lstm = keras.layers.LSTM(64,
                             kernel_initializer='he_normal',
                             return_sequences=True)
    # Wrapping it in Bidirectional makes it a bidirectional LSTM.
    x = keras.layers.Bidirectional(lstm)(x)

    # Alternatives the original author left disabled: taking only the
    # first (CLS) token to reduce dimensionality, or global-average-pooling
    # each BERT embedding before concatenation.
    x = Attention(128)(x)  # add attention

    x = Dense(64, activation='relu')(x)
    x = Dropout(0.2)(x)
    # The output width is fixed at 5; a disabled variant sized it with
    # len(map_label) instead.
    out = Dense(5, activation='softmax')(x)

    model = Model(inputs=[id1, mask1, atn1, id2, mask2, atn2], outputs=out)
    model.compile(loss='sparse_categorical_crossentropy',
                  optimizer=opt,
                  metrics=['accuracy'])  # add an evaluation metric

    return model
    def __init__(self, encoder, gpu_mode, embed_hidden=300, mlp_hidden=512):
        """Create the image transforms, vocabulary sizes and sub-modules.

        Args:
            encoder: supplies role/verb/label counts, the maximum role
                count and ``question_words``.
            gpu_mode: stored on the instance; not otherwise used here.
            embed_hidden: width of the verb and question-word embeddings.
            mlp_hidden: hidden width shared by the remaining layers.
        """
        super(BaseModel, self).__init__()

        transforms = tv.transforms
        self.normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                              std=[0.229, 0.224, 0.225])

        # Training applies random rotation, crop and flip; dev uses a
        # fixed resize and center crop. Both end with normalisation.
        self.train_transform = transforms.Compose([
            transforms.RandomRotation(10),
            transforms.RandomResizedCrop(224),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            self.normalize,
        ])

        self.dev_transform = transforms.Compose([
            transforms.Resize(224),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            self.normalize,
        ])

        self.encoder = encoder
        self.gpu_mode = gpu_mode
        self.n_roles = encoder.get_num_roles()
        self.n_verbs = encoder.get_num_verbs()
        self.vocab_size = encoder.get_num_labels()
        self.max_role_count = encoder.get_max_role_count()
        self.n_role_q_vocab = len(encoder.question_words)

        # (A vgg16_modified() conv backbone is disabled in the original.)
        hid = mlp_hidden
        self.down = nn.Linear(hid * 2, hid)
        self.verb_lookup = nn.Embedding(self.n_verbs, embed_hidden)
        # One extra row is appended after the question vocabulary and used
        # as the padding index.
        pad_idx = self.n_role_q_vocab
        self.w_emb = nn.Embedding(pad_idx + 1, embed_hidden,
                                  padding_idx=pad_idx)
        self.q_emb = nn.LSTM(embed_hidden, hid,
                             batch_first=True, bidirectional=True)
        self.q_prep = FCNet([hid, hid])
        self.lstm_proj = nn.Linear(hid * 2, hid)
        self.verb_transform = nn.Linear(embed_hidden, hid)
        self.v_att = Attention(hid, hid, hid)
        self.q_net = FCNet([hid, hid])
        self.v_net = FCNet([hid, hid])
        self.classifier = SimpleClassifier(hid, 2 * hid, self.vocab_size,
                                           0.5)

        self.mlp_hidden = hid
        self.embed_hidden = embed_hidden