    def __init__(self, args, Y, dicts, K=7, attn='bahdanau'):
        super(AttDense, self).__init__()
        self.word_rep = WordRep(args, Y, dicts)
        self.att1 = Attention('', 100)
        filters = [100]
        dc = 200
        self.attn = attn
        if attn == 'bahdanau':
            for i in range(2, K + 1):
                filters += [dc]
                print(filters, sum(filters[:-1]))
                self.add_module(
                    f"block{i - 2}",
                    DenseBlock(sum(filters[:-1]), filters[i - 1], 3))
                self.add_module(f"U{i - 2}", Attention('bmm', dc))
        else:
            for i in range(2, K + 1):
                filters += [dc]
                print(filters, sum(filters[:-1]))
                self.add_module(
                    f"block{i-2}",
                    DenseBlock(sum(filters[:-1]), filters[i - 1], 3))
                self.add_module(f"U{i-2}", nn.Linear(dc, Y))

        #self.att = Attn('bmm',200)

        #self.output_layer = OutputLayer(args, Y, dicts,dc)
        self.output_layer = nn.Linear(dc, Y)
        self.loss_function = nn.BCEWithLogitsLoss()
Example #2
    def __init__(self, output_size, embedding_size, hidden_size, key_size,
                 value_size, num_layers, max_len):
        super(DecoderRNN, self).__init__()
        self.value_size = value_size
        self.num_layers = num_layers
        self.hidden_size = hidden_size
        self.embedding = nn.Embedding(output_size, embedding_size)
        self.max_len = max_len

        # learnable initial hidden and cell states, one Parameter per LSTM layer
        self.params_h0 = nn.ParameterList([
            nn.Parameter(torch.zeros(1, self.hidden_size).float())
            for _ in range(self.num_layers)
        ])
        self.params_c0 = nn.ParameterList([
            nn.Parameter(torch.zeros(1, self.hidden_size).float())
            for _ in range(self.num_layers)
        ])

        self.lstmCells = nn.ModuleList([
            nn.LSTMCell(input_size=value_size + embedding_size,
                        hidden_size=self.hidden_size),
            nn.LSTMCell(input_size=self.hidden_size,
                        hidden_size=self.hidden_size),
            nn.LSTMCell(input_size=self.hidden_size,
                        hidden_size=self.hidden_size)
        ])

        self.attention = Attention(self.hidden_size, key_size, value_size,
                                   output_size)
        # weight tying: the attention output projection shares the decoder embedding matrix
        self.attention.projection.weight = self.embedding.weight
Example #3
def test_attention_forward():
    with torch.no_grad():
        kq_dim = 2
        v_dim = 2
        hidden_dim = 16
        att = Attention(hidden_dim, key_and_query_dim=kq_dim, value_dim=v_dim)

        batch_size = 4
        q_seq_len = 2
        kv_seq_len = 7

        q_attention_input = torch.rand((batch_size, q_seq_len, hidden_dim))
        kv_attention_input = torch.rand((batch_size, kv_seq_len, hidden_dim))

        attention_output = att.forward(q_hidden_inputs=q_attention_input,
                                       k_hidden_inputs=kv_attention_input,
                                       v_hidden_inputs=kv_attention_input,
                                       mask=None)
        assert attention_output.size() == torch.Size(
            (batch_size, q_seq_len, v_dim))
        assert attention_output.sum().item() != 0

        mask = torch.ones((batch_size, q_seq_len, kv_seq_len))
        mask[:, :, -1] = 0

        attention_output = att.forward(q_hidden_inputs=q_attention_input,
                                       k_hidden_inputs=kv_attention_input,
                                       v_hidden_inputs=kv_attention_input,
                                       mask=mask,
                                       save_attention=True)
        assert attention_output.size() == torch.Size(
            (batch_size, q_seq_len, v_dim))
        assert att.attention[:, :, -1].sum().item() == 0
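Below is a minimal sketch of an Attention module that would satisfy the interface exercised by this test. It is an assumption (plain scaled dot-product attention with separate query/key and value projections, a 0/1 mask, and a save_attention flag), not the implementation the test actually imports.

import math

import torch
import torch.nn as nn
import torch.nn.functional as F


class Attention(nn.Module):
    def __init__(self, hidden_dim, key_and_query_dim, value_dim):
        super().__init__()
        self.q_proj = nn.Linear(hidden_dim, key_and_query_dim)
        self.k_proj = nn.Linear(hidden_dim, key_and_query_dim)
        self.v_proj = nn.Linear(hidden_dim, value_dim)
        self.scale = math.sqrt(key_and_query_dim)
        self.attention = None  # filled in when save_attention=True

    def forward(self, q_hidden_inputs, k_hidden_inputs, v_hidden_inputs,
                mask=None, save_attention=False):
        q = self.q_proj(q_hidden_inputs)              # (batch, q_len, kq_dim)
        k = self.k_proj(k_hidden_inputs)              # (batch, kv_len, kq_dim)
        v = self.v_proj(v_hidden_inputs)              # (batch, kv_len, v_dim)
        scores = torch.bmm(q, k.transpose(1, 2)) / self.scale
        if mask is not None:
            # positions where mask == 0 get exactly zero weight after the softmax
            scores = scores.masked_fill(mask == 0, float('-inf'))
        weights = F.softmax(scores, dim=-1)
        if save_attention:
            self.attention = weights
        return torch.bmm(weights, v)                  # (batch, q_len, v_dim)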
Example #4
    def get_predictions(self, frames, scope):
        frames = self._reshape_to_conv(frames)
        cnn = CNN()
        if self.operation == 'training':
            cnn_output = cnn.create_model(frames,
                                          cnn.conv_filters,
                                          keep_prob=self.keep_prob)
        else:
            cnn_output = cnn.create_model(frames,
                                          cnn.conv_filters,
                                          keep_prob=1.0)
        cnn_output = self._reshape_to_rnn(cnn_output)
        rnn = RNN()
        rnn_output = rnn.create_model(cnn_output, scope + '_rnn')
        if self.is_attention:
            attention = Attention(self.batch_size)
            attention_output = attention.create_model(rnn_output,
                                                      scope + '_attention')
            fc = FC(self.num_classes)
            outputs = fc.create_model(attention_output, scope + '_fc')
        else:
            rnn_output = rnn_output[:, -1, :]
            fc = FC(self.num_classes)
            outputs = fc.create_model(rnn_output, scope + '_fc')
        return outputs
Example #5
    def __init__(self, vocab_size, embedding_dim, dropout, device):
        super(AttentionLSTMModel, self).__init__()
        self.save_name = "AttentionLSTMModel.pt"
        self.device = device

        self.embeddings = nn.Embedding(vocab_size, embedding_dim)
        self.dropout = nn.Dropout(dropout)

        self.lstm = nn.LSTM(embedding_dim, 500, batch_first=True)
        self.attention = Attention(500)

        self.fc0 = nn.Linear(1000, 300)
        self.fc1 = nn.Linear(300, vocab_size)
Example #6
    def __init__(self, args, data):
        super(Model, self).__init__()
        self.window = args.window
        self.variables = data.m
        self.hw = args.highway_window
        self.activate1 = F.relu
        self.hidR = args.hidRNN
        self.rnn1 = nn.LSTM(self.variables,
                            self.hidR,
                            num_layers=args.rnn_layers,
                            bidirectional=False)
        self.linear1 = nn.Linear(self.hidR, self.variables)
        # self.linear1=nn.Linear(1280,100)
        # self.out=nn.Linear(100,self.variables)
        if (self.hw > 0):
            self.highway = nn.Linear(self.hw, 1)
        print(self.hidR)
        print(self.window)
        #self.attention = Attention(hidden_emb=self.hidR, seq_len=self.window) # attention module
        self.attention = Attention(hidden_emb=self.hidR,
                                   seq_len=128)  # attention module

        self.dropout = nn.Dropout(p=args.dropout)
        self.output = None
        if args.output_fun == 'sigmoid':
            self.output = torch.sigmoid
        if args.output_fun == 'tanh':
            self.output = torch.tanh
Example #7
    def __init__(self, vocab_size, max_len, hidden_size, embedding_size,
                 sos_id, eos_id, input_dropout_p=0, dropout_p=0,
                 n_layers=1, bidirectional=False, rnn_cell='lstm',
                 use_attention=True):
        super(DecoderRNN, self).__init__(vocab_size, max_len, hidden_size,
                                         input_dropout_p, dropout_p,
                                         n_layers, rnn_cell)

        self.bidirectional_encoder = bidirectional

        self.output_size = vocab_size
        self.max_length = max_len
        self.use_attention = use_attention
        self.eos_id = eos_id
        self.sos_id = sos_id

        self.init_input = None

        self.embedding = nn.Embedding(self.output_size, embedding_size)
        # auxiliary "part" embedding (vocab 4900, dim 50) concatenated with the word embedding
        self.part_embedding = nn.Embedding(4900, 50)

        self.rnn = self.rnn_cell(embedding_size + 50, hidden_size, n_layers,
                                 batch_first=True, dropout=dropout_p)
        if use_attention:
            self.attention = Attention(self.hidden_size)

        self.out = nn.Linear(self.hidden_size, self.output_size)
Example #8
    def __init__(
            self,
            input_features,
            rnn_units,
            # max_seq_len,
            pool_method,
            encoder_type,
            hidden_middle_val):
        super(DocumentEncoderRNN, self).__init__()

        # self.max_seq_len = max_seq_len
        self.emb_dim = input_features
        self.rnn_units = rnn_units
        self.pool_method = pool_method
        self.hidden_middle_val = hidden_middle_val * 2
        self.encoder_type = encoder_type

        self.encoder = encoder_type(self.emb_dim,
                                    self.rnn_units,
                                    bidirectional=True,
                                    batch_first=True)
        # self.hidden = self.init_hidden()

        if self.pool_method == 'attention':
            self.attention = Attention(self.rnn_units * 2)
        elif self.pool_method == 'relative_attention':
            self.attention = RelativeAttention(self.rnn_units * 2)
Example #9
    def __init__(self, opt):
        super().__init__()
        self.opt = opt
        with open(
                os.path.join(
                    'data',
                    f'debug{opt.debug}.{opt.dataset}.spacy.wv.{opt.embedding_dim}.pkl'
                ), 'rb') as f:
            emb_m = pickle.load(f)
        self.emb = nn.Embedding.from_pretrained(torch.Tensor(emb_m),
                                                freeze=False)
        self.embedding_drop = nn.Dropout(opt.embedding_drop)
        self.rnn = nn.LSTM(opt.embedding_dim,
                           opt.lstm_hidden_dim,
                           batch_first=True,
                           bidirectional=True)

        self.embedding_drop_2 = nn.Dropout(opt.embedding_drop)
        self.rnn_2 = nn.LSTM(opt.embedding_dim,
                             opt.lstm_hidden_dim,
                             batch_first=True,
                             bidirectional=True)
        self.att = Attention(2 * opt.lstm_hidden_dim, opt.embedding_drop)

        self.fc = nn.Linear(3 * 2 * opt.lstm_hidden_dim,
                            2 * opt.lstm_hidden_dim)
        self.relu = nn.ReLU()
        self.fc_drop = nn.Dropout(opt.fc_drop)
        self.classifier = nn.Linear(opt.lstm_hidden_dim * 2, opt.num_classes)
Example #10
    def init_model(self, input_shape, num_classes, **kwargs):
        inputs = Input(shape=input_shape)
        # bnorm_1 = BatchNormalization(axis=-1)(inputs)
        x = Bidirectional(CuDNNLSTM(96, name='blstm1', return_sequences=True),
                          merge_mode='concat')(inputs)
        # activation_1 = Activation('tanh')(lstm_1)
        x = SpatialDropout1D(0.1)(x)
        x = Attention(8, 16)([x, x, x])
        x1 = GlobalMaxPool1D()(x)
        x2 = GlobalAvgPool1D()(x)
        x = Concatenate(axis=-1)([x1, x2])
        x = Dense(units=128, activation='elu')(x)
        x = Dense(units=64, activation='elu')(x)
        x = Dropout(rate=0.4)(x)
        outputs = Dense(units=num_classes, activation='softmax')(x)

        model = TFModel(inputs=inputs, outputs=outputs)
        optimizer = optimizers.Adam(
            # learning_rate=1e-3,
            lr=1e-3,
            beta_1=0.9,
            beta_2=0.999,
            epsilon=1e-08,
            decay=0.0002,
            amsgrad=True)
        model.compile(optimizer=optimizer,
                      loss='sparse_categorical_crossentropy',
                      metrics=['accuracy'])
        model.summary()
        self._model = model
        self.is_init = True
Example #11
    def __init__(self,
                 vocab_size,
                 embed_dim,
                 hidden_dim,
                 max_len,
                 trg_soi,
                 nlayers=2,
                 dropout_rate=0.2,
                 attention=False,
                 cuda=True):
        super(RNNHighwayDecoder, self).__init__()

        self.hidden_dim = hidden_dim
        self.max_len = max_len
        self.vocab_size = vocab_size
        self.trg_soi = trg_soi
        self.att = attention
        self.cuda = cuda  # note: this attribute shadows nn.Module.cuda(); a distinct flag name would be safer
        self.trainable = True
        self.embed = nn.Embedding(vocab_size, embed_dim)
        self.attention = Attention(self.hidden_dim)
        # DecoderCell(embed_dim, hidden_dim)

        self.decodercell = RHNContextCell(embed_dim,
                                          h=hidden_dim,
                                          depth=nlayers,
                                          gateDrop=dropout_rate)
        self.dec2word = nn.Linear(hidden_dim, vocab_size)
Example #12
    def init_model(self, input_shape, num_classes, **kwargs):
        inputs = Input(shape=input_shape)
        # bnorm_1 = BatchNormalization(axis=2)(inputs)
        lstm_1 = Bidirectional(CuDNNLSTM(64,
                                         name='blstm_1',
                                         return_sequences=True),
                               merge_mode='concat')(inputs)
        activation_1 = Activation('tanh')(lstm_1)
        dropout1 = SpatialDropout1D(0.5)(activation_1)
        attention_1 = Attention(8, 16)([dropout1, dropout1, dropout1])
        pool_1 = GlobalMaxPool1D()(attention_1)
        dropout2 = Dropout(rate=0.5)(pool_1)
        dense_1 = Dense(units=256, activation='relu')(dropout2)
        outputs = Dense(units=num_classes, activation='softmax')(dense_1)

        model = TFModel(inputs=inputs, outputs=outputs)
        optimizer = optimizers.Adam(
            # learning_rate=1e-3,
            lr=1e-3,
            beta_1=0.9,
            beta_2=0.999,
            epsilon=1e-08,
            decay=0.0002,
            amsgrad=True)
        model.compile(optimizer=optimizer,
                      loss='sparse_categorical_crossentropy',
                      metrics=['accuracy'])
        model.summary()
        self._model = model
        self.is_init = True
Example #13
    def __init__(self, config, hidden_size=512, n_layers=8,
                 bidirectional=False, attention=False):
        super(_LSTMModel, self).__init__()
        self.attention = attention

        # lstm layers
        self.lstm = LSTM(64, hidden_size, n_layers,
                         dropout=config.lstm_dropout,
                         bidirectional=bidirectional)

        n_layers *= 2 if bidirectional else 1
        hidden_size *= 2 if bidirectional else 1

        if attention:
            self.att_layer = Attention(hidden_size, (256, hidden_size), batch_first=True)

        self.avg_pooling = AdaptiveAvgPool2d((1, hidden_size))

        # fully connected output layers
        self.gender_out = Sequential(
            Dropout(config.fc_dropout),
            Linear(hidden_size, 3)
        )

        self.accent_out = Sequential(
            Dropout(config.fc_dropout),
            Linear(hidden_size, 16)
        )

        # initialise the network's weights
        self.init_weights()
Example #14
    def __init__(self,
                 attention_dim,
                 embed_dim,
                 decoder_dim,
                 vocab_size,
                 encoder_dim=2048,
                 dropout=0.5):
        super(PureAttention, self).__init__()

        self.encoder_dim = encoder_dim
        self.attention_dim = attention_dim
        self.embed_dim = embed_dim
        self.decoder_dim = decoder_dim
        self.vocab_size = vocab_size
        self.dropout = dropout

        self.attention = Attention(encoder_dim, decoder_dim,
                                   attention_dim)  # attention network

        self.embedding = nn.Embedding(vocab_size, embed_dim)  # embedding layer
        self.dropout = nn.Dropout(p=self.dropout)
        self.decode_step = nn.LSTMCell(embed_dim + encoder_dim,
                                       decoder_dim,
                                       bias=True)  # decoding LSTMCell
        # linear layer to find initial hidden state of LSTMCell
        self.init_h = nn.Linear(encoder_dim, decoder_dim)
        # linear layer to find initial cell state of LSTMCell
        self.init_c = nn.Linear(encoder_dim, decoder_dim)
        # linear layer to create a sigmoid-activated gate
        self.f_beta = nn.Linear(decoder_dim, encoder_dim)
        self.sigmoid = nn.Sigmoid()
        # linear layer to find scores over vocabulary
        self.fc = nn.Linear(decoder_dim, vocab_size)
        # initialize some layers with the uniform distribution
        self.init_weights()
Example #15
    def __init__(self,
                 embed_size,
                 vocab_size,
                 attention_dim,
                 encoder_dim,
                 decoder_dim,
                 drop_prob=0.3):
        super().__init__()

        self.vocab_size = vocab_size
        self.attention_dim = attention_dim
        self.decoder_dim = decoder_dim

        self.embedding = nn.Embedding(vocab_size, embed_size)
        self.attention = Attention(encoder_dim, decoder_dim, attention_dim)

        self.init_h = nn.Linear(encoder_dim, decoder_dim)
        self.init_c = nn.Linear(encoder_dim, decoder_dim)
        self.lstm_cell = nn.LSTMCell(embed_size + encoder_dim,
                                     decoder_dim,
                                     bias=True)
        self.f_beta = nn.Linear(decoder_dim, encoder_dim)

        self.fcn = nn.Linear(decoder_dim, vocab_size)
        self.drop = nn.Dropout(drop_prob)
Example #16
    def multi_get_attention(self, frames):
        frames = self._reshape_to_conv(frames)
        cnn = CNN()
        cnn_output = cnn.create_model(frames, cnn.conv_filters)
        cnn_output = self._reshape_to_rnn(cnn_output)
        rnn = RNN()
        rnn_output = rnn.create_model(cnn_output)
        if self.is_attention:
            attention = Attention(self.batch_size)
            attention_output = attention.attention_analysis(rnn_output)
            return attention_output
        else:
            rnn_output = rnn_output[:, -1, :]
            fc = FC(self.num_classes)
            outputs = fc.create_model(rnn_output)
            return outputs
Example #17
    def init_model(self, input_shape, num_classes, **kwargs):
        inputs = Input(shape=input_shape)
        sequence_len = input_shape[0]
        lstm_units_array = np.array([32, 64, 128, 256, 512])
        lstm_units = lstm_units_array[np.argmin(
            np.abs(lstm_units_array - sequence_len))]
        lstm_1 = CuDNNLSTM(lstm_units, return_sequences=True)(inputs)
        activation_1 = Activation('tanh')(lstm_1)
        if num_classes >= 20:
            if num_classes < 30:
                dropout1 = SpatialDropout1D(0.5)(activation_1)
                attention_1 = Attention(8, 16)([dropout1, dropout1, dropout1])
            else:
                attention_1 = Attention(
                    8, 16)([activation_1, activation_1, activation_1])
            k_num = 10
            # k-max pooling over time: keep the top-k activations per feature channel
            kmaxpool_l = Lambda(lambda x: tf.reshape(
                tf.nn.top_k(tf.transpose(x, [0, 2, 1]), k=k_num, sorted=True)[0],
                shape=[-1, k_num, 128]))(attention_1)
            flatten = Flatten()(kmaxpool_l)
            dropout2 = Dropout(rate=0.5)(flatten)
        else:
            dropout1 = SpatialDropout1D(0.5)(activation_1)
            attention_1 = Attention(8, 16)([dropout1, dropout1, dropout1])
            pool_l = GlobalMaxPool1D()(attention_1)
            dropout2 = Dropout(rate=0.5)(pool_l)
        dense_1 = Dense(units=256, activation='relu')(dropout2)
        # dense_1 = Dense(units=256, activation='softplus', kernel_regularizer=regularizers.l2(0.01),
        #                 activity_regularizer=regularizers.l1(0.01))(dropout2)
        # dense_1 = DropConnect(Dense(units=256, activation='softplus'), prob=0.5)(dropout2)
        outputs = Dense(units=num_classes, activation='softmax')(dense_1)
        loss_fun = CategoricalCrossentropy(label_smoothing=0.2)
        model = TFModel(inputs=inputs, outputs=outputs)
        optimizer = optimizers.Nadam(lr=0.002,
                                     beta_1=0.9,
                                     beta_2=0.999,
                                     epsilon=None,
                                     schedule_decay=0.004)
        model.compile(
            optimizer=optimizer,
            loss=loss_fun,
            # loss="sparse_categorical_crossentropy",
            metrics=['accuracy'])
        model.summary()
        self._model = model
        self.is_init = True
Example #18
    def __init__(self, vocab_size, max_len, hidden_size, embedding_size,
                 sos_id, eos_id, input_dropout_p, dropout_p,
                 position_embedding, pos_embedding, n_layers, bidirectional,
                 rnn_cell, use_attention, attn_layers, hard_attn, pos_add,
                 use_memory, memory_dim):
        super(DecoderRNN,
              self).__init__(vocab_size, max_len, hidden_size, input_dropout_p,
                             dropout_p, n_layers, rnn_cell)

        self.bidirectional_encoder = bidirectional
        self.output_size = vocab_size
        self.attn_layers = attn_layers
        self.max_length = max_len
        self.use_attention = use_attention
        self.hard_attn = hard_attn
        self.eos_id = eos_id
        self.sos_id = sos_id
        self.s_rnn = rnn_cell
        self.init_input = None
        self.embedding_size = embedding_size
        self.embedding = nn.Embedding(self.output_size, embedding_size)
        self.pos_embedding = pos_embedding
        self.position_embedding = position_embedding
        self.pos_add = pos_add
        if pos_add == 'cat':
            rnn_input_size = embedding_size * 2
        else:
            rnn_input_size = embedding_size
        self.rnn = self.rnn_cell(rnn_input_size,
                                 hidden_size,
                                 n_layers,
                                 batch_first=True,
                                 dropout=dropout_p)
        if use_attention:
            if hard_attn:
                self.attention = Attention(self.hidden_size)
                self.hard_attention = HardAttention(self.hidden_size)
                self.out = nn.Linear(self.hidden_size * 2, self.output_size)
            else:
                self.attention1 = Attention(int(self.hidden_size /
                                                attn_layers))
                self.out = nn.Linear(self.hidden_size, self.output_size)
        else:
            self.out = nn.Linear(self.hidden_size, self.output_size)
        self.use_memory = use_memory
        if use_memory is not None:
            self.init_memory_augmented(max_len, memory_dim)
Example #19
    def __init__(self, input_size, output_size):
        super(GRU, self).__init__()
        # integer division: nn.GRU and Attention expect int sizes, not floats
        self.encoder = nn.GRU(input_size,
                              output_size // 4,
                              num_layers=1,
                              batch_first=True,
                              dropout=0.1,
                              bidirectional=True)
        self.attention = Attention(output_size // 2)
        self.linear_filter = nn.Linear(output_size, output_size)
        self.sigmoid = nn.Sigmoid()
        self.post_attention = nn.GRU(output_size,
                                     output_size // 2,
                                     num_layers=1,
                                     batch_first=True,
                                     dropout=0.1,
                                     bidirectional=True)
        self.final_attention = Attention(output_size)
Example #20
def load_model(model_name, model_config, embedding_matrix):
    if model_name == 'deep_cnn':
        model = cnn.DPCnn(model_config)
    elif model_name == 'cnn':
        model = cnn.Cnn(model_config)
    elif model_name == 'attention':
        model = Attention(model_config)
    elif model_name == 'rcnn':
        model = rcnn.RCnn(model_config)
    elif model_name == 'capsule':
        model = capsule.CapsuleRnn(model_config)
    elif model_name == 'hybrid':
        model = hybridnn.HybridNN(model_config)
    else:
        return None
    model.compile(embedding_matrix)
    logging.info('===={} model loaded===='.format(model_name))
    return model
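A hedged usage sketch for load_model follows; model_config and embedding_matrix are assumed to come from the project's configuration and pretrained-embedding loading code, which is not shown here.

model = load_model('attention', model_config, embedding_matrix)
if model is None:
    raise ValueError('unknown model name')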
Example #21
    def __init__(self, filed=80):
        super(IAN_LSTM, self).__init__()
        self.filed = filed
        self.cnn_l = CNN(filed=self.filed)
        self.rnn_l = nn.LSTM(
            input_size=55,
            hidden_size=64,
            num_layers=4,
            batch_first=True)

        self.attention_aspect = Attention(64, score_function='bi_linear')
        self.attention_context = Attention(64, score_function='bi_linear')


        self.linear = nn.Sequential(
                nn.Linear(128, 64),
                nn.Linear(64, 2),
        )
Example #22
    def get_multi_predictions(self, frames):
        frames = self._reshape_to_conv(frames)
        cnn = CNN()
        if self.operation == 'training':
            cnn_output = cnn.create_model(frames,
                                          cnn.conv_filters,
                                          keep_prob=self.keep_prob)
        else:
            cnn_output = cnn.create_model(frames,
                                          cnn.conv_filters,
                                          keep_prob=1.0)
        cnn_output = self._reshape_to_rnn(cnn_output)
        rnn = RNN()
        arousal_rnn_output = rnn.create_model(cnn_output, 'arousal_rnn')
        valence_rnn_output = rnn.create_model(cnn_output, 'valence_rnn')
        dominance_rnn_output = rnn.create_model(cnn_output, 'dominance_rnn')
        if self.is_attention:
            attention = Attention(self.batch_size)
            arousal_attention_output = attention.create_model(
                arousal_rnn_output, 'arousal_attention')
            valence_attention_output = attention.create_model(
                valence_rnn_output, 'valence_attention')
            dominance_attention_output = attention.create_model(
                dominance_rnn_output, 'dominance_attention')
            fc = FC(self.num_classes)
            arousal_fc_outputs = fc.create_model(arousal_attention_output,
                                                 'arousal_fc')
            valence_fc_outputs = fc.create_model(valence_attention_output,
                                                 'valence_fc')
            dominance_fc_outputs = fc.create_model(dominance_attention_output,
                                                   'dominance_fc')
        else:
            arousal_rnn_output = arousal_rnn_output[:, -1, :]
            valence_rnn_output = valence_rnn_output[:, -1, :]
            dominance_rnn_output = dominance_rnn_output[:, -1, :]
            fc = FC(self.num_classes)
            arousal_fc_outputs = fc.create_model(arousal_rnn_output,
                                                 'arousal_fc')
            valence_fc_outputs = fc.create_model(valence_rnn_output,
                                                 'valence_fc')
            dominance_fc_outputs = fc.create_model(dominance_rnn_output,
                                                   'dominance_fc')

        return arousal_fc_outputs, valence_fc_outputs, dominance_fc_outputs
Example #23
def transformer_encoder(emb_dim, MAX_NB_WORDS, MAX_SEQUENCE_LENGTH,
                        embedding_matrix, optimizer):
    context_input = Input(shape=(None, ), dtype='int32')
    response_input = Input(shape=(None, ), dtype='int32')
    #context_input = Input(shape=(MAX_SEQUENCE_LENGTH,), dtype='int32')
    #response_input = Input(shape=(MAX_SEQUENCE_LENGTH,), dtype='int32')
    embedding_layer = Embedding(
        output_dim=emb_dim,
        input_dim=MAX_NB_WORDS,
        input_length=MAX_SEQUENCE_LENGTH,
        weights=[embedding_matrix],
        #mask_zero=True,
        trainable=True)

    embedded_sequences_c = embedding_layer(context_input)
    embedded_dropout_c = Dropout(0.2)(embedded_sequences_c)
    embeddings_final_c = Position_Embedding()(
        embedded_dropout_c)  ## add positional embedding from self-attention
    embedded_sequences_r = embedding_layer(response_input)
    embedded_dropout_r = Dropout(0.2)(embedded_sequences_r)
    embeddings_final_r = Position_Embedding()(embedded_dropout_r)
    print("Now building encoder model with self attention...")

    c_seq = Attention(8, 16)([
        embeddings_final_c, embeddings_final_c, embeddings_final_c
    ])  ## the three embedding inputs serve as the K, V, Q needed for self-attention
    c_seq = GlobalAveragePooling1D()(c_seq)
    c_seq = Dropout(0.2)(c_seq)

    r_seq = Attention(8, 16)([
        embeddings_final_r, embeddings_final_r, embeddings_final_r
    ])  ## the three embedding inputs serve as the K, V, Q needed for self-attention
    r_seq = GlobalAveragePooling1D()(r_seq)
    r_seq = Dropout(0.2)(r_seq)

    # element-wise product of the context and response encodings
    concatenated = Multiply()([c_seq, r_seq])

    out = Dense(1, activation="sigmoid")(concatenated)

    model = Model([context_input, response_input], out)
    model.compile(loss='binary_crossentropy', optimizer=optimizer)
    # print(encoder.summary())
    print(model.summary())
    return model
Example #24
    def init_model(self,
                   input_shape,
                   num_classes,
                   **kwargs):
        inputs = Input(shape=input_shape)
        # bnorm_1 = BatchNormalization(axis=2)(inputs)
        sequence_len = input_shape[0]
        lstm_units_array = np.array([32, 64, 128, 256, 512])
        # pick the LSTM width closest to the input sequence length
        lstm_units = lstm_units_array[np.argmin(
            np.abs(lstm_units_array - sequence_len))]
        lstm_1 = Bidirectional(CuDNNLSTM(lstm_units, name='blstm_1',
                                         return_sequences=True),
                               merge_mode='concat')(inputs)
        activation_1 = Activation('tanh')(lstm_1)
        dropout1 = SpatialDropout1D(0.5)(activation_1)
        attention_1 = Attention(8, 16)([dropout1, dropout1, dropout1])
        pool_1 = GlobalMaxPool1D()(attention_1)
        dropout2 = Dropout(rate=0.5)(pool_1)
        dense_1 = Dense(units=256, activation='relu')(dropout2)
#         dense_1 = Dense(units=256, activation='relu',kernel_regularizer=regularizers.l2(0.01),
#                        activity_regularizer=regularizers.l1(0.01))(dropout2)
        #dense_1 = DropConnect(Dense(units=256, activation='relu'), prob=0.5)(dropout2)
        outputs = Dense(units=num_classes, activation='softmax')(dense_1)

        model = TFModel(inputs=inputs, outputs=outputs)
        loss_fun = CategoricalCrossentropy(label_smoothing=0.2)
        optimizer = optimizers.Adam(
            # learning_rate=1e-3,
            lr=1e-3,
            beta_1=0.9,
            beta_2=0.999,
            epsilon=1e-08,
            decay=0.0002,
            amsgrad=True)
        model.compile(
            optimizer=optimizer,
            loss=loss_fun,
            #loss="sparse_categorical_crossentropy",
            metrics=['accuracy'])
        model.summary()
        self._model = model
        self.is_init = True
Example #25
    def __init__(self, doc_len, text_len, vocab_size, embed_dim,
                 word_hidden_size, sent_hidden_size, title_vocab_size,
                 title_hidden_size, linear_out_size_2, linear_out_size_1,
                 num_classes, dropout):
        super(hanLSTM, self).__init__()

        self.doc_len = doc_len
        self.text_len = text_len
        self.word_hidden_size = word_hidden_size
        self.embed_size = embed_dim
        self.sent_hidden_size = sent_hidden_size
        self.title_hidden_size = title_hidden_size
        self.vocab_size = vocab_size
        self.title_vocab_size = title_vocab_size
        self.num_classes = num_classes
        self.linear_out_size_1 = linear_out_size_1
        self.linear_out_size_2 = linear_out_size_2
        self.embedding = nn.Embedding(self.vocab_size, self.embed_size)
        self.title_embedding = nn.Embedding(self.title_vocab_size,
                                            self.embed_size)
        self.sent_wise_lstms = nn.ModuleList()
        self.sent_wise_attlstms = nn.ModuleList()
        self.dropout = dropout
        for i in range(self.doc_len):
            self.sent_wise_lstms.append(
                nn.Sequential(
                    lstm_cell(self.embed_size, self.word_hidden_size),
                    nn.Dropout(p=self.dropout)))
            self.sent_wise_attlstms.append(Attention(self.word_hidden_size))
        self.doc_lstm = nn.Sequential(
            lstm_cell(self.word_hidden_size, self.sent_hidden_size),
            nn.Dropout(p=self.dropout))
        self.doc_attention = Attention(self.sent_hidden_size)
        self.title_lstm = nn.Sequential(
            lstm_cell(self.embed_size, self.title_hidden_size),
            nn.Dropout(p=self.dropout))
        self.title_attention = Attention(self.title_hidden_size)
        self.linear_stack = nn.Sequential(
            nn.Linear(self.sent_hidden_size + self.title_hidden_size,
                      self.linear_out_size_2), nn.ReLU(),
            nn.Dropout(p=self.dropout),
            nn.Linear(self.linear_out_size_2, self.linear_out_size_1),
            nn.ReLU(), nn.Linear(self.linear_out_size_1, self.num_classes))
Example #26
    def build(self):
        input = Input(shape=(self.max_sequence_len, ))
        embedding_layer = self.embedding_layer(input)

        bi = Bidirectional(GRU(128, return_sequences=True))(embedding_layer)
        att = Attention()(bi)

        output = Dense(self.class_len, activation='sigmoid')(att)
        model = Model(inputs=input, outputs=output)
        return model
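The parameterless Attention() call above implies an attention-pooling layer that collapses the Bidirectional GRU sequence output (batch, timesteps, features) to a single (batch, features) vector before the sigmoid classifier. The sketch below, written against tf.keras, is only an assumption about what such a layer might look like, not the project's actual Attention implementation.

import tensorflow as tf
from tensorflow.keras import layers


class AttentionPool(layers.Layer):
    """Attention-weighted average over timesteps: (batch, time, feat) -> (batch, feat)."""

    def build(self, input_shape):
        self.w = self.add_weight(name='att_weight',
                                 shape=(int(input_shape[-1]), 1),
                                 initializer='glorot_uniform',
                                 trainable=True)
        super().build(input_shape)

    def call(self, inputs):
        # score each timestep, then normalise the scores over the time axis
        scores = tf.tensordot(tf.tanh(inputs), self.w, axes=1)  # (batch, time, 1)
        weights = tf.nn.softmax(scores, axis=1)
        return tf.reduce_sum(weights * inputs, axis=1)          # (batch, feat)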
Example #27
    def build_model(self):
        vocab_size = int(self.config.model.vocab_size)
        embedding_size = int(self.config.model.embedding_size)
        lstm_units = int(self.config.model.lstm_units)
        output_size = int(self.config.model.output_size)
        batch_size = int(self.config.trainer.batch_size)
        use_elmo = bool(self.config.model.use_elmo)

        # input layer
        input_dtype = 'string' if use_elmo else None
        _input = tf.keras.layers.Input(shape=(None, ),
                                       batch_size=batch_size,
                                       dtype=input_dtype)

        # embeddings layer
        if use_elmo:
            embeddings = ElmoEmbeddingLayer()(_input)
            embedding_size = 1024  # hard coded in elmo
        else:
            embeddings = tf.keras.layers.Embedding(vocab_size,
                                                   embedding_size,
                                                   mask_zero=True)(_input)

        bilstm, forward_h, _, backward_h, _ = tf.keras.layers.Bidirectional(
            tf.keras.layers.LSTM(lstm_units,
                                 return_sequences=True,
                                 return_state=True,
                                 dropout=0.2,
                                 recurrent_dropout=0.2,
                                 input_shape=(batch_size, None,
                                              embedding_size)),
            merge_mode='sum')(embeddings)

        state_h = tf.keras.layers.Concatenate()([forward_h, backward_h])

        ctx, attn = Attention(lstm_units)([bilstm, state_h])

        conc = tf.keras.layers.Concatenate()([bilstm, ctx])

        logits = tf.keras.layers.TimeDistributed(
            tf.keras.layers.Dense(output_size))(conc)

        mask = tf.keras.layers.Input(shape=(None, output_size),
                                     batch_size=batch_size)
        masked_logits = tf.keras.layers.Add()([logits, mask])
        output = tf.keras.layers.Softmax()(masked_logits)

        self.model = tf.keras.Model(inputs=[_input, mask],
                                    outputs=output,
                                    name='attention')

        self.model.compile(loss='sparse_categorical_crossentropy',
                           optimizer=tf.keras.optimizers.Adam(),
                           metrics=['acc'])
Example #28
    def __init__(self, output_size, device):
        super(AttentionDecoder, self).__init__()
        self.hidden_size = 128
        self.device = device
        self.dropout = nn.Dropout(0.5)

        self.embedding = nn.Embedding(output_size, self.hidden_size)
        self.attention = Attention(self.hidden_size)
        self.gru = nn.GRU(self.hidden_size * 2,
                          self.hidden_size,
                          batch_first=True)
        self.out = nn.Linear(self.hidden_size, output_size)
Example #29
    def __init__(self, args, data):
        super(Model, self).__init__()
        # number of variables (columns) in the data; D in Table 1 of the paper
        self.variables = data.m
        # number of features in the model's hidden state, set via args.hidRNN
        self.hidR = args.hidRNN
        # number of recurrent layers; e.g. a value of 2 stacks two recurrent layers.
        # Set via args.rnn_layers
        self.layers = args.rnn_layers
        # window size, i.e. how much history the model sees; here 7 * 24 = 168,
        # one week. Set via args.window
        self.window = args.window
        # attention module
        self.attention = Attention(seq_len=self.window, hidden_emb=self.hidR)

        # GRU model
        # see the PyTorch docs: https://pytorch.apachecn.org/docs/1.2/nn.html
        #
        # Parameters:
        #       input_size – The number of expected features in the input x
        #       hidden_size – The number of features in the hidden state h
        #       num_layers – Number of recurrent layers. E.g., setting num_layers=2
        #                    stacks two GRUs, with the second taking in outputs of
        #                    the first and computing the final results. Default: 1
        #       bias – If False, the layer does not use bias weights b_ih and b_hh.
        #              Default: True
        #       batch_first – If True, the input and output tensors are provided as
        #                     (batch, seq, feature). Default: False
        #       dropout – If non-zero, introduces a Dropout layer on the outputs of
        #                 each GRU layer except the last, with dropout probability
        #                 equal to dropout. Default: 0
        #       bidirectional – If True, becomes a bidirectional GRU. Default: False
        #
        # Inputs: input, h_0
        #       input: tensor of shape (seq_len, batch, input_size)
        #       h_0: (num_layers * num_directions, batch, hidden_size), the initial
        #            hidden state; defaults to zeros if not provided
        # Outputs: output, h_n
        #       output: tensor of shape (seq_len, batch, num_directions * hidden_size)
        #       h_n: (num_layers * num_directions, batch, hidden_size), the hidden
        #            state at the final step, which is what is typically used
        #

        self.gru = nn.GRU(input_size=self.variables,
                          hidden_size=self.hidR,
                          num_layers=self.layers,
                          bidirectional=False)

        # fully connected output layer
        # input: the RNN hidden state (concatenated with the attention context)
        # output: one predicted step across all variables, D in Table 1 of the paper
        # the * 2 is there because the attention context is concatenated in
        self.linear = nn.Linear(self.hidR * 2, self.variables)

        # dropout module; drop probability set via args.dropout
        self.dropout = nn.Dropout(p=args.dropout)
        # output activation selected via args.output_fun
        self.output = None
        if args.output_fun == 'sigmoid':
            self.output = torch.sigmoid
        if args.output_fun == 'tanh':
            self.output = torch.tanh
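A forward pass consistent with this constructor might look like the sketch below. It is an assumption (the custom Attention module is taken to map the batch-first GRU outputs to a (batch, hidR) context vector that is concatenated with the last hidden state, which is what the hidR * 2 linear layer suggests), not the repository's actual code.

    def forward(self, x):
        # x: (batch, window, variables); nn.GRU without batch_first expects (seq, batch, feat)
        r = x.permute(1, 0, 2).contiguous()
        outputs, h_n = self.gru(r)                   # outputs: (window, batch, hidR)
        # assumed Attention interface: batch-first sequence in, (batch, hidR) context out
        context = self.attention(outputs.permute(1, 0, 2))
        last_hidden = h_n[-1]                        # (batch, hidR)
        combined = torch.cat([context, last_hidden], dim=1)  # (batch, 2 * hidR)
        out = self.linear(self.dropout(combined))    # (batch, variables)
        if self.output is not None:
            out = self.output(out)
        return out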
Example #30
    def __init__(self, filed=80):
        super(MemNet, self).__init__()
        self.filed = filed
        self.cnn_l = CNN(filed=self.filed)

        self.attention = Attention(40, score_function='mlp')

        self.x_linear = nn.Sequential(nn.Linear(40, 40), )
        self.linear = nn.Sequential(
            nn.Linear(40, 64),
            nn.Linear(64, 2),
        )