def build_baseline(dataset):
    opt = config.parse_opt()
    w_emb = WordEmbedding(dataset.dictionary.ntokens(), 300, opt.EMB_DROPOUT)
    q_emb = QuestionEmbedding(300, opt.NUM_HIDDEN, opt.NUM_LAYER, opt.BIDIRECT,
                              opt.L_RNN_DROPOUT)
    v_emb = VideoEmbedding(opt.C3D_SIZE + opt.RES_SIZE, opt.NUM_HIDDEN,
                           opt.NUM_LAYER, opt.BIDIRECT, opt.L_RNN_DROPOUT)
    v_att = Attention(opt.NUM_HIDDEN, opt.MID_DIM, opt.FC_DROPOUT)
    classifier = SimpleClassifier(opt.NUM_HIDDEN, opt.MID_DIM, 1, opt.FC_DROPOUT)
    return BaseModel(w_emb, q_emb, v_att, classifier, v_emb)

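# SimpleClassifier(in_dim, hid_dim, out_dim, dropout) above (and in several of the
# snippets below) is an external helper that is not shown here. A minimal sketch of
# such a two-layer MLP head, assuming that signature, might look like this:
import torch.nn as nn

class SimpleClassifierSketch(nn.Module):
    def __init__(self, in_dim, hid_dim, out_dim, dropout):
        super().__init__()
        self.main = nn.Sequential(
            nn.Linear(in_dim, hid_dim),
            nn.ReLU(inplace=True),
            nn.Dropout(dropout),
            nn.Linear(hid_dim, out_dim),
        )

    def forward(self, x):
        # x: (batch, in_dim) -> logits of shape (batch, out_dim)
        return self.main(x)
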
def __init__(self, vocab_size, embed_hidden=300, mlp_hidden=512):
    super(TopDown, self).__init__()
    self.vocab_size = vocab_size
    self.v_att = Attention(mlp_hidden, mlp_hidden, mlp_hidden)
    self.q_net = FCNet([mlp_hidden, mlp_hidden])
    self.v_net = FCNet([mlp_hidden, mlp_hidden])
    self.classifier = SimpleClassifier(mlp_hidden, 2 * mlp_hidden,
                                       self.vocab_size, 0.5)
    self.mlp_hidden = mlp_hidden

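# FCNet([d_in, d_out]) above is another external helper: a small stack of
# fully-connected layers sized by the given dimension list. A minimal sketch under
# that assumed interface:
import torch.nn as nn

class FCNetSketch(nn.Module):
    def __init__(self, dims):
        super().__init__()
        layers = []
        for d_in, d_out in zip(dims[:-1], dims[1:]):
            layers.append(nn.Linear(d_in, d_out))
            layers.append(nn.ReLU(inplace=True))
        self.main = nn.Sequential(*layers)

    def forward(self, x):
        return self.main(x)
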
def __init__(self, embed_hidden=300, mlp_hidden=512):
    super(TopDown, self).__init__()
    self.q_emb = nn.LSTM(embed_hidden, mlp_hidden,
                         batch_first=True, bidirectional=True)
    self.q_prep = FCNet([mlp_hidden, mlp_hidden])
    self.lstm_proj = nn.Linear(mlp_hidden * 2, mlp_hidden)
    self.verb_transform = nn.Linear(embed_hidden, mlp_hidden)
    self.v_att = Attention(mlp_hidden, mlp_hidden, mlp_hidden)

def __init__(self, vocab_size, embedding_dim, dec_units, batch_sz):
    super(Decoder, self).__init__()
    self.batch_sz = batch_sz
    self.dec_units = dec_units
    self.embedding = tf.keras.layers.Embedding(vocab_size, embedding_dim)
    self.gru1 = gru(self.dec_units)
    self.gru2 = gru(self.dec_units)
    self.fc = tf.keras.layers.Dense(vocab_size)
    # used for attention
    self.attention = Attention(self.dec_units)

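# The Attention(self.dec_units) used by this decoder is not defined in the snippet.
# A minimal Bahdanau-style additive attention layer, sketched here as an assumption
# about its interface (query = decoder state, values = encoder outputs):
import tensorflow as tf

class BahdanauAttentionSketch(tf.keras.layers.Layer):
    def __init__(self, units):
        super().__init__()
        self.W1 = tf.keras.layers.Dense(units)  # projects encoder outputs
        self.W2 = tf.keras.layers.Dense(units)  # projects the decoder hidden state
        self.V = tf.keras.layers.Dense(1)       # scores each encoder time step

    def call(self, query, values):
        # query: (batch, hidden); values: (batch, time, hidden)
        query_with_time_axis = tf.expand_dims(query, 1)
        score = self.V(tf.nn.tanh(self.W1(values) + self.W2(query_with_time_axis)))
        attention_weights = tf.nn.softmax(score, axis=1)          # (batch, time, 1)
        context_vector = tf.reduce_sum(attention_weights * values, axis=1)
        return context_vector, attention_weights
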
def built_attention_model():
    S_inputs = Input(shape=(None,), dtype='int32')
    embeddings = Embedding(cfg.max_word, 128)(S_inputs)
    # Adding Position_Embedding gives a small accuracy improvement
    embeddings = Position_Embedding()(embeddings)
    O_seq = Attention(8, 16)([embeddings, embeddings, embeddings])
    O_seq = GlobalAveragePooling1D()(O_seq)
    O_seq = Dropout(0.2)(O_seq)
    outputs = Dense(cfg.num_classes, activation='sigmoid')(O_seq)
    model = Model(inputs=S_inputs, outputs=outputs)
    model.summary()
    return model

def build_baseline0(dataset, num_hid):
    w_emb = WordEmbedding(dataset.dictionary.ntoken, 300, 0.0)
    q_emb = QuestionEmbedding(300, num_hid, 1, False, 0.0)
    c_emb = QuestionEmbedding(300, num_hid, 1, False, 0.0)
    v_att = Attention(dataset.v_dim, q_emb.num_hid, num_hid)
    q_net = FCNet([num_hid, num_hid])
    v_net = FCNet([dataset.v_dim, num_hid])
    c_net = FCNet([num_hid, num_hid])
    classifier = SimpleClassifier(2 * num_hid, 2 * num_hid,
                                  dataset.num_ans_candidates, 0.5)
    return BaseModel(w_emb, q_emb, c_emb, v_att, q_net, v_net, c_net, classifier)

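# Attention(dataset.v_dim, q_emb.num_hid, num_hid) above is an external module. A
# minimal sketch of question-guided attention over image regions, assuming it maps
# region features v (batch, k, v_dim) and a question vector q (batch, q_dim) to
# normalized weights over the k regions:
import torch
import torch.nn as nn

class RegionAttentionSketch(nn.Module):
    def __init__(self, v_dim, q_dim, num_hid):
        super().__init__()
        self.v_proj = nn.Linear(v_dim, num_hid)
        self.q_proj = nn.Linear(q_dim, num_hid)
        self.score = nn.Linear(num_hid, 1)

    def forward(self, v, q):
        # fuse each region with the question, score it, normalize over regions
        joint = torch.relu(self.v_proj(v) + self.q_proj(q).unsqueeze(1))
        logits = self.score(joint)              # (batch, k, 1)
        return torch.softmax(logits, dim=1)
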
def build_baseline1(dataset, num_hid):
    w_emb = WordEmbedding(dataset.dictionary.ntoken, 300, 0.0)
    w_emb2 = WordEmbedding(dataset.dictionary.ntoken, num_hid, 0.0)
    q_emb = QuestionEmbedding(300, num_hid, 1, False, 0.0)
    v_att = Attention(dataset.v_dim, q_emb.num_hid, num_hid)
    q_net = FCNet([num_hid, num_hid])
    v_net = FCNet([dataset.v_dim, num_hid])
    lstm = nn.LSTM(num_hid, num_hid, 1, batch_first=True)
    classifier = SimpleClassifier(num_hid, 2 * num_hid,
                                  dataset.num_ans_candidates, 0.5)
    return BaseModel(w_emb, q_emb, v_att, q_net, v_net, classifier, lstm, w_emb2)

def __init__(self, embedding_matrix):
    super(RAM, self).__init__()
    self.embed = nn.Embedding.from_pretrained(
        torch.tensor(embedding_matrix, dtype=torch.float))
    self.bi_lstm = nn.LSTM(embed_dim, hidden_dim, lstm_layers,
                           batch_first=True, bidirectional=True)
    self.attention = Attention(hidden_dim * 2, score_function='mlp')
    self.gru_cell = nn.GRUCell(hidden_dim * 2, hidden_dim * 2)
    self.dense = nn.Linear(hidden_dim * 2, polarities_dim)

def build_model(self):
    input_ = Input((self.maxlen, ))
    emb = Embedding(input_dim=self.max_features, output_dim=self.emb_dim,
                    input_length=self.maxlen)(input_)
    enc = Bidirectional(LSTM(128, activation='tanh', return_sequences=True))(emb)
    enc = Attention(self.maxlen)(enc)
    output = Dense(self.class_num, activation=self.last_activation)(enc)
    model = Model(input_, output)
    return model

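# Attention(self.maxlen) above collapses the BiLSTM outputs (batch, maxlen, features)
# into one vector per example. The layer itself is not shown; a minimal additive
# attention-pooling layer with that assumed interface could be sketched as:
from tensorflow.keras import backend as K
from tensorflow.keras.layers import Layer

class AttentionPoolingSketch(Layer):
    def __init__(self, step_dim, **kwargs):
        super().__init__(**kwargs)
        self.step_dim = step_dim

    def build(self, input_shape):
        # one score weight per feature, one bias per time step
        self.W = self.add_weight(name='att_W', shape=(int(input_shape[-1]),),
                                 initializer='glorot_uniform')
        self.b = self.add_weight(name='att_b', shape=(self.step_dim,),
                                 initializer='zeros')
        super().build(input_shape)

    def call(self, x):
        # x: (batch, step_dim, features) -> attention-weighted sum (batch, features)
        e = K.squeeze(K.dot(x, K.expand_dims(self.W)), axis=-1) + self.b
        a = K.softmax(K.tanh(e), axis=-1)
        return K.sum(x * K.expand_dims(a, axis=-1), axis=1)

    def compute_output_shape(self, input_shape):
        return (input_shape[0], input_shape[-1])
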
def __init__(self, input_dim, hidden_dim, num_layers, dropout, device):
    super(UtteranceGRU, self).__init__()
    self.device = device
    self.gru = nn.GRU(input_size=input_dim, hidden_size=hidden_dim,
                      bidirectional=True, num_layers=num_layers, batch_first=True)
    self.linear1 = nn.Linear(hidden_dim * 2, hidden_dim)
    self.linear2 = nn.Linear(hidden_dim * 2, hidden_dim)
    self.dropout = nn.Dropout(dropout)
    self.attention = Attention(hidden_dim * 2)

def __init__(self, units, vocab_size, embedding_dim):
    super().__init__()
    self.embedding_layer = tf.keras.layers.Embedding(vocab_size, embedding_dim)
    self.attention_layer = Attention(units)
    self.gru_layer = tf.keras.layers.GRU(units,
                                         return_sequences=True,
                                         return_state=True,
                                         recurrent_initializer="glorot_uniform")
    self.prediction_dense = tf.keras.layers.Dense(vocab_size)

def __init__(self, config, pretrained_embedding=None):
    super(NewsEncoder, self).__init__()
    self.config = config
    self.multi_head_self_attention = SelfAttention(config.nb_head,
                                                   config.embedding_dim)
    self.attention = Attention(config.attention_dim, config.embedding_dim)
    if pretrained_embedding is None:
        self.word_embedding = nn.Embedding(config.word_num,
                                           config.embedding_dim,
                                           padding_idx=0)
    else:
        self.word_embedding = nn.Embedding.from_pretrained(
            pretrained_embedding, freeze=False)

def __init__(self, model, params, vocabulary, attention_key_size):
    self.vocabulary = vocabulary
    self.attention_module = Attention(model,
                                      params.decoder_state_size,
                                      attention_key_size,
                                      attention_key_size)
    self.state_transform_weights = du.add_params(
        model,
        (params.decoder_state_size + attention_key_size,
         params.decoder_state_size),
        "weights-state-transform")
    self.vocabulary_weights = du.add_params(
        model, (params.decoder_state_size, len(vocabulary)),
        "weights-vocabulary")
    self.vocabulary_biases = du.add_params(model, tuple([len(vocabulary)]),
                                           "biases-vocabulary")

def get_model(self):
    input = Input((self.maxlen, ))
    embedding = Embedding(self.max_features, self.embedding_dims,
                          input_length=self.maxlen)(input)
    x = Bidirectional(CuDNNLSTM(128, return_sequences=True))(embedding)  # LSTM or GRU
    x = Attention(self.maxlen)(x)
    output = Dense(self.class_num, activation=self.last_activation)(x)
    model = Model(inputs=input, outputs=output)
    return model

def get_model(self):
    # Word part
    input_word = Input(shape=(self.maxlen_word, ))
    x_word = Embedding(self.max_features, self.embedding_dims,
                       input_length=self.maxlen_word)(input_word)
    x_word = Bidirectional(CuDNNLSTM(128, return_sequences=True))(x_word)  # LSTM or GRU
    x_word = Attention(self.maxlen_word)(x_word)
    model_word = Model(input_word, x_word)

    # Sentence part
    input = Input(shape=(self.maxlen_sentence, self.maxlen_word))
    x_sentence = TimeDistributed(model_word)(input)
    x_sentence = Bidirectional(CuDNNLSTM(128, return_sequences=True))(x_sentence)  # LSTM or GRU
    x_sentence = Attention(self.maxlen_sentence)(x_sentence)
    output = Dense(self.class_num, activation=self.last_activation)(x_sentence)
    model = Model(inputs=input, outputs=output)
    return model

def __init__(self, vocab_size, embed_hidden=300, mlp_hidden=512):
    super(TopDown, self).__init__()
    self.vocab_size = vocab_size
    self.v_att = Attention(mlp_hidden, mlp_hidden, mlp_hidden)
    self.q_net = FCNet([mlp_hidden, mlp_hidden])
    self.v_net = FCNet([mlp_hidden, mlp_hidden])

def embedding_RNN_1_lstm_attention(input_shape):
    device = device_lib.list_local_devices()[0].device_type
    input = Input(batch_shape=input_shape)
    if device == 'CPU':
        x = Bidirectional(LSTM(units=32, return_sequences=True))(input)
    else:
        x = Bidirectional(CuDNNLSTM(units=32, return_sequences=True))(input)
    x, attention = Attention(return_attention=True)(x)
    return x, input, attention

def __init__(self, config, pretrained_embedding=None):
    super(NewsEncoder, self).__init__()
    self.config = config
    if pretrained_embedding is None:
        self.word_embedding = nn.Embedding(config.word_num,
                                           config.embedding_dim,
                                           padding_idx=0)
    else:
        self.word_embedding = nn.Embedding.from_pretrained(
            pretrained_embedding, freeze=False)
    self.category_embedding = nn.Embedding(config.category_num,
                                           config.category_embedding_dim,
                                           padding_idx=0)
    self.subcategory_embedding = nn.Embedding(config.subcategory_num,
                                              config.category_embedding_dim,
                                              padding_idx=0)
    self.title_cnn = nn.Conv2d(1, config.num_filters,
                               (config.window_size, config.embedding_dim),
                               padding=(1, 0))
    self.abstract_cnn = nn.Conv2d(1, config.num_filters,
                                  (config.window_size, config.embedding_dim),
                                  padding=(1, 0))
    self.title_attention = Attention(config.attention_dim, config.num_filters)
    self.abstract_attention = Attention(config.attention_dim, config.num_filters)
    self.category_dense = nn.Linear(config.category_embedding_dim,
                                    config.num_filters)
    self.subcategory_dense = nn.Linear(config.category_embedding_dim,
                                       config.num_filters)
    self.view_attention = Attention(config.attention_dim, config.num_filters)

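# Attention(config.attention_dim, config.num_filters) above pools a sequence of
# filter activations into a single vector. A minimal PyTorch sketch under that
# assumed interface (input: (batch, seq_len, input_dim)):
import torch
import torch.nn as nn

class AdditiveAttentionSketch(nn.Module):
    def __init__(self, attention_dim, input_dim):
        super().__init__()
        self.proj = nn.Linear(input_dim, attention_dim)
        self.query = nn.Linear(attention_dim, 1, bias=False)

    def forward(self, x):
        # score every position, softmax over the sequence, weighted sum of inputs
        scores = self.query(torch.tanh(self.proj(x)))   # (batch, seq_len, 1)
        weights = torch.softmax(scores, dim=1)
        return torch.sum(weights * x, dim=1)            # (batch, input_dim)
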
def __init__(self):
    super(Model, self).__init__()
    self.embed = nn.Embedding(config.vocab_size, config.embed_dim).cuda()
    self.gru = nn.GRU(300, config.hidden_dim).cuda()
    self.bi_gru = nn.GRU(300, config.hidden_dim, bidirectional=True).cuda()
    self.output_linear = nn.Linear(
        1 * config.batch_size * config.hidden_dim, 1).cuda()
    self.linear = nn.Linear(
        config.hidden_dim * 2, config.hidden_dim).cuda()  # output: (L, B, 2*H) -> (L, B, H)
    self.hidden_linear = nn.Linear(
        1 * config.batch_size * config.hidden_dim,
        1 * config.batch_size * config.hidden_dim).cuda()
    self.attention = Attention()

def build_caq_newatt(dataset, num_hid):
    w_emb = WordEmbedding(dataset.dictionary.ntoken, 300, 0.0)
    q_emb = QuestionEmbedding(300, num_hid, 1, False, 0.0)
    v_att = Attention(dataset.v_dim, q_emb.num_hid, num_hid)
    q_net = FCNet([q_emb.num_hid, num_hid // 2])
    v_net = FCNet([dataset.v_dim, num_hid // 2])
    updated_query_composer = FCNet([num_hid + num_hid // 2, num_hid])
    neighbour_attention = MultiHeadedAttention(4, num_hid // 2, dropout=0.1)
    Dropout_C = nn.Dropout(0.1)
    classifier = SimpleClassifier(num_hid // 2, num_hid * 2,
                                  dataset.num_ans_candidates + 1, 0.5)
    return CAQModel(w_emb, q_emb, v_att, q_net, v_net, updated_query_composer,
                    neighbour_attention, Dropout_C, classifier, dataset)

def __init__(self, h, d_model, dropout=0.1):
    super().__init__()
    assert d_model % h == 0
    # We assume d_v always equals d_k
    self.d_k = d_model // h
    self.h = h
    self.linear_layers = nn.ModuleList(
        [nn.Linear(d_model, d_model) for _ in range(3)])
    self.output_linear = nn.Linear(d_model, d_model)
    self.attention = Attention()
    self.dropout = nn.Dropout(p=dropout)

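# The parameter-free Attention() above is presumably scaled dot-product attention
# applied to the per-head projections. A minimal sketch under that assumption:
import math
import torch
import torch.nn as nn
import torch.nn.functional as F

class ScaledDotProductAttentionSketch(nn.Module):
    def forward(self, query, key, value, mask=None, dropout=None):
        # query/key/value: (batch, heads, seq_len, d_k)
        scores = torch.matmul(query, key.transpose(-2, -1)) / math.sqrt(query.size(-1))
        if mask is not None:
            scores = scores.masked_fill(mask == 0, -1e9)
        p_attn = F.softmax(scores, dim=-1)
        if dropout is not None:
            p_attn = dropout(p_attn)
        return torch.matmul(p_attn, value), p_attn
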
def attentionModel(self):
    inputs = Input(shape=(self._sequence_long, self._features))
    encoded = LSTM(self._lstm_neurons, return_sequences=True,
                   activation="tanh")(inputs)
    decoded = Attention()(encoded)
    decoded = RepeatVector(self._sequence_long)(decoded)
    decoded = LSTM(self._features, return_sequences=True)(decoded)
    autoencoder = Model(inputs=inputs, outputs=decoded)
    encoder = Model(inputs=inputs, outputs=encoded)
    autoencoder.compile(loss='categorical_crossentropy', optimizer='adam',
                        metrics=['accuracy'])
    autoencoder.summary()
    return autoencoder, encoder

def __init__(self, hidden_size, output_size, n_layers=3, dropout_p=0.1):
    super(AttentionDecoder, self).__init__()
    self.hidden_size = hidden_size
    self.output_size = output_size
    self.n_layers = n_layers
    self.dropout_p = dropout_p
    self.embedding = nn.Embedding(output_size, hidden_size)
    self.gru = nn.GRU(hidden_size * 2, hidden_size, n_layers, dropout=dropout_p)
    self.linear = nn.Linear(hidden_size * 2, output_size)
    self.attention = Attention(hidden_size)

def BidLstmMpAtn(inp, max_len, max_features, embed_size, embedding_matrix):
    x = Embedding(max_features, embed_size, weights=[embedding_matrix],
                  trainable=False)(inp)
    x = Bidirectional(CuDNNLSTM(50, return_sequences=True))(x)
    x = Dropout(0.25)(x)
    x = Bidirectional(CuDNNLSTM(50, return_sequences=True))(x)
    x = Dropout(0.25)(x)
    x1 = Attention(max_len)(x)
    x2 = GlobalMaxPooling1D()(x)
    x = concatenate([x1, x2])
    return x

def __init__(self, vocab_size: int, hidden_dim: int, dropout_rate: float,
             *args, **kwargs):
    super().__init__(*args, **kwargs)
    self.vocab_size = vocab_size
    self.hidden_dim = hidden_dim
    attention_base_layer = Attention(depth=hidden_dim)
    ffn_base_layer = FeedForwardNetwork(hidden_dim=hidden_dim,
                                        dropout_rate=dropout_rate)
    self.attention = AddNormalizationWrapper(attention_base_layer, dropout_rate)
    self.ffn = AddNormalizationWrapper(ffn_base_layer, dropout_rate)
    self.output_normalization = LayerNormalization()

def get_model(self):
    input = Input((self.maxlen, ), name="input")
    embedding = Embedding(self.max_features, self.embedding_dims,
                          input_length=self.maxlen,
                          weights=[self.embedding_matrix])(input)
    x = Bidirectional(LSTM(128, return_sequences=True))(embedding)  # LSTM or GRU
    x = Attention(self.maxlen, name="attention")(x)
    output = Dense(self.class_num, activation=self.last_activation)(x)
    model = Model(inputs=input, outputs=output)
    model.trainable = False
    return model

def __init__(self, vocab_size, emb_dim, n_hidden, bidirectional, n_layer,
             dropout=0.0):
    super().__init__()
    # embedding weight parameter is shared between encoder, decoder,
    # and used as final projection layer to vocab logit
    # and can be initialized with pretrained word vectors
    self._embedding = nn.Embedding(vocab_size, emb_dim, padding_idx=0)
    self._enc_lstm = nn.LSTM(emb_dim, n_hidden, n_layer,
                             bidirectional=bidirectional, dropout=dropout)
    state_layer = n_layer * (2 if bidirectional else 1)
    self._init_enc_h = nn.Parameter(torch.Tensor(state_layer, n_hidden))
    self._init_enc_c = nn.Parameter(torch.Tensor(state_layer, n_hidden))
    init.uniform_(self._init_enc_h, -INIT, INIT)
    init.uniform_(self._init_enc_c, -INIT, INIT)
    self._dec_lstm = MultiLayerLSTMCells(2 * emb_dim, n_hidden, n_layer,
                                         dropout=dropout)
    # project encoder final states to decoder initial states
    enc_out_dim = n_hidden * (2 if bidirectional else 1)
    self._dec_h = nn.Linear(enc_out_dim, n_hidden, bias=False)
    self._dec_c = nn.Linear(enc_out_dim, n_hidden, bias=False)
    # multiplicative attention
    self._attn_wm = nn.Parameter(torch.Tensor(enc_out_dim, n_hidden))
    self._attn_wq = nn.Parameter(torch.Tensor(n_hidden, n_hidden))
    init.xavier_normal_(self._attn_wm)
    init.xavier_normal_(self._attn_wq)
    # attention layer
    self._attention = Attention(n_hidden, n_hidden)
    # project decoder output to emb_dim, then
    # apply weight matrix from embedding layer
    self._projection = nn.Sequential(
        nn.Linear(2 * n_hidden, n_hidden),
        nn.Tanh(),
        nn.Linear(n_hidden, emb_dim, bias=False))
    # functional object for easier usage
    self._decoder = AttentionalDecoder(self._embedding, self._dec_lstm,
                                       self._attn_wq, self._attention,
                                       self._projection)

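# The Attention(n_hidden, n_hidden) module above is external; together with the
# _attn_wm/_attn_wq parameters it realizes multiplicative (bilinear) attention over
# the projected encoder memory. A minimal functional sketch of that scoring step,
# under assumed shapes:
import torch
import torch.nn.functional as F

def multiplicative_attention_sketch(query, memory, mask=None):
    # query: (batch, n_hidden); memory: (batch, seq_len, n_hidden), already projected
    scores = torch.bmm(memory, query.unsqueeze(2)).squeeze(2)      # (batch, seq_len)
    if mask is not None:
        scores = scores.masked_fill(mask == 0, -1e18)
    weights = F.softmax(scores, dim=-1)
    context = torch.bmm(weights.unsqueeze(1), memory).squeeze(1)   # (batch, n_hidden)
    return context, weights
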
def set_cnn_model_attention(input_dim=4, input_length=2701):
    attention_reg_x = 0.25
    attention_reg_xr = 1
    attentionhidden_x = 16
    attentionhidden_xr = 8
    nbfilter = 16

    input = Input(shape=(input_length, input_dim))
    x = conv.Convolution1D(nbfilter, 10, border_mode="valid")(input)
    x = Dropout(0.5)(x)
    x = Activation('relu')(x)
    x = conv.MaxPooling1D(pool_length=3)(x)
    x_reshape = core.Reshape((x._keras_shape[2], x._keras_shape[1]))(x)
    x = Dropout(0.5)(x)
    x_reshape = Dropout(0.5)(x_reshape)

    decoder_x = Attention(hidden=attentionhidden_x, activation='linear')  # success
    decoded_x = decoder_x(x)
    output_x = myFlatten(x._keras_shape[2])(decoded_x)

    decoder_xr = Attention(hidden=attentionhidden_xr, activation='linear')
    decoded_xr = decoder_xr(x_reshape)
    output_xr = myFlatten(x_reshape._keras_shape[2])(decoded_xr)

    output = merge([output_x, output_xr, Flatten()(x)], mode='concat')
    # output = BatchNormalization()(output)
    output = Dropout(0.5)(output)
    print(output.shape)
    output = Dense(nbfilter * 10, activation="relu")(output)
    output = Dropout(0.5)(output)
    out = Dense(2, activation='softmax')(output)
    # output = BatchNormalization()(output)
    model = Model(input, out)
    model.compile(loss='categorical_crossentropy', optimizer='rmsprop')
    return model

def dual_bert():
    set_seed(33)
    opt = Adam(learning_rate=2e-5)

    id1 = Input((MAX_SEQUENCE_LENGTH, ), dtype=tf.int32)
    id2 = Input((MAX_SEQUENCE_LENGTH, ), dtype=tf.int32)
    mask1 = Input((MAX_SEQUENCE_LENGTH, ), dtype=tf.int32)
    mask2 = Input((MAX_SEQUENCE_LENGTH, ), dtype=tf.int32)
    atn1 = Input((MAX_SEQUENCE_LENGTH, ), dtype=tf.int32)
    atn2 = Input((MAX_SEQUENCE_LENGTH, ), dtype=tf.int32)

    config = BertConfig()
    config.output_hidden_states = False  # Set to True to obtain hidden states
    bert_model1 = TFBertModel.from_pretrained('bert-base-uncased', config=config)
    bert_model2 = TFBertModel.from_pretrained('bert-base-uncased', config=config)

    embedding1 = bert_model1(id1, attention_mask=mask1, token_type_ids=atn1)[0]
    embedding2 = bert_model2(id2, attention_mask=mask2, token_type_ids=atn2)[0]

    x = Concatenate()([embedding1, embedding2])
    x = keras.layers.Bidirectional(      # the Bidirectional wrapper makes the LSTM bidirectional
        keras.layers.LSTM(               # the underlying (unidirectional) LSTM
            64,
            kernel_initializer='he_normal',  # weight initialization
            return_sequences=True))(x)       # return the output for every token;
                                             # if False, only the last step is returned
    # x = Lambda(lambda x: x[:, 0], name='CLS-token')(x)  # take the CLS token (reduces dimensionality)
    # x1 = GlobalAveragePooling1D()(embedding1)
    # x2 = GlobalAveragePooling1D()(embedding2)
    # x = Concatenate()([x1, x2])
    x = Attention(128)(x)  # apply attention
    x = Dense(64, activation='relu')(x)
    x = Dropout(0.2)(x)
    # out = Dense(len(map_label), activation='softmax')(x)
    out = Dense(5, activation='softmax')(x)

    model = Model(inputs=[id1, mask1, atn1, id2, mask2, atn2], outputs=out)
    model.compile(loss='sparse_categorical_crossentropy', optimizer=opt,
                  metrics=['accuracy'])  # add an evaluation metric
    return model

def __init__(self, encoder, gpu_mode, embed_hidden=300, mlp_hidden=512):
    super(BaseModel, self).__init__()

    self.normalize = tv.transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                             std=[0.229, 0.224, 0.225])
    self.train_transform = tv.transforms.Compose([
        tv.transforms.RandomRotation(10),
        tv.transforms.RandomResizedCrop(224),
        tv.transforms.RandomHorizontalFlip(),
        tv.transforms.ToTensor(),
        self.normalize,
    ])
    self.dev_transform = tv.transforms.Compose([
        tv.transforms.Resize(224),
        tv.transforms.CenterCrop(224),
        tv.transforms.ToTensor(),
        self.normalize,
    ])

    self.encoder = encoder
    self.gpu_mode = gpu_mode
    self.n_roles = self.encoder.get_num_roles()
    self.n_verbs = self.encoder.get_num_verbs()
    self.vocab_size = self.encoder.get_num_labels()
    self.max_role_count = self.encoder.get_max_role_count()
    self.n_role_q_vocab = len(self.encoder.question_words)

    # self.conv = vgg16_modified()
    self.down = nn.Linear(mlp_hidden * 2, mlp_hidden)
    self.verb_lookup = nn.Embedding(self.n_verbs, embed_hidden)
    self.w_emb = nn.Embedding(self.n_role_q_vocab + 1, embed_hidden,
                              padding_idx=self.n_role_q_vocab)
    self.q_emb = nn.LSTM(embed_hidden, mlp_hidden,
                         batch_first=True, bidirectional=True)
    self.q_prep = FCNet([mlp_hidden, mlp_hidden])
    self.lstm_proj = nn.Linear(mlp_hidden * 2, mlp_hidden)
    self.verb_transform = nn.Linear(embed_hidden, mlp_hidden)
    self.v_att = Attention(mlp_hidden, mlp_hidden, mlp_hidden)
    self.q_net = FCNet([mlp_hidden, mlp_hidden])
    self.v_net = FCNet([mlp_hidden, mlp_hidden])
    self.classifier = SimpleClassifier(mlp_hidden, 2 * mlp_hidden,
                                       self.vocab_size, 0.5)
    # self.conv_hidden = self.conv.base_size()
    self.mlp_hidden = mlp_hidden
    self.embed_hidden = embed_hidden