def build_discriminator():
    input_line = Input(shape=IMG_SHAPE)
    input_shade = Input(shape=IMG_SHAPE)
    input_cond = Input((3, ))
    input_img = Composite()([input_line, input_shade])

    x = CoordinateChannel2D()(PixelwiseConcat()([input_img, input_cond]))
    x = residual_block_downscaling(x, (8, 8, 32))
    x = residual_block(x, (8, 8, 32))
    x = residual_block_downscaling(x, (16, 16, 64))
    x = residual_block(x, (16, 16, 64))
    x = residual_block_downscaling(x, (32, 32, 128))
    x = residual_block(x, (32, 32, 128))
    x = SelfAttention()(x)
    x = residual_block_downscaling(x, (64, 64, 256))
    x = residual_block(x, (64, 64, 256))
    x = SelfAttention()(x)
    x = residual_block_downscaling(x, (128, 128, 512))
    x = residual_block(x, (128, 128, 512))

    x = GlobalAvgPool2D()(x)
    features = Dropout(0.3)(x)
    x = Dense(256)(features)
    validity = Dense(1, activation='sigmoid')(x)
    return Model([input_cond, input_line, input_shade], validity)
def model(x_train, num_labels, LSTM_units, num_conv_filters, batch_size, F, D):
    """
    The proposed model with CNN layer, LSTM RNN layer and self-attention layers.

    Inputs:
    - x_train: required for creating the input shape for the RNN layer in Keras
    - num_labels: number of output classes (int)
    - LSTM_units: number of RNN units (int)
    - num_conv_filters: number of CNN filters (int)
    - batch_size: number of samples to be processed in each batch
    - F: the attention length (int)
    - D: the length of the output (int)

    Returns
    - model: A Keras model
    """
    cnn_inputs = Input(shape=(x_train.shape[1], x_train.shape[2], 1),
                       batch_size=batch_size, name='rnn_inputs')
    cnn_layer = Conv2D(num_conv_filters,
                       kernel_size=(1, x_train.shape[2]),
                       strides=(1, 1),
                       padding='valid',
                       data_format="channels_last")
    cnn_out = cnn_layer(cnn_inputs)

    sq_layer = Lambda(lambda x: K.squeeze(x, axis=2))
    sq_layer_out = sq_layer(cnn_out)

    rnn_layer = LSTM(LSTM_units, return_sequences=True, name='lstm',
                     return_state=True)
    rnn_layer_output, _, _ = rnn_layer(sq_layer_out)

    encoder_output, attention_weights = SelfAttention(
        size=F, num_hops=D, use_penalization=False,
        batch_size=batch_size)(rnn_layer_output)

    dense_layer = Dense(num_labels, activation='softmax')
    dense_layer_output = dense_layer(encoder_output)

    model = Model(inputs=cnn_inputs, outputs=dense_layer_output)
    print(model.summary())
    return model
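# Minimal usage sketch for the builder above (not from the original source).
# The input shape, class count and hyperparameter values below are illustrative
# assumptions; x_train is assumed to have shape (samples, timesteps, features).
import numpy as np

x_train_demo = np.random.rand(256, 128, 9).astype('float32')
clf = model(x_train_demo, num_labels=6, LSTM_units=32, num_conv_filters=64,
            batch_size=64, F=32, D=10)
clf.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])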
def _build_eneityencoder(self, embedding_layer, name):
    """The main function to create the entity encoder of NRMS.

    Args:
        embedding_layer (obj): embedding layer.  # LP modified

    Return:
        obj: the entity encoder of NRMS.
    """
    hparams = self.hparams
    sequences_input_title = keras.Input(shape=(hparams['title_size'], ),
                                        dtype="int32",
                                        name='sequences_input_title')
    embedded_sequences_title = embedding_layer(
        sequences_input_title)  # TODO: this shape may be wrong

    y = layers.Dropout(hparams['dropout'])(embedded_sequences_title)
    y = SelfAttention(hparams['head_num'], hparams['head_dim'],
                      seed=self.seed)([y, y, y])
    y = layers.Dropout(hparams['dropout'])(y)
    pred_title = AttLayer2(hparams['attention_hidden_dim'], seed=self.seed)(y)

    model = keras.Model(sequences_input_title, pred_title, name=name)
    return model
def __init__(self, num_classes: int):
    super(BertAttentionClassifier, self).__init__()
    self.bert = BertModel.from_pretrained('bb_lm_ft/')
    self.num_classes = num_classes
    self.linear1 = nn.Linear(self.bert.config.hidden_size, 256)
    self.self_attention = SelfAttention(256, batch_first=True, non_linearity="tanh")
    self.out = nn.Linear(256, num_classes)
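# Hedged usage sketch for the classifier above (illustrative, not from the original
# source). It assumes the fine-tuned BERT weights saved under 'bb_lm_ft/' are
# available locally; the forward pass that wires bert -> linear1 -> self_attention
# -> out is defined elsewhere in the original code and is not reproduced here.
clf = BertAttentionClassifier(num_classes=3)
print(sum(p.numel() for p in clf.parameters()))  # rough parameter count as a sanity check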
def __init__(self, bert_weights: str):
    super(BertBinaryClassifier, self).__init__()
    self.bert = BertModel.from_pretrained(bert_weights)
    # Freeze Bert Params
    for param in list(self.bert.parameters())[:-10]:
        param.requires_grad = False
    self.dropout = nn.Dropout(p=.10)
    self.linear1 = nn.Linear(self.bert.config.hidden_size, 512)
    self.attention = SelfAttention(512, batch_first=True)
    self.clf = nn.Linear(512, 2)
def _build_newsencoder(self, embedding_layer):
    hparams = self.hparams
    sequences_input_title = keras.Input(shape=(hparams.doc_size, ), dtype="int32")
    # embedded_sequences_title = embedding_layer(sequences_input_title)
    input_ids = sequences_input_title
    input_mask = keras.Input(shape=(hparams.doc_size, ), dtype="int32")
    segment_ids = keras.Input(shape=(hparams.doc_size, ), dtype="int32")
    input_len = keras.Input(shape=(1, ), dtype="int32")

    # bert_inputs = dict(
    #     input_ids=input_ids,
    #     input_mask=input_mask,
    #     segment_ids=segment_ids)
    bert_inputs = [input_ids, input_mask, segment_ids]

    # bert_path = 'https://tfhub.dev/google/small_bert/bert_uncased_L-12_H-128_A-2/1'
    # mybert = hub.Module(
    #     bert_path,
    #     trainable=True,
    #     # name="{}_module".format(self.name)
    # )
    # bert_inputs = dict(
    #     input_ids=input_ids, input_mask=input_mask, segment_ids=segment_ids
    # )
    # bert_output = mybert(inputs=bert_inputs, signature="tokens", as_dict=True)['sequence_output']
    # print('???sequences_input_title: ', sequences_input_title)
    # bert_inputs = [sequences_input_title[:, x, :] for x in range(3)]
    # bert_inputs = sequences_input_title[:, 0]
    # print("???bert_inputs: ", bert_inputs)

    bert_output = BertLayer(n_fine_tune_layers=6)(bert_inputs)
    embedded_sequences_title = bert_output

    y = layers.Dropout(hparams.dropout)(embedded_sequences_title)
    y = SelfAttention(hparams.head_num, hparams.head_dim,
                      seed=self.seed)([y, y, y, input_len, input_len])
    y = layers.Dropout(hparams.dropout)(y)
    pred_title = AttLayer2(hparams.attention_hidden_dim, seed=self.seed)(y, input_len)

    self.test1 = keras.Model(
        [sequences_input_title, input_mask, segment_ids], bert_output, name="test1")
    # self.test1 = K.function([sequences_input_title, input_mask, segment_ids], [bert_output])

    model = keras.Model(
        [sequences_input_title, input_mask, segment_ids, input_len],
        pred_title,
        name="news_encoder")
    # model = keras.Model([sequences_input_title], pred_title, name="news_encoder")
    # model = keras.Model([sequences_input_title], bert_inputs, name="news_encoder")
    return model
def __init__(self, embeddings, nclasses=3, **kwargs):
    """
    Define the layers of the model and perform the initializations of the layers
    (wherever it is necessary).

    Args:
        embeddings (numpy.ndarray): the 2D ndarray with the word vectors
        nclasses (int): number of output classes
    """
    super(AttentiveRNN, self).__init__()

    ########################################################
    # Optional Parameters
    ########################################################
    rnn_size = kwargs.get("rnn_size", 100)
    rnn_layers = kwargs.get("rnn_layers", 1)
    bidirectional = kwargs.get("bidirectional", False)
    noise = kwargs.get("noise", 0.)
    dropout_words = kwargs.get("dropout_words", 0.2)
    dropout_rnn = kwargs.get("dropout_rnn", 0.2)
    trainable_emb = kwargs.get("trainable_emb", False)
    ########################################################

    # define the embedding layer, with the corresponding dimensions
    self.embedding = nn.Embedding(num_embeddings=embeddings.shape[0],
                                  embedding_dim=embeddings.shape[1])

    # initialize the weights of the Embedding layer,
    # with the given pre-trained word vectors
    self.init_embeddings(embeddings, trainable_emb)

    # the dropout "layer" for the word embeddings
    self.drop_emb = nn.Dropout(dropout_words)
    # the gaussian noise "layer" for the word embeddings
    self.noise_emb = GaussianNoise(noise)

    # the RNN layer (or layers)
    self.rnn = nn.LSTM(input_size=embeddings.shape[1],
                       hidden_size=rnn_size,
                       num_layers=rnn_layers,
                       bidirectional=bidirectional,
                       dropout=dropout_rnn,
                       batch_first=True)

    # the dropout "layer" for the output of the RNN
    self.drop_rnn = nn.Dropout(dropout_rnn)

    if self.rnn.bidirectional:
        rnn_size *= 2

    self.attention = SelfAttention(rnn_size, batch_first=True)

    # the final Linear layer which maps the representation of the sentence,
    # to the classes
    self.linear = nn.Linear(in_features=rnn_size, out_features=nclasses)
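# Hedged usage sketch: constructing the AttentiveRNN above from a random embedding
# matrix. The vocabulary size, embedding dimension and keyword arguments are
# illustrative assumptions, not values taken from the original project.
import numpy as np

pretrained = np.random.uniform(-0.05, 0.05, (5000, 300)).astype('float32')
net = AttentiveRNN(pretrained, nclasses=3,
                   rnn_size=150, bidirectional=True, dropout_rnn=0.3)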
def _build_userencoder(self, titleencoder):
    hparams = self.hparams
    his_input_title = keras.Input(shape=(hparams.his_size, hparams.doc_size), dtype="int32")
    h_input_masks = keras.Input(shape=(hparams.his_size, hparams.doc_size), dtype="int32")
    h_segments = keras.Input(shape=(hparams.his_size, hparams.doc_size), dtype="int32")
    h_length = keras.Input(shape=(hparams.his_size, 1), dtype="int32")
    all_his = keras.Input(shape=(1, ), dtype="int32")

    # his_input_title_reshape = layers.Reshape((hparams.doc_size,))(his_input_title)
    # h_input_masks_reshape = layers.Reshape((hparams.doc_size,))(h_input_masks)
    # h_segments_reshape = layers.Reshape((hparams.doc_size,))(h_segments)
    his_input_title_reshape = K.reshape(his_input_title, (-1, hparams.doc_size))
    h_input_masks_reshape = K.reshape(h_input_masks, (-1, hparams.doc_size))
    h_segments_reshape = K.reshape(h_segments, (-1, hparams.doc_size))
    h_length_reshape = K.reshape(h_length, (-1, 1))

    # click_title_presents = layers.TimeDistributed(titleencoder)([his_input_title, h_input_masks, h_segments])
    click_title_presents1 = titleencoder([
        his_input_title_reshape, h_input_masks_reshape,
        h_segments_reshape, h_length_reshape
    ])
    print('???1: ', click_title_presents1)
    click_title_presents = K.reshape(
        click_title_presents1,
        (-1, hparams.his_size, click_title_presents1.shape[-1]))
    print('???2: ', click_title_presents)

    y = SelfAttention(hparams.head_num, hparams.head_dim,
                      seed=self.seed)([click_title_presents] * 3)
    user_present = AttLayer2(hparams.attention_hidden_dim, seed=self.seed)(y, all_his)

    model = keras.Model(
        [his_input_title, h_input_masks, h_segments, h_length, all_his],
        user_present,
        name="user_encoder")
    return model
def __init__(self, noise_len, chunks, embedding_dim, cbn_mlp_dim, batch_size):
    super(GeneratorNetwork, self).__init__()
    self.split_len = noise_len // chunks
    self.concat_len = self.split_len + embedding_dim
    self.batch_size = batch_size

    self.dense1 = spectral_norm(nn.Linear(self.split_len, 1024))
    # self.dropout = nn.Dropout(p=0.5)

    # inp, cbn_in, emb_size, cbn_hidden, batch_size, out
    self.resblock1 = ResBlockUp(256, self.concat_len, cbn_mlp_dim, 1, batch_size, 256)
    self.resblock2 = ResBlockUp(256, self.concat_len, cbn_mlp_dim, 1, batch_size, 128)
    self.resblock3 = ResBlockUp(128, self.concat_len, cbn_mlp_dim, 1, batch_size, 128)
    self.resblock4 = ResBlockUp(128, self.concat_len, cbn_mlp_dim, 1, batch_size, 64)
    self.resblock5 = ResBlockUp(64, self.concat_len, cbn_mlp_dim, 1, batch_size, 32)
    self.resblock6 = ResBlockUp(32, self.concat_len, cbn_mlp_dim, 1, batch_size, 16)
    self.resblock7 = ResBlockUp(16, self.concat_len, cbn_mlp_dim, 1, batch_size, 16)
    self.resblocks = [
        self.resblock1,
        self.resblock2,
        self.resblock3,
        self.resblock4,
        self.resblock5,
        self.resblock6,
        self.resblock7,
    ]

    self.self_attention = SelfAttention(64)
    self.penultimate_activation = nn.ReLU()
    self.conv = spectral_norm(
        nn.Conv2d(in_channels=16, out_channels=1, kernel_size=3,
                  padding=1, bias=False))
    self.bn = nn.BatchNorm2d(1)
    self.final_activation = nn.Tanh()
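# Hedged usage sketch: constructing the generator above with illustrative sizes.
# noise_len, chunks, embedding_dim, cbn_mlp_dim and batch_size here are assumptions,
# not values from the original training script; noise_len is chosen so it splits
# evenly into `chunks` pieces.
gen = GeneratorNetwork(noise_len=140, chunks=7, embedding_dim=128,
                       cbn_mlp_dim=256, batch_size=16)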
def _build_bodyencoder(self):
    hparams = self.hparams
    sequences_input_body = keras.Input(shape=(hparams['title_size'], ), dtype="int32")
    embedded_sequences_body = self.bert_model(sequences_input_body)

    y = layers.Dropout(hparams['dropout'])(embedded_sequences_body)
    y = SelfAttention(hparams['head_num'], hparams['head_dim'],
                      seed=self.seed)([y, y, y])
    y = layers.Dropout(hparams['dropout'])(y)
    pred_body = AttLayer2(hparams['attention_hidden_dim'], seed=self.seed)(y)
    pred_body = layers.Reshape((1, hparams['filter_num']))(pred_body)

    model = keras.Model(sequences_input_body, pred_body, name="body_encoder")
    return model
def __init__(self, num_classes: int, bert_weights: str, dropout: float = .10):
    super(BertCRFClassifier, self).__init__()
    self.bert = BertModel.from_pretrained(bert_weights)
    for param in list(self.bert.parameters())[:-5]:
        param.requires_grad = False
    hidden_size = self.bert.config.hidden_size
    self.span_clf_head = nn.Linear(hidden_size, num_classes)
    self.binary_clf_head = nn.Linear(hidden_size, 2)
    self.attention = SelfAttention(hidden_size, batch_first=True)
    self.dropout = nn.Dropout(p=dropout)
    self.crf = CRF(num_tags=num_classes)
def __init__(self, config):
    super(ContextAware, self).__init__()
    self.config = config

    self.word_emb = nn.Embedding(config.data_word_vec.shape[0],
                                 config.data_word_vec.shape[1])
    self.word_emb.weight.data.copy_(torch.from_numpy(config.data_word_vec))
    self.word_emb.weight.requires_grad = False

    self.ner_emb = nn.Embedding(7, config.entity_type_size, padding_idx=0)
    self.coref_embed = nn.Embedding(config.max_length, config.coref_size, padding_idx=0)

    # self.char_emb = nn.Embedding(config.data_char_vec.shape[0], config.data_char_vec.shape[1])
    # self.char_emb.weight.data.copy_(torch.from_numpy(config.data_char_vec))
    # char_dim = config.data_char_vec.shape[1]
    # char_hidden = 100
    # self.char_cnn = nn.Conv1d(char_dim, char_hidden, 5)

    hidden_size = 128
    input_size = config.data_word_vec.shape[1] + config.coref_size + config.entity_type_size  # + char_hidden

    self.rnn = EncoderLSTM(input_size, hidden_size, 1, True, True,
                           1 - config.keep_prob, False)
    self.linear_re = nn.Linear(hidden_size * 2, hidden_size)

    # NOTE: this first bilinear layer is immediately overwritten by the
    # distance-aware bilinear defined two lines below, so it is never used.
    self.bili = torch.nn.Bilinear(hidden_size, hidden_size, hidden_size)
    self.self_att = SelfAttention(hidden_size, 1.0)
    self.bili = torch.nn.Bilinear(hidden_size + config.dis_size,
                                  hidden_size + config.dis_size,
                                  hidden_size)
    self.dis_embed = nn.Embedding(20, config.dis_size, padding_idx=10)
    self.linear_output = nn.Linear(hidden_size * 2, config.relation_num)
def __init__(self, embeddings, num_classes, **kwargs):
    super(RNN, self).__init__()

    rnn_hidden_size = kwargs.get("rnn_size", 150)
    num_rnn_layers = kwargs.get("num_rnn_layers", 2)
    bidirectional = kwargs.get("bidirectional", True)
    noise = kwargs.get("noise", 0.5)
    dropout_embeds = kwargs.get("dropout_embeds", 0.5)
    dropout_rnn = kwargs.get("dropout_rnn", 0.5)
    trainable_emb = kwargs.get("trainable_emb", False)

    self.embedding = nn.Embedding(num_embeddings=embeddings.shape[0],
                                  embedding_dim=embeddings.shape[1])
    self.noise_emb = GaussianNoise(noise)
    self.init_embeddings(embeddings, trainable_emb)
    self.dropout_embeds = nn.Dropout(dropout_embeds)
    self.dropout_rnn = nn.Dropout(dropout_rnn)
    self.batch_size = 128
    self.seed = 1111

    self.shared_lstm = nn.LSTM(input_size=embeddings.shape[1],
                               hidden_size=rnn_hidden_size,
                               num_layers=num_rnn_layers,
                               bidirectional=bidirectional,
                               dropout=dropout_rnn,
                               batch_first=True)

    if bidirectional:
        rnn_hidden_size *= 4
    else:
        rnn_hidden_size *= 2

    self.attention = SelfAttention(attention_size=rnn_hidden_size, batch_first=True)
    self.linear = nn.Linear(rnn_hidden_size, num_classes)
def __init__(self):
    super(DiscriminatorNetwork, self).__init__()
    self.resblock1 = ResBlockDown(1, 16)
    self.resblock2 = ResBlockDown(16, 16)
    self.resblock3 = ResBlockDown(16, 32)
    self.resblock4 = ResBlockDown(32, 64)
    self.resblock5 = ResBlockDown(64, 128)
    self.resblock6 = ResBlockDown(128, 128)
    self.resblock7 = ResBlockDown(128, 256)
    self.resblock8 = ResBlock(256, 256)
    self.resdownblocks = [
        self.resblock1,
        self.resblock2,
        self.resblock3,
        self.resblock4,
        self.resblock5,
        self.resblock6,
        self.resblock7,
    ]
    self.self_attention = SelfAttention(32)
    self.global_sum_pooling = nn.LPPool2d(norm_type=1, kernel_size=(1, 4))
    self.dense = spectral_norm(nn.Linear(256, 1))
X = Input(shape=(sequence_length, ), batch_size=batch_size)

# Word-Embedding Layer
embedded = Embedding(input_dim=vocabulary_size, output_dim=embedding_dims)(X)

# Recurrent Layers
if config != 0:
    encoder_output, hidden_state, cell_state = CuDNNLSTM(
        units=128, return_sequences=True, return_state=True)(embedded)
    attention_input = [encoder_output, hidden_state]
else:
    encoder_output = CuDNNLSTM(units=128)(embedded)

# Optional Attention Mechanisms
if config == 1:
    encoder_output, attention_weights = SelfAttention(
        size=128, num_hops=10, use_penalization=False)(encoder_output)
elif config == 2:
    encoder_output, attention_weights = Attention(
        context='many-to-one', alignment_type='global')(attention_input)
    encoder_output = Flatten()(encoder_output)
elif config == 3:
    encoder_output, attention_weights = Attention(
        context='many-to-one', alignment_type='local-p*',
        window_width=100, score_function='scaled_dot')(attention_input)
    encoder_output = Flatten()(encoder_output)

# Prediction Layer
Y = Dense(units=num_categories, activation='softmax')(encoder_output)
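# Hedged continuation sketch: wrapping the graph above into a trainable Keras model.
# The optimizer and metric choices are assumptions, not taken from the original script.
model = Model(inputs=X, outputs=Y)
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
model.summary()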
def _def_layers(self):

    # word embeddings
    if self.use_pretrained_embedding:
        self.word_embedding = Embedding(embedding_size=self.word_embedding_size,
                                        vocab_size=self.word_vocab_size,
                                        id2word=self.word_vocab,
                                        dropout_rate=self.embedding_dropout,
                                        load_pretrained=True,
                                        trainable=self.word_embedding_trainable,
                                        embedding_oov_init="random",
                                        pretrained_embedding_path=self.pretrained_embedding_path)
    else:
        self.word_embedding = Embedding(embedding_size=self.word_embedding_size,
                                        vocab_size=self.word_vocab_size,
                                        trainable=self.word_embedding_trainable,
                                        dropout_rate=self.embedding_dropout)

    # node embeddings
    self.node_embedding = Embedding(embedding_size=self.node_embedding_size,
                                    vocab_size=self.node_vocab_size,
                                    trainable=self.node_embedding_trainable,
                                    dropout_rate=self.embedding_dropout)

    # relation embeddings
    self.relation_embedding = Embedding(embedding_size=self.relation_embedding_size,
                                        vocab_size=self.relation_vocab_size,
                                        trainable=self.relation_embedding_trainable,
                                        dropout_rate=self.embedding_dropout)

    self.word_embedding_prj = torch.nn.Linear(self.word_embedding_size, self.block_hidden_dim, bias=False)
    self.encoder = torch.nn.ModuleList([
        EncoderBlock(conv_num=self.encoder_conv_num, ch_num=self.block_hidden_dim, k=5,
                     block_hidden_dim=self.block_hidden_dim, n_head=self.n_heads,
                     dropout=self.block_dropout)
        for _ in range(self.encoder_layers)])
    self.rgcns = StackedRelationalGraphConvolution(
        entity_input_dim=self.node_embedding_size + self.block_hidden_dim,
        relation_input_dim=self.relation_embedding_size + self.block_hidden_dim,
        num_relations=self.relation_vocab_size,
        hidden_dims=self.gcn_hidden_dims,
        num_bases=self.gcn_num_bases,
        use_highway_connections=self.gcn_highway_connections,
        dropout_rate=self.dropout,
        real_valued_graph=self.real_valued_graph)
    self.attention = CQAttention(block_hidden_dim=self.block_hidden_dim, dropout=self.attention_dropout)
    self.attention_prj = torch.nn.Linear(self.block_hidden_dim * 4, self.block_hidden_dim, bias=False)
    self.self_attention_text = SelfAttention(self.block_hidden_dim, self.n_heads, self.dropout)
    self.self_attention_graph = SelfAttention(self.block_hidden_dim, self.n_heads, self.dropout)

    # recurrent memories
    self.recurrent_memory_bi_input = LSTMCell(self.block_hidden_dim * 2, self.block_hidden_dim, use_bias=True)
    self.recurrent_memory_single_input = LSTMCell(self.block_hidden_dim, self.block_hidden_dim, use_bias=True)

    linear_function = NoisyLinear if self.noisy_net else torch.nn.Linear
    self.action_scorer_linear_1_tri_input = linear_function(self.block_hidden_dim * 3, self.block_hidden_dim)
    self.action_scorer_linear_1_bi_input = linear_function(self.block_hidden_dim * 2, self.block_hidden_dim)
    self.action_scorer_linear_2 = linear_function(self.block_hidden_dim, 1)

    # text encoder for pretraining tasks
    # (we separate this because we don't want to init text encoder with pretrained parameters when training RL)
    self.encoder_for_pretraining_tasks = torch.nn.ModuleList([
        EncoderBlock(conv_num=self.encoder_conv_num, ch_num=self.block_hidden_dim, k=5,
                     block_hidden_dim=self.block_hidden_dim, n_head=self.n_heads,
                     dropout=self.block_dropout)
        for _ in range(self.encoder_layers)])

    # command generation
    self.cmd_gen_attention = CQAttention(block_hidden_dim=self.block_hidden_dim, dropout=self.attention_dropout)
    self.cmd_gen_attention_prj = torch.nn.Linear(self.block_hidden_dim * 4, self.block_hidden_dim, bias=False)
    self.decoder = torch.nn.ModuleList([
        DecoderBlock(ch_num=self.block_hidden_dim, k=5, block_hidden_dim=self.block_hidden_dim,
                     n_head=self.n_heads, dropout=self.block_dropout)
        for _ in range(self.decoder_layers)])
    self.tgt_word_prj = torch.nn.Linear(self.block_hidden_dim, self.word_vocab_size, bias=False)
    self.pointer_softmax = PointerSoftmax(input_dim=self.block_hidden_dim, hidden_dim=self.block_hidden_dim)

    # observation generation
    self.obs_gen_attention = CQAttention(block_hidden_dim=self.block_hidden_dim, dropout=self.attention_dropout)
    self.obs_gen_attention_prj = torch.nn.Linear(self.block_hidden_dim * 4, self.block_hidden_dim, bias=False)
    self.obs_gen_decoder = torch.nn.ModuleList([
        DecoderBlockForObsGen(ch_num=self.block_hidden_dim, k=5, block_hidden_dim=self.block_hidden_dim,
                              n_head=self.n_heads, dropout=self.block_dropout)
        for _ in range(self.decoder_layers)])
    self.obs_gen_tgt_word_prj = torch.nn.Linear(self.block_hidden_dim, self.word_vocab_size, bias=False)
    self.obs_gen_linear_1 = torch.nn.Linear(self.block_hidden_dim, self.block_hidden_dim)
    self.obs_gen_linear_2 = torch.nn.Linear(
        self.block_hidden_dim,
        int(len(self.relation_vocab) / 2) * len(self.node_vocab) * len(self.node_vocab))
    self.obs_gen_attention_to_rnn_input = torch.nn.Linear(self.block_hidden_dim * 4, self.block_hidden_dim)
    self.obs_gen_graph_rnncell = torch.nn.GRUCell(self.block_hidden_dim, self.block_hidden_dim)
    self.observation_discriminator = ObservationDiscriminator(self.block_hidden_dim)

    # action prediction
    self.ap_attention = CQAttention(block_hidden_dim=self.block_hidden_dim, dropout=self.attention_dropout)
    self.ap_attention_prj = torch.nn.Linear(self.block_hidden_dim * 4, self.block_hidden_dim, bias=False)
    self.ap_self_attention = SelfAttention(self.block_hidden_dim * 3, self.n_heads, self.dropout)
    self.ap_linear_1 = torch.nn.Linear(self.block_hidden_dim * 3, self.block_hidden_dim)
    self.ap_linear_2 = torch.nn.Linear(self.block_hidden_dim, 1)

    # state prediction
    self.sp_attention = CQAttention(block_hidden_dim=self.block_hidden_dim, dropout=self.attention_dropout)
    self.sp_attention_prj = torch.nn.Linear(self.block_hidden_dim * 4, self.block_hidden_dim, bias=False)
    self.sp_self_attention = SelfAttention(self.block_hidden_dim * 3, self.n_heads, self.dropout)
    self.sp_linear_1 = torch.nn.Linear(self.block_hidden_dim * 3, self.block_hidden_dim)
    self.sp_linear_2 = torch.nn.Linear(self.block_hidden_dim, 1)

    # deep graph infomax
    self.dgi_discriminator = DGIDiscriminator(self.gcn_hidden_dims[-1])
def __init__(self, dict_args):
    super(RNet, self).__init__()

    # character embedding layer
    self.use_charemb = dict_args['use_charemb']
    self.charemb_rnn_hdim = 0
    if self.use_charemb:
        self.cvocab_size = dict_args['charvocab_size']
        self.charemb_dim = dict_args['charemb_dim']
        self.charemb_rnn_hdim = dict_args['charemb_rnn_hdim']
        self.charemb_rnn_type = dict_args['charemb_rnn_type']
        self.charemb_padix = dict_args['charemb_padix']

    # input embedding layer
    self.wordemb_dim = dict_args['wordemb_dim']
    self.contextemb_rnn_hdim = dict_args['contextemb_rnn_hdim']
    self.contextemb_rnn_type = dict_args['contextemb_rnn_type']
    self.contextemb_num_layers = dict_args['contextemb_num_layers']

    # gated attention layer
    self.gated_attention_similarity_function = dict_args['gated_attention_similarity_function']
    self.gated_attentio_rnn_type = dict_args['gated_attentio_rnn_type']

    # self matching layer
    self.self_matching_similarity_function = dict_args['self_matching_similarity_function']

    # modeling layer
    self.modelinglayer_rnn_hdim = dict_args['contextemb_rnn_hdim']
    self.modelinglayer_rnn_type = dict_args['modelinglayer_rnn_type']
    self.modelinglayer_num_layers = dict_args['modelinglayer_num_layers']

    # question attention layer
    self.question_attention_similarity_function = dict_args['question_attention_similarity_function']

    # pointer network layer
    self.pointer_network_similarity_function = dict_args['pointer_network_similarity_function']
    self.pointer_network_rnn_type = dict_args['pointer_network_rnn_type']

    # dropout layer
    self.use_dropout = False
    self.dropout_rate = 0
    if dict_args['dropout_rate'] > 0:
        self.use_dropout = True
        self.dropout_rate = dict_args['dropout_rate']

    ####### Dropout layer
    if self.use_dropout:
        self.dropout_layer = nn.Dropout(p=self.dropout_rate)

    ####### character embedding layer
    if self.use_charemb:
        charemb_layer_args = {
            'cvocab_size': self.cvocab_size,
            'charemb_dim': self.charemb_dim,
            'charemb_rnn_hdim': self.charemb_rnn_hdim,
            'charemb_rnn_type': self.charemb_rnn_type,
            'charemb_padix': self.charemb_padix,
            'dropout_rate': self.dropout_rate
        }
        self.charemb_layer = RNNCharEmb(charemb_layer_args)

    ####### context embedding layer
    contextemb_layer_args = {
        'input_dim': 2 * self.charemb_rnn_hdim + self.wordemb_dim,
        'rnn_hdim': self.contextemb_rnn_hdim,
        'rnn_type': self.contextemb_rnn_type,
        'num_layers': self.contextemb_num_layers,
        'dropout_rate': self.dropout_rate
    }
    self.contextemb_layer = BiDirEncoder(contextemb_layer_args)

    ####### gated attention layer
    self.projection_dim = 2 * self.contextemb_rnn_hdim
    self.question_projection_weights_u = nn.Parameter(
        torch.Tensor(self.projection_dim, 2 * self.contextemb_rnn_hdim))
    self.passage_projection_weights_u = nn.Parameter(
        torch.Tensor(self.projection_dim, 2 * self.contextemb_rnn_hdim))
    self.passage_projection_weights_v = nn.Parameter(
        torch.Tensor(self.projection_dim, 2 * self.contextemb_rnn_hdim))
    self.dotproduct_weights = nn.Parameter(
        torch.Tensor(self.projection_dim, 1))

    stdv = 1.0 / math.sqrt(self.question_projection_weights_u.size(-1))
    self.question_projection_weights_u.data.uniform_(-stdv, stdv)
    stdv = 1.0 / math.sqrt(self.passage_projection_weights_u.size(-1))
    self.passage_projection_weights_u.data.uniform_(-stdv, stdv)
    stdv = 1.0 / math.sqrt(self.passage_projection_weights_v.size(-1))
    self.passage_projection_weights_v.data.uniform_(-stdv, stdv)
    stdv = 1.0 / math.sqrt(self.dotproduct_weights.size(-1))
    self.dotproduct_weights.data.uniform_(-stdv, stdv)

    gated_similarity_function_args = {
        'projection_dim': self.projection_dim,
        'sequence1_weights': self.question_projection_weights_u,
        'sequence2_weights': self.passage_projection_weights_u,
        'sequence3_weights': self.passage_projection_weights_v,
        'weights': self.dotproduct_weights
    }
    self.gated_similarity_function_pointer = similarity.ProjectionSimilaritySharedWeights(
        gated_similarity_function_args)

    gated_attention_layer_args = {
        'similarity_function': self.gated_attention_similarity_function,
        'similarity_function_pointer': self.gated_similarity_function_pointer,
        'sequence1_dim': 2 * self.contextemb_rnn_hdim,
        'sequence2_dim': 2 * self.contextemb_rnn_hdim,
        'rnn_type': self.gated_attentio_rnn_type,
        'rnn_hdim': 2 * self.contextemb_rnn_hdim,
        'gated_attention': dict_args['use_gating']
    }
    self.use_bidirectional = dict_args['use_bidirectional']
    self.gated_attention_layer_forward = MatchAttention(gated_attention_layer_args)
    self.gatedattn_dim = 2 * self.contextemb_rnn_hdim
    if self.use_bidirectional:
        self.gated_attention_layer_backward = MatchAttention(gated_attention_layer_args)
        self.gatedattn_dim = 4 * self.contextemb_rnn_hdim

    ####### self matching layer
    self.use_selfmatching = dict_args['use_selfmatching']
    self.selfmatching_dim = 0
    if self.use_selfmatching:
        # self.projection_dim = 2 * self.contextemb_rnn_hdim  # Shared
        self.passage_projection_weights_v1 = nn.Parameter(
            torch.Tensor(self.projection_dim, self.gatedattn_dim))  # Shared
        self.passageprime_projection_weights_v = nn.Parameter(
            torch.Tensor(self.projection_dim, self.gatedattn_dim))
        self.dotproduct_weights1 = nn.Parameter(
            torch.Tensor(self.projection_dim, 1))  # Shared

        stdv = 1.0 / math.sqrt(self.passageprime_projection_weights_v.size(-1))
        self.passageprime_projection_weights_v.data.uniform_(-stdv, stdv)
        stdv = 1.0 / math.sqrt(self.passage_projection_weights_v1.size(-1))
        self.passage_projection_weights_v1.data.uniform_(-stdv, stdv)
        stdv = 1.0 / math.sqrt(self.dotproduct_weights1.size(-1))
        self.dotproduct_weights1.data.uniform_(-stdv, stdv)

        selfmatching_similarity_function_args = {
            'projection_dim': self.projection_dim,
            'sequence1_weights': self.passage_projection_weights_v1,
            'sequence2_weights': self.passageprime_projection_weights_v,
            'weights': self.dotproduct_weights1
        }
        self.selfmatching_similarity_function_pointer = similarity.ProjectionSimilaritySharedWeights(
            selfmatching_similarity_function_args)

        self_matching_layer_args = {
            'similarity_function': self.self_matching_similarity_function,
            'similarity_function_pointer': self.selfmatching_similarity_function_pointer,
            'sequence_dim': 2 * self.contextemb_rnn_hdim,
            'projection_dim': 2 * self.contextemb_rnn_hdim
        }
        self.self_matching_layer = SelfAttention(self_matching_layer_args)
        self.selfmatching_dim = self.gatedattn_dim

    ####### Gated layer
    self.gated_selfmatching = self.use_selfmatching and dict_args['use_gating']
    if self.gated_selfmatching:
        self.gate_dim = self.gatedattn_dim + self.selfmatching_dim
        self.selfmatchinggate = Gate({
            'sigmoidinputdim': self.gate_dim,
            'gateinputdim': self.gate_dim
        })

    ####### modeling layer
    modeling_layer_args = {
        'input_dim': self.gatedattn_dim + self.selfmatching_dim,
        'rnn_hdim': self.modelinglayer_rnn_hdim,
        'rnn_type': self.modelinglayer_rnn_type,
        'num_layers': self.modelinglayer_num_layers,
        'dropout_rate': 0
    }
    self.modeling_layer = BiDirEncoder(modeling_layer_args)

    ####### question attention layer
    self.question_query_vector = nn.Parameter(
        torch.Tensor(2 * self.contextemb_rnn_hdim))
    stdv = 1.0 / math.sqrt(self.question_query_vector.size(-1))
    self.question_query_vector.data.uniform_(-stdv, stdv)

    # self.projection_dim = 2 * self.contextemb_rnn_hdim  # Shared
    self.question_projection_weights_u2 = nn.Parameter(
        torch.Tensor(self.projection_dim, 2 * self.contextemb_rnn_hdim))  # Shared
    self.question_projection_weights_v = nn.Parameter(
        torch.Tensor(self.projection_dim, 2 * self.contextemb_rnn_hdim))
    self.dotproduct_weights2 = nn.Parameter(
        torch.Tensor(self.projection_dim, 1))  # Shared

    stdv = 1.0 / math.sqrt(self.question_projection_weights_v.size(-1))
    self.question_projection_weights_v.data.uniform_(-stdv, stdv)
    stdv = 1.0 / math.sqrt(self.question_projection_weights_u2.size(-1))
    self.question_projection_weights_u2.data.uniform_(-stdv, stdv)
    stdv = 1.0 / math.sqrt(self.dotproduct_weights2.size(-1))
    self.dotproduct_weights2.data.uniform_(-stdv, stdv)

    question_similarity_function_args = {
        'projection_dim': self.projection_dim,
        'sequence1_weights': self.question_projection_weights_u2,
        'sequence2_weights': self.question_projection_weights_v,
        'weights': self.dotproduct_weights2
    }
    self.question_similarity_function_pointer = similarity.ProjectionSimilaritySharedWeights(
        question_similarity_function_args)

    question_attention_layer_args = {
        'similarity_function': self.question_attention_similarity_function,
        'similarity_function_pointer': self.question_similarity_function_pointer,
        'sequence1_dim': 2 * self.contextemb_rnn_hdim,
        'sequence2_dim': 2 * self.contextemb_rnn_hdim,
        'projection_dim': 2 * self.contextemb_rnn_hdim
    }
    self.question_attention_layer = UniDirAttention(question_attention_layer_args)

    ####### pointer network layer
    # self.projection_dim = 2 * self.contextemb_rnn_hdim  # Shared
    self.passage_projection_weights_h = nn.Parameter(
        torch.Tensor(self.projection_dim, 2 * self.contextemb_rnn_hdim))
    self.decoder_projection_weights_h = nn.Parameter(
        torch.Tensor(self.projection_dim, 2 * self.contextemb_rnn_hdim))
    self.dotproduct_weights3 = nn.Parameter(
        torch.Tensor(self.projection_dim, 1))  # Shared

    stdv = 1.0 / math.sqrt(self.passage_projection_weights_h.size(-1))
    self.passage_projection_weights_h.data.uniform_(-stdv, stdv)
    stdv = 1.0 / math.sqrt(self.decoder_projection_weights_h.size(-1))
    self.decoder_projection_weights_h.data.uniform_(-stdv, stdv)
    stdv = 1.0 / math.sqrt(self.dotproduct_weights3.size(-1))
    self.dotproduct_weights3.data.uniform_(-stdv, stdv)

    pointer_similarity_function_args = {
        'projection_dim': self.projection_dim,
        'sequence1_weights': self.passage_projection_weights_h,
        'sequence2_weights': self.decoder_projection_weights_h,
        'weights': self.dotproduct_weights3
    }
    self.pointer_similarity_function_pointer = similarity.ProjectionSimilaritySharedWeights(
        pointer_similarity_function_args)

    pointer_network_layer_args = {
        'similarity_function': self.pointer_network_similarity_function,
        'similarity_function_pointer': self.pointer_similarity_function_pointer,
        'sequence_dim': 2 * self.contextemb_rnn_hdim,
        'projection_dim': 2 * self.contextemb_rnn_hdim,
        'rnn_type': self.pointer_network_rnn_type,
        'rnn_hdim': 2 * self.contextemb_rnn_hdim,
    }
    self.pointer_network_layer = PointerNetwork(pointer_network_layer_args)
def build_generator():
    input_cond = Input((3, ))
    embed_cond = Dense(128, activation='tanh')(input_cond)
    input_img = Input((None, None, IMG_CHAN))

    # encoder
    d1 = CoordinateChannel2D()(input_img)
    d1 = residual_block(d1, (8, 8, 32), True)             # 1/1  320
    d1 = residual_block(d1, (8, 8, 32))
    d2 = residual_block_downscaling(d1, (16, 16, 64))     # 1/2  160
    d2 = residual_block(d2, (16, 16, 64))
    d2 = residual_block(d2, (16, 16, 64))
    d3 = residual_block_downscaling(d2, (32, 32, 128))    # 1/4  80
    d3 = residual_block(d3, (32, 32, 128))
    d3 = residual_block(d3, (32, 32, 128))
    d4 = residual_block_downscaling(d3, (64, 64, 256))    # 1/8  40
    d4 = residual_block(d4, (64, 64, 256))
    d4 = residual_block(d4, (64, 64, 256))
    d5 = residual_block_downscaling(d4, (64, 64, 256))    # 1/16 20
    d5 = residual_block(d5, (64, 64, 256))
    d5 = residual_block(d5, (64, 64, 256))

    # bottleneck
    d6 = residual_block_downscaling(d5, (128, 128, 512))  # 1/32 10
    d6 = residual_block(d6, (128, 128, 512))
    d6 = residual_block(d6, (128, 128, 512))
    d6 = filmed_residual_block(embed_cond, CoordinateChannel2D()(d6), 512)
    d6 = residual_block(d6, (128, 128, 512))
    d6 = residual_block(d6, (128, 128, 512))
    d6 = residual_block(d6, (128, 128, 512))
    d6 = residual_block(d6, (128, 128, 512))
    d6 = SelfAttention()(d6)

    # decoder
    u1 = residual_block_upscaling(d6, (64, 64, 256))      # 20
    u1 = CoordinateChannel2D()(Concatenate()([u1, se(u1, d5)]))
    u1 = filmed_residual_block(embed_cond, u1, 256)
    u1 = residual_block(u1, (64, 64, 256))
    u1 = residual_block(u1, (64, 64, 256))
    u1 = SelfAttention()(u1)
    s1 = UpSampling2D(16)(Conv2D(1, 1, activation='tanh')(u1))

    u2 = residual_block_upscaling(u1, (64, 64, 256))      # 40
    u2 = CoordinateChannel2D()(Concatenate()([u2, se(u2, d4)]))
    u2 = filmed_residual_block(embed_cond, u2, 256)
    u2 = residual_block(u2, (64, 64, 256))
    u2 = residual_block(u2, (64, 64, 256))
    u2 = SelfAttention()(u2)

    u3 = residual_block_upscaling(u2, (32, 32, 128))      # 80
    u3 = CoordinateChannel2D()(Concatenate()([u3, se(u3, d3)]))
    u3 = filmed_residual_block(embed_cond, u3, 128)
    u3 = residual_block(u3, (32, 32, 128))
    u3 = residual_block(u3, (32, 32, 128))
    u3 = SelfAttention()(u3)
    s2 = UpSampling2D(4)(Conv2D(1, 1, activation='tanh')(u3))

    u4 = residual_block_upscaling(u3, (16, 16, 64))       # 160
    u4 = CoordinateChannel2D()(Concatenate()([u4, se(u4, d2)]))
    u4 = filmed_residual_block(embed_cond, u4, 64)
    u4 = residual_block(u4, (16, 16, 64))
    u4 = residual_block(u4, (16, 16, 64))
    u4 = SelfAttention()(u4)

    u5 = residual_block_upscaling(u4, (8, 8, 32))         # 320
    u5 = CoordinateChannel2D()(Concatenate()([u5, se(u5, d1)]))
    u5 = filmed_residual_block(embed_cond, u5, 32)
    u5 = residual_block(u5, (8, 8, 32))
    u5 = residual_block(u5, (8, 8, 32))

    u6 = residual_block(u5, (4, 4, 16), True)
    u6 = residual_block(u6, (4, 4, 16))
    u6 = residual_block(u6, (4, 4, 16))

    output_img = Conv2D(1, 1, activation='tanh')(u6)
    return Model([input_cond, input_img], [output_img, s1, s2])
def __init__(self, training=True):
    super(VGGATTModel, self).__init__()
    # block 1
    self.block1_conv1 = Conv2D(
        64, (3, 3), activation='relu', padding='same', name='block1_conv1',
        kernel_regularizer=tf.keras.regularizers.l2(0.0001))
    self.block1_conv2 = Conv2D(
        64, (3, 3), activation='relu', padding='same', name='block1_conv2',
        kernel_regularizer=tf.keras.regularizers.l2(0.0001))
    self.block1_pool = MaxPooling2D((2, 2), strides=(2, 2), name='block1_pool')
    self.block1_batch_norm = BatchNormalization(name='block1_batch_norm')

    # block 2
    self.block2_conv1 = Conv2D(
        128, (3, 3), activation='relu', padding='same', name='block2_conv1',
        kernel_regularizer=tf.keras.regularizers.l2(0.0001))
    self.block2_conv2 = Conv2D(
        128, (3, 3), activation='relu', padding='same', name='block2_conv2',
        kernel_regularizer=tf.keras.regularizers.l2(0.0001))
    self.block2_pool = MaxPooling2D((2, 2), strides=(2, 2), name='block2_pool')
    self.block2_batch_norm = BatchNormalization(name='block2_batch_norm')

    # Block 3
    self.block3_conv1 = Conv2D(
        256, (3, 3), activation='relu', padding='same', name='block3_conv1',
        kernel_regularizer=tf.keras.regularizers.l2(0.0001))
    self.block3_conv2 = Conv2D(
        256, (3, 3), activation='relu', padding='same', name='block3_conv2',
        kernel_regularizer=tf.keras.regularizers.l2(0.0001))
    self.block3_conv3 = Conv2D(
        256, (3, 3), activation='relu', padding='same', name='block3_conv3',
        kernel_regularizer=tf.keras.regularizers.l2(0.0001))
    self.block3_pool = MaxPooling2D((2, 2), strides=(1, 2), name='block3_pool')
    self.block3_batch_norm = BatchNormalization(name='block3_batch_norm')

    # Block 4
    self.block4_conv1 = Conv2D(
        512, (3, 3), activation='relu', padding='same', name='block4_conv1',
        kernel_regularizer=tf.keras.regularizers.l2(0.0001))
    self.block4_conv2 = Conv2D(
        512, (3, 3), activation='relu', padding='same', name='block4_conv2',
        kernel_regularizer=tf.keras.regularizers.l2(0.0001))
    self.block4_conv3 = Conv2D(
        512, (3, 3), activation='relu', padding='same', name='block4_conv3',
        kernel_regularizer=tf.keras.regularizers.l2(0.0001))
    self.block4_pool = MaxPooling2D((2, 2), strides=(1, 2), name='block4_pool')
    self.block4_batch_norm = BatchNormalization(name='block4_batch_norm')

    # Block 5
    self.blcok5_conv1 = Conv2D(
        512, (3, 3), activation='relu', padding='same', name='block5_conv1',
        kernel_regularizer=tf.keras.regularizers.l2(0.0001))
    self.block5_conv2 = Conv2D(
        512, (3, 3), activation='relu', padding='same', name='block5_conv2',
        kernel_regularizer=tf.keras.regularizers.l2(0.0001))
    self.block5_conv3 = Conv2D(
        512, (3, 3), activation='relu', padding='same', name='block5_conv3',
        kernel_regularizer=tf.keras.regularizers.l2(0.0001))
    self.block5_pool = MaxPooling2D((1, 2), strides=(1, 2), name='block5_pool')
    self.block5_batch_norm = BatchNormalization(name='block5_batch_norm')

    # Block 6
    self.block6_reshape = Reshape(target_shape=(-1, 512))
    self.self_attention1 = SelfAttention(name='attention')

    # Block 7
    self.block7_prediction = Dense(units=4651, kernel_initializer='he_normal', name='ctc_y')

    self.training = training
    if not training:
        self.block7_softmax_pred = Activation('softmax', name='softmax')
def __init__(self, i_norm=True, noise=False):
    super(Net, self).__init__()
    self.noise = noise
    self.normalization_layer = nn.InstanceNorm2d if i_norm else nn.BatchNorm2d

    self.block_0 = PartialConv2d(in_channels=3, out_channels=64, kernel_size=7,
                                 stride=2, padding=3, multi_channel=True, return_mask=True)
    self.block_1 = PartialConv2d(in_channels=64, out_channels=128, kernel_size=5,
                                 stride=2, padding=2, multi_channel=True, return_mask=True)
    self.norm_1 = self.normalization_layer(num_features=128)
    self.block_2 = PartialConv2d(in_channels=128, out_channels=256, kernel_size=5,
                                 stride=1, padding=2, multi_channel=True, return_mask=True)
    self.norm_2 = self.normalization_layer(num_features=256)
    self.block_3 = PartialConv2d(in_channels=256, out_channels=128, kernel_size=5,
                                 stride=2, padding=2, multi_channel=True, return_mask=True)
    self.norm_3 = self.normalization_layer(num_features=128)

    self.dilated_block_1 = PartialConv2d(in_channels=128, out_channels=128, kernel_size=3,
                                         padding=2, dilation=2, multi_channel=True, return_mask=True)
    self.dilated_block_2 = PartialConv2d(in_channels=128, out_channels=128, kernel_size=3,
                                         padding=4, dilation=4, multi_channel=True, return_mask=True)
    self.dilated_block_3 = PartialConv2d(in_channels=128, out_channels=128, kernel_size=3,
                                         padding=8, dilation=8, multi_channel=True, return_mask=True)
    self.dilated_block_4 = PartialConv2d(
        in_channels=128, out_channels=128, kernel_size=3, padding=2, dilation=2,
        stride=1, multi_channel=True)  # decrease spatial dimension??
    self.dilated_norm_1 = self.normalization_layer(num_features=128)
    self.dilated_norm_2 = self.normalization_layer(num_features=128)
    self.dilated_norm_3 = self.normalization_layer(num_features=128)
    self.dilated_norm_4 = self.normalization_layer(num_features=128)

    # self.s_attention_5 = SelfAttention(in_channels=128)
    # self.block_5 = nn.Conv2d(in_channels=128, out_channels=128, kernel_size=3, padding=1)
    # self.norm_5 = self.normalization_layer(num_features=128)

    self.s_attention_6 = SelfAttention(in_channels=128)
    self.block_6 = nn.Conv2d(in_channels=256, out_channels=128, kernel_size=3, padding=1)
    self.norm_6 = self.normalization_layer(num_features=128)

    self.s_attention_7 = SelfAttention(in_channels=384)
    self.block_7 = nn.Conv2d(in_channels=384, out_channels=128, kernel_size=3, padding=1)
    self.norm_7 = self.normalization_layer(num_features=128)

    self.block_8 = nn.Conv2d(in_channels=192, out_channels=64, kernel_size=3, padding=1)
    self.norm_8 = self.normalization_layer(num_features=64)
    self.block_9 = nn.Conv2d(in_channels=67, out_channels=3, kernel_size=3, padding=1)

    self.noise_layer = GaussianNoise()  # noise??
    self.upsample = nn.UpsamplingNearest2d(scale_factor=2.0)
(X_train, Y_train), (X_test, Y_test) = imdb.load_data(num_words=vocabulary_size)

# Pad & truncate sequences to fixed sequence length
X_train = pad_sequences(sequences=X_train, maxlen=sequence_length)
X_test = pad_sequences(sequences=X_test, maxlen=sequence_length)

# Create word-level binary sentiment classification model
# Input Layer
X = Input(shape=(sequence_length, ), batch_size=batch_size)

# Word-Embedding Layer
embedded = Embedding(input_dim=vocabulary_size, output_dim=embedding_dims)(X)

# Optional Self-Attention Mechanisms
if config == 1:
    embedded, attention_weights = SelfAttention(
        size=50, num_hops=6, use_penalization=False)(embedded)
elif config == 2:
    embedded, attention_weights = SelfAttention(
        size=50, num_hops=6, use_penalization=True,
        penalty_coefficient=0.1)(embedded)

# Multi-Layer Perceptron
embedded_flattened = Flatten()(embedded)
fully_connected = Dense(units=250, activation='relu')(embedded_flattened)

# Prediction Layer
Y = Dense(units=1, activation='sigmoid')(fully_connected)

# Compile model
model = Model(inputs=X, outputs=Y)
model.compile(loss='binary_crossentropy',
def _build_userencoder(self, his_input_title, his_input_segment, titleencoder,
                       entityencoder, contextencoder):
    """The main function to create the user encoder of NRMS.

    Args:
        titleencoder (obj): the news encoder of NRMS.
        entityencoder (obj): the entity encoder of NRMS (may be None).
        contextencoder (obj): the context encoder of NRMS (may be None).

    Return:
        obj: the user encoder of NRMS.
    """
    hparams = self.hparams
    # his_input_title = keras.Input(
    #     shape=(hparams.his_size, hparams.title_size), dtype="int32", name='ue_his_input_title'
    # )
    # his_input_segment = keras.Input(
    #     shape=(hparams.his_size, hparams.title_size), dtype="int32", name='ue_his_input_segment'
    # )
    embedded_sequences_title = layers.TimeDistributed(self.bert_model)(
        his_input_title)  # TODO: this shape may be wrong (-1, 50, 30, 768)
    embedded_sequences_title = keras.layers.Reshape(
        (hparams['his_size'], hparams['title_size'], 768),
        name='embedded_sequences_title_reshape')(embedded_sequences_title)
    click_title_presents = layers.TimeDistributed(
        titleencoder, name='news_time_distributed')(embedded_sequences_title)

    # y = SelfAttention(hparams['head_num'], hparams['head_dim'], seed=self.seed)(
    #     [click_title_presents] * 3
    # )
    y = MultiHeadAttention(hparams['head_num'],
                           hparams['head_dim'])([click_title_presents] * 3)

    if entityencoder is not None:
        his_input_title_entity = keras.Input(
            shape=(hparams['his_size'], hparams['title_size']),
            dtype="int32",
            name='his_input_title_entity')
        click_title_entity_presents = layers.TimeDistributed(
            entityencoder, name='entity_time_distributed')(his_input_title_entity)
        entity_y = SelfAttention(hparams['head_num'], hparams['head_dim'],
                                 seed=self.seed)([click_title_entity_presents] * 3)
        if contextencoder is not None:
            click_title_context_presents = layers.TimeDistributed(
                contextencoder, name='context_time_distributed')(his_input_title_entity)
            context_y = SelfAttention(
                hparams['head_num'], hparams['head_dim'],
                seed=self.seed)([click_title_context_presents] * 3)
            y = layers.Concatenate()([y, entity_y, context_y])
        else:
            y = layers.Concatenate()([y, entity_y])

    user_present = AttLayer2(hparams['attention_hidden_dim'], seed=self.seed)(y)

    if entityencoder is not None:
        model = keras.Model(
            inputs=[his_input_title, his_input_title_entity],
            outputs=user_present,
            name="user_encoder")
    else:
        model = keras.Model(his_input_title, user_present, name="user_encoder")
    return model