def __init__(self, batch_size, image_size=64, z_dim=100, conv_dim=64):
    super(Generator, self).__init__()
    self.imsize = image_size

    repeat_num = int(np.log2(self.imsize)) - 3
    mult = 2 ** repeat_num  # 8

    # Project the 1x1 latent map to a 4x4 feature map with conv_dim * 8 channels.
    self.l1 = nn.Sequential(
        SpectralNorm(nn.ConvTranspose2d(z_dim, conv_dim * mult, 4)),
        nn.BatchNorm2d(conv_dim * mult),
        nn.ReLU())
    curr_dim = conv_dim * mult

    # Each subsequent block doubles the spatial size and halves the channels.
    self.l2 = nn.Sequential(
        SpectralNorm(nn.ConvTranspose2d(curr_dim, curr_dim // 2, 4, 2, 1)),
        nn.BatchNorm2d(curr_dim // 2),
        nn.ReLU())
    curr_dim //= 2

    self.l3 = nn.Sequential(
        SpectralNorm(nn.ConvTranspose2d(curr_dim, curr_dim // 2, 4, 2, 1)),
        nn.BatchNorm2d(curr_dim // 2),
        nn.ReLU())

    if self.imsize == 64:
        curr_dim //= 2
        self.l4 = nn.Sequential(
            SpectralNorm(nn.ConvTranspose2d(curr_dim, curr_dim // 2, 4, 2, 1)),
            nn.BatchNorm2d(curr_dim // 2),
            nn.ReLU())
        curr_dim //= 2

    self.last = nn.Sequential(
        nn.ConvTranspose2d(curr_dim, 3, 4, 2, 1),
        nn.Tanh())

    # Self-attention on the 128-channel (16x16) and 64-channel (32x32) maps.
    self.attn1 = SelfAttention(128, 'relu')
    self.attn2 = SelfAttention(64, 'relu')
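# Usage sketch for the Generator above (illustrative, not from the original
# source). It assumes the SAGAN convention that SelfAttention.forward returns
# a (features, attention_map) pair, and that the torch/numpy imports used by
# the class are in scope. The latent vector is reshaped to a 1x1 spatial map
# before the first transposed convolution.
import torch

G = Generator(batch_size=8, image_size=64)
z = torch.randn(8, 100).view(8, 100, 1, 1)
h = G.l3(G.l2(G.l1(z)))    # 1x1 -> 4x4 -> 8x8 -> 16x16, 128 channels
h, attn_map1 = G.attn1(h)  # self-attention on the 16x16 map
h = G.l4(h)                # 16x16 -> 32x32, 64 channels
h, attn_map2 = G.attn2(h)  # self-attention on the 32x32 map
img = G.last(h)            # 32x32 -> 64x64 RGB image in [-1, 1]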
def __init__(self, config: Config, checkpoint_path: Optional[str] = None):
    super(Model, self).__init__()
    regularizer = keras.regularizers.l2(config.embedding_regularization_coef)
    if config.use_pretrained_embeddings:
        weights = _load_character_embeddings()
        self.embedding_layer = Embedding(config.vocab_size,
                                         config.embedding_size,
                                         weights=[weights],
                                         trainable=False,
                                         mask_zero=True)
    else:
        self.embedding_layer = Embedding(config.vocab_size,
                                         config.embedding_size,
                                         embeddings_regularizer=regularizer,
                                         mask_zero=True)
    dense_regularizer = keras.regularizers.l2(config.dense_regularization_coef)
    if config.use_word_level_embeddings:
        # When restoring from a checkpoint the embedding weights come from the
        # checkpoint itself, so the initializer can be skipped.
        if checkpoint_path is None:
            weights = _load_word_embeddings(config.glove_vocab_size)[1]
            embed_init = Constant(weights)
        else:
            embed_init = None
        self.word_embedding_layer = Embedding(config.glove_vocab_size + 1,
                                              300,
                                              embeddings_initializer=embed_init,
                                              trainable=False,
                                              mask_zero=True)
        self.word_embedding_dropout = Dropout(config.dense_dropout)
        self.word_embedding_attention = SelfAttention(2, 64)
        self.word_dense_h_1 = Dense(config.lstm_size,
                                    activation='relu',
                                    kernel_regularizer=dense_regularizer)
        self.word_dense_h_2 = Dense(config.lstm_size,
                                    activation='tanh',
                                    kernel_regularizer=dense_regularizer)
        self.word_dense_c_1 = Dense(config.lstm_size,
                                    activation='relu',
                                    kernel_regularizer=dense_regularizer)
        self.word_dense_c_2 = Dense(config.lstm_size,
                                    activation='tanh',
                                    kernel_regularizer=dense_regularizer)
    self.recurrent_layer = LSTM(config.lstm_size,
                                recurrent_dropout=config.lstm_dropout,
                                return_state=not config.use_attention,
                                return_sequences=config.use_attention)
    if config.use_attention:
        self.attention_layer = SelfAttention(config.attention_num_heads,
                                             config.attention_head_size)
        self.attention_dropout = Dropout(config.dense_dropout)
    self.dense_layer = Dense(config.num_classes * 8,
                             kernel_regularizer=dense_regularizer,
                             activation='relu')
    self.output_layer = Dense(config.num_classes, activation=tf.nn.softmax)
    self.config = config
    if checkpoint_path is not None:
        self.load_weights(checkpoint_path)
def __init__(self, batch_size=64, image_size=64, conv_dim=64):
    super(Discriminator, self).__init__()
    self.imsize = image_size

    self.l1 = nn.Sequential(
        SpectralNorm(nn.Conv2d(3, conv_dim, 4, 2, 1)),
        nn.LeakyReLU(0.1))
    curr_dim = conv_dim

    self.l2 = nn.Sequential(
        SpectralNorm(nn.Conv2d(curr_dim, curr_dim * 2, 4, 2, 1)),
        nn.LeakyReLU(0.1))
    curr_dim *= 2

    self.l3 = nn.Sequential(
        SpectralNorm(nn.Conv2d(curr_dim, curr_dim * 2, 4, 2, 1)),
        nn.LeakyReLU(0.1))
    curr_dim *= 2

    if self.imsize == 64:
        self.l4 = nn.Sequential(
            SpectralNorm(nn.Conv2d(curr_dim, curr_dim * 2, 4, 2, 1)),
            nn.LeakyReLU(0.1))
        curr_dim *= 2

    self.last = nn.Sequential(nn.Conv2d(curr_dim, 1, 4))

    self.attn1 = SelfAttention(256, 'relu')
    self.attn2 = SelfAttention(512, 'relu')
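# Mirror-image usage sketch for the Discriminator above (illustrative; again
# assuming the SAGAN convention that SelfAttention returns a
# (features, attention_map) pair).
import torch

D = Discriminator(image_size=64)
x = torch.randn(8, 3, 64, 64)
h = D.l3(D.l2(D.l1(x)))  # 64 -> 32 -> 16 -> 8, 256 channels
h, attn1 = D.attn1(h)    # attention on the 8x8, 256-channel map
h = D.l4(h)              # 8 -> 4, 512 channels
h, attn2 = D.attn2(h)    # attention on the 4x4, 512-channel map
logit = D.last(h)        # (8, 1, 1, 1) real/fake score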
def __init__(self):
    super(Discriminator, self).__init__()
    self.conv1 = SpectralNorm(nn.Conv2d(channels, 64, 3, stride=1, padding=(2, 2)))
    self.conv2 = SpectralNorm(nn.Conv2d(64, 64, 4, stride=2, padding=(1, 1)))
    self.conv3 = SpectralNorm(nn.Conv2d(64, 128, 3, stride=1, padding=(1, 1)))
    self.conv4 = SpectralNorm(nn.Conv2d(128, 128, 4, stride=2, padding=(1, 1)))
    self.conv5 = SpectralNorm(nn.Conv2d(128, 256, 3, stride=1, padding=(1, 1)))
    self.conv6 = SpectralNorm(nn.Conv2d(256, 256, 4, stride=2, padding=(1, 1)))

    # Self-attention over the 256-channel feature map, split into the
    # attention-map computation and the post-attention projection.
    self.attention_size = 32
    self.att = SelfAttention(256, self.attention_size)
    self.att_post = SelfAttentionPost(256, self.attention_size)

    self.conv7 = SpectralNorm(nn.Conv2d(256, 512, 3, stride=1, padding=(1, 1)))

    # Projection-discriminator style class embedding; `channels`, `num_classes`
    # and `w_g` are module-level constants in the original source.
    self.embed = SpectralNorm(nn.Linear(num_classes, w_g * w_g * 512))
    self.fc = SpectralNorm(nn.Linear(w_g * w_g * 512, 1))
def __init__(self, embed_size, heads, forward_expansion, dropout, device):
    super(DecoderBlock, self).__init__()
    self.attention = SelfAttention(embed_size, heads)
    self.norm = nn.LayerNorm(embed_size)
    self.transformer_block = TransformerBlock(
        embed_size, heads, dropout, forward_expansion)
    self.dropout = nn.Dropout(dropout)
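# A forward pass consistent with the DecoderBlock fields above (a sketch of
# the usual pattern in this style of tutorial decoder, not code from the
# original source): masked self-attention over the target sequence, a
# residual + layer-norm step, then a TransformerBlock that attends over the
# encoder output.
def forward(self, x, value, key, src_mask, trg_mask):
    attention = self.attention(x, x, x, trg_mask)   # masked self-attention
    query = self.dropout(self.norm(attention + x))  # residual + layer norm
    out = self.transformer_block(value, key, query, src_mask)
    return out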
def __init__(self, image_size=64, z_dim=100, conv_dim=64):
    # Note: the network must be built first and cannot change during forward.
    super(Generator, self).__init__()
    self.imsize = image_size
    self.watch_list1 = [0]  # lists used to store attention maps for inspection
    self.watch_list2 = [0]

    layers = []
    repeat_num = int(np.log2(self.imsize)) - 3  # 3
    mult = 2 ** repeat_num  # 8; multiplier applied to conv_dim
    curr_dim = z_dim  # initial dim equals z_dim
    tar_dim = conv_dim * mult  # initial target dim

    layers.append(SpectralNorm(nn.ConvTranspose2d(curr_dim, conv_dim * mult, 4)))
    layers.append(nn.BatchNorm2d(conv_dim * mult))  # batch norm before non-linearity
    layers.append(nn.ReLU())
    curr_dim = tar_dim
    tar_dim = int(tar_dim / 2)

    for i in range(repeat_num):
        # Transposed convolution: double spatial size, halve channels.
        layers.append(SpectralNorm(nn.ConvTranspose2d(curr_dim, tar_dim, 4, 2, 1)))
        layers.append(nn.BatchNorm2d(tar_dim))
        layers.append(nn.ReLU())
        curr_dim = tar_dim
        tar_dim = int(tar_dim / 2)
        # Insert self-attention once the map reaches 128 and then 64 channels.
        if curr_dim == 64:
            self.attn1 = SelfAttention(64, self.watch_list1)
            layers.append(self.attn1)
        if curr_dim == 128:
            self.attn2 = SelfAttention(128, self.watch_list2)
            layers.append(self.attn2)

    layers.append(nn.ConvTranspose2d(curr_dim, 3, kernel_size=4, stride=2, padding=1))
    layers.append(nn.Tanh())
    self.main = nn.Sequential(*layers)
def __init__(self,
             vocab_size,
             embed_size,
             hidden_size,
             slot_size,
             intent_size,
             dropout=0.3,
             pad_idx=0):
    super(SDEN, self).__init__()
    self.pad_idx = pad_idx  # was hard-coded to 0, ignoring the argument
    self.embed = nn.Embedding(vocab_size, embed_size, padding_idx=self.pad_idx)
    self.bigru_m = nn.GRU(embed_size, hidden_size,
                          batch_first=True, bidirectional=True)
    self.bigru_c = nn.GRU(embed_size, hidden_size,
                          batch_first=True, bidirectional=True)
    self.context_encoder = nn.Sequential(
        nn.Linear(hidden_size * 4, hidden_size * 2),
        nn.Sigmoid())
    self.Att = Attn('concat', hidden_size)
    self.context_encoder1 = nn.Sequential(
        nn.Linear(hidden_size * 8, hidden_size * 2),
        nn.Sigmoid())
    self.session_encoder = nn.GRU(hidden_size * 2, hidden_size * 2,
                                  batch_first=True, bidirectional=True)
    self.decoder_1 = nn.GRU(embed_size, hidden_size * 2,
                            batch_first=True, bidirectional=True)
    self.decoder_2 = nn.LSTM(hidden_size * 4, hidden_size * 2,
                             batch_first=True, bidirectional=True)
    self.intent_linear = nn.Linear(hidden_size * 4, intent_size)
    self.slot_linear = nn.Linear(hidden_size * 4, slot_size)
    self.dropout = nn.Dropout(dropout)
    self.attention = SelfAttention(hidden_size)
    self.att = SelfA(hidden_size)
    self.hidden_size = hidden_size
    # self.att = Attn('concat', 64)

    # Xavier init for weight matrices, zeros for biases.
    for param in self.parameters():
        if len(param.size()) > 1:
            nn.init.xavier_uniform_(param)
        else:
            param.data.zero_()
def get_attn_layer(self, self_attn):
    attn_layer = None
    if self.attns_mode == SelfAttention:
        if self_attn > 0:
            attn_layer = SelfAttention(self_attn)
    if self.attns_mode == GoogleAttention:
        if self_attn is not None:
            attn_layer = GoogleAttention(self_attn)
    return attn_layer
def __init__(self, k, heads, mask=False):
    super().__init__()
    self.attention = SelfAttention(k, heads=heads)
    self.norm1 = nn.LayerNorm(k)
    self.norm2 = nn.LayerNorm(k)
    self.ff = nn.Sequential(
        nn.Linear(k, 4 * k),
        nn.ReLU(),
        nn.Linear(4 * k, k))
    self.mask = mask
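# A forward pass matching the block above (a sketch of the standard post-norm
# transformer pattern, not code from the original source; the stored `mask`
# flag is not consumed in this sketch).
def forward(self, x):
    attended = self.attention(x)       # (batch, seq, k) -> (batch, seq, k)
    x = self.norm1(attended + x)       # residual + layer norm
    fedforward = self.ff(x)            # position-wise feed-forward
    return self.norm2(fedforward + x)  # second residual + layer norm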
def __init__(self, embed_size, heads, dropout, forward_expansion):
    super(TransformerBlock, self).__init__()
    self.attention = SelfAttention(embed_size, heads)
    self.norm1 = nn.LayerNorm(embed_size)
    self.norm2 = nn.LayerNorm(embed_size)
    self.feed_forward = nn.Sequential(
        nn.Linear(embed_size, forward_expansion * embed_size),
        nn.ReLU(),
        nn.Linear(forward_expansion * embed_size, embed_size)
    )
    self.dropout = nn.Dropout(dropout)
def __init__(self, conv_dim=64, watch_on=False):
    super(Discriminator, self).__init__()
    self.watch_list1 = [0]
    self.watch_list2 = [0]

    layers = []
    curr_dim = 3  # input is an RGB image
    tar_dim = conv_dim  # initial target dim
    for i in range(4):
        # Strided convolution: halve spatial size, double channels.
        layers.append(SpectralNorm(nn.Conv2d(curr_dim, tar_dim, 4, 2, 1)))
        layers.append(nn.BatchNorm2d(tar_dim))
        layers.append(nn.LeakyReLU(0.1))
        curr_dim = tar_dim
        tar_dim = curr_dim * 2
        # Insert self-attention at the 256- and 512-channel feature maps.
        if curr_dim == 256:
            layers.append(SelfAttention(256, self.watch_list1))
        if curr_dim == 512:
            layers.append(SelfAttention(512, self.watch_list2))

    layers.append(nn.Conv2d(curr_dim, 1, 4))
    self.main = nn.Sequential(*layers)
def _self_attention(self):
    """Add self-attention to the result of the fuse.

    `fuse_p_encodes` has shape (batch_size, time, 2 * dim).
    """
    dim = self.fuse_p_encodes.shape[-1]
    atten_layer = SequenceMapperSeq(
        VariationalDropoutLayer(0.8),
        ResidualLayer(SequenceMapperSeq(
            SelfAttention(attention=TriLinear(bias=True),
                          merge=ConcatWithProduct()),
            FullyConnected(dim, activation="relu")
        )),
        VariationalDropoutLayer(0.8)
    )
    self.fuse_p_encodes = atten_layer.apply(self.is_train,
                                            self.fuse_p_encodes,
                                            self.p_length)
def conv_block(input_tensor, kernel_size, filters, stage, block, strides=2):
    """conv_block is the block that has a conv layer at the shortcut.

    # Arguments
        input_tensor: input tensor
        kernel_size: default 3, the kernel size of the middle conv layer
            at the main path
        filters: list of integers, the filters of the 3 conv layers at the
            main path
        stage: integer, current stage label, used for generating layer names
        block: 'a', 'b'..., current block label, used for generating layer names

    # Returns
        Output tensor for the block.

    Note that from stage 3, the first conv layer at the main path has
    strides=(2, 2), and the shortcut has strides=(2, 2) as well.
    """
    filters1, filters2, filters3 = filters
    if K.image_data_format() == 'channels_last':
        bn_axis = 3
    else:
        bn_axis = 1
    conv_name_base = 'res' + str(stage) + block + '_branch'
    bn_name_base = 'bn' + str(stage) + block + '_branch'

    x = Conv2D(filters1, (1, 1), name=conv_name_base + '2a')(input_tensor)
    x = BatchNormalization(axis=bn_axis, name=bn_name_base + '2a')(x)
    x = Activation('relu')(x)

    # The middle convolution is replaced by a self-attention layer.
    # x = Conv2D(filters2, kernel_size, padding='same',
    #            name=conv_name_base + '2b')(x)
    x = SelfAttention(filters2, kernel_size, 8)(x)
    x = BatchNormalization(axis=bn_axis, name=bn_name_base + '2b')(x)
    x = Activation('relu')(x)

    x = Conv2D(filters3, (1, 1), name=conv_name_base + '2c')(x)
    x = BatchNormalization(axis=bn_axis, name=bn_name_base + '2c')(x)
    if strides != 1:
        x = AveragePooling2D((2, 2), strides=strides, padding='same')(x)

    shortcut = Conv2D(filters3, (1, 1), strides=strides,
                      name=conv_name_base + '1')(input_tensor)
    shortcut = BatchNormalization(axis=bn_axis, name=bn_name_base + '1')(shortcut)

    x = layers.add([x, shortcut])
    x = Activation('relu')(x)
    return x
def __init__(self):
    super(Discriminator, self).__init__()
    self.first = FirstResBlockDiscriminator(channels, DISC_SIZE, stride=2)
    self.block1 = ResBlockDiscriminator(DISC_SIZE, DISC_SIZE, stride=2)
    self.block2 = ResBlockDiscriminator(DISC_SIZE, DISC_SIZE)

    # Self-attention between the residual blocks, split into the attention-map
    # computation and the post-attention projection.
    self.attention_size = 16
    self.att = SelfAttention(128, self.attention_size)
    self.att_post = SelfAttentionPost(128, self.attention_size)

    self.block3 = ResBlockDiscriminator(DISC_SIZE, DISC_SIZE)
    self.pool = nn.AvgPool2d(8)

    self.fc = nn.Linear(DISC_SIZE, 1)
    nn.init.xavier_uniform_(self.fc.weight.data, 1.)
    self.fc = SpectralNorm(self.fc)
    self.embed = SpectralNorm(nn.Linear(num_classes, DISC_SIZE))
def identity_block(input_tensor, kernel_size, filters, stage, block):
    """The identity block is the block that has no conv layer at the shortcut.

    # Arguments
        input_tensor: input tensor
        kernel_size: default 3, the kernel size of the middle conv layer
            at the main path
        filters: list of integers, the filters of the 3 conv layers at the
            main path
        stage: integer, current stage label, used for generating layer names
        block: 'a', 'b'..., current block label, used for generating layer names

    # Returns
        Output tensor for the block.
    """
    filters1, filters2, filters3 = filters
    if K.image_data_format() == 'channels_last':
        bn_axis = 3
    else:
        bn_axis = 1
    conv_name_base = 'res' + str(stage) + block + '_branch'
    bn_name_base = 'bn' + str(stage) + block + '_branch'

    x = Conv2D(filters1, (1, 1), name=conv_name_base + '2a')(input_tensor)
    x = BatchNormalization(axis=bn_axis, name=bn_name_base + '2a')(x)
    x = Activation('relu')(x)

    # The middle convolution is replaced by a self-attention layer.
    # x = Conv2D(filters2, kernel_size,
    #            padding='same', name=conv_name_base + '2b')(x)
    x = SelfAttention(filters2, kernel_size, 8)(x)
    x = BatchNormalization(axis=bn_axis, name=bn_name_base + '2b')(x)
    x = Activation('relu')(x)

    x = Conv2D(filters3, (1, 1), name=conv_name_base + '2c')(x)
    x = BatchNormalization(axis=bn_axis, name=bn_name_base + '2c')(x)

    x = layers.add([x, input_tensor])
    x = Activation('relu')(x)
    return x
l2_regularization = 0.001
learning_rate = 0.01
n_x = 32
epochs = 20
time_steps = MAX_LENGTH

# Build model
print("Build model...")
sequence_input = Input(shape=(time_steps, ), dtype='float32')
print('Sequence input is:', sequence_input)  # (batch_size, time_steps=500)
embedded_sequences = embedding_layer(sequence_input)
print('Embedding layer is:', embedded_sequences)  # (batch_size, time_steps=500, embedding_dim=25)

# Self-attention (8 heads, head size 16) over the embedded sequence
self_att = SelfAttention(8, 16)(
    [embedded_sequences, embedded_sequences, embedded_sequences])
L = Bidirectional(
    GRU(n_x,
        activation='tanh',
        dropout=0.2,
        recurrent_dropout=0.1,
        return_sequences=True,
        kernel_initializer='he_uniform',
        name='Pre-BiGRU'))(self_att)
print('Bi-GRU is:', L)  # (batch_size, time_steps, units=32*2)

# Original attention (replaced by the self-attention above):
# L = __attention3DBlock__(L)  # (batch_size, time_steps=500, units=32*2)
# print('Attention layer is:', L)
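# A plausible continuation of the script (illustrative, not from the original
# source): pool the BiGRU sequence, add a regularized dense head, and compile
# with the hyperparameters defined above. Dense, GlobalAveragePooling1D,
# Model, Adam and l2 are assumed to be imported from keras as elsewhere in
# the script.
L = GlobalAveragePooling1D()(L)
output = Dense(1, activation='sigmoid',
               kernel_regularizer=l2(l2_regularization))(L)
model = Model(sequence_input, output)
model.compile(optimizer=Adam(learning_rate=learning_rate),
              loss='binary_crossentropy',
              metrics=['accuracy'])
model.summary()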
def train():
    with tf.device('/cpu:0'):
        x_text, y = data_helpers.load_data_and_labels(FLAGS.train_dir)

    # Build vocabulary
    # Example: x_text[3] = "A misty ridge uprises from the surge."
    # ['a misty ridge uprises from the surge <UNK> <UNK> ... <UNK>']
    # => [27 39 40 41 42 1 43 0 0 ... 0]
    # dimension = FLAGS.max_sentence_length
    vocab_processor = tf.contrib.learn.preprocessing.VocabularyProcessor(
        FLAGS.max_sentence_length)
    x = np.array(list(vocab_processor.fit_transform(x_text)))
    print("Text Vocabulary Size: {:d}".format(len(vocab_processor.vocabulary_)))
    print("x = {0}".format(x.shape))
    print("y = {0}".format(y.shape))
    print("")

    # Randomly shuffle data
    np.random.seed(10)
    shuffle_indices = np.random.permutation(np.arange(len(y)))
    x_shuffled = x[shuffle_indices]
    y_shuffled = y[shuffle_indices]

    # Split train/test set
    # TODO: This is very crude, should use cross-validation
    dev_sample_index = -1 * int(FLAGS.dev_sample_percentage * float(len(y)))
    x_train, x_dev = x_shuffled[:dev_sample_index], x_shuffled[dev_sample_index:]
    y_train, y_dev = y_shuffled[:dev_sample_index], y_shuffled[dev_sample_index:]
    print("Train/Dev split: {:d}/{:d}\n".format(len(y_train), len(y_dev)))

    with tf.Graph().as_default():
        session_conf = tf.ConfigProto(
            allow_soft_placement=FLAGS.allow_soft_placement,
            log_device_placement=FLAGS.log_device_placement)
        sess = tf.Session(config=session_conf)
        with sess.as_default():
            model = SelfAttention(
                sequence_length=x_train.shape[1],
                num_classes=y_train.shape[1],
                vocab_size=len(vocab_processor.vocabulary_),
                embedding_size=FLAGS.embedding_dim,
                hidden_size=FLAGS.hidden_size,
                d_a_size=FLAGS.d_a_size,
                r_size=FLAGS.r_size,
                fc_size=FLAGS.fc_size,
                p_coef=FLAGS.p_coef)

            # Define Training procedure
            global_step = tf.Variable(0, name="global_step", trainable=False)
            train_op = tf.train.AdamOptimizer(FLAGS.learning_rate).minimize(
                model.loss, global_step=global_step)

            # Output directory for models and summaries
            timestamp = str(int(time.time()))
            out_dir = os.path.abspath(os.path.join(os.path.curdir, "runs", timestamp))
            print("Writing to {}\n".format(out_dir))

            # Summaries for loss and accuracy
            loss_summary = tf.summary.scalar("loss", model.loss)
            acc_summary = tf.summary.scalar("accuracy", model.accuracy)

            # Train Summaries
            train_summary_op = tf.summary.merge([loss_summary, acc_summary])
            train_summary_dir = os.path.join(out_dir, "summaries", "train")
            train_summary_writer = tf.summary.FileWriter(train_summary_dir, sess.graph)

            # Dev summaries
            dev_summary_op = tf.summary.merge([loss_summary, acc_summary])
            dev_summary_dir = os.path.join(out_dir, "summaries", "dev")
            dev_summary_writer = tf.summary.FileWriter(dev_summary_dir, sess.graph)

            # Checkpoint directory. TensorFlow assumes this directory already
            # exists, so we need to create it.
            checkpoint_dir = os.path.abspath(os.path.join(out_dir, "checkpoints"))
            checkpoint_prefix = os.path.join(checkpoint_dir, "model")
            if not os.path.exists(checkpoint_dir):
                os.makedirs(checkpoint_dir)
            saver = tf.train.Saver(tf.global_variables(),
                                   max_to_keep=FLAGS.num_checkpoints)

            # Write vocabulary
            vocab_processor.save(os.path.join(out_dir, "vocab"))

            # Initialize all variables
            sess.run(tf.global_variables_initializer())

            # Pre-trained word2vec
            if FLAGS.word2vec:
                # initial matrix with random uniform
                initW = np.random.uniform(
                    -0.25, 0.25,
                    (len(vocab_processor.vocabulary_), FLAGS.embedding_dim))
                # load any vectors from the word2vec binary file
                print("Load word2vec file {0}".format(FLAGS.word2vec))
                with open(FLAGS.word2vec, "rb") as f:
                    header = f.readline()
                    vocab_size, layer1_size = map(int, header.split())
                    binary_len = np.dtype('float32').itemsize * layer1_size
                    for line in range(vocab_size):
                        word = []
                        while True:
                            ch = f.read(1).decode('latin-1')
                            if ch == ' ':
                                word = ''.join(word)
                                break
                            if ch != '\n':
                                word.append(ch)
                        idx = vocab_processor.vocabulary_.get(word)
                        if idx != 0:
                            initW[idx] = np.frombuffer(f.read(binary_len),
                                                       dtype='float32')
                        else:
                            f.read(binary_len)
                sess.run(model.W_text.assign(initW))
                print("Successfully loaded the pre-trained word2vec model!\n")

            # Generate batches
            batches = data_helpers.batch_iter(
                list(zip(x_train, y_train)), FLAGS.batch_size, FLAGS.num_epochs)
            # Training loop. For each batch...
            for batch in batches:
                x_batch, y_batch = zip(*batch)
                # Train
                feed_dict = {
                    model.input_text: x_batch,
                    model.input_y: y_batch
                }
                _, step, summaries, loss, accuracy = sess.run(
                    [train_op, global_step, train_summary_op,
                     model.loss, model.accuracy],
                    feed_dict)
                train_summary_writer.add_summary(summaries, step)

                # Training log display
                if step % FLAGS.display_every == 0:
                    time_str = datetime.datetime.now().isoformat()
                    print("{}: step {}, loss {:g}, acc {:g}".format(
                        time_str, step, loss, accuracy))

                # Evaluation
                if step % FLAGS.evaluate_every == 0:
                    print("\nEvaluation:")
                    batches_dev = data_helpers.batch_iter(
                        list(zip(x_dev, y_dev)), FLAGS.batch_size, 1)
                    # Evaluation loop. For each batch...
                    loss_dev = 0
                    accuracy_dev = 0
                    cnt = 0
                    for batch_dev in batches_dev:
                        x_batch_dev, y_batch_dev = zip(*batch_dev)
                        feed_dict_dev = {
                            model.input_text: x_batch_dev,
                            model.input_y: y_batch_dev
                        }
                        summaries_dev, loss, accuracy = sess.run(
                            [dev_summary_op, model.loss, model.accuracy],
                            feed_dict_dev)
                        dev_summary_writer.add_summary(summaries_dev, step)
                        loss_dev += loss
                        accuracy_dev += accuracy
                        cnt += 1
                    time_str = datetime.datetime.now().isoformat()
                    print("{}: step {}, loss {:g}, acc {:g}".format(
                        time_str, step, loss_dev / cnt, accuracy_dev / cnt))

                # Model checkpoint
                if step % FLAGS.checkpoint_every == 0:
                    path = saver.save(sess, checkpoint_prefix, global_step=step)
                    print("Saved model checkpoint to {}\n".format(path))
def ResNet50(include_top=True,
             input_shape=(224, 224, 3),
             pooling=None,
             classes=1000,
             stem="SA",
             repeat=[1, 2, 4, 1]):
    """Instantiates the ResNet50 architecture.

    Note that when using TensorFlow, for best performance you should set
    `image_data_format="channels_last"` in your Keras config at
    ~/.keras/keras.json. The data format convention used by the model is the
    one specified in your Keras config file.

    # Arguments
        include_top: whether to include the fully-connected layer at the
            top of the network.
        input_shape: shape tuple, `(224, 224, 3)` with `channels_last` data
            format or `(3, 224, 224)` with `channels_first`. It should have
            exactly 3 input channels, and width and height should be no
            smaller than 197. E.g. `(200, 200, 3)` would be one valid value.
        pooling: optional pooling mode for feature extraction when
            `include_top` is `False`.
            - `None` means that the output of the model will be the 4D tensor
                output of the last convolutional layer.
            - `avg` means that global average pooling will be applied to the
                output of the last convolutional layer, and thus the output
                of the model will be a 2D tensor.
            - `max` means that global max pooling will be applied.
        classes: optional number of classes to classify images into, only to
            be specified if `include_top` is True.
        stem: `'conv'` for the standard convolutional stem, `'SA'` for a
            self-attention stem.
        repeat: number of identity blocks per stage.

    # Returns
        A Keras model instance.
    """
    img_input = Input(input_shape)
    if K.image_data_format() == 'channels_last':
        bn_axis = 3
    else:
        bn_axis = 1

    if stem == 'conv':
        x = Conv2D(64, (7, 7), strides=(1, 1), name='conv1',
                   padding="same")(img_input)
        x = BatchNormalization(axis=bn_axis, name='bn_conv1')(x)
        x = Activation('relu')(x)
        x = MaxPooling2D((3, 3), strides=(1, 1))(x)
    elif stem == 'SA':
        x = SelfAttention(hidden_dim=64, k_size=4, Nh=1, strides=1,
                          padding='SAME', m_for_stem=4)(img_input)
        # x = MaxPooling2D((4, 4), strides=(4, 4))(x)

    # Identity blocks must keep each stage's channel count so the residual
    # add works.
    x = conv_block(x, 7, [64, 64, 256], stage=2, block='a', strides=1)
    for i in range(repeat[0]):
        x = identity_block(x, 7, [64, 64, 256], stage=2, block=chr(98 + i))

    x = conv_block(x, 7, [128, 128, 512], stage=3, block='a')
    for i in range(repeat[1]):
        x = identity_block(x, 7, [128, 128, 512], stage=3, block=chr(98 + i))

    x = conv_block(x, 7, [256, 256, 1024], stage=4, block='a')
    for i in range(repeat[2]):
        x = identity_block(x, 7, [256, 256, 1024], stage=4, block=chr(98 + i))

    x = conv_block(x, 7, [512, 512, 2048], stage=5, block='a')
    for i in range(repeat[3]):
        x = identity_block(x, 7, [512, 512, 2048], stage=5, block=chr(98 + i))

    x = AveragePooling2D((4, 4), name='avg_pool')(x)

    if include_top:
        x = Flatten()(x)
        x = Dense(classes, activation='softmax', name='fc1000')(x)
    else:
        if pooling == 'avg':
            x = GlobalAveragePooling2D()(x)
        elif pooling == 'max':
            x = GlobalMaxPooling2D()(x)

    inputs = img_input
    # Create model.
    model = Model(inputs, x, name='resnet50')
    return model
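# Building the self-attention-stem variant (illustrative; assumes the Keras
# imports used throughout these blocks are in scope):
model = ResNet50(include_top=True, input_shape=(224, 224, 3),
                 classes=1000, stem='SA')
model.compile(optimizer='sgd',
              loss='categorical_crossentropy',
              metrics=['accuracy'])
model.summary()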
def build_model(word_index):
    embedding_matrix = get_embedding_matrix(word_index)
    print('Building model...')
    embedding_layer = Embedding(len(word_index) + 1,
                                EMBEDDING_DIM,
                                weights=[embedding_matrix],
                                trainable=True)

    # model
    # ------ news encoder -------
    title_input = Input(shape=(MAX_TITLE_LENGTH, ), dtype='int32')
    title_embedded_sequences = embedding_layer(title_input)
    title_embedded_sequences = Dropout(0.2)(title_embedded_sequences)
    title_selfattention = SelfAttention(16, 16)([
        title_embedded_sequences,
        title_embedded_sequences,
        title_embedded_sequences
    ])
    title_selfattention = Dropout(0.2)(title_selfattention)
    news_r = Attention(200)(title_selfattention)
    news_encoder = Model([title_input], news_r, name='news_encoder')
    # from tensorflow.keras.utils import plot_model
    # plot_model(news_encoder, to_file='news_encoder.png', show_shapes=True)

    # ----- user encoder -----
    browsed_title_input = Input((MAX_BROWSED, MAX_TITLE_LENGTH, ),
                                dtype='int32', name='b_t')
    browsed_news = TimeDistributed(news_encoder)(browsed_title_input)
    user_input = Input((MAX_BROWSED, 256, ), name='user_input')
    user_r = SelfAttention(16, 16)([user_input, user_input, user_input])
    user_r = Dropout(0.2)(user_r)
    user_r = Attention(200)(user_r)
    user_encoder = Model(user_input, user_r, name='user_encoder')
    train_user_r = user_encoder(browsed_news)
    test_user_r = Input((256, ), name='test_user_r')

    # ----- candidate news -----
    candidate_title_input = Input((1 + NEG_SAMPLE, MAX_TITLE_LENGTH, ),
                                  dtype='int32', name='c_t')
    candidate_r = TimeDistributed(news_encoder)(candidate_title_input)
    candidate_one_r = Input((256, ), name="c_t_1")

    # ----- click predictor -----
    pred = Dot(axes=-1)([train_user_r, candidate_r])
    pred = Activation(activation='softmax')(pred)
    model = Model([browsed_title_input, candidate_title_input], pred)

    pred_one = Dot(axes=-1)([test_user_r, candidate_one_r])
    pred_one = Activation(activation='sigmoid')(pred_one)
    model_test = Model([test_user_r, candidate_one_r], pred_one)

    return news_encoder, user_encoder, model, model_test
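# Training sketch for the recommendation model above (illustrative, not from
# the original source; `word_index`, `browsed_titles`, `candidate_titles` and
# `labels` are hypothetical). Each training sample scores 1 + NEG_SAMPLE
# candidates via softmax, so categorical cross-entropy is the natural loss.
news_encoder, user_encoder, model, model_test = build_model(word_index)
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])
# model.fit([browsed_titles, candidate_titles], labels,
#           epochs=..., batch_size=...)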