def OHDecoder_SEQ(hidden_size, noise_layer, noise_param):
    model = tf.keras.Sequential()
    model.add(noise_layer(noise_param))
    model.add(Dense(hidden_size, activation='relu'))
    model.add(LayerNormalization())
    model.add(Dense(256, activation='softmax'))
    model.compile(optimizer='adam',
                  loss='categorical_crossentropy',
                  metrics=[ber_metric_oh, 'acc'])
    return model
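# Usage sketch (assumptions: GaussianNoise stands in for `noise_layer`, the
# decoder consumes the 16-dim code produced by the Encoder below, and
# `ber_metric_oh` is a project-specific metric defined elsewhere).
decoder = OHDecoder_SEQ(hidden_size=512,
                        noise_layer=tf.keras.layers.GaussianNoise,
                        noise_param=0.1)
decoder.build(input_shape=(None, 16))
decoder.summary()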
def __init__(self):
    super(ConvBlock, self).__init__()
    self.conv = Conv1D(filters=hparams.enc_conv_dim,
                       kernel_size=5,
                       padding='same',
                       dilation_rate=1)
    self.speaker_proj = Dense(hparams.enc_conv_dim)
    self.ln = LayerNormalization()
    self.dropout = tf.keras.layers.Dropout(hparams.enc_conv_dropout)
def Encoder(hidden_sizes, use_BN=False, use_LN=False, use_WN=False,
            encoder_activation='tanh', hidden_activation='relu',
            enc_kernel_reg=None, enc_activity_reg=None, additional_layers=[]):
    inputs = tf.keras.Input(shape=(8,))
    enc = inputs
    for size in hidden_sizes:
        dense = Dense(size, activation=hidden_activation)
        if use_WN:
            dense = WeightNormalization(dense)
        enc = dense(enc)
        if use_BN:
            enc = BatchNormalization()(enc)
        if use_LN:
            enc = LayerNormalization()(enc)
    enc = Dense(16, activation=encoder_activation,
                kernel_regularizer=enc_kernel_reg,
                activity_regularizer=enc_activity_reg)(enc)
    for add_layer in additional_layers:
        enc = add_layer()(enc)
    model = tf.keras.Model(inputs, enc)
    return model
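# Usage sketch (assumption: WeightNormalization comes from tensorflow_addons,
# e.g. `from tensorflow_addons.layers import WeightNormalization`; it is only
# needed when use_WN=True).
encoder = Encoder(hidden_sizes=[64, 32], use_LN=True)
encoder.summary()  # maps an 8-dim input to a 16-dim tanh-activated code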
def __init__(self, hidden_size, num_attention_heads, attention_probs_dropout_prob,
             hidden_dropout_prob, intermediate_size, hidden_act, initializer_range,
             name="encoder_layer", **kwargs):
    super(Encoder, self).__init__(**kwargs)
    self.hidden_size = hidden_size
    self.num_attention_heads = num_attention_heads
    self.attention_probs_dropout_prob = attention_probs_dropout_prob
    self.hidden_dropout_prob = hidden_dropout_prob
    self.intermediate_size = intermediate_size
    self.hidden_act = hidden_act
    self.initializer_range = initializer_range
    self.attn_layer = AttentionLayer(hidden_size=hidden_size,
                                     num_attention_heads=num_attention_heads,
                                     attention_probs_dropout_prob=attention_probs_dropout_prob,
                                     initializer_range=initializer_range,
                                     name="attention_layer")
    self.attn_dense_layer = Dense(hidden_size,
                                  kernel_initializer=create_initializer(initializer_range),
                                  name="attention_dense_layer")
    self.attn_dropout_layer = Dropout(attention_probs_dropout_prob,
                                      name="attention_dropout_layer")
    self.attn_layerNorm_layer = LayerNormalization(name="attention_layerNorm_layer")
    self.inter_encoder_layer = Dense(intermediate_size,
                                     activation=hidden_act,
                                     kernel_initializer=create_initializer(initializer_range),
                                     name="intermediate_encoder_layer")
    self.inter_decoder_layer = Dense(hidden_size,
                                     kernel_initializer=create_initializer(initializer_range),
                                     name="intermediate_decoder_layer")
    self.inter_dropout_layer = Dropout(hidden_dropout_prob,
                                       name="intermediate_dropout_layer")
    self.inter_layerNorm_layer = LayerNormalization(name="intermediate_layerNorm_layer")
def __init__(self, heads=8, model_dim=512, units_dim=512, epsilon=0.001,
             drop_rate=0.2, **kwargs):
    self.heads = heads
    self.model_dim = model_dim
    self.multi_head_attention = MultiHeadAttention(self.heads, model_dim=model_dim,
                                                   mode="encoder")
    self.ff_netword = FeedForwardNetwork(units_dim, model_dim)
    self.layer_norm1 = LayerNormalization(epsilon=epsilon)
    self.layer_norm2 = LayerNormalization(epsilon=epsilon)
    self.dropout1 = Dropout(drop_rate)
    self.dropout2 = Dropout(drop_rate)
    self.dropout3 = Dropout(drop_rate)
    super(EncoderLayer, self).__init__(**kwargs)
def __init__(self, model_dim=256, num_heads=8, num_encoder_layers=6,
             num_decoder_layers=6, dim_feedforward=2048, dropout=0.1,
             activation='relu', normalize_before=False,
             return_intermediate_dec=False, **kwargs):
    super().__init__(**kwargs)
    self.model_dim = model_dim
    self.num_heads = num_heads

    enc_norm = (LayerNormalization(epsilon=1e-5, name='norm_pre')
                if normalize_before else None)
    self.encoder = TransformerEncoder(model_dim, num_heads, dim_feedforward,
                                      dropout, activation, normalize_before,
                                      enc_norm, num_encoder_layers, name='encoder')

    dec_norm = LayerNormalization(epsilon=1e-5, name='norm')
    self.decoder = TransformerDecoder(model_dim, num_heads, dim_feedforward,
                                      dropout, activation, normalize_before,
                                      dec_norm, num_decoder_layers, name='decoder',
                                      return_intermediate=return_intermediate_dec)
def feedforward(self, inp):
    """
    1D convolution layer (temporal convolution).

    This layer creates a convolution kernel that is convolved with the layer
    input over a single spatial (or temporal) dimension to produce a tensor
    of outputs.
    """
    ff = Conv1D(self.d_model, 1, dilation_rate=1, use_bias=False)(inp)
    norm = LayerNormalization(axis=2, epsilon=1e-6)(ff)
    act = ReLU()(norm)
    return act
def __init__(self, d_model, num_heads=1, ffn_hidden_unit=128, dropout=0.,
             norm_training=True, causality=True):
    """
    Self Attention Block
    :param d_model: A scalar. The self-attention hidden size.
    :param num_heads: A scalar. Number of heads.
    :param ffn_hidden_unit: A scalar. Number of hidden units in the FFN.
    :param dropout: A scalar. Dropout rate.
    :param norm_training: Boolean. If True, the layer-normalization parameters are trainable. Default True.
    :param causality: Boolean. If True, apply a causal attention mask. Default True.
    """
    super(SelfAttentionBlock, self).__init__()
    self.mha = MultiHeadAttention(d_model, num_heads, causality)
    self.ffn = FFN(ffn_hidden_unit, d_model)
    self.layernorm1 = LayerNormalization(epsilon=1e-6, trainable=norm_training)
    self.layernorm2 = LayerNormalization(epsilon=1e-6, trainable=norm_training)
    self.dropout1 = Dropout(dropout)
    self.dropout2 = Dropout(dropout)
def build(self, input_shape):
    # Two multi-head attention blocks are used here; the first one is self-attention.
    # Self-attention requires a (look-ahead) mask.
    self.self_attention = MultiHeadAttention(self.num_head, input_shape[0][-1],
                                             self.d_r, masked=True)
    self.layer_norm1 = LayerNormalization(input_shape=input_shape)
    # The encoder-decoder attention does not need a mask.
    self.multi_attention = MultiHeadAttention(self.num_head, input_shape[0][-1], self.d_r)
    self.layer_norm2 = LayerNormalization(input_shape=input_shape)
    self.dense1 = Dense(input_shape[0][-1] * 4, input_shape=input_shape[0],
                        activation='relu')
    # Passing input_shape[0] (not the full nested input_shape) avoids the error:
    # "Dimension value must be integer or None or have an __index__ method,
    #  got TensorShape([None, 65, 16])"
    self.dense2 = Dense(input_shape[0][-1],
                        input_shape=self.dense1.compute_output_shape(input_shape[0]))
    self.layer_norm3 = LayerNormalization(input_shape=input_shape)
    super().build(input_shape)
def wrap_residual_with_dropout(input_layer, name, NextLayer, dropout, epsilon, **kwargs):
    logger.debug(
        f'Adding layer "{name}" - {NextLayer.__name__} w/ residual: {kwargs}')
    next_layer = NextLayer(name=name, **kwargs)(input_layer)
    if dropout:
        next_layer = Dropout(rate=dropout, name=f"{name}_dropout")(next_layer)
    residual_layer = Add(name=f"{name}_res")([input_layer, next_layer])
    return LayerNormalization(epsilon=epsilon, name=f"{name}_layernorm")(residual_layer)
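# Usage sketch (assumptions: `logger` is a standard `logging` logger, and the
# wrapped layer preserves the feature dimension so the residual Add is shape-valid).
inp = tf.keras.Input(shape=(128, 64))
out = wrap_residual_with_dropout(inp, name="ffn", NextLayer=Dense,
                                 dropout=0.1, epsilon=1e-6, units=64)
residual_model = tf.keras.Model(inp, out)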
def __init__(
    self,
    inp,
    n_outp,
    d_model,
    n_blocks,
    n_heads,
    warmup_steps,
    max_len,
    causal,
    outp_act,
):
    """
    Argument/s:
        inp - input placeholder.
        n_outp - number of outputs.
        d_model - model size.
        n_blocks - number of blocks.
        n_heads - number of attention heads.
        warmup_steps - number of warmup steps.
        max_len - maximum length for positional encoding.
        causal - causal flag.
        outp_act - output activation function.
    """
    self.n_outp = n_outp
    self.d_model = d_model
    self.n_blocks = n_blocks
    self.n_heads = n_heads
    self.d_ff = d_model * 4
    self.max_len = max_len
    self.warmup_steps = warmup_steps
    self.d_k = self.d_model // self.n_heads

    att_mask = AttentionMaskV2(causal)(inp)
    x = Conv1D(self.d_model, 1, use_bias=False)(inp)
    x = LayerNormalization(axis=2, epsilon=1e-6, center=True, scale=True)(x)
    x = ReLU()(x)

    # Add positional encoding.
    position_idx = tf.tile([tf.range(tf.shape(x)[1])], [tf.shape(x)[0], 1])
    positional_encoding = Embedding(self.max_len, self.d_model)(position_idx)
    x = Add()([x, positional_encoding])

    for _ in range(self.n_blocks):
        x = self.block(x, att_mask)

    self.outp = Conv1D(self.n_outp, 1, use_bias=True)(x)
    if outp_act == "Sigmoid":
        self.outp = Activation('sigmoid')(self.outp)
    elif outp_act == "ReLU":
        self.outp = ReLU()(self.outp)
    elif outp_act == "Linear":
        self.outp = self.outp
    else:
        raise ValueError("Invalid outp_act")
def define_encoder_block(layer_in, n_filters, batchnorm=True):
    init = RandomNormal(stddev=0.02)
    g = Conv2D(n_filters, (4, 4), strides=(2, 2), padding='same',
               kernel_initializer=init)(layer_in)
    if batchnorm:
        g = LayerNormalization()(g, training=True)
    g = Activation('relu')(g)
    return g
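# Usage sketch: stacking a few encoder blocks on a 128x128 RGB input
# (the shapes and filter counts are illustrative only).
image_in = tf.keras.Input(shape=(128, 128, 3))
e1 = define_encoder_block(image_in, 64, batchnorm=False)
e2 = define_encoder_block(e1, 128)
e3 = define_encoder_block(e2, 256)
downsampler = tf.keras.Model(image_in, e3)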
def __init__(self, model_depth, num_heads, feed_forward_depth, dropout_rate=0.1):
    super(EncoderLayer, self).__init__()
    self.multi_headed_attention = MHA(model_depth, num_heads)
    # first layer of the point-wise feed-forward network uses ReLU
    self.pw_feedf_net_relu = Dense(feed_forward_depth, activation='relu')
    # output projection of the point-wise feed-forward network
    self.pw_feedf_net_out = Dense(model_depth)
    self.layerNormalization1 = LayerNormalization(epsilon=1e-6)
    self.layerNormalization2 = LayerNormalization(epsilon=1e-6)
    self.dropout1 = Dropout(dropout_rate)
    self.dropout2 = Dropout(dropout_rate)
def __init__(self, units, emb_dim, head, dropout_rate):
    # call the parent constructor first (assuming this subclasses tf.keras.layers.Layer)
    super().__init__()
    self.self_attention = MultiHeadAttention(emb_dim=emb_dim, head=head,
                                             name="attention_1")
    self.attention2 = MultiHeadAttention(emb_dim=emb_dim, head=head,
                                         name="attention_2")
    self.layernormalization = LayerNormalization(epsilon=1e-6)
    self.dropout = Dropout(rate=dropout_rate)
    self.ff1 = Dense(units=units, activation='relu')
    self.ff2 = Dense(units=emb_dim)
def addSmallModel(inputModel):
    inputModel.add(LayerNormalization())
    inputModel.add(Flatten())
    inputModel.add(Dense(1024, activation='relu', name='fc1'))
    inputModel.add(Dropout(0.5))
    inputModel.add(Dense(256, activation='relu', name='fc2'))
    # inputModel.add(LayerNormalization())
    inputModel.add(Dropout(0.5))
    inputModel.add(Dense(total_classes, activation='softmax', name='fc3'))
    # inputModel.summary()
    return inputModel
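# Usage sketch (assumptions: `total_classes` is a module-level constant, and the
# base model is any Sequential feature extractor; a tiny conv stack is used
# here purely for illustration).
base = tf.keras.Sequential([
    tf.keras.layers.Conv2D(32, 3, activation='relu', input_shape=(32, 32, 3)),
])
classifier = addSmallModel(base)
classifier.compile(optimizer='adam', loss='categorical_crossentropy',
                   metrics=['acc'])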
def nvidia():
    """ Implementation of Nvidia's End-to-End Learning model for Self-driving cars """
    global X_train, y_train

    # Model Design
    inputs = Input(shape=(160, 320, 3))
    cropped = Cropping2D(cropping=((64, 0), (0, 0)))(inputs)
    resized_input = Lambda(lambda image: tf.image.resize(image, (66, 200)))(cropped)
    normalize_layer = LayerNormalization(axis=1)(resized_input)
    conv1 = Conv2D(filters=24, kernel_size=5, strides=(2, 2), activation='relu')(normalize_layer)
    conv2 = Conv2D(filters=36, kernel_size=5, strides=(2, 2), activation='relu')(conv1)
    conv3 = Conv2D(filters=48, kernel_size=5, strides=(2, 2), activation='relu')(conv2)
    conv4 = Conv2D(filters=64, kernel_size=3, activation='relu')(conv3)
    conv5 = Conv2D(filters=64, kernel_size=3, activation='relu')(conv4)
    flatten = Flatten()(conv5)
    dense1 = Dense(100, activation='relu')(flatten)
    dense2 = Dense(50, activation='relu')(dense1)
    dense3 = Dense(10, activation='relu')(dense2)
    out = Dense(1, activation='linear')(dense3)

    # Specifications and training
    checkpoint = ModelCheckpoint(filepath="./ckpts/model_nvidia.h5",
                                 monitor='val_loss', save_best_only=True)
    stopper = EarlyStopping(monitor='val_loss', min_delta=0.0003, patience=10)
    lr_schedule = ExponentialDecay(initial_learning_rate=0.0001,
                                   decay_steps=100000, decay_rate=0.95)
    optimizer = Adam(learning_rate=lr_schedule)
    loss = Huber(delta=0.5, reduction="auto", name="huber_loss")
    t2 = time()
    model = Model(inputs=inputs, outputs=out)
    model.compile(loss=loss, optimizer=optimizer)
    result = model.fit(X_train, y_train, validation_split=0.2, shuffle=True,
                       epochs=100, callbacks=[checkpoint, stopper])

    # Visualization of loss variations across epochs
    plt.plot(result.history['loss'])
    plt.plot(result.history['val_loss'])
    plt.title('Huber Loss')
    plt.ylabel('Loss')
    plt.xlabel('Epoch')
    plt.legend(['Training set', 'Validation set'], loc='upper right')
    plt.savefig('loss.png')
    plt.show()

    print("Time taken to train: {:.2f}s".format(time() - t2))
    model.load_weights('./ckpts/model_nvidia.h5')
    model.save('model.h5')
def build(self, input_classes, output_classes):
    self.embedding = Embedding(input_classes, self.feature_maps, name="embedding")
    self.start_conv = Conv2D(self.feature_maps, self.kernel_size, padding="same")
    self.residual_tower = [
        ResidualBlock(self.feature_maps, self.kernel_size, name=f"res_block_{i}")
        for i in range(self.tower_size)
    ]
    self.norm = LayerNormalization(axis=-1, name="last_norm")
    self.linear = Dense(output_classes, name="linear")
def __init__(self):
    super().__init__(name="critic")
    init = RandomNormal(stddev=0.02)

    # Layers
    self.conv_1 = Conv2D(64, (5, 5), strides=(2, 2), padding='same',
                         kernel_initializer=init, input_shape=[28, 28, 1])
    self.leaky_1 = LeakyReLU(alpha=0.2)
    self.conv_2 = Conv2D(128, (5, 5), strides=(2, 2), padding='same',
                         kernel_initializer=init)
    self.layer_norm_2 = LayerNormalization()
    self.leaky_2 = LeakyReLU(alpha=0.2)
    self.conv_3 = Conv2D(256, (5, 5), strides=(2, 2), padding='same',
                         kernel_initializer=init)
    self.layer_norm_3 = LayerNormalization()
    self.leaky_3 = LeakyReLU(alpha=0.2)
    self.flat = Flatten()
    self.logits = Dense(1)  # this neuron tells us if the input is fake or real
    self.optimizer = Adam(learning_rate=0.0001, beta_1=0, beta_2=0.9)
def encode_block(x, num_heads, dim, bias, block_number):
    """ single encoder layer from transformer paper """
    # mha + add/norm
    mha_in = [x, x, x] if bias is None else [x, x, x, bias]
    _x = MultiheadAttention(dim, dim // 2, dim // 2, num_heads,
                            name=f'mha_{block_number}')(mha_in)
    x = Add()([x, _x])
    x = LayerNormalization()(x)
    # ffn + add/norm
    _x = Dense(2 * dim, activation='relu')(x)
    _x = Dense(dim)(_x)
    x = Add()([x, _x])
    x = LayerNormalization()(x)
    return x
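# Usage sketch (assumption: MultiheadAttention is the project's custom layer
# with signature (output_dim, key_dim, value_dim, num_heads) that accepts a
# [q, k, v] or [q, k, v, bias] list).
seq_in = tf.keras.Input(shape=(50, 64))
h = seq_in
for i in range(2):
    h = encode_block(h, num_heads=4, dim=64, bias=None, block_number=i)
stacked_encoder = tf.keras.Model(seq_in, h)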
def build(self, input_shape):
    with K.name_scope(self.name):  # name scope used to make sure weights get unique names
        self.layers = []
        self.res_output_shape = input_shape

        for k in range(2):
            name = 'conv1D_{}'.format(k)
            with K.name_scope(name):  # name scope used to make sure weights get unique names
                self._add_and_activate_layer(Conv1D(filters=self.nb_filters,
                                                    kernel_size=self.kernel_size,
                                                    dilation_rate=self.dilation_rate,
                                                    padding=self.padding,
                                                    name=name,
                                                    kernel_initializer=self.kernel_initializer))

            with K.name_scope('norm_{}'.format(k)):
                if self.use_batch_norm:
                    self._add_and_activate_layer(BatchNormalization())
                elif self.use_layer_norm:
                    self._add_and_activate_layer(LayerNormalization())

            self._add_and_activate_layer(Activation('relu'))
            self._add_and_activate_layer(SpatialDropout1D(rate=self.dropout_rate))

        if self.nb_filters != input_shape[-1]:
            # 1x1 conv to match the shapes (channel dimension).
            name = 'matching_conv1D'
            with K.name_scope(name):
                # make and build this layer separately because it directly uses input_shape
                self.shape_match_conv = Conv1D(filters=self.nb_filters,
                                               kernel_size=1,
                                               padding='same',
                                               name=name,
                                               kernel_initializer=self.kernel_initializer)
        else:
            name = 'matching_identity'
            self.shape_match_conv = Lambda(lambda x: x, name=name)

        with K.name_scope(name):
            self.shape_match_conv.build(input_shape)
            self.res_output_shape = self.shape_match_conv.compute_output_shape(input_shape)

        self.final_activation = Activation(self.activation)
        self.final_activation.build(self.res_output_shape)  # probably isn't necessary

        # this is done to force Keras to add the layers in the list to self._layers
        for layer in self.layers:
            self.__setattr__(layer.name, layer)
        self.__setattr__(self.shape_match_conv.name, self.shape_match_conv)
        self.__setattr__(self.final_activation.name, self.final_activation)

        super(ResidualBlock, self).build(input_shape)  # done to make sure self.built is set True
def __init__(self, vocab_size, hidden_dim, input_length=10, **kwargs):
    super(ContextEmbeddingLayer, self).__init__(**kwargs)
    self.embedding = Embedding(vocab_size, hidden_dim,
                               input_length=input_length, name="Embedding")
    # add_weight expects a shape tuple and the 'zeros' initializer alias.
    self.bias = self.add_weight(shape=(hidden_dim,), dtype=tf.float32,
                                initializer='zeros', name="Embedding_bias")
    self.norm = LayerNormalization(axis=-2, name='norm')
def __init__(self, vocab_size, d_model, num_layer, dff, head_count, dropout):
    super(Encoder, self).__init__()
    self.num_layer = num_layer
    # Embedding takes (input_dim=vocab_size, output_dim=d_model).
    self.embedding = Embedding(vocab_size, d_model)
    self.layers = [
        EncoderLayer(d_model, dff, head_count, dropout)
        for _ in range(num_layer)
    ]
    self.pe = PositionalEncoding(d_model, dropout=dropout)
    # Keras LayerNormalization takes an axis, not a size; normalize the last axis.
    self.norm = LayerNormalization()
def __init__(self, d_model=256, num_heads=4, dff=256, rate=0.1, eps=1e-6, **kwargs):
    super(EncoderLayer, self).__init__(**kwargs)
    self.d_model = d_model
    self.num_heads = num_heads
    self.dff = dff
    self.rate = rate
    self.eps = eps
    self.mha = MultiHeadAttention(d_model, num_heads)
    self.ffn = _point_wise_feed_forward_network(d_model, dff)
    self.layernorm1 = LayerNormalization(epsilon=eps)
    self.layernorm2 = LayerNormalization(epsilon=eps)
    self.dropout1 = Dropout(rate)
    self.dropout2 = Dropout(rate)
def __init__(self, model_dim=256, num_heads=8, dim_feedforward=2048,
             dropout=0.1, activation='relu', normalize_before=False, **kwargs):
    super().__init__(**kwargs)
    self.self_attn = MultiHeadAttention(model_dim, num_heads, dropout=dropout,
                                        name='self_attn')
    self.multihead_attn = MultiHeadAttention(model_dim, num_heads, dropout=dropout,
                                             name='multihead_attn')
    self.dropout = Dropout(dropout)
    self.activation = Activation(activation)
    self.linear1 = Linear(dim_feedforward, name='linear1')
    self.linear2 = Linear(model_dim, name='linear2')
    self.norm1 = LayerNormalization(epsilon=1e-5, name='norm1')
    self.norm2 = LayerNormalization(epsilon=1e-5, name='norm2')
    self.norm3 = LayerNormalization(epsilon=1e-5, name='norm3')
    self.normalize_before = normalize_before
def simple_be(hparams):
    inputs = Input(AUDIO_SHAPE)
    x = LogMelSpectrogram()(inputs)
    x = LayerNormalization(axis=2, name='batch_norm')(x)
    x = Flatten()(x)
    x = Dense(512, activation='relu')(x)
    x = Dense(512, activation='relu')(x)
    x = Dense(512, activation='relu')(x)
    y = Dense(hparams.num_classes)(x)
    model = tf.keras.Model(inputs, y)
    return model
def distance_module(x, num_channels, num_heads=8, max_distance=30):
    distance_layer = MultiHeadDistanceLayer(num_heads, num_channels, 'local',
                                            num_channels // num_heads,
                                            distance_norm=True,
                                            max_distance=max_distance,
                                            smooth_embedding_ratio=8)
    distance_layer = tf.recompute_grad(distance_layer)
    distance = distance_layer(x)
    # distance = BatchNormalization()(distance)
    distance = LayerNormalization()(distance)
    return distance
def NERModel(vocab_size, num_classes):
    inp_ = Input(shape=(None,))
    x = Embedding(vocab_size, CONFIG.embed_dims)(inp_)
    x = Dropout(CONFIG.dropout)(x)
    for i in range(CONFIG.num_layers):
        x = GRU(CONFIG.hidden_dims, dropout=CONFIG.dropout,
                return_sequences=True)(x)
        # assign the clipped tensor; the original call discarded its result
        x = tf.clip_by_value(x, -1, 1)
        x = LayerNormalization()(x)
    x = Dense(num_classes, activation='softmax')(x)
    model = Model(inp_, x)
    return model
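# Usage sketch (assumption: CONFIG is a simple hyperparameter namespace; the
# field names below mirror the ones NERModel reads).
from types import SimpleNamespace
CONFIG = SimpleNamespace(embed_dims=128, hidden_dims=128, num_layers=2, dropout=0.2)
ner = NERModel(vocab_size=20000, num_classes=9)
ner.compile(optimizer='adam', loss='sparse_categorical_crossentropy')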
def Linear_Lstm(input_shape, num_classes):
    inputs = Input(shape=(input_shape[1],))
    x = Dense(512, activation='relu')(inputs)
    x = LayerNormalization()(x)
    # x = Dropout(0.3)(x)
    x = Dense(512, activation='relu')(x)
    x = LayerNormalization()(x)
    x = Dropout(0.3)(x)
    x = tf.concat([x, inputs], axis=1)
    x = Dense(256, activation='relu')(x)
    x = LayerNormalization()(x)
    # x = Dropout(0.3)(x)
    x = Dense(128)(x)
    x = LayerNormalization()(x)
    # x = Dropout(0.3)(x)
    outputs = Dense(num_classes, activation='softmax')(x)
    return tf.keras.models.Model(inputs=[inputs], outputs=[outputs])
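# Usage sketch: a tabular input with 40 features and 5 target classes
# (both numbers are illustrative).
mlp = Linear_Lstm(input_shape=(None, 40), num_classes=5)
mlp.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['acc'])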
def transformer_regression(units, x):
    query = Dense(8)(x)
    value = Dense(8)(x)
    key = Dense(8)(x)
    query, value, key = [
        tf.expand_dims(t, axis=1) for t in [query, value, key]
    ]
    x = Attention()([query, value, key])
    x = LayerNormalization()(x)
    x = GlobalAveragePooling1D(data_format='channels_last')(x)
    x = Dense(units)(x)
    return x
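# Usage sketch: wiring the attention head onto a 32-dim feature vector and
# regressing a single target (shapes are illustrative).
feat_in = tf.keras.Input(shape=(32,))
reg_out = Dense(1)(transformer_regression(16, feat_in))
reg_model = tf.keras.Model(feat_in, reg_out)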
def __init__(self, batch_size, num_mel, num_linear):
    super(Decoder, self).__init__()
    self.batch_size = batch_size
    self.num_mel = num_mel
    self.num_linear = num_linear
    self.prenet1 = Conv1D(filters=hparams.dec_prenet_dim, kernel_size=5,
                          padding='causal')
    self.prenet1_drop = Dropout(hparams.dec_prenet_dropout)
    self.prenet2 = Dense(hparams.dec_prenet_dim, 'relu')
    self.prenet_ln = LayerNormalization()
    self.query_lstm = LSTM(hparams.dec_prenet_dim, return_sequences=True)
    self.query_lstm_drop = Dropout(hparams.dec_query_lstm_dropout)
    self.query_proj = Dense(hparams.query_key_dim)
    self.skip_proj = Dense(hparams.value_dim)
    self.attention = ScaledDotProductAttention(batch_size)
    self.context_ln = LayerNormalization()
    self.lstm1 = LSTM(hparams.dec_lstm_dim, return_sequences=True)
    self.lstm2 = LSTM(hparams.dec_lstm_dim, return_sequences=True)
    self.mel_ln = LayerNormalization()
    self.mel_proj = Dense(self.num_mel, 'sigmoid')
    self.stop = Dense(1)
    self.post_conv1 = Conv1D(filters=self.num_mel * 2, kernel_size=3, padding='same')
    self.post_conv1_ln = LayerNormalization()
    self.post_conv2 = Conv1D(filters=self.num_linear // 2, kernel_size=3, padding='same')
    self.post_conv3 = Conv1D(filters=self.num_linear, kernel_size=3, padding='same')