def build_LSTMCellwRNN_model(mdlstm_units=32, dense_units=200):
    """Build and compile the two-branch LSTM model with a CTC loss layer.

    One branch runs an LSTM over the reshaped image, the other over its
    transpose. Both projected sequences are summed and classified with a
    softmax over ``len(alphabet) + 1`` outputs.

    Args:
        mdlstm_units: number of units in each ``LSTMCell``.
        dense_units: NOTE(review): accepted but never read; the branch
            projections are hard-coded to 100 and 200 units, which
            presumably requires a 200 x 100 image for ``Add`` to line
            up -- confirm.

    Returns:
        The compiled model taking ``[image, label]`` inputs.
    """
    dense_act = 'tanh'

    input_img = layers.Input(
        shape=(max_img_width, max_img_height, 1),
        name='image',
        dtype='float32',
    )
    labels = layers.Input(name='label', shape=(None, ), dtype='float32')

    # Drop the trailing channel axis.
    to_2d = layers.Reshape(target_shape=(max_img_width, max_img_height))
    input_reshaped = to_2d(input_img)

    # Branch over the first image axis.
    first_axis_rnn = layers.RNN(layers.LSTMCell(mdlstm_units),
                                return_sequences=True)
    x = first_axis_rnn(input_reshaped)
    x = layers.Dense(100, activation=dense_act, name='x_out')(x)

    # Branch over the second image axis: transpose, scan, project,
    # then transpose back before merging.
    y = layers.Permute((2, 1))(input_reshaped)
    second_axis_rnn = layers.RNN(layers.LSTMCell(mdlstm_units),
                                 return_sequences=True)
    y = second_axis_rnn(y)
    y = layers.Dense(200, activation=dense_act, name='y_out')(y)
    y = layers.Permute((2, 1))(y)

    print(x)
    print(y)

    added = layers.Add()([x, y])
    out = layers.Dense(len(alphabet) + 1,
                       activation='softmax',
                       name='dense_out')(added)
    classified = CTCLayer(name='ctc_loss')(labels, out)

    model = keras.models.Model(
        inputs=[input_img, labels],
        outputs=classified,
        name='LSTMlayerModel',
    )
    model.compile(optimizer=keras.optimizers.Adam())
    return model
def build(self, input_shape):
    """Create the permute / reshape / softmax sub-layers.

    ``input_shape`` is unpacked as ``(batch, h, w, num_features)``; the
    batch entry is ignored.
    """
    _, h, w, num_features = input_shape
    flat_size = h * w

    # Features first, then the two middle axes flattened into one.
    self.transposer = layers.Permute((3, 1, 2))
    self.reshaper = layers.Reshape((num_features, flat_size))
    self.softmaxer = layers.Softmax(axis=-1)
    # Inverse of the two steps above.
    self.unflattener = layers.Reshape((num_features, h, w))
    self.untransposer = layers.Permute((2, 3, 1))
def call(self, audio, forward=True):
    """Split ``audio`` into the remaining signal and an early-output chunk.

    After swapping the last two axes, the chunk keeps everything except
    the final ``self.n_remaining_channels`` steps of axis 1, while
    ``audio`` drops the first ``self.n_early_size`` steps. Both are then
    swapped back, and the chunk is flattened to ``(batch, -1, 1)``.

    Args:
        audio: input tensor; assumed rank 3 -- TODO confirm.
        forward: must be exactly ``True``; any other value is unsupported.

    Returns:
        Tuple ``(audio, output_chunk)``.

    Raises:
        NotImplementedError: if ``forward`` is not ``True``.
    """
    if forward is not True:
        raise NotImplementedError(
            'The false forward boolean for this layer is not working yet')

    audio = layers.Permute(dims=(2, 1), dtype=self.dtype)(audio)
    output_chunk = layers.Cropping1D(
        cropping=(0, self.n_remaining_channels), dtype=self.dtype)(audio)
    audio = layers.Cropping1D(cropping=(self.n_early_size, 0),
                              dtype=self.dtype)(audio)

    audio = layers.Permute(dims=(2, 1), dtype=self.dtype)(audio)
    output_chunk = layers.Permute(dims=(2, 1),
                                  dtype=self.dtype)(output_chunk)

    # Use -1 for the batch axis: the static batch size is None when the
    # layer is traced symbolically, and tf.reshape rejects None entries.
    flat_length = output_chunk.shape[1] * output_chunk.shape[2]
    output_chunk = tf.reshape(output_chunk, [-1, flat_length, 1])
    return audio, output_chunk
def __init__(self,
             vertices: int = 9,
             edges: int = 5,
             nodes: int = 5,
             dropout_rate: float = 0.,
             embedding_dim: int = 10,
             name: str = "SimpleMolGANGenerator",
             **kwargs):
    """
    Set up the generator's layers.

    Parameters
    ----------
    vertices : int, optional
        maximum number of atoms per molecule (incl. empty atom),
        by default 9
    edges : int, optional
        number of bond types, by default 5
    nodes : int, optional
        number of atom types, by default 5
    dropout_rate : float, optional
        dropout rate shared by every dropout layer, by default 0.
    embedding_dim : int, optional
        dimensionality of the noise input, by default 10
    name : str, optional
        model name, by default "SimpleMolGANGenerator"
    **kwargs
        forwarded unchanged to the parent constructor
    """
    super(BasicMolGANGenerator, self).__init__(name=name, **kwargs)

    self.vertices = vertices
    self.edges = edges
    self.nodes = nodes
    self.dropout_rate = dropout_rate
    self.embedding_dim = embedding_dim

    # Shared trunk: three tanh dense layers, each followed by dropout.
    self.dense1 = layers.Dense(128,
                               activation="tanh",
                               input_shape=(embedding_dim, ))
    self.dropout1 = layers.Dropout(dropout_rate)
    self.dense2 = layers.Dense(256, activation="tanh")
    self.dropout2 = layers.Dropout(dropout_rate)
    self.dense3 = layers.Dense(512, activation="tanh")
    self.dropout3 = layers.Dropout(dropout_rate)

    # Edge head: logits reshaped to (edges, vertices, vertices).
    edge_units = edges * vertices * vertices
    self.edges_dense = layers.Dense(units=edge_units, activation=None)
    self.edges_reshape = layers.Reshape((edges, vertices, vertices))
    self.edges_matrix_transpose1 = layers.Permute((1, 3, 2))
    self.edges_matrix_transpose2 = layers.Permute((2, 3, 1))
    self.edges_dropout = layers.Dropout(dropout_rate)

    # Node head: logits reshaped to (vertices, nodes).
    node_units = vertices * nodes
    self.nodes_dense = layers.Dense(units=node_units, activation=None)
    self.nodes_reshape = layers.Reshape((vertices, nodes))
    self.nodes_dropout = layers.Dropout(dropout_rate)
def func(inputs):
    """Weight ``inputs`` by a softmax computed along its axis 1.

    Axis 1 is moved to the end so ``Dense`` acts on it, a softmax
    projection of the same width is applied, and the result is moved
    back to axis 1 before multiplying with ``inputs``.

    Returns:
        Tuple ``(outputs, probs)``.
    """
    rank = len(inputs.shape)

    # Axis 1 -> last position (the batch axis is implicit for Permute).
    axis1_to_last = [*range(2, rank), 1]
    moved = kl.Permute(axis1_to_last, name='%s_permute0'%name)(inputs)

    softmax_fc = kl.Dense(inputs.shape[1],
                          activation='softmax',
                          name='%s_fc'%name)
    probs = softmax_fc(moved)

    # Last position -> axis 1.
    last_to_axis1 = [rank - 1, *range(1, rank - 1)]
    probs = kl.Permute(last_to_axis1, name='%s_permute1'%name)(probs)

    outputs = kl.Multiply(name='%s_out'%name)([inputs, probs])
    return outputs, probs
def squeeze_excite_block(inputs, prefix, ratio=4):
    '''
    Create a channel-wise squeeze-excite block.

    Args:
        inputs: input Keras tensor (4-D image batch).
        prefix: string prepended to the name of the gating activation.
        ratio: reduction factor for the bottleneck dense layer.

    Returns:
        ``inputs`` multiplied by the per-channel gate.

    References
    - [Squeeze and Excitation Networks](https://arxiv.org/abs/1709.01507)
    '''
    init = inputs
    channel_axis = 1 if backend.image_data_format() == "channels_first" else -1
    # Use the public ``shape`` attribute rather than the private
    # ``_shape_val`` cache, which is not part of the tensor API.
    filters = init.shape[channel_axis]
    se_shape = (1, 1, filters)

    # Squeeze: one value per channel.
    se = layers.GlobalAveragePooling2D()(init)
    se = layers.Reshape(se_shape)(se)

    # Excite: bottleneck followed by expansion and the gating activation.
    se = layers.Dense(filters // ratio,
                      activation='relu',
                      kernel_initializer='he_normal',
                      use_bias=False)(se)
    se = layers.Dense(filters,
                      kernel_initializer='he_normal',
                      use_bias=False)(se)
    se = layers.Activation(HardSigmoid, name=prefix + 'se_hm')(se)

    if backend.image_data_format() == 'channels_first':
        # (1, 1, C) -> (C, 1, 1) so the gate broadcasts over NCHW inputs.
        se = layers.Permute((3, 1, 2))(se)

    x = layers.multiply([init, se])
    return x
def define_model(nchan, L, Fs):
    """Assemble and compile the sequential Morlet-convolution classifier.

    Args:
        nchan: number of input channels.
        L: number of samples along the first input axis.
        Fs: value forwarded to ``MorletConvRaw`` (presumably the sampling
            rate -- confirm).

    Returns:
        The compiled ``tf.keras.Sequential`` model with a 3-way softmax.
    """
    model = tf.keras.Sequential()

    stack = [
        layers.InputLayer((L, nchan), batch_size=1),
        layers.LayerNormalization(axis=[1, 2], center=False, scale=False),
        MorletConvRaw([L, nchan],
                      Fs,
                      input_shape=[L, nchan, 1],
                      etas=etas,
                      wtime=wtime),
        layers.Conv2D(filters=filters,
                      kernel_size=[1, nchan],
                      activation='elu'),
        layers.Permute((3, 1, 2), name="second_permute"),
        layers.AveragePooling2D(pool_size=(1, 71),
                                strides=(1, 15),
                                name="pooling"),
        layers.Dropout(0.75),
        layers.Flatten(),
        layers.Dense(3),
        layers.Activation('softmax'),
    ]
    for layer in stack:
        model.add(layer)

    model.compile(
        loss=losses.CategoricalCrossentropy(),
        optimizer=optimizers.Adam(),
        metrics=['accuracy'],
        run_eagerly=False,
    )
    return model
def make_generator_model():
    """Build the (uncompiled) sequential generator.

    The network takes a ``(47, 1)`` noise input, applies a bidirectional
    LSTM and a stack of width-16 ``Conv1D`` layers with two 2x
    upsampling steps, and finally swaps the last two axes.
    """

    def conv(filters, **extra):
        # Every convolution shares kernel size, stride and padding.
        return layers.Conv1D(filters=filters,
                             kernel_size=16,
                             strides=1,
                             padding='same',
                             **extra)

    model = tf.keras.Sequential()
    stack = [
        layers.Input(shape=(47, 1)),  # Noise shape
        layers.Bidirectional(layers.LSTM(64, return_sequences=True)),
        conv(128),
        layers.LeakyReLU(),
        conv(64),
        layers.LeakyReLU(),
        layers.UpSampling1D(2),
        conv(32),
        layers.LeakyReLU(),
        conv(16),
        layers.LeakyReLU(),
        layers.UpSampling1D(2),
        conv(1, activation='tanh'),
        layers.Permute((2, 1)),
    ]
    for layer in stack:
        model.add(layer)
    return model
def make_discriminator_model():
    """Build the (uncompiled) sequential discriminator.

    The network takes a ``(1, 188)`` input, swaps its two axes, runs four
    width-16 ``Conv1D`` stages with dropout and max-pooling, and ends in
    a single-unit dense output with no activation.
    """

    def conv(filters):
        # Every convolution shares kernel size, stride and padding.
        return layers.Conv1D(filters=filters,
                             kernel_size=16,
                             strides=1,
                             padding='same')

    model = tf.keras.Sequential()
    stack = [
        layers.Input(shape=(1, 188)),
        layers.Permute((2, 1)),
        conv(32),
        layers.LeakyReLU(),
        layers.Dropout(0.1),  # COMMENT OUT MAYBE
        conv(64),
        layers.LeakyReLU(),
        layers.MaxPool1D(pool_size=2),
        conv(128),
        layers.LeakyReLU(),
        layers.Dropout(0.1),  # COMMENT OUT MAYBE
        conv(256),
        layers.LeakyReLU(),
        layers.MaxPool1D(pool_size=2),
        layers.Flatten(),
        layers.Dense(1),
    ]
    for layer in stack:
        model.add(layer)
    return model
def multi_res_u_net(pretrained_weights=None, input_size=(30,3), lr=0.001):
    """Build the (uncompiled) multi-res regression model.

    The input is transposed and max-pooled, then passed through five
    ``mlti_res_block`` stages. After every stage the block output is
    concatenated with the pooled input; the last stage also concatenates
    its ``res_path1`` output. A single-unit dense layer produces the
    output.

    NOTE(review): ``pretrained_weights`` and ``lr`` are never read, and
    the ``res_path1`` results of the first four stages are discarded.
    """
    # (block arguments, res-path arguments) for each stage, in order.
    stage_specs = [
        ((8, 17, 26, 51), (32, 1)),
        ((17, 35, 53, 105), (64, 2)),
        ((31, 72, 106, 209), (128, 3)),
        ((71, 142, 213, 426), (256, 4)),
        ((142, 284, 427, 853), (256, 4)),
    ]
    final_stage = len(stage_specs) - 1

    inputs = layers.Input(input_size)
    pooled = layers.Permute((2,1))(inputs)
    pooled = tf.keras.layers.MaxPool1D(3)(pooled)

    merged = pooled
    for stage, (block_args, path_args) in enumerate(stage_specs):
        block = mlti_res_block(merged, *block_args)
        path = res_path1(block, *path_args)
        parts = [block, pooled]
        if stage == final_stage:
            parts.append(path)
        merged = layers.Concatenate()(parts)

    flatten = layers.Flatten()(merged)
    outputs = layers.Dense(1)(flatten)
    model = tf.keras.Model(inputs, outputs)
    return model
def multi_res_u_net_v2(pretrained_weights=None, input_size=(10,1), lr=0.001):
    """Build the (uncompiled) second multi-res regression model.

    The transposed input goes through five ``mlti_res_block`` stages.
    After each stage the block output, its ``res_path`` output and the
    transposed input are concatenated. A 128-unit ReLU dense layer and a
    single-unit dense layer produce the output.

    Args:
        pretrained_weights: NOTE(review): never read in this function.
        input_size: shape of one input sample (without the batch axis).
        lr: NOTE(review): never read in this function.

    Returns:
        The ``tf.keras.Model``.
    """
    # (block arguments, res-path arguments) for each stage, in order.
    stage_specs = [
        ((8, 17, 26, 51), (32, 1)),
        ((17, 35, 53, 105), (64, 2)),
        ((31, 72, 106, 209), (128, 3)),
        ((71, 142, 213, 426), (256, 4)),
        ((142, 284, 427, 853), (256, 4)),
    ]

    inputs = layers.Input(input_size)
    resshape = layers.Permute((2,1))(inputs)

    c = resshape
    for block_args, path_args in stage_specs:
        res_block = mlti_res_block(c, *block_args)
        # The result must not be bound to the name ``res_path``: that
        # would make ``res_path`` a local and the call above it would
        # raise UnboundLocalError before the function is ever reached.
        path_out = res_path(res_block, *path_args)
        c = layers.Concatenate()([res_block, path_out, resshape])

    flatten = layers.Flatten()(c)
    dense = layers.Dense(128,activation="relu")(flatten)
    outputs = layers.Dense(1)(dense)
    model = tf.keras.Model(inputs, outputs)
    # The built model was previously dropped; hand it back to the caller.
    return model
def _build_model(self):
    """Build and compile the CNN + RNN recogniser into ``self.model``.

    Two conv/max-pool stages feed a ``SimpleRNN`` over the transposed
    feature map, followed by a per-step softmax over
    ``self.num_of_classes``. The model is compiled with
    ``self.loss.ctc_loss`` and ``self.optimizer``.

    NOTE(review): ``Reshape((32, 1024))`` is hard-coded, so this
    presumably only works for one ``(height, width)`` -- confirm. The
    shape comments below are the original author's notes.
    """
    image = keras.Input(shape=(self.height, self.width, 1))

    # Convolutional feature extractor.
    features = layers.Conv2D(64, (3, 3), padding='same',
                             activation='relu')(image)  # (batch,32,128,64)
    features = layers.MaxPool2D(pool_size=(2, 2), strides=2,
                                padding='same')(features)
    features = layers.Conv2D(128, (3, 3), padding='same',
                             activation='relu')(features)  # (batch,16,64,128)
    features = layers.MaxPool2D(pool_size=(2, 2), strides=2,
                                padding='same')(features)  # (batch,8,32,128)

    # Put the second spatial axis first so it becomes the sequence axis.
    sequence = layers.Permute((2, 1, 3))(features)  # (batch,32,8,128)
    sequence = layers.Reshape((32, 1024))(sequence)  # (batch,32,1024)
    sequence = layers.Dropout(0.5)(sequence)

    # Bidirectional / stacked LSTM variants were tried here previously
    # and are currently disabled in favour of a single SimpleRNN.
    recurrent = layers.SimpleRNN(256, return_sequences=True)(sequence)
    recurrent = layers.Dropout(0.5)(recurrent)

    classifier = layers.Dense(self.num_of_classes,
                              activation='softmax',
                              kernel_constraint='UnitNorm',
                              use_bias=False)
    logits = classifier(recurrent)  # (batch,31,84)

    # No CTC beam-search decoding or decoder metric is attached here.
    self.model = keras.Model(inputs=image, outputs=logits)
    self.model.compile(loss=self.loss.ctc_loss, optimizer=self.optimizer)
def policy_value_network_alpha(config):
    """Build the joint policy / value network.

    The board input is permuted with ``(2, 3, 1, 4)`` and reshaped to
    ``(BT_K, BT_K, -1)``, then run through three shared residual blocks.
    Separate heads produce a softmax policy reshaped to the action shape
    and a sigmoid scalar value.

    Returns:
        A ``keras.Model`` whose outputs are the dict
        ``{"policy": ..., "value": ...}``.
    """
    board_shape = get_board_shape(config)
    action_shape = get_action_shape(config)
    BT_K = board_shape[1]

    board = keras.Input(shape=board_shape, name='board')
    trunk = layers.Reshape(board_shape)(board)  # assert board shape
    trunk = layers.Permute((2, 3, 1, 4))(trunk)
    trunk = layers.Reshape((BT_K, BT_K, -1))(trunk)

    # Shared residual trunk.
    trunk = residual_block(trunk, "pv_a", convert=True)
    trunk = residual_block(trunk, "pv_b")
    trunk = residual_block(trunk, "pv_c")

    # Policy head: softmax over the flattened block output.
    policy = residual_block(trunk, "pv_d", size=action_shape[-1],
                            convert=True)
    policy = layers.Flatten()(policy)
    policy = layers.Activation(activation='softmax')(policy)
    policy = layers.Reshape(action_shape, name='policy')(policy)

    # Value head: single sigmoid unit with L2 regularisation.
    value = residual_block(trunk, "pv_e")
    value = layers.Flatten()(value)
    value_dense = layers.Dense(
        1,
        activation='sigmoid',
        name='value',
        kernel_regularizer=l2(config.training.weight_decay),
        bias_regularizer=l2(config.training.weight_decay))
    value = value_dense(value)

    heads = {"policy": policy, "value": value}
    return keras.Model(inputs=board, outputs=heads)
def representation_network_atari(config):
    """Build the downsampling representation network.

    The board input is permuted with ``(2, 3, 1, 4)`` and its first and
    last axes are merged into one. Two stride-2 convolutions and two
    average-pooling steps, each followed by a residual block, lead to a
    final 3x3 ReLU convolution with ``config.mu.repr_shape[-1]``
    filters.

    Returns:
        A ``keras.Model`` named ``"Representation"``.
    """
    board_shape = get_board_shape(config)
    hidden_shape = config.mu.repr_shape
    BT_K = board_shape[1]
    merged_channels = board_shape[0] * board_shape[3]

    board = keras.Input(shape=board_shape, name='board')
    x = layers.Reshape(board_shape)(board)  # assert board shape
    x = layers.Permute((2, 3, 1, 4))(x)
    x = layers.Reshape((BT_K, BT_K, merged_channels))(x)

    # Two strided convolutions, each followed by a residual block.
    x = layers.Conv2D(32, 3, padding='same', strides=2)(x)
    x = residual_block(x, "repr_a", size=32)
    x = layers.Conv2D(64, 3, padding='same', strides=2)(x)
    x = residual_block(x, "repr_b", size=64)

    # Two pooling steps, each followed by a residual block.
    x = layers.AveragePooling2D()(x)
    x = residual_block(x, "repr_c", size=64)
    x = layers.AveragePooling2D()(x)
    x = residual_block(x, "repr_d", size=64)

    output_conv = layers.Conv2D(
        hidden_shape[-1], (3, 3),
        padding='same',
        activation='relu',
        kernel_regularizer=l2(config.training.weight_decay),
        bias_regularizer=l2(config.training.weight_decay),
        name='repr_board')
    repr_board = output_conv(x)

    return keras.Model(inputs=board, outputs=repr_board,
                       name="Representation")
def generator(self):
    """Build the transposed-convolution generator and print its summary.

    A ``(125, 1)`` input is reshaped to ``(125, 1, 1)`` and upsampled by
    three ``Conv2DTranspose`` layers. The result is permuted to
    channels-first and reshaped to ``(22, 1000)``.

    Returns:
        The ``models.Model`` named ``'generator'``.
    """
    inputs = layers.Input(shape=(125, 1))
    expanded = layers.Reshape((125, 1, 1))(inputs)

    # A channels-first 256-filter first layer was tried earlier and is
    # not used.
    upsampled = layers.Conv2DTranspose(64, (3, 3), (1, 1),
                                       activation=LeakyReLU(),
                                       padding='same')(expanded)
    upsampled = layers.Conv2DTranspose(32, (3, 3), (2, 2),
                                       activation=LeakyReLU(),
                                       padding='same')(upsampled)
    upsampled = layers.Conv2DTranspose(22, (3, 3), (2, 1),
                                       activation=LeakyReLU(),
                                       padding='same')(upsampled)

    # Channels first, then merge the two remaining axes.
    channels_first = layers.Permute((3, 1, 2))(upsampled)
    flattened = layers.Reshape((22, 1000))(channels_first)

    model = models.Model(inputs=inputs, outputs=flattened,
                         name='generator')
    model.summary()
    return model
def representation_network(config):
    """Build the representation network selected by ``config``.

    For ``config.game.kind == "Gym"`` this delegates to
    ``representation_network_atari``. Otherwise the board input is
    permuted, its first and last axes are merged, and three residual
    blocks plus a final 3x3 ReLU convolution are applied.

    Returns:
        A ``keras.Model`` named ``"Representation"`` (or whatever the
        Atari builder returns).
    """
    if config.game.kind == "Gym":
        return representation_network_atari(config)

    board_shape = get_board_shape(config)
    hidden_shape = config.mu.repr_shape
    BT_K = board_shape[1]
    merged_channels = board_shape[0] * board_shape[3]

    board = keras.Input(shape=board_shape, name='board')
    x = layers.Reshape(board_shape)(board)  # assert board shape
    x = layers.Permute((2, 3, 1, 4))(x)
    x = layers.Reshape((BT_K, BT_K, merged_channels),
                       name='RepresentationNetworkBoard')(x)

    x = residual_block(x, "repr_a", convert=True)
    x = residual_block(x, "repr_b")
    x = residual_block(x, "repr_c")

    output_conv = layers.Conv2D(
        hidden_shape[-1], (3, 3),
        padding='same',
        activation='relu',
        kernel_regularizer=l2(config.training.weight_decay),
        bias_regularizer=l2(config.training.weight_decay),
        name='repr_board')
    repr_board = output_conv(x)

    return keras.Model(inputs=board, outputs=repr_board,
                       name="Representation")
def attention(x_inner, x_outer, n_factor, dropout):
    """Scaled dot-product attention of ``x_inner`` over ``x_outer``.

    Queries come from ``x_inner``; keys and values come from
    ``x_outer``. Each is a linear width-1 ``Conv1D`` projection to
    ``n_factor`` channels. Scores are divided by ``sqrt(n_factor)`` and
    soft-maxed over the last axis.

    NOTE(review): ``dropout`` is accepted but never used.

    Returns:
        The attention-weighted values.
    """

    def project(tensor):
        # Fresh linear projection layer per call; weights are not shared.
        return L.Conv1D(
            n_factor,
            1,
            activation='linear',
            kernel_initializer='glorot_uniform',
            bias_initializer='glorot_uniform',
        )(tensor)

    x_Q = project(x_inner)
    x_K = project(x_outer)
    x_V = project(x_outer)

    # Transpose the keys so that Q . K^T is a plain batched matmul.
    x_KT = L.Permute((2, 1))(x_K)
    scaled_dot = L.Lambda(
        lambda c: K.batch_dot(c[0], c[1]) / np.sqrt(n_factor))
    res = scaled_dot([x_Q, x_KT])

    # A small Conv2D refinement of the score map was tried earlier and
    # is not applied.
    att = L.Lambda(lambda c: K.softmax(c, axis=-1))(res)
    att = L.Lambda(lambda c: K.batch_dot(c[0], c[1]))([att, x_V])
    return att
def channel_squeeze_excite_block(input, ratio=0.25):
    """Create a channel-wise squeeze-excite block built from 1x1 convs.

    Args:
        input: input Keras tensor (4-D image batch).
        ratio: fraction of the channel count used for the bottleneck;
            the bottleneck width is rounded and kept at one or more.

    Returns:
        ``input`` multiplied by the per-channel sigmoid gate.
    """
    init = input
    channel_axis = 1 if K.image_data_format() == "channels_first" else -1
    # Use the public backend helper instead of the private
    # ``_keras_shape`` attribute, which only some Keras builds set.
    filters = K.int_shape(init)[channel_axis]
    cse_shape = (1, 1, filters)

    # Squeeze: one value per channel.
    cse = layers.GlobalAveragePooling2D()(init)
    cse = layers.Reshape(cse_shape)(cse)

    ratio_filters = int(np.round(filters * ratio))
    if ratio_filters < 1:
        ratio_filters += 1

    # Excite: bottleneck conv, batch norm, then sigmoid expansion.
    cse = layers.Conv2D(
        ratio_filters,
        (1, 1),
        padding="same",
        activation="relu",
        kernel_initializer="he_normal",
        use_bias=False,
    )(cse)
    cse = layers.BatchNormalization()(cse)
    cse = layers.Conv2D(
        filters,
        (1, 1),
        activation="sigmoid",
        kernel_initializer="he_normal",
        use_bias=False,
    )(cse)

    if K.image_data_format() == "channels_first":
        # (1, 1, C) -> (C, 1, 1) so the gate broadcasts over NCHW inputs.
        cse = layers.Permute((3, 1, 2))(cse)

    cse = layers.Multiply()([init, cse])
    return cse
def create_q_model(num_actions, window):
    """Build the convolutional Q-network.

    Preprocessing based on Deep Learning Quick Reference by Mike Bernico;
    the network is the one defined by the Deepmind paper.

    Args:
        num_actions: number of linear outputs (one per action).
        window: number of stacked 84x84 frames in the input.

    Returns:
        The uncompiled model mapping frames to action values.
    """
    frames = layers.Input(shape=(window, 84, 84))

    # Move the frame axis last for the channels-last convolutions.
    # To run on GPU with channels_first, drop this Permute, feed
    # ``frames`` directly, and switch data_format below.
    frames_last = layers.Permute((2, 3, 1))(frames)

    # Convolutions on the frames on the screen.
    features = layers.Conv2D(32, 8, strides=4, activation="relu",
                             data_format="channels_last")(frames_last)
    features = layers.Conv2D(64, 4, strides=2, activation="relu",
                             data_format="channels_last")(features)
    features = layers.Conv2D(64, 3, strides=1, activation="relu",
                             data_format="channels_last")(features)

    flat = layers.Flatten()(features)
    hidden = layers.Dense(512, activation="relu")(flat)
    action = layers.Dense(num_actions, activation="linear")(hidden)

    return K.Model(inputs=frames, outputs=action)
def call(self, x):
    """Stitch a ``self.rows`` x ``self.cols`` grid of tiles into one image.

    The tile size and channel count are read from axes 2, 3 and 4 of
    ``x``. Shapes in the comments below omit the batch axis.
    """
    tile_h, tile_w, channels = x.shape[2], x.shape[3], x.shape[4]
    full_h = self.rows * tile_h
    full_w = self.cols * tile_w

    x = tfkl.Reshape((self.rows, self.cols, tile_h, tile_w, channels))(x)

    # -> (cols, tile_w, channels, rows, tile_h), then merge rows.
    x = tfkl.Permute((2, 4, 5, 1, 3))(x)
    x = tfkl.Reshape((self.cols, tile_w, channels, full_h))(x)

    # -> (channels, full_h, cols, tile_w), then merge columns.
    x = tfkl.Permute((3, 4, 1, 2))(x)
    x = tfkl.Reshape((channels, full_h, full_w))(x)

    # -> (full_w, full_h, channels)
    x = tfkl.Permute((3, 2, 1))(x)
    return x
def LSTM(N_CLASSES=10, SR=16000, DT=1.0):
    """Build and compile the bidirectional-LSTM audio classifier.

    Args:
        N_CLASSES: number of softmax outputs.
        SR: sample rate passed to the mel-spectrogram layer.
        DT: multiplied by ``SR`` to get the input length in samples.

    Returns:
        The compiled model (adam, categorical cross-entropy, accuracy).
    """
    mel_config = dict(
        n_dft=512,
        n_hop=160,
        padding='same',
        sr=SR,
        n_mels=128,
        fmin=0.0,
        fmax=SR/2,
        power_melgram=1.0,
        return_decibel_melgram=True,
        trainable_fb=False,
        trainable_kernel=False,
        name='melbands',
    )

    i = layers.Input(shape=(1, int(SR*DT)), name='input')
    x = Melspectrogram(**mel_config)(i)
    x = Normalization2D(str_axis='batch', name='batch_norm')(x)

    # Swap the first two non-batch axes, then flatten each step.
    x = layers.Permute((2,1,3), name='permute')(x)
    x = TimeDistributed(layers.Reshape((-1,)), name='reshape')(x)

    # Per-step projection; also reused as the skip connection.
    s = TimeDistributed(layers.Dense(64, activation='tanh'),
                        name='td_dense_tanh')(x)
    x = layers.Bidirectional(layers.LSTM(32, return_sequences=True),
                             name='bidirectional_lstm')(s)
    x = layers.concatenate([s, x], axis=2, name='skip_connection')

    x = layers.Dense(64, activation='relu', name='dense_1_relu')(x)
    x = layers.MaxPooling1D(name='max_pool_1d')(x)
    x = layers.Dense(32, activation='relu', name='dense_2_relu')(x)
    x = layers.Flatten(name='flatten')(x)
    x = layers.Dropout(rate=0.2, name='dropout')(x)
    x = layers.Dense(32,
                     activation='relu',
                     activity_regularizer=l2(0.001),
                     name='dense_3_relu')(x)
    o = layers.Dense(N_CLASSES, activation='softmax', name='softmax')(x)

    model = Model(inputs=i, outputs=o, name='long_short_term_memory')
    model.compile(
        optimizer='adam',
        loss='categorical_crossentropy',
        metrics=['accuracy'],
    )
    return model
def Conv1D(N_CLASSES=10, SR=16000, DT=1.0):
    """Build and compile the time-distributed Conv1D audio classifier.

    Args:
        N_CLASSES: number of softmax outputs.
        SR: sample rate passed to the mel-spectrogram layer.
        DT: multiplied by ``SR`` to get the input length in samples.

    Returns:
        The compiled model (adam, categorical cross-entropy, accuracy).
    """
    mel_config = dict(
        n_dft=512,
        n_hop=160,
        padding='same',
        sr=SR,
        n_mels=128,
        fmin=0.0,
        fmax=SR/2,
        power_melgram=1.0,
        return_decibel_melgram=True,
        trainable_fb=False,
        trainable_kernel=False,
        name='melbands',
    )
    # (filters, activation, conv layer name, following pool name or None)
    conv_stages = [
        (8, 'tanh', 'td_conv_1d_tanh', 'max_pool_2d_1'),
        (16, 'relu', 'td_conv_1d_relu_1', 'max_pool_2d_2'),
        (32, 'relu', 'td_conv_1d_relu_2', 'max_pool_2d_3'),
        (64, 'relu', 'td_conv_1d_relu_3', 'max_pool_2d_4'),
        (128, 'relu', 'td_conv_1d_relu_4', None),
    ]

    i = layers.Input(shape=(1, int(SR*DT)), name='input')
    x = Melspectrogram(**mel_config)(i)
    x = Normalization2D(str_axis='batch', name='batch_norm')(x)
    x = layers.Permute((2,1,3), name='permute')(x)

    for n_filters, activation, conv_name, pool_name in conv_stages:
        conv = layers.Conv1D(n_filters, kernel_size=(4),
                             activation=activation)
        x = TimeDistributed(conv, name=conv_name)(x)
        # The last stage is followed by global pooling instead.
        if pool_name is not None:
            x = layers.MaxPooling2D(pool_size=(2,2), name=pool_name)(x)

    x = layers.GlobalMaxPooling2D(name='global_max_pooling_2d')(x)
    x = layers.Dropout(rate=0.1, name='dropout')(x)
    x = layers.Dense(64,
                     activation='relu',
                     activity_regularizer=l2(0.001),
                     name='dense')(x)
    o = layers.Dense(N_CLASSES, activation='softmax', name='softmax')(x)

    model = Model(inputs=i, outputs=o, name='1d_convolution')
    model.compile(
        optimizer='adam',
        loss='categorical_crossentropy',
        metrics=['accuracy'],
    )
    return model
def squeeze_excite_block(input_tensor, ratio=8):
    """Create a channel-wise squeeze-excite block for 1-D feature maps.

    Args:
        input_tensor: input Keras tensor (3-D sequence batch).
        ratio: reduction factor; the bottleneck dense layer has
            ``filters // ratio`` units.

    Returns:
        ``input_tensor`` multiplied by the per-channel sigmoid gate.

    References
    -   [Squeeze and Excitation Networks](https://arxiv.org/abs/1709.01507)
    """
    init = input_tensor
    data_format = backend.image_data_format()
    channel_axis = 1 if data_format == "channels_first" else -1
    filters = init.shape[channel_axis]
    se_shape = (1, filters)

    # Pool over the step axis for the active data format, so that a
    # channels_first input is also reduced to one value per channel.
    se = layers.GlobalAvgPool1D(data_format=data_format)(init)
    se = layers.Reshape(se_shape)(se)
    se = layers.Dense(filters // ratio,
                      activation='relu',
                      kernel_initializer='he_normal',
                      use_bias=False)(se)
    se = layers.Dense(filters,
                      activation='sigmoid',
                      kernel_initializer='he_normal',
                      use_bias=False)(se)

    if data_format == 'channels_first':
        # The gate is rank 3 here, so only two non-batch axes can be
        # swapped: (1, C) -> (C, 1). The 2-D pattern (3, 1, 2) is invalid.
        se = layers.Permute((2, 1))(se)

    x = layers.multiply([init, se])
    return x
def AttRNNSpeechModel(nCategories, samplingrate=16000, inputLength=16000,
                      rnn_func=L.LSTM):
    """Build the attention-RNN speech classifier (uncompiled).

    Args:
        nCategories: number of softmax outputs.
        samplingrate: sample rate passed to the mel-spectrogram layer.
        inputLength: number of samples in one input.
        rnn_func: recurrent layer class wrapped in ``Bidirectional``.

    Returns:
        The Keras ``Model``.
    """
    sr = samplingrate
    iLen = inputLength

    inputs = L.Input((inputLength,), name='input')
    x = L.Reshape((1, -1))(inputs)

    # Frozen mel-spectrogram front end.
    mel = Melspectrogram(
        n_dft=1024,
        n_hop=128,
        input_shape=(1, iLen),
        padding='same',
        sr=sr,
        n_mels=80,
        fmin=40.0,
        fmax=sr / 2,
        power_melgram=1.0,
        return_decibel_melgram=True,
        trainable_fb=False,
        trainable_kernel=False,
        name='mel_stft',
    )
    mel.trainable = False
    x = mel(x)
    x = Normalization2D(int_axis=0, name='mel_stft_norm')(x)

    # Per the original note, Melspectrogram emits
    # (batch_size, melDim, timeSteps, 1); swap so time comes first.
    x = L.Permute((2, 1, 3))(x)

    x = L.Conv2D(10, (5, 1), activation='relu', padding='same')(x)
    x = L.BatchNormalization()(x)
    x = L.Conv2D(1, (5, 1), activation='relu', padding='same')(x)
    x = L.BatchNormalization()(x)

    # Drop the trailing singleton channel axis.
    x = L.Lambda(lambda q: K.squeeze(q, -1), name='squeeze_last_dim')(x)

    # Two stacked bidirectional layers: [b_s, seq_len, vec_dim]
    x = L.Bidirectional(rnn_func(64, return_sequences=True))(x)
    x = L.Bidirectional(rnn_func(64, return_sequences=True))(x)

    # The query comes from the final time step: [b_s, vec_dim]
    last_step = L.Lambda(lambda q: q[:, -1])(x)
    query = L.Dense(128)(last_step)

    # Dot-product attention: [b_s, seq_len]
    attScores = L.Dot(axes=[1, 2])([query, x])
    attScores = L.Softmax(name='attSoftmax')(attScores)

    # Attention-weighted sum of the sequence: [b_s, vec_dim]
    attVector = L.Dot(axes=[1, 1])([attScores, x])

    x = L.Dense(64, activation='relu')(attVector)
    x = L.Dense(32)(x)
    output = L.Dense(nCategories, activation='softmax', name='output')(x)

    model = Model(inputs=[inputs], outputs=[output])
    return model
def build(self, input_shape):
    """Create the reshape / permute sub-layers for the given input shape.

    ``input_shape`` is read as ``(batch, height, width, channels)``.
    The channels are split into ``self.groups`` groups, the group and
    per-group axes are swapped, and the result is merged back into
    ``channels``.
    """
    height, width, channels = input_shape[1:]
    print(height, width, channels)

    # Integer division keeps this a plain Python int. The previous
    # float division followed by tf.cast put a tensor into the Reshape
    # target shape, which expects static integers.
    channels_per_group = channels // self.groups

    self.reshape_ver1 = layers.Reshape(
        [height, width, self.groups, channels_per_group])
    self.permute_dimension = layers.Permute(dims=(1, 2, 4, 3))
    self.reshape_ver2 = layers.Reshape([height, width, channels])
def convert_channels_to_frequency_domain(images):
    """Convert a tensor of images to their Fourier transforms.

    The tensor contains ch channels representing ch/2 real parts and
    ch/2 imag parts.

    Args:
        images(float): A tensor of shape (batch_size, N, N, ch)

    Returns:
        spectra(float): An FFT-ed tensor of shape (batch_size, N, N, ch)
    """
    reim_imgs = join_reim_channels(images)
    # fft2d works on the two innermost axes, so move channels in front
    # of the spatial axes first and restore channels-last afterwards.
    perm_imgs = layers.Permute((3, 1, 2))(reim_imgs)
    perm_ffts = layers.Permute((2, 3, 1))(tf.signal.fft2d(perm_imgs))
    spectra = tf.signal.fftshift(split_reim_channels(perm_ffts),
                                 axes=(1, 2))
    return spectra
def token_learner(inputs, number_of_tokens=NUM_TOKENS):
    """Reduce a feature map to ``number_of_tokens`` learned tokens.

    Args:
        inputs: feature map of shape (B, H, W, C).
        number_of_tokens: number of attention maps, i.e. output tokens.

    Returns:
        Tensor of shape (B, number_of_tokens, C).
    """
    # Layer normalize the inputs.
    normed = layers.LayerNormalization(epsilon=LAYER_NORM_EPS)(
        inputs)  # (B, H, W, C)

    def conv(activation):
        # All four convolutions differ only in their activation.
        return layers.Conv2D(
            filters=number_of_tokens,
            kernel_size=(3, 3),
            activation=activation,
            padding="same",
            use_bias=False,
        )

    # 3 layers of conv with gelu activation as suggested in the paper.
    map_layers = [conv(tf.nn.gelu) for _ in range(3)]
    # This conv layer will generate the attention maps; note sigmoid
    # for [0, 1] output.
    map_layers.append(conv("sigmoid"))
    # Reshape and permute so the maps line up with the flattened inputs
    # for the multiplication and pooling below.
    map_layers.append(
        layers.Reshape((-1, number_of_tokens)))  # (B, H*W, num_of_tokens)
    map_layers.append(layers.Permute((2, 1)))

    attention_maps = keras.Sequential(map_layers)(
        normed)  # (B, num_of_tokens, H*W)

    # Reshape the input to align it with the output of the conv block.
    num_filters = inputs.shape[-1]
    flat_inputs = layers.Reshape(
        (1, -1, num_filters))(inputs)  # (B, 1, H*W, C)

    # Element-wise multiplication of the attention maps and the inputs.
    attended_inputs = (attention_maps[..., tf.newaxis] * flat_inputs
                       )  # (B, num_tokens, H*W, C)

    # Global average pooling over the spatial axis.
    outputs = tf.reduce_mean(attended_inputs, axis=2)  # (B, num_tokens, C)
    return outputs
def resnet18(num_classes, batch_size=None):
    """Instantiate the ResNet-18 architecture.

    Arguments:
      num_classes: number of classes to classify images into
      batch_size: size of the batches for each step.

    Returns:
      A Keras model instance.
    """
    channels_first = backend.image_data_format() == 'channels_first'
    input_shape = (224, 224, 3)

    img_input = layers.Input(shape=input_shape, batch_size=batch_size)
    x = img_input

    if channels_first:
        # The input is declared channels-last; move channels in front.
        x = layers.Permute((3, 1, 2))(x)
        bn_axis = 1
    else:  # channels_last
        bn_axis = -1

    # Stem.
    x = layers.ZeroPadding2D(padding=(3, 3), name='conv1_pad')(x)
    stem_conv = layers.Conv2D(
        filters=64,
        kernel_size=(7, 7),
        strides=(2, 2),
        padding='valid',
        kernel_initializer='he_normal',
        kernel_regularizer=regularizers.l2(L2_WEIGHT_DECAY),
        name='conv1')
    x = stem_conv(x)
    x = layers.BatchNormalization(axis=bn_axis, name='bn_conv1')(x)
    x = layers.Activation('relu')(x)
    x = layers.MaxPooling2D(pool_size=(3, 3), strides=(2, 2),
                            padding='same')(x)

    # Residual stages; only the first one overrides the conv strides.
    stage_settings = [
        dict(filters=64, stage=2, conv_strides=1),
        dict(filters=128, stage=3),
        dict(filters=256, stage=4),
        dict(filters=512, stage=5),
    ]
    for settings in stage_settings:
        x = resnet_block(x, size=2, kernel_size=3, **settings)

    # Classifier head.
    x = layers.GlobalAveragePooling2D()(x)
    head = layers.Dense(
        units=num_classes,
        kernel_initializer='he_normal',
        kernel_regularizer=regularizers.l2(L2_WEIGHT_DECAY),
        name='fc1000')
    x = head(x)

    # A softmax that is followed by the model loss cannot be done in
    # float16 due to numeric issues, so dtype=float32 is passed.
    x = layers.Activation('softmax', dtype='float32')(x)

    # Create model.
    return models.Model(img_input, x, name='resnet18')
def convert_channels_to_image_domain(spectra):
    """Convert a tensor of Fourier spectra to the corresponding images.

    The tensor contains ch channels representing ch/2 real parts and
    ch/2 imag parts.

    Args:
        spectra(float): An array of shape (batch_size, N, N, ch)

    Returns:
        images(float): An IFFT-ed array of shape (batch_size, N, N, ch)
    """
    shifted = tf.signal.fftshift(spectra, axes=(1, 2))
    reim_spectra = join_reim_channels(shifted)
    # ifft2d works on the two innermost axes, so move channels in front
    # of the spatial axes first and restore channels-last afterwards.
    perm_spectra = layers.Permute((3, 1, 2))(reim_spectra)
    perm_images = layers.Permute((2, 3, 1))(tf.signal.ifft2d(perm_spectra))
    images = split_reim_channels(perm_images)
    return images
def vgg16(num_classes, batch_size=None):
    """Instantiate the VGG-16 style architecture.

    Arguments:
      num_classes: number of classes to classify images into
      batch_size: size of the batches for each step.

    Returns:
      A Keras model instance.
    """
    channels_first = backend.image_data_format() == 'channels_first'
    input_shape = (224, 224, 3)

    img_input = layers.Input(shape=input_shape, batch_size=batch_size)
    x = img_input

    if channels_first:
        # The input is declared channels-last; move channels in front.
        x = layers.Permute((3, 1, 2))(x)
        bn_axis = 1
    else:  # channels_last
        bn_axis = -1

    # Convolutional stages: (size, filters, stage label).
    conv_stages = [
        (3, 64, '1'),
        (3, 128, '2'),
        (4, 256, '3'),
        (4, 512, '4'),
        (4, 512, '5'),
    ]
    for size, filters, stage in conv_stages:
        x = vgg_block(x, size=size, filters=filters, stage=stage)

    x = layers.Flatten()(x)

    # Two identical 4096-unit blocks: (dense, batch-norm, dropout) names.
    fc_block_names = [
        ('fc', 'bn', 'dropout'),
        ('fc_', 'bn_', 'dropout_'),
    ]
    for dense_name, bn_name, dropout_name in fc_block_names:
        x = layers.Dense(
            units=4096,
            kernel_initializer='he_normal',
            kernel_regularizer=regularizers.l2(L2_WEIGHT_DECAY),
            name=dense_name)(x)
        x = layers.BatchNormalization(axis=bn_axis, name=bn_name)(x)
        x = layers.Activation('relu')(x)
        x = layers.Dropout(rate=0.5, name=dropout_name)(x)

    score = layers.Dense(
        units=num_classes,
        kernel_initializer='he_normal',
        kernel_regularizer=regularizers.l2(L2_WEIGHT_DECAY),
        name='fc_score')
    x = score(x)

    # A softmax that is followed by the model loss cannot be done in
    # float16 due to numeric issues, so dtype=float32 is passed.
    x = layers.Activation('softmax', dtype='float32')(x)

    # Create model.
    return models.Model(img_input, x, name='vgg16')