def GenerateBLSTMTFDC():
    """Build the BiLSTM model that emits one embedding vector per output bin.

    The shared ``preprocess()`` front end supplies the model input and a
    convolutional feature tensor. A small BiLSTM is applied to every frame of
    that tensor through ``TimeDistributed``, the per-frame result is flattened,
    a larger BiLSTM runs over the flattened sequence, and a time-distributed
    sigmoid dense layer produces ``out_shape[-1] * 40`` values per step.

    Returns:
        An uncompiled ``models.Model`` mapping ``inp`` to the ``cluster_o``
        output.
    """
    # The same pre-processing is applied to the different DNN structures.
    _, out_shape, inp, convoutput = preprocess()

    # Regularization parameters
    ff_dropout = 0.5    # feed-forward dropout
    rec_dropout = 0.2   # recurrent dropout
    l2_factor = 1e-6    # L2 regularization factor

    def regularized_bilstm(units, **wrapper_kwargs):
        # Sequence-returning BiLSTM sharing the regularization settings above.
        cell = layers.LSTM(units,
                           return_sequences=True,
                           kernel_regularizer=l2(l2_factor),
                           recurrent_regularizer=l2(l2_factor),
                           bias_regularizer=l2(l2_factor),
                           dropout=ff_dropout,
                           recurrent_dropout=rec_dropout)
        return layers.Bidirectional(cell, **wrapper_kwargs)

    # The RNN model that is applied to each frame.
    frame_units = 32
    frame_model = models.Sequential(name='BiLSTM_f')
    frame_model.add(
        regularized_bilstm(
            frame_units,
            input_shape=[
                convoutput._keras_shape[2],
                convoutput._keras_shape[3],
            ]))  # (32, 64) (frequency, #kernels)

    per_frame = layers.TimeDistributed(frame_model, name='RNN_f')(convoutput)

    # Flatten everything after the time axis into one feature vector per step.
    flat_shape = (per_frame._keras_shape[1],
                  np.prod(per_frame._keras_shape[2::]))
    x_f = layers.Reshape(flat_shape)(per_frame)

    sequence_units = 256
    x_f = regularized_bilstm(sequence_units)(x_f)

    embeddings_dim = 40
    projection = layers.Dense(out_shape[-1] * embeddings_dim,
                              activation='sigmoid',
                              kernel_regularizer=l2(l2_factor),
                              bias_regularizer=l2(l2_factor))
    cluster_o = layers.TimeDistributed(projection, name='cluster_o')(x_f)

    return models.Model(inputs=[inp], outputs=[cluster_o])
def mrcnn_feature(self, inputs):
    """Apply the two-convolution classifier feature head to ``inputs``.

    Each stage is a time-distributed convolution followed by batch
    normalization (run with ``training=self.cfg.TRAIN_BN``) and a ReLU. The
    first convolution uses a ``self.cfg.POOL_SIZE`` kernel with valid padding,
    the second a 1x1 kernel; both have 1024 filters.

    Returns:
        The resulting tensor with axes 2 and 3 squeezed away.
    """

    def normalize_and_activate(tensor, bn_name):
        # Batch norm honours the TRAIN_BN switch from the config.
        normalized = KL.TimeDistributed(BatchNorm(), name=bn_name)(
            tensor, training=self.cfg.TRAIN_BN)
        return KL.Activation('relu')(normalized)

    first_conv = KL.TimeDistributed(
        KL.Conv2D(1024, self.cfg.POOL_SIZE, padding="valid"),
        name="mrcnn_class_conv1")
    features = normalize_and_activate(first_conv(inputs), 'mrcnn_class_bn1')

    second_conv = KL.TimeDistributed(KL.Conv2D(1024, (1, 1)),
                                     name="mrcnn_class_conv2")
    features = normalize_and_activate(second_conv(features),
                                      'mrcnn_class_bn2')

    # tf.squeeze on axes [2, 3] requires both of them to have size 1.
    return KL.Lambda(lambda t: tf.squeeze(t, [2, 3]))(features)
def common_representation(x1, x2):
    """Fuse ``x1`` and ``x2`` into one time-distributed feature tensor.

    ``x2`` is reshaped to gain a leading axis of length 1 and passed through
    ``Rep_mask`` with ``x1.shape[1]`` as its argument (project layer;
    presumably it repeats ``x2`` along that axis so it lines up with ``x1`` —
    TODO confirm). Both tensors are concatenated on axis 4 and sent through a
    time-distributed 3x3 convolution, batch normalization and ReLU.

    The shapes of ``x1`` and of the processed ``x2`` are printed for debugging.
    """
    repeat_count = int(x1.shape[1])

    # Prepend a singleton axis to x2's three trailing dimensions.
    expanded_shape = (1,) + tuple(
        np.int32(x2.shape[axis]) for axis in (1, 2, 3))
    x2 = keras.layers.Reshape(target_shape=expanded_shape)(x2)
    x2 = Rep_mask(repeat_count)(x2)

    print(x1.shape)
    print(x2.shape)

    merged = layers.concatenate([x1, x2], axis=4)

    conv = layers.Conv2D(128, 3, padding='same',
                         kernel_initializer='he_normal')
    merged = layers.TimeDistributed(conv)(merged)

    batch_norm = layers.BatchNormalization(axis=3)
    merged = layers.TimeDistributed(batch_norm)(merged)

    relu = layers.Activation('relu')
    return layers.TimeDistributed(relu)(merged)
def ctpn(base_features, num_anchors, rnn_units=128, fc_units=512):
    """CTPN head network.

    :param base_features: backbone feature map, (B,H,W,C)
    :param num_anchors: number of anchors
    :param rnn_units: number of units in each GRU direction
    :param fc_units: number of filters of the 1x1 "fully connected" convolution
    :return: (class_logits, predict_deltas, predict_side_deltas), each
        reshaped so that its last axis has size 2
    """

    def width_gru(layer_name, **gru_kwargs):
        # TimeDistributed applies the GRU to every row, i.e. along the width.
        recurrent = layers.GRU(rnn_units,
                               return_sequences=True,
                               kernel_initializer='he_normal',
                               **gru_kwargs)
        return layers.TimeDistributed(recurrent,
                                      name=layer_name)(base_features)

    # Run the RNN along the width direction.
    # NOTE(review): per the Keras docs, go_backwards=True returns the reversed
    # sequence, and nothing here flips it back before the concatenation, so
    # the two directions look misaligned along the width axis. Confirm whether
    # that is intended; changing it would affect existing trained weights.
    rnn_forward = width_gru('gru_forward')
    rnn_backward = width_gru('gru_backward', go_backwards=True)

    rnn_output = layers.Concatenate(name='gru_concat')(
        [rnn_forward, rnn_backward])  # (B,H,W,256)

    # Fully connected layer implemented as a 1x1 convolution.
    fc_output = layers.Conv2D(fc_units,
                              kernel_size=(1, 1),
                              activation='relu',
                              name='fc_output')(rnn_output)  # (B,H,W,512)

    def prediction_head(head_name):
        # 1x1 convolution with two values per anchor, flattened to (-1, 2).
        raw = layers.Conv2D(2 * num_anchors,
                            kernel_size=(1, 1),
                            name=head_name)(fc_output)
        return layers.Reshape(target_shape=(-1, 2),
                              name=head_name + '_reshape')(raw)

    # Classification
    class_logits = prediction_head('cls')
    # Regression of the centre's vertical coordinate and the height
    predict_deltas = prediction_head('deltas')
    # Side refinement
    predict_side_deltas = prediction_head('side_deltas')

    return class_logits, predict_deltas, predict_side_deltas
def build_fpn_mask_graph(rois,
                         feature_maps,
                         image_meta,
                         pool_size,
                         nb_classes,
                         train_bn=True):
    """Builds the computation graph of the mask head of Feature Pyramid Network.

    rois: [batch, num_rois, (y1, x1, y2, x2)] Proposal boxes in normalized
          coordinates.
    feature_maps: List of feature maps from different layers of the pyramid,
                  [P2, P3, P4, P5]. Each has a different resolution.
    image_meta: [batch, (meta data)] Image details. See compose_image_meta()
    pool_size: The width of the square feature map generated from ROI Pooling.
    nb_classes: number of classes, which determines the depth of the results
    train_bn: Boolean. Train or freeze Batch Norm layers

    Returns: Masks [batch, num_rois, height, width, nb_classes]. The stride-2
        transposed convolution upsamples the pooled map, so height and width
        are presumably 2 * pool_size (TODO confirm).
    """
    # ROI Pooling
    # Shape: [batch, num_rois, pool_size, pool_size, channels]
    pooled = roi_align.PyramidROIAlign(
        [pool_size, pool_size],
        name="roi_align_mask")([rois, image_meta] + feature_maps)

    # Four identical conv -> batch norm -> ReLU stages, named *_conv1..4 and
    # *_bn1..4.
    x = pooled
    for stage in range(1, 5):
        conv = KL.Conv2D(256, (3, 3), padding="same")
        x = KL.TimeDistributed(conv,
                               name="mrcnn_mask_conv{}".format(stage))(x)
        x = KL.TimeDistributed(layers.BatchNorm(),
                               name='mrcnn_mask_bn{}'.format(stage))(
                                   x, training=train_bn)
        x = KL.Activation('relu')(x)

    # Upsample, then predict one sigmoid mask channel per class.
    deconv = KL.Conv2DTranspose(256, (2, 2), strides=2, activation="relu")
    x = KL.TimeDistributed(deconv, name="mrcnn_mask_deconv")(x)

    mask_conv = KL.Conv2D(nb_classes, (1, 1), strides=1, activation="sigmoid")
    return KL.TimeDistributed(mask_conv, name="mrcnn_mask")(x)
def buildLSTMModel(input_size, max_output_seq_len, hidden_size):
    """Build and compile an LSTM encoder/decoder that emits one value per step.

    Layer stack: zero-masking -> LSTM encoder (last state only) -> ReLU dense
    -> repeat ``max_output_seq_len`` times -> LSTM decoder (full sequence) ->
    time-distributed linear dense with a single output.

    Args:
        input_size: number of features per input time step.
        max_output_seq_len: length of both the (padded) input sequence and
            the produced output sequence.
        hidden_size: number of units in the LSTM layers and the dense layer.

    Returns:
        The Sequential model, compiled with MSE loss and the Adam optimizer.
    """
    model = km.Sequential()

    # Time steps whose features are all 0 are masked. This layer also fixes
    # the model's input shape, so the encoder needs no input_dim of its own.
    model.add(
        kl.Masking(mask_value=0,
                   input_shape=(max_output_seq_len, input_size)))

    # Units are passed positionally (as the decoder below already did) rather
    # than through the deprecated `output_dim=` / `input_dim=` keywords.
    model.add(kl.LSTM(hidden_size, return_sequences=False))
    model.add(kl.Dense(hidden_size, activation='relu'))

    # Feed the encoded vector to the decoder at every output step.
    model.add(kl.RepeatVector(max_output_seq_len))
    model.add(kl.LSTM(hidden_size, return_sequences=True))
    model.add(kl.TimeDistributed(kl.Dense(1, activation="linear")))

    model.compile(loss='mse', optimizer='adam')
    return model
def build_model():
    """Build the two-branch captioning model (uncompiled).

    As described in https://arxiv.org/abs/1511.02283
    Input: The 4101-dim feature from extract_features, and the previous
    output word.

    The visual branch embeds each image-feature step to ``WORDVEC_DIM`` with a
    ReLU dense layer; the word branch is an embedding of the same width. Both
    are concatenated on axis 2 and fed to an LSTM, dropout and a softmax over
    ``VOCABULARY_SIZE``.
    """
    # Visual branch: per-step dense embedding of the image features.
    visual_input = models.Sequential()
    visual_embed = layers.Dense(WORDVEC_DIM,
                                activation='relu',
                                name='visual_embed')
    visual_input.add(
        layers.TimeDistributed(visual_embed,
                               input_shape=(None, IMAGE_FEATURE_SIZE)))

    # Word branch: embedding of the previous output word.
    word_input = models.Sequential()
    word_input.add(
        layers.Embedding(VOCABULARY_SIZE, WORDVEC_DIM, dropout=.5))

    # Joint model over the concatenated branches.
    model = models.Sequential()
    joint_layers = (
        layers.Merge([visual_input, word_input],
                     mode='concat',
                     concat_axis=2),
        layers.LSTM(1024, name='lstm_1', return_sequences=False),
        layers.Dropout(.5),
        layers.Dense(VOCABULARY_SIZE, activation='softmax',
                     name='embed_out'),
    )
    for layer in joint_layers:
        model.add(layer)
    return model
def build(self, inputs_shape):
    """Create and build one time-distributed inner layer per degree.

    The dimensions are read from ``inputs_shape`` via
    ``mol_shapes_to_dims``. For every degree in ``range(max_degree)`` a fresh
    inner layer is created with ``self.create_inner_layer_fn()``, renamed,
    wrapped in ``TimeDistributed`` and built for inputs of shape
    ``(None, max_atoms, num_atom_features + num_bond_features)``.

    Sets ``self.max_degree``, ``self.inner_3D_layers`` and
    ``self.trainable_weights``.
    """
    # Import dimensions (the last element, num_samples, is not needed here).
    (max_atoms, max_degree, num_atom_features, num_bond_features,
     _) = mol_shapes_to_dims(mol_shapes=inputs_shape)
    self.max_degree = max_degree

    # Each degree is convolved with its own weight matrix, so each gets its
    # own inner layer holding trainable parameters.
    self.trainable_weights = []
    self.inner_3D_layers = []

    wrapped_input_shape = (None, max_atoms,
                           num_atom_features + num_bond_features)

    for degree in range(max_degree):
        degree_suffix = '_' + str(degree)

        # Initialise the inner layer and give it a degree-specific name.
        inner_layer = self.create_inner_layer_fn()
        type_name = inner_layer.__class__.__name__.lower()
        inner_layer.name = self.name + '_inner_' + type_name + degree_suffix

        # Wrap in TimeDistributed so the inner layer is applied across
        # atoms (3D input).
        wrapper = layers.TimeDistributed(
            inner_layer,
            name=self.name + '_inner_timedistributed' + degree_suffix)

        # Building the wrapper also builds the wrapped inner layer.
        wrapper.build(wrapped_input_shape)

        # Keep the wrapper and expose its weights as this layer's weights.
        self.inner_3D_layers.append(wrapper)
        self.trainable_weights += wrapper.trainable_weights
def get_model(num_classes=Config.num_classes):
    """Build and compile the video classifier (frozen InceptionV3 + BiLSTMs).

    Every one of the 16 input frames (224x224x3) goes through an
    ImageNet-initialised, frozen InceptionV3 with average pooling. The frame
    features feed two stacked bidirectional LSTMs followed by sigmoid dense
    layers with batch normalization in between.

    Args:
        num_classes: size of the final dense layer.

    Returns:
        The model, compiled with SGD (Nesterov momentum) and
        categorical cross-entropy. The model summary is printed as a side
        effect.
    """
    # Define model
    video = keras.Input(shape=(16, 224, 224, 3), name='video')

    cnn = InceptionV3(weights='imagenet', include_top=False, pooling='avg')
    cnn.trainable = False  # the backbone is used as a fixed feature extractor
    frame_features = layers.TimeDistributed(cnn)(video)

    blstm_1 = Bidirectional(
        LSTM(1024, dropout=0.1, recurrent_dropout=0.5,
             return_sequences=True))(frame_features)
    blstm_2 = Bidirectional(
        LSTM(1024, dropout=0.1, recurrent_dropout=0.5,
             return_sequences=False))(blstm_1)

    Dense_2 = Dense(256, activation='sigmoid')(blstm_2)
    batchNorm = BatchNormalization()(Dense_2)
    enver = Dense(32, activation='sigmoid')(batchNorm)
    batchNorm2 = BatchNormalization()(enver)
    # NOTE(review): a sigmoid output is paired with categorical_crossentropy
    # below; confirm this is intended (softmax is the usual pairing for
    # single-label classification).
    Dense_3 = Dense(num_classes, activation='sigmoid')(batchNorm2)

    # `inputs=` / `outputs=` are the Keras 2 keywords; the legacy
    # `input=` / `output=` spellings are deprecated.
    model = keras.models.Model(inputs=video, outputs=Dense_3)
    model.summary()

    from keras.optimizers import SGD
    sgd = SGD(lr=0.002, decay=1e-5, momentum=0.9, nesterov=True)
    model.compile(loss='categorical_crossentropy',
                  optimizer=sgd,
                  metrics=['acc', f1_m, precision_m, recall_m])
    return model
def att_process(candidates, att, activation='tanh'):
    """Attend over ``candidates`` using ``att`` as the query.

    The candidates are projected step-wise to the last dimension of ``att``,
    scored against ``att`` with a normalized dot product, and the scores are
    turned into weights with a softmax. The weights are then used to combine
    the original (unprojected) candidates.

    Args:
        candidates: tensor of candidate vectors; axis 1 is the one summed
            over by the final dot product.
        att: query tensor whose last dimension sets the projection size.
        activation: activation of the projection layer.

    Returns:
        ``(weighted, weights)``: the weighted combination and the weights.
    """
    att_dim = K.int_shape(att)[-1]

    projector = layers.TimeDistributed(
        layers.Dense(att_dim, activation=activation))
    projected = projector(candidates)

    # normalize=True L2-normalizes both operands before the dot product.
    scores = layers.dot([projected, att], axes=(2, 1), normalize=True)
    weights = layers.Activation('softmax')(scores)

    weighted = layers.dot([candidates, weights], axes=(1, 1))
    return weighted, weights
def build_nn():
    """Build the LSTM encoder/decoder training model (uncompiled).

    The encoder LSTM consumes ``encoder_input`` and only its final hidden and
    cell states are kept; they initialise the decoder LSTM, which consumes
    ``decoder_input`` and returns the full output sequence. A time-distributed
    linear dense layer maps every decoder step to ``word_dim`` values.

    Returns:
        A ``models.Model`` with inputs ``[encoder_input, decoder_input]``.
        The model summary is printed as a side effect.
    """
    # Cell output state, see
    # https://blog.csdn.net/u011327333/article/details/78501054
    encoder_input = layers.Input(shape=(None, word_dim), name='encoder_input')
    encoder_lstm = layers.LSTM(hidden_dim, return_state=True,
                               name='encoder_lstm')
    # The encoder output itself is unused; only the states are passed on.
    _, state_h, state_c = encoder_lstm(encoder_input)
    encoder_state = [state_h, state_c]

    decoder_input = layers.Input(shape=(None, word_dim), name='decoder_input')
    decoder_lstm = layers.LSTM(hidden_dim,
                               return_state=True,
                               return_sequences=True,
                               name='decoder_lstm')
    decoder_output, _, _ = decoder_lstm(decoder_input,
                                        initial_state=encoder_state)

    # Units are passed positionally: `output_dim=` is a deprecated keyword.
    decoder_dense = layers.TimeDistributed(
        layers.Dense(word_dim, activation='linear'), name='densor')
    outputs = decoder_dense(decoder_output)

    train_model = models.Model(inputs=[encoder_input, decoder_input],
                               outputs=outputs)
    # summary() prints by itself and returns None, so it is not wrapped in
    # print() (which would emit an extra "None" line).
    train_model.summary()
    return train_model
def addPreAttentionLayer(self, merged_input):
    """Add attention mechanisms to the tensor merged_input.

    A per-timestep dense layer scores every feature, the scores are averaged
    over the feature axis into one value per timestep, and that value is
    broadcast back over the features and multiplied with the input.

    Args:
        merged_input: 3-dimensional Tensor, where the first
            dimension corresponds to the batch size, the second to the
            sequence timesteps and the last one to the concatenation of
            features.

    Returns:
        3-dimensional Tensor of the same dimension as merged_input
    """
    # The configuration may carry the literal string 'None'.
    configured = self.params.get('attentionActivation', None)
    activation = None if configured == 'None' else configured

    feature_vector_size = K.int_shape(merged_input)[-1]

    score_matrix = layers.TimeDistributed(
        layers.Dense(feature_vector_size, activation=activation),
        name='attention_matrix_score')(merged_input)

    # Calculate a single score for each timestep
    timestep_scores = layers.Lambda(lambda t: K.mean(t, axis=2),
                                    name='attention_vector_score')(
                                        score_matrix)

    # Reshape to obtain the same shape as input: repeat the score vector once
    # per feature, then swap the two non-batch axes.
    transpose = layers.Permute((2, 1))
    repeated = layers.RepeatVector(feature_vector_size)(timestep_scores)
    attention = transpose(repeated)

    return layers.multiply([attention, merged_input])
def build(self, input_shape):
    """Create and build the single time-distributed inner layer.

    The dimensions are read from ``input_shape`` via ``mol_shapes_to_dims``.
    The inner layer comes from ``self.create_inner_layer_fn()``, is renamed,
    wrapped in ``TimeDistributed`` and built for inputs of shape
    ``(None, max_atoms, num_atom_features + num_bond_features)``.

    Sets ``self.inner_3D_layer`` and ``self.train_weights``.
    """
    # Import dimensions (max_degree and num_samples are not needed here).
    (max_atoms, _, num_atom_features, num_bond_features,
     _) = mol_shapes_to_dims(mol_shapes=input_shape)

    # Add the dense layer that contains the trainable parameters.
    # The scope used to be "dense_degree_" + str(degree), but no `degree`
    # is defined in this method (there is no per-degree loop here), which
    # raised NameError. The scope is now derived from the layer's own name.
    with tf.name_scope("dense_" + self.name):
        # Initialise dense layer with specified params (kwargs) and name
        inner_layer = self.create_inner_layer_fn()
        inner_layer_type = inner_layer.__class__.__name__.lower()
        inner_layer._name = self.name + '_inner_' + inner_layer_type

        # Initialise TimeDistributed layer wrapper in order to parallelise
        # dense layer across atoms
        inner_3D_layer_name = self.name + '_inner_timedistributed'
        self.inner_3D_layer = layers.TimeDistributed(
            inner_layer, name=inner_3D_layer_name)

        # Build the TimeDistributed layer (which will build the Dense layer)
        self.inner_3D_layer.build(
            (None, max_atoms, num_atom_features + num_bond_features))

    # Store the wrapper's weights.
    # NOTE(review): these go into `train_weights`, not `trainable_weights`;
    # confirm the rest of the class reads this attribute.
    self.train_weights = self.inner_3D_layer.trainable_weights
def trainmodel(self,
               X=None,
               y=None,
               fit_args=None,
               use_generator=False,
               generator=None):
    """Build, compile and fit the token model, then store it on ``self.model``.

    The network is a per-step dense layer (28 units), a CuDNN LSTM (64 units),
    dropout (0.2) and a softmax over ``len(self.tokens_unique)``. It is
    compiled with Adam (lr 0.01) and categorical cross-entropy.

    Args:
        X: training inputs, used when ``use_generator`` is false.
        y: training targets, used when ``use_generator`` is false.
        fit_args: extra keyword arguments forwarded to ``fit`` /
            ``fit_generator``. ``None`` (the default) means no extra
            arguments.
        use_generator: train with ``fit_generator`` instead of ``fit``.
        generator: generator to train from; falls back to ``self.generator``
            when not given.
    """
    # `**None` raises TypeError, so the default must become an empty mapping.
    if fit_args is None:
        fit_args = {}

    sequence_shape = (self.lookback, len(self.tokens_unique))

    model = keras.models.Sequential()
    model.add(
        layers.TimeDistributed(layers.Dense(28), input_shape=sequence_shape))
    # The original passes input_shape here as well; kept unchanged.
    model.add(layers.CuDNNLSTM(64, input_shape=sequence_shape))
    model.add(layers.Dropout(0.2, noise_shape=None, seed=None))
    model.add(layers.Dense(len(self.tokens_unique), activation='softmax'))

    optimizer = keras.optimizers.Adam(lr=0.01)
    model.compile(loss='categorical_crossentropy', optimizer=optimizer)

    if use_generator:
        if not generator:
            generator = self.generator
        model.fit_generator(generator, **fit_args)
    else:
        model.fit(x=X, y=y, **fit_args)

    self.model = model
def TimeDistributed2DConvBlock(depth,
                               filters,
                               name,
                               kernel_size=(3, 3),
                               activation='elu'):
    """Return a Sequential block of time-distributed convolutions plus pooling.

    Args:
        depth: number of convolution layers to stack.
        filters: number of filters of every convolution.
        name: name given to the Sequential block.
        kernel_size: kernel size of every convolution.
        activation: activation of every convolution.

    Returns:
        A ``Sequential`` with ``depth`` time-distributed same-padded Conv2D
        layers followed by one time-distributed 2x2 max pooling.
    """
    block = Sequential(name=name)

    for _ in range(depth):
        conv = layers.Conv2D(filters=filters,
                             kernel_size=kernel_size,
                             activation=activation,
                             padding='same')
        block.add(layers.TimeDistributed(conv))

    pool = layers.MaxPooling2D(pool_size=(2, 2))
    block.add(layers.TimeDistributed(pool))
    return block
def simple_model(input_shape, num_output_categories):
    """Build and compile a stacked SimpleRNN sequence tagger.

    Two sequence-returning SimpleRNN layers (16 units each) are followed by a
    per-step dense layer and a softmax over ``num_output_categories``.

    Args:
        input_shape: shape of the input data; only ``input_shape[1]`` is used,
            as the number of features per time step.
        num_output_categories: number of classes predicted at every step.

    Returns:
        The Sequential model, compiled with Adam and categorical
        cross-entropy. Progress text and the summary are printed.
    """
    recurrent_cls = L.SimpleRNN
    hidden_size = 16  # 4, 16, 32
    num_layers = 2    # 1, 2, 4, layers
    # lr = 0.001 or 0.01

    print('Build model...')
    print(input_shape)

    model = M.Sequential()
    model.add(
        recurrent_cls(hidden_size,
                      input_shape=(None, input_shape[1]),
                      return_sequences=True))

    # Remaining recurrent layers. The hard-coded input_shape is kept from the
    # original; presumably Sequential ignores it on non-first layers
    # (TODO confirm).
    extra_layers = num_layers - 1
    for _ in range(extra_layers):
        model.add(
            recurrent_cls(hidden_size,
                          input_shape=(None, 7),
                          return_sequences=True))

    # Apply a dense layer to every temporal slice of the input: for each step
    # of the output sequence, decide which category should be chosen.
    model.add(L.TimeDistributed(L.Dense(num_output_categories)))
    model.add(L.Activation('softmax'))

    model.compile(loss='categorical_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])
    model.summary()
    return model
def DISABLED_test_sequential_as_downstream_of_masking_layer():
    """Check that a mask passes through a TimeDistributed Sequential.

    A Masking layer feeds a Sequential (Dense + ReLU) wrapped in
    TimeDistributed. After one training epoch, the masks computed by the
    Masking layer and by the wrapper must both equal ``np.any(input, axis=-1)``.
    """
    inputs = layers.Input(shape=(3, 4))
    masked = layers.Masking(mask_value=0., input_shape=(3, 4))(inputs)

    inner = Sequential()
    inner.add(layers.Dense(5, input_shape=(4, )))
    inner.add(layers.Activation('relu'))

    outputs = layers.TimeDistributed(inner)(masked)
    model = Model(inputs=inputs, outputs=outputs)
    model.compile(optimizer='rmsprop', loss='mse')

    # Non-zero integers everywhere, then zero out the tail of the first four
    # samples so those steps get masked.
    model_input = np.random.randint(low=1, high=5, size=(10, 3, 4))
    for i in range(4):
        model_input[i, i:, :] = 0.

    targets = np.random.random((10, 3, 5))
    model.fit(model_input, targets, epochs=1, batch_size=6)

    masking_layer = model.layers[1]
    wrapper_layer = model.layers[2]
    first_mask = masking_layer.compute_mask(masking_layer.input)
    second_mask = wrapper_layer.compute_mask(wrapper_layer.input, first_mask)
    mask_outputs = [first_mask, second_mask]

    func = K.function([inputs], mask_outputs)
    mask_outputs_val = func([model_input])

    for index in (0, 1):
        assert np.array_equal(mask_outputs_val[index],
                              np.any(model_input, axis=-1))
def fn_setup_model(inputs, labels):
    """Build the LSTM encoder/decoder character model and compile it.

    The input length is ``len(inputs[0])`` and the output length is
    ``len(labels[0])``; each step has ``len(char_array)`` features. The model
    is compiled by ``fn_compile_model`` and its summary is printed.

    Returns:
        The compiled Sequential model.
    """
    input_size = len(inputs[0])
    label_size = len(labels[0])
    vocabulary_size = len(char_array)

    HIDDEN_SIZE = 128
    # BATCH_SIZE = 128
    NUM_OF_HIDDEN_LAYERS = 1

    model = Sequential()

    # Encoder: compress the whole input sequence into one vector.
    encoder = layers.LSTM(HIDDEN_SIZE,
                          input_shape=(input_size, vocabulary_size))
    model.add(encoder)

    # Decoder: replay that vector once per output step.
    model.add(layers.RepeatVector(label_size))
    decoder = layers.LSTM(HIDDEN_SIZE, return_sequences=True)
    model.add(decoder)

    # Per-step distribution over the characters.
    model.add(layers.TimeDistributed(layers.Dense(len(char_array))))
    model.add(layers.Activation('softmax'))

    fn_compile_model(model)
    model.summary()
    return model
def addPreAttentionLayer(self, merged_input):
    """Add attention mechanisms to the tensor merged_input.

    The input is transposed so the dense scoring layer (with
    ``self.max_sentece_length`` units) runs over the timestep axis. The scores
    are averaged over axis 1, repeated once per feature, multiplied with the
    transposed input and transposed back. A Masking layer is appended at the
    end.

    Args:
        merged_input: 3-dimensional Tensor, where the first
            dimension corresponds to the batch size, the second to the
            sequence timesteps and the last one to the concatenation of
            features.

    Returns:
        3-dimensional Tensor of the same dimension as merged_input
    """
    # The configuration may carry the literal string 'None'.
    configured = self.params.get('attentionActivation', None)
    activation = None if configured == 'None' else configured

    feature_vector_size = K.int_shape(merged_input)[-1]

    transposed = layers.Permute((2, 1))(merged_input)

    scoring = layers.Dense(self.max_sentece_length, activation=activation)
    score_matrix = layers.TimeDistributed(
        scoring, name='attention_matrix_score')(transposed)

    # Calculate a single score for each timestep
    timestep_scores = layers.Lambda(lambda t: K.mean(t, axis=1),
                                    name='attention_vector_score')(
                                        score_matrix)

    # Reshape to obtain the same shape as input
    attention = layers.RepeatVector(feature_vector_size)(timestep_scores)

    weighted = layers.multiply([attention, transposed])
    weighted = layers.Permute((2, 1))(weighted)

    # We re-add the mask layer after the attention is applied.
    # Of course we have the risk of masking elements that were zeroed
    # after the application of the attention scores.
    return layers.Masking(mask_value=0.0)(weighted)
def compile(self, lr=1e-4):
    """Assemble the model from the stored layers and compile it.

    ``self.x_lstm`` is applied to ``self.inputs``; ``self.Bxy_hidden`` and
    ``self.Bxy_output`` are then applied per time step. The result is stored
    in ``self.outputs`` and the compiled model in ``self.model``.

    Args:
        lr: learning rate of the Adam optimizer (epsilon is fixed at 0.01).
    """
    print('Compiling model...')

    recurrent_features = self.x_lstm(self.inputs)
    hidden = layers.TimeDistributed(self.Bxy_hidden)(recurrent_features)
    self.outputs = layers.TimeDistributed(self.Bxy_output)(hidden)

    self.model = Model(inputs=[self.inputs], outputs=[self.outputs])

    optimizer = optimizers.adam(lr=lr, epsilon=.01)
    # sgd = optimizers.SGD(lr=.01, momentum=.9, nesterov=True)

    # 'temporal' sample weights allow weighting time steps differently.
    self.model.compile(optimizer=optimizer,
                       loss='mse',
                       sample_weight_mode='temporal')
def decoder2_module(n_in, dim_encoder, n_features, x):
    """Attach the forecasting decoder to the encoded tensor ``x``.

    ``x`` goes through a ReLU dense layer, is repeated ``n_in`` times, decoded
    by a sequence-returning ReLU LSTM and mapped to one value per step.

    Args:
        n_in: number of time steps the decoder produces.
        dim_encoder: number of units of the dense layer and the LSTM.
        n_features: currently unused.
            NOTE(review): the output layer emits 1 value per step regardless
            of ``n_features`` — confirm whether that is intended.
        x: encoded input tensor.

    Returns:
        The decoded output tensor.
    """
    # define forecasting decoder
    x = layers.Dense(dim_encoder, activation='relu')(x)
    x = layers.RepeatVector(n_in)(x)
    x = layers.LSTM(dim_encoder, activation='relu', return_sequences=True)(x)
    # Use layers.Dense like the first layer, instead of a bare `Dense` that
    # depends on a separate import.
    x = layers.TimeDistributed(layers.Dense(1))(x)
    return x
def build_bidirectional_model(num_tokens):
    """Build and compile a bidirectional LSTM encoder/decoder.

    A bidirectional LSTM encodes a ``(MAXLEN, num_tokens)`` input, the
    encoding is repeated ``MAXLEN`` times and decoded by three stacked
    sequence-returning bidirectional LSTMs. A per-step dense layer plus
    softmax selects one of ``num_tokens`` at every step.

    Returns:
        The Sequential model, compiled with Adam and categorical
        cross-entropy. Progress text and the summary are printed.
    """
    # Try replacing GRU, or SimpleRNN.
    # RNN = layers.LSTM
    hidden_size = 128
    decoder_depth = 3

    print('Build model...')
    model = Sequential()

    # Encoder
    encoder = Bidirectional(LSTM(hidden_size),
                            input_shape=(MAXLEN, num_tokens))
    model.add(encoder)

    # Decoder input: the encoding at every output step.
    model.add(layers.RepeatVector(MAXLEN))

    for _ in range(decoder_depth):
        decoder_cell = layers.LSTM(hidden_size, return_sequences=True)
        model.add(Bidirectional(decoder_cell))

    # Apply a dense layer to every temporal slice of the input: for each step
    # of the output sequence, decide which character should be chosen.
    model.add(layers.TimeDistributed(layers.Dense(num_tokens)))
    model.add(layers.Activation('softmax'))

    model.compile(loss='categorical_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])
    model.summary()
    return model
def train(X, Y):
    """Build, compile and fit the two-layer LSTM sequence model.

    Inputs are sequences of 140 ``uint8`` ids. A frozen, identity-initialised
    embedding (219 -> 219) turns them into one-hot-like vectors, two
    sequence-returning LSTMs follow, and a per-step softmax over 219 classes
    is trained with sparse categorical cross-entropy.

    Args:
        X: training inputs passed to ``model.fit``.
        Y: training targets passed to ``model.fit``.

    Returns:
        The fitted model. (Previously the function returned ``None`` and the
        trained model was lost; callers that ignore the return value are
        unaffected.)
    """
    hyperp = {
        "learning_rate": 0.001,
        "lstm_units": 192,
    }

    # Named `token_ids` rather than `input` to avoid shadowing the builtin.
    token_ids = layers.Input([140], dtype="uint8")
    head = layers.Embedding(input_dim=219,
                            output_dim=219,
                            embeddings_initializer="identity",
                            trainable=False)(token_ids)

    for _ in range(2):
        head = layers.LSTM(units=hyperp["lstm_units"],
                           unit_forget_bias=True,
                           return_sequences=True)(head)

    head = layers.TimeDistributed(
        layer=layers.Dense(units=219, activation='softmax'))(head)

    model = Model(inputs=token_ids, outputs=head)

    print("compile")
    model.compile(optimizer=optimizers.RMSprop(lr=hyperp["learning_rate"]),
                  loss="sparse_categorical_crossentropy",
                  metrics=["accuracy"])
    model.summary()

    model.fit(X,
              Y,
              validation_split=0.2,
              batch_size=100,
              shuffle=True,
              epochs=2)
    return model
def test_3d_to_3d():
    '''
    Apply the same Dense layer to each element of the time dimension of the
    input and predict the elements of the output sequence.
    This does not make use of the temporal structure of the sequence
    (see TimeDistributedDense for more details).
    '''
    np.random.seed(1337)

    train_set, test_set = get_test_data(num_train=100,
                                        num_test=20,
                                        input_shape=(3, 5),
                                        output_shape=(3, 5),
                                        classification=False)
    x_train, y_train = train_set
    x_test, y_test = test_set

    timesteps, features = x_train.shape[1], x_train.shape[2]
    output_dim = y_train.shape[-1]

    model = Sequential()
    model.add(
        layers.TimeDistributed(layers.Dense(output_dim),
                               input_shape=(timesteps, features)))
    model.compile(loss='hinge', optimizer='rmsprop')

    history = model.fit(x_train,
                        y_train,
                        epochs=20,
                        batch_size=16,
                        validation_data=(x_test, y_test),
                        verbose=0)

    # The final training loss has to drop below 1.
    final_loss = history.history['loss'][-1]
    assert (final_loss < 1.)
def model_build(DIGITS, MAXLEN, chars, checkpoint=''):
    """Build and compile the LSTM encoder/decoder, optionally loading weights.

    Args:
        DIGITS: maximum number of digits per operand; the output sequence has
            ``DIGITS + 1`` steps.
        MAXLEN: length of the input sequence.
        chars: character set; its length is the feature size of the input and
            the number of classes per output step.
        checkpoint: path of a weights file to load; the empty string (the
            default) skips loading.

    Returns:
        The Sequential model, compiled with Adam and categorical
        cross-entropy.
    """
    recurrent_cls = layers.LSTM
    hidden_size = 128
    decoder_depth = 1
    num_chars = len(chars)

    model = Sequential()

    # Encoder: turn the input sequence into one vector of size hidden_size.
    # For variable-length inputs, use input_shape=(None, num_feature).
    model.add(recurrent_cls(hidden_size, input_shape=(MAXLEN, num_chars)))

    # The decoder receives the encoder's last output at every time step.
    # 'DIGITS + 1' repetitions cover the longest possible output, e.g. with
    # DIGITS=3 the maximum is 999+999=1998.
    model.add(layers.RepeatVector(DIGITS + 1))

    # Decoder: one or more stacked recurrent layers. return_sequences=True
    # makes each layer return the outputs of all steps, shaped
    # (num_samples, timesteps, output_dim), which TimeDistributed needs below.
    for _ in range(decoder_depth):
        model.add(recurrent_cls(hidden_size, return_sequences=True))

    # Per-step dense layer: for each step of the output sequence, decide
    # which character should be chosen.
    classifier = layers.Dense(len(chars), activation='softmax')
    model.add(layers.TimeDistributed(classifier))

    model.compile(loss='categorical_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])

    if checkpoint != '':
        model.load_weights(checkpoint)
    return model
def build_seq2seq(max_sentence_len: Union[int, None],
                  vocabulary_size: Tuple[int, int],
                  units: List[int],
                  rnn_params: Dict = None,
                  rnn: Union[kl.LSTM, kl.GRU] = kl.LSTM) -> km.Model:
    """Build a stacked seq2seq model whose decoder starts from encoder states.

    One encoder layer and one decoder layer are created per entry of
    ``units``. Decoder layer ``i`` is initialised with the states returned by
    encoder layer ``i``. A per-step softmax dense layer maps the decoder
    output to ``vocabulary_size[1]`` classes.

    Args:
        max_sentence_len: number of time steps of both inputs (may be None).
        vocabulary_size: (encoder feature size, decoder feature size).
        units: number of units of each stacked recurrent layer.
        rnn_params: extra keyword arguments for every recurrent layer.
        rnn: recurrent layer class to instantiate.

    Returns:
        An uncompiled model with inputs ``[encoder_input, decoder_input]``.
    """
    extra_kwargs = {} if rnn_params is None else rnn_params
    source_dim, target_dim = vocabulary_size[0], vocabulary_size[1]

    # Encoder stack: keep every layer's final states.
    encoder_input = kl.Input(shape=(max_sentence_len, source_dim))
    encoder_states = []
    encoded = encoder_input
    for depth, width in enumerate(units):
        layer = rnn(width,
                    name=f'encoder_{depth}',
                    return_state=True,
                    return_sequences=True,
                    **extra_kwargs)
        encoded, *layer_states = layer(encoded)
        encoder_states.append(layer_states)

    # Decoder stack: each layer starts from the matching encoder states.
    decoder_input = kl.Input(shape=(max_sentence_len, target_dim))
    decoded = decoder_input
    for depth, (width, start_state) in enumerate(zip(units, encoder_states)):
        layer = rnn(width,
                    name=f'decoder_{depth}',
                    return_sequences=True,
                    **extra_kwargs)
        decoded = layer(decoded, initial_state=start_state)

    classifier = kl.Dense(target_dim, activation='softmax')
    decoder_out = kl.TimeDistributed(classifier)(decoded)

    return km.Model([encoder_input, decoder_input], decoder_out)
def create_LSTM(self, x_train, y_train, x_val, y_val):
    """Build, compile and fit the recurrent encoder/decoder model.

    An encoder ``RNN`` reads sequences of shape
    ``(self.MAX_SEQUENCE_LENGTH, len(self.labels_index))``; its output is
    repeated ``len(self.labels_index)`` times and decoded by five stacked
    sequence-returning ``RNN`` layers, a per-step dense layer (128 units) and
    a softmax.

    Args:
        x_train: training inputs.
        y_train: training targets.
        x_val: validation inputs.
        y_val: validation targets.

    Returns:
        The fitted model. (Previously nothing was returned and the trained
        model was lost; callers that ignore the return value are unaffected.)
    """
    num_labels = len(self.labels_index)

    model = Sequential()
    model.add(
        RNN(HIDDEN_SIZE, input_shape=(self.MAX_SEQUENCE_LENGTH, num_labels)))
    model.add(layers.RepeatVector(num_labels))

    for _ in range(5):
        model.add(RNN(HIDDEN_SIZE, return_sequences=True))

    model.add(layers.TimeDistributed(layers.Dense(128)))
    model.add(layers.Activation('softmax'))

    model.compile(loss='categorical_crossentropy',
                  optimizer=md.OPTIMIZER_PROP,
                  metrics=['acc'])
    model.fit(x_train,
              y_train,
              validation_data=(x_val, y_val),
              epochs=self.EPOCHS,
              batch_size=self.BATCH_SIZE)
    return model
def test_sequential_model_pickling():
    """A pickled Sequential model must predict and train like the original.

    The model is trained on one batch, pickled and restored. Predictions of
    both copies must agree, and they must still agree after both copies
    receive the same additional training batch.
    """
    model = keras.Sequential()
    for layer in (layers.Dense(2, input_shape=(3,)),
                  layers.RepeatVector(3),
                  layers.TimeDistributed(layers.Dense(3))):
        model.add(layer)
    model.compile(loss=losses.MSE,
                  optimizer=optimizers.RMSprop(lr=0.0001),
                  metrics=[metrics.categorical_accuracy],
                  sample_weight_mode='temporal')

    # One update before pickling, so the optimizer state is non-trivial.
    x = np.random.random((1, 3))
    y = np.random.random((1, 3, 3))
    model.train_on_batch(x, y)
    expected = model.predict(x)

    # Round-trip through pickle.
    restored = pickle.loads(pickle.dumps(model))
    actual = restored.predict(x)
    assert_allclose(expected, actual, atol=1e-05)

    # test that new updates are the same with both models
    x = np.random.random((1, 3))
    y = np.random.random((1, 3, 3))
    model.train_on_batch(x, y)
    restored.train_on_batch(x, y)
    expected = model.predict(x)
    actual = restored.predict(x)
    assert_allclose(expected, actual, atol=1e-05)
def GenerateBLSTMTime():
    """Build the time-domain BiLSTM mask model (uncompiled).

    The input has shape ``(100, 127 * 3)``. Two stacked, regularized
    bidirectional LSTMs (256 units per direction) run along the time axis and
    a time-distributed sigmoid dense layer emits 127 values per step.

    Returns:
        A ``models.Model`` mapping the ``input`` layer to the ``mask_o``
        output.
    """
    # ---- The DNN input
    inp_shape = (100, 127 * 3)
    out_shape = (100, 127)
    inp = layers.Input(shape=inp_shape, name='input')

    # ---- RNN along time
    recurrent_units = 256
    stacked_layers = 2

    # Regularization parameters
    ff_dropout = 0.5    # feed-forward dropout
    rec_dropout = 0.2   # recurrent dropout
    l2_factor = 1e-6    # L2 regularization factor

    hidden = inp
    for _ in range(stacked_layers):  # two stacked BiLSTM
        cell = layers.LSTM(recurrent_units,
                           return_sequences=True,
                           kernel_regularizer=l2(l2_factor),
                           recurrent_regularizer=l2(l2_factor),
                           bias_regularizer=l2(l2_factor),
                           dropout=ff_dropout,
                           recurrent_dropout=rec_dropout)
        hidden = layers.Bidirectional(cell)(hidden)

    mask_dense = layers.Dense(out_shape[-1],
                              activation='sigmoid',
                              kernel_regularizer=l2(l2_factor),
                              bias_regularizer=l2(l2_factor))
    mask_o = layers.TimeDistributed(mask_dense, name='mask_o')(hidden)

    return models.Model(inputs=[inp], outputs=[mask_o])
def distributed_cell(inputs):
    """
    Creates a functional wrapper over RNN cell, applying it on each timestep
    without propagating hidden states over timesteps.

    Args:
        inputs: a pair ``[cell_inputs, cell_states]`` of Keras tensors. The
            last dimension of the first tensor is the cell's input size, the
            last dimension of the second is the number of GRU units.

    Returns:
        The cell output computed independently at every timestep.

    Raises:
        AssertionError: if ``inputs`` does not contain exactly two tensors.
    """
    assert len(inputs) == 2
    shapes = [elem._keras_shape for elem in inputs]
    # no shape validation, assuming all dims of inputs[0] and inputs[1] are
    # equal (apart from the last one)
    input_dim, units, ndims = shapes[0][-1], shapes[1][-1], len(shapes[0])

    if ndims > 3:
        # NOTE(review): this order has one entry more than the number of
        # non-batch axes and repeats axis 2; kept as in the original —
        # confirm the intended permutation.
        dims_order = (1, ) + tuple(range(2, ndims)) + (2, )
        # Permute BOTH tensors. The original permuted inputs[0] twice, which
        # silently replaced the states with a copy of the cell inputs.
        inputs = [
            kl.Permute(dims_order)(inputs[0]),
            kl.Permute(dims_order)(inputs[1])
        ]

    first_shape, second_shape = shapes[0][2:], shapes[1][2:]
    cell = kl.GRUCell(units, input_shape=first_shape, implementation=0)
    if not cell.built:
        cell.build(first_shape)

    # Inputs and states travel through TimeDistributed as one tensor and are
    # split again inside timestep_func.
    concatenated_inputs = kl.Concatenate()(inputs)

    def timestep_func(x):
        cell_inputs = x[..., :input_dim]
        # The extra leading axis wraps the states in the sequence form that
        # cell.call indexes into.
        cell_states = x[..., None, input_dim:]
        cell_output = cell.call(cell_inputs, cell_states)
        return cell_output[0]

    func = kl.TimeDistributed(
        kl.Lambda(timestep_func, output_shape=second_shape))
    answer = func(concatenated_inputs)

    if ndims > 3:
        reverse_dims_order = (1, ndims - 1) + tuple(range(2, ndims - 1))
        answer = kl.Permute(reverse_dims_order)(answer)
    return answer