def __call__(self, inputs):
    x = inputs[0]
    w_reg = kr.WeightRegularizer(l1=self.l1_decay, l2=self.l2_decay)
    x = kl.Conv1D(128, 11, W_regularizer=w_reg)(x)
    x = kl.Activation('relu')(x)
    x = kl.MaxPooling1D(4)(x)
    x = kl.Flatten()(x)
    w_reg = kr.WeightRegularizer(l1=self.l1_decay, l2=self.l2_decay)
    x = kl.Dense(self.nb_hidden, init=self.init, W_regularizer=w_reg)(x)
    x = kl.Activation('relu')(x)
    x = kl.Dropout(self.dropout)(x)
    return self._build(inputs, x)

def test_regularizer(layer_class):
    layer = layer_class(output_dim, return_sequences=False, weights=None,
                        batch_input_shape=(nb_samples, timesteps, embedding_dim),
                        W_regularizer=regularizers.WeightRegularizer(l1=0.01),
                        U_regularizer=regularizers.WeightRegularizer(l1=0.01),
                        b_regularizer='l2')
    shape = (nb_samples, timesteps, embedding_dim)
    layer.build(shape)
    output = layer(K.variable(np.ones(shape)))
    K.eval(output)
    if layer_class == recurrent.SimpleRNN:
        assert len(layer.losses) == 3
    if layer_class == recurrent.GRU:
        assert len(layer.losses) == 9
    if layer_class == recurrent.LSTM:
        assert len(layer.losses) == 12

def _fine_tuning(self, X, y, encoders):
    self._model = models.Sequential()
    logger.info(u"Fine tuning of the neural network")
    for encoder in encoders:
        self._model.add(encoder)
    self._model.add(
        core.Dense(input_dim=self._hidden_layers[-1],
                   output_dim=self.output_dim,
                   activation='softmax',
                   init=self._weight_init,
                   W_regularizer=regularizers.WeightRegularizer(
                       l1=self._l1_regularizer, l2=self._l2_regularizer),
                   activity_regularizer=regularizers.ActivityRegularizer(
                       l1=self._l1_regularizer, l2=self._l2_regularizer)))
    self._model.compile(optimizer=self._optimizer, loss='categorical_crossentropy')
    self._model.fit(X, y, batch_size=self._batch_size,
                    nb_epoch=self._fine_tune_epochs, show_accuracy=True)

def __call__(self, inputs):
    x = inputs[0]
    w_reg = kr.WeightRegularizer(l1=self.l1_decay, l2=self.l2_decay)
    x = kl.Conv1D(128, 11, name='conv1', init=self.init, W_regularizer=w_reg)(x)
    x = kl.BatchNormalization(name='bn1')(x)
    x = kl.Activation('relu', name='act1')(x)
    x = kl.MaxPooling1D(2, name='pool1')(x)

    # 124
    x = self._res_unit(x, [32, 32, 128], stage=1, block=1, stride=2)
    x = self._res_unit(x, [32, 32, 128], stage=1, block=2)
    x = self._res_unit(x, [32, 32, 128], stage=1, block=3)

    # 64
    x = self._res_unit(x, [64, 64, 256], stage=2, block=1, stride=2)
    x = self._res_unit(x, [64, 64, 256], stage=2, block=2)
    x = self._res_unit(x, [64, 64, 256], stage=2, block=3)

    # 32
    x = self._res_unit(x, [128, 128, 512], stage=3, block=1, stride=2)
    x = self._res_unit(x, [128, 128, 512], stage=3, block=2)
    x = self._res_unit(x, [128, 128, 512], stage=3, block=3)

    # 16
    x = self._res_unit(x, [256, 256, 1024], stage=4, block=1, stride=2)

    x = kl.GlobalAveragePooling1D()(x)
    x = kl.Dropout(self.dropout)(x)
    return self._build(inputs, x)

def __call__(self, inputs):
    x = self._merge_inputs(inputs)
    shape = getattr(x, '_keras_shape')
    replicate_model = self._replicate_model(kl.Input(shape=shape[2:]))
    x = kl.TimeDistributed(replicate_model)(x)
    w_reg = kr.WeightRegularizer(l1=self.l1_decay, l2=self.l2_decay)
    x = kl.Bidirectional(kl.GRU(128, W_regularizer=w_reg, return_sequences=True),
                         merge_mode='concat')(x)
    w_reg = kr.WeightRegularizer(l1=self.l1_decay, l2=self.l2_decay)
    x = kl.Bidirectional(kl.GRU(256, W_regularizer=w_reg))(x)
    x = kl.Dropout(self.dropout)(x)
    return self._build(inputs, x)

def __call__(self, inputs):
    x = inputs[0]
    w_reg = kr.WeightRegularizer(l1=self.l1_decay, l2=self.l2_decay)
    x = kl.Conv1D(128, 11, init=self.init, W_regularizer=w_reg)(x)
    x = kl.Activation('relu')(x)
    x = kl.MaxPooling1D(4)(x)
    w_reg = kr.WeightRegularizer(l1=self.l1_decay, l2=self.l2_decay)
    x = kl.Conv1D(256, 7, init=self.init, W_regularizer=w_reg)(x)
    x = kl.Activation('relu')(x)
    x = kl.MaxPooling1D(4)(x)
    w_reg = kr.WeightRegularizer(l1=self.l1_decay, l2=self.l2_decay)
    x = kl.Bidirectional(kl.recurrent.GRU(256, W_regularizer=w_reg))(x)
    x = kl.Dropout(self.dropout)(x)
    return self._build(inputs, x)

def _res_unit(self, inputs, nb_filter, size=3, stride=1, stage=1, block=1):
    name = '%02d-%02d/' % (stage, block)
    id_name = '%sid_' % (name)
    res_name = '%sres_' % (name)

    # Residual branch
    x = kl.BatchNormalization(name=res_name + 'bn1')(inputs)
    x = kl.Activation('relu', name=res_name + 'act1')(x)
    w_reg = kr.WeightRegularizer(l1=self.l1_decay, l2=self.l2_decay)
    x = kl.Conv1D(nb_filter, size, name=res_name + 'conv1', border_mode='same',
                  subsample_length=stride, init=self.init, W_regularizer=w_reg)(x)
    x = kl.BatchNormalization(name=res_name + 'bn2')(x)
    x = kl.Activation('relu', name=res_name + 'act2')(x)
    w_reg = kr.WeightRegularizer(l1=self.l1_decay, l2=self.l2_decay)
    x = kl.Conv1D(nb_filter, size, name=res_name + 'conv2', border_mode='same',
                  init=self.init, W_regularizer=w_reg)(x)

    # Identity branch
    if nb_filter != inputs._keras_shape[-1] or stride > 1:
        w_reg = kr.WeightRegularizer(l1=self.l1_decay, l2=self.l2_decay)
        identity = kl.Conv1D(nb_filter, size, name=id_name + 'conv1',
                             border_mode='same', subsample_length=stride,
                             init=self.init, W_regularizer=w_reg)(inputs)
    else:
        identity = inputs

    x = kl.merge([identity, x], name=name + 'merge', mode='sum')
    return x

def __call__(self, models):
    layers = []
    for layer in range(self.nb_layer):
        w_reg = kr.WeightRegularizer(l1=self.l1_decay, l2=self.l2_decay)
        layers.append(kl.Dense(self.nb_hidden, init=self.init, W_regularizer=w_reg))
        layers.append(kl.Activation('relu'))
        layers.append(kl.Dropout(self.dropout))
    return self._build(models, layers)

def create_model_gen(n_layers=1, input_dim=None, output_dim=12, act_output='softmax',
                     K=[150], D=[0, 0], act='relu', w_cons=None, l1=0, l2=0,
                     init='he_normal', optimizer='Adadelta',
                     loss='sparse_categorical_crossentropy', metrics_nn=['accuracy']):
    """
    General function to create a Keras feed-forward network.

    Args:
        - n_layers: number of hidden layers
        - input_dim: int, dimensionality of the input (first layer)
        - output_dim: 1 for regression, nb_classes for classification
        - act_output: activation of the output layer; softmax for classification,
          linear (or similar) for regression
        - K: list of length n_layers giving the number of neurons per layer
        - D: list of length n_layers + 1 specifying the dropout rates
        - w_cons: constraint function applied during learning (weight capping);
          see the Keras constraints module for the available options
        - l1, l2: regularization parameters
        - init: weight initialization; he_normal recommended
        - optimizer: Adadelta or Adam recommended
        - loss: see the Keras losses module for the available options
        - metrics_nn: list of metrics computed at each epoch; see the Keras
          metrics module for the available options
    """
    model = models.Sequential()
    model.add(Dropout(D[0], input_shape=(input_dim, )))
    for i in range(n_layers):
        model.add(layers.Dense(K[i],
                               # activation=act,
                               init=init,
                               W_constraint=w_cons,
                               W_regularizer=regularizers.WeightRegularizer(l1=l1, l2=l2),
                               name="hidden1_clf{}".format(i)))
        model.add(layers.normalization.BatchNormalization())
        model.add(layers.advanced_activations.PReLU())
        # model.add(Activation(act))
        model.add(Dropout(D[i + 1]))
    model.add(layers.Dense(output_dim, init='normal', activation=act_output))
    model.compile(loss=loss, optimizer=optimizer, metrics=metrics_nn)
    return model

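# A minimal usage sketch for create_model_gen, assuming numpy is available as np and
# the Keras 1.x imports used above (models, layers, regularizers, Dropout) are in scope.
# The feature dimension, class count, layer sizes, and training data below are
# illustrative placeholders, not values taken from the source.
X_train = np.random.rand(100, 20)
y_train = np.random.randint(0, 5, size=(100, 1))

clf = create_model_gen(n_layers=2, input_dim=20, output_dim=5,
                       K=[150, 75], D=[0.0, 0.2, 0.2],
                       l1=0.0, l2=1e-4)
clf.fit(X_train, y_train, batch_size=32, nb_epoch=10)  # Keras 1.x fit API
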
def CNN(lr1, lr2, input_len, input_channel=4, nb_filter=100, dense_dim=512,
        output_len=1, filter_len=11, pool_len=2, dropout=0.2, nDense=1, nConv=1):
    print("Using canonical CNN methods.")
    model = keras.models.Sequential()

    # Convolutional layers
    w_reg = kr.WeightRegularizer(l1=lr1, l2=lr2)

    # first conv layer
    model.add(
        keras.layers.convolutional.Convolution1D(input_shape=(input_len, input_channel),
                                                 nb_filter=nb_filter,
                                                 activation='relu',
                                                 border_mode='same',
                                                 init='glorot_uniform',
                                                 W_regularizer=w_reg,
                                                 filter_length=filter_len))
    model.add(keras.layers.core.Activation("relu"))
    model.add(keras.layers.pooling.MaxPooling1D(pool_length=pool_len))
    if nConv > 1:
        for i in range(nConv - 1):
            model.add(
                keras.layers.convolutional.Convolution1D(nb_filter=nb_filter,
                                                         border_mode='same',
                                                         filter_length=filter_len))
            model.add(keras.layers.pooling.MaxPooling1D(pool_length=pool_len))
    model.add(keras.layers.core.Flatten())
    for i in range(nDense):
        model.add(
            keras.layers.core.Dense(output_dim=dense_dim,
                                    activation='linear',
                                    init='glorot_uniform',
                                    bias=True))
        model.add(keras.layers.core.Activation("relu"))
        model.add(Dropout(dropout))
    model.add(
        keras.layers.core.Dense(output_dim=output_len,
                                activation='sigmoid',
                                init='glorot_uniform',
                                bias=True))
    return model

def _fit(self, X, y):
    logger.info(u"Building the network architecture")
    self._model = models.Sequential()
    previous_layer_size = self.input_dim
    for layer_size in self._hidden_layers:
        self._model.add(
            core.Dense(input_dim=previous_layer_size,
                       output_dim=layer_size,
                       init=self._weight_init,
                       activation=self._activation))
        self._model.add(
            core.Dropout(self._dropout_ratio, input_shape=(layer_size, )))
        previous_layer_size = layer_size
    self._model.add(
        core.Dense(input_dim=previous_layer_size,
                   output_dim=self.output_dim,
                   activation='softmax',
                   init=self._weight_init,
                   W_regularizer=regularizers.WeightRegularizer(
                       l1=self._l1_regularizer, l2=self._l2_regularizer),
                   activity_regularizer=regularizers.ActivityRegularizer(
                       l1=self._l1_regularizer, l2=self._l2_regularizer)))
    logger.info(u"Compiling the network")
    self._model.compile(optimizer=self._optimizer, loss='categorical_crossentropy')
    logger.info(u"Fitting the data to the network")
    self._model.fit(X, y, batch_size=self._batch_size,
                    nb_epoch=self._fine_tune_epochs, show_accuracy=True)

def cpg_layers(params):
    layers = []
    if params.drop_in:
        layer = kcore.Dropout(params.drop_in)
        layers.append(('xd', layer))
    nb_layer = len(params.nb_filter)
    w_reg = kr.WeightRegularizer(l1=params.l1, l2=params.l2)
    for l in range(nb_layer):
        layer = kconv.Convolution2D(nb_filter=params.nb_filter[l],
                                    nb_row=1,
                                    nb_col=params.filter_len[l],
                                    activation=params.activation,
                                    init='glorot_uniform',
                                    W_regularizer=w_reg,
                                    border_mode='same')
        layers.append(('c%d' % (l + 1), layer))
        layer = kconv.MaxPooling2D(pool_size=(1, params.pool_len[l]))
        layers.append(('p%d' % (l + 1), layer))
    layer = kcore.Flatten()
    layers.append(('f1', layer))
    if params.drop_out:
        layer = kcore.Dropout(params.drop_out)
        layers.append(('f1d', layer))
    if params.nb_hidden:
        layer = kcore.Dense(params.nb_hidden, activation='linear', init='glorot_uniform')
        layers.append(('h1', layer))
        if params.batch_norm:
            layer = knorm.BatchNormalization()
            layers.append(('h1b', layer))
        layer = kcore.Activation(params.activation)
        layers.append(('h1a', layer))
        if params.drop_out:
            layer = kcore.Dropout(params.drop_out)
            layers.append(('h1d', layer))
    return layers

def _runner(layer_class):
    """
    All the recurrent layers share the same interface,
    so we can run through them with a single function.
    """
    for ret_seq in [True, False]:
        layer = layer_class(output_dim, return_sequences=ret_seq, weights=None,
                            input_shape=(timesteps, embedding_dim))
        layer.input = K.variable(np.ones((nb_samples, timesteps, embedding_dim)))
        layer.get_config()
        for train in [True, False]:
            out = K.eval(layer.get_output(train))
            # Make sure the output has the desired shape
            if ret_seq:
                assert(out.shape == (nb_samples, timesteps, output_dim))
            else:
                assert(out.shape == (nb_samples, output_dim))
            mask = layer.get_output_mask(train)

    # check dropout
    for ret_seq in [True, False]:
        layer = layer_class(output_dim, return_sequences=ret_seq, weights=None,
                            batch_input_shape=(nb_samples, timesteps, embedding_dim),
                            dropout_W=0.5, dropout_U=0.5)
        layer.input = K.variable(np.ones((nb_samples, timesteps, embedding_dim)))
        layer.get_config()
        for train in [True, False]:
            out = K.eval(layer.get_output(train))
            # Make sure the output has the desired shape
            if ret_seq:
                assert(out.shape == (nb_samples, timesteps, output_dim))
            else:
                assert(out.shape == (nb_samples, output_dim))
            mask = layer.get_output_mask(train)

    # check statefulness
    model = Sequential()
    model.add(embeddings.Embedding(embedding_num, embedding_dim,
                                   mask_zero=True,
                                   input_length=timesteps,
                                   batch_input_shape=(nb_samples, timesteps)))
    layer = layer_class(output_dim, return_sequences=False, stateful=True, weights=None)
    model.add(layer)
    model.compile(optimizer='sgd', loss='mse')
    out1 = model.predict(np.ones((nb_samples, timesteps)))
    assert(out1.shape == (nb_samples, output_dim))

    # train once so that the states change
    model.train_on_batch(np.ones((nb_samples, timesteps)),
                         np.ones((nb_samples, output_dim)))
    out2 = model.predict(np.ones((nb_samples, timesteps)))

    # if the state is not reset, output should be different
    assert(out1.max() != out2.max())

    # check that output changes after states are reset
    # (even though the model itself didn't change)
    layer.reset_states()
    out3 = model.predict(np.ones((nb_samples, timesteps)))
    assert(out2.max() != out3.max())

    # check that container-level reset_states() works
    model.reset_states()
    out4 = model.predict(np.ones((nb_samples, timesteps)))
    assert_allclose(out3, out4, atol=1e-5)

    # check that the call to `predict` updated the states
    out5 = model.predict(np.ones((nb_samples, timesteps)))
    assert(out4.max() != out5.max())

    # Check masking
    layer.reset_states()

    left_padded_input = np.ones((nb_samples, timesteps))
    left_padded_input[0, :1] = 0
    left_padded_input[1, :2] = 0
    left_padded_input[2, :3] = 0
    out6 = model.predict(left_padded_input)

    layer.reset_states()

    right_padded_input = np.ones((nb_samples, timesteps))
    right_padded_input[0, -1:] = 0
    right_padded_input[1, -2:] = 0
    right_padded_input[2, -3:] = 0
    out7 = model.predict(right_padded_input)

    assert_allclose(out7, out6, atol=1e-5)

    # check regularizers
    layer = layer_class(output_dim, return_sequences=ret_seq, weights=None,
                        batch_input_shape=(nb_samples, timesteps, embedding_dim),
                        W_regularizer=regularizers.WeightRegularizer(l1=0.01),
                        U_regularizer=regularizers.WeightRegularizer(l1=0.01),
                        b_regularizer='l2')
    layer.input = K.variable(np.ones((nb_samples, timesteps, embedding_dim)))
    out = K.eval(layer.get_output(train=True))

                     w_cons=None, l1=0, l2=0, X_learn_lstm=None,
                     init='he_normal', optimizer='Adadelta',
                     loss='sparse_categorical_crossentropy', metrics_nn=['accuracy']):
    """
    Same as create_model_gen, but for an LSTM network.
    """
    model = Sequential()
    model.add(Dropout(D[0], batch_input_shape=(None, X_learn_lstm[0].shape[0],
                                               X_learn_lstm[0].shape[1])))
    model.add(layers.normalization.BatchNormalization())
    for i in range(n_layers):
        model.add(layers.recurrent.LSTM(K[i],
                                        activation='relu',
                                        W_regularizer=regularizers.WeightRegularizer(l1=0, l2=0),
                                        W_constraint=w_cons,
                                        stateful=True if n_layers > 1 else False
                                        # return_sequences=True
                                        ))
        model.add(layers.normalization.BatchNormalization())
        model.add(Dropout(D[i + 1]))
    model.add(layers.Dense(output_dim, init='normal', activation=act_output))
    model.compile(loss=loss, optimizer=optimizer, metrics=metrics_nn)
    return model

def RC_CNN(lr1, lr2, input_len, input_channel=4, nb_filter=32, dense_dim=512,
           output_len=1, filter_len=11, pool_len=2, dropout=0.2, nDense=1, nConv=1):
    print("Using revcomp weight sharing methods.")
    model = keras.models.Sequential()

    # first layer
    w_reg = kr.WeightRegularizer(l1=lr1, l2=lr2)
    model.add(
        keras.layers.convolutional.RevCompConv1D(input_shape=(input_len, input_channel),
                                                 nb_filter=nb_filter,
                                                 activation='relu',
                                                 border_mode='same',
                                                 init='glorot_uniform',
                                                 W_regularizer=w_reg,
                                                 filter_length=filter_len))
    model.add(keras.layers.normalization.RevCompConv1DBatchNorm())
    model.add(keras.layers.core.Activation("relu"))

    # second layer
    if nConv > 1:
        for i in range(nConv - 1):
            model.add(
                keras.layers.convolutional.RevCompConv1D(nb_filter=nb_filter,
                                                         border_mode='same',
                                                         filter_length=filter_len))
            model.add(keras.layers.normalization.RevCompConv1DBatchNorm())
            model.add(keras.layers.core.Activation("relu"))

    # weighted sum layer
    model.add(keras.layers.pooling.MaxPooling1D(pool_length=pool_len))
    model.add(
        keras.layers.convolutional.WeightedSum1D(symmetric=False,
                                                 input_is_revcomp_conv=True,
                                                 bias=False,
                                                 init="fanintimesfanouttimestwo"))
    model.add(
        keras.layers.core.DenseAfterRevcompWeightedSum(output_dim=dense_dim,
                                                       activation='linear',
                                                       init='glorot_uniform',
                                                       bias=True))
    model.add(keras.layers.core.Activation("relu"))
    if nDense > 1:
        for i in range(nDense - 1):
            model.add(
                keras.layers.core.Dense(output_dim=dense_dim,
                                        activation='linear',
                                        init='glorot_uniform',
                                        bias=True))
            model.add(keras.layers.core.Activation("relu"))
    model.add(Dropout(dropout))
    model.add(
        keras.layers.core.Dense(output_dim=output_len,
                                activation='sigmoid',
                                init='glorot_uniform',
                                bias=True))
    return model

def seq_and_HM_CNN(lr1, lr2, input_len_seq, input_channel_seq, input_len_hm,
                   input_channel_hm, nb_filter_seq=[20], nb_filter_hm=[20],
                   dense_dim=[128], output_len=1, filter_len_seq=6, filter_len_hm=6,
                   pool_len=2, dropout=0.2, nDense=1, nConv=1, add_seq=True, add_hm=True):
    print("Using seq_and_HM_CNN methods.")
    if add_seq:
        seq_model = keras.models.Sequential()
        w_reg_seq = kr.WeightRegularizer(l1=lr1, l2=lr2)
        seq_model.add(
            keras.layers.convolutional.Convolution1D(
                input_shape=(input_len_seq, input_channel_seq),
                nb_filter=nb_filter_seq[0],
                activation='relu',
                border_mode='same',
                init='glorot_uniform',
                W_regularizer=w_reg_seq,
                filter_length=filter_len_seq))
        seq_model.add(keras.layers.core.Activation("relu"))
        seq_model.add(keras.layers.pooling.MaxPooling1D(pool_length=pool_len))
        if len(nb_filter_seq) > 1:
            for nb_filter in nb_filter_seq[1:]:
                seq_model.add(
                    keras.layers.convolutional.Convolution1D(nb_filter=nb_filter,
                                                             border_mode='same',
                                                             filter_length=filter_len_seq))
                seq_model.add(keras.layers.pooling.MaxPooling1D(pool_length=pool_len))
        seq_model.add(keras.layers.core.Flatten())
    if add_hm == 1:
        hm_model = keras.models.Sequential()
        w_reg_hm = kr.WeightRegularizer(l1=lr1, l2=lr2)
        hm_model.add(
            keras.layers.convolutional.Convolution1D(
                input_shape=(input_len_hm, input_channel_hm),
                nb_filter=nb_filter_hm[0],
                activation='relu',
                border_mode='same',
                init='glorot_uniform',
                W_regularizer=w_reg_hm,
                filter_length=filter_len_hm))
        hm_model.add(keras.layers.core.Activation("relu"))
        hm_model.add(keras.layers.pooling.MaxPooling1D(pool_length=pool_len))
        if len(nb_filter_hm) > 1:
            for nb_filter in nb_filter_hm[1:]:
                hm_model.add(
                    keras.layers.convolutional.Convolution1D(nb_filter=nb_filter,
                                                             border_mode='same',
                                                             filter_length=filter_len_hm))
                hm_model.add(keras.layers.pooling.MaxPooling1D(pool_length=pool_len))
        hm_model.add(keras.layers.core.Flatten())
    if add_seq and add_hm:
        merged = keras.models.Merge([seq_model, hm_model], mode='concat')
        model = keras.models.Sequential()
        model.add(merged)
    elif add_seq:
        model = seq_model
    else:
        model = hm_model
    for dd in dense_dim:
        model.add(
            keras.layers.core.Dense(output_dim=dd,
                                    activation='linear',
                                    init='glorot_uniform',
                                    bias=True))
        model.add(keras.layers.core.Activation("relu"))
        model.add(Dropout(dropout))
    model.add(
        keras.layers.core.Dense(output_dim=2,
                                activation='softmax',
                                init='glorot_uniform',
                                bias=True))
    return model

def _replicate_model(self, input):
    w_reg = kr.WeightRegularizer(l1=self.l1_decay, l2=self.l2_decay)
    x = kl.Dense(256, init=self.init, W_regularizer=w_reg)(input)
    x = kl.Activation(self.act_replicate)(x)
    return km.Model(input, x)

def _runner(layer_class):
    """
    All the recurrent layers share the same interface,
    so we can run through them with a single function.
    """
    # check return_sequences
    layer_test(layer_class,
               kwargs={'output_dim': output_dim,
                       'return_sequences': True},
               input_shape=(3, 2, 3))

    # check dropout
    layer_test(layer_class,
               kwargs={'output_dim': output_dim,
                       'dropout_U': 0.1,
                       'dropout_W': 0.1},
               input_shape=(3, 2, 3))

    # check implementation modes
    for mode in ['cpu', 'mem', 'gpu']:
        layer_test(layer_class,
                   kwargs={'output_dim': output_dim,
                           'consume_less': mode},
                   input_shape=(3, 2, 3))

    # check statefulness
    model = Sequential()
    model.add(embeddings.Embedding(embedding_num, embedding_dim,
                                   mask_zero=True,
                                   input_length=timesteps,
                                   batch_input_shape=(nb_samples, timesteps)))
    layer = layer_class(output_dim, return_sequences=False, stateful=True, weights=None)
    model.add(layer)
    model.compile(optimizer='sgd', loss='mse')
    out1 = model.predict(np.ones((nb_samples, timesteps)))
    assert(out1.shape == (nb_samples, output_dim))

    # train once so that the states change
    model.train_on_batch(np.ones((nb_samples, timesteps)),
                         np.ones((nb_samples, output_dim)))
    out2 = model.predict(np.ones((nb_samples, timesteps)))

    # if the state is not reset, output should be different
    assert(out1.max() != out2.max())

    # check that output changes after states are reset
    # (even though the model itself didn't change)
    layer.reset_states()
    out3 = model.predict(np.ones((nb_samples, timesteps)))
    assert(out2.max() != out3.max())

    # check that container-level reset_states() works
    model.reset_states()
    out4 = model.predict(np.ones((nb_samples, timesteps)))
    assert_allclose(out3, out4, atol=1e-5)

    # check that the call to `predict` updated the states
    out5 = model.predict(np.ones((nb_samples, timesteps)))
    assert(out4.max() != out5.max())

    # Check masking
    layer.reset_states()

    left_padded_input = np.ones((nb_samples, timesteps))
    left_padded_input[0, :1] = 0
    left_padded_input[1, :2] = 0
    left_padded_input[2, :3] = 0
    out6 = model.predict(left_padded_input)

    layer.reset_states()

    right_padded_input = np.ones((nb_samples, timesteps))
    right_padded_input[0, -1:] = 0
    right_padded_input[1, -2:] = 0
    right_padded_input[2, -3:] = 0
    out7 = model.predict(right_padded_input)

    assert_allclose(out7, out6, atol=1e-5)

    # check regularizers
    layer = layer_class(output_dim, return_sequences=False, weights=None,
                        batch_input_shape=(nb_samples, timesteps, embedding_dim),
                        W_regularizer=regularizers.WeightRegularizer(l1=0.01),
                        U_regularizer=regularizers.WeightRegularizer(l1=0.01),
                        b_regularizer='l2')
    shape = (nb_samples, timesteps, embedding_dim)
    layer.set_input(K.variable(np.ones(shape)), shape=shape)
    K.eval(layer.output)

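# A minimal sketch of how either _runner variant above might be driven, assuming
# `recurrent` is keras.layers.recurrent (as in test_regularizer) and the module-level
# test constants (output_dim, nb_samples, timesteps, embedding_dim, embedding_num)
# are defined; the loop below is illustrative, not the original test harness.
for rnn_class in [recurrent.SimpleRNN, recurrent.GRU, recurrent.LSTM]:
    _runner(rnn_class)
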
def test_recurrent_convolutional():
    nb_row = 3
    nb_col = 3
    nb_filter = 5
    nb_samples = 2
    input_channel = 2
    input_nb_row = 5
    input_nb_col = 5
    sequence_len = 2
    for dim_ordering in ['th', 'tf']:
        if dim_ordering == 'th':
            input = np.random.rand(nb_samples, sequence_len, input_channel,
                                   input_nb_row, input_nb_col)
        else:  # tf
            input = np.random.rand(nb_samples, sequence_len, input_nb_row,
                                   input_nb_col, input_channel)

        for return_sequences in [True, False]:
            # test for output shape:
            output = layer_test(convolutional_recurrent.ConvLSTM2D,
                                kwargs={'dim_ordering': dim_ordering,
                                        'return_sequences': return_sequences,
                                        'nb_filter': nb_filter,
                                        'nb_row': nb_row,
                                        'nb_col': nb_col,
                                        'border_mode': "same"},
                                input_shape=input.shape)

            output_shape = [nb_samples, input_nb_row, input_nb_col]
            if dim_ordering == 'th':
                output_shape.insert(1, nb_filter)
            else:
                output_shape.insert(3, nb_filter)
            if return_sequences:
                output_shape.insert(1, sequence_len)
            assert output.shape == tuple(output_shape)

            # No need to check statefulness for both
            if dim_ordering == 'th' or return_sequences:
                continue

            # Tests for statefulness
            model = Sequential()
            kwargs = {'dim_ordering': dim_ordering,
                      'return_sequences': return_sequences,
                      'nb_filter': nb_filter,
                      'nb_row': nb_row,
                      'nb_col': nb_col,
                      'stateful': True,
                      'batch_input_shape': input.shape,
                      'border_mode': "same"}
            layer = convolutional_recurrent.ConvLSTM2D(**kwargs)
            model.add(layer)
            model.compile(optimizer='sgd', loss='mse')
            out1 = model.predict(np.ones_like(input))
            assert(out1.shape == tuple(output_shape))

            # train once so that the states change
            model.train_on_batch(np.ones_like(input), np.ones_like(output))
            out2 = model.predict(np.ones_like(input))

            # if the state is not reset, output should be different
            assert(out1.max() != out2.max())

            # check that output changes after states are reset
            # (even though the model itself didn't change)
            layer.reset_states()
            out3 = model.predict(np.ones_like(input))
            assert(out2.max() != out3.max())

            # check that container-level reset_states() works
            model.reset_states()
            out4 = model.predict(np.ones_like(input))
            assert_allclose(out3, out4, atol=1e-5)

            # check that the call to `predict` updated the states
            out5 = model.predict(np.ones_like(input))
            assert(out4.max() != out5.max())

            # check regularizers
            kwargs = {'dim_ordering': dim_ordering,
                      'return_sequences': return_sequences,
                      'nb_filter': nb_filter,
                      'nb_row': nb_row,
                      'nb_col': nb_col,
                      'stateful': True,
                      'batch_input_shape': input.shape,
                      'W_regularizer': regularizers.WeightRegularizer(l1=0.01),
                      'U_regularizer': regularizers.WeightRegularizer(l1=0.01),
                      'b_regularizer': 'l2',
                      'border_mode': "same"}
            layer = convolutional_recurrent.ConvLSTM2D(**kwargs)
            layer.build(input.shape)
            output = layer(K.variable(np.ones(input.shape)))
            K.eval(output)

            # check dropout
            layer_test(convolutional_recurrent.ConvLSTM2D,
                       kwargs={'dim_ordering': dim_ordering,
                               'return_sequences': return_sequences,
                               'nb_filter': nb_filter,
                               'nb_row': nb_row,
                               'nb_col': nb_col,
                               'border_mode': "same",
                               'dropout_W': 0.1,
                               'dropout_U': 0.1},
                       input_shape=input.shape)

def create_model(input_shape, num_first_filter, num_growth, depth, output_dim, dropout_rate):
    if (depth - 4) % 3 != 0:
        raise ValueError('Depth must be 3N + 4. depth: {}'.format(depth))
    num_layers_of_each_block = (depth - 4) // 3

    regularizer = regularizers.WeightRegularizer(l2=1e-4)
    img_input = Input(shape=input_shape, name='input')
    conv1 = Convolution2D(nb_filter=num_first_filter, nb_row=3, nb_col=3,
                          subsample=(1, 1), init="he_normal", border_mode="same",
                          W_regularizer=regularizer,
                          b_regularizer=regularizer)(img_input)
    input_layers = [img_input, conv1]
    num_filters = num_first_filter

    # 1st block
    output_layers, num_added = layer_block(input_layers, num_layers_of_each_block,
                                           num_growth, dropout_rate, regularizer)
    # transition
    num_filters += num_added
    pool = transition_layer(output_layers, num_filters, regularizer)

    # 2nd block
    input_layers = [pool]
    output_layers, num_added = layer_block(input_layers, num_layers_of_each_block,
                                           num_growth, dropout_rate, regularizer)
    # transition
    num_filters += num_added
    pool = transition_layer(output_layers, num_filters, regularizer)

    # 3rd block
    input_layers = [pool]
    output_layers, num_added = layer_block(input_layers, num_layers_of_each_block,
                                           num_growth, dropout_rate, regularizer)

    # transition
    merged = merge(output_layers, mode='concat', concat_axis=1)
    x = BatchNormalization(mode=0, axis=1)(merged)
    # print('out ', x, type(x))
    # import theano.tensor
    # print('shape', theano.tensor.shape(x))
    x = Activation("relu")(x)
    x = AveragePooling2D((8, 8))(x)
    x = Flatten()(x)
    x = Dense(output_dim, init="he_normal")(x)

    model = Model(input=img_input, output=x)
    return model

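# A hypothetical call to create_model, assuming channels-first 32x32 inputs (suggested
# by concat_axis=1 and the final 8x8 average pooling); the depth, growth, and filter
# values below are illustrative DenseNet-style settings, not taken from the source.
model = create_model(input_shape=(3, 32, 32),
                     num_first_filter=16,
                     num_growth=12,
                     depth=40,  # 3 * 12 + 4, so it passes the depth check
                     output_dim=10,
                     dropout_rate=0.2)
model.summary()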