def test_load_layers():
    """Check that ``saving.preprocess_weights_for_loading`` converts
    Keras-1-format weights of wrapped layers (TimeDistributed Conv2D and
    Bidirectional ConvLSTM2D) into shapes matching the current layers."""
    from keras.layers import ConvLSTM2D, TimeDistributed
    from keras.layers import Bidirectional, Conv2D, Input
    from keras.models import Model

    # Channels-last backends expect (time, rows, cols, channels).
    if K.backend() == 'tensorflow' or K.backend() == 'cntk':
        inputs = Input(shape=(10, 20, 20, 1))
    else:
        inputs = Input(shape=(10, 1, 20, 20))
    td_conv = TimeDistributed(Conv2D(15, (5, 5)))(inputs)
    bi_conv = Bidirectional(ConvLSTM2D(10, (3, 3)),
                            merge_mode='concat')(td_conv)
    model = Model(inputs=inputs, outputs=bi_conv)

    value_tuples = []

    # TimeDistributed Conv2D layer.
    # Use 'channels_first' data format to check that the conversion is
    # applied to the wrapped Conv2D:
    #   old: (filters, stack_size, kernel_rows, kernel_cols)
    #   new: (kernel_rows, kernel_cols, stack_size, filters)
    td_weights_old = [np.zeros((15, 1, 5, 5)), np.zeros((15, ))]
    td_layer = model.layers[1]
    td_layer.layer.data_format = 'channels_first'
    td_weights_new = saving.preprocess_weights_for_loading(
        td_layer, td_weights_old, original_keras_version='1')
    td_symbolic = td_layer.weights
    assert len(td_symbolic) == len(td_weights_new)
    value_tuples += zip(td_symbolic, td_weights_new)

    # Bidirectional ConvLSTM2D layer.
    # The old ConvLSTM2D took a list of 12 weight tensors per direction;
    # the new one uses 3 concatenated larger tensors.
    bi_weights_old = []
    for _direction in range(2):  # bidirectional
        for _gate in range(4):
            bi_weights_old.append(np.zeros((3, 3, 15, 10)))  # kernel
            bi_weights_old.append(np.zeros((3, 3, 10, 10)))  # recurrent kernel
            bi_weights_old.append(np.zeros((10, )))  # bias
    bi_layer = model.layers[2]
    bi_weights_new = saving.preprocess_weights_for_loading(
        bi_layer, bi_weights_old, original_keras_version='1')
    bi_symbolic = bi_layer.weights
    assert len(bi_symbolic) == len(bi_weights_new)
    value_tuples += zip(bi_symbolic, bi_weights_new)

    K.batch_set_value(value_tuples)

    # The converted values must have been written into the variables.
    for idx in range(2):
        assert np.all(
            K.eval(model.layers[1].weights[idx]) == td_weights_new[idx])
    for idx in range(6):
        assert np.all(
            K.eval(model.layers[2].weights[idx]) == bi_weights_new[idx])
def create_model(desired_sample_rate, dilation_depth, nb_stacks):
    """Build a WaveNet-style model (Keras 1 API: ``border_mode``,
    ``W_regularizer``, ``layers.Merge``) ending in a 919-way sigmoid
    dense head named 'fc1'.

    # Arguments
        desired_sample_rate: sample rate passed to ``compute_receptive_field_``.
        dilation_depth: dilation levels per stack (0..dilation_depth used).
        nb_stacks: number of stacks of dilated-conv residual blocks.

    # Returns
        The (uncompiled) keras ``Model``.
    """
    # Historical parameter values kept for reference:
    # desired_sample_rate = 4410
    nb_output_bins = 4
    # nb_filters = 256
    nb_filters = 64
    # dilation_depth = 9 #
    # nb_stacks = 1
    use_bias = False
    res_l2 = 0    # L2 strength for residual-block convolutions
    final_l2 = 0  # L2 strength for the (commented-out) final convolution
    # The input fragment covers the receptive field plus 488 samples.
    fragment_length = 488 + compute_receptive_field_(
        desired_sample_rate, dilation_depth, nb_stacks)[0]
    fragment_stride = 488  # NOTE(review): unused inside this function
    use_skip_connections = True
    learn_all_outputs = True

    def residual_block(x):
        # Gated activation unit. NOTE: reads the loop variables ``i``
        # (dilation level) and ``s`` (stack index) from the enclosing
        # scope at call time — do not call outside the loops below.
        original_x = x
        # TODO: initalization, regularization?
        # Note: The AtrousConvolution1D with the 'causal' flag is
        # implemented in github.com/basveeling/keras#@wavenet.
        tanh_out = CausalAtrousConvolution1D(
            nb_filters, 2,
            atrous_rate=2**i,
            border_mode='valid',
            causal=True,
            bias=use_bias,
            name='dilated_conv_%d_tanh_s%d' % (2**i, s),
            activation='tanh',
            W_regularizer=l2(res_l2))(x)
        x = layers.Dropout(0.2)(x)
        sigm_out = CausalAtrousConvolution1D(
            nb_filters, 2,
            atrous_rate=2**i,
            border_mode='valid',
            causal=True,
            bias=use_bias,
            name='dilated_conv_%d_sigm_s%d' % (2**i, s),
            activation='sigmoid',
            W_regularizer=l2(res_l2))(x)
        # Element-wise gate: tanh branch modulated by sigmoid branch.
        x = layers.Merge(mode='mul',
                         name='gated_activation_%d_s%d' % (i, s))(
                             [tanh_out, sigm_out])
        # 1x1 convolutions produce the residual and the skip outputs.
        res_x = layers.Convolution1D(nb_filters, 1,
                                     border_mode='same',
                                     bias=use_bias,
                                     W_regularizer=l2(res_l2))(x)
        skip_x = layers.Convolution1D(nb_filters, 1,
                                      border_mode='same',
                                      bias=use_bias,
                                      W_regularizer=l2(res_l2))(x)
        res_x = layers.Merge(mode='sum')([original_x, res_x])
        return res_x, skip_x

    input = Input(shape=(fragment_length, nb_output_bins), name='input_part')
    out = input
    skip_connections = []
    out = CausalAtrousConvolution1D(nb_filters, 2,
                                    atrous_rate=1,
                                    border_mode='valid',
                                    causal=True,
                                    name='initial_causal_conv')(out)
    # Stacks of residual blocks with exponentially growing dilation.
    for s in range(nb_stacks):
        for i in range(0, dilation_depth + 1):
            out, skip_out = residual_block(out)
            skip_connections.append(skip_out)
    if use_skip_connections:
        out = layers.Merge(mode='sum')(skip_connections)
    out = layers.PReLU()(out)
    # out = layers.Convolution1D(nb_filter=256, filter_length=1,
    #                            border_mode='same',
    #                            W_regularizer=l2(final_l2))(out)
    out = layers.Convolution1D(nb_filter=nb_output_bins,
                               filter_length=3,
                               border_mode='same')(out)
    out = layers.Dropout(0.5)(out)
    out = layers.PReLU()(out)
    out = layers.Convolution1D(nb_filter=nb_output_bins,
                               filter_length=3,
                               border_mode='same')(out)
    if not learn_all_outputs:
        raise DeprecationWarning(
            'Learning on just all outputs is wasteful, now learning only inside receptive field.'
        )
        # Unreachable after the raise above; kept from the original code.
        out = layers.Lambda(
            lambda x: x[:, -1, :], output_shape=(out._keras_shape[-1], ))(
                out)  # Based on gif in deepmind blog: take last output?
    # out = layers.Activation('softmax', name="output_softmax")(out)
    out = layers.PReLU()(out)
    # out = layers.Activation('sigmoid', name="output_sigmoid")(out)
    out = layers.Flatten()(out)
    predictions = layers.Dense(919, activation='sigmoid', name='fc1')(out)
    model = Model(input, predictions)
    # x = model.output
    # x = layers.Flatten()(x)
    # # x = layers.Dense(output_dim=1024)(x)
    # # x = layers.PReLU()(x)
    # # x = layers.Dropout(0.5)(x)
    # # x = layers.Dense(output_dim=919)(x)
    # # x = layers.Activation('sigmoid')(x)
    # model = Model(input=model.input, output=predictions)
    receptive_field, receptive_field_ms = compute_receptive_field_(
        desired_sample_rate, dilation_depth, nb_stacks)
    _log.info('Receptive Field: %d (%dms)' %
              (receptive_field, int(receptive_field_ms)))
    return model
def InceptionResNetV2_Multitask(self, params):
    """Build a multi-task classifier on an ImageNet-pretrained
    InceptionResNetV2 backbone.

    One Dense + Activation head is added per entry in params['OUTPUTS'];
    outputs of type 'sigma' wrap a previously built head with
    ``ConcatenateOutputWithSigma``, so they are ordered last. The result
    is stored in ``self.model``.
    """
    # BUG FIX: dict views are not indexable on Python 3; materialize the
    # key views into lists before indexing (the original code relied on
    # Python 2 ``dict.keys()`` returning a list).
    input_ids = list(params['INPUTS'])
    assert len(input_ids) == 1, 'Number of inputs must be one.'
    assert params['INPUTS'][input_ids[0]]['type'] == 'raw-image', \
        'Input must be of type "raw-image".'
    self.ids_inputs = input_ids
    self.ids_outputs = list(params['OUTPUTS'])
    input_shape = params['INPUTS'][input_ids[0]]['img_size_crop']

    image = Input(name=self.ids_inputs[0], shape=input_shape)

    ##################################################
    # Load Inception model pre-trained on ImageNet
    self.model = InceptionResNetV2(weights='imagenet', input_tensor=image)
    #for layer in self.model.layers:
    #    layer.trainable = False

    # Recover last layer kept from original model: global average pool.
    x = self.model.get_layer('avg_pool').output
    ##################################################

    # Define outputs
    outputs_list = []
    outputs_matching = {}
    num_classes_matching = {}
    if 'SORTED_OUTPUTS' in params:
        sorted_keys = params['SORTED_OUTPUTS']
    else:
        # Ensure 'sigma' outputs come after the heads they reference.
        sorted_keys = []
        for k in params['OUTPUTS']:
            if params['OUTPUTS'][k]['type'] == 'sigma':
                sorted_keys.append(k)
            else:
                sorted_keys.insert(0, k)
    for id_name in sorted_keys:
        data = params['OUTPUTS'][id_name]
        if data['type'] == 'sigma':
            # Special output that calculates sigmas for uncertainty loss.
            match_output = data['output_id']
            match_act = outputs_matching[match_output]
            out_sigma = ConcatenateOutputWithSigma(
                (None, num_classes_matching[match_output] + 1),
                name_suffix=id_name,
                name=id_name)(match_act)
            outputs_list.append(out_sigma)
        else:
            # Count the number of output classes: one class per line in
            # the classes file.
            with open(params['DATA_ROOT_PATH'] + '/' + data['classes'],
                      'r') as f:
                num_classes = sum(1 for _line in f)
            if data['type'] == 'binary' and params['EMPTY_LABEL']:
                num_classes += 1  # empty label
            # Define only a FC output layer (+ activation) per output.
            out = Dense(num_classes)(x)
            out_act = Activation(data['activation'], name=id_name)(out)
            outputs_list.append(out_act)
            outputs_matching[id_name] = out_act
            num_classes_matching[id_name] = num_classes
    self.model = Model(input=image, output=outputs_list)
def test_node_construction():
    ####################################################
    # test basics
    a = Input(shape=(32, ), name='input_a')
    b = Input(shape=(32, ), name='input_b')

    assert a._keras_shape == (None, 32)
    a_layer, a_node_index, a_tensor_index = a._keras_history
    b_layer, b_node_index, b_tensor_index = b._keras_history
    assert len(a_layer.inbound_nodes) == 1
    # BUG FIX: was `a_tensor_index is 0` — identity comparison against an
    # int literal is implementation-defined (and a SyntaxWarning on
    # modern CPython); compare by value.
    assert a_tensor_index == 0
    node = a_layer.inbound_nodes[a_node_index]
    assert node.outbound_layer == a_layer

    # An input node has no inbound layers and carries the input tensor
    # itself as both its input and output.
    assert type(node.inbound_layers) is list
    assert node.inbound_layers == []
    assert type(node.input_tensors) is list
    assert node.input_tensors == [a]
    assert type(node.input_masks) is list
    assert node.input_masks == [None]
    assert type(node.input_shapes) is list
    assert node.input_shapes == [(None, 32)]
    assert type(node.output_tensors) is list
    assert node.output_tensors == [a]
    assert type(node.output_shapes) is list
    assert node.output_shapes == [(None, 32)]
    assert type(node.output_masks) is list
    assert node.output_masks == [None]

    # Sharing one Dense layer across two inputs creates one inbound node
    # per call.
    dense = Dense(16, name='dense_1')
    a_2 = dense(a)
    b_2 = dense(b)
    assert len(dense.inbound_nodes) == 2
    assert len(dense.outbound_nodes) == 0
    assert dense.inbound_nodes[0].inbound_layers == [a_layer]
    assert dense.inbound_nodes[0].outbound_layer == dense
    assert dense.inbound_nodes[1].inbound_layers == [b_layer]
    assert dense.inbound_nodes[1].outbound_layer == dense
    assert dense.inbound_nodes[0].input_tensors == [a]
    assert dense.inbound_nodes[1].input_tensors == [b]

    # test layer properties
    test_layer = Dense(16, name='test_layer')
    a_test = test_layer(a)
    assert test_layer.input == a
    assert test_layer.output == a_test
    assert test_layer.input_mask is None
    assert test_layer.output_mask is None
    assert test_layer.input_shape == (None, 32)
    assert test_layer.output_shape == (None, 16)

    # A layer called on several inputs has ambiguous input/output
    # properties and must raise.
    with pytest.raises(Exception):
        dense.input
    with pytest.raises(Exception):
        dense.output
    with pytest.raises(Exception):
        dense.input_mask
    with pytest.raises(Exception):
        dense.output_mask

    # Per-node accessors disambiguate the shared layer.
    assert dense.get_input_at(0) == a
    assert dense.get_input_at(1) == b
    assert dense.get_output_at(0) == a_2
    assert dense.get_output_at(1) == b_2
    assert dense.get_input_shape_at(0) == (None, 32)
    assert dense.get_input_shape_at(1) == (None, 32)
    assert dense.get_output_shape_at(0) == (None, 16)
    assert dense.get_output_shape_at(1) == (None, 16)
    assert dense.get_input_mask_at(0) is None
    assert dense.get_input_mask_at(1) is None
    assert dense.get_output_mask_at(0) is None
    assert dense.get_output_mask_at(1) is None
def get_model_3d(kwargs):
    """Build a 3D segmentation model.

    # Arguments (keys of ``kwargs``)
        base_filters: base channel count for all sub-networks.
        modalities: sequence of input modalities (sets input channels).
        model: one of 'inception', 'unet', 'vnet', 'fpn',
            'panopticfpn', 'densenet'.
        loss: loss name; 'bce'/'dice'/'focal' select sigmoid heads,
            anything else a relu head.
        numgpu: accepted but unused here.

    # Returns
        A keras ``Model`` named 'some_unique_name'.
    """
    base_filters = kwargs['base_filters']
    loss = kwargs['loss']
    numchannel = int(len(kwargs['modalities']))
    inputs = Input((None, None, None, int(numchannel)))

    if kwargs['model'] == 'inception':
        conv1 = Conv3D(base_filters * 8, (3, 3, 3), activation='relu',
                       padding='same', strides=(1, 1, 1))(inputs)
        conv2 = Conv3D(base_filters * 8, (3, 3, 3), activation='relu',
                       padding='same', strides=(1, 1, 1))(conv1)
        inception1 = Inception3d(conv2, base_filters)
        inception2 = Inception3d(inception1, base_filters)
        inception3 = Inception3d(inception2, base_filters)
        convconcat1 = Conv3D(base_filters * 4, (3, 3, 3), activation='relu',
                             padding='same', strides=(1, 1, 1))(inception3)
        final = Conv3D(base_filters * 4, (3, 3, 3), activation='relu',
                       padding='same', strides=(1, 1, 1))(convconcat1)
    elif kwargs['model'] == 'unet':
        final = Unet3D(inputs, base_filters)
    elif kwargs['model'] == 'vnet':
        final = Vnet3D(inputs, base_filters)
    elif kwargs['model'] == 'fpn' or kwargs['model'] == 'panopticfpn':
        reg = 0.0001
        f1, f2, f3, f4, _ = FPN3D(inputs, base_filters, reg)
    elif kwargs['model'] == 'densenet':
        final = DenseNet3D(inputs, base_filters)
    else:
        sys.exit('Model must be inception/unet/vnet/fpn/panopticfpn/densenet.')

    if kwargs['model'] != 'fpn' and kwargs['model'] != 'panopticfpn':
        # Single-output models: 1-channel prediction head.
        if loss == 'bce' or loss == 'dice' or loss == 'focal':
            final = Conv3D(1, (3, 3, 3), activation='sigmoid',
                           padding='same', strides=(1, 1, 1))(final)
        else:
            final = Conv3D(1, (3, 3, 3), activation='relu',
                           padding='same', strides=(1, 1, 1))(final)
        model = Model(inputs=inputs, outputs=final, name='some_unique_name')
    else:
        def _bn_relu(x):
            # BatchNorm followed by ReLU (shared building block).
            x = BatchNormalization(axis=-1)(x)
            return Activation('relu')(x)

        def _semantic_branch(x, num_upsamples, level):
            # BN+ReLU, then `num_upsamples` stages of
            # (2x upsample -> 3x3x3 conv -> BN+ReLU), with upsample layers
            # named 'F<level>_U<stage>' exactly as in the original code.
            x = _bn_relu(x)
            for stage in range(num_upsamples):
                x = UpSampling3D(size=(2, 2, 2),
                                 name='F%d_U%d' % (level, stage + 1))(x)
                x = Conv3D(base_filters * 4, (3, 3, 3), padding='same',
                           strides=(1, 1, 1), kernel_regularizer=l2(reg))(x)
                x = _bn_relu(x)
            return x

        if kwargs['model'] == 'panopticfpn':
            if loss == 'bce' or loss == 'dice' or loss == 'focal':
                # Semantic segmentation branch of panoptic FPN on top of
                # the feature-extraction backbone: upsample every pyramid
                # level to the finest resolution, then fuse by addition.
                f4 = _semantic_branch(f4, 3, 4)
                f3 = _semantic_branch(f3, 2, 3)
                f2 = _semantic_branch(f2, 1, 2)
                f1 = _semantic_branch(f1, 0, 1)
                f3 = Add()([f4, f3])
                f2 = Add()([f3, f2])
                f1 = Add()([f2, f1])
                f1 = Conv3D(base_filters * 4, (3, 3, 3), padding='same',
                            strides=(1, 1, 1), kernel_regularizer=l2(reg))(f1)
                f1 = _bn_relu(f1)
                final = Conv3D(1, (3, 3, 3), activation='sigmoid',
                               padding='same', strides=(1, 1, 1),
                               name='Level1')(f1)
            else:
                sys.exit('Loss function for Panoptic FPN must be BCE, Dice, or Focal.')
            model = Model(inputs=inputs, outputs=final,
                          name='some_unique_name')
        else:  # plain 'fpn': one prediction head per pyramid level
            if loss == 'bce' or loss == 'dice' or loss == 'focal':
                f1 = Conv3D(1, (3, 3, 3), activation='sigmoid', padding='same',
                            strides=(1, 1, 1), name='Level1')(f1)
                f2 = Conv3D(1, (3, 3, 3), activation='sigmoid', padding='same',
                            strides=(1, 1, 1), name='Level2')(f2)
                f3 = Conv3D(1, (3, 3, 3), activation='sigmoid', padding='same',
                            strides=(1, 1, 1), name='Level3')(f3)
                f4 = Conv3D(1, (3, 3, 3), activation='sigmoid', padding='same',
                            strides=(1, 1, 1), name='Level4')(f4)
            else:
                f1 = Conv3D(1, (3, 3, 3), activation='relu', padding='same',
                            strides=(1, 1, 1))(f1)
                f2 = Conv3D(1, (3, 3, 3), activation='relu', padding='same',
                            strides=(1, 1, 1))(f2)
                f3 = Conv3D(1, (3, 3, 3), activation='relu', padding='same',
                            strides=(1, 1, 1))(f3)
                f4 = Conv3D(1, (3, 3, 3), activation='relu', padding='same',
                            strides=(1, 1, 1))(f4)
            # BUG FIX: the original built Model(outputs=final) here, but
            # `final` is never assigned on the plain-FPN path (NameError).
            # Emit the four per-level heads as the model outputs instead.
            model = Model(inputs=inputs, outputs=[f1, f2, f3, f4],
                          name='some_unique_name')
    #print(model.summary())
    return model
def test_multi_input_layer():
    ####################################################
    # test multi-input layer
    in_a = Input(shape=(32, ), name='input_a')
    in_b = Input(shape=(32, ), name='input_b')

    shared_dense = Dense(16, name='dense_1')
    dense_a = shared_dense(in_a)
    dense_b = shared_dense(in_b)

    # Merge the two branches and inspect the node bookkeeping.
    merged = merge([dense_a, dense_b], mode='concat', name='merge')
    assert merged._keras_shape == (None, 16 * 2)
    merge_layer, node_idx, tensor_idx = merged._keras_history
    assert node_idx == 0
    assert tensor_idx == 0
    assert len(merge_layer.inbound_nodes) == 1
    assert len(merge_layer.outbound_nodes) == 0
    assert len(merge_layer.inbound_nodes[0].input_tensors) == 2
    assert len(merge_layer.inbound_nodes[0].inbound_layers) == 2

    out_c = Dense(64, name='dense_2')(merged)
    out_d = Dense(5, name='dense_3')(out_c)
    net = Model(input=[in_a, in_b], output=[out_c, out_d], name='model')
    assert len(net.layers) == 6
    print('model.input_layers:', net.input_layers)
    print('model.input_layers_node_indices:', net.input_layers_node_indices)
    print('model.input_layers_tensor_indices:',
          net.input_layers_tensor_indices)
    print('model.output_layers', net.output_layers)

    expected_shapes = [(None, 64), (None, 5)]
    print('output_shape:', net.get_output_shape_for([(None, 32), (None, 32)]))
    assert net.get_output_shape_for([(None, 32), (None, 32)]) == expected_shapes
    assert net.compute_mask([in_a, in_b], [None, None]) == [None, None]
    print('output_shape:', net.get_output_shape_for([(None, 32), (None, 32)]))
    assert net.get_output_shape_for([(None, 32), (None, 32)]) == expected_shapes

    # we don't check names of first 2 layers (inputs) because
    # ordering of same-level layers is not fixed
    print('layers:', [layer.name for layer in net.layers])
    assert ([layer.name for layer in net.layers][2:] ==
            ['dense_1', 'merge', 'dense_2', 'dense_3'])
    print('input_layers:', [layer.name for layer in net.input_layers])
    assert [layer.name for layer in net.input_layers] == ['input_a', 'input_b']
    print('output_layers:', [layer.name for layer in net.output_layers])
    assert [layer.name for layer in net.output_layers] == ['dense_2', 'dense_3']

    # actually run model
    run = K.function(net.inputs, net.outputs)
    feed_a = np.random.random((10, 32))
    feed_b = np.random.random((10, 32))
    results = run([feed_a, feed_b])
    assert [r.shape for r in results] == [(10, 64), (10, 5)]

    # test get_source_inputs
    print(get_source_inputs(out_c))
    assert get_source_inputs(out_c) == [in_a, in_b]

    # serialization / deserialization
    json_config = net.to_json()
    clone = model_from_json(json_config)
    clone.compile('rmsprop', 'mse')

    print('recreated:')
    print([layer.name for layer in clone.layers])
    print([layer.name for layer in clone.input_layers])
    print([layer.name for layer in clone.output_layers])
    assert ([layer.name for layer in clone.layers][2:] ==
            ['dense_1', 'merge', 'dense_2', 'dense_3'])
    assert ([layer.name for layer in clone.input_layers] ==
            ['input_a', 'input_b'])
    assert ([layer.name for layer in clone.output_layers] ==
            ['dense_2', 'dense_3'])

    run = K.function(clone.inputs, clone.outputs)
    feed_a = np.random.random((10, 32))
    feed_b = np.random.random((10, 32))
    results = run([feed_a, feed_b])
    assert [r.shape for r in results] == [(10, 64), (10, 5)]
def test_functional_guide():
    # MNIST
    from keras.layers import Input, Dense, LSTM
    from keras.models import Model
    from keras.utils import np_utils

    # this returns a tensor
    pixels = Input(shape=(784, ))

    # a layer instance is callable on a tensor, and returns a tensor
    hidden = Dense(64, activation='relu')(pixels)
    hidden = Dense(64, activation='relu')(hidden)
    class_probs = Dense(10, activation='softmax')(hidden)

    # this creates a model that includes
    # the Input layer and three Dense layers
    clf = Model(input=pixels, output=class_probs)
    clf.compile(optimizer='rmsprop',
                loss='categorical_crossentropy',
                metrics=['accuracy'])

    # the data, shuffled and split between tran and test sets
    x_train = np.random.random((100, 784))
    y_train = np.random.random((100, 10))
    clf.fit(x_train, y_train, nb_epoch=2, batch_size=128)

    assert clf.inputs == [pixels]
    assert clf.outputs == [class_probs]
    assert clf.input == pixels
    assert clf.output == class_probs
    assert clf.input_shape == (None, 784)
    assert clf.output_shape == (None, 10)

    # try calling the sequential model
    fresh_in = Input(shape=(784, ))
    fresh_out = clf(fresh_in)
    wrapper = Model(input=fresh_in, output=fresh_out)
    wrapper.compile(optimizer='rmsprop',
                    loss='categorical_crossentropy',
                    metrics=['accuracy'])

    ##################################################
    # multi-io
    ##################################################
    tweet_a = Input(shape=(4, 25))
    tweet_b = Input(shape=(4, 25))

    # this layer can take as input a matrix
    # and will return a vector of size 64
    shared_lstm = LSTM(64)

    # when we reuse the same layer instance multiple times,
    # the weights of the layer are also being reused
    # (it is effectively *the same* layer)
    encoded_a = shared_lstm(tweet_a)
    encoded_b = shared_lstm(tweet_b)

    # we can then concatenate the two vectors:
    merged_vector = merge([encoded_a, encoded_b],
                          mode='concat',
                          concat_axis=-1)

    # and add a logistic regression on top
    predictions = Dense(1, activation='sigmoid')(merged_vector)

    # we define a trainable model linking the
    # tweet inputs to the predictions
    model = Model(input=[tweet_a, tweet_b], output=predictions)
    model.compile(optimizer='rmsprop',
                  loss='binary_crossentropy',
                  metrics=['accuracy'])

    data_a = np.random.random((1000, 4, 25))
    data_b = np.random.random((1000, 4, 25))
    labels = np.random.random((1000, ))
    model.fit([data_a, data_b], labels, nb_epoch=1)
    model.summary()

    assert model.inputs == [tweet_a, tweet_b]
    assert model.outputs == [predictions]
    assert model.input == [tweet_a, tweet_b]
    assert model.output == predictions
    assert model.output == predictions
    assert model.input_shape == [(None, 4, 25), (None, 4, 25)]
    assert model.output_shape == (None, 1)
    assert shared_lstm.get_output_at(0) == encoded_a
    assert shared_lstm.get_output_at(1) == encoded_b
    assert shared_lstm.input_shape == (None, 4, 25)
def test_model_with_input_feed_tensor():
    """We test building a model with a TF variable as input.
    We should be able to call fit, evaluate, predict,
    by only passing them data for the placeholder inputs
    in the model.
    """
    import tensorflow as tf

    input_a_np = np.random.random((10, 3))
    input_b_np = np.random.random((10, 3))

    output_a_np = np.random.random((10, 4))
    output_b_np = np.random.random((10, 3))

    # Input `a` is fed from a TF variable, so callers never supply data
    # for it; `b` is a regular placeholder input.
    a = Input(tensor=tf.Variable(input_a_np, dtype=tf.float32))
    b = Input(shape=(3, ), name='input_b')

    a_2 = Dense(4, name='dense_1')(a)
    dp = Dropout(0.5, name='dropout')
    b_2 = dp(b)

    model = Model([a, b], [a_2, b_2])
    model.summary()

    optimizer = 'rmsprop'
    loss = 'mse'
    loss_weights = [1., 0.5]
    model.compile(optimizer,
                  loss,
                  metrics=['mean_squared_error'],
                  loss_weights=loss_weights,
                  sample_weight_mode=None)

    # test train_on_batch: data for the placeholder input only, both as
    # a bare array and as a name-keyed dict.
    out = model.train_on_batch(input_b_np, [output_a_np, output_b_np])
    out = model.train_on_batch({'input_b': input_b_np},
                               [output_a_np, output_b_np])
    out = model.test_on_batch({'input_b': input_b_np},
                              [output_a_np, output_b_np])
    out = model.predict_on_batch({'input_b': input_b_np})

    # test fit
    out = model.fit({'input_b': input_b_np}, [output_a_np, output_b_np],
                    epochs=1,
                    batch_size=10)
    out = model.fit(input_b_np, [output_a_np, output_b_np],
                    epochs=1,
                    batch_size=10)

    # test evaluate
    out = model.evaluate({'input_b': input_b_np},
                         [output_a_np, output_b_np],
                         batch_size=10)
    out = model.evaluate(input_b_np, [output_a_np, output_b_np],
                         batch_size=10)

    # test predict
    out = model.predict({'input_b': input_b_np}, batch_size=10)
    out = model.predict(input_b_np, batch_size=10)
    assert len(out) == 2

    # Now test a model with a single input
    # i.e. we don't pass any data to fit the model.
    a = Input(tensor=tf.Variable(input_a_np, dtype=tf.float32))
    a_2 = Dense(4, name='dense_1')(a)
    a_2 = Dropout(0.5, name='dropout')(a_2)
    model = Model(a, a_2)
    model.summary()

    optimizer = 'rmsprop'
    loss = 'mse'
    model.compile(optimizer, loss, metrics=['mean_squared_error'])

    # test train_on_batch: every "no data" spelling (None, [], {}) must
    # be accepted.
    out = model.train_on_batch(None, output_a_np)
    out = model.train_on_batch(None, output_a_np)
    out = model.test_on_batch(None, output_a_np)
    out = model.predict_on_batch(None)
    out = model.train_on_batch([], output_a_np)
    out = model.train_on_batch({}, output_a_np)

    # test fit
    out = model.fit(None, output_a_np, epochs=1, batch_size=10)
    out = model.fit(None, output_a_np, epochs=1, batch_size=10)

    # test evaluate
    out = model.evaluate(None, output_a_np, batch_size=10)
    out = model.evaluate(None, output_a_np, batch_size=10)

    # test predict
    out = model.predict(None, steps=3)
    out = model.predict(None, steps=3)
    # 3 steps over the 10-row variable => 30 predictions of width 4.
    assert out.shape == (10 * 3, 4)

    # Same, without learning phase
    # i.e. we don't pass any data to fit the model.
    a = Input(tensor=tf.Variable(input_a_np, dtype=tf.float32))
    a_2 = Dense(4, name='dense_1')(a)
    model = Model(a, a_2)
    model.summary()

    optimizer = 'rmsprop'
    loss = 'mse'
    model.compile(optimizer, loss, metrics=['mean_squared_error'])

    # test train_on_batch
    out = model.train_on_batch(None, output_a_np)
    out = model.train_on_batch(None, output_a_np)
    out = model.test_on_batch(None, output_a_np)
    out = model.predict_on_batch(None)
    out = model.train_on_batch([], output_a_np)
    out = model.train_on_batch({}, output_a_np)

    # test fit
    out = model.fit(None, output_a_np, epochs=1, batch_size=10)
    out = model.fit(None, output_a_np, epochs=1, batch_size=10)

    # test evaluate
    out = model.evaluate(None, output_a_np, batch_size=10)
    out = model.evaluate(None, output_a_np, batch_size=10)

    # test predict
    out = model.predict(None, steps=3)
    out = model.predict(None, steps=3)
    assert out.shape == (10 * 3, 4)
def test_model_with_external_loss():
    """Exercise models compiled with loss=None: losses come only from
    regularizers or from tensors registered via ``model.add_loss``."""
    # None loss, only regularization loss.
    a = Input(shape=(3, ), name='input_a')
    a_2 = Dense(4,
                name='dense_1',
                kernel_regularizer='l1',
                bias_regularizer='l2')(a)
    dp = Dropout(0.5, name='dropout')
    a_3 = dp(a_2)

    model = Model(a, [a_2, a_3])

    optimizer = 'rmsprop'
    loss = None
    model.compile(optimizer, loss, metrics=['mae'])

    input_a_np = np.random.random((10, 3))

    # test train_on_batch: no targets are passed at all.
    out = model.train_on_batch(input_a_np, None)
    out = model.test_on_batch(input_a_np, None)
    # fit
    out = model.fit(input_a_np, None)
    # evaluate
    out = model.evaluate(input_a_np, None)

    # No dropout, external loss.
    a = Input(shape=(3, ), name='input_a')
    a_2 = Dense(4, name='dense_1')(a)
    a_3 = Dense(4, name='dense_2')(a)

    model = Model(a, [a_2, a_3])
    # Register an explicit loss tensor instead of per-output losses.
    model.add_loss(K.mean(a_3 + a_2))

    optimizer = 'rmsprop'
    loss = None
    model.compile(optimizer, loss, metrics=['mae'])

    # test train_on_batch
    out = model.train_on_batch(input_a_np, None)
    out = model.test_on_batch(input_a_np, None)
    # fit
    out = model.fit(input_a_np, None)
    # evaluate
    out = model.evaluate(input_a_np, None)

    # Test fit with no external data at all.
    if K.backend() == 'tensorflow':
        import tensorflow as tf

        # The only input is a TF variable, so the model is entirely
        # self-feeding: fit/evaluate/predict must use step counts, and
        # batch_size-based calls must raise.
        a = Input(tensor=tf.Variable(input_a_np, dtype=tf.float32))
        a_2 = Dense(4, name='dense_1')(a)
        a_2 = Dropout(0.5, name='dropout')(a_2)
        model = Model(a, a_2)
        model.add_loss(K.mean(a_2))

        model.compile(optimizer='rmsprop',
                      loss=None,
                      metrics=['mean_squared_error'])

        # test train_on_batch
        out = model.train_on_batch(None, None)
        out = model.test_on_batch(None, None)
        out = model.predict_on_batch(None)

        # test fit
        with pytest.raises(ValueError):
            out = model.fit(None, None, epochs=1, batch_size=10)
        out = model.fit(None, None, epochs=1, steps_per_epoch=1)

        # define a generator to produce x=None and y=None
        def data_tensors_generator():
            while True:
                yield (None, None)

        generator = data_tensors_generator()

        # test fit_generator for framework-native data tensors
        out = model.fit_generator(generator, epochs=1, steps_per_epoch=3)

        # test evaluate_generator for framework-native data tensors
        out = model.evaluate_generator(generator, steps=3)

        # test fit with validation data
        with pytest.raises(ValueError):
            out = model.fit(None,
                            None,
                            epochs=1,
                            steps_per_epoch=None,
                            validation_steps=2)
        out = model.fit(None,
                        None,
                        epochs=1,
                        steps_per_epoch=2,
                        validation_steps=2)

        # test evaluate
        with pytest.raises(ValueError):
            out = model.evaluate(None, None, batch_size=10)
        out = model.evaluate(None, None, steps=3)

        # test predict
        with pytest.raises(ValueError):
            out = model.predict(None, batch_size=10)
        out = model.predict(None, steps=3)
        # 3 steps over the 10-row variable => 30 predictions of width 4.
        assert out.shape == (10 * 3, 4)

        # Test multi-output model without external data.
        a = Input(tensor=tf.Variable(input_a_np, dtype=tf.float32))
        a_1 = Dense(4, name='dense_1')(a)
        a_2 = Dropout(0.5, name='dropout')(a_1)
        model = Model(a, [a_1, a_2])
        model.add_loss(K.mean(a_2))

        model.compile(optimizer='rmsprop',
                      loss=None,
                      metrics=['mean_squared_error'])

        # test train_on_batch
        out = model.train_on_batch(None, None)
        out = model.test_on_batch(None, None)
        out = model.predict_on_batch(None)

        # test fit
        with pytest.raises(ValueError):
            out = model.fit(None, None, epochs=1, batch_size=10)
        out = model.fit(None, None, epochs=1, steps_per_epoch=1)

        # test fit with validation data
        with pytest.raises(ValueError):
            out = model.fit(None,
                            None,
                            epochs=1,
                            steps_per_epoch=None,
                            validation_steps=2)
        out = model.fit(None,
                        None,
                        epochs=1,
                        steps_per_epoch=2,
                        validation_steps=2)

        # test evaluate
        with pytest.raises(ValueError):
            out = model.evaluate(None, None, batch_size=10)
        out = model.evaluate(None, None, steps=3)

        # test predict
        with pytest.raises(ValueError):
            out = model.predict(None, batch_size=10)
        out = model.predict(None, steps=3)
        assert len(out) == 2
        assert out[0].shape == (10 * 3, 4)
        assert out[1].shape == (10 * 3, 4)
def test_model_custom_target_tensors():
    in_a = Input(shape=(3, ), name='input_a')
    in_b = Input(shape=(3, ), name='input_b')

    dense_out = Dense(4, name='dense_1')(in_a)
    drop = Dropout(0.5, name='dropout')
    drop_out = drop(in_b)

    # Candidate target placeholders; the last one matches no output.
    target_dense = K.placeholder([10, 4], name='y')
    target_drop = K.placeholder([10, 3], name='y1')
    target_bogus = K.placeholder([7, 5], name='y2')

    net = Model([in_a, in_b], [dense_out, drop_out])

    opt = 'rmsprop'
    objective = 'mse'
    weighting = [1., 0.5]

    # test list of target tensors
    with pytest.raises(ValueError):
        net.compile(opt,
                    objective,
                    metrics=[],
                    loss_weights=weighting,
                    sample_weight_mode=None,
                    target_tensors=[target_dense, target_drop, target_bogus])
    net.compile(opt,
                objective,
                metrics=[],
                loss_weights=weighting,
                sample_weight_mode=None,
                target_tensors=[target_dense, target_drop])

    x_a = np.random.random((10, 3))
    x_b = np.random.random((10, 3))
    y_a = np.random.random((10, 4))
    y_b = np.random.random((10, 3))
    out = net.train_on_batch([x_a, x_b], [y_a, y_b], {
        target_dense: np.random.random((10, 4)),
        target_drop: np.random.random((10, 3))
    })

    # test dictionary of target_tensors
    with pytest.raises(ValueError):
        net.compile(opt,
                    objective,
                    metrics=[],
                    loss_weights=weighting,
                    sample_weight_mode=None,
                    target_tensors={'does_not_exist': target_bogus})
    # test dictionary of target_tensors
    net.compile(opt,
                objective,
                metrics=[],
                loss_weights=weighting,
                sample_weight_mode=None,
                target_tensors={
                    'dense_1': target_dense,
                    'dropout': target_drop
                })
    out = net.train_on_batch([x_a, x_b], [y_a, y_b], {
        target_dense: np.random.random((10, 4)),
        target_drop: np.random.random((10, 3))
    })

    if K.backend() == 'tensorflow':
        import tensorflow as tf

        # test with custom TF placeholder as target
        pl_target_a = tf.placeholder('float32', shape=(None, 4))
        net.compile(optimizer='rmsprop',
                    loss='mse',
                    target_tensors={'dense_1': pl_target_a})
        net.train_on_batch([x_a, x_b], [y_a, y_b])
def test_pandas_dataframe():
    """Check that every training/inference entry point (fit, predict,
    predict_on_batch, evaluate, train_on_batch, test_on_batch) accepts
    pandas DataFrames in positional, list, and name-keyed dict forms,
    for both single- and multi-input/output models."""
    input_a = Input(shape=(3, ), name='input_a')
    input_b = Input(shape=(3, ), name='input_b')
    x = Dense(4, name='dense_1')(input_a)
    # NOTE(review): 'desne_2' looks like a typo for 'dense_2', but the
    # layer is never referenced by name below, so it is harmless here.
    y = Dense(3, name='desne_2')(input_b)
    model_1 = Model(inputs=input_a, outputs=x)
    model_2 = Model(inputs=[input_a, input_b], outputs=[x, y])
    optimizer = 'rmsprop'
    loss = 'mse'
    model_1.compile(optimizer=optimizer, loss=loss)
    model_2.compile(optimizer=optimizer, loss=loss)
    input_a_df = pd.DataFrame(np.random.random((10, 3)))
    input_b_df = pd.DataFrame(np.random.random((10, 3)))
    output_a_df = pd.DataFrame(np.random.random((10, 4)))
    output_b_df = pd.DataFrame(np.random.random((10, 3)))
    # fit: positional, list, and dict input forms
    model_1.fit(input_a_df, output_a_df)
    model_2.fit([input_a_df, input_b_df], [output_a_df, output_b_df])
    model_1.fit([input_a_df], [output_a_df])
    model_1.fit({'input_a': input_a_df}, output_a_df)
    model_2.fit({
        'input_a': input_a_df,
        'input_b': input_b_df
    }, [output_a_df, output_b_df])
    # predict
    model_1.predict(input_a_df)
    model_2.predict([input_a_df, input_b_df])
    model_1.predict([input_a_df])
    model_1.predict({'input_a': input_a_df})
    model_2.predict({'input_a': input_a_df, 'input_b': input_b_df})
    # predict_on_batch
    model_1.predict_on_batch(input_a_df)
    model_2.predict_on_batch([input_a_df, input_b_df])
    model_1.predict_on_batch([input_a_df])
    model_1.predict_on_batch({'input_a': input_a_df})
    model_2.predict_on_batch({'input_a': input_a_df, 'input_b': input_b_df})
    # evaluate
    model_1.evaluate(input_a_df, output_a_df)
    model_2.evaluate([input_a_df, input_b_df], [output_a_df, output_b_df])
    model_1.evaluate([input_a_df], [output_a_df])
    model_1.evaluate({'input_a': input_a_df}, output_a_df)
    model_2.evaluate({
        'input_a': input_a_df,
        'input_b': input_b_df
    }, [output_a_df, output_b_df])
    # train_on_batch
    model_1.train_on_batch(input_a_df, output_a_df)
    model_2.train_on_batch([input_a_df, input_b_df],
                           [output_a_df, output_b_df])
    model_1.train_on_batch([input_a_df], [output_a_df])
    model_1.train_on_batch({'input_a': input_a_df}, output_a_df)
    model_2.train_on_batch({
        'input_a': input_a_df,
        'input_b': input_b_df
    }, [output_a_df, output_b_df])
    # test_on_batch
    model_1.test_on_batch(input_a_df, output_a_df)
    model_2.test_on_batch([input_a_df, input_b_df],
                          [output_a_df, output_b_df])
    model_1.test_on_batch([input_a_df], [output_a_df])
    model_1.test_on_batch({'input_a': input_a_df}, output_a_df)
    model_2.test_on_batch({
        'input_a': input_a_df,
        'input_b': input_b_df
    }, [output_a_df, output_b_df])
def test_model_methods():
    """End-to-end smoke test of the Model training API on a 2-input /
    2-output model: train/test/predict on batch, fit with validation,
    sample weights, metric configurations, initial_epoch, generators and
    Sequences, and a long series of invalid-argument error cases."""
    a = Input(shape=(3, ), name='input_a')
    b = Input(shape=(3, ), name='input_b')
    a_2 = Dense(4, name='dense_1')(a)
    dp = Dropout(0.5, name='dropout')
    b_2 = dp(b)
    model = Model([a, b], [a_2, b_2])
    optimizer = 'rmsprop'
    loss = 'mse'
    loss_weights = [1., 0.5]
    input_a_np = np.random.random((10, 3))
    input_b_np = np.random.random((10, 3))
    output_a_np = np.random.random((10, 4))
    output_b_np = np.random.random((10, 3))
    # training/testing doesn't work before compiling.
    with pytest.raises(RuntimeError):
        model.train_on_batch([input_a_np, input_b_np],
                             [output_a_np, output_b_np])
    model.compile(optimizer,
                  loss,
                  metrics=[],
                  loss_weights=loss_weights,
                  sample_weight_mode=None)
    # test train_on_batch: list inputs, dict inputs, dict inputs+targets
    out = model.train_on_batch([input_a_np, input_b_np],
                               [output_a_np, output_b_np])
    out = model.train_on_batch({
        'input_a': input_a_np,
        'input_b': input_b_np
    }, [output_a_np, output_b_np])
    out = model.train_on_batch({
        'input_a': input_a_np,
        'input_b': input_b_np
    }, {
        'dense_1': output_a_np,
        'dropout': output_b_np
    })
    # test fit
    out = model.fit([input_a_np, input_b_np], [output_a_np, output_b_np],
                    epochs=1,
                    batch_size=4)
    out = model.fit({
        'input_a': input_a_np,
        'input_b': input_b_np
    }, [output_a_np, output_b_np],
                    epochs=1,
                    batch_size=4)
    out = model.fit({
        'input_a': input_a_np,
        'input_b': input_b_np
    }, {
        'dense_1': output_a_np,
        'dropout': output_b_np
    },
                    epochs=1,
                    batch_size=4)
    # test validation_split
    out = model.fit([input_a_np, input_b_np], [output_a_np, output_b_np],
                    epochs=1,
                    batch_size=4,
                    validation_split=0.5)
    out = model.fit({
        'input_a': input_a_np,
        'input_b': input_b_np
    }, [output_a_np, output_b_np],
                    epochs=1,
                    batch_size=4,
                    validation_split=0.5)
    # test validation data
    out = model.fit([input_a_np, input_b_np], [output_a_np, output_b_np],
                    epochs=1,
                    batch_size=4,
                    validation_data=([input_a_np,
                                      input_b_np], [output_a_np,
                                                    output_b_np]))
    out = model.fit({
        'input_a': input_a_np,
        'input_b': input_b_np
    }, [output_a_np, output_b_np],
                    epochs=1,
                    batch_size=4,
                    validation_split=0.5,
                    validation_data=({
                        'input_a': input_a_np,
                        'input_b': input_b_np
                    }, [output_a_np, output_b_np]))
    out = model.fit({
        'input_a': input_a_np,
        'input_b': input_b_np
    }, {
        'dense_1': output_a_np,
        'dropout': output_b_np
    },
                    epochs=1,
                    batch_size=4,
                    validation_split=0.5,
                    validation_data=({
                        'input_a': input_a_np,
                        'input_b': input_b_np
                    }, {
                        'dense_1': output_a_np,
                        'dropout': output_b_np
                    }))
    # test_on_batch
    out = model.test_on_batch([input_a_np, input_b_np],
                              [output_a_np, output_b_np])
    out = model.test_on_batch({
        'input_a': input_a_np,
        'input_b': input_b_np
    }, [output_a_np, output_b_np])
    out = model.test_on_batch({
        'input_a': input_a_np,
        'input_b': input_b_np
    }, {
        'dense_1': output_a_np,
        'dropout': output_b_np
    })
    # predict_on_batch
    out = model.predict_on_batch([input_a_np, input_b_np])
    out = model.predict_on_batch({
        'input_a': input_a_np,
        'input_b': input_b_np
    })
    # predict, evaluate
    input_a_np = np.random.random((10, 3))
    input_b_np = np.random.random((10, 3))
    output_a_np = np.random.random((10, 4))
    output_b_np = np.random.random((10, 3))
    out = model.evaluate([input_a_np, input_b_np], [output_a_np, output_b_np],
                         batch_size=4)
    out = model.predict([input_a_np, input_b_np], batch_size=4)
    # with sample_weight (None means "unweighted" for the first output)
    input_a_np = np.random.random((10, 3))
    input_b_np = np.random.random((10, 3))
    output_a_np = np.random.random((10, 4))
    output_b_np = np.random.random((10, 3))
    sample_weight = [None, np.random.random((10, ))]
    out = model.train_on_batch([input_a_np, input_b_np],
                               [output_a_np, output_b_np],
                               sample_weight=sample_weight)
    out = model.test_on_batch([input_a_np, input_b_np],
                              [output_a_np, output_b_np],
                              sample_weight=sample_weight)
    # test accuracy metric: total loss + 2 per-output losses + 2 accuracies
    model.compile(optimizer, loss, metrics=['acc'], sample_weight_mode=None)
    out = model.train_on_batch([input_a_np, input_b_np],
                               [output_a_np, output_b_np])
    assert len(out) == 5
    out = model.test_on_batch([input_a_np, input_b_np],
                              [output_a_np, output_b_np])
    assert len(out) == 5
    # this should also work (metric only on one named output -> 4 values)
    model.compile(optimizer,
                  loss,
                  metrics={'dense_1': 'acc'},
                  sample_weight_mode=None)
    out = model.train_on_batch([input_a_np, input_b_np],
                               [output_a_np, output_b_np])
    assert len(out) == 4
    out = model.test_on_batch([input_a_np, input_b_np],
                              [output_a_np, output_b_np])
    assert len(out) == 4
    # and this as well (metric list per named output)
    model.compile(optimizer,
                  loss,
                  metrics={'dense_1': ['acc']},
                  sample_weight_mode=None)
    out = model.train_on_batch([input_a_np, input_b_np],
                               [output_a_np, output_b_np])
    assert len(out) == 4
    out = model.test_on_batch([input_a_np, input_b_np],
                              [output_a_np, output_b_np])
    assert len(out) == 4
    # test starting from non-zero initial epoch
    trained_epochs = []
    trained_batches = []

    # define tracer callback
    def on_epoch_begin(epoch, logs):
        trained_epochs.append(epoch)

    def on_batch_begin(batch, logs):
        trained_batches.append(batch)

    tracker_cb = LambdaCallback(on_epoch_begin=on_epoch_begin,
                                on_batch_begin=on_batch_begin)
    out = model.fit([input_a_np, input_b_np], [output_a_np, output_b_np],
                    epochs=5,
                    batch_size=4,
                    initial_epoch=2,
                    callbacks=[tracker_cb])
    assert trained_epochs == [2, 3, 4]
    # test starting from non-zero initial epoch for generator too
    trained_epochs = []

    def gen_data(batch_sz):
        while True:
            yield ([
                np.random.random((batch_sz, 3)),
                np.random.random((batch_sz, 3))
            ], [
                np.random.random((batch_sz, 4)),
                np.random.random((batch_sz, 3))
            ])

    out = model.fit_generator(gen_data(4),
                              steps_per_epoch=3,
                              epochs=5,
                              initial_epoch=2,
                              callbacks=[tracker_cb])
    assert trained_epochs == [2, 3, 4]

    # test with a custom metric function
    def mse(y_true, y_pred):
        return K.mean(K.pow(y_true - y_pred, 2))

    model.compile(optimizer, loss, metrics=[mse], sample_weight_mode=None)
    out = model.train_on_batch([input_a_np, input_b_np],
                               [output_a_np, output_b_np])
    out_len = 1 + 2 * (1 + 1)  # total loss + 2 outputs * (loss + metric)
    assert len(out) == out_len
    out = model.test_on_batch([input_a_np, input_b_np],
                              [output_a_np, output_b_np])
    assert len(out) == out_len
    input_a_np = np.random.random((10, 3))
    input_b_np = np.random.random((10, 3))
    output_a_np = np.random.random((10, 4))
    output_b_np = np.random.random((10, 3))
    out = model.fit([input_a_np, input_b_np], [output_a_np, output_b_np],
                    batch_size=4,
                    epochs=1)
    out = model.evaluate([input_a_np, input_b_np], [output_a_np, output_b_np],
                         batch_size=4)
    out = model.predict([input_a_np, input_b_np], batch_size=4)
    # enable verbose for evaluate_generator
    out = model.evaluate_generator(gen_data(4), steps=3, verbose=1)
    # empty batch
    with pytest.raises(ValueError):

        def gen_data():
            while True:
                yield (np.asarray([]), np.asarray([]))

        out = model.evaluate_generator(gen_data(), steps=1)
    # x is not a list of numpy arrays.
    with pytest.raises(ValueError):
        out = model.predict([None])
    # x does not match _feed_input_names.
    with pytest.raises(ValueError):
        out = model.predict([input_a_np, None, input_b_np])
    with pytest.raises(ValueError):
        out = model.predict([None, input_a_np, input_b_np])
    # all input/output/weight arrays should have the same number of samples.
    with pytest.raises(ValueError):
        out = model.train_on_batch([input_a_np, input_b_np[:2]],
                                   [output_a_np, output_b_np],
                                   sample_weight=sample_weight)
    with pytest.raises(ValueError):
        out = model.train_on_batch([input_a_np, input_b_np],
                                   [output_a_np, output_b_np[:2]],
                                   sample_weight=sample_weight)
    with pytest.raises(ValueError):
        out = model.train_on_batch(
            [input_a_np, input_b_np], [output_a_np, output_b_np],
            sample_weight=[sample_weight[1], sample_weight[1][:2]])
    # `sample_weight` is neither a dict nor a list.
    with pytest.raises(TypeError):
        out = model.train_on_batch([input_a_np, input_b_np],
                                   [output_a_np, output_b_np],
                                   sample_weight=tuple(sample_weight))
    # `validation_data` is neither a tuple nor a triple.
    with pytest.raises(ValueError):
        out = model.fit([input_a_np, input_b_np], [output_a_np, output_b_np],
                        epochs=1,
                        batch_size=4,
                        validation_data=([input_a_np, input_b_np], ))
    # `loss` does not match outputs.
    with pytest.raises(ValueError):
        model.compile(optimizer, loss=['mse', 'mae', 'mape'])
    # `loss_weights` does not match output_names.
    with pytest.raises(ValueError):
        model.compile(optimizer, loss='mse', loss_weights={'lstm': 0.5})
    # `loss_weights` does not match outputs.
    with pytest.raises(ValueError):
        model.compile(optimizer, loss='mse', loss_weights=[0.5])
    # `loss_weights` is invalid type.
    with pytest.raises(TypeError):
        model.compile(optimizer, loss='mse', loss_weights=(0.5, 0.5))
    # `sample_weight_mode` does not match output_names.
    with pytest.raises(ValueError):
        model.compile(optimizer,
                      loss='mse',
                      sample_weight_mode={'lstm': 'temporal'})
    # `sample_weight_mode` does not match output_names.
    with pytest.raises(ValueError):
        model.compile(optimizer, loss='mse', sample_weight_mode=['temporal'])
    # `sample_weight_mode` matches output_names partially.
    with pytest.raises(ValueError):
        model.compile(optimizer,
                      loss='mse',
                      sample_weight_mode={'dense_1': 'temporal'})
    # `loss` does not exist.
    with pytest.raises(ValueError):
        model.compile(optimizer, loss=[])
    model.compile(optimizer, loss=['mse', 'mae'])
    model.compile(optimizer,
                  loss='mse',
                  loss_weights={
                      'dense_1': 0.2,
                      'dropout': 0.8
                  })
    model.compile(optimizer, loss='mse', loss_weights=[0.2, 0.8])
    # the rank of weight arrays should be 1.
    with pytest.raises(ValueError):
        out = model.train_on_batch(
            [input_a_np, input_b_np], [output_a_np, output_b_np],
            sample_weight=[None, np.random.random((10, 20, 30))])
    model.compile(optimizer,
                  loss='mse',
                  sample_weight_mode={
                      'dense_1': None,
                      'dropout': 'temporal'
                  })
    model.compile(optimizer, loss='mse', sample_weight_mode=[None, 'temporal'])
    # the rank of output arrays should be at least 3D.
    with pytest.raises(ValueError):
        out = model.train_on_batch([input_a_np, input_b_np],
                                   [output_a_np, output_b_np],
                                   sample_weight=sample_weight)
    model.compile(optimizer,
                  loss,
                  metrics=[],
                  loss_weights=loss_weights,
                  sample_weight_mode=None)
    trained_epochs = []
    trained_batches = []
    out = model.fit_generator(generator=RandomSequence(3),
                              steps_per_epoch=3,
                              epochs=5,
                              initial_epoch=0,
                              validation_data=RandomSequence(4),
                              validation_steps=3,
                              callbacks=[tracker_cb])
    assert trained_epochs == [0, 1, 2, 3, 4]
    assert trained_batches == list(range(3)) * 5
    # steps_per_epoch will be equal to len of sequence if it's unspecified
    trained_epochs = []
    trained_batches = []
    out = model.fit_generator(generator=RandomSequence(3),
                              epochs=5,
                              initial_epoch=0,
                              validation_data=RandomSequence(4),
                              callbacks=[tracker_cb])
    assert trained_epochs == [0, 1, 2, 3, 4]
    assert trained_batches == list(range(12)) * 5
    # fit_generator will throw an exception if steps is unspecified
    # for regular generator
    with pytest.raises(ValueError):

        def gen_data():
            while True:
                yield (np.asarray([]), np.asarray([]))

        out = model.fit_generator(generator=gen_data(),
                                  epochs=5,
                                  initial_epoch=0,
                                  validation_data=gen_data(),
                                  callbacks=[tracker_cb])
    # Check if generator is only accessed an expected number of times
    gen_counters = [0, 0]

    def gen_data(i):
        while True:
            gen_counters[i] += 1
            yield ([np.random.random((1, 3)),
                    np.random.random((1, 3))],
                   [np.random.random((1, 4)),
                    np.random.random((1, 3))])

    out = model.fit_generator(generator=gen_data(0),
                              epochs=3,
                              steps_per_epoch=2,
                              validation_data=gen_data(1),
                              validation_steps=1,
                              max_queue_size=2,
                              workers=2)
    # Need range check here as filling of the queue depends on
    # sleep in the enqueuers
    assert 6 <= gen_counters[0] <= 8
    # 12 = (epoch * workers * validation steps * max_queue_size)
    assert 3 <= gen_counters[1] <= 12
    gen_counters = [0]
    out = model.fit_generator(generator=RandomSequence(3),
                              epochs=3,
                              validation_data=gen_data(0),
                              validation_steps=1,
                              max_queue_size=2,
                              workers=2)
    # 12 = (epoch * workers * validation steps * max_queue_size)
    # Need range check here as filling of the queue depends on
    # sleep in the enqueuers
    assert 3 <= gen_counters[0] <= 12

    # predict_generator output shape behavior should be consistent
    def expected_shape(batch_size, n_batches):
        return (batch_size * n_batches, 4), (batch_size * n_batches, 3)

    # Multiple outputs and one step.
    batch_size = 5
    sequence_length = 1
    shape_0, shape_1 = expected_shape(batch_size, sequence_length)
    out = model.predict_generator(
        RandomSequence(batch_size, sequence_length=sequence_length))
    assert np.shape(out[0]) == shape_0 and np.shape(out[1]) == shape_1
    # Multiple outputs and multiple steps.
    batch_size = 5
    sequence_length = 2
    shape_0, shape_1 = expected_shape(batch_size, sequence_length)
    out = model.predict_generator(
        RandomSequence(batch_size, sequence_length=sequence_length))
    assert np.shape(out[0]) == shape_0 and np.shape(out[1]) == shape_1
    # Create a model with a single output.
    single_output_model = Model([a, b], a_2)
    single_output_model.compile(optimizer,
                                loss,
                                metrics=[],
                                sample_weight_mode=None)
    # Single output and one step.
    batch_size = 5
    sequence_length = 1
    shape_0, _ = expected_shape(batch_size, sequence_length)
    out = single_output_model.predict_generator(
        RandomSequence(batch_size, sequence_length=sequence_length))
    assert np.shape(out) == shape_0
    # Single output and multiple steps.
    batch_size = 5
    sequence_length = 2
    shape_0, _ = expected_shape(batch_size, sequence_length)
    out = single_output_model.predict_generator(
        RandomSequence(batch_size, sequence_length=sequence_length))
    assert np.shape(out) == shape_0
def build_lstm(self, output_dim, X):
    """Build and compile a CNN + bidirectional-LSTM classifier.

    Three parallel Conv1D branches (kernel sizes 3, 5, 7, each with
    ``self.nb_filter`` filters) run over the input sequence, are
    concatenated, fed through a bidirectional LSTM, and classified with a
    softmax layer.  Uses the legacy Keras 1 API (`nb_filter`,
    `Model(input=..., output=...)`).

    Args:
        output_dim: number of output classes.
        X: training array; only X.shape[1] (timesteps) and X.shape[2]
            (features) are used to size the input layer.

    Returns:
        The compiled Keras model (also stored on ``self.model``).
    """
    nb_filter = self.nb_filter
    loss_function = "categorical_crossentropy"

    # Input shape (timesteps, features) taken from the training data.
    S = Input(shape=[X.shape[1], X.shape[2]])

    # Parallel convolution branches with different receptive fields.
    conv_filters = []
    for filters in [3, 5, 7]:
        filtermodel = Convolution1D(nb_filter=nb_filter,
                                    filter_length=filters,
                                    border_mode="same",
                                    activation="relu",
                                    subsample_length=1)(S)
        conv_filters.append(filtermodel)

    # the complete model: concat -> BiLSTM -> softmax
    merged_cnn = Concatenate()(conv_filters)
    blstm = Bidirectional(
        LSTM(units=128, dropout=0.2, recurrent_dropout=0.2))(merged_cnn)
    # softmax output layer
    dense = Dense(output_dim=output_dim, activation="softmax")(blstm)

    self.model = Model(input=S, output=dense)
    # BUG FIX: the configured RMSprop instance used to be created but then
    # ignored — compile() was called with the string 'rmsprop', silently
    # discarding the custom lr/decay.  Pass the instance instead.
    rmsprop = optimizers.RMSprop(lr=0.01, decay=0.05)
    self.model.compile(optimizer=rmsprop,
                       loss=loss_function,
                       metrics=['accuracy'])
    return self.model
############################################################################# # MODEL SETUP start_time = time.time() orig_img_rows, orig_img_cols = 420, 580 img_rows, img_cols = 128, 192 img_channels = 1 blocks_per_group = 4 nb_total_blocks = 5 * blocks_per_group with tf.device('/gpu:0'): images = Input(shape=(img_rows, img_cols, img_channels)) x = Convolution2D(8, 3, 3, subsample=(1, 1), init='he_normal', border_mode='same', dim_ordering='tf')(images) x = BatchNormalization(axis=3)(x) x = Activation('relu')(x) for i in range(0, blocks_per_group): nb_filters = 8 x = stochastic_depth_residual_block(x, nb_filters=nb_filters,
def unet_model_2d(input_shape,
                  n_labels,
                  batch_normalization=False,
                  initial_learning_rate=0.00001,
                  metrics=m.dice_coefficient):
    """Build and compile a 2D residual U-Net.

    input_shape: without batch_size, (img_height, img_width, img_depth)
    n_labels: number of segmentation classes (softmax channels)
    batch_normalization: forwarded to every res_block_v2
    initial_learning_rate: Adam learning rate
    metrics: a single metric function (wrapped into a list before compile)

    Returns the compiled multi-GPU model.
    """
    inputs = Input(input_shape)
    down_layer = []
    layer = inputs
    # Encoder: residual block, remember the output for the skip
    # connection, then 2x2 max-pool; filters double each level.
    # (Replaces four copy-pasted down_layer_1..down_layer_4 blocks.)
    for n_filters in (64, 128, 256, 512):
        layer = res_block_v2(layer,
                             n_filters,
                             batch_normalization=batch_normalization)
        down_layer.append(layer)
        layer = MaxPooling2D(pool_size=[2, 2], strides=[2, 2])(layer)
        print(str(layer.get_shape()))
    # bottle_layer
    layer = res_block_v2(layer, 1024, batch_normalization=batch_normalization)
    print(str(layer.get_shape()))
    # Decoder: upsample + concatenate with the matching encoder output,
    # then residual block; filters halve each level.
    # (Replaces four copy-pasted up_layer_4..up_layer_1 blocks.)
    for n_filters, skip in zip((512, 256, 128, 64), reversed(down_layer)):
        layer = up_and_concate(layer, skip)
        layer = res_block_v2(layer,
                             n_filters,
                             batch_normalization=batch_normalization)
        print(str(layer.get_shape()))
    # score_layer: 1x1 convolution down to n_labels channels
    layer = Conv2D(n_labels, [1, 1], strides=[1, 1])(layer)
    print(str(layer.get_shape()))
    # softmax over the label channels
    layer = Activation('softmax')(layer)
    print(str(layer.get_shape()))
    outputs = layer
    model = Model(inputs=inputs, outputs=outputs)
    metrics = [metrics]
    # NOTE(review): gpus=4 is hard-coded — confirm the target machine has
    # 4 GPUs before reusing this builder elsewhere.
    model = multi_gpu_model(model, gpus=4)
    model.compile(optimizer=Adam(lr=initial_learning_rate),
                  loss=m.dice_coefficient_loss,
                  metrics=metrics)
    return model
def main():
    """CLI entry point: optionally download demo text, train Word2Vec
    embeddings over a corpus directory, then run an interactive
    word-similarity loop using a Keras cosine-similarity model.

    NOTE(review): this function is Python 2 only (`urllib2`, `raw_input`)
    and uses the legacy Keras `merge` / `Model(input=..., output=...)` API.
    """
    parser = argparse.ArgumentParser(
        description='Train and test Keras embeddings')
    parser.add_argument('-d',
                        '--data',
                        help='Path to text files to train on',
                        required=True)
    parser.add_argument(
        '-e',
        '--embeddings',
        help='Where to save embeddings (default: embeddings.npz)',
        default='embeddings.npz')
    parser.add_argument(
        '-v',
        '--vocab',
        help='Where to save vocabulary map (default: map.json)',
        default='map.json')
    parser.add_argument('-m',
                        '--demo',
                        help='Download some demo data',
                        action='store_true',
                        default=False)
    parser.add_argument('-n',
                        '--no-train',
                        dest='train',
                        help='Don\'t train embeddings',
                        action='store_false',
                        default=True)
    parser.add_argument('-t',
                        '--tokens',
                        dest='print_tokens',
                        help='Print tokens',
                        action='store_true',
                        default=False)
    options = parser.parse_args()
    if options.demo:
        # fetch "The Prince" from Project Gutenberg as demo corpus
        import urllib2
        mach = 'http://www.gutenberg.org/cache/epub/1232/pg1232.txt'
        save_path = os.path.join(options.data, 'machiavelli.txt')
        if not os.path.exists(save_path):
            print('Downloading demo data...')
            with open(save_path, 'w') as f:
                response = urllib2.urlopen(mach)
                f.write(response.read())
    # variable arguments are passed to gensim's word2vec model
    if options.train:
        print('Training Word2Vec...')
        create_embeddings(options.data,
                          options.embeddings,
                          options.vocab,
                          size=100,
                          min_count=5,
                          window=5,
                          sg=1,
                          iter=25)
    word2idx, idx2word = load_vocab(options.vocab)
    if options.print_tokens:
        print('Tokens:', ', '.join(word2idx.keys()))
    # cosine similarity model: two index inputs -> shared embedding ->
    # cosine merge of the two embedding vectors
    print('Building model...')
    input_a = Input(shape=(1, ), dtype='int32', name='input_a')
    input_b = Input(shape=(1, ), dtype='int32', name='input_b')
    embeddings = word2vec_embedding_layer(options.embeddings)
    embedding_a = embeddings(input_a)
    embedding_b = embeddings(input_b)
    similarity = merge([embedding_a, embedding_b], mode='cos', dot_axes=2)
    model = Model(input=[input_a, input_b], output=similarity)
    # optimizer and loss don't matter: the model is only used for predict()
    model.compile(optimizer='sgd', loss='mse')
    # interactive loop: read two words, print their cosine similarity
    while True:
        word_a = raw_input('First word: ')
        if word_a not in word2idx:
            print('"%s" is not in the index' % word_a)
            continue
        word_b = raw_input('Second word: ')
        if word_b not in word2idx:
            print('"%s" is not in the index' % word_b)
            continue
        output = model.predict(
            [np.asarray([word2idx[word_a]]),
             np.asarray([word2idx[word_b]])])
        print('%f' % output)
def testEmbeddingLayer20NewsGroup(self):
    """
    Test Keras 'Embedding' layer returned by 'get_embedding_layer' function
    for a smaller version of the 20NewsGroup classification problem.
    """
    MAX_SEQUENCE_LENGTH = 1000
    # Prepare text samples and their labels
    # Processing text dataset
    texts = []  # list of text samples
    texts_w2v = []  # used to train the word embeddings
    labels = []  # list of label ids
    # restrict to 3 categories to keep the test small
    data = fetch_20newsgroups(
        subset='train',
        categories=['alt.atheism', 'comp.graphics', 'sci.space'])
    for index in range(len(data)):
        label_id = data.target[index]
        file_data = data.data[index]
        i = file_data.find('\n\n')  # skip header
        if i > 0:
            file_data = file_data[i:]
        try:
            # split each post into sentences; every sentence becomes one
            # training sample carrying the post's label
            curr_str = str(file_data)
            sentence_list = curr_str.split('\n')
            for sentence in sentence_list:
                sentence = (sentence.strip()).lower()
                texts.append(sentence)
                texts_w2v.append(sentence.split(' '))
                labels.append(label_id)
        except Exception:
            # best-effort: skip posts that fail to decode/split
            pass
    # Vectorize the text samples into a 2D integer tensor
    tokenizer = Tokenizer()
    tokenizer.fit_on_texts(texts)
    sequences = tokenizer.texts_to_sequences(texts)
    # word_index = tokenizer.word_index
    data = pad_sequences(sequences, maxlen=MAX_SEQUENCE_LENGTH)
    labels = to_categorical(np.asarray(labels))
    x_train = data
    y_train = labels
    # prepare the embedding layer using the wrapper
    keras_w2v = self.model_twenty_ng
    keras_w2v.build_vocab(texts_w2v)
    keras_w2v.train(texts,
                    total_examples=keras_w2v.corpus_count,
                    epochs=keras_w2v.iter)
    keras_w2v_wv = keras_w2v.wv
    embedding_layer = keras_w2v_wv.get_keras_embedding()
    # create a 1D convnet to solve our classification task
    sequence_input = Input(shape=(MAX_SEQUENCE_LENGTH, ), dtype='int32')
    embedded_sequences = embedding_layer(sequence_input)
    x = Conv1D(128, 5, activation='relu')(embedded_sequences)
    x = MaxPooling1D(5)(x)
    x = Conv1D(128, 5, activation='relu')(x)
    x = MaxPooling1D(5)(x)
    x = Conv1D(128, 5, activation='relu')(x)
    x = MaxPooling1D(35)(x)  # global max pooling
    x = Flatten()(x)
    x = Dense(128, activation='relu')(x)
    preds = Dense(y_train.shape[1], activation='softmax')(x)
    model = Model(sequence_input, preds)
    model.compile(loss='categorical_crossentropy',
                  optimizer='rmsprop',
                  metrics=['acc'])
    fit_ret_val = model.fit(x_train, y_train, epochs=1)
    # verify the type of the object returned after training
    # value returned is a `History` instance.
    # Its `history` attribute contains all information collected
    # during training.
    self.assertTrue(type(fit_ret_val) == keras.callbacks.History)
def get_model(name,
              X_train,
              y_train,
              embeddings,
              batch_size,
              nb_epoch,
              max_len,
              max_features,
              nb_classes=17):
    """Build, train, and return one of three multi-label classifiers.

    name selects the architecture:
      'LSTM+ATT' — CNN-derived attention vector feeding two AttentionLSTMs
      'LSTM'     — forward + backward LSTM pair over the embedding
      'MLP'      — dense baseline over the raw (max_len,) input

    All variants use sigmoid/softmax outputs over `nb_classes` classes and
    the legacy Keras 1 API (`merge`, `nb_filter`,
    `Model(input=..., output=...)`).  Returns the trained model.
    """
    print('Building model', name)
    # get correct loss
    loss_function = 'binary_crossentropy'
    if name == 'LSTM+ATT':
        # this is the placeholder tensor for the input sequences
        sequence = Input(shape=(max_len, ), dtype='int32')
        # this embedding layer will transform the sequences of integers
        # into vectors of size 128
        embedded = Embedding(embeddings.shape[0],
                             embeddings.shape[1],
                             input_length=max_len,
                             weights=[embeddings])(sequence)
        # 4 convolution layers (each 1000 filters)
        cnn = [
            Convolution1D(filter_length=filters,
                          nb_filter=1000,
                          border_mode='same') for filters in [2, 3, 5, 7]
        ]
        # concatenate
        question = merge([cnn(embedded) for cnn in cnn], mode='concat')
        # create attention vector from max-pooled convoluted
        maxpool = Lambda(
            lambda x: keras_backend.max(x, axis=1, keepdims=False),
            output_shape=lambda x: (x[0], x[2]))
        attention_vector = maxpool(question)
        forwards = AttentionLSTM(64, attention_vector)(embedded)
        backwards = AttentionLSTM(64, attention_vector,
                                  go_backwards=True)(embedded)
        # concatenate the outputs of the 2 LSTMs
        answer_rnn = merge([forwards, backwards],
                           mode='concat',
                           concat_axis=-1)
        after_dropout = Dropout(0.5)(answer_rnn)
        # we have 17 classes
        output = Dense(nb_classes, activation='sigmoid')(after_dropout)
        model = Model(input=sequence, output=output)
        # try using different optimizers and different optimizer configs
        model.compile('adam', loss_function, metrics=[loss_function])
        # model.compile('adam', 'hinge', metrics=['hinge'])
        print("Layers: ", model.layers)
        for layer in model.layers:
            if isinstance(layer, AttentionLSTM):
                print(type(layer.attention_vec))
                # BUG FIX: this commented-out print previously ended with
                # stray prose that spilled out of the comment onto the next
                # code line, which made the file unparseable.
                # print('Attention vector shape:', layer.attention_vec.shape)
        model.fit(X_train,
                  y_train,
                  batch_size=batch_size,
                  nb_epoch=nb_epoch,
                  validation_split=0.1,
                  verbose=1)
        return model
    if name == 'LSTM':
        # this is the placeholder tensor for the input sequences
        sequence = Input(shape=(max_len, ), dtype='int32')
        # this embedding layer will transform the sequences of integers
        # into vectors of size 128
        embedded = Embedding(embeddings.shape[0],
                             embeddings.shape[1],
                             input_length=max_len,
                             weights=[embeddings])(sequence)
        # apply forwards and backward LSTM
        forwards = LSTM(64)(embedded)
        backwards = LSTM(64, go_backwards=True)(embedded)
        # concatenate the outputs of the 2 LSTMs
        answer_rnn = merge([forwards, backwards],
                           mode='concat',
                           concat_axis=-1)
        after_dropout = Dropout(0.5)(answer_rnn)
        # we have 17 classes
        output = Dense(nb_classes, activation='sigmoid')(after_dropout)
        model = Model(input=sequence, output=output)
        # try using different optimizers and different optimizer configs
        model.compile('adam', loss_function, metrics=[loss_function])
        model.fit(X_train,
                  y_train,
                  batch_size=batch_size,
                  nb_epoch=nb_epoch,
                  validation_split=0.1,
                  verbose=0)
        return model
    if name == 'MLP':
        model = Sequential()
        model.add(Dense(512, input_shape=(max_len, )))
        model.add(Activation('relu'))
        model.add(Dropout(0.5))
        model.add(Dense(nb_classes))
        model.add(Activation('softmax'))
        model.compile(loss=loss_function,
                      optimizer='adam',
                      metrics=[loss_function])
        model.fit(X_train,
                  y_train,
                  nb_epoch=nb_epoch,
                  batch_size=batch_size,
                  validation_split=0.1,
                  verbose=0)
        return model
def test_recursion():
    """Test calling a Model as a layer (model recursion), serialization of
    the resulting graphs, a series of invalid-graph error cases, and
    calling Keras models directly on TF placeholders.  Uses the legacy
    Keras 1 API (`merge`, `Model(input=..., output=...)`,
    `get_output_shape_for`, `K._BACKEND`)."""
    ####################################################
    # test recursion
    a = Input(shape=(32, ), name='input_a')
    b = Input(shape=(32, ), name='input_b')
    dense = Dense(16, name='dense_1')
    a_2 = dense(a)
    b_2 = dense(b)
    merged = merge([a_2, b_2], mode='concat', name='merge')
    c = Dense(64, name='dense_2')(merged)
    d = Dense(5, name='dense_3')(c)
    model = Model(input=[a, b], output=[c, d], name='model')
    e = Input(shape=(32, ), name='input_e')
    f = Input(shape=(32, ), name='input_f')
    g, h = model([e, f])
    # g2, h2 = model([e, f])
    assert g._keras_shape == c._keras_shape
    assert h._keras_shape == d._keras_shape
    # test separate manipulation of different layer outputs
    i = Dense(7, name='dense_4')(h)
    final_model = Model(input=[e, f], output=[i, g], name='final')
    assert len(final_model.inputs) == 2
    assert len(final_model.outputs) == 2
    assert len(final_model.layers) == 4
    # we don't check names of first 2 layers (inputs) because
    # ordering of same-level layers is not fixed
    print('final_model layers:', [layer.name for layer in final_model.layers])
    assert [layer.name
            for layer in final_model.layers][2:] == ['model', 'dense_4']
    print(model.compute_mask([e, f], [None, None]))
    assert model.compute_mask([e, f], [None, None]) == [None, None]
    print(final_model.get_output_shape_for([(10, 32), (10, 32)]))
    assert final_model.get_output_shape_for([(10, 32), (10, 32)]) == [(10, 7),
                                                                      (10, 64)]
    # run recursive model
    fn = K.function(final_model.inputs, final_model.outputs)
    input_a_np = np.random.random((10, 32))
    input_b_np = np.random.random((10, 32))
    fn_outputs = fn([input_a_np, input_b_np])
    assert [x.shape for x in fn_outputs] == [(10, 7), (10, 64)]
    # test serialization: rebuild from config and re-run
    model_config = final_model.get_config()
    print(json.dumps(model_config, indent=4))
    recreated_model = Model.from_config(model_config)
    fn = K.function(recreated_model.inputs, recreated_model.outputs)
    input_a_np = np.random.random((10, 32))
    input_b_np = np.random.random((10, 32))
    fn_outputs = fn([input_a_np, input_b_np])
    assert [x.shape for x in fn_outputs] == [(10, 7), (10, 64)]
    ####################################################
    # test multi-input multi-output
    j = Input(shape=(32, ), name='input_j')
    k = Input(shape=(32, ), name='input_k')
    m, n = model([j, k])
    o = Input(shape=(32, ), name='input_o')
    p = Input(shape=(32, ), name='input_p')
    q, r = model([o, p])
    assert n._keras_shape == (None, 5)
    assert q._keras_shape == (None, 64)
    s = merge([n, q], mode='concat', name='merge_nq')
    assert s._keras_shape == (None, 64 + 5)
    # test with single output as 1-elem list
    multi_io_model = Model([j, k, o, p], [s])
    fn = K.function(multi_io_model.inputs, multi_io_model.outputs)
    fn_outputs = fn([
        np.random.random((10, 32)),
        np.random.random((10, 32)),
        np.random.random((10, 32)),
        np.random.random((10, 32))
    ])
    assert [x.shape for x in fn_outputs] == [(10, 69)]
    # test with single output as tensor
    multi_io_model = Model([j, k, o, p], s)
    fn = K.function(multi_io_model.inputs, multi_io_model.outputs)
    fn_outputs = fn([
        np.random.random((10, 32)),
        np.random.random((10, 32)),
        np.random.random((10, 32)),
        np.random.random((10, 32))
    ])
    # note that the output of the K.function will still be a 1-elem list
    assert [x.shape for x in fn_outputs] == [(10, 69)]
    # test serialization
    print('multi_io_model.layers:', multi_io_model.layers)
    print('len(model.inbound_nodes):', len(model.inbound_nodes))
    print('len(model.outbound_nodes):', len(model.outbound_nodes))
    model_config = multi_io_model.get_config()
    print(model_config)
    print(json.dumps(model_config, indent=4))
    recreated_model = Model.from_config(model_config)
    fn = K.function(recreated_model.inputs, recreated_model.outputs)
    fn_outputs = fn([
        np.random.random((10, 32)),
        np.random.random((10, 32)),
        np.random.random((10, 32)),
        np.random.random((10, 32))
    ])
    # note that the output of the K.function will still be a 1-elem list
    assert [x.shape for x in fn_outputs] == [(10, 69)]
    # round-trip through config / JSON / YAML serialization
    config = model.get_config()
    new_model = Model.from_config(config)
    model.summary()
    json_str = model.to_json()
    new_model = model_from_json(json_str)
    yaml_str = model.to_yaml()
    new_model = model_from_yaml(yaml_str)
    ####################################################
    # test invalid graphs
    # input is not an Input tensor
    j = Input(shape=(32, ), name='input_j')
    j = Dense(32)(j)
    k = Input(shape=(32, ), name='input_k')
    m, n = model([j, k])
    with pytest.raises(Exception):
        invalid_model = Model([j, k], [m, n])
    # disconnected graph
    j = Input(shape=(32, ), name='input_j')
    k = Input(shape=(32, ), name='input_k')
    m, n = model([j, k])
    with pytest.raises(Exception) as e:
        invalid_model = Model([j], [m, n])
    # redudant outputs
    j = Input(shape=(32, ), name='input_j')
    k = Input(shape=(32, ), name='input_k')
    m, n = model([j, k])
    # this should work lol
    # TODO: raise a warning
    invalid_model = Model([j, k], [m, n, n])
    # redundant inputs
    j = Input(shape=(32, ), name='input_j')
    k = Input(shape=(32, ), name='input_k')
    m, n = model([j, k])
    with pytest.raises(Exception):
        invalid_model = Model([j, k, j], [m, n])
    # i have not idea what I'm doing: garbage as inputs/outputs
    j = Input(shape=(32, ), name='input_j')
    k = Input(shape=(32, ), name='input_k')
    m, n = model([j, k])
    with pytest.raises(Exception):
        invalid_model = Model([j, k], [m, n, 0])
    ####################################################
    # test calling layers/models on TF tensors
    if K._BACKEND == 'tensorflow':
        import tensorflow as tf
        j = Input(shape=(32, ), name='input_j')
        k = Input(shape=(32, ), name='input_k')
        m, n = model([j, k])
        tf_model = Model([j, k], [m, n])  # magic
        # raw TF placeholders carry no Keras metadata
        j_tf = tf.placeholder(dtype=K.floatx())
        k_tf = tf.placeholder(dtype=K.floatx())
        m_tf, n_tf = tf_model([j_tf, k_tf])
        assert not hasattr(m_tf, '_keras_shape')
        assert not hasattr(n_tf, '_keras_shape')
        assert K.int_shape(m_tf) == (None, 64)
        assert K.int_shape(n_tf) == (None, 5)
        # test merge
        o_tf = merge([j_tf, k_tf], mode='concat', concat_axis=1)
def isensee2017_3D(n_labels, shape, W, lr=1e-5, n_base_filters=16, depth=4,
                   dropout_rate=0.3, n_segmentation_levels=3, optimizer=Adam,
                   initial_learning_rate=9e-4,
                   loss_function=weighted_dice_coefficient_loss,
                   activation_name="sigmoid"):
    """Build the Isensee et al. 2017 BRATS segmentation network (3D).

    Reference:
    https://www.cbica.upenn.edu/sbia/Spyridon.Bakas/MICCAI_BraTS/MICCAI_BraTS_2017_proceedings_shortPapers.pdf
    Closely related to Kayalibay et al., "CNN-based Segmentation of Medical
    Imaging Data", 2017: https://arxiv.org/pdf/1701.03056.pdf

    :param n_labels: number of output labels; 1 selects a sigmoid /
        binary-crossentropy head, >1 a reshaped softmax /
        categorical-crossentropy head.
    :param shape: input tensor shape passed straight to ``Input``; the
        ``axis=4`` concatenations imply channels-last 3D volumes
        (x, y, z, channels) -- TODO confirm against callers.
    :param W: path to a weights file to load, or '' to skip loading.
    :param lr: learning rate actually used when compiling (``Adam(lr=lr)``).
    :param n_base_filters: filters at the first level; doubled per level.
    :param depth: number of encoder levels.
    :param dropout_rate: dropout inside each context module.
    :param n_segmentation_levels: how many decoder levels contribute
        deep-supervision segmentation maps.
    :param optimizer, initial_learning_rate, loss_function, activation_name:
        NOTE(review): these four parameters are accepted but never used by the
        compile path below (compile hard-codes Adam(lr=lr) and a
        crossentropy loss chosen by n_labels); ``activation_name`` only feeds
        an activation that is immediately overridden when n_labels is 1 or >1.
        Kept for interface compatibility.
    :return: a compiled Keras ``Model``.
    """
    inputs = Input(shape)
    current_layer = inputs
    level_output_layers = list()
    level_filters = list()
    # Encoder: each level is an input conv (strided after the first level to
    # downsample) plus a residual context module.
    for level_number in range(depth):
        n_level_filters = (2**level_number) * n_base_filters
        level_filters.append(n_level_filters)
        if current_layer is inputs:
            in_conv = create_convolution_block(current_layer, n_level_filters, activation=LeakyReLU, instance_normalization=False)
        else:
            # Strided conv replaces pooling for downsampling.
            in_conv = create_convolution_block(current_layer, n_level_filters, strides=(2, 2, 2), activation=LeakyReLU, instance_normalization=False)
        context_output_layer = create_context_module(in_conv, n_level_filters, dropout_rate=dropout_rate)
        # Residual connection around the context module.
        summation_layer = Add()([in_conv, context_output_layer])
        level_output_layers.append(summation_layer)
        current_layer = summation_layer
    # Decoder: upsample, concatenate with the matching encoder level, then
    # localize; collect per-level 1x1x1 segmentation maps for deep supervision.
    segmentation_layers = list()
    for level_number in range(depth - 2, -1, -1):
        up_sampling = create_up_sampling_module(current_layer, level_filters[level_number])
        concatenation_layer = concatenate([level_output_layers[level_number], up_sampling], axis=4)
        localization_output = create_localization_module(concatenation_layer, level_filters[level_number])
        current_layer = localization_output
        if level_number < n_segmentation_levels:
            # insert(0, ...) keeps segmentation_layers ordered coarse->fine.
            segmentation_layers.insert(0, create_convolution_block(current_layer, n_filters=n_labels, kernel=(1, 1, 1)))
    # output_layer = current_layer
    # Sum the deep-supervision maps from coarsest to finest, upsampling the
    # running sum between levels.
    output_layer = None
    for level_number in reversed(range(n_segmentation_levels)):
        segmentation_layer = segmentation_layers[level_number]
        if output_layer is None:
            output_layer = segmentation_layer
        else:
            output_layer = Add()([output_layer, segmentation_layer])
        if level_number > 0:
            output_layer = UpSampling3D(size=(2, 2, 2))(output_layer)
    # NOTE(review): this activation is dead whenever n_labels is 1 or >1 --
    # one of the two branches below always rebinds activation_block.
    activation_block = Activation(activation_name)(output_layer)
    if n_labels == 1:
        activation_block = Activation('sigmoid')(output_layer)
    if n_labels > 1:
        # Multi-class head: project to n_labels channels, flatten the spatial
        # dims so softmax is applied per voxel.
        final_convolution = Conv3D(n_labels, 1)(output_layer)
        o = Reshape((shape[0] * shape[1] * shape[2], n_labels), input_shape=(shape[0], shape[1], shape[2], n_labels))(final_convolution)
        activation_block = Activation('softmax')(o)
    model = Model(inputs=inputs, outputs=activation_block)
    if W != '':
        model.load_weights(W)
    # Compile ignores the optimizer/loss_function parameters (see docstring).
    if n_labels == 1:
        model.compile(loss='binary_crossentropy', optimizer=Adam(lr=lr), metrics=['accuracy'])
    if n_labels > 1:
        model.compile(loss='categorical_crossentropy', optimizer=Adam(lr=lr), metrics=['categorical_accuracy'])
    model.summary()
    return model
def test_sequential_regression():
    """Regression test for mixing Sequential models with the functional API.

    Exercises (old Keras 1 API: ``Merge``, ``output_dim``, ``nb_epoch``):
    calling a Sequential inside Model, config round-trips, weight transfer,
    multi-branch Sequential graphs built from ``Merge`` layers, and calling
    the merged Sequential as a layer inside an outer functional Model.
    """
    from keras.models import Sequential, Model
    from keras.layers import Merge, Embedding, BatchNormalization, LSTM, InputLayer, Input

    # start with a basic example of using a Sequential model
    # inside the functional API
    seq = Sequential()
    seq.add(Dense(input_dim=10, output_dim=10))
    x = Input(shape=(10, ))
    y = seq(x)
    model = Model(x, y)
    model.compile('rmsprop', 'mse')
    weights = model.get_weights()
    # test serialization: the rebuilt model must accept the saved weights
    config = model.get_config()
    model = Model.from_config(config)
    model.compile('rmsprop', 'mse')
    model.set_weights(weights)
    # more advanced model with multiple branches
    branch_1 = Sequential(name='branch_1')
    branch_1.add(
        Embedding(input_dim=100, output_dim=10, input_length=2, name='embed_1'))
    branch_1.add(LSTM(32, name='lstm_1'))
    branch_2 = Sequential(name='branch_2')
    branch_2.add(Dense(32, input_shape=(8, ), name='dense_2'))
    branch_3 = Sequential(name='branch_3')
    branch_3.add(Dense(32, input_shape=(6, ), name='dense_3'))
    branch_1_2 = Sequential([Merge([branch_1, branch_2], mode='concat')],
                            name='branch_1_2')
    branch_1_2.add(Dense(16, name='dense_1_2-0'))
    # test whether an impromptu input_shape breaks the model
    branch_1_2.add(Dense(16, input_shape=(16, ), name='dense_1_2-1'))
    model = Sequential([Merge([branch_1_2, branch_3], mode='concat')],
                       name='final')
    model.add(Dense(16, name='dense_final'))
    model.compile(optimizer='rmsprop',
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    model.summary()
    # x feeds the Embedding branch, hence int32 indices in [0, 100)
    x = (100 * np.random.random((100, 2))).astype('int32')
    y = np.random.random((100, 8))
    z = np.random.random((100, 6))
    labels = np.random.random((100, 16))
    model.fit([x, y, z], labels, nb_epoch=1)
    # test if Sequential can be called in the functional API
    a = Input(shape=(2, ), dtype='int32')
    b = Input(shape=(8, ))
    c = Input(shape=(6, ))
    o = model([a, b, c])
    outer_model = Model([a, b, c], o)
    outer_model.compile(optimizer='rmsprop',
                        loss='categorical_crossentropy',
                        metrics=['accuracy'])
    outer_model.fit([x, y, z], labels, nb_epoch=1)
    # test serialization of the outer functional model
    config = outer_model.get_config()
    outer_model = Model.from_config(config)
    outer_model.compile(optimizer='rmsprop',
                        loss='categorical_crossentropy',
                        metrics=['accuracy'])
    outer_model.fit([x, y, z], labels, nb_epoch=1)
def seg(feature_num=128, timesteps=256, multi_grid_layer_n=1, multi_grid_n=3,
        input_channel=1, prog=False, out_class=2):
    """Build an encoder/decoder segmentation network over score spectrograms.

    Encoder: four strided conv_block stages (32->64->128->256 filters, each
    stage halving the time/feature resolution) preceded by a 7x7 stem conv.
    Bottleneck: ``multi_grid_layer_n`` multi-grid blocks of dilated 3x3 convs
    (dilation 2**ii for ii in range(multi_grid_n)) concatenated together.
    Decoder: four transpose_conv_block stages with skip connections back to
    the encoder stages.

    :param feature_num: width of the input feature axis.
    :param timesteps: length of the input time axis.
    :param multi_grid_layer_n: number of multi-grid bottleneck blocks.
    :param multi_grid_n: dilated conv branches per multi-grid block.
    :param input_channel: channels of the input tensor.
    :param prog: if True the model outputs the list of intermediate
        activations collected in ``layer_out`` (consumed by ``seg_pnn``)
        instead of the final prediction map.
    :param out_class: channels of the final 1x1 prediction conv.
    :return: an uncompiled Keras ``Model``.
    """
    layer_out = []
    input_score = Input(shape=(timesteps, feature_num, input_channel),
                        name="input_score_48")
    en = Conv2D(2**5, (7, 7), strides=(1, 1), padding="same")(input_score)
    layer_out.append(en)
    # Encoder stage 1: downsample then refine.
    en_l1 = conv_block(en, 2**5, (3, 3), strides=(2, 2))
    en_l1 = conv_block(en_l1, 2**5, (3, 3), strides=(1, 1))
    layer_out.append(en_l1)
    # Encoder stage 2.
    en_l2 = conv_block(en_l1, 2**6, (3, 3), strides=(2, 2))
    en_l2 = conv_block(en_l2, 2**6, (3, 3), strides=(1, 1))
    en_l2 = conv_block(en_l2, 2**6, (3, 3), strides=(1, 1))
    layer_out.append(en_l2)
    # Encoder stage 3.
    en_l3 = conv_block(en_l2, 2**7, (3, 3), strides=(2, 2))
    en_l3 = conv_block(en_l3, 2**7, (3, 3), strides=(1, 1))
    en_l3 = conv_block(en_l3, 2**7, (3, 3), strides=(1, 1))
    en_l3 = conv_block(en_l3, 2**7, (3, 3), strides=(1, 1))
    layer_out.append(en_l3)
    # Encoder stage 4.
    en_l4 = conv_block(en_l3, 2**8, (3, 3), strides=(2, 2))
    en_l4 = conv_block(en_l4, 2**8, (3, 3), strides=(1, 1))
    en_l4 = conv_block(en_l4, 2**8, (3, 3), strides=(1, 1))
    en_l4 = conv_block(en_l4, 2**8, (3, 3), strides=(1, 1))
    en_l4 = conv_block(en_l4, 2**8, (3, 3), strides=(1, 1))
    layer_out.append(en_l4)
    feature = en_l4
    # Multi-grid bottleneck: parallel dilated convs over the same input,
    # concatenated, then projected back to 512 channels.
    for i in range(multi_grid_layer_n):
        feature = BatchNormalization()(Activation("relu")(feature))
        feature = Dropout(0.3)(feature)
        m = BatchNormalization()(Conv2D(2**9, (1, 1), strides=(1, 1),
                                        padding="same",
                                        activation="relu")(feature))
        multi_grid = m
        for ii in range(multi_grid_n):
            m = BatchNormalization()(Conv2D(2**9, (3, 3), strides=(1, 1),
                                            dilation_rate=2**ii,
                                            padding="same",
                                            activation="relu")(feature))
            multi_grid = concatenate([multi_grid, m])
        multi_grid = Dropout(0.3)(multi_grid)
        feature = Conv2D(2**9, (1, 1), strides=(1, 1), padding="same")(multi_grid)
    # NOTE(review): placed after the multi-grid loop; the collapsed source did
    # not preserve indentation here -- confirm against the original file.
    layer_out.append(feature)
    feature = BatchNormalization()(Activation("relu")(feature))
    feature = Conv2D(2**8, (1, 1), strides=(1, 1), padding="same")(feature)
    # Residual connection around the bottleneck.
    feature = add([feature, en_l4])
    # Decoder stage 1: upsample, fuse encoder stage 3, residual add.
    de_l1 = transpose_conv_block(feature, 2**7, (3, 3), strides=(2, 2))
    layer_out.append(de_l1)
    skip = de_l1
    de_l1 = BatchNormalization()(Activation("relu")(de_l1))
    de_l1 = concatenate(
        [de_l1, BatchNormalization()(Activation("relu")(en_l3))])
    de_l1 = Dropout(0.4)(de_l1)
    de_l1 = Conv2D(2**7, (1, 1), strides=(1, 1), padding="same")(de_l1)
    de_l1 = add([de_l1, skip])
    # Decoder stage 2: fuse encoder stage 2.
    de_l2 = transpose_conv_block(de_l1, 2**6, (3, 3), strides=(2, 2))
    layer_out.append(de_l2)
    skip = de_l2
    de_l2 = BatchNormalization()(Activation("relu")(de_l2))
    de_l2 = concatenate(
        [de_l2, BatchNormalization()(Activation("relu")(en_l2))])
    de_l2 = Dropout(0.4)(de_l2)
    de_l2 = Conv2D(2**6, (1, 1), strides=(1, 1), padding="same")(de_l2)
    de_l2 = add([de_l2, skip])
    # Decoder stage 3: fuse encoder stage 1.
    de_l3 = transpose_conv_block(de_l2, 2**5, (3, 3), strides=(2, 2))
    layer_out.append(de_l3)
    skip = de_l3
    de_l3 = BatchNormalization()(Activation("relu")(de_l3))
    de_l3 = concatenate(
        [de_l3, BatchNormalization()(Activation("relu")(en_l1))])
    de_l3 = Dropout(0.4)(de_l3)
    de_l3 = Conv2D(2**5, (1, 1), strides=(1, 1), padding="same")(de_l3)
    de_l3 = add([de_l3, skip])
    # Decoder stage 4: back to full resolution, no encoder skip.
    de_l4 = transpose_conv_block(de_l3, 2**5, (3, 3), strides=(2, 2))
    layer_out.append(de_l4)
    de_l4 = BatchNormalization()(Activation("relu")(de_l4))
    de_l4 = Dropout(0.4)(de_l4)
    out = Conv2D(out_class, (1, 1), strides=(1, 1), padding="same",
                 name='prediction')(de_l4)
    if (prog):
        # Progressive mode: expose intermediate activations for seg_pnn.
        model = Model(inputs=input_score, outputs=layer_out)
    else:
        model = Model(inputs=input_score, outputs=out)
    return model
def unet_model_3d(input_shape, pool_size=(2, 2, 2), n_labels=1,
                  initial_learning_rate=0.00001, deconvolution=False,
                  depth=4, n_base_filters=32,
                  include_label_wise_dice_coefficients=False,
                  metrics=dice_coefficient, batch_normalization=False,
                  activation_name="sigmoid"):
    """
    Builds the 3D UNet Keras model.
    :param metrics: List metrics to be calculated during model training
        (default is dice coefficient). A single metric is also accepted and
        is wrapped in a list below.
    :param include_label_wise_dice_coefficients: If True and n_labels is
        greater than 1, model will report the dice coefficient for each label
        as metric.
    :param n_base_filters: The number of filters that the first layer in the
        convolution network will have. Following layers will contain a
        multiple of this number. Lowering this number will likely reduce the
        amount of memory required to train the model.
    :param depth: indicates the depth of the U-shape for the model. The
        greater the depth, the more max pooling layers will be added to the
        model. Lowering the depth may reduce the amount of memory required
        for training.
    :param input_shape: Shape of the input data (n_channels, x_size, y_size,
        z_size). The x, y, and z sizes must be divisible by the pool size to
        the power of the depth of the UNet, that is pool_size^depth.
        NOTE(review): the ``axis=1`` concatenations and ``_keras_shape[1]``
        reads below assume channels-first data format -- confirm the backend
        is configured accordingly.
    :param pool_size: Pool size for the max pooling operations.
    :param n_labels: Number of binary labels that the model is learning.
    :param initial_learning_rate: Initial learning rate for the model. This
        will be decayed during training.
    :param deconvolution: If set to True, will use transpose
        convolution(deconvolution) instead of up-sampling. This increases the
        amount memory required during training.
    :return: Untrained 3D UNet Model
    """
    inputs = Input(input_shape)
    current_layer = inputs
    levels = list()
    # add levels with max pooling: each level applies two conv blocks
    # (the second doubling the filters) and, except at the bottom, a pooling.
    for layer_depth in range(depth):
        layer1 = create_convolution_block(
            input_layer=current_layer,
            n_filters=n_base_filters * (2**layer_depth),
            batch_normalization=batch_normalization)
        layer2 = create_convolution_block(
            input_layer=layer1,
            n_filters=n_base_filters * (2**layer_depth) * 2,
            batch_normalization=batch_normalization)
        if layer_depth < depth - 1:
            current_layer = MaxPooling3D(pool_size=pool_size)(layer2)
            levels.append([layer1, layer2, current_layer])
        else:
            current_layer = layer2
            levels.append([layer1, layer2])
    # add levels with up-convolution or up-sampling, concatenating each with
    # the pre-pooling output of the matching encoder level (channels axis 1).
    for layer_depth in range(depth - 2, -1, -1):
        up_convolution = get_up_convolution(
            pool_size=pool_size,
            deconvolution=deconvolution,
            n_filters=current_layer._keras_shape[1])(current_layer)
        concat = concatenate([up_convolution, levels[layer_depth][1]], axis=1)
        current_layer = create_convolution_block(
            n_filters=levels[layer_depth][1]._keras_shape[1],
            input_layer=concat,
            batch_normalization=batch_normalization)
        current_layer = create_convolution_block(
            n_filters=levels[layer_depth][1]._keras_shape[1],
            input_layer=current_layer,
            batch_normalization=batch_normalization)
    # Per-voxel n_labels-channel prediction head.
    final_convolution = Conv3D(n_labels, (1, 1, 1))(current_layer)
    act = Activation(activation_name)(final_convolution)
    model = Model(inputs=inputs, outputs=act)
    if not isinstance(metrics, list):
        metrics = [metrics]
    if include_label_wise_dice_coefficients and n_labels > 1:
        label_wise_dice_metrics = [
            get_label_dice_coefficient_function(index)
            for index in range(n_labels)
        ]
        if metrics:
            metrics = metrics + label_wise_dice_metrics
        else:
            metrics = label_wise_dice_metrics
    model.compile(optimizer=Adam(lr=initial_learning_rate),
                  loss=dice_coefficient_loss,
                  metrics=metrics)
    return model
def seg_pnn(feature_num=128, timesteps=256, multi_grid_layer_n=5,
            multi_grid_n=3, prev_model="melody_transfer_transpose"):
    """Build a progressive-neural-network variant of ``seg``.

    A frozen copy of a previously trained ``seg`` model (loaded from
    ``prev_model``) runs on the low-resolution input; its ten intermediate
    activations ``o_p[0..9]`` (as exposed by ``seg(prog=True)``) are passed
    through trainable ``adapter`` blocks and added laterally into the
    corresponding stages of a new column trained on the full-resolution input.

    :param feature_num: feature-axis width of the full-resolution input; the
        frozen column receives ``feature_num // 3``.
    :param timesteps: time-axis length of both inputs.
    :param multi_grid_layer_n: multi-grid bottleneck blocks in the new column
        (the frozen column is built with multi_grid_layer_n=1).
    :param multi_grid_n: dilated conv branches per multi-grid block.
    :param prev_model: path of the saved model whose weights are copied into
        the frozen column.
    :return: an uncompiled two-input Keras ``Model``.
    """
    layer_out = []
    input_score_48 = Input(shape=(timesteps, feature_num, 1),
                           name="input_score_48")
    input_score_12 = Input(shape=(timesteps, feature_num // 3, 1),
                           name="input_score_12")
    # Rebuild the old architecture in prog mode, copy the trained weights in,
    # and freeze every layer so only the new column and adapters train.
    me_transfer_seg = seg(multi_grid_layer_n=1, timesteps=timesteps, prog=True)
    me_seg = load_model(prev_model)
    model_copy(me_seg, me_transfer_seg)
    #TODO: move inside model_copy
    for index, layer in enumerate(me_transfer_seg.layers):
        me_transfer_seg.layers[index].trainable = False
    # o_p: list of intermediate activations from the frozen column.
    o_p = me_transfer_seg([input_score_12])
    # Stem + lateral connection from the frozen stem (o_p[0]).
    en_l = Conv2D(2**5, (7, 7), strides=(1, 1), padding="same")(input_score_48)
    o = adapter(o_p[0], 2**(5), dropout_rate=0.2)
    en_l = add([en_l, o])
    # Encoder stage 1 (+ lateral o_p[1]); note layer_out stores the
    # pre-adapter activation, which the decoder skips use.
    en_l1 = conv_block(en_l, 2**5, (3, 3), strides=(2, 2))
    en_l1 = conv_block(en_l1, 2**5, (3, 3), strides=(1, 1))
    layer_out.append(en_l1)
    o = adapter(o_p[1], 2**(5), dropout_rate=0.2)
    en_l1 = add([en_l1, o])
    # Encoder stage 2 (+ lateral o_p[2]).
    en_l2 = conv_block(en_l1, 2**6, (3, 3), strides=(2, 2))
    en_l2 = conv_block(en_l2, 2**6, (3, 3), strides=(1, 1))
    en_l2 = conv_block(en_l2, 2**6, (3, 3), strides=(1, 1))
    layer_out.append(en_l2)
    o = adapter(o_p[2], 2**(6), dropout_rate=0.2)
    en_l2 = add([en_l2, o])
    # Encoder stage 3 (+ lateral o_p[3]).
    en_l3 = conv_block(en_l2, 2**7, (3, 3), strides=(2, 2))
    en_l3 = conv_block(en_l3, 2**7, (3, 3), strides=(1, 1))
    en_l3 = conv_block(en_l3, 2**7, (3, 3), strides=(1, 1))
    en_l3 = conv_block(en_l3, 2**7, (3, 3), strides=(1, 1))
    layer_out.append(en_l3)
    o = adapter(o_p[3], 2**(7), dropout_rate=0.2)
    en_l3 = add([en_l3, o])
    # Encoder stage 4 (+ lateral o_p[4]).
    en_l4 = conv_block(en_l3, 2**8, (3, 3), strides=(2, 2))
    en_l4 = conv_block(en_l4, 2**8, (3, 3), strides=(1, 1))
    en_l4 = conv_block(en_l4, 2**8, (3, 3), strides=(1, 1))
    en_l4 = conv_block(en_l4, 2**8, (3, 3), strides=(1, 1))
    en_l4 = conv_block(en_l4, 2**8, (3, 3), strides=(1, 1))
    layer_out.append(en_l4)
    o = adapter(o_p[4], 2**(8), dropout_rate=0.2)
    en_l4 = add([en_l4, o])
    feature = en_l4
    # Multi-grid bottleneck (same shape as in ``seg``).
    for i in range(multi_grid_layer_n):
        feature = BatchNormalization()(Activation("relu")(feature))
        feature = Dropout(0.3)(feature)
        m = BatchNormalization()(Conv2D(2**9, (1, 1), strides=(1, 1),
                                        padding="same",
                                        activation="relu")(feature))
        multi_grid = m
        for ii in range(multi_grid_n):
            m = BatchNormalization()(Conv2D(2**9, (3, 3), strides=(1, 1),
                                            dilation_rate=2**ii,
                                            padding="same",
                                            activation="relu")(feature))
            multi_grid = concatenate([multi_grid, m])
        multi_grid = Dropout(0.3)(multi_grid)
        feature = Conv2D(2**9, (1, 1), strides=(1, 1), padding="same")(multi_grid)
    # NOTE(review): lateral o_p[5] add placed after the multi-grid loop; the
    # collapsed source did not preserve indentation here -- confirm against
    # the original file.
    o = adapter(o_p[5], 2**(9), dropout_rate=0.3)
    feature = add([feature, o])
    feature = BatchNormalization()(Activation("relu")(feature))
    feature = Dropout(0.4)(feature)
    feature = Conv2D(2**8, (1, 1), strides=(1, 1), padding="same")(feature)
    # Residual connection around the bottleneck (layer_out[3] is en_l4
    # pre-adapter).
    feature = add([feature, layer_out[3]])
    # Decoder stage 1 (+ lateral o_p[6], skip from encoder stage 3).
    de_l1 = transpose_conv_block(feature, 2**7, (3, 3), strides=(2, 2))
    o = adapter(o_p[6], 2**(7), kernel_size=(1, 5), dropout_rate=0.4)
    de_l1 = add([de_l1, o])
    skip = de_l1
    de_l1 = BatchNormalization()(Activation("relu")(de_l1))
    de_l1 = concatenate(
        [de_l1, BatchNormalization()(Activation("relu")(layer_out[2]))])
    de_l1 = Dropout(0.4)(de_l1)
    de_l1 = Conv2D(2**7, (1, 1), strides=(1, 1), padding="same")(de_l1)
    de_l1 = add([de_l1, skip])
    # Decoder stage 2 (+ lateral o_p[7], skip from encoder stage 2).
    de_l2 = transpose_conv_block(de_l1, 2**6, (3, 3), strides=(2, 2))
    o = adapter(o_p[7], 2**(6), kernel_size=(1, 5), dropout_rate=0.4)
    de_l2 = add([de_l2, o])
    skip = de_l2
    de_l2 = BatchNormalization()(Activation("relu")(de_l2))
    de_l2 = concatenate(
        [de_l2, BatchNormalization()(Activation("relu")(layer_out[1]))])
    de_l2 = Dropout(0.4)(de_l2)
    de_l2 = Conv2D(2**6, (1, 1), strides=(1, 1), padding="same")(de_l2)
    de_l2 = add([de_l2, skip])
    # Decoder stage 3 (+ lateral o_p[8], skip from encoder stage 1).
    de_l3 = transpose_conv_block(de_l2, 2**5, (3, 3), strides=(2, 2))
    o = adapter(o_p[8], 2**(5), kernel_size=(1, 5), dropout_rate=0.4)
    de_l3 = add([de_l3, o])
    skip = de_l3
    de_l3 = BatchNormalization()(Activation("relu")(de_l3))
    de_l3 = concatenate(
        [de_l3, BatchNormalization()(Activation("relu")(layer_out[0]))])
    de_l3 = Dropout(0.4)(de_l3)
    de_l3 = Conv2D(2**5, (1, 1), strides=(1, 1), padding="same")(de_l3)
    de_l3 = add([de_l3, skip])
    # Decoder stage 4 (+ lateral o_p[9]); back to full resolution.
    de_l4 = transpose_conv_block(de_l3, 2**5, (3, 3), strides=(2, 2))
    o = adapter(o_p[9], 2**(5), kernel_size=(1, 5), dropout_rate=0.4)
    de_l4 = add([de_l4, o])
    de_l4 = BatchNormalization()(Activation("relu")(de_l4))
    de_l4 = Dropout(0.4)(de_l4)
    out = Conv2D(2, (1, 1), strides=(1, 1), padding="same",
                 name='prediction')(de_l4)
    model = Model(inputs=[input_score_48, input_score_12], outputs=out)
    return model
def get_unet_3d(vol_x, vol_y, vol_z, chn,
                optimizer=None, loss=dice_coef_loss, metrics=None):
    """Build and compile a 3D U-Net.

    Layout: three encoder levels (Conv3D/BN/ReLU pairs + max pooling), a
    bottleneck, and three decoder levels (upsampling + skip concatenation),
    ending in a 4-channel sigmoid Conv3D. Handles both 'channels_first' and
    'channels_last' image data formats.

    :param vol_x, vol_y, vol_z: spatial dimensions of the input volume.
    :param chn: number of input channels.
    :param optimizer: Keras optimizer; defaults to ``Adam(lr=0.00001)``.
        Defaulting to None avoids the original pitfall of a single Adam
        instance (with its internal slot state) created at import time and
        shared by every model this function builds.
    :param loss: loss function (default: dice coefficient loss).
    :param metrics: list of metrics; defaults to ``[dice_coef]``. A None
        sentinel replaces the original mutable default list.
    :return: the compiled Keras ``Model``.
    """
    # Resolve defaults per call (see docstring for why not in the signature).
    if optimizer is None:
        optimizer = Adam(lr=0.00001)
    if metrics is None:
        metrics = [dice_coef]

    print('Data format: ' + K.image_data_format())
    if K.image_data_format() == 'channels_first':
        input_dims = (chn, vol_x, vol_y, vol_z)
        feat_axis = 1
    else:
        input_dims = (vol_x, vol_y, vol_z, chn)
        feat_axis = 4

    def _conv_bn_relu(x, filters):
        # One Conv3D(3x3x3, same) -> BatchNorm -> ReLU unit; Conv3D takes
        # (filters, kernel_size, ...).
        x = Conv3D(filters, (3, 3, 3), activation=None, padding='same')(x)
        x = BatchNormalization(axis=feat_axis, scale=False)(x)
        return Activation('relu')(x)

    # u-net model
    inputs = Input(shape=input_dims)
    # Encoder
    conv1 = _conv_bn_relu(_conv_bn_relu(inputs, 32), 64)
    pool1 = MaxPooling3D(pool_size=(2, 2, 2), strides=(2, 2, 2))(conv1)
    conv2 = _conv_bn_relu(_conv_bn_relu(pool1, 64), 128)
    pool2 = MaxPooling3D(pool_size=(2, 2, 2), strides=(2, 2, 2))(conv2)
    conv3 = _conv_bn_relu(_conv_bn_relu(pool2, 128), 256)
    pool3 = MaxPooling3D(pool_size=(2, 2, 2), strides=(2, 2, 2))(conv3)
    # Bottleneck
    conv4 = _conv_bn_relu(_conv_bn_relu(pool3, 256), 512)
    # Decoder with skip connections to the encoder levels
    up1 = UpSampling3D(size=(2, 2, 2))(conv4)
    up1 = Concatenate(axis=feat_axis)([conv3, up1])
    upconv1 = _conv_bn_relu(_conv_bn_relu(up1, 256), 256)
    up2 = UpSampling3D(size=(2, 2, 2))(upconv1)
    up2 = Concatenate(axis=feat_axis)([conv2, up2])
    upconv2 = _conv_bn_relu(_conv_bn_relu(up2, 128), 128)
    up3 = UpSampling3D(size=(2, 2, 2))(upconv2)
    up3 = Concatenate(axis=feat_axis)([conv1, up3])
    upconv3 = _conv_bn_relu(_conv_bn_relu(up3, 64), 64)
    # 4-channel per-voxel sigmoid output head.
    conv_final = Conv3D(4, (3, 3, 3), activation='sigmoid',
                        padding='same')(upconv3)

    model = Model(inputs=inputs, outputs=conv_final)
    model.summary()
    model.compile(optimizer=optimizer, loss=loss, metrics=metrics)
    return model
if (subsumpling): x1 = Convolution2D(nb_filter, 1, 1, border_mode='same', subsample=(2, 2), name=conv_name_base + '2c')(input_tensor) x = merge([x, x1], mode='sum') else: x = merge([x, input_tensor], mode='sum') x = Activation('relu')(x) return x input_shape = X_train.shape[1:] img_input = Input(shape=input_shape) x = Convolution2D(16, 3, 3, border_mode='same', name='conv1')(img_input) x = BatchNormalization(axis=bn_axis, name='bn_conv1')(x) x = Activation('relu')(x) for i in range(n): x = identity_block(x, 3, 16, stage=2, block=str(i)) for i in range(n): if (i == 0): x = identity_block(x, 3, 32, stage=3, block=str(i), subsumpling=True) else: x = identity_block(x, 3, 32, stage=3, block=str(i)) for i in range(n):
axis=1)) print("Creating data...") babel_data.create("train", 1000000, True) babel_data.create("dev", 100000, False) print("Loading data...") train = data.ParallelReader("train-source.txt", "source-vocab.txt", "", "train-target.txt", "target-vocab.txt", "") dev = data.ParallelReader("dev-source.txt", "source-vocab.txt", "", "dev-target.txt", "target-vocab.txt", "") print("Building model...") # Encoder source = Input(shape=(None, ), dtype='int32', name='source') embedded = Embedding(output_dim=128, input_dim=train.source_vocab_size(), mask_zero=True)(source) last_hid = Bidirectional(LSTM(output_dim=128))(embedded) # Decoder repeated = RepeatVector(train.target.padded.shape[1])(last_hid) decoder = LSTM(output_dim=128, return_sequences=True, name="decoder1")(repeated) decoder = LSTM(output_dim=128, return_sequences=True, name="decoder2")(decoder) output = TimeDistributed( Dense(output_dim=train.target_vocab_size(), activation='softmax'))(decoder) model = Model([source], output=[output]) print("Compiling model...")
def build_model(fragment_length, nb_filters, nb_output_bins, dilation_depth,
                nb_stacks, use_skip_connections, learn_all_outputs, _log,
                desired_sample_rate, use_bias, res_l2, final_l2):
    """Build a WaveNet-style model (old Keras 1 API with a causal-conv fork).

    :param fragment_length: input length in samples.
    :param nb_filters: filters per dilated conv.
    :param nb_output_bins: quantization bins of the in/output distribution.
    :param dilation_depth: dilations run 2**0 .. 2**dilation_depth per stack.
    :param nb_stacks: number of dilation stacks.
    :param use_skip_connections: sum the per-block skip outputs if True.
    :param learn_all_outputs: must be True; the False path deliberately
        raises (see below).
    :param _log: logger used to report the receptive field.
    :param desired_sample_rate: accepted but not read in this function.
    :param use_bias: whether conv layers use biases.
    :param res_l2, final_l2: L2 regularization weights for the residual and
        final convolutions.
    :return: an uncompiled Keras ``Model``.
    """

    def residual_block(x):
        # Gated activation unit + residual/skip 1x1 convs. NOTE: closes over
        # the enclosing loop variables ``i`` (dilation exponent) and ``s``
        # (stack index) -- it must only be called from inside that loop.
        original_x = x
        # TODO: initalization, regularization?
        # Note: The AtrousConvolution1D with the 'causal' flag is implemented
        # in github.com/basveeling/keras#@wavenet.
        tanh_out = layers.AtrousConvolution1D(
            nb_filters, 2, atrous_rate=2**i, border_mode='valid', causal=True,
            bias=use_bias, name='dilated_conv_%d_tanh_s%d' % (2**i, s),
            activation='tanh', W_regularizer=l2(res_l2))(x)
        sigm_out = layers.AtrousConvolution1D(
            nb_filters, 2, atrous_rate=2**i, border_mode='valid', causal=True,
            bias=use_bias, name='dilated_conv_%d_sigm_s%d' % (2**i, s),
            activation='sigmoid', W_regularizer=l2(res_l2))(x)
        # Gate: tanh branch modulated by sigmoid branch.
        x = layers.Merge(mode='mul',
                         name='gated_activation_%d_s%d' % (i, s))([tanh_out, sigm_out])
        res_x = layers.Convolution1D(nb_filters, 1, border_mode='same',
                                     bias=use_bias,
                                     W_regularizer=l2(res_l2))(x)
        skip_x = layers.Convolution1D(nb_filters, 1, border_mode='same',
                                      bias=use_bias,
                                      W_regularizer=l2(res_l2))(x)
        res_x = layers.Merge(mode='sum')([original_x, res_x])
        return res_x, skip_x

    input = Input(shape=(fragment_length, nb_output_bins), name='input_part')
    out = input
    skip_connections = []
    out = layers.AtrousConvolution1D(nb_filters, 2, atrous_rate=1,
                                     border_mode='valid', causal=True,
                                     name='initial_causal_conv')(out)
    # Stacked dilated blocks; residual_block reads ``s`` and ``i`` here.
    for s in range(nb_stacks):
        for i in range(0, dilation_depth + 1):
            out, skip_out = residual_block(out)
            skip_connections.append(skip_out)

    if use_skip_connections:
        out = layers.Merge(mode='sum')(skip_connections)
    out = layers.Activation('relu')(out)
    out = layers.Convolution1D(nb_output_bins, 1, border_mode='same',
                               W_regularizer=l2(final_l2))(out)
    out = layers.Activation('relu')(out)
    out = layers.Convolution1D(nb_output_bins, 1, border_mode='same')(out)

    if not learn_all_outputs:
        # Deliberately raised: this code path is retired. The Lambda below is
        # intentionally unreachable, kept to document what the path used to do.
        raise DeprecationWarning(
            'Learning on just all outputs is wasteful, now learning only inside receptive field.'
        )
        out = layers.Lambda(
            lambda x: x[:, -1, :], output_shape=(out._keras_shape[-1], ))(
                out)  # Based on gif in deepmind blog: take last output?
    out = layers.Activation('softmax', name="output_softmax")(out)
    model = Model(input, out)

    receptive_field, receptive_field_ms = compute_receptive_field()
    _log.info('Receptive Field: %d (%dms)' %
              (receptive_field, int(receptive_field_ms)))
    return model
def InceptionResNetV2_Multitask_OntV2(self, params):
    """Build a multitask classifier on InceptionResNetV2 with an ontology layer.

    Per non-'sigma' output in ``params['OUTPUTS']``, a Dense head is attached
    to the ImageNet-pretrained backbone's 'avg_pool' features; all heads are
    merged, passed through ``OntologyLayerV2``, and re-split into per-task
    slices. 'sigma' outputs wrap a matching task's activation with
    ``ConcatenateOutputWithSigma`` for uncertainty-weighted losses.
    Sets ``self.ids_inputs``, ``self.ids_outputs`` and ``self.model``.

    NOTE(review): ``params['INPUTS'].keys()[0]`` indexes a keys view -- this
    only works on Python 2, where ``keys()`` returns a list.
    """
    assert len(
        params['INPUTS'].keys()) == 1, 'Number of inputs must be one.'
    assert params['INPUTS'][params['INPUTS'].keys(
    )[0]]['type'] == 'raw-image', 'Input must be of type "raw-image".'
    self.ids_inputs = params['INPUTS'].keys()
    self.ids_outputs = params['OUTPUTS'].keys()
    input_shape = params['INPUTS'][params['INPUTS'].keys()
                                   [0]]['img_size_crop']
    image = Input(name=self.ids_inputs[0], shape=input_shape)
    ##################################################
    # Load Inception model pre-trained on ImageNet
    self.model = InceptionResNetV2(weights='imagenet', input_tensor=image)
    # Recover last layer kept from original model: 'fc2'
    x = self.model.get_layer('avg_pool').output
    ##################################################
    # Define outputs
    outputs = []
    outputs_list = []
    outputs_matching = {}
    num_classes_matching = {}
    # Output order: explicit SORTED_OUTPUTS if given, otherwise non-sigma
    # outputs first (prepended) and sigma outputs last (appended).
    if 'SORTED_OUTPUTS' in params.keys():
        sorted_keys = params['SORTED_OUTPUTS']
    else:
        sorted_keys = []
        for k in params['OUTPUTS'].keys():
            if params['OUTPUTS'][k]['type'] == 'sigma':
                sorted_keys.append(k)
            else:
                sorted_keys.insert(0, k)
    num_classes_list = []
    for id_name in sorted_keys:
        data = params['OUTPUTS'][id_name]
        if data['type'] == 'sigma':
            continue
        else:
            # Count the number of output classes (one class per line in the
            # task's classes file).
            num_classes = 0
            with open(params['DATA_ROOT_PATH'] + '/' + data['classes'],
                      'r') as f:
                for line in f:
                    num_classes += 1
            if data['type'] == 'binary' and params['EMPTY_LABEL'] == True:
                num_classes += 1  # empty label
            # Define only a FC output layer per output
            out = Dense(num_classes)(x)
            out_pact = Activation(data['activation'])(out)  # Activation
            outputs.append(out_pact)
            num_classes_list.append(num_classes)
    # NOTE(review): these two assume at least two non-sigma outputs, and
    # appear unused below -- confirm they are needed.
    n_multiclass = num_classes_list[0]
    n_multilabel = num_classes_list[1]
    total_concepts = np.sum(num_classes_list)
    # Merge all task activations into one concept vector for the ontology.
    x = Merge()(outputs)
    #x = Dense(total_concepts)(x)
    #x = BatchNormalization()(x)
    # -1 - 1
    x = OntologyLayerV2((None, total_concepts), params["ONTOLOGY"])(x)
    # Re-split the ontology output into consecutive per-task slices.
    ont_outputs = []
    for idx, num_classes in enumerate(num_classes_list):
        if idx == 0:
            init_idx = 0
            end_idx = num_classes
        else:
            init_idx = end_idx
            end_idx = end_idx + num_classes
        # NOTE(review): the lambda closes over init_idx/end_idx late-binding;
        # Keras evaluates it here during graph construction so slices are
        # correct, but re-calling or deserializing this Lambda later would
        # use the loop's final values -- verify if the model is saved/loaded.
        out = Lambda(lambda x: x[:, init_idx:end_idx])(x)
        ont_outputs.append(out)
    curr_output = 0
    for id_name in sorted_keys:
        data = params['OUTPUTS'][id_name]
        # Special output that calculates sigmas for uncertainty loss
        if data['type'] == 'sigma':
            match_output = params['OUTPUTS'][id_name]['output_id']
            match_act = outputs_matching[match_output]
            out_sigma = ConcatenateOutputWithSigma(
                (None, num_classes_matching[match_output] + 1),
                name_suffix=id_name,
                name=id_name)(match_act)
            outputs_list.append(out_sigma)
        else:
            out = ont_outputs[curr_output]
            if data['activation'] == "softmax":
                out_act = Activation(data['activation'], name=id_name)(out)
            elif data['activation'] == "sigmoid":
                # Clipped ReLU stands in for sigmoid here (see commented
                # alternative); activations other than softmax/sigmoid would
                # leave out_act unbound -- presumably never configured.
                out_act = Activation(create_relu_advanced(1.0),
                                     name=id_name)(out)
                #out_act = Activation(data['activation'], name=id_name)(out)
            outputs_list.append(out_act)
            outputs_matching[id_name] = out_act
            num_classes_matching[id_name] = num_classes_list[curr_output]
            curr_output = curr_output + 1
    print(len(outputs_list))
    self.model = Model(input=image, output=outputs_list)
def _input_batch_input(self): return Input(name=Wav2Letter.InputNames.input_batch, batch_shape=self.predictive_net.input_shape)