def get_answer(self):
    """Return the 'answer' input tensor, creating it on first access.

    The tensor is an int32 ``Input`` named 'answer' with shape
    ``(self.config['answer_len'],)``. It is cached on ``self._answer`` so
    later calls return the same object.
    """
    cached = self._answer
    if cached is not None:
        return cached

    answer_shape = (self.config['answer_len'],)
    self._answer = Input(shape=answer_shape, dtype='int32', name='answer')
    return self._answer
def unet_model_3d_multiGPU(input_shape, pool_size=(2, 2, 2), n_labels=1, initial_learning_rate=0.00001,
                           deconvolution=False, depth=4, n_base_filters=32,
                           include_label_wise_dice_coefficients=False, metrics=dice_coefficient,
                           batch_normalization=False, activation_name="sigmoid", GPU=1):
    """
    Builds the 3D UNet Keras model, optionally replicated over several GPUs.

    :param input_shape: Shape of the input data (n_chanels, x_size, y_size, z_size). The x, y, and z sizes must be
    divisible by the pool size to the power of the depth of the UNet, that is pool_size^depth.
    :param pool_size: Pool size for the max pooling operations.
    :param n_labels: Number of binary labels that the model is learning.
    :param initial_learning_rate: Initial learning rate for the model. This will be decayed during training.
    :param deconvolution: If set to True, will use transpose convolution(deconvolution) instead of up-sampling. This
    increases the amount memory required during training.
    :param depth: indicates the depth of the U-shape for the model. The greater the depth, the more max pooling
    layers will be added to the model. Lowering the depth may reduce the amount of memory required for training.
    :param n_base_filters: The number of filters that the first layer in the convolution network will have. Following
    layers will contain a multiple of this number. Lowering this number will likely reduce the amount of memory
    required to train the model.
    :param include_label_wise_dice_coefficients: If True and n_labels is greater than 1, model will report the dice
    coefficient for each label as metric.
    :param metrics: List metrics to be calculated during model training (default is dice coefficient). A single
    metric is wrapped into a list.
    :param batch_normalization: Forwarded unchanged to every create_convolution_block call.
    :param activation_name: Name of the activation applied to the output of the final 1x1x1 convolution.
    :param GPU: Number of GPUs to train on. For values <= 1 a single model is built; otherwise the model is wrapped
    with multi_gpu_model(model, gpus=GPU).
    :return: Tuple (cpu_model, model) of compiled models. cpu_model is the plain (template) model; model is the
    multi-GPU wrapper when GPU > 1 and the very same object as cpu_model otherwise.
    """
    inputs = Input(input_shape)
    current_layer = inputs
    levels = list()

    # add levels with max pooling
    for layer_depth in range(depth):
        layer1 = create_convolution_block(input_layer=current_layer,
                                          n_filters=n_base_filters*(2**layer_depth),
                                          batch_normalization=batch_normalization)
        layer2 = create_convolution_block(input_layer=layer1,
                                          n_filters=n_base_filters*(2**layer_depth)*2,
                                          batch_normalization=batch_normalization)
        if layer_depth < depth - 1:
            current_layer = MaxPooling3D(pool_size=pool_size)(layer2)
            levels.append([layer1, layer2, current_layer])
        else:
            # bottom of the U: no pooling after the last level
            current_layer = layer2
            levels.append([layer1, layer2])

    # add levels with up-convolution or up-sampling
    for layer_depth in range(depth-2, -1, -1):
        up_convolution = get_up_convolution(pool_size=pool_size, deconvolution=deconvolution,
                                            n_filters=current_layer._keras_shape[1])(current_layer)
        # skip connection: join with the second conv block of the matching encoder level (channel axis 1)
        concat = concatenate([up_convolution, levels[layer_depth][1]], axis=1)
        current_layer = create_convolution_block(n_filters=levels[layer_depth][1]._keras_shape[1],
                                                 input_layer=concat,
                                                 batch_normalization=batch_normalization)
        current_layer = create_convolution_block(n_filters=levels[layer_depth][1]._keras_shape[1],
                                                 input_layer=current_layer,
                                                 batch_normalization=batch_normalization)

    final_convolution = Conv3D(n_labels, (1, 1, 1))(current_layer)
    act = Activation(activation_name)(final_convolution)

    if GPU <= 1:
        print("[INFO] training with 1 GPU...")
        # The only GPU of a single-GPU host is device index 0; "/GPU:1" would name a second device.
        with tf.device("/gpu:0"):
            model = Model(inputs=inputs, outputs=act)
        cpu_model = model
    else:
        print("[INFO] training with {} GPUs...".format(GPU))
        # we'll store a copy of the model on *every* GPU and then combine
        # the results from the gradient updates on the CPU
        with tf.device("/cpu:0"):
            # initialize the template model
            model = Model(inputs=inputs, outputs=act)
        cpu_model = model
        # make the model parallel
        model = multi_gpu_model(model, gpus=GPU)

    if not isinstance(metrics, list):
        metrics = [metrics]

    if include_label_wise_dice_coefficients and n_labels > 1:
        label_wise_dice_metrics = [get_label_dice_coefficient_function(index) for index in range(n_labels)]
        if metrics:
            metrics = metrics + label_wise_dice_metrics
        else:
            metrics = label_wise_dice_metrics

    model.compile(optimizer=Adam(lr=initial_learning_rate), loss=dice_coefficient_loss, metrics=metrics)
    if cpu_model is not model:
        # Only the multi-GPU path has a distinct template model that still needs compiling.
        cpu_model.compile(optimizer=Adam(lr=initial_learning_rate), loss=dice_coefficient_loss, metrics=metrics)
    return cpu_model, model
def unet_model_3d(input_shape, downsize_filters_factor=1, pool_size=(2, 2, 2), n_labels=1,
                  initial_learning_rate=0.01, deconvolution=False):
    """
    Builds and compiles the 3D U-Net Keras model.

    The [U-Net](https://arxiv.org/abs/1505.04597) is a fully-convolutional encoder/decoder: the encoder captures
    context, the decoder restores localization. The original author notes that the parameter count forces small
    inputs (e.g. 144x144x144 images already consume 32 GB of memory).

    :param input_shape: Shape of the input data (x_size, y_size, z_size, n_channels).
    :param downsize_filters_factor: Factor by which every filter count is divided. Making this value larger will
    reduce the amount of memory the model will need during training.
    :param pool_size: Pool size for the max pooling operations.
    :param n_labels: Number of binary labels that the model is learning.
    :param initial_learning_rate: Initial learning rate for the model. This will be decayed during training.
    :param deconvolution: If set to True, will use transpose convolution(deconvolution) instead of upsampling. This
    increases the amount memory required during training.
    :return: Untrained 3D UNet Model
    """

    def conv(tensor, base_filters):
        # 3x3x3 ReLU convolution whose filter count is scaled by downsize_filters_factor
        n_filters = int(base_filters / downsize_filters_factor)
        return Conv3D(n_filters, (3, 3, 3), activation='relu', padding='same')(tensor)

    def upconv(tensor, level, base_filters):
        # NOTE(review): with the channels-last layout documented above, input_shape[-3:] is
        # (y_size, z_size, n_channels) rather than the three spatial sizes -- confirm against get_upconv.
        return get_upconv(pool_size=pool_size, deconvolution=deconvolution, depth=level,
                          nb_filters=int(base_filters / downsize_filters_factor),
                          image_shape=input_shape[-3:])(tensor)

    inputs = Input(input_shape)

    # contracting path
    conv1 = conv(conv(inputs, 32), 64)
    pool1 = MaxPooling3D(pool_size=pool_size)(conv1)

    conv2 = conv(conv(pool1, 64), 128)
    pool2 = MaxPooling3D(pool_size=pool_size)(conv2)

    conv3 = conv(conv(pool2, 128), 256)
    print(conv3.shape)
    pool3 = MaxPooling3D(pool_size=pool_size)(conv3)

    conv4 = conv(conv(pool3, 256), 512)
    print(conv4.shape)

    # expanding path: up-convolve, join with the matching encoder output on axis 4, convolve twice
    up5 = upconv(conv4, 2, 512)
    print(up5.shape)
    up5 = concatenate([up5, conv3], axis=4)
    conv5 = conv(conv(up5, 256), 256)

    up6 = upconv(conv5, 1, 256)
    up6 = concatenate([up6, conv2], axis=4)
    conv6 = conv(conv(up6, 128), 128)

    up7 = upconv(conv6, 0, 128)
    up7 = concatenate([up7, conv1], axis=4)
    conv7 = conv(conv(up7, 64), 64)

    # one sigmoid output channel per label
    conv8 = Conv3D(n_labels, (1, 1, 1))(conv7)
    act = Activation('sigmoid')(conv8)

    model = Model(inputs=inputs, outputs=act)
    model.compile(optimizer=Adam(lr=initial_learning_rate), loss=dice_coef_loss, metrics=[dice_coef])
    return model
def build_model():
    """Build a five-scale image pyramid network and return the (uncompiled) Model.

    The input (shape taken from the module-level ``image_shape``) is average-pooled by
    factors 2, 4, 8 and 16. Every scale goes through a conv block, then the scales are
    merged from coarsest to finest by up-sampling and concatenation. A final 3-channel
    conv block produces the output.
    """

    def conv_block(tensor, nb_channel):
        # two 3x3 convolutions followed by one 1x1, each with BatchNorm + LeakyReLU
        for kernel in (3, 3, 1):
            tensor = Convolution2D(nb_channel, kernel, kernel, border_mode='same', init='he_normal')(tensor)
            tensor = BatchNormalization()(tensor)
            tensor = LeakyReLU()(tensor)
        return tensor

    def fuse(coarse, fine):
        # up-sample the coarser map x2, normalise both maps, then concatenate them
        coarse = UpSampling2D((2, 2))(coarse)
        coarse = BatchNormalization()(coarse)
        fine = BatchNormalization()(fine)
        return merge([coarse, fine], mode='concat')

    x = Input(image_shape)

    # scales[0] is full resolution, scales[4] is the 1/16 resolution map
    scales = [x]
    for factor in (2, 4, 8, 16):
        scales.append(AveragePooling2D(pool_size=(factor, factor))(x))

    scales = [conv_block(scale, 8) for scale in scales]

    # walk back up the pyramid, widening the block at every finer level
    merged = scales[4]
    for level, width in zip((3, 2, 1, 0), (16, 24, 32, 40)):
        merged = fuse(merged, scales[level])
        merged = conv_block(merged, width)

    y = conv_block(merged, 3)

    model = Model(x, y)
    return model
def test_load_layers():
    """Check that Keras-1 style weights are converted and loaded into a
    TimeDistributed(Conv2D) + Bidirectional(ConvLSTM2D) model."""
    from keras.layers import ConvLSTM2D, TimeDistributed, Bidirectional, Conv2D, Input
    from keras.models import Model

    if K.backend() in ('tensorflow', 'cntk'):
        input_shape = (10, 20, 20, 1)
    else:
        input_shape = (10, 1, 20, 20)
    inputs = Input(shape=input_shape)

    td_conv = TimeDistributed(Conv2D(15, (5, 5)))(inputs)
    bi_convlstm2d = Bidirectional(ConvLSTM2D(10, (3, 3)), merge_mode='concat')(td_conv)
    model = Model(inputs=inputs, outputs=bi_convlstm2d)

    weight_value_tuples = []

    # TimeDistributed Conv2D layer
    # use 'channels_first' data format to check that the function is being called correctly for Conv2D
    # old: (filters, stack_size, kernel_rows, kernel_cols)
    # new: (kernel_rows, kernel_cols, stack_size, filters)
    weight_tensor_td_conv_old = [np.zeros((15, 1, 5, 5)), np.zeros((15, ))]
    td_conv_layer = model.layers[1]
    td_conv_layer.layer.data_format = 'channels_first'
    weight_tensor_td_conv_new = topology.preprocess_weights_for_loading(
        td_conv_layer,
        weight_tensor_td_conv_old,
        original_keras_version='1')
    symbolic_weights = td_conv_layer.weights
    assert (len(symbolic_weights) == len(weight_tensor_td_conv_new))
    weight_value_tuples.extend(zip(symbolic_weights, weight_tensor_td_conv_new))

    # Bidirectional ConvLSTM2D layer
    # old ConvLSTM2D took a list of 12 weight tensors, returns a list of 3 concatenated larger tensors.
    weight_tensor_bi_convlstm_old = []
    for _ in range(2 * 4):  # 2 directions x 4 gates
        weight_tensor_bi_convlstm_old.extend([
            np.zeros((3, 3, 15, 10)),  # kernel
            np.zeros((3, 3, 10, 10)),  # recurrent kernel
            np.zeros((10, )),          # bias
        ])

    bi_convlstm_layer = model.layers[2]
    weight_tensor_bi_convlstm_new = topology.preprocess_weights_for_loading(
        bi_convlstm_layer,
        weight_tensor_bi_convlstm_old,
        original_keras_version='1')

    symbolic_weights = bi_convlstm_layer.weights
    assert (len(symbolic_weights) == len(weight_tensor_bi_convlstm_new))
    weight_value_tuples.extend(zip(symbolic_weights, weight_tensor_bi_convlstm_new))

    K.batch_set_value(weight_value_tuples)

    # every loaded backend value must equal the converted tensor it came from
    for index in range(2):
        assert np.all(K.eval(model.layers[1].weights[index]) == weight_tensor_td_conv_new[index])
    for index in range(6):
        assert np.all(K.eval(model.layers[2].weights[index]) == weight_tensor_bi_convlstm_new[index])
def rgb_to_grayscale(input):
    """Collapse axis 3 of the tensor by taking its mean, turning the 3 RGB layers into one grayscale value."""
    grayscale = K.mean(input, axis=3)
    return grayscale


def rgb_to_grayscale_output_shape(input_shape):
    """Shape produced by `rgb_to_grayscale`: the input shape without its last entry."""
    return input_shape[:-1]


nb_val_samples = 5000
img_width = 299
img_height = 299

print("Building model...")
input_tensor = Input(shape=(img_width, img_height, 3))

# CNN branch: Xception without its classification head, globally average-pooled
cnn_model = Xception(weights='imagenet', include_top=False, input_tensor=input_tensor)
cnn_bottleneck = GlobalAveragePooling2D()(cnn_model.output)

# RNN branch: grayscale image viewed as a sequence and fed through two stacked LSTMs
x = Lambda(rgb_to_grayscale, rgb_to_grayscale_output_shape)(input_tensor)
# 23 timesteps, input dim of each timestep 3887 (23 * 3887 == 299 * 299)
x = Reshape((23, 3887))(x)
x = LSTM(2048, return_sequences=True)(x)
rnn_output = LSTM(2048)(x)

# Combine the CNN bottleneck and the RNN output by element-wise multiplication
x = merge([cnn_bottleneck, rnn_output], mode='mul')
def test_multi_input_layer():
    """Exercise a two-input / two-output graph built around a shared Dense
    layer and a concatenate merge: topology bookkeeping, shape and mask
    inference, a forward pass, and a JSON round trip."""

    def names_of(layer_list):
        return [layer.name for layer in layer_list]

    def check_forward_pass(graph):
        # feed two random (10, 32) batches and verify both output shapes
        fn = K.function(graph.inputs, graph.outputs)
        input_a_np = np.random.random((10, 32))
        input_b_np = np.random.random((10, 32))
        fn_outputs = fn([input_a_np, input_b_np])
        assert [out.shape for out in fn_outputs] == [(10, 64), (10, 5)]

    ####################################################
    # test multi-input layer
    a = Input(shape=(32, ), name='input_a')
    b = Input(shape=(32, ), name='input_b')

    shared_dense = Dense(16, name='dense_1')
    a_2 = shared_dense(a)
    b_2 = shared_dense(b)

    merged = layers.concatenate([a_2, b_2], name='merge')
    assert merged._keras_shape == (None, 16 * 2)

    merge_layer, merge_node_index, merge_tensor_index = merged._keras_history
    assert merge_node_index == 0
    assert merge_tensor_index == 0

    assert len(merge_layer.inbound_nodes) == 1
    assert len(merge_layer.outbound_nodes) == 0

    first_node = merge_layer.inbound_nodes[0]
    assert len(first_node.input_tensors) == 2
    assert len(merge_layer.inbound_nodes[0].inbound_layers) == 2

    c = Dense(64, name='dense_2')(merged)
    d = Dense(5, name='dense_3')(c)

    model = Model(inputs=[a, b], outputs=[c, d], name='model')
    assert len(model.layers) == 6

    print('model.input_layers:', model.input_layers)
    print('model.input_layers_node_indices:', model.input_layers_node_indices)
    print('model.input_layers_tensor_indices:', model.input_layers_tensor_indices)
    print('model.output_layers', model.output_layers)

    def inferred_shapes():
        return model.compute_output_shape([(None, 32), (None, 32)])

    def inferred_masks():
        return model.compute_mask([a, b], [None, None])

    expected_shapes = [(None, 64), (None, 5)]

    print('output_shape:', inferred_shapes())
    assert inferred_shapes() == expected_shapes

    print('mask:', inferred_masks())
    assert inferred_masks() == [None, None]

    # shape inference is queried a second time, exactly as before
    print('output_shape:', inferred_shapes())
    assert inferred_shapes() == expected_shapes

    # we don't check names of first 2 layers (inputs) because
    # ordering of same-level layers is not fixed
    expected_tail = ['dense_1', 'merge', 'dense_2', 'dense_3']
    print('layers:', names_of(model.layers))
    assert names_of(model.layers)[2:] == expected_tail
    print('input_layers:', names_of(model.input_layers))
    assert names_of(model.input_layers) == ['input_a', 'input_b']
    print('output_layers:', names_of(model.output_layers))
    assert names_of(model.output_layers) == ['dense_2', 'dense_3']

    # actually run model
    check_forward_pass(model)

    # test get_source_inputs
    print(get_source_inputs(c))
    assert get_source_inputs(c) == [a, b]

    # serialization / deserialization
    json_config = model.to_json()
    recreated_model = model_from_json(json_config)
    recreated_model.compile('rmsprop', 'mse')

    print('recreated:')
    print(names_of(recreated_model.layers))
    print(names_of(recreated_model.input_layers))
    print(names_of(recreated_model.output_layers))
    assert names_of(recreated_model.layers)[2:] == expected_tail
    assert names_of(recreated_model.input_layers) == ['input_a', 'input_b']
    assert names_of(recreated_model.output_layers) == ['dense_2', 'dense_3']

    check_forward_pass(recreated_model)
def test_recursion():
    """Test using a Model as a layer inside other models.

    Covers: calling a two-input/two-output model on new inputs, wrapping it in
    further models, config/JSON/YAML round trips, rejection of invalid graphs,
    and (TensorFlow backend only) calling layers and models on raw TF tensors.
    """

    def random_batches(count):
        # `count` independent random batches of shape (10, 32)
        return [np.random.random((10, 32)) for _ in range(count)]

    ####################################################
    # test recursion
    a = Input(shape=(32, ), name='input_a')
    b = Input(shape=(32, ), name='input_b')

    dense = Dense(16, name='dense_1')
    a_2 = dense(a)
    b_2 = dense(b)
    merged = layers.concatenate([a_2, b_2], name='merge')
    c = Dense(64, name='dense_2')(merged)
    d = Dense(5, name='dense_3')(c)

    model = Model(inputs=[a, b], outputs=[c, d], name='model')

    e = Input(shape=(32, ), name='input_e')
    f = Input(shape=(32, ), name='input_f')
    g, h = model([e, f])

    assert g._keras_shape == c._keras_shape
    assert h._keras_shape == d._keras_shape

    # test separate manipulation of different layer outputs
    i = Dense(7, name='dense_4')(h)

    final_model = Model(inputs=[e, f], outputs=[i, g], name='final')
    assert len(final_model.inputs) == 2
    assert len(final_model.outputs) == 2
    assert len(final_model.layers) == 4

    # we don't check names of first 2 layers (inputs) because
    # ordering of same-level layers is not fixed
    print('final_model layers:', [layer.name for layer in final_model.layers])
    assert [layer.name for layer in final_model.layers][2:] == ['model', 'dense_4']

    print(model.compute_mask([e, f], [None, None]))
    assert model.compute_mask([e, f], [None, None]) == [None, None]

    print(final_model.compute_output_shape([(10, 32), (10, 32)]))
    assert final_model.compute_output_shape([(10, 32), (10, 32)]) == [(10, 7), (10, 64)]

    # run recursive model
    fn = K.function(final_model.inputs, final_model.outputs)
    fn_outputs = fn(random_batches(2))
    assert [x.shape for x in fn_outputs] == [(10, 7), (10, 64)]

    # test serialization
    model_config = final_model.get_config()
    print(json.dumps(model_config, indent=4))
    recreated_model = Model.from_config(model_config)

    fn = K.function(recreated_model.inputs, recreated_model.outputs)
    fn_outputs = fn(random_batches(2))
    assert [x.shape for x in fn_outputs] == [(10, 7), (10, 64)]

    ####################################################
    # test multi-input multi-output

    j = Input(shape=(32, ), name='input_j')
    k = Input(shape=(32, ), name='input_k')
    m, n = model([j, k])

    o = Input(shape=(32, ), name='input_o')
    p = Input(shape=(32, ), name='input_p')
    q, r = model([o, p])

    assert n._keras_shape == (None, 5)
    assert q._keras_shape == (None, 64)
    s = layers.concatenate([n, q], name='merge_nq')
    assert s._keras_shape == (None, 64 + 5)

    # test with single output as 1-elem list
    multi_io_model = Model([j, k, o, p], [s])

    fn = K.function(multi_io_model.inputs, multi_io_model.outputs)
    fn_outputs = fn(random_batches(4))
    assert [x.shape for x in fn_outputs] == [(10, 69)]

    # test with single output as tensor
    multi_io_model = Model([j, k, o, p], s)

    fn = K.function(multi_io_model.inputs, multi_io_model.outputs)
    fn_outputs = fn(random_batches(4))
    # note that the output of the K.function will still be a 1-elem list
    assert [x.shape for x in fn_outputs] == [(10, 69)]

    # test serialization
    print('multi_io_model.layers:', multi_io_model.layers)
    print('len(model.inbound_nodes):', len(model.inbound_nodes))
    print('len(model.outbound_nodes):', len(model.outbound_nodes))
    model_config = multi_io_model.get_config()
    print(model_config)
    print(json.dumps(model_config, indent=4))
    recreated_model = Model.from_config(model_config)

    fn = K.function(recreated_model.inputs, recreated_model.outputs)
    fn_outputs = fn(random_batches(4))
    # note that the output of the K.function will still be a 1-elem list
    assert [x.shape for x in fn_outputs] == [(10, 69)]

    config = model.get_config()
    Model.from_config(config)

    model.summary()
    json_str = model.to_json()
    model_from_json(json_str)

    yaml_str = model.to_yaml()
    model_from_yaml(yaml_str)

    ####################################################
    # test invalid graphs

    # input is not an Input tensor
    j = Input(shape=(32, ), name='input_j')
    j = Dense(32)(j)
    k = Input(shape=(32, ), name='input_k')
    m, n = model([j, k])

    with pytest.raises(TypeError):
        Model([j, k], [m, n])

    # disconnected graph
    j = Input(shape=(32, ), name='input_j')
    k = Input(shape=(32, ), name='input_k')
    m, n = model([j, k])
    with pytest.raises(RuntimeError):
        Model([j], [m, n])

    # redundant outputs
    j = Input(shape=(32, ), name='input_j')
    k = Input(shape=(32, ), name='input_k')
    m, n = model([j, k])
    # this should work with a warning
    Model([j, k], [m, n, n])

    # redundant inputs
    j = Input(shape=(32, ), name='input_j')
    k = Input(shape=(32, ), name='input_k')
    m, n = model([j, k])
    with pytest.raises(ValueError):
        Model([j, k, j], [m, n])

    # garbage (a non-tensor value) among the outputs
    j = Input(shape=(32, ), name='input_j')
    k = Input(shape=(32, ), name='input_k')
    m, n = model([j, k])
    with pytest.raises(TypeError):
        Model([j, k], [m, n, 0])

    ####################################################
    # test calling layers/models on TF tensors

    # Use the public backend accessor, as test_load_layers does, instead of the private K._BACKEND.
    if K.backend() == 'tensorflow':
        import tensorflow as tf
        j = Input(shape=(32, ), name='input_j')
        k = Input(shape=(32, ), name='input_k')
        m, n = model([j, k])
        tf_model = Model([j, k], [m, n])

        j_tf = tf.placeholder(dtype=K.floatx())
        k_tf = tf.placeholder(dtype=K.floatx())
        m_tf, n_tf = tf_model([j_tf, k_tf])
        assert m_tf.get_shape().as_list() == [None, 64]
        assert n_tf.get_shape().as_list() == [None, 5]

        # test merge
        layers.concatenate([j_tf, k_tf], axis=1)
        layers.add([j_tf, k_tf])

        # test tensor input
        x = tf.placeholder(shape=(None, 2), dtype=K.floatx())
        InputLayer(input_tensor=x)

        x = Input(tensor=x)
        Dense(2)(x)
def test_node_construction():
    """Test the node bookkeeping Keras performs when layers are called.

    Checks the node created by an Input layer, the two inbound nodes created by
    calling one Dense layer on two inputs, and the single-node vs. multi-node
    behaviour of the ``input``/``output``/``*_mask``/``*_shape`` accessors.
    """
    ####################################################
    # test basics

    a = Input(shape=(32, ), name='input_a')
    b = Input(shape=(32, ), name='input_b')

    assert a._keras_shape == (None, 32)
    a_layer, a_node_index, a_tensor_index = a._keras_history
    b_layer, _b_node_index, _b_tensor_index = b._keras_history
    assert len(a_layer.inbound_nodes) == 1
    # compare by value: `is 0` relied on CPython small-int caching
    assert a_tensor_index == 0
    node = a_layer.inbound_nodes[a_node_index]
    assert node.outbound_layer == a_layer

    assert isinstance(node.inbound_layers, list)
    assert node.inbound_layers == []
    assert isinstance(node.input_tensors, list)
    assert node.input_tensors == [a]
    assert isinstance(node.input_masks, list)
    assert node.input_masks == [None]
    assert isinstance(node.input_shapes, list)
    assert node.input_shapes == [(None, 32)]

    assert isinstance(node.output_tensors, list)
    assert node.output_tensors == [a]
    assert isinstance(node.output_shapes, list)
    assert node.output_shapes == [(None, 32)]
    assert isinstance(node.output_masks, list)
    assert node.output_masks == [None]

    # calling the same layer twice creates two inbound nodes on it
    dense = Dense(16, name='dense_1')
    a_2 = dense(a)
    b_2 = dense(b)

    assert len(dense.inbound_nodes) == 2
    assert len(dense.outbound_nodes) == 0
    assert dense.inbound_nodes[0].inbound_layers == [a_layer]
    assert dense.inbound_nodes[0].outbound_layer == dense
    assert dense.inbound_nodes[1].inbound_layers == [b_layer]
    assert dense.inbound_nodes[1].outbound_layer == dense

    assert dense.inbound_nodes[0].input_tensors == [a]
    assert dense.inbound_nodes[1].input_tensors == [b]

    assert dense.inbound_nodes[0].get_config()['inbound_layers'] == ['input_a']
    assert dense.inbound_nodes[1].get_config()['inbound_layers'] == ['input_b']

    # test layer properties
    test_layer = Dense(16, name='test_layer')
    a_test = test_layer(a)
    assert K.int_shape(test_layer.kernel) == (32, 16)
    assert test_layer.input == a
    assert test_layer.output == a_test
    assert test_layer.input_mask is None
    assert test_layer.output_mask is None
    assert test_layer.input_shape == (None, 32)
    assert test_layer.output_shape == (None, 16)

    # `dense` has two inbound nodes, so the single-node accessors must raise
    with pytest.raises(AttributeError):
        dense.input
    with pytest.raises(AttributeError):
        dense.output
    with pytest.raises(AttributeError):
        dense.input_mask
    with pytest.raises(AttributeError):
        dense.output_mask

    # ... while the per-node accessors work
    assert dense.get_input_at(0) == a
    assert dense.get_input_at(1) == b
    assert dense.get_output_at(0) == a_2
    assert dense.get_output_at(1) == b_2
    assert dense.get_input_shape_at(0) == (None, 32)
    assert dense.get_input_shape_at(1) == (None, 32)
    assert dense.get_output_shape_at(0) == (None, 16)
    assert dense.get_output_shape_at(1) == (None, 16)
    assert dense.get_input_mask_at(0) is None
    assert dense.get_input_mask_at(1) is None
    assert dense.get_output_mask_at(0) is None
    assert dense.get_output_mask_at(1) is None
def build(self):
    """Build the representation, similarity and training models.

    Creates and stores four Keras models on the instance:

    * ``self._code_repr_model`` -- (methname, apiseq, tokens, desc) -> attention-pooled code vector
    * ``self._desc_repr_model`` -- (methname, apiseq, tokens, desc) -> attention-pooled description vector
    * ``self._sim_model``       -- cosine similarity of the two vectors above
    * ``self._training_model``  -- margin loss ``max(1e-6, margin - sim(good) + sim(bad))``, wired to the
      pre-existing inputs ``self.methname``, ``self.apiseq``, ``self.tokens``, ``self.desc_good`` and
      ``self.desc_bad``

    Reads ``self.config['workdir']``, ``self.data_params`` and ``self.model_params``. Initial embedding
    weights are loaded with ``np.load`` whenever the matching ``init_embed_weights_*`` entry is not None.
    A summary of every model is printed.
    """

    def load_init_weights(param_key):
        # Return [array] loaded from workdir + configured path, or None when the entry is None.
        rel_path = self.model_params[param_key]
        if rel_path is None:
            return None
        return [np.load(self.config['workdir'] + rel_path)]

    def conv_branch(sequence, embed_tag, conv_tag, weights):
        # Embedding -> dropout -> three parallel Conv1D (kernel sizes 2, 3, 4) -> shared dropout
        # -> concatenation along axis 1. Layer names are derived from the two tags.
        embedded = Embedding(
            input_dim=self.data_params['n_words'],
            output_dim=self.model_params.get('n_embed_dims', 100),
            weights=weights,
            # mask_zero: whether 0 in the input is a special "padding" value that should be masked out.
            # If set True, all subsequent layers must support masking, otherwise an exception is raised.
            mask_zero=False,
            name='embedding_' + embed_tag)(sequence)
        embedded = Dropout(0.25, name='dropout_' + embed_tag + '_embed')(embedded)

        conv_layers = [
            Conv1D(100, kernel_size, padding='valid', activation='relu', strides=1,
                   name='%s_conv%d' % (conv_tag, index))
            for index, kernel_size in enumerate((2, 3, 4), start=1)
        ]
        conv_outputs = [conv(embedded) for conv in conv_layers]

        # one Dropout layer instance is shared by the three convolution outputs
        shared_dropout = Dropout(0.25, name='dropout_' + conv_tag + '_conv')
        dropped = [shared_dropout(output) for output in conv_outputs]
        return Concatenate(name=conv_tag + '_merge', axis=1)(dropped)

    # 1. Build Code Representation Model
    logger.debug('Building Code Representation Model')
    methname = Input(shape=(self.data_params['methname_len'], ), dtype='int32', name='methname')
    apiseq = Input(shape=(self.data_params['apiseq_len'], ), dtype='int32', name='apiseq')
    tokens = Input(shape=(self.data_params['tokens_len'], ), dtype='int32', name='tokens')

    ## method name representation ##
    merged_methname = conv_branch(methname, 'methname', 'methname',
                                  load_init_weights('init_embed_weights_methname'))

    ## API Sequence Representation ##
    # NOTE(review): the API embedding has never been given its initial weights (the `weights=`
    # argument was commented out). The file is still loaded so a bad path or missing config key
    # fails exactly as before -- confirm whether the weights should actually be applied.
    load_init_weights('init_embed_weights_api')
    merged_api = conv_branch(apiseq, 'apiseq', 'api', None)

    ## Tokens Representation ##
    merged_tokens = conv_branch(tokens, 'tokens', 'tokens',
                                load_init_weights('init_embed_weights_tokens'))

    # merge code (original author's shape hint: (122, 200) -- not verifiable from this block)
    merged_code = Concatenate(name='code_merge', axis=1)(
        [merged_methname, merged_api, merged_tokens])

    # 2. Build Desc Representation Model
    logger.debug('Building Desc Representation Model')
    desc = Input(shape=(self.data_params['desc_len'], ), dtype='int32', name='desc')
    # same embedding + convolution stack as the code branches (no RNN is involved)
    merged_desc = conv_branch(desc, 'desc', 'desc',
                              load_init_weights('init_embed_weights_desc'))

    # AP networks: attention between code and description features
    attention = AttentionLayer(name='attention_layer')
    attention_out = attention([merged_code, merged_desc])

    # max-pool the attention output, softmax it, and use it to weight the description features
    gmp_1 = GlobalMaxPooling1D(name='blobalmaxpool_colum')
    att_1 = gmp_1(attention_out)
    activ1 = Activation('softmax', name='AP_active_colum')
    att_1_next = activ1(att_1)
    dot1 = Dot(axes=1, normalize=False, name='column_dot')
    desc_out = dot1([att_1_next, merged_desc])

    # same on the transposed attention output to weight the code features
    attention_trans_layer = Lambda(
        lambda x: K.permute_dimensions(x, (0, 2, 1)),
        name='trans_attention')
    attention_transposed = attention_trans_layer(attention_out)
    gmp_2 = GlobalMaxPooling1D(name='blobalmaxpool_row')
    att_2 = gmp_2(attention_transposed)
    activ2 = Activation('softmax', name='AP_active_row')
    att_2_next = activ2(att_2)
    dot2 = Dot(axes=1, normalize=False, name='row_dot')
    code_out = dot2([att_2_next, merged_code])

    # The two model names used to be swapped (code model named 'desc_repr_model' and vice versa).
    self._code_repr_model = Model(inputs=[methname, apiseq, tokens, desc],
                                  outputs=[code_out],
                                  name='code_repr_model')
    print('\nsummary of code representation model')
    self._code_repr_model.summary()

    self._desc_repr_model = Model(inputs=[methname, apiseq, tokens, desc],
                                  outputs=[desc_out],
                                  name='desc_repr_model')
    print('\nsummary of description representation model')
    self._desc_repr_model.summary()

    # 3. calculate the cosine similarity between code and desc
    logger.debug('Building similarity model')
    code_repr = self._code_repr_model([methname, apiseq, tokens, desc])
    desc_repr = self._desc_repr_model([methname, apiseq, tokens, desc])
    cos_sim = Dot(axes=1, normalize=True, name='cos_sim')([code_repr, desc_repr])

    sim_model = Model(inputs=[methname, apiseq, tokens, desc],
                      outputs=[cos_sim],
                      name='sim_model')
    self._sim_model = sim_model  # for model evaluation
    print("\nsummary of similarity model")
    self._sim_model.summary()

    # 4. Build training model
    good_sim = sim_model(
        [self.methname, self.apiseq, self.tokens, self.desc_good])  # similarity of good output
    bad_sim = sim_model(
        [self.methname, self.apiseq, self.tokens, self.desc_bad])  # similarity of bad output
    # hinge-style ranking loss, floored at 1e-6
    loss = Lambda(lambda x: K.maximum(1e-6, self.model_params['margin'] - x[0] + x[1]),
                  output_shape=lambda x: x[0],
                  name='loss')([good_sim, bad_sim])

    logger.debug('Building training model')
    self._training_model = Model(
        inputs=[self.methname, self.apiseq, self.tokens, self.desc_good, self.desc_bad],
        outputs=[loss],
        name='training_model')
    print('\nsummary of training model')
    self._training_model.summary()
def build(self):
    """Build the code encoder, description encoder, similarity and training models.

    The method wires four Keras models and stores them on the instance:

    * ``self._code_repr_model`` -- maps ``[methname, apiseq, tokens]`` to a
      single code vector (dense layer of ``n_hidden`` units, tanh).
    * ``self._desc_repr_model`` -- maps ``[desc]`` to a description vector.
    * ``self._sim_model`` -- normalized dot product (``Dot(normalize=True)``)
      of the two vectors above.
    * ``self._training_model`` -- applies ``self._sim_model`` to a "good" and
      a "bad" description and outputs
      ``max(1e-6, margin - good_sim + bad_sim)``.

    Reads ``self.data_params`` (sequence lengths, ``n_words``),
    ``self.model_params`` (embedding weight paths, ``margin`` and optional
    ``n_embed_dims`` / ``n_lstm_dims`` / ``n_hidden``), ``self.config``
    (``workdir``) and the input tensors ``self.methname``, ``self.apiseq``,
    ``self.tokens``, ``self.desc_good`` and ``self.desc_bad``.

    A summary of every model is printed to stdout.

    Raises:
        KeyError: if a required entry is missing from ``self.data_params``,
            ``self.model_params`` or ``self.config``.
    """
    # ---- 1. Code representation model -------------------------------------
    logger.debug('Building Code Representation Model')
    methname = Input(shape=(self.data_params['methname_len'],),
                     dtype='int32', name='methname')
    apiseq = Input(shape=(self.data_params['apiseq_len'],),
                   dtype='int32', name='apiseq')
    tokens = Input(shape=(self.data_params['tokens_len'],),
                   dtype='int32', name='tokens')

    # Method name: embedding -> BiLSTM -> max over axis 1 -> tanh.
    # The LSTM width falls back to 128 here but to 100 for the other encoders
    # when 'n_lstm_dims' is not configured; both fallbacks are kept as-is.
    methname_repr = self._bilstm_maxpool_repr(
        methname, 'methname',
        init_weights=self._load_init_embed_weights('init_embed_weights_methname'),
        mask_zero=False,
        default_lstm_dims=128,
        concat_name='concat_methname_lstms')

    # API sequence: same pipeline, but the embedding is never initialized
    # from a weights file.
    apiseq_repr = self._bilstm_maxpool_repr(
        apiseq, 'apiseq',
        init_weights=None,
        mask_zero=False,
        default_lstm_dims=100,
        concat_name='concat_apiseq_lstms')

    # Tokens: no recurrent layer, the embeddings are max-pooled directly.
    tokens_dropout = self._embed_with_dropout(
        tokens, 'tokens',
        init_weights=self._load_init_embed_weights('init_embed_weights_tokens'),
        mask_zero=False)
    tokens_pool = self._time_maxpool('maxpool_tokens')(tokens_dropout)
    tokens_repr = Activation('tanh', name='active_tokens')(tokens_pool)

    # Fuse the three partial representations into one code vector.
    merged_methname_api = Concatenate(name='merge_methname_api')(
        [methname_repr, apiseq_repr])
    merged_code_repr = Concatenate(name='merge_coderepr')(
        [merged_methname_api, tokens_repr])
    code_repr = Dense(self.model_params.get('n_hidden', 400),
                      activation='tanh',
                      name='dense_coderepr')(merged_code_repr)

    self._code_repr_model = Model(inputs=[methname, apiseq, tokens],
                                  outputs=[code_repr],
                                  name='code_repr_model')
    print('\nsummary of code representation model')
    self._code_repr_model.summary()

    # ---- 2. Description representation model ------------------------------
    logger.debug('Building Desc Representation Model')
    desc = Input(shape=(self.data_params['desc_len'],),
                 dtype='int32', name='desc')

    # NOTE(review): this is the only embedding built with mask_zero=True.
    # Whether the Lambda max-pooling downstream accepts/propagates the mask
    # depends on the Keras version in use -- confirm this is intended.
    desc_repr = self._bilstm_maxpool_repr(
        desc, 'desc',
        init_weights=self._load_init_embed_weights('init_embed_weights_desc'),
        mask_zero=True,
        default_lstm_dims=100,
        concat_name='concat_desc_rnns')

    self._desc_repr_model = Model(inputs=[desc],
                                  outputs=[desc_repr],
                                  name='desc_repr_model')
    print('\nsummary of desc representation model')
    self._desc_repr_model.summary()

    # ---- 3. Cosine similarity between code and description ----------------
    logger.debug('Building similarity model')
    code_repr = self._code_repr_model([methname, apiseq, tokens])
    desc_repr = self._desc_repr_model([desc])
    cos_sim = Dot(axes=1, normalize=True, name='cos_sim')(
        [code_repr, desc_repr])

    sim_model = Model(inputs=[methname, apiseq, tokens, desc],
                      outputs=[cos_sim],
                      name='sim_model')
    self._sim_model = sim_model  # kept separately for model evaluation
    print("\nsummary of similarity model")
    self._sim_model.summary()

    # ---- 4. Training model -------------------------------------------------
    # The same sim_model (shared weights) scores the code against a good and
    # a bad description.
    good_sim = sim_model(
        [self.methname, self.apiseq, self.tokens, self.desc_good])
    bad_sim = sim_model(
        [self.methname, self.apiseq, self.tokens, self.desc_bad])

    # Hinge-style output: margin - good + bad, floored at 1e-6.
    loss = Lambda(
        lambda x: K.maximum(1e-6, self.model_params['margin'] - x[0] + x[1]),
        output_shape=lambda x: x[0],
        name='loss')([good_sim, bad_sim])

    logger.debug('Building training model')
    self._training_model = Model(
        inputs=[self.methname, self.apiseq, self.tokens,
                self.desc_good, self.desc_bad],
        outputs=[loss],
        name='training_model')
    print('\nsummary of training model')
    self._training_model.summary()

def _load_init_embed_weights(self, param_key):
    """Return initial embedding weights for ``param_key`` or ``None``.

    ``self.model_params[param_key]`` is a path relative to
    ``self.config['workdir']``; ``None`` means "no pre-trained weights".
    The loaded array is wrapped in a one-element list because that is the
    form passed to ``Embedding(weights=...)``.
    """
    relative_path = self.model_params[param_key]
    if relative_path is None:
        return None
    return [np.load(self.config['workdir'] + relative_path)]

def _embed_with_dropout(self, seq_input, tag, init_weights, mask_zero):
    """Apply ``embedding_<tag>`` followed by ``dropout_<tag>_embed`` (rate 0.25)."""
    embedded = Embedding(
        input_dim=self.data_params['n_words'],
        output_dim=self.model_params.get('n_embed_dims', 100),
        weights=init_weights,
        # mask_zero: whether 0 in the input is a special "padding" value that
        # should be masked out. If True, all subsequent layers must support
        # masking, otherwise an exception will be raised.
        mask_zero=mask_zero,
        name='embedding_' + tag)(seq_input)
    return Dropout(0.25, name='dropout_' + tag + '_embed')(embedded)

@staticmethod
def _time_maxpool(name):
    """Return a Lambda layer taking the max over axis 1 of its input."""
    return Lambda(lambda x: K.max(x, axis=1, keepdims=False),
                  output_shape=lambda x: (x[0], x[2]),
                  name=name)

def _bilstm_maxpool_repr(self, seq_input, tag, init_weights, mask_zero,
                         default_lstm_dims, concat_name):
    """Encode ``seq_input`` with embedding -> BiLSTM -> max-pool -> tanh.

    Layer names are derived from ``tag`` (``lstm_<tag>_f``, ``lstm_<tag>_b``,
    ``dropout_<tag>_rnn``, ``maxpool_<tag>``, ``active_<tag>``);
    ``concat_name`` is passed explicitly because the encoders do not share a
    single naming pattern for the concatenation layer.

    ``default_lstm_dims`` is the LSTM width used when ``'n_lstm_dims'`` is
    absent from ``self.model_params``.
    """
    embedded = self._embed_with_dropout(seq_input, tag, init_weights, mask_zero)

    n_lstm_dims = self.model_params.get('n_lstm_dims', default_lstm_dims)
    forward_lstm = LSTM(n_lstm_dims,
                        return_sequences=True,
                        recurrent_dropout=0.2,
                        name='lstm_' + tag + '_f')
    backward_lstm = LSTM(n_lstm_dims,
                         return_sequences=True,
                         recurrent_dropout=0.2,
                         name='lstm_' + tag + '_b',
                         go_backwards=True)
    forward_seq = forward_lstm(embedded)
    backward_seq = backward_lstm(embedded)

    # One Dropout layer and one max-pool layer are shared by both directions.
    rnn_dropout = Dropout(0.25, name='dropout_' + tag + '_rnn')
    forward_dropped = rnn_dropout(forward_seq)
    backward_dropped = rnn_dropout(backward_seq)

    maxpool = self._time_maxpool('maxpool_' + tag)
    pooled = Concatenate(name=concat_name)(
        [maxpool(forward_dropped), maxpool(backward_dropped)])
    return Activation('tanh', name='active_' + tag)(pooled)
print('After:', image_paths.shape, angles.shape) # visualize a single batch of the data X, y = generate_training_data_for_visualization(image_paths, angles) visualize_dataset(X, y) # split into train/test sets image_paths_train, image_paths_test, angles_train, angles_test = train_test_split( image_paths, angles, test_size=0.05, random_state=42) print('Train:', image_paths_train.shape, angles_train.shape) print('Test:', image_paths_test.shape, angles_test.shape) if RunModel: # setup model input and the hard coded normalisation inputs = Input(shape=(66, 200, 3)) nimg = Lambda(lambda x: x / 127.5 - 1.0)(inputs) # -- Start of the NVIDIA portion of the model # First 3 convolutional layers with 5x5 kernel each and filter size 24,36 and 48, stride (2,2) # ELU activation layers and l2 kernel_regularizers x = Conv2D(filters=24, kernel_size=(5, 5), strides=(2, 2), padding='valid', activation='elu', kernel_regularizer=l2(0.001))(nimg) x = Conv2D(filters=36, kernel_size=(5, 5), strides=(2, 2), padding='valid',
y = Conv3D(1, (3, 3, 3), strides=(1, 1, 1), padding='same', kernel_initializer='he_normal', data_format='channels_last')(y) y = BatchNormalization(axis=channel_axis)(y) out1 = Activation('sigmoid')(y) # decoder-2 out2 = GlobalMaxPooling3D(data_format='channels_last')(x) out2 = Dense(7, activation='softmax')(out2) return out1, out2 # %% S3DAE network inputs = Input(input_shape) out1, out2 = c3da_ae(inputs) model_fi = Model(inputs=inputs, outputs=[out1, out2]) model_fi.summary() sgd = SGD(lr=1e-3, decay=1e-6, momentum=0.9, nesterov=True) model_fi.compile(optimizer=sgd, loss=['binary_crossentropy', 'categorical_crossentropy'], loss_weights=[0.7, 1], metrics=['accuracy']) # %% Network training