def CNN_Video(nb_channels=3, dropoutRate=0.5, act='relu', k_size=3,
              d_layer=512, k_regularizer=regularizers.l1(0.001),
              img_size=32, time_slot=100, num_color_chan=1):
    """
    Deep 3D convolutional network with a softmax classifier.

    Layer stack, in order:
    Conv3D<32> (unpadded, carries the input shape) - 3 x Conv3D<32> ('same')
    - MaxPool3D<10,k_size,k_size> - 2 x Conv3D<64> ('same')
    - MaxPool3D<2,2,2> - Flatten - Dense<d_layer> - Dropout
    - Dense<nb_channels> (softmax)

    :param nb_channels: number of units of the softmax output layer
                        (the number of classes)
    :param dropoutRate: drop-out rate applied after the hidden dense layer
    :param act: activation function of every convolutional layer
    :param k_size: spatial size of the convolution kernel; the temporal
                   extent of the kernel is fixed to 10 frames
    :param d_layer: number of units in the hidden dense layer (this layer is
                    created without an activation)
    :param k_regularizer: kernel regularizer; accepted but not attached to
                          any layer in this function
    :param img_size: height and width of a frame
    :param time_slot: number of frames in a video
    :param num_color_chan: number of channels per frame
    :return: the un-compiled Sequential model

    NOTE(review): the first pooling layer uses the convolution kernel
    (10, k_size, k_size) as its window rather than (2, 2, 2) -- confirm
    this is intended.
    """
    # Every convolution spans 10 consecutive frames.
    conv_window = (10, k_size, k_size)
    print('PARAMETERS OF MODELS: ', act, ' ', k_size, ' ', d_layer)

    def padded_conv(n_filters):
        # 'same'-padded convolution shared by all but the first layer.
        return Conv3D(n_filters,
                      kernel_size=conv_window,
                      padding='same',
                      kernel_initializer='glorot_uniform',
                      activation=act)

    net = Sequential()

    # First stage: the entry convolution is the only unpadded one.
    net.add(Conv3D(32,
                   kernel_size=conv_window,
                   input_shape=(time_slot, img_size, img_size, num_color_chan),
                   activation=act))
    for _ in range(3):
        net.add(padded_conv(32))
    net.add(MaxPooling3D(pool_size=conv_window,
                         strides=None,
                         data_format='channels_last'))

    # Second stage.
    for _ in range(2):
        net.add(padded_conv(64))
    net.add(MaxPooling3D(pool_size=(2, 2, 2),
                         strides=None,
                         data_format='channels_last'))

    # Classifier head.
    net.add(Flatten())
    net.add(Dense(d_layer))
    net.add(Dropout(rate=dropoutRate))
    net.add(Dense(nb_channels, activation='softmax'))
    return net
def cnn_3d():
    """Build, compile and summarise the base 3D-CNN model.

    The network takes a tensor of shape (3, 120, 160, 3) named 'img_in',
    crops it, applies a single strided 3D convolution followed by pooling,
    batch normalisation and dropout, and then feeds a 50-unit dense layer
    into two heads:

    * 'angle_out'    -- 15-way softmax, trained with categorical
                        cross-entropy (loss weight 0.9)
    * 'throttle_out' -- a single ReLU unit, trained with mean absolute
                        error (loss weight 0.01)

    Returns:
        The compiled Keras model (its summary is printed as a side effect).
    """
    frames = Input(shape=(3, 120, 160, 3), name='img_in')

    # Drop the first 60 entries along the second cropped axis
    # (presumably the top image rows -- TODO confirm data layout).
    features = Cropping3D(cropping=((0, 0), (60, 0), (0, 0)))(frames)
    features = Convolution3D(8, (3, 3, 3), strides=(1, 2, 2),
                             activation='relu')(features)
    features = MaxPooling3D(pool_size=(1, 2, 2))(features)
    features = BatchNormalization()(features)
    features = Dropout(0.1)(features)

    hidden = Flatten(name='flattened')(features)
    hidden = Dense(50, activation='relu')(hidden)
    hidden = Dropout(0.2)(hidden)

    angle_out = Dense(15, activation='softmax', name='angle_out')(hidden)
    throttle_out = Dense(1, activation='relu', name='throttle_out')(hidden)

    losses = {
        'angle_out': 'categorical_crossentropy',
        'throttle_out': 'mean_absolute_error',
    }
    weights_per_head = {
        'angle_out': 0.9,
        'throttle_out': 0.01,
    }

    model = Model(inputs=[frames], outputs=[angle_out, throttle_out])
    model.compile(optimizer='adam', loss=losses, loss_weights=weights_per_head)
    model.summary()
    return model
def attention_block_oktay(g, x, nr_of_convolutions):
    """
    Attention gate following the paper and implementation at
    https://github.com/ozan-oktay/Attention-Gated-Networks

    Both `g` and a 2x2x2 max-pooled copy of `x` are projected with a 1x1x1
    convolution plus batch normalisation, concatenated, passed through ReLU,
    reduced to a single channel (1x1x1 convolution + batch normalisation)
    and squashed with a sigmoid. The resulting map is multiplied with `x`.

    :param g: gating tensor
    :param x: tensor to be gated
    :param nr_of_convolutions: number of filters of the two projections
    :return: `multiply([x, attention_map])`

    NOTE(review): the attention map is computed from a pooled copy of `x`
    but multiplied with the un-pooled `x` -- confirm the shapes are
    compatible for the callers in use.
    NOTE(review): the projections are concatenated here; confirm this is
    intended rather than an element-wise sum.
    """
    def project(tensor, channels):
        # 1x1x1 convolution followed by batch normalisation.
        projected = Convolution3D(channels, kernel_size=1, strides=1,
                                  padding='same', use_bias=True)(tensor)
        return BatchNormalization()(projected)

    gate_features = project(g, nr_of_convolutions)
    skip_features = project(MaxPooling3D([2, 2, 2])(x), nr_of_convolutions)

    joint = Concatenate()([gate_features, skip_features])
    joint = Activation(activation='relu')(joint)

    attention_map = project(joint, 1)
    attention_map = Activation(activation='sigmoid')(attention_map)

    return multiply([x, attention_map])
def encoder_block_3(x, nr_of_convolutions, use_bn=False, spatial_dropout=None):
    """Run `convolution_block_3` on `x`, then halve it with 2x2x2 max pooling.

    :param x: input tensor
    :param nr_of_convolutions: forwarded to `convolution_block_3`
    :param use_bn: forwarded to `convolution_block_3`
    :param spatial_dropout: forwarded to `convolution_block_3`
    :return: tuple `(pooled, before_pooling)` -- the down-sampled tensor and
             the convolution output it was computed from
    """
    skip = convolution_block_3(x, nr_of_convolutions, use_bn, spatial_dropout)
    pooled = MaxPooling3D((2, 2, 2))(skip)
    return pooled, skip
def encoder_block(x, nr_of_convolutions, use_bn=False, spatial_dropout=None):
    """Run `convolution_block` on `x`, then max-pool with a size-aware window.

    The pooling factor is 2 along each of the axes 1..3, except for axes
    whose size in the *input* `x` is 4 or less, which are left un-pooled
    (factor 1).

    :param x: input tensor
    :param nr_of_convolutions: forwarded to `convolution_block`
    :param use_bn: forwarded to `convolution_block`
    :param spatial_dropout: forwarded to `convolution_block`
    :return: tuple `(pooled, before_pooling)`
    """
    skip = convolution_block(x, nr_of_convolutions, use_bn, spatial_dropout)

    # Axis 0 is skipped; only axes 1, 2 and 3 are inspected.
    pool_window = [1 if x.shape[axis] <= 4 else 2 for axis in (1, 2, 3)]

    pooled = MaxPooling3D(pool_window)(skip)
    return pooled, skip
def discriminator():
    """Build and compile the 3D-convolutional discriminator.

    The model takes inputs of shape (20, 5, 97, 1) and stacks four
    'same'-padded 3x3x3 convolutions (64, 128, 512 and 1024 filters, ReLU),
    with 2x2x2 max pooling after the first two, Dropout(0.2) after the third
    and LeakyReLU(0.2) after the fourth. The result is flattened into a
    single sigmoid unit.

    Returns:
        The Sequential model compiled with binary cross-entropy, the RMSprop
        optimizer (default settings) and the 'acc' metric.
    """
    model = Sequential()
    model.add(Conv3D(64, (3, 3, 3), activation='relu', padding="same",
                     input_shape=(20, 5, 97, 1)))
    model.add(MaxPooling3D((2, 2, 2)))
    model.add(Conv3D(128, (3, 3, 3), activation='relu', padding="same"))
    model.add(MaxPooling3D((2, 2, 2)))
    model.add(Conv3D(512, (3, 3, 3), activation='relu', padding="same"))
    model.add(Dropout(0.2))
    model.add(Conv3D(1024, (3, 3, 3), activation='relu', padding="same"))
    # NOTE(review): this LeakyReLU follows a ReLU-activated convolution, so
    # its input is already non-negative -- confirm whether it is needed.
    model.add(LeakyReLU(0.2))
    model.add(Flatten())
    model.add(Dense(1, activation='sigmoid'))

    # The optimizer is selected through its Keras string identifier: Keras
    # exports the class as `RMSprop`, so a bare `RMSProp()` reference fails
    # unless such an alias happens to be defined elsewhere.
    model.compile(loss='binary_crossentropy',
                  optimizer='rmsprop',
                  metrics=['acc'])
    return model
def pooling_combo3D(x, window=(2, 2, 2), prefix='3d_unet_pooling'):
    """Pool `x` with both max and average pooling and stack the results.

    :param x: input tensor (both pooling layers use 'channels_last')
    :param window: pooling window shared by the two pooling layers
    :param prefix: name prefix; the layers are named `<prefix>_max`,
                   `<prefix>_avg` and `<prefix>_max_and_avg`
    :return: concatenation of the max-pooled and average-pooled tensors
             along the last axis (max first)
    """
    max_branch = MaxPooling3D(pool_size=window,
                              data_format='channels_last',
                              name=prefix + "_max")(x)
    avg_branch = AveragePooling3D(pool_size=window,
                                  data_format='channels_last',
                                  name=prefix + "_avg")(x)
    return concatenate([max_branch, avg_branch],
                       axis=-1,
                       name=prefix + "_max_and_avg")
def create_model(learning_rate, num_dense_layers, num_epochs, num_conv_layers, kernel_size, num_filters): """ Hyper-parameters: learning_rate: Learning-rate for the optimizer. num_dense_layers: Number of dense layers. num_conv_layers: Number of conv layers. kernel_size: kernel_size function for conv layers num_filters Number of Filters """ # Add an input layer which is similar to a feed_dict in TensorFlow. # Note that the input-shape must be a tuple containing the image-size. input_first = (Input(shape=(sequence_length, img_width, img_width, 1), name='images')) input_next = input_first # model.add(Lambda(lambda x: tf.image.rgb_to_grayscale(x))) # for i in range(num_conv_layers): # name = 'layer_conv_{0}'.format(i+1) # # First convolutional layer. # # There are many hyper-parameters in this layer, but we only # # want to optimize the kernel_size-function in this example. # factor = 1 # filters = num_filters # if (i == 0): # factor = 1 # filters = 1 # else: # factor = 2*i filters = 0 j = 0 for i in range(num_conv_layers): name = 'layer_convlstm_{0}'.format(i+1) if (i == 0): filters = 1 else: filters = num_filters input_next = (ConvLSTM2D(kernel_size=kernel_size, input_shape=(sequence_length, img_width*pow(0.5, j), img_width*pow(0.5, j), filters), strides=1, filters=num_filters, padding='same', activation="relu", name=name, return_sequences=True))(input_next) input_next = (MaxPooling3D(pool_size=(1, 4, 4), strides=None, padding='same', data_format=None))(input_next) input_next = (BatchNormalization(input_shape=(sequence_length, img_width*pow(0.5, i+2), img_width*pow(0.5, i+2), num_filters)))(input_next) j += 2
def _transmit_block(x, is_last):
    """Transition block: BN + activation, then either global pooling or
    channel compression with down-sampling.

    Settings are read from the module-level `PARAMS` mapping (keys
    'bn_scale', 'activation', 'kernel_initializer', 'weight_decay' and
    'compression').

    :param x: input tensor (batch normalisation is applied on the last axis)
    :param is_last: when truthy the block ends with `GlobalAvgPool3D`;
                    otherwise a 1x1x1 convolution reduces the channel count
                    to `channels // compression` and a 2x2x2 'valid' max
                    pooling follows
    :return: the transformed tensor
    """
    # All five settings are looked up up-front, whatever branch is taken.
    bn_scale, activation, kernel_initializer, weight_decay, compression = (
        PARAMS[key]
        for key in ('bn_scale', 'activation', 'kernel_initializer',
                    'weight_decay', 'compression')
    )

    out = BatchNormalization(scale=bn_scale, axis=-1)(x)
    # `activation` is a layer factory: instantiate it, then apply it.
    out = activation()(out)

    if is_last:
        return GlobalAvgPool3D()(out)

    n_channels = out.get_shape().as_list()[-1]
    out = Conv3D(n_channels // compression,
                 kernel_size=(1, 1, 1),
                 padding='same',
                 use_bias=True,
                 kernel_initializer=kernel_initializer,
                 kernel_regularizer=l2_penalty(weight_decay))(out)
    # Alternative kept from the original: AveragePooling3D((2, 2, 2), padding='valid')
    return MaxPooling3D((2, 2, 2), padding='valid')(out)
def encoder_block_pyramid(x, input_ds, nr_of_convolutions, use_bn=False, spatial_dropout=None):
    """Encoder block that first merges an extra pyramid input into `x`.

    `input_ds` goes through a 'same'-padded 3x3x3 ReLU convolution and is
    concatenated (pyramid features first) with `x` on the last axis. The
    merged tensor is processed by `convolution_block` and then max-pooled.
    The pooling factor is 2 on each of axes 1..3, except for axes whose size
    in the merged tensor is 4 or less, which keep factor 1.

    :param x: main-path input tensor
    :param input_ds: additional input merged into the main path
                     (presumably a down-sampled copy of the network input --
                     TODO confirm with callers)
    :param nr_of_convolutions: filters of the pyramid convolution; also
                               forwarded to `convolution_block`
    :param use_bn: forwarded to `convolution_block`
    :param spatial_dropout: forwarded to `convolution_block`
    :return: tuple `(pooled, before_pooling)`
    """
    pyramid_features = Convolution3D(filters=nr_of_convolutions,
                                     kernel_size=(3, 3, 3),
                                     padding='same',
                                     activation='relu')(input_ds)
    merged = Concatenate(axis=-1)([pyramid_features, x])

    skip = convolution_block(merged, nr_of_convolutions, use_bn, spatial_dropout)

    # The size test looks at the merged tensor, not at the convolution output.
    pool_window = [1 if merged.shape[axis] <= 4 else 2 for axis in (1, 2, 3)]

    pooled = MaxPooling3D(pool_window)(skip)
    return pooled, skip
def build_3d_cnn(w, h, d, s, num_outputs):
    '''
    Build an un-compiled sequential 3D CNN.

    Credit: https://github.com/jessecha/DNRacing/blob/master/3D_CNN_Model/model.py

    w : width
    h : height
    d : depth
    s : n_stacked
    num_outputs : number of units of the final (activation-free) dense layer

    The model input shape is (s, h, w, d). Four stages of
    Conv3D -> ReLU -> MaxPooling3D use 16, 32, 64 and 128 filters; only the
    first convolution is strided ((1, 3, 3)). Two blocks of
    Dense(256) -> BatchNormalization -> ReLU -> Dropout(0.5) precede the
    output layer.
    '''
    input_shape = (s, h, w, d)

    # (filters, convolution strides) for each convolutional stage.
    conv_stages = (
        (16, (1, 3, 3)),
        (32, (1, 1, 1)),
        (64, (1, 1, 1)),
        (128, (1, 1, 1)),
    )

    model = Sequential()
    for stage_index, (n_filters, conv_strides) in enumerate(conv_stages):
        # Only the very first layer declares the model's input shape.
        first_layer_kwargs = {'input_shape': input_shape} if stage_index == 0 else {}
        model.add(Conv3D(filters=n_filters,
                         kernel_size=(3, 3, 3),
                         strides=conv_strides,
                         data_format='channels_last',
                         padding='same',
                         **first_layer_kwargs))
        model.add(Activation('relu'))
        model.add(MaxPooling3D(pool_size=(1, 2, 2),
                               strides=(1, 2, 2),
                               padding='valid',
                               data_format=None))

    # Fully connected head.
    model.add(Flatten())
    for _ in range(2):
        model.add(Dense(256))
        model.add(BatchNormalization())
        model.add(Activation('relu'))
        model.add(Dropout(0.5))

    model.add(Dense(num_outputs))
    return model
def SlowFast_Network(clip_shape=[64, 224, 224, 3], num_class=400, alpha=8, beta=1 / 8, tau=16, method='T_conv'):
    """Instantiates the SlowFast_Network architecture.

    The fast pathway is built first (stem + residual stages 2-5), then the
    slow pathway; before each slow stage the matching fast-pathway output is
    merged in through `lateral_connection`. Both pathways end in global
    average pooling, are concatenated (slow first) and classified with a
    softmax dense layer named 'fc'.

    Arguments:
        clip_shape: video_clip_shape; element 0 is divided by `tau` to get
            the first dimension of the slow input, the remaining three
            elements are used unchanged
        num_class: numbers of videos class
        alpha: mentioned in paper; the first dimension of the fast input is
            `alpha` times that of the slow input
        beta: mentioned in paper; scales the filter counts of the fast
            pathway
        tau: mentioned in paper
        method: one of ['T_conv','T_sample','TtoC_sum','TtoC_concat']
            mentioned in paper. NOTE(review): this argument is not
            referenced anywhere in this function body -- confirm whether it
            should be forwarded to `lateral_connection`.

    Returns:
        A Keras model instance taking `[slow_input, fast_input]`.
    """
    slow_input_shape = [int(clip_shape[0] / tau)] + [clip_shape[axis] for axis in (1, 2, 3)]
    fast_input_shape = [int(slow_input_shape[0] * alpha)] + slow_input_shape[1:4]
    print('slow_path_input_shape', slow_input_shape)
    print('fast_path_input_shape', fast_input_shape)

    slow_input = Input(shape=slow_input_shape)
    fast_input = Input(shape=fast_input_shape)

    bn_axis = 4 if K.image_data_format() == 'channels_last' else 1

    # (stage, bottleneck width, output width, identity-block suffixes).
    # Each stage is one conv_block ('a') followed by the listed identity blocks.
    stage_plan = (
        (2, 64, 256, 'bc'),
        (3, 128, 512, 'bcd'),
        (4, 256, 1024, 'bcdef'),
        (5, 512, 2048, 'bc'),
    )

    # ---fast pathway---
    x_fast = Conv3D(64, (5, 7, 7), strides=(1, 2, 2), padding='same',
                    name='fast_conv1')(fast_input)
    x_fast = BatchNormalization(axis=bn_axis, name='fast_bn_conv1')(x_fast)
    x_fast = Activation('relu')(x_fast)
    x_fast = MaxPooling3D((1, 3, 3), strides=(1, 2, 2), name='poo1_fast')(x_fast)

    # Fast-pathway outputs in order: pool1, res2, res3, res4, res5.
    fast_taps = [x_fast]
    for stage, narrow, wide, suffixes in stage_plan:
        # Only the stage-2 conv_block is given explicit strides.
        stride_kwargs = {'strides': (1, 1, 1)} if stage == 2 else {}
        x_fast = conv_block(
            x_fast, [1, 3, 3],
            [int(narrow * beta), int(narrow * beta), int(wide * beta)],
            stage=stage, block='a', path='fast',
            non_degenerate_temporal_conv=True, **stride_kwargs)
        for suffix in suffixes:
            x_fast = identity_block(
                x_fast, [1, 3, 3],
                [int(narrow * beta), int(narrow * beta), int(wide * beta)],
                stage=stage, path='fast', block=suffix,
                non_degenerate_temporal_conv=True)
        fast_taps.append(x_fast)

    # ---slow pathway---
    x = Conv3D(64, (1, 7, 7), strides=(1, 2, 2), padding='same',
               name='slow_conv1')(slow_input)
    x = BatchNormalization(axis=bn_axis, name='slow_bn_conv1')(x)
    x = Activation('relu')(x)
    x = MaxPooling3D((1, 3, 3), strides=(1, 2, 2), name='poo1_slow')(x)

    # zip() stops after the four stages, leaving the res5 tap for the head.
    for (stage, narrow, wide, suffixes), fast_tap in zip(stage_plan, fast_taps):
        fused = lateral_connection(fast_tap, x, alpha=alpha, beta=beta)

        # Stages 4 and 5 request non-degenerate temporal convolutions;
        # stages 2 and 3 rely on the helpers' defaults.
        temporal_kwargs = {'non_degenerate_temporal_conv': True} if stage >= 4 else {}
        stride_kwargs = {'strides': (1, 1, 1)} if stage == 2 else {}

        x = conv_block(fused, [1, 3, 3], [narrow, narrow, wide],
                       stage=stage, block='a', path='slow',
                       **stride_kwargs, **temporal_kwargs)
        for suffix in suffixes:
            x = identity_block(x, [1, 3, 3], [narrow, narrow, wide],
                               stage=stage, block=suffix, path='slow',
                               **temporal_kwargs)

    fast_output = GlobalAveragePooling3D(name='avg_pool_fast')(fast_taps[-1])
    slow_output = GlobalAveragePooling3D(name='avg_pool_slow')(x)
    concat_output = Concatenate(axis=-1)([slow_output, fast_output])
    output = Dense(num_class, activation='softmax', name='fc')(concat_output)

    # Create model.
    return Model([slow_input, fast_input], output, name='slowfast_resnet50')
def CLRNet(input_shape=None, classes=10, block='bottleneck', residual_unit='v2', repetitions=None, initial_filters=64, activation='softmax', include_top=True, input_tensor=None, dropout=None, transition_dilation_rate=(1, 1), initial_strides=(2, 2), initial_kernel_size=(7, 7), initial_pooling='max', final_pooling=None, top='classification'):
    """Builds a custom ResNet like architecture. Defaults to CLRNet50 v2.

    Args:
        input_shape: shape tuple with exactly 4 entries; required. The error
            message names the expected layout as
            (frames, nb_channels, nb_rows, nb_cols).
        classes: The number of outputs of the final layer.
        block: The block function to use: `'basic'`, `'bottleneck'`, any
            other string (resolved through `_string_to_function`) or a
            callable, which is used directly.
        residual_unit: the basic residual unit, 'v1' for conv bn relu, 'v2'
            for bn relu conv, any other string (resolved through
            `_string_to_function`) or a callable.
            See [Identity Mappings in Deep Residual Networks]
            (https://arxiv.org/abs/1603.05027) for details.
        repetitions: Number of repetitions of the block unit in each stage.
            The number of filters is doubled after every stage. Default of
            None implies [3, 4, 6, 3].
        initial_filters: filters of the first convolution and of stage 0.
        activation: 'softmax', 'sigmoid' or None; 'sigmoid' requires
            `classes == 1`.
        include_top: whether to append the layers selected by `top`.
        input_tensor: forwarded to `Input(tensor=...)`.
        dropout: forwarded to `_residual_block`. None for no dropout,
            otherwise rate of dropout from 0 to 1.
        transition_dilation_rate: Dilation rate for transition layers. When
            it equals (1, 1) the first block of every stage uses strides
            (2, 2); otherwise all blocks use strides (1, 1).
        initial_strides: strides of the very first convolution.
        initial_kernel_size: kernel size of the very first convolution.
        initial_pooling: 'max' adds a MaxPooling3D with window (1, 3, 3)
            after the first convolution; any other value adds nothing.
        final_pooling: 'avg' or 'max' applies global average / max pooling.
            Only consulted when none of the `top` layers was appended.
        top: 'classification' (global average pooling + dense layer) or
            'segmentation'; only used when `include_top` is true.

    Returns:
        The keras `Model`.

    Raises:
        ValueError: for an unsupported `activation`, or 'sigmoid' combined
            with `classes != 1`.
        Exception: when `input_shape` is missing or does not have 4 entries.
    """
    if activation not in ['softmax', 'sigmoid', None]:
        raise ValueError(
            'activation must be one of "softmax", "sigmoid", or None')
    if activation == 'sigmoid' and classes != 1:
        raise ValueError(
            'sigmoid activation can only be used when classes = 1')

    # Validate the shape before doing any other work. A missing shape gets
    # the same descriptive error as a shape of the wrong length, instead of
    # a bare TypeError from len(None).
    if input_shape is None or len(input_shape) != 4:
        raise Exception(
            "Input shape should be a tuple (frames,nb_channels, nb_rows, nb_cols)"
        )

    if repetitions is None:
        repetitions = [3, 4, 6, 3]

    _handle_dim_ordering()

    # Resolve the block builder.
    if block == 'basic':
        block_fn = basic_block
    elif block == 'bottleneck':
        block_fn = bottleneck
    elif isinstance(block, six.string_types):
        block_fn = _string_to_function(block)
    else:
        block_fn = block

    # Resolve the residual unit; a callable is used as given.
    if residual_unit == 'v2':
        residual_unit = _bn_relu_conv
    elif residual_unit == 'v1':
        residual_unit = _conv_bn_relu
    elif isinstance(residual_unit, six.string_types):
        residual_unit = _string_to_function(residual_unit)

    # Permute dimension order if necessary
    # NOTE(review): this keeps only three of the four entries of
    # input_shape -- confirm the intended channels_first layout.
    if K.image_data_format() == 'channels_first':
        input_shape = (input_shape[1], input_shape[2], input_shape[0])

    img_input = Input(shape=input_shape, tensor=input_tensor)
    x = _conv_bn_relu(filters=initial_filters,
                      kernel_size=initial_kernel_size,
                      strides=initial_strides)(img_input)
    if initial_pooling == 'max':
        x = MaxPooling3D(pool_size=(1, 3, 3), strides=None, padding="same")(x)

    stage_output = x
    filters = initial_filters
    for i, r in enumerate(repetitions):
        transition_dilation_rates = [transition_dilation_rate] * r
        transition_strides = [(1, 1)] * r
        if transition_dilation_rate == (1, 1):
            transition_strides[0] = (2, 2)
        stage_output = _residual_block(
            block_fn,
            filters=filters,
            stage=i,
            blocks=r,
            is_first_layer=(i == 0),
            dropout=dropout,
            transition_dilation_rates=transition_dilation_rates,
            transition_strides=transition_strides,
            residual_unit=residual_unit)(stage_output)
        filters *= 2

    # Last activation
    x = _bn_relu2(stage_output)

    # Classifier block. String values are compared with `==`: identity
    # comparison (`is`) against a literal only works by accident of
    # interning.
    if include_top and top == 'classification':
        x = GlobalAveragePooling3D()(x)
        x = Dense(units=classes, activation=activation,
                  kernel_initializer="he_normal")(x)
    elif include_top and top == 'segmentation':
        x = ConvLSTM2D(classes, (1, 1), activation='linear', padding='same',
                       return_sequences=True)(x)
        # NOTE(review): three names are unpacked from input_shape, which has
        # four entries unless the channels_first permutation above ran --
        # confirm this branch against a real configuration.
        if K.image_data_format() == 'channels_first':
            channel, row, col = input_shape
        else:
            row, col, channel = input_shape
        x = Reshape((row * col, classes))(x)
        x = Activation(activation)(x)
        x = Reshape((row, col, classes))(x)
    elif final_pooling == 'avg':
        x = GlobalAveragePooling3D()(x)
    elif final_pooling == 'max':
        x = GlobalMaxPooling3D()(x)

    model = Model(inputs=img_input, outputs=x)
    return model
def first_layer(self, x, scope):
    """Stem of the network: strided convolution, pooling, batch norm, ReLU.

    All layers are created inside `tf.name_scope(scope)`.

    :param x: input tensor
    :param scope: name passed to `tf.name_scope`
    :return: ReLU-activated output of the stem

    Reads `self.init_filters` (filters of the 7x7x7 convolution) and
    `self.training` (forwarded to the batch-normalisation call).
    """
    with tf.name_scope(scope):
        stem = Conv3D(filters=self.init_filters,
                      kernel_size=(7, 7, 7),
                      strides=(2, 2, 2),
                      padding='same')(x)
        stem = MaxPooling3D(pool_size=(3, 3, 3),
                            strides=(2, 2, 2),
                            padding="same")(stem)
        normalized = BatchNormalization(axis=-1)(stem, training=self.training)
        activated = Activation("relu")(normalized)
        return activated
def _i3d_mixed_block(x, tag, widths, channel_axis):
    """Build one four-branch 'Mixed_<tag>' Inception module on `x`.

    `widths` holds the filter counts in creation order: 1x1 branch,
    1x1 -> 3x3 branch (two values), 1x1 -> 3x3 branch (two values) and the
    1x1 convolution that follows the max-pooling branch. Layer names are
    derived from `tag` (e.g. '3b').
    """
    w0, w1_reduce, w1, w2_reduce, w2, w3 = widths

    branch_0 = conv3d_bn(x, w0, 1, 1, 1, padding='same',
                         name='Conv3d_%s_0a_1x1' % tag)

    branch_1 = conv3d_bn(x, w1_reduce, 1, 1, 1, padding='same',
                         name='Conv3d_%s_1a_1x1' % tag)
    branch_1 = conv3d_bn(branch_1, w1, 3, 3, 3, padding='same',
                         name='Conv3d_%s_1b_3x3' % tag)

    branch_2 = conv3d_bn(x, w2_reduce, 1, 1, 1, padding='same',
                         name='Conv3d_%s_2a_1x1' % tag)
    branch_2 = conv3d_bn(branch_2, w2, 3, 3, 3, padding='same',
                         name='Conv3d_%s_2b_3x3' % tag)

    branch_3 = MaxPooling3D((3, 3, 3), strides=(1, 1, 1), padding='same',
                            name='MaxPool2d_%s_3a_3x3' % tag)(x)
    branch_3 = conv3d_bn(branch_3, w3, 1, 1, 1, padding='same',
                         name='Conv3d_%s_3b_1x1' % tag)

    return layers.concatenate([branch_0, branch_1, branch_2, branch_3],
                              axis=channel_axis,
                              name='Mixed_%s' % tag)


def Inception_Inflated3d(include_top=True, weights=None, input_tensor=None, input_shape=None, dropout_prob=0.0, endpoint_logit=True, classes=400):
    """Instantiates the Inflated 3D Inception v1 architecture.

    Optionally loads weights pre-trained on Kinetics. Note that when using
    TensorFlow, for best performance you should set
    `image_data_format='channels_last'` in your Keras config at
    ~/.keras/keras.json.

    The data format convention used by the model is the one specified in
    your Keras config file. The default input frame (image) size for this
    model is 224x224.

    # Arguments
        include_top: whether to include the classification layer at the top
            of the network.
        weights: `None` (random initialization), one of the names listed in
            `WEIGHTS_NAME` (the matching file is downloaded), or a path to
            an existing weights file.
        input_tensor: optional Keras tensor (i.e. output of
            `layers.Input()`) to use as image input for the model.
        input_shape: optional shape tuple, validated by
            `_obtain_input_shape` (default frame size 224, minimum frame
            size 32, default 64 frames, minimum 8 frames).
            E.g. `(64, 150, 150, 3)` would be one valid value.
        dropout_prob: optional, dropout probability applied after the
            average pooling layer; only used when `include_top` is True.
        endpoint_logit: (boolean) optional; only used when `include_top` is
            True.
            - `True` end model forward pass at logit output
            - `False` apply softmax to the logits
        classes: optional number of classes to classify images into, only
            to be specified if `include_top` is True, and if no pre-trained
            `weights` name is specified.

    # Returns
        A Keras model instance.

    # Raises
        ValueError: in case of invalid argument for `weights`, or when
            pre-trained weights are requested with `include_top` and
            `classes != 400`.
    """
    if not (weights in WEIGHTS_NAME or weights is None or os.path.exists(weights)):
        raise ValueError('The `weights` argument should be either '
                         '`None` (random initialization) or %s' %
                         str(WEIGHTS_NAME) + ' '
                         'or a valid path to a file containing `weights` values')

    if weights in WEIGHTS_NAME and include_top and classes != 400:
        raise ValueError('If using `weights` as one of these %s, with `include_top`'
                         ' as true, `classes` should be 400' % str(WEIGHTS_NAME))

    # Determine proper input shape
    input_shape = _obtain_input_shape(
        input_shape,
        default_frame_size=224,
        min_frame_size=32,
        default_num_frames=64,
        min_num_frames=8,
        data_format=K.image_data_format(),
        require_flatten=include_top,
        weights=weights)

    if input_tensor is None:
        img_input = Input(shape=input_shape)
    elif not K.is_keras_tensor(input_tensor):
        img_input = Input(tensor=input_tensor, shape=input_shape)
    else:
        img_input = input_tensor

    channel_axis = 1 if K.image_data_format() == 'channels_first' else 4

    # Downsampling via convolution (spatial and temporal)
    x = conv3d_bn(img_input, 64, 7, 7, 7, strides=(2, 2, 2), padding='same',
                  name='Conv3d_1a_7x7')

    # Downsampling (spatial only)
    x = MaxPooling3D((1, 3, 3), strides=(1, 2, 2), padding='same',
                     name='MaxPool2d_2a_3x3')(x)
    x = conv3d_bn(x, 64, 1, 1, 1, strides=(1, 1, 1), padding='same',
                  name='Conv3d_2b_1x1')
    x = conv3d_bn(x, 192, 3, 3, 3, strides=(1, 1, 1), padding='same',
                  name='Conv3d_2c_3x3')

    # Downsampling (spatial only)
    x = MaxPooling3D((1, 3, 3), strides=(1, 2, 2), padding='same',
                     name='MaxPool2d_3a_3x3')(x)

    # Mixed 3b, 3c. (An unused duplicate of the 3b pooling branch, which
    # was never connected to the output, is intentionally not rebuilt.)
    x = _i3d_mixed_block(x, '3b', (64, 96, 128, 16, 32, 32), channel_axis)
    x = _i3d_mixed_block(x, '3c', (128, 128, 192, 32, 96, 64), channel_axis)

    # Downsampling (spatial and temporal)
    x = MaxPooling3D((3, 3, 3), strides=(2, 2, 2), padding='same',
                     name='MaxPool2d_4a_3x3')(x)

    # Mixed 4b .. 4f
    x = _i3d_mixed_block(x, '4b', (192, 96, 208, 16, 48, 64), channel_axis)
    x = _i3d_mixed_block(x, '4c', (160, 112, 224, 24, 64, 64), channel_axis)
    x = _i3d_mixed_block(x, '4d', (128, 128, 256, 24, 64, 64), channel_axis)
    x = _i3d_mixed_block(x, '4e', (112, 144, 288, 32, 64, 64), channel_axis)
    x = _i3d_mixed_block(x, '4f', (256, 160, 320, 32, 128, 128), channel_axis)

    # Downsampling (spatial and temporal)
    x = MaxPooling3D((2, 2, 2), strides=(2, 2, 2), padding='same',
                     name='MaxPool2d_5a_2x2')(x)

    # Mixed 5b, 5c
    x = _i3d_mixed_block(x, '5b', (256, 160, 320, 32, 128, 128), channel_axis)
    x = _i3d_mixed_block(x, '5c', (384, 192, 384, 48, 128, 128), channel_axis)

    if include_top:
        # Classification block
        x = AveragePooling3D((2, 7, 7), strides=(1, 1, 1), padding='valid',
                             name='global_avg_pool')(x)
        x = Dropout(dropout_prob)(x)

        x = conv3d_bn(x, classes, 1, 1, 1, padding='same',
                      use_bias=True, use_activation_fn=False, use_bn=False,
                      name='Conv3d_6a_1x1')

        num_frames_remaining = int(x.shape[1])
        x = Reshape((num_frames_remaining, classes))(x)

        # logits (raw scores for each class): average over axis 1
        x = Lambda(lambda x: K.mean(x, axis=1, keepdims=False),
                   output_shape=lambda s: (s[0], s[2]))(x)

        if not endpoint_logit:
            x = Activation('softmax', name='prediction')(x)
    else:
        # Pool over the full extent of axes 2 and 3 of the feature map.
        h = int(x.shape[2])
        w = int(x.shape[3])
        x = AveragePooling3D((2, h, w), strides=(1, 1, 1), padding='valid',
                             name='global_avg_pool')(x)

    inputs = img_input
    # create model
    model = Model(inputs, x, name='i3d_inception')

    # load weights
    if weights in WEIGHTS_NAME:
        # Keys of the download tables, in the order of the WEIGHTS_NAME
        # entries they correspond to.
        weight_keys = ('rgb_kinetics_only',
                       'flow_kinetics_only',
                       'rgb_imagenet_and_kinetics',
                       'flow_imagenet_and_kinetics')
        key = weight_keys[WEIGHTS_NAME.index(weights)]
        if include_top:
            weights_url = WEIGHTS_PATH[key]
            model_name = 'i3d_inception_%s.h5' % key
        else:
            weights_url = WEIGHTS_PATH_NO_TOP[key]
            model_name = 'i3d_inception_%s_no_top.h5' % key

        downloaded_weights_path = get_file(model_name, weights_url,
                                           cache_subdir='models')
        model.load_weights(downloaded_weights_path)

        if K.backend() == 'theano':
            layer_utils.convert_all_kernels_in_model(model)

        if K.image_data_format() == 'channels_first' and K.backend() == 'tensorflow':
            warnings.warn('You are using the TensorFlow backend, yet you '
                          'are using the Theano '
                          'image data format convention '
                          '(`image_data_format="channels_first"`). '
                          'For best performance, set '
                          '`image_data_format="channels_last"` in '
                          'your keras config '
                          'at ~/.keras/keras.json.')
    elif weights is not None:
        model.load_weights(weights)

    return model
def layers(self):
    """Build the encoder graph and return its input plus the two latent heads.

    The graph is assembled in three parts:

    1. Two frame-wise stages (16, then 32 filters). Each stage applies a
       TimeDistributed 3x3 Conv2D, BatchNormalization and LeakyReLU twice,
       then TimeDistributed Dropout and 2x2 max pooling (spatial only).
    2. Four recurrent stages (64, 128, 256, 512 filters). Each stage applies
       ConvLSTM2D, BatchNormalization and LeakyReLU twice, then Dropout and
       a 2x2x2 MaxPooling3D (time and space).
    3. A Reshape to (2, 2, 512) followed by two parallel heads, "mean" and
       "stddev", each a 1x1 Conv2D, 2x2 max pooling, Flatten and Dense of
       width ``self.latent_size``.

    Reads ``self.real_input_shape``, ``self.batch_size``, ``self.leak``,
    ``self.dropout`` and ``self.latent_size``.

    NOTE(review): the shape annotations below are the original author's and
    assume a 16x128x128x3 input; the hard-coded Reshape((2, 2, 512)) only
    fits that case -- confirm against ``self.real_input_shape``.

    :return: tuple ``(input_layer, [mean, stddev])``
    """
    input_layer = Input(self.real_input_shape, self.batch_size)
    # 16x128x128x3
    tensor = input_layer

    # Frame-wise convolutional stages: 16x128x128x3 -> 16x64x64x16 -> 16x32x32x32
    for width in (16, 32):
        for _ in range(2):
            frame_conv = Conv2D(filters=width,
                                kernel_size=3,
                                use_bias=True,
                                data_format='channels_last',
                                padding='same')
            tensor = TimeDistributed(frame_conv)(tensor)
            tensor = BatchNormalization()(tensor)
            tensor = TimeDistributed(LeakyReLU(alpha=self.leak))(tensor)
        tensor = TimeDistributed(Dropout(self.dropout))(tensor)
        tensor = TimeDistributed(MaxPooling2D(pool_size=(2, 2)))(tensor)

    # Recurrent stages; each pooling halves the sequence length and both
    # spatial sides: 8x16x16x64 -> 4x8x8x128 -> 2x4x4x256 -> 1x2x2x512
    for width in (64, 128, 256, 512):
        for _ in range(2):
            tensor = ConvLSTM2D(filters=width,
                                kernel_size=(3, 3),
                                padding='same',
                                return_sequences=True)(tensor)
            tensor = BatchNormalization()(tensor)
            tensor = LeakyReLU(alpha=self.leak)(tensor)
        tensor = Dropout(self.dropout)(tensor)
        tensor = MaxPooling3D(pool_size=(2, 2, 2))(tensor)

    # Reshape for sequence removal: only one sequence element should be left
    # at this point, so this is just a dimension reduction.
    features = Reshape((2, 2, 512))(tensor)
    # 2x2x512

    # Variational encoder output (distributions): first head.
    mean = Flatten(name="mean_flatten")(
        MaxPool2D(pool_size=(2, 2), name="mean_max_pooling")(
            Conv2D(filters=self.latent_size,
                   kernel_size=(1, 1),
                   padding='same',
                   name="mean_convolution")(features)))
    mean = Dense(self.latent_size, name="mean")(mean)

    # Second head, structurally identical to the first.
    stddev = Flatten(name="stddev_flatten")(
        MaxPool2D(pool_size=(2, 2), name="stddev_max_pooling")(
            Conv2D(filters=self.latent_size,
                   kernel_size=(1, 1),
                   padding='same',
                   name="stddev_convolution")(features)))
    stddev = Dense(self.latent_size, name="stddev")(stddev)

    return input_layer, [mean, stddev]
# One-hot encode the integer class labels.
labels_cat = np_utils.to_categorical(labels)
print("Binarization of classes.. done")

""" Definition of the 3D CNN model """

# DEFINE MODEL
# The layers are listed in order and handed to Sequential in one go; this is
# the same stack as adding them one at a time.
model = Sequential([
    # Feature extraction.
    # The kernel is only slightly smaller than the input volume; with Keras'
    # default 'valid' padding and unit stride it yields a 3x6x6 map per filter.
    Conv3D(filters=32,
           kernel_size=(LENGTH_VIDEO - 2, IMAGE_HEIGHT - 5, IMAGE_WIDTH - 5),
           input_shape=(LENGTH_VIDEO, IMAGE_HEIGHT, IMAGE_WIDTH,
                        IMAGE_CHANNELS)),
    MaxPooling3D(pool_size=(2, 2, 2)),
    Activation('relu'),
    Dropout(0.2),
    Flatten(),
    # Classification.
    Dense(512, activation='relu'),
    Dropout(0.2),
    # NOTE(review): one more output unit than NB_CLASSES -- presumably the
    # label ids start at 1 so to_categorical also emits a column 0; verify
    # against the labels.
    Dense(NB_CLASSES + 1, activation='softmax'),
])

model.summary()
model.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])
def get_net():
    """Build and compile the 3D U-Net-style encoder/decoder network.

    The input is a cubic single-channel volume of side ``input_dim``
    (a module-level name). The contracting path has four levels of two
    'same'-padded 3x3x3 convolutions (each followed by batch
    normalization), separated by 2x2x2 max pooling. The expanding path
    upsamples three times with stride-2 transposed convolutions,
    concatenates the matching encoder output (center-cropped where the
    sizes differ), and applies two unpadded 3x3x3 convolutions per level.
    A final 1x1x1 sigmoid convolution produces one output channel.

    Because the decoder convolutions are unpadded, each spatial side of
    the output is 28 voxels shorter than the input. The original note says
    the output is (36, 36, 36), which would correspond to input_dim == 64
    -- TODO confirm.

    :return: the compiled Keras ``Model`` (Adam optimizer,
        ``weighted_binary_crossentropy`` loss, project-specific metrics)
    """
    conv_kernel = (3, 3, 3)
    factor = (2, 2, 2)

    def conv_block(tensor, filter_pair, padding):
        # Two Conv3D(relu) + BatchNormalization pairs in sequence.
        for n_filters in filter_pair:
            tensor = Conv3D(n_filters, conv_kernel, activation="relu",
                            padding=padding)(tensor)
            tensor = BatchNormalization()(tensor)
        return tensor

    def upsample(tensor, n_filters, padding):
        # Stride-2 transposed convolution that doubles every spatial side.
        return Conv3DTranspose(n_filters, factor, strides=factor,
                               padding=padding, activation="relu")(tensor)

    def center_crop(tensor, margin):
        # Trim `margin` voxels from both ends of all three spatial axes.
        return Cropping3D(cropping=((margin, margin),) * 3)(tensor)

    volume = Input((input_dim, input_dim, input_dim, 1))

    # Contracting path (levels 1 to 4).
    enc1 = conv_block(volume, (32, 64), "same")
    enc2 = conv_block(MaxPooling3D(factor)(enc1), (64, 128), "same")
    enc3 = conv_block(MaxPooling3D(factor)(enc2), (128, 256), "same")
    bottom = conv_block(MaxPooling3D(factor)(enc3), (256, 512), "same")

    # Expanding path, back up to level 3: sizes still match, no crop needed.
    raised = upsample(bottom, 512, "same")
    dec3 = conv_block(concatenate([raised, enc3]), (256, 256), "valid")

    # Level 2: the decoder is 8 voxels narrower per side than enc2, so crop
    # 4 from each end. "valid" is the padding this up-convolution used
    # implicitly.
    raised = upsample(dec3, 256, "valid")
    dec2 = conv_block(concatenate([raised, center_crop(enc2, 4)]),
                      (128, 128), "valid")

    # Level 1: the decoder is 24 voxels narrower per side than enc1.
    raised = upsample(dec2, 128, "same")
    dec1 = conv_block(concatenate([raised, center_crop(enc1, 12)]),
                      (64, 64), "valid")

    # Output dim is (36, 36, 36)
    preds = Conv3D(1, (1, 1, 1), activation="sigmoid")(dec1)

    model = Model(inputs=volume, outputs=preds)
    tracked_metrics = [
        axon_precision,
        axon_recall,
        f1_score,
        artifact_precision,
        edge_axon_precision,
        adjusted_accuracy,
    ]
    model.compile(optimizer=Adam(lr=0.001, decay=0.00),
                  loss=weighted_binary_crossentropy,
                  metrics=tracked_metrics)
    return model