def CapsNetBasic(input_shape, n_class=2):
    """Build the basic segmentation capsule network and its three models.

    The network is: Conv2D -> reshape to a single capsule -> primary
    ConvCapsuleLayer -> 1x1 ConvCapsuleLayer (``seg_caps``) -> ``Length``.
    A small 1x1-convolution decoder reconstructs an image from the masked
    ``seg_caps`` output.

    The decoder layers are instantiated exactly once and reused by all three
    returned models, so the reconstruction weights learned through
    ``train_model`` are the same weights used by ``eval_model`` and
    ``manipulate_model``.

    # Arguments
        input_shape: shape of one input sample, without the batch axis.
            The label input ``y`` uses the same shape with the last entry
            replaced by 1.
        n_class: forwarded to ``Length`` as ``num_classes``.

    # Returns
        ``(train_model, eval_model, manipulate_model)``:
        - ``train_model``: inputs ``[x, y]``, outputs
          ``[out_seg, reconstruction masked by y]``.
        - ``eval_model``: input ``x``, outputs
          ``[out_seg, reconstruction masked without y]``.
        - ``manipulate_model``: inputs ``[x, y, noise]``, output is the
          reconstruction of ``seg_caps + noise`` masked by ``y``.
    """

    def static_dims(tensor):
        # TF1 shapes iterate as Dimension objects (value in `.value`);
        # newer versions iterate as plain ints. Accept both.
        return [getattr(dim, 'value', dim) for dim in tensor.get_shape()]

    x = layers.Input(shape=input_shape)

    # Layer 1: a conventional Conv2D layer.
    conv1 = layers.Conv2D(filters=256, kernel_size=5, strides=1,
                          padding='same', activation='relu', name='conv1')(x)

    # Reshape to 1 capsule x [filters] atoms.
    _, H, W, C = static_dims(conv1)
    conv1_reshaped = layers.Reshape((H, W, 1, C))(conv1)

    # Layer 2: primary capsules, convolutional capsule layer with 1 routing.
    primary_caps = ConvCapsuleLayer(kernel_size=5, num_capsule=8,
                                    num_atoms=32, strides=1, padding='same',
                                    routings=1,
                                    name='primarycaps')(conv1_reshaped)

    # Layer 3: 1x1 convolutional capsule layer producing a single capsule.
    seg_caps = ConvCapsuleLayer(kernel_size=1, num_capsule=1, num_atoms=16,
                                strides=1, padding='same', routings=3,
                                name='seg_caps')(primary_caps)

    # Layer 4: auxiliary layer replacing each capsule with its length, so the
    # output matches the shape of the true label.
    out_seg = Length(num_classes=n_class, seg=True, name='out_seg')(seg_caps)

    # Decoder network.
    _, H, W, C, A = static_dims(seg_caps)
    y = layers.Input(shape=tuple(input_shape[:-1]) + (1,))
    # Training: the true label masks the capsule output.
    masked_by_y = Mask()([seg_caps, y])
    # Prediction: Mask is applied to the capsule output alone.
    masked = Mask()(seg_caps)

    # Instantiate the decoder layers ONCE. Creating them inside
    # `shared_decoder` would give every model its own, independently
    # initialised copy, i.e. nothing would actually be shared.
    decoder_layers = [
        layers.Reshape((H, W, A)),
        layers.Conv2D(filters=64, kernel_size=1, padding='same',
                      kernel_initializer='he_normal', activation='relu',
                      name='recon_1'),
        layers.Conv2D(filters=128, kernel_size=1, padding='same',
                      kernel_initializer='he_normal', activation='relu',
                      name='recon_2'),
        layers.Conv2D(filters=1, kernel_size=1, padding='same',
                      kernel_initializer='he_normal', activation='sigmoid',
                      name='out_recon'),
    ]

    def shared_decoder(mask_layer):
        """Apply the shared decoder stack to a masked capsule tensor."""
        out = mask_layer
        for decoder_layer in decoder_layers:
            out = decoder_layer(out)
        return out

    # Models for training and evaluation (prediction).
    train_model = models.Model(inputs=[x, y],
                               outputs=[out_seg, shared_decoder(masked_by_y)])
    eval_model = models.Model(inputs=x,
                              outputs=[out_seg, shared_decoder(masked)])

    # Manipulate model: perturb the capsule output with an extra input.
    noise = layers.Input(shape=(H, W, C, A))
    noised_seg_caps = layers.Add()([seg_caps, noise])
    masked_noised_y = Mask()([noised_seg_caps, y])
    manipulate_model = models.Model(inputs=[x, y, noise],
                                    outputs=shared_decoder(masked_noised_y))

    return train_model, eval_model, manipulate_model
def CapsNet_nogradientstop(input_shape, n_class, routings):
    """Build a Conv1D-based capsule network and return four models.

    Pipeline: two Conv1D + Dropout stages -> ``PrimaryCap`` ->
    ``CapsuleLayer_nogradient_stop`` -> ``Extract_outputs`` /
    ``Extract_weight_c`` -> ``Length``. A dense decoder reconstructs the
    input from the masked capsule outputs.

    Original author's tuning note, kept verbatim:
    "best testing results! val 0.13xx testX cnn1 200 1 cnn2 150 9 drop1 0.68
    drop20.68 n_channels 50 kernel_size 20,dropout1"

    :param input_shape: shape of one input sample (without the batch axis);
        also the target shape of the reconstruction.
    :param n_class: number of capsules in the routing layer and width of the
        label input.
    :param routings: forwarded to the capsule layer as ``num_routing``.
    :return: ``(train_model, eval_model, manipulate_model, weight_c_model)``.
    """
    capsule_dim = 10

    def conv_dropout(tensor, kernel_size, rate, name):
        # Conv1D with 200 filters followed by Dropout.
        convolved = layers.Conv1D(filters=200,
                                  kernel_size=kernel_size,
                                  strides=1,
                                  padding='valid',
                                  kernel_initializer='he_normal',
                                  activation='relu',
                                  name=name)(tensor)
        return Dropout(rate)(convolved)

    x = layers.Input(shape=input_shape)
    features = conv_dropout(x, 1, 0.7, 'conv1')
    # Author's note on this rate: "0.75 valx loss has 0.1278!"
    features = conv_dropout(features, 9, 0.75, 'conv2')

    primary = PrimaryCap(features,
                         dim_capsule=8,
                         n_channels=60,
                         kernel_size=20,
                         kernel_initializer='he_normal',
                         strides=1,
                         padding='valid',
                         dropout=0.2)

    # Capsule layer; the routing algorithm runs here.
    routed = CapsuleLayer_nogradient_stop(num_capsule=n_class,
                                          dim_capsule=capsule_dim,
                                          num_routing=routings,
                                          name='digitcaps',
                                          kernel_initializer='he_normal',
                                          dropout=0.1)(primary)
    digitcaps = Extract_outputs(capsule_dim)(routed)
    weight_c = Extract_weight_c(capsule_dim)(routed)
    out_caps = Length(name='capsnet')(digitcaps)

    # Decoder inputs: label-masked for training, label-free for prediction.
    y = layers.Input(shape=(n_class,))
    train_mask = Mask()([digitcaps, y])
    predict_mask = Mask()(digitcaps)

    # One decoder instance is reused by every model below.
    decoder = Sequential(name='decoder')
    for decoder_layer in (
            layers.Dense(512, activation='relu',
                         input_dim=capsule_dim * n_class),
            layers.Dense(1024, activation='relu'),
            layers.Dense(np.prod(input_shape), activation='sigmoid'),
            layers.Reshape(target_shape=input_shape, name='out_recon'),
    ):
        decoder.add(decoder_layer)

    train_model = Model([x, y], [out_caps, decoder(train_mask)])
    eval_model = Model(x, [out_caps, decoder(predict_mask)])
    weight_c_model = Model(x, weight_c)

    # Manipulate model: add an external perturbation to the capsule outputs.
    noise = layers.Input(shape=(n_class, capsule_dim))
    perturbed = layers.Add()([digitcaps, noise])
    perturbed_mask = Mask()([perturbed, y])
    manipulate_model = Model([x, y, noise], decoder(perturbed_mask))

    return train_model, eval_model, manipulate_model, weight_c_model
'''
import keras
from keras import layers
import numpy as np

latent_dim = 32
height = 32
width = 32
channels = 3

generator_input = keras.Input(shape=(latent_dim, ))

# First, transform the input into a 16x16 feature map with 128 channels
x = layers.Dense(128 * 16 * 16)(generator_input)
x = layers.LeakyReLU()(x)
x = layers.Reshape((16, 16, 128))(x)

# Then, add a convolutional layer
x = layers.Conv2D(256, 5, padding='same')(x)
x = layers.LeakyReLU()(x)

# Upsample to 32 x 32
x = layers.Conv2DTranspose(256, 4, strides=2, padding='same')(x)
x = layers.LeakyReLU()(x)

# Add more convolutional layers
x = layers.Conv2D(256, 5, padding='same')(x)
x = layers.LeakyReLU()(x)
x = layers.Conv2D(256, 5, padding='same')(x)
x = layers.LeakyReLU()(x)
def MobileNet(input_shape=None,
              alpha=1.0,
              depth_multiplier=1,
              dropout=1e-3,
              include_top=True,
              weights='imagenet',
              input_tensor=None,
              pooling=None,
              classes=1000,
              **kwargs):
    """Instantiates the MobileNet architecture.

    If the backend's image data format is not `channels_last`, it is switched
    to `channels_last` while the model is built and restored afterwards. The
    restore runs in a `finally` clause, so the global setting is also put
    back when building or weight loading raises.

    # Arguments
        input_shape: optional shape tuple, only to be specified
            if `include_top` is False (otherwise the input shape
            has to be `(224, 224, 3)` (with `channels_last` data format)
            or (3, 224, 224) (with `channels_first` data format).
            It should have exactly 3 inputs channels,
            and width and height should be no smaller than 32.
            E.g. `(200, 200, 3)` would be one valid value.
        alpha: controls the width of the network.
            - If `alpha` < 1.0, proportionally decreases the number
                of filters in each layer.
            - If `alpha` > 1.0, proportionally increases the number
                of filters in each layer.
            - If `alpha` = 1, default number of filters from the paper
                 are used at each layer.
        depth_multiplier: depth multiplier for depthwise convolution
            (also called the resolution multiplier)
        dropout: dropout rate
        include_top: whether to include the fully-connected
            layer at the top of the network.
        weights: one of `None` (random initialization),
              'imagenet' (pre-training on ImageNet),
              or the path to the weights file to be loaded.
        input_tensor: optional Keras tensor (i.e. output of
            `layers.Input()`)
            to use as image input for the model.
        pooling: Optional pooling mode for feature extraction
            when `include_top` is `False`.
            - `None` means that the output of the model
                will be the 4D tensor output of the
                last convolutional layer.
            - `avg` means that global average pooling
                will be applied to the output of the
                last convolutional layer, and thus
                the output of the model will be a
                2D tensor.
            - `max` means that global max pooling will
                be applied.
        classes: optional number of classes to classify images
            into, only to be specified if `include_top` is True, and
            if no `weights` argument is specified.
        **kwargs: accepted for call compatibility; not used in this function.

    # Returns
        A Keras model instance.

    # Raises
        ValueError: in case of invalid argument for `weights`,
            or invalid input shape.
    """
    if not (weights in {'imagenet', None} or os.path.exists(weights)):
        raise ValueError('The `weights` argument should be either '
                         '`None` (random initialization), `imagenet` '
                         '(pre-training on ImageNet), '
                         'or the path to the weights file to be loaded.')

    if weights == 'imagenet' and include_top and classes != 1000:
        raise ValueError(
            'If using `weights` as `"imagenet"` with `include_top` '
            'as true, `classes` should be 1000')

    # Determine proper input shape and default size.
    if input_shape is None:
        default_size = 224
    else:
        if backend.image_data_format() == 'channels_first':
            rows = input_shape[1]
            cols = input_shape[2]
        else:
            rows = input_shape[0]
            cols = input_shape[1]

        if rows == cols and rows in [128, 160, 192, 224]:
            default_size = rows
        else:
            default_size = 224

    input_shape = _obtain_input_shape(input_shape,
                                      default_size=default_size,
                                      min_size=32,
                                      data_format=backend.image_data_format(),
                                      require_flatten=include_top,
                                      weights=weights)

    if backend.image_data_format() == 'channels_last':
        row_axis, col_axis = (0, 1)
    else:
        row_axis, col_axis = (1, 2)
    rows = input_shape[row_axis]
    cols = input_shape[col_axis]

    if weights == 'imagenet':
        if depth_multiplier != 1:
            raise ValueError('If imagenet weights are being loaded, '
                             'depth multiplier must be 1')

        if alpha not in [0.25, 0.50, 0.75, 1.0]:
            raise ValueError('If imagenet weights are being loaded, '
                             'alpha can be one of'
                             '`0.25`, `0.50`, `0.75` or `1.0` only.')

        if rows != cols or rows not in [128, 160, 192, 224]:
            if rows is None:
                # Undefined spatial size: fall back to the 224 weights.
                rows = 224
                warnings.warn('MobileNet shape is undefined.'
                              ' Weights for input shape '
                              '(224, 224) will be loaded.')
            else:
                raise ValueError('If imagenet weights are being loaded, '
                                 'input must have a static square shape '
                                 '(one of (128, 128), (160, 160), '
                                 '(192, 192), or (224, 224)). '
                                 'Input shape provided = %s' % (input_shape, ))

    if backend.image_data_format() != 'channels_last':
        warnings.warn('The MobileNet family of models is only available '
                      'for the input data format "channels_last" '
                      '(width, height, channels). '
                      'However your settings specify the default '
                      'data format "channels_first" (channels, width, height).'
                      ' You should set `image_data_format="channels_last"` '
                      'in your Keras config located at ~/.keras/keras.json. '
                      'The model being returned right now will expect inputs '
                      'to follow the "channels_last" data format.')
        backend.set_image_data_format('channels_last')
        old_data_format = 'channels_first'
    else:
        old_data_format = None

    # Everything from here on runs with the (possibly overridden) global data
    # format, so it is wrapped in try/finally to guarantee the override is
    # undone even when model construction or weight loading fails.
    try:
        if input_tensor is None:
            img_input = layers.Input(shape=input_shape)
        else:
            if not backend.is_keras_tensor(input_tensor):
                img_input = layers.Input(tensor=input_tensor,
                                         shape=input_shape)
            else:
                img_input = input_tensor

        x = _conv_block(img_input, 32, alpha, strides=(2, 2))
        x = _depthwise_conv_block(x, 64, alpha, depth_multiplier, block_id=1)

        x = _depthwise_conv_block(x, 128, alpha, depth_multiplier,
                                  strides=(2, 2), block_id=2)
        x = _depthwise_conv_block(x, 128, alpha, depth_multiplier, block_id=3)

        x = _depthwise_conv_block(x, 256, alpha, depth_multiplier,
                                  strides=(2, 2), block_id=4)
        x = _depthwise_conv_block(x, 256, alpha, depth_multiplier, block_id=5)

        x = _depthwise_conv_block(x, 512, alpha, depth_multiplier,
                                  strides=(2, 2), block_id=6)
        x = _depthwise_conv_block(x, 512, alpha, depth_multiplier, block_id=7)
        x = _depthwise_conv_block(x, 512, alpha, depth_multiplier, block_id=8)
        x = _depthwise_conv_block(x, 512, alpha, depth_multiplier, block_id=9)
        x = _depthwise_conv_block(x, 512, alpha, depth_multiplier,
                                  block_id=10)
        x = _depthwise_conv_block(x, 512, alpha, depth_multiplier,
                                  block_id=11)

        x = _depthwise_conv_block(x, 1024, alpha, depth_multiplier,
                                  strides=(2, 2), block_id=12)
        x = _depthwise_conv_block(x, 1024, alpha, depth_multiplier,
                                  block_id=13)

        if include_top:
            if backend.image_data_format() == 'channels_first':
                shape = (int(1024 * alpha), 1, 1)
            else:
                shape = (1, 1, int(1024 * alpha))

            x = layers.GlobalAveragePooling2D()(x)
            x = layers.Reshape(shape, name='reshape_1')(x)
            x = layers.Dropout(dropout, name='dropout')(x)
            x = layers.Conv2D(classes, (1, 1),
                              padding='same',
                              name='conv_preds')(x)
            x = layers.Activation('softmax', name='act_softmax')(x)
            x = layers.Reshape((classes, ), name='reshape_2')(x)
        else:
            if pooling == 'avg':
                x = layers.GlobalAveragePooling2D()(x)
            elif pooling == 'max':
                x = layers.GlobalMaxPooling2D()(x)

        # Ensure that the model takes into account
        # any potential predecessors of `input_tensor`.
        if input_tensor is not None:
            inputs = keras_utils.get_source_inputs(input_tensor)
        else:
            inputs = img_input

        # Create model.
        model = models.Model(inputs, x,
                             name='mobilenet_%0.2f_%s' % (alpha, rows))

        # Load weights.
        if weights == 'imagenet':
            if backend.image_data_format() == 'channels_first':
                raise ValueError('Weights for "channels_first" format '
                                 'are not available.')
            # File-name fragment used for each supported alpha; 0.25 is the
            # remaining value allowed by the validation above.
            alpha_text = {1.0: '1_0', 0.75: '7_5', 0.50: '5_0'}.get(
                alpha, '2_5')
            suffix = 'tf.h5' if include_top else 'tf_no_top.h5'
            model_name = 'mobilenet_%s_%d_%s' % (alpha_text, rows, suffix)
            weight_path = BASE_WEIGHT_PATH + model_name
            weights_path = keras_utils.get_file(model_name,
                                                weight_path,
                                                cache_subdir='models')
            model.load_weights(weights_path)
        elif weights is not None:
            model.load_weights(weights)
    finally:
        if old_data_format:
            backend.set_image_data_format(old_data_format)

    return model
def model_ContextSum(p, embedding_matrix, max_sent_len, n_out):
    """Build and compile the context-sum relation classification model.

    For every edge slot (``MAX_EDGES_PER_GRAPH`` of them) the sentence is
    encoded by the same stack of (optionally bidirectional) LSTMs over the
    concatenation of word embeddings and entity-position embeddings. Each
    edge is then represented by its own sentence vector concatenated with
    the sum of the sentence vectors of all other edges, and classified with
    a softmax layer.

    :param p: hyper-parameter dict; the keys read here are ``'dropout1'``,
        ``'position_emb'``, ``'rnn1_layers'``, ``'units1'``,
        ``'bidirectional'`` and ``'optimizer'``.
    :param embedding_matrix: 2-D array of pre-trained word embeddings; its
        shape gives the vocabulary size and embedding width, and its values
        initialise the (frozen) embedding layer.
    :param max_sent_len: length of the token-index input.
    :param n_out: number of output classes.
    :return: the compiled Keras model with inputs
        ``[sentence_input, entity_markers]``.
    """
    print("Parameters:", p)

    # Take sentence encoded as indices and convert it to embeddings
    sentence_input = layers.Input(shape=(max_sent_len, ),
                                  dtype='int32',
                                  name='sentence_input')
    # Repeat the input N times for each edge
    x = layers.RepeatVector(MAX_EDGES_PER_GRAPH)(sentence_input)
    word_embeddings = layers.wrappers.TimeDistributed(
        layers.Embedding(output_dim=embedding_matrix.shape[1],
                         input_dim=embedding_matrix.shape[0],
                         input_length=max_sent_len,
                         # Initialise from the matrix passed in (the original
                         # referenced an undefined name `embeddings`).
                         weights=[embedding_matrix],
                         mask_zero=True,
                         trainable=False))(x)
    word_embeddings = layers.Dropout(p['dropout1'])(word_embeddings)

    # Take token markers that identify entity positions, convert to position
    # embeddings
    entity_markers = layers.Input(shape=(MAX_EDGES_PER_GRAPH, max_sent_len, ),
                                  dtype='int8',
                                  name='entity_markers')
    pos_embeddings = layers.wrappers.TimeDistributed(
        layers.Embedding(output_dim=p['position_emb'],
                         input_dim=POSITION_VOCAB_SIZE,
                         input_length=max_sent_len,
                         mask_zero=True,
                         embeddings_regularizer=regularizers.l2(),
                         trainable=True))(entity_markers)

    # Merge word and position embeddings. Without this step the LSTMs below
    # would consume the raw repeated indices and both embedding branches
    # would be disconnected from the model.
    x = layers.concatenate([word_embeddings, pos_embeddings])

    # Apply the specified amount of RNN layers; all but the last return the
    # full sequence.
    for _ in range(p["rnn1_layers"] - 1):
        lstm_layer = layers.LSTM(p['units1'], return_sequences=True)
        if p['bidirectional']:
            lstm_layer = layers.Bidirectional(lstm_layer)
        x = layers.wrappers.TimeDistributed(lstm_layer)(x)

    lstm_layer = layers.LSTM(p['units1'], return_sequences=False)
    if p['bidirectional']:
        lstm_layer = layers.Bidirectional(lstm_layer)
    sentence_matrix = layers.wrappers.TimeDistributed(lstm_layer)(x)

    # Take the vector of the sentences with the target entity pair
    layers_to_concat = []
    num_units = p['units1'] * (2 if p['bidirectional'] else 1)
    context_shape = (MAX_EDGES_PER_GRAPH - 1, num_units)
    for i in range(MAX_EDGES_PER_GRAPH):
        # `idx=i` freezes the current index inside each lambda; a plain
        # closure over `i` would see the loop's final value whenever the
        # function is evaluated after the loop has finished.
        sentence_vector = layers.Lambda(
            lambda l, idx=i: l[:, idx],
            output_shape=(num_units, ))(sentence_matrix)
        if i == 0:
            context_vectors = layers.Lambda(
                lambda l, idx=i: l[:, idx + 1:],
                output_shape=context_shape)(sentence_matrix)
        elif i == MAX_EDGES_PER_GRAPH - 1:
            context_vectors = layers.Lambda(
                lambda l, idx=i: l[:, :idx],
                output_shape=context_shape)(sentence_matrix)
        else:
            context_vectors = layers.Lambda(
                lambda l, idx=i: K.concatenate(
                    [l[:, :idx], l[:, idx + 1:]], axis=1),
                output_shape=context_shape)(sentence_matrix)
        context_vector = GlobalSumPooling1D()(context_vectors)
        edge_vector = layers.concatenate([sentence_vector, context_vector])
        edge_vector = layers.Reshape((1, num_units * 2))(edge_vector)
        layers_to_concat.append(edge_vector)
    edge_vectors = layers.Concatenate(1)(layers_to_concat)

    # Apply softmax
    edge_vectors = layers.Dropout(p['dropout1'])(edge_vectors)
    main_output = layers.wrappers.TimeDistributed(
        layers.Dense(n_out, activation="softmax",
                     name='main_output'))(edge_vectors)

    model = models.Model(inputs=[sentence_input, entity_markers],
                         outputs=[main_output])
    model.compile(optimizer=p['optimizer'],
                  loss=masked_categorical_crossentropy,
                  metrics=['accuracy'])

    return model
def CapsNet(input_shape, n_class, routings):
    """
    A Capsule Network on MNIST.
    :param input_shape: data shape, 3d, [width, height, channels]
    :param n_class: number of classes
    :param routings: number of routing iterations
    :return: Three Keras Models: `train_model` (inputs `[x, y]`),
            `eval_model` (input `x`) and `manipulate_model`
            (inputs `[x, y, noise]`). All three reuse the same decoder.
    """
    capsule_dim = 16

    x = layers.Input(shape=input_shape)

    # Layer 1: plain convolution.
    features = layers.Conv2D(filters=256,
                             kernel_size=9,
                             strides=1,
                             padding='valid',
                             activation='relu',
                             name='conv1')(x)

    # Layer 2: primary capsules built by the project's `PrimaryCap` helper.
    primary = PrimaryCap(features,
                         dim_capsule=8,
                         n_channels=32,
                         kernel_size=9,
                         strides=2,
                         padding='valid')

    # Layer 3: capsule layer; the routing algorithm runs here.
    digitcaps = CapsuleLayer(num_capsule=n_class,
                             dim_capsule=capsule_dim,
                             routings=routings,
                             name='digitcaps')(primary)

    # Layer 4: replace each capsule with its length so the output matches
    # the shape of the true label.
    out_caps = Length(name='capsnet')(digitcaps)

    # Decoder inputs: masked with the true label for training, and masked
    # from the capsule outputs alone for prediction.
    y = layers.Input(shape=(n_class, ))
    train_mask = Mask()([digitcaps, y])
    predict_mask = Mask()(digitcaps)

    # Decoder shared between training and prediction.
    decoder = models.Sequential(name='decoder')
    for decoder_layer in (
            layers.Dense(512, activation='relu',
                         input_dim=capsule_dim * n_class),
            layers.Dense(1024, activation='relu'),
            layers.Dense(np.prod(input_shape), activation='sigmoid'),
            layers.Reshape(target_shape=input_shape, name='out_recon'),
    ):
        decoder.add(decoder_layer)

    train_model = models.Model([x, y], [out_caps, decoder(train_mask)])
    eval_model = models.Model(x, [out_caps, decoder(predict_mask)])

    # Manipulate model: axes 1 and 3 of the capsule output are squeezed
    # before the noise input is added.
    noise = layers.Input(shape=(n_class, capsule_dim))
    squeezed_caps = tf.squeeze(digitcaps, [1, 3])
    perturbed = layers.Add()([squeezed_caps, noise])
    perturbed_mask = Mask()([perturbed, y])
    manipulate_model = models.Model([x, y, noise], decoder(perturbed_mask))

    return train_model, eval_model, manipulate_model
#x = conv2d_bn(x, 32, 3, 3, strides=(1, 1), padding='same') #x = conv2d_bn(x, 32, 3, 3, strides=(1, 1), padding='same') def multiply(x, n): x_prime = tf.reshape(x, (-1, n, 1)) x_transpose = tf.transpose(x_prime, perm=[0, 2, 1]) return tf.matmul(x_transpose, x_prime) #Lambda(lambda x: multiply(x, n), output_shape =(n, n)) # Input is 100 * 5 matrix seq_input = Input(shape=(100, 5)) # convert to tensor and get 10 layers x = layers.Reshape((100, 5, 1))(seq_input) x = Conv2D(filters=10, kernel_size=(3, 3), strides=(1, 1), padding='same')(x) # get outer product to get 100*100 matrix for each layer final = {} def matmul(mat_x): y = K.tf.matmul(mat_x, mat_x, transpose_b=True) return y def multiply(x, n=100): x_prime = tf.reshape(x, (-1, n, 5)) x_transpose = tf.transpose(x_prime, perm=[0, 2, 1]) return tf.matmul(x_prime, x_transpose)
masked = Mask()( digitcaps) # Mask using the capsule with maximal length. For prediction out_caps = Length(num_classes=3, name='capsnet')(digitcaps) #==================================== n_class = '' input_shape = '' #Decoder #============= # Btara's comment: similar here decoder is only used for image reconstruction decoder = Sequential(name='decoder') decoder.add(layers.Dense(512, activation='relu', input_dim=16 * n_class)) decoder.add(layers.Dense(1024, activation='relu')) decoder.add(layers.Dense(np.prod(input_shape), activation='sigmoid')) decoder.add(layers.Reshape(target_shape=input_shape, name='out_recon')) #============== # Btara's comment: Seems right for this part, just note that the eval model is not needed (I think) # if we don't have the decoder section train_model = models.Model( inputs=[x, y], outputs=[out_caps, decoder(masked_by_y)], ) eval_model = models.Model(inputs=x, outputs=[out_caps, decoder(masked)]) # Btara's comment: Don't have to make our own function for train_generator if we use image data generator # see https://keras.io/preprocessing/image/ def train_generator(x, y, batch_size, shift_fraction=0.): train_datagen = ImageDataGenerator(
def yolo_vgg3_model(regularizer=None):
    """Build the "yolo_vgg3" detector: frozen VGG16 backbone + conv head.

    The backbone is `VGG16(include_top=False, weights='imagenet')` with every
    layer set to non-trainable. On top of it sit seven
    Conv2D -> BatchNormalization -> LeakyReLU stages (with one max-pooling
    step after the fourth) and two 1x1-convolution heads:

    * a box head with `NUM_BOX * 5` channels, reshaped to
      `(GRID_SIZE, GRID_SIZE, NUM_BOX, -1)` and passed through a sigmoid;
    * a class head with `NUM_BOX * NUM_CLASS` channels, reshaped the same
      way and passed through a softmax.

    The two heads are concatenated along the last axis.

    NOTE(review): the original author's shape annotations (448x448x3 input,
    7x7 grid) presumably assume MODEL_DIM = 448 and GRID_SIZE = 7 - confirm
    against the module constants.

    :param regularizer: kernel regularizer applied to every Conv2D created
        here (not to the VGG16 layers).
    :return: an uncompiled Keras `Model` named "yolo_vgg3".
    """
    cell_dim = NUM_BOX * (5 + NUM_CLASS)
    init_name = "glorot_normal"

    def conv_bn_leaky(tensor, filters, kernel):
        # Same-padded convolution, batch-norm on the channel axis, LeakyReLU.
        out = L.Conv2D(filters,
                       kernel_size=kernel,
                       padding="same",
                       kernel_initializer=init_name,
                       kernel_regularizer=regularizer)(tensor)
        out = L.BatchNormalization(axis=3)(out)
        return L.LeakyReLU()(out)

    def prediction_head(tensor, channels, activation, act_name):
        # 1x1 convolution -> per-box reshape -> activation.
        out = L.Conv2D(channels,
                       kernel_size=(1, 1),
                       kernel_initializer=init_name,
                       kernel_regularizer=regularizer)(tensor)
        out = L.Reshape((GRID_SIZE, GRID_SIZE, NUM_BOX, -1))(out)
        return L.Activation(activation, name=act_name)(out)

    # Input layer feeding the pre-trained backbone.
    image_in = L.Input((MODEL_DIM, MODEL_DIM, 3))
    backbone = VGG16(include_top=False, weights='imagenet',
                     input_tensor=image_in)
    for frozen in backbone.layers:
        frozen.trainable = False

    net = L.BatchNormalization(axis=3)(backbone.output)

    for filters, kernel in ((512, (1, 1)), (512, (3, 3)),
                            (1024, (1, 1)), (1024, (3, 3))):
        net = conv_bn_leaky(net, filters, kernel)

    net = L.MaxPooling2D((2, 2), strides=(2, 2))(net)

    for filters, kernel in ((cell_dim, (3, 3)), (cell_dim // 2, (1, 1)),
                            (cell_dim, (3, 3))):
        net = conv_bn_leaky(net, filters, kernel)

    boxes = prediction_head(net, NUM_BOX * 5, 'sigmoid', "ActBBox")
    classes = prediction_head(net, NUM_BOX * NUM_CLASS, 'softmax',
                              "ActClass")

    merged = L.Concatenate(axis=-1)([boxes, classes])

    return Model(inputs=image_in, outputs=merged, name="yolo_vgg3")
def build_model(self):
    """Assemble the multi-input network and store it in `self.model`.

    Four inputs are declared: "chm" (20, 20, 1), "rgb" (200, 200, 3),
    "hsi" (20, 20, 3) and "las" (40, 40, 70, 1). Three convolutional
    branches (RGB down-sampling, low-resolution HSI+CHM, 3-D LAS) are
    flattened, concatenated and passed through a stack of Dropout/Dense
    layers. The model has two outputs: "bounds", reshaped to (30, 4), and
    "labels", 30 sigmoid units. The model summary is printed at the end.
    """
    chm_input = Input(shape=(20, 20, 1), name="chm")
    rgb_input = Input(shape=(200, 200, 3), name="rgb")
    hsi_input = Input(shape=(20, 20, 3), name="hsi")
    las_input = Input(shape=(40, 40, 70, 1), name="las")

    def valid_conv2d(filters, size):
        return layers.Conv2D(filters, size, activation="relu")

    def same_conv2d(filters):
        return layers.Conv2D(filters, 2, activation="relu", padding="same")

    def same_conv3d(filters, **extra):
        return layers.Conv3D(filters, 4, activation="relu", padding="same",
                             **extra)

    # RGB downsample network: three (conv, conv, pool) stages, then two
    # more convolutions. Only the last pooling layer carries a name.
    rgb_branch = rgb_input
    for filters, size, pool_name in ((3, 5, None), (8, 5, None),
                                     (16, 4, "rgb_down")):
        rgb_branch = valid_conv2d(filters, size)(rgb_branch)
        rgb_branch = valid_conv2d(filters, size)(rgb_branch)
        pool_kwargs = {"name": pool_name} if pool_name else {}
        rgb_branch = layers.MaxPool2D(2, **pool_kwargs)(rgb_branch)
    rgb_branch = valid_conv2d(32, 4)(rgb_branch)
    rgb_branch = valid_conv2d(32, 4)(rgb_branch)
    rgb_branch = layers.Flatten()(rgb_branch)

    # Disabled high-resolution branch, kept as an inert string literal.
    """
    # HSI upsample network
    hsi_up = layers.Conv2D(3, 2, activation="relu", padding="same")(hsi_input)
    hsi_up = layers.UpSampling2D(3)(hsi_up)
    #hsi_up = layers.Dropout(0.4)(hsi_up)
    hsi_up = layers.Conv2D(3, 4, activation="relu")(hsi_up)
    hsi_up = layers.Conv2D(3, 4, activation="relu")(hsi_up)
    hsi_up = layers.UpSampling2D(2)(hsi_up)
    #hsi_up = layers.Dropout(0.4)(hsi_up)
    hsi_up = layers.Conv2D(3, 5, activation="relu")(hsi_up)
    hsi_up = layers.Conv2D(3, 5, activation="relu")(hsi_up)
    hsi_up = layers.UpSampling2D(2, name="hsi_up")(hsi_up)
    #hsi_up = layers.Dropout(0.4)(hsi_up)

    # CHM upsample network
    chm_up = layers.Conv2D(1, 2, activation="relu", padding="same")(chm_input)
    chm_up = layers.UpSampling2D(3)(chm_up)
    #chm_up = layers.Dropout(0.4)(chm_up)
    chm_up = layers.Conv2D(1, 4, activation="relu")(chm_up)
    chm_up = layers.Conv2D(1, 4, activation="relu")(chm_up)
    chm_up = layers.UpSampling2D(2)(chm_up)
    #chm_up = layers.Dropout(0.4)(chm_up)
    chm_up = layers.Conv2D(1, 5, activation="relu")(chm_up)
    chm_up = layers.Conv2D(1, 5, activation="relu")(chm_up)
    chm_up = layers.UpSampling2D(2, name="chm_up")(chm_up)
    #chm_up = layers.Dropout(0.4)(chm_up)

    # High-res network
    high_res = layers.Concatenate(axis=3)([rgb_input, hsi_up, chm_up])
    high_res = layers.Conv2D(10, 5, activation="relu")(high_res)
    high_res = layers.Conv2D(10, 5, activation="relu")(high_res)
    high_res = layers.Conv2D(5, 5, activation="relu", name="high_res")(high_res)
    high_res = layers.Flatten()(high_res)
    """

    # Low-res network: HSI and CHM stacked on the channel axis.
    low_branch = layers.Concatenate(axis=3)([hsi_input, chm_input])
    low_branch = same_conv2d(4)(low_branch)
    for filters in (8, 16, 32):
        low_branch = same_conv2d(filters)(low_branch)
        low_branch = same_conv2d(filters)(low_branch)
        low_branch = layers.MaxPool2D(2)(low_branch)
    low_branch = same_conv2d(64)(low_branch)
    low_branch = same_conv2d(64)(low_branch)
    low_branch = layers.Flatten()(low_branch)

    # Las 3D network.
    las_branch = las_input
    for filters in (2, 8, 16):
        las_branch = same_conv3d(filters)(las_branch)
        las_branch = same_conv3d(filters)(las_branch)
        las_branch = layers.MaxPool3D(2)(las_branch)
    las_branch = same_conv3d(32)(las_branch)
    las_branch = same_conv3d(32, name="las_net")(las_branch)
    las_branch = layers.Flatten()(las_branch)

    # Combine networks with fully connected layers (no activation is set on
    # the Dense(256) layers).
    merged = layers.concatenate([low_branch, las_branch, rgb_branch])
    for rate in (0.1, 0.4, 0.4, 0.4):
        merged = layers.Dropout(rate)(merged)
        merged = layers.Dense(256)(merged)
    merged = layers.Dropout(0.0)(merged)

    bounds_out = layers.Dense(
        120, kernel_regularizer=keras.regularizers.l2(0.0001))(merged)
    bounds_out = layers.Reshape((30, 4), name="bounds")(bounds_out)

    labels_out = layers.Dense(
        30,
        activation="sigmoid",
        kernel_regularizer=keras.regularizers.l2(0.0001),
        name="labels")(merged)

    self.model = Model(
        inputs=[rgb_input, chm_input, hsi_input, las_input],
        outputs=[bounds_out, labels_out],
    )
    self.model.summary()
def main():
    """Build the capsule network, train it from generators, save to Test.h5.

    Steps: configure NumPy printing, build the capsule network and its
    decoder for 64x64x1 inputs and 3 classes, create the training and
    validation generators from the 'train/' and 'test/' directories, compile
    `train_model` (rmsprop, MSE loss, accuracy metric), fit for 8 epochs and
    save the trained model as 'Test.h5'.
    """
    import sys

    # Print arrays without truncation. `threshold=np.nan` is rejected by
    # current NumPy releases; `sys.maxsize` is the supported way to ask for
    # untruncated output and is also accepted by older releases.
    np.set_printoptions(threshold=sys.maxsize)

    number_of_classes = 3
    input_shape = (64, 64, 1)

    x = layers.Input(shape=input_shape)

    # Inputs to the model are MRI images which are down-sampled to 64 x 64
    # from 512 x 512, in order to reduce the number of parameters in the
    # model and decrease the training time.
    # First layer is a convolutional layer with 64 x 9 x 9 filters and
    # stride of 1 which leads to 64 feature maps of size 56 x 56.
    conv1 = layers.Conv2D(64, (9, 9), activation='relu', name="FirstLayer")(x)

    # The second layer is a Primary Capsule layer resulting from 256 x 9 x 9
    # convolutions with strides of 2.
    primaryCaps = PrimaryCap(inputs=conv1,
                             dim_capsule=8,
                             n_channels=32,
                             kernel_size=9,
                             strides=2,
                             padding='valid')

    # This layer consists of 32 "Component Capsules" with dimension of 8,
    # each of which has feature maps of size 24 x 24 (i.e., each Component
    # Capsule contains 24 x 24 localized individual Capsules).

    # Final capsule layer includes 3 capsules, referred to as "Class
    # Capsules", one for each type of candidate brain tumor. The dimension
    # of these capsules is 16.
    capLayer2 = CapsuleLayer(num_capsule=3,
                             dim_capsule=16,
                             routings=2,
                             name="ThirdLayer")(primaryCaps)

    # Auxiliary layer replacing each capsule with its length, to match the
    # true label's shape.
    out_caps = Length(name='capsnet')(capLayer2)

    # Decoder network.
    y = layers.Input(shape=(number_of_classes, ))

    # The true label is used to mask the output of capsule layer (training).
    masked_by_y = Mask()([capLayer2, y])
    # Mask from the capsule outputs alone (prediction).
    masked = Mask()(capLayer2)

    # Shared Decoder model in training and prediction
    decoder = models.Sequential(name='decoder')
    decoder.add(
        layers.Dense(512, activation='relu',
                     input_dim=16 * number_of_classes))
    decoder.add(layers.Dense(1024, activation='relu'))
    decoder.add(layers.Dense(np.prod(input_shape), activation='sigmoid'))
    decoder.add(layers.Reshape(target_shape=input_shape, name='out_recon'))

    # Models for training and evaluation (prediction)
    train_model = models.Model([x, y], [out_caps, decoder(masked_by_y)])
    eval_model = models.Model(x, [out_caps, decoder(masked)])

    # Manipulate model. It is built but not used further in this function
    # (the original author also flagged it as probably unnecessary).
    noise = layers.Input(shape=(number_of_classes, 16))
    noised_capLayer2 = layers.Add()([capLayer2, noise])
    masked_noised_y = Mask()([noised_capLayer2, y])
    manipulate_model = models.Model([x, y, noise], decoder(masked_noised_y))

    train_data_directory = 'train/'
    validation_data_directory = 'test/'
    bsize = 32

    train_generator = create_generator(train_data_directory,
                                       batch_size=bsize)
    validation_generator = create_generator(validation_data_directory,
                                            batch_size=bsize)

    # `summary()` prints by itself and returns None, so it is not wrapped in
    # print() (which would emit an extra "None" line).
    train_model.summary()

    train_model.compile(
        optimizer="rmsprop",
        loss='mse',
        metrics=['accuracy'],
    )

    train_model.fit_generator(train_generator,
                              steps_per_epoch=72,
                              epochs=8,
                              validation_data=validation_generator,
                              validation_steps=24,
                              verbose=1)

    train_model.save('Test.h5')
inner = MaxPooling2D(pool_size=(2, 2), name='s2-maxpool')(inner) inner = Conv2D(256, (3, 3), padding='same', name='s3-conv1')(inner) #inner = Dropout(0.3,name='s3-dropout1')(inner) inner = layers.BatchNormalization(name='s3-batchnorm')(inner) inner = layers.advanced_activations.LeakyReLU(0.1, name='s3-conv2-leakyrelu')(inner) inner = MaxPooling2D(pool_size=(2, 1), name='s3-maxpool')(inner) inner = Conv2D(256, (4, 1), name='s4-conv1')(inner) inner = Dropout(0.3, name='s3-dropout2')(inner) inner = layers.advanced_activations.LeakyReLU(0.1, name='s4-conv1-leakyrelu')(inner) inner = Conv2D(labelsn, (1, 1), name='s4-conv2')(inner) inner = layers.Reshape((TIMESTEP, labelsn), name='y_pred_nosoftmax')(inner) ##may risk y_pred = layers.Activation('softmax', name='y_pred')(inner) labels = Input(name='the_labels', shape=[labelmaxn], dtype='float32') input_length = Input(name='input_length', shape=[1], dtype='int32') label_length = Input(name='label_length', shape=[1], dtype='int32') loss_out = layers.Lambda(ctc_lambda_func, output_shape=(1, ), name='ctc')( [y_pred, labels, input_length, label_length]) model = Model(inputs=[input_data, labels, input_length, label_length], outputs=loss_out) # the loss calc occurs elsewhere, so use a dummy lambda func for the loss model.compile(loss={ 'ctc': lambda y_true, y_pred: y_pred
def LvqCapsNet(input_shape):
    """Assemble the LVQ capsule network and its reconstruction decoder.

    The network is a stack of seven convolutional capsules followed by three
    prototype-based capsules (tangent-distance dissimilarity + Gibbs routing).
    A dense decoder reconstructs a (28, 28, 1) image from the label-masked
    output of the last capsule.

    Args:
        input_shape: shape handed to the image ``Input`` layer.

    Returns:
        Tuple ``(model, decoder, model_vis_caps2)``:
        * ``model`` maps ``[image, one-hot label]`` to
          ``[digitcaps[2], reconstruction]``;
        * ``decoder`` is the stand-alone reconstruction ``Sequential``;
        * ``model_vis_caps2`` feeds two fresh inputs straight into the last
          capsule (used for visualizations).
    """
    image_in = Input(shape=input_shape)

    def same_conv(filters, kernel, initializer, **extra):
        # All convolutions in this network use 'same' padding.
        return Conv2D(filters, kernel, padding='same',
                      kernel_initializer=initializer, **extra)

    def feature_capsule(capsule, conv, normalize):
        # conv [-> BatchNorm -> ReLU] -> Dropout(0.25), added in that order.
        capsule.add(conv)
        if normalize:
            capsule.add(BatchNormalization())
            capsule.add(Activation('relu'))
        capsule.add(Dropout(0.25))
        return capsule

    # Blocks 1-4: conv + batch-norm + relu + dropout
    features = feature_capsule(
        Capsule(), same_conv(32 + 1, (3, 3), glorot_normal()), True)(image_in)
    features = feature_capsule(
        Capsule(), same_conv(64 + 1, (3, 3), glorot_normal()), True)(features)
    features = feature_capsule(
        Capsule(),
        same_conv(64 + 1, (3, 3), RandomNormal(stddev=0.01)),
        True)(features)
    features = feature_capsule(
        Capsule(prototype_distribution=32),
        same_conv(64 + 1, (5, 5), RandomNormal(stddev=0.01), strides=2),
        True)(features)

    # Blocks 5-6: conv + dropout only
    features = feature_capsule(
        Capsule(),
        same_conv(32 + 1, (3, 3), RandomNormal(stddev=0.01)),
        False)(features)
    features = feature_capsule(
        Capsule(), same_conv(64 + 1, (3, 3), glorot_normal()), False)(features)

    # Block 7: conv, then split; relu and flatten are applied on scope key 1
    split_capsule = Capsule()
    split_capsule.add(same_conv(64 + 1, (3, 3), glorot_normal()))
    split_capsule.add(SplitModule())
    split_capsule.add(Activation('relu'), scope_keys=1)
    split_capsule.add(Flatten(), scope_keys=1)
    features = split_capsule(features)

    # Caps1
    proto_caps1 = Capsule(prototype_distribution=(1, 8 * 8))
    proto_caps1.add(InputModule(signal_shape=(-1, 64),
                                init_diss_initializer=None,
                                trainable=False))
    proto_caps1.add(TangentDistance(squared_dissimilarity=False,
                                    epsilon=1.e-12,
                                    linear_factor=0.66,
                                    projected_atom_shape=16))
    proto_caps1.add(GibbsRouting(norm_axis='channels', trainable=False))
    features = proto_caps1(features)

    # Caps2
    proto_caps2 = Capsule(prototype_distribution=(1, 4 * 4))
    proto_caps2.add(Reshape((8, 8, 64)))
    for _ in range(2):
        proto_caps2.add(same_conv(64, (3, 3), glorot_normal()))
    proto_caps2.add(InputModule(signal_shape=(8 * 8, 64),
                                init_diss_initializer=None,
                                trainable=False))
    proto_caps2.add(TangentDistance(projected_atom_shape=16,
                                    squared_dissimilarity=False,
                                    epsilon=1.e-12,
                                    linear_factor=0.66,
                                    signal_output='signals'))
    proto_caps2.add(GibbsRouting(norm_axis='channels', trainable=False))
    features = proto_caps2(features)

    # Caps3 (class capsule)
    class_capsule = Capsule(prototype_distribution=(1, 10))
    class_capsule.add(Reshape((4, 4, 64)))
    class_capsule.add(same_conv(128, (3, 3), glorot_normal()))
    class_capsule.add(InputModule(signal_shape=128,
                                  init_diss_initializer=None,
                                  trainable=False))
    class_capsule.add(RestrictedTangentDistance(projected_atom_shape=16,
                                                epsilon=1.e-12,
                                                squared_dissimilarity=False,
                                                linear_factor=0.66,
                                                signal_output='signals'))
    class_capsule.add(GibbsRouting(norm_axis='channels',
                                   trainable=False,
                                   diss_regularizer=MaxValue(alpha=0.0001)))
    class_capsule.add(DissimilarityTransformation(
        probability_transformation='neg_softmax', name='lvq_caps'))
    class_outputs = class_capsule(features)

    # Intermediate model for Caps2; used for visualizations
    vis_inputs = [Input((4, 4, 64)), Input((16,))]
    model_vis_caps2 = models.Model(
        vis_inputs, class_capsule(list_to_dict(vis_inputs)))

    # Decoder network: the true label masks the class capsule output.
    label_in = layers.Input(shape=(10,))
    masked_by_label = Mask()([class_outputs[0], label_in])

    # Decoder shared between training and prediction
    recon_shape = (28, 28, 1)
    decoder = models.Sequential(name='decoder')
    decoder.add(layers.Dense(512, activation='relu', input_dim=128 * 10))
    decoder.add(layers.Dense(1024, activation='relu'))
    decoder.add(layers.Dense(np.prod(recon_shape), activation='sigmoid'))
    decoder.add(layers.Reshape(target_shape=recon_shape, name='out_recon'))

    # Model for training and evaluation (prediction)
    model = models.Model([image_in, label_in],
                         [class_outputs[2], decoder(masked_by_label)])
    return model, decoder, model_vis_caps2
def define_graph(self, mode, options):
    """Build the Keras/TensorFlow graph for per-head mask prediction.

    Each "head" owns a box in the molded image and predicts the masks of the
    classes listed for it in ``options['heads']`` plus one background mask.

    Args:
        mode: ``'training'`` or ``'inference'``.
        options: configuration mapping. Keys read here:
            required -- ``box_pred_method`` (``'lbf_guided'``,
            ``'regress_landmark'``, ``'regress_segbox'`` or ``'gt_segbox'``),
            ``images_per_gpu``, ``heads``, ``class_names``, ``image_size``,
            ``landmark_box_paddings448``, ``backbone``;
            required by some branches -- ``box_neck_conv_num`` (regress_*),
            ``head_box_padding_ratio`` (regress_segbox, gt_segbox);
            optional -- ``landmark_box_padding_additional_ratio`` (0.0),
            ``dropout_rate`` (0.0), ``box_from`` ('P2'),
            ``use_rpn_box_loss`` (True), ``use_soft_l1_loss`` (True),
            ``fixed_head_box``, ``pool_size`` (56), ``deconv_num`` (2),
            ``conv_num`` (1).

    Returns:
        ``[inputs, outputs]``: two lists of Keras tensors. In training mode
        the outputs are per-sample losses (``mask_loss`` plus
        ``landmark68_loss`` or ``box_loss`` for the regress_* methods); in
        inference mode they are ``output_masks``, ``molded_head_boxes`` and,
        for ``'regress_landmark'``, the predicted landmarks.

    Raises:
        RuntimeError: ``landmark_box_paddings448`` is missing.
        NotImplementedError: unknown ``backbone``.
        ValueError: a regress_* method is requested but ``box_from`` does not
            name a feature map of the chosen backbone.
    """
    assert mode in ['training', 'inference']

    box_pred_method = options['box_pred_method']
    print(f'box_pred_method: {box_pred_method}')
    assert box_pred_method in [
        'lbf_guided', 'regress_landmark', 'regress_segbox', 'gt_segbox']

    # Unused below; still read so that a missing key fails early as before.
    batch_size = options['images_per_gpu']  # noqa: F841

    heads = options['heads']
    num_heads = len(heads)
    print(f'num_heads={num_heads}')

    num_masks = sum(len(class_ids) for class_ids in heads)
    assert num_masks == len(options['class_names'])
    print(f'num_masks={num_masks}')

    head_label_names = []
    for class_ids in heads:
        names_this_head = []
        for class_id in class_ids:
            # NOTE(review): `+=` extends the list, so a plain-string entry in
            # options['class_names'] would be split into characters -- confirm
            # that the entries are sequences of names.
            names_this_head += options['class_names'][class_id]
        head_label_names.append(names_this_head)
    assert len(head_label_names) == len(heads)

    h = w = options['image_size']
    # assert h > 0 and w > 0 and h % 2**6 == 0 and w % 2**6 == 0

    if 'landmark_box_paddings448' not in options:
        raise RuntimeError('padding information required')
    delta = options.get('landmark_box_padding_additional_ratio', 0.0)
    # Paddings are given for a 448-pixel image; rescale them to [0, 1] and
    # grow every box by `delta` on each side.
    molded_padding_dict = {
        name: np.array(padding, np.float32) / 448.0 +
        np.array([-delta, -delta, +delta, +delta], np.float32)
        for name, padding in options['landmark_box_paddings448'].items()
    }
    pprint(molded_padding_dict)

    # mean landmark68 pts
    mean_molded_landmark68_pts = tf.stack(
        [utils.MEAN_MOLDED_LANDMARK68_PTS], name='mean_molded_landmark68_pts')

    # mean head boxes
    mean_molded_head_boxes = utils.extract_landmark68_boxes_graph(
        mean_molded_landmark68_pts, head_label_names, molded_padding_dict)

    dropout_rate = options.get('dropout_rate', 0.0)
    print(f'dropout_rate={dropout_rate}')

    # ------------------------------------------------------------------
    # Shared helpers (pure functions; they create no layers by themselves)
    # ------------------------------------------------------------------
    def _zero_where_missing(xx):
        # xx = [exist_flag, per-sample loss]; samples whose flag is not > 0
        # contribute a loss of zero.
        flag, loss = xx
        return tf.where(tf.reshape(flag > 0, tf.shape(loss)),
                        loss, tf.zeros_like(loss))

    def _pad_and_freeze(padding_ratio):
        # Returns a function that moves the first two box coordinates by
        # -padding_ratio and the last two by +padding_ratio, then blocks
        # gradients from flowing back into the box predictor.
        def _apply(boxes):
            return tf.stop_gradient(boxes + tf.constant(
                [-padding_ratio, -padding_ratio,
                 padding_ratio, padding_ratio], tf.float32))
        return _apply

    def _landmarks_to_boxes(pts):
        return utils.extract_landmark68_boxes_graph(
            pts, head_label_names, molded_padding_dict)

    # ------------------------------------------------------------------
    # Inputs
    # ------------------------------------------------------------------
    input_molded_image = KL.Input(
        shape=[h, w, 3], name="input_molded_image")  # molded
    input_molded_image_exist = KL.Input(
        shape=[1], name='input_molded_image_exist', dtype=tf.uint8)
    print('input: %s' % input_molded_image.name)
    print('input_molded_image_exist.shape: {}, {}'.format(
        input_molded_image_exist.shape,
        input_molded_image_exist._keras_shape))

    if mode == 'training':
        input_gt_masks = KL.Input(
            shape=[num_masks, h, w], name="input_gt_masks")
        input_gt_masks_exist = KL.Input(
            shape=[1], name='input_gt_masks_exist', dtype=tf.uint8)
        print('input_gt_masks_exist.shape: {}, {}'.format(
            input_gt_masks_exist.shape, input_gt_masks_exist._keras_shape))
        molded_gt_masks = KL.Lambda(lambda xx: tf.cast(xx, tf.float32))(
            input_gt_masks)

    if box_pred_method == 'lbf_guided':
        input_molded_lbf_landmark68_pts = KL.Input(
            shape=[68, 2], dtype=tf.float32,
            name="input_molded_lbf_landmark68_pts")
        input_molded_lbf_landmark68_pts_exist = KL.Input(
            shape=[1], name='input_molded_lbf_landmark68_pts_exist',
            dtype=tf.uint8)
        print('input_molded_lbf_landmark68_pts_exist.shape: {}, {}'.format(
            input_molded_lbf_landmark68_pts_exist.shape,
            input_molded_lbf_landmark68_pts_exist._keras_shape))
    elif box_pred_method == 'regress_landmark':
        if mode == 'training':
            input_gt_molded_landmark68_pts = KL.Input(
                shape=[68, 2], dtype=tf.float32,
                name='input_gt_molded_landmark68_pts')
            input_gt_molded_landmark68_pts_exist = KL.Input(
                shape=[1], name='input_gt_molded_landmark68_pts_exist',
                dtype=tf.uint8)
    elif box_pred_method == 'regress_segbox':
        def _box_to_std_deform(box):
            return utils.compute_box_deform(mean_molded_head_boxes, box)

        def _std_deform_to_box(deform):
            return utils.apply_box_deform(mean_molded_head_boxes, deform)

        if mode == 'training':
            input_gt_molded_head_boxes = KL.Input(
                shape=[num_heads, 4], dtype=tf.float32,
                name='input_gt_molded_head_boxes')
            input_gt_molded_head_boxes_exist = KL.Input(
                shape=[1], name='input_gt_molded_head_boxes_exist',
                dtype=tf.uint8)
            # get box deforms
            input_gt_head_box_deforms = KL.Lambda(
                _box_to_std_deform, name='input_gt_head_box_deforms')(
                    input_gt_molded_head_boxes)
    elif box_pred_method == 'gt_segbox':
        input_gt_molded_head_boxes = KL.Input(
            shape=[num_heads, 4], dtype=tf.float32,
            name='input_gt_molded_head_boxes')
        input_gt_molded_head_boxes_exist = KL.Input(
            shape=[1], name='input_gt_molded_head_boxes_exist',
            dtype=tf.uint8)

    # ------------------------------------------------------------------
    # Construct Backbone Network
    # ------------------------------------------------------------------
    box_from = options.get('box_from', 'P2')
    backbone = options['backbone']
    fpn_backbones = ('vgg16fpn', 'vgg16fpnP2',
                     'resnet50fpn', 'resnet50fpnP2', 'resnet50fpnC4')

    if backbone == 'vgg16':
        print('making vgg16 backbone')
        C1, C2, C3, C4, C5 = vgg16_graph(input_molded_image)
        assert box_from == 'C5'
        # Previously left unassigned, which broke the regress_* methods on
        # this backbone with an unbound-variable error.
        box_feature = C5
        mrcnn_feature_maps = [C5]
    elif backbone == 'resnet50':
        C1, C2, C3, C4, _ = resnet_graph(
            input_molded_image, 'resnet50', False)
        assert box_from == 'C4'
        box_feature = C4
        mrcnn_feature_maps = [C4]
    elif backbone in fpn_backbones:
        if backbone != 'resnet50fpnC4':
            # (the C4 variant has never announced itself; keep it silent)
            print(f'making {backbone} backbone')
        if backbone.startswith('vgg16'):
            C1, C2, C3, C4, C5 = vgg16_graph(input_molded_image)
        else:
            C1, C2, C3, C4, C5 = resnet_graph(
                input_molded_image, 'resnet50', True)
        P2, P3, P4, P5, _ = build_fpn([C1, C2, C3, C4, C5])
        # None when box_from names no known map; only an error if the box
        # feature is actually needed (checked below).
        box_feature = {'P2': P2, 'C5': C5, 'C4': C4}.get(box_from)
        if backbone.endswith('fpnP2'):
            mrcnn_feature_maps = [P2]
        elif backbone.endswith('fpnC4'):
            mrcnn_feature_maps = [C4]
        else:
            mrcnn_feature_maps = [P2, P3, P4, P5]
    else:
        raise NotImplementedError()

    # ------------------------------------------------------------------
    # Box prediction
    # ------------------------------------------------------------------
    if box_pred_method in ['regress_landmark', 'regress_segbox']:
        if box_feature is None:
            raise ValueError(
                f'box_from={box_from!r} is not available for backbone '
                f'{backbone!r}; expected one of P2, C4, C5')
        # get box and optionally landmarks
        with tf.name_scope('box_neck'):
            x = box_feature
            box_neck_conv_num = options['box_neck_conv_num']
            for k in range(box_neck_conv_num):
                x = KL.Conv2D(320, (3, 3), strides=(1, 1),
                              padding='same', name=f'box_conv{k}')(x)
                x = KL.BatchNormalization(name=f'box_convbn{k}')(x)
                x = KL.Activation('relu')(x)
            x = KL.Conv2D(1280, (1, 1), name='box_conv_last')(x)
            x = KL.BatchNormalization(name='box_convbn_last')(x)
            x = KL.GlobalAveragePooling2D()(x)
            x = KL.Dropout(dropout_rate)(x)
            box_feature = x
        print(f'box_feature.shape={box_feature.shape}')

    if box_pred_method == 'lbf_guided':
        molded_head_boxes = KL.Lambda(
            _landmarks_to_boxes,
            name='molded_head_boxes')(input_molded_lbf_landmark68_pts)

    elif box_pred_method == 'regress_landmark':
        x = box_feature
        x = KL.Dense(68 * 2, name='box_landmark_fc')(x)
        x = KL.Reshape((68, 2))(x)
        # The dense layer predicts offsets from the mean landmarks.
        pred_molded_landmark68_pts = KL.Lambda(
            lambda xx: xx + mean_molded_landmark68_pts,
            name='pred_molded_landmark68_pts')(x)
        molded_head_boxes = KL.Lambda(
            _landmarks_to_boxes,
            name='molded_head_boxes')(pred_molded_landmark68_pts)

        # compute landmark loss
        if mode == 'training':
            def _l2_loss(pts1, pts2):
                # (batch, 68, 2) -> (batch,): mean point-wise distance
                return tf.reduce_mean(
                    tf.norm(pts1 - pts2, axis=-1), axis=-1)

            landmark68_loss = KL.Lambda(lambda xx: _l2_loss(xx[0], xx[1]))(
                [pred_molded_landmark68_pts, input_gt_molded_landmark68_pts])
            landmark68_loss = KL.Lambda(
                _zero_where_missing, name='landmark68_loss')([
                    input_gt_molded_landmark68_pts_exist, landmark68_loss])
            print('landmark68_loss.shape={}, {}'.format(
                landmark68_loss.shape, landmark68_loss._keras_shape))

    elif box_pred_method == 'regress_segbox':
        x = box_feature
        x = KL.Dense(num_heads * 4, name='box_fc')(x)
        use_rpn_box_loss = options.get('use_rpn_box_loss', True)
        print(f'use_rpn_box_loss={use_rpn_box_loss}')
        head_box_padding_ratio = options['head_box_padding_ratio']

        if use_rpn_box_loss:
            # The network predicts deformations of the mean head boxes.
            pred_head_box_deforms = KL.Reshape((num_heads, 4))(x)
            pred_molded_head_boxes = KL.Lambda(
                _std_deform_to_box, name='pred_molded_head_boxes')(
                    pred_head_box_deforms)
        else:
            # The network predicts the boxes directly.
            pred_molded_head_boxes = KL.Reshape((num_heads, 4))(x)

        molded_head_boxes = KL.Lambda(
            _pad_and_freeze(head_box_padding_ratio),
            name='molded_head_boxes')(pred_molded_head_boxes)

        # compute segbox loss
        if mode == 'training':
            use_soft_l1_loss = options.get('use_soft_l1_loss', True)

            def _l1_loss(box_deform1, box_deform2):
                # (batch, num_heads, 4) -> (batch,)
                diff = box_deform1 - box_deform2
                if use_soft_l1_loss:
                    # smooth |x| = sqrt(x^2 + eps): differentiable at 0
                    return tf.reduce_mean(
                        tf.sqrt(tf.square(diff) + K.epsilon()), axis=[1, 2])
                return tf.reduce_mean(tf.abs(diff), axis=[1, 2])

            # The loss lives in the same space as the network prediction.
            if use_rpn_box_loss:
                loss_operands = [input_gt_head_box_deforms,
                                 pred_head_box_deforms]
            else:
                loss_operands = [input_gt_molded_head_boxes,
                                 pred_molded_head_boxes]
            box_loss = KL.Lambda(lambda xx: _l1_loss(xx[0], xx[1]))(
                loss_operands)
            box_loss = KL.Lambda(_zero_where_missing, name='box_loss')([
                input_gt_molded_head_boxes_exist, box_loss])
            print('box_loss.shape={}, {}'.format(
                box_loss.shape, box_loss._keras_shape))

    elif box_pred_method == 'gt_segbox':
        head_box_padding_ratio = options['head_box_padding_ratio']
        molded_head_boxes = KL.Lambda(
            _pad_and_freeze(head_box_padding_ratio),
            name='molded_head_boxes')(input_gt_molded_head_boxes)

    if 'fixed_head_box' in options:
        # replace certain molded_head_boxes with assigned ones
        fixed_head_box = options['fixed_head_box']
        fixed_head_box_flags = np.zeros((num_heads), np.uint8)
        fixed_head_box_values = np.zeros((num_heads, 4), np.float32)
        for head_id, box in fixed_head_box.items():
            fixed_head_box_flags[head_id] = 1
            fixed_head_box_values[head_id, :] = np.array(box, np.float32)
        print(f'fixed_head_box_flags={fixed_head_box_flags}')
        print(f'fixed_head_box_values={fixed_head_box_values}')

        batch_dim = tf.shape(molded_head_boxes)[0]
        # tf.where needs a boolean condition, so convert the uint8 flags
        # before tiling them to (batch, num_heads, 4).
        tiled_flags = tf.tile(
            tf.expand_dims(tf.expand_dims(
                tf.constant(fixed_head_box_flags > 0), 0), -1),
            [batch_dim, 1, 4])
        tiled_values = tf.tile(
            tf.expand_dims(tf.constant(fixed_head_box_values), 0),
            [batch_dim, 1, 1])
        molded_head_boxes = KL.Lambda(lambda xx: tf.where(
            tiled_flags, tiled_values, xx))(molded_head_boxes)

    # ------------------------------------------------------------------
    # Construct Head Networks
    # ------------------------------------------------------------------
    head_class_nums = [len(class_ids) for class_ids in heads]

    # ROI Pooling
    pool_size = options.get('pool_size', 56)
    deconv_num = options.get('deconv_num', 2)
    conv_num = options.get('conv_num', 1)

    molded_head_boxes = KL.Lambda(tf.stop_gradient)(molded_head_boxes)
    aligned = PyramidROIAlignAll(
        [pool_size, pool_size], name="roi_align_mask")(
            [molded_head_boxes] + mrcnn_feature_maps)

    fg_masks = [None] * num_heads
    bg_masks = [None] * num_heads

    def _slice_lambda(index):
        # Factory so every Lambda captures its own head index.
        return lambda xx: xx[:, index, :, :, :]

    head_mask_features = [None] * num_heads
    for i in range(num_heads):
        x = KL.Lambda(_slice_lambda(i))(aligned)
        for k in range(conv_num):
            x = KL.Conv2D(
                256, (3, 3), padding="same",
                name=f"mrcnn_mask_conv{k+1}_{i}")(x)
            x = BatchNorm(axis=-1, name=f'mrcnn_mask_bn{k+1}_{i}')(x)
            x = KL.Activation('relu')(x)
            if dropout_rate > 0:
                x = KL.Dropout(dropout_rate)(x)
        if deconv_num == 1:
            # to be compatible with previous trained models
            x = KL.Conv2DTranspose(
                256, (2, 2), strides=2, activation="relu",
                name="mrcnn_mask_deconv_%d" % i)(x)
        else:
            for k in range(deconv_num):
                x = KL.Conv2DTranspose(
                    256, (2, 2), strides=2, activation="relu",
                    name="mrcnn_mask_deconv%d_%d" % (k + 1, i))(x)
        # [batch, h, w, 256]
        head_mask_features[i] = x

    # Each stride-2 transposed convolution doubles the spatial size.
    mask_feature_size = pool_size * 2**deconv_num

    for i in range(num_heads):
        x = head_mask_features[i]
        num_classes_this_head = head_class_nums[i]
        assert num_classes_this_head > 0
        # Channel 0 is the background, channels 1.. are this head's classes.
        x = KL.Conv2D(
            1 + num_classes_this_head, (1, 1), strides=1,
            name='mrcnn_mask_conv_last_%d' % i, activation='linear')(x)
        # [batch, height, width, 1 + num_classes]
        x = KL.Lambda(
            lambda xx: tf.nn.softmax(xx, dim=-1),
            name="mrcnn_fullmask_%d" % i)(x)
        # [batch, num_classes, height, width]
        fg_masks[i] = KL.Lambda(
            lambda xx: tf.transpose(xx[:, :, :, 1:], [0, 3, 1, 2]),
            name='mrcnn_fg_mask_%d' % i)(x)
        # [batch, height, width]
        bg_masks[i] = KL.Lambda(
            lambda xx: xx[:, :, :, 0], name='mrcnn_bg_mask_%d' % i)(x)
        print(fg_masks[i]._keras_shape, fg_masks[i].shape,
              bg_masks[i]._keras_shape, bg_masks[i].shape)

    if len(fg_masks) > 1:
        mrcnn_fg_masks = KL.Lambda(
            lambda xx: tf.concat(xx, axis=1), name='mrcnn_fg_masks')(fg_masks)
    else:
        mrcnn_fg_masks = KL.Lambda(
            lambda xx: xx, name='mrcnn_fg_masks')(fg_masks[0])

    if len(bg_masks) > 1:
        mrcnn_bg_masks = KL.Lambda(
            lambda xx: tf.stack(xx, axis=1), name='mrcnn_bg_masks')(bg_masks)
    else:
        mrcnn_bg_masks = KL.Lambda(
            lambda xx: tf.expand_dims(xx, axis=1),
            name='mrcnn_bg_masks')(bg_masks[0])

    # [batch, num_masks+num_heads, height, width]
    mrcnn_masks = KL.Concatenate(
        axis=1, name='mrcnn_masks')([mrcnn_fg_masks, mrcnn_bg_masks])
    print('mrcnn_masks.shape={}, {}'.format(mrcnn_masks.shape,
                                            mrcnn_masks._keras_shape))

    def _tile_by_head_classes(data):
        # (batch, num_heads, ...) -> (batch, num_masks, ...): every class
        # receives the entry of the head it belongs to.
        tiled = [None] * num_masks
        for i, class_ids in enumerate(heads):
            for class_id in class_ids:
                tiled[class_id] = data[:, i]
        assert None not in tiled
        return tf.stack(tiled, axis=1)

    # Unmold masks back to image view
    def _unmold_mask(masks, boxes):
        # masks: (batch, num_masks, h, w)
        # boxes: (batch, num_heads, 4)
        mask_h, mask_w = tf.shape(masks)[2], tf.shape(masks)[3]
        # (batch, num_masks, 4)
        boxes = _tile_by_head_classes(boxes)
        masks = tf.reshape(masks, (-1, mask_h, mask_w))
        boxes = tf.reshape(boxes, (-1, 4))
        unmolded_masks = inverse_box_crop(masks, boxes, [h, w])
        return tf.reshape(unmolded_masks, (-1, num_masks, h, w))

    output_masks = KL.Lambda(
        lambda xx: _unmold_mask(xx[0], xx[1]),
        name='output_masks')([mrcnn_fg_masks, molded_head_boxes])
    print('output_masks.shape={}, {}'.format(
        output_masks.shape, output_masks._keras_shape))

    if mode == "training":
        head_mask_shape = [mask_feature_size, mask_feature_size]
        print('head_mask_shape={}'.format(head_mask_shape))

        # ---- mask loss targets ----
        def _extract_gt_fg_batched(gt_masks, boxes):
            # gt_masks: [batch, num_masks, h, w]
            # boxes: [batch, num_heads, 4]
            # [batch * num_masks, h, w, 1]
            gt_masks = tf.reshape(gt_masks, [-1, h, w, 1])
            # [batch, num_masks, 4]
            boxes = _tile_by_head_classes(boxes)
            # [batch * num_masks, 4]
            boxes = tf.reshape(boxes, [-1, 4])
            # [batch * num_masks, mask_h, mask_w]
            target_masks = tf.image.crop_and_resize(
                gt_masks, boxes, tf.range(tf.shape(gt_masks)[0]),
                head_mask_shape)
            return tf.reshape(target_masks,
                              [-1, num_masks] + head_mask_shape)

        target_gt_fg_masks = KL.Lambda(
            lambda xx: _extract_gt_fg_batched(xx[0], xx[1]))(
                [molded_gt_masks, molded_head_boxes])

        def _extract_gt_bg_batched(gt_masks, boxes):
            # gt_masks: [batch, num_masks, h, w]
            # boxes: [batch, num_heads, 4]
            gt_bg_masks = [None] * num_heads
            for i, class_ids in enumerate(heads):
                # [batch, len(class_ids), h, w]
                gt_masks_this_head = tf.stack(
                    [gt_masks[:, class_id, :, :] for class_id in class_ids],
                    axis=1)
                # [batch, h, w]: background = no class of this head is set
                gt_bg_masks[i] = 1.0 - tf.reduce_max(
                    gt_masks_this_head, axis=1)
            # [batch, num_heads, h, w]
            gt_bg_masks = tf.stack(gt_bg_masks, axis=1)
            # [batch * num_heads, h, w, 1]
            gt_bg_masks = tf.reshape(gt_bg_masks, [-1, h, w, 1])
            # [batch * num_heads, 4]
            boxes = tf.reshape(boxes, [-1, 4])
            # [batch * num_heads, mask_h, mask_w]
            # Pixels sampled outside the image count as background (1).
            target_masks = tf.image.crop_and_resize(
                gt_bg_masks, boxes, tf.range(tf.shape(gt_bg_masks)[0]),
                head_mask_shape, extrapolation_value=1)
            return tf.reshape(target_masks,
                              [-1, num_heads] + head_mask_shape)

        target_gt_bg_masks = KL.Lambda(
            lambda xx: _extract_gt_bg_batched(xx[0], xx[1]))(
                [molded_gt_masks, molded_head_boxes])

        target_gt_masks = KL.Concatenate(
            axis=1, name='target_gt_masks')(
                [target_gt_fg_masks, target_gt_bg_masks])
        print('target_gt_masks.shape={}, {}'.format(
            target_gt_masks.shape, target_gt_masks._keras_shape))

        mask_loss_im = KL.Lambda(
            lambda xx: K.binary_crossentropy(target=xx[0], output=xx[1]),
            name="mask_ls_im")([target_gt_masks, mrcnn_masks])
        print('mask_loss_im.shape: {} {}'.format(mask_loss_im._keras_shape,
                                                 mask_loss_im.shape))
        mask_loss_im_reduced = KL.Lambda(
            lambda xx: tf.reduce_mean(xx, axis=[2, 3]),
            name='mask_loss_im_reduced')(mask_loss_im)

        # ---- image summaries ----
        with tf.name_scope('original_masks'):
            for i, class_ids in enumerate(heads):
                for j, class_id in enumerate(class_ids):
                    name = head_label_names[i][j]
                    # target and prediction side by side (joined on width)
                    fg_target_pred_original_view = tf.expand_dims(tf.concat([
                        tf.cast(
                            input_gt_masks[:, class_id, :, :], tf.float32),
                        output_masks[:, class_id, :, :]], axis=-1), axis=-1)
                    tf.summary.image(
                        f'fg_target_pred_original_view_{i}_{name}',
                        fg_target_pred_original_view)

        with tf.name_scope('cropped_masks'):
            for i, class_ids in enumerate(heads):
                for j, class_id in enumerate(class_ids):
                    name = head_label_names[i][j]
                    fg_target_pred_loss = tf.expand_dims(tf.concat([
                        target_gt_fg_masks[:, class_id, :, :],
                        mrcnn_fg_masks[:, class_id, :, :],
                        mask_loss_im[:, class_id]], axis=-1), axis=-1)
                    tf.summary.image(
                        f'fg_target_pred_loss_{name}', fg_target_pred_loss)
                # Background channels follow the num_masks foreground ones.
                bg_target_pred_loss = tf.expand_dims(tf.concat([
                    target_gt_bg_masks[:, i, :, :],
                    mrcnn_bg_masks[:, i, :, :],
                    mask_loss_im[:, i + num_masks]], axis=-1), axis=-1)
                tf.summary.image(
                    f'bg_target_pred_loss_{i}', bg_target_pred_loss)

        mask_loss = KL.Lambda(
            lambda xx: tf.reduce_mean(xx, axis=[1]))(mask_loss_im_reduced)
        mask_loss = KL.Lambda(_zero_where_missing, name='mask_loss')(
            [input_gt_masks_exist, mask_loss])
        print('mask_loss.shape={}, {}'.format(mask_loss.shape,
                                              mask_loss._keras_shape))

        if box_pred_method == 'lbf_guided':
            inputs = [
                input_molded_image_exist,
                input_gt_masks_exist,
                input_molded_lbf_landmark68_pts_exist,
                input_molded_image,
                input_gt_masks,
                input_molded_lbf_landmark68_pts,
            ]
            outputs = [mask_loss]
        elif box_pred_method == 'regress_landmark':
            inputs = [
                input_molded_image_exist,
                input_gt_masks_exist,
                input_gt_molded_landmark68_pts_exist,
                input_molded_image,
                input_gt_masks,
                input_gt_molded_landmark68_pts,
            ]
            outputs = [mask_loss, landmark68_loss]
        elif box_pred_method == 'regress_segbox':
            inputs = [
                input_molded_image_exist,
                input_gt_masks_exist,
                input_gt_molded_head_boxes_exist,
                input_molded_image,
                input_gt_masks,
                input_gt_molded_head_boxes,
            ]
            outputs = [mask_loss, box_loss]
        elif box_pred_method == 'gt_segbox':
            inputs = [
                input_molded_image_exist,
                input_gt_masks_exist,
                input_gt_molded_head_boxes_exist,
                input_molded_image,
                input_gt_masks,
                input_gt_molded_head_boxes,
            ]
            outputs = [mask_loss]
    else:
        if box_pred_method == 'lbf_guided':
            inputs = [
                input_molded_image_exist,
                input_molded_lbf_landmark68_pts_exist,
                input_molded_image,
                input_molded_lbf_landmark68_pts,
            ]
            outputs = [output_masks, molded_head_boxes]
        elif box_pred_method == 'regress_landmark':
            inputs = [input_molded_image_exist, input_molded_image]
            outputs = [
                output_masks, molded_head_boxes, pred_molded_landmark68_pts]
        elif box_pred_method == 'regress_segbox':
            inputs = [input_molded_image_exist, input_molded_image]
            outputs = [output_masks, molded_head_boxes]
        elif box_pred_method == 'gt_segbox':
            inputs = [
                input_molded_image_exist,
                input_gt_molded_head_boxes_exist,
                input_molded_image,
                input_gt_molded_head_boxes,
            ]
            outputs = [output_masks, molded_head_boxes]

    return [inputs, outputs]
def pointnet2_cls_ssg(num_class, num_points, num_dim=3):
    """Build the PointNet++ single-scale-grouping classification model.

    Args:
        num_class: number of output classes.
        num_points: number of points per input cloud.
        num_dim: channels per point. When greater than 3 the input is split
            with ``crop`` into ``[0, 3)`` (used as coordinates) and
            ``[3, num_dim)`` (used as extra per-point features).

    Returns:
        A ``keras.models.Model`` mapping a ``(batch, num_points, num_dim)``
        tensor to ``(batch, num_class)`` softmax scores.
    """
    cloud = keras.Input((num_points, num_dim))  # (batch, num_points, num_dim)

    has_point_features = num_dim > 3
    if has_point_features:
        l0_xyz = crop(2, 0, 3)(cloud)
        l0_points = crop(2, 3, num_dim)(cloud)
    else:
        # Coordinates only: there are no higher-level features for the first
        # stage, so l0_points is just a placeholder (use_feature is False).
        l0_xyz = l0_points = cloud

    # Settings shared by both set-abstraction stages.
    sa_common = dict(bn=True, relu6=False, use_xyz=True, random_sample=False)

    l1_xyz, l1_points, _ = pointnet_sa_module(
        l0_xyz, l0_points,
        n_centroid=512, radius=0.2, n_samples=32, mlp=[64, 64, 128],
        use_feature=has_point_features, **sa_common)
    l2_xyz, l2_points, _ = pointnet_sa_module(
        l1_xyz, l1_points,
        n_centroid=128, radius=0.4, n_samples=64, mlp=[128, 128, 256],
        use_feature=True, **sa_common)

    # A third set-abstraction stage (n_centroid=32, radius=0.6, n_samples=32,
    # mlp=[256, 512, 1024]) followed by GlobalMaxPooling1D was the original
    # design. At that stage there is no sampling or grouping, and since Keras
    # doesn't support None as input or output that implementation did not
    # work here; a plain PointNet layer over the level-2 features is used
    # instead.
    net = layers.Reshape((-1, 1, 256))(l2_points)
    net = mlp_layers(net, [256, 512, 1024])
    net = layers.GlobalMaxPooling2D()(net)

    # Fully connected layers
    for width in (512, 256):
        net = fully_connected(net, width, bn=True, relu6=False,
                              activation=True)
        net = layers.Dropout(0.5)(net)

    # Class scores: no BN nor ReLU here
    net = fully_connected(net, num_class, bn=False, activation=False)
    net = layers.Softmax()(net)

    return keras.models.Model(inputs=cloud, outputs=net)
def trainModel(digitSizeID=0,
               toRuleID=0,
               layerCount=1,
               trainingSize=1,
               hiddenSize=128,
               epochSize=100,
               modelID=0):
    """Train a sequence-to-sequence LSTM on one dataset and log the results.

    The dataset is ``datasets[digitSizeID][toRuleID]``; every sample is a
    one-hot sequence whose first ``2 * digits + 1`` steps are the query and
    whose remaining steps are the target.

    Args:
        digitSizeID: index into ``datasets`` / ``toDigitSize`` /
            ``toTargetSize`` / ``dictSizes``.
        toRuleID: index of the rule within the chosen digit size.
        layerCount: number of stacked decoder LSTM layers.
        trainingSize: recorded in the logs only.
            NOTE(review): it never influenced which samples are trained on
            (the slice built from it was unused); training always runs on
            the full training split.
        hiddenSize: number of units of every LSTM layer.
        epochSize: number of single-epoch ``fit`` calls.
        modelID: 0 -- encoder LSTM + ``RepeatVector``;
            1 -- sequence-returning encoder reshaped through a
            time-distributed dense layer.

    Side effects:
        Appends one row per epoch to ``trainingLog.csv`` and one summary row
        to ``finalResults.csv``; prints progress and sample predictions.

    Raises:
        ValueError: ``modelID`` is neither 0 nor 1 (previously this surfaced
            only later as an unbound ``model`` variable).
    """
    if modelID not in (0, 1):
        raise ValueError(
            "Unknown modelID {!r}; expected 0 or 1".format(modelID))

    allData = datasets[digitSizeID][toRuleID]
    digits = toDigitSize[digitSizeID]
    target_size = toTargetSize[digitSizeID][toRuleID]
    query_len = digits + 1 + digits
    dict_size = dictSizes[digitSizeID][toRuleID]
    RNN = layers.LSTM
    batch_size = 128

    print('Build model...')
    model = Sequential()
    if modelID == 0:
        # Encode the whole query into one vector, repeat it per target step.
        model.add(RNN(hiddenSize, input_shape=(query_len, dict_size)))
        model.add(layers.RepeatVector(target_size))
    else:
        # Keep the per-step encoder outputs and map the query-length axis
        # to the target length with a time-distributed dense layer.
        model.add(RNN(hiddenSize,
                      input_shape=(query_len, dict_size),
                      return_sequences=True))
        model.add(layers.Reshape((hiddenSize, query_len)))
        model.add(layers.TimeDistributed(layers.Dense(target_size)))
        model.add(layers.Reshape((target_size, hiddenSize)))
    for _ in range(layerCount):
        model.add(RNN(hiddenSize, return_sequences=True))
    model.add(layers.TimeDistributed(layers.Dense(dict_size)))
    model.add(layers.Activation('softmax'))
    model.compile(loss='categorical_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])
    model.summary()

    csvLog = []
    finalLoss = 0
    finalAccuracy = 0
    finalValAccuracy = 0
    configInfo = [
        toDigitSize[digitSizeID], ALLRULESID[toRuleID], layerCount,
        trainingSize, hiddenSize, epochSize, modelID
    ]

    train_x = allData["train"][:, :query_len]
    train_y = allData["train"][:, query_len:]
    valid_x = allData["valid"][:, :query_len]
    valid_y = allData["valid"][:, query_len:]

    for epoch in range(epochSize):
        print('=' * 50)
        print('Iteration', epoch)
        history = model.fit(train_x,
                            train_y,
                            batch_size=batch_size,
                            epochs=1,
                            validation_data=(valid_x, valid_y))
        # The history entries are written to the CSV files exactly as Keras
        # returns them, to keep the format of existing log files.
        finalLoss = history.history["loss"]
        finalAccuracy = history.history["acc"]
        finalValAccuracy = history.history["val_acc"]
        csvLog.append(
            [*configInfo, finalLoss, finalAccuracy, finalValAccuracy])

    with open("trainingLog.csv", 'a', newline='',
              encoding='utf-8') as csvfile:
        csv.writer(csvfile).writerows(csvLog)

    testQuery = allData["test"][:, :query_len]
    testTargets = allData["test"][:, query_len:]
    preds = model.predict_classes(testQuery, verbose=0)

    def backToString(classes):
        return "".join([allData["oneHotMap"][c] for c in classes])

    def decodeOneHot(rows):
        # Each row is a boolean one-hot vector; recover the class index.
        return backToString([list(row).index(True) for row in list(rows)])

    testCorrect = 0
    for i, (pred, target) in enumerate(zip(preds, testTargets)):
        correct = decodeOneHot(target)
        guess = backToString(list(pred))
        if correct == guess:
            testCorrect += 1
        if i < 5:
            # Show the first few predictions as a sanity check.
            query = decodeOneHot(testQuery[i])
            print("Q: ", query, "; Prediction: ", guess, "; Answer: ",
                  correct, " (", correct == guess, ") ")

    # An empty test split yields accuracy 0 instead of a division by zero.
    finalTestAccuracy = testCorrect / len(preds) if len(preds) else 0
    print("Final Test Accuracy is {}".format(finalTestAccuracy))

    resultAry = [
        *configInfo, finalLoss, finalAccuracy, finalValAccuracy,
        finalTestAccuracy
    ]
    with open("finalResults.csv", 'a', newline='',
              encoding='utf-8') as csvfile:
        csv.writer(csvfile).writerow(resultAry)
def reshapeEasy(inp, target_shape):
    """Apply a Keras ``Reshape`` layer with ``target_shape`` to ``inp``.

    Args:
        inp: tensor the layer is called on.
        target_shape: forwarded as ``Reshape(target_shape=...)``.

    Returns:
        The output of the ``Reshape`` layer.
    """
    # Imported here so that keras is only needed when the helper is called.
    from keras import layers

    return layers.Reshape(target_shape=target_shape)(inp)
def single_ae(
        enc_size,
        input_shape,
        name='single_ae',
        prefix=None,
        ae_type='dense',  # 'dense', or 'conv'
        conv_size=None,
        input_model=None,
        enc_lambda_layers=None,
        batch_norm=True,
        padding='same',
        activation=None,
        include_mu_shift_layer=False,
        do_vae=False):
    """Single-layer autoencoder (input -> encoding -> output), optionally variational.

    :param enc_size: encoding size; a length-1 sequence for ``ae_type='dense'``,
        or ``[nb_dim1, nb_dim2, ..., nb_feats]`` for ``ae_type='conv'``
    :param input_shape: shape of the input (without batch); overwritten by the
        output shape of ``input_model`` when that is given
    :param name: name of the returned model
    :param prefix: prefix of all layer names; defaults to ``name``
    :param ae_type: ``'dense'`` or ``'conv'``
    :param conv_size: convolution kernel size, required for ``ae_type='conv'``
    :param input_model: optional model whose output is fed to the autoencoder
    :param enc_lambda_layers: functions wrapped in ``Lambda`` layers and applied
        to the encoding (each must have a ``__name__``)
    :param batch_norm: if not None, passed as ``axis`` to ``BatchNormalization``
    :param padding: padding of the convolutions
    :param activation: activation of the convolutions
    :param include_mu_shift_layer: add ``LocalBiasLayer`` after the encoding and
        after the sampling/encoding stage
    :param do_vae: also build the sigma branch and sample from (mu, logvar)
    :return: a ``Model`` mapping the input tensor to the reconstruction
    """

    # naming
    model_name = name
    if prefix is None:
        prefix = model_name

    if enc_lambda_layers is None:
        enc_lambda_layers = []

    # prepare input
    input_name = '%s_input' % prefix
    if input_model is None:
        assert input_shape is not None, 'input_shape of input_model is necessary'
        input_tensor = KL.Input(shape=input_shape, name=input_name)
        last_tensor = input_tensor
    else:
        input_tensor = input_model.input
        last_tensor = input_model.output
        input_shape = last_tensor.shape.as_list()[1:]
    # number of features of the tensor entering the autoencoder; used as the
    # output feature count of the convolutional decoder
    input_nb_feats = last_tensor.shape.as_list()[-1]

    # prepare conv type based on input
    if ae_type == 'conv':
        ndims = len(input_shape) - 1
        convL = getattr(KL, 'Conv%dD' % ndims)
        assert conv_size is not None, 'with conv ae, need conv_size'
    conv_kwargs = {'padding': padding, 'activation': activation}

    # if want to go through a dense layer in the middle of the U, need to:
    # - flatten last layer if not flat
    # - do dense encoding and decoding
    # - unflatten (reshape spatially) at end
    if ae_type == 'dense' and len(input_shape) > 1:
        name = '%s_ae_%s_down_flat' % (prefix, ae_type)
        last_tensor = KL.Flatten(name=name)(last_tensor)

    # recall this layer: both the mu and the sigma encoders start from it
    pre_enc_layer = last_tensor

    # encoding layer
    if ae_type == 'dense':
        assert len(
            enc_size) == 1, "enc_size should be of length 1 for dense layer"

        enc_size_str = ''.join(['%d_' % d for d in enc_size])[:-1]
        name = '%s_ae_mu_enc_dense_%s' % (prefix, enc_size_str)
        last_tensor = KL.Dense(enc_size[0], name=name)(pre_enc_layer)

    else:  # convolution
        # convolve then resize. enc_size should be [nb_dim1, nb_dim2, ..., nb_feats]
        assert len(enc_size) == len(input_shape), \
            "encoding size does not match input shape %d %d" % (len(enc_size), len(input_shape))

        # spatial size differs from the input and is fully specified: convolve, then resize
        if list(enc_size)[:-1] != list(input_shape)[:-1] and \
                all([f is not None for f in input_shape[:-1]]) and \
                all([f is not None for f in enc_size[:-1]]):

            assert len(
                enc_size
            ) - 1 == 2, "Sorry, I have not yet implemented non-2D resizing -- need to check out interpn!"
            name = '%s_ae_mu_enc_conv' % (prefix)
            last_tensor = convL(enc_size[-1],
                                conv_size,
                                name=name,
                                **conv_kwargs)(pre_enc_layer)

            name = '%s_ae_mu_enc' % (prefix)
            resize_fn = lambda x: tf.image.resize_bilinear(x, enc_size[:-1])
            last_tensor = KL.Lambda(resize_fn, name=name)(last_tensor)

        elif enc_size[
                -1] is None:  # convolutional, but won't tell us bottleneck
            # identity layer: only gives the tensor the expected name
            name = '%s_ae_mu_enc' % (prefix)
            last_tensor = KL.Lambda(lambda x: x, name=name)(pre_enc_layer)

        else:
            name = '%s_ae_mu_enc' % (prefix)
            last_tensor = convL(enc_size[-1],
                                conv_size,
                                name=name,
                                **conv_kwargs)(pre_enc_layer)

    if include_mu_shift_layer:
        # shift
        name = '%s_ae_mu_shift' % (prefix)
        last_tensor = layers.LocalBiasLayer(name=name)(last_tensor)

    # encoding clean-up layers
    for layer_fcn in enc_lambda_layers:
        lambda_name = layer_fcn.__name__
        name = '%s_ae_mu_%s' % (prefix, lambda_name)
        last_tensor = KL.Lambda(layer_fcn, name=name)(last_tensor)

    # NOTE(review): the default batch_norm=True is passed straight through as
    # `axis`; confirm that a boolean axis is what callers intend.
    if batch_norm is not None:
        name = '%s_ae_mu_bn' % (prefix)
        last_tensor = KL.BatchNormalization(axis=batch_norm,
                                            name=name)(last_tensor)

    # have a simple layer that does nothing to have a clear name before sampling
    name = '%s_ae_mu' % (prefix)
    last_tensor = KL.Lambda(lambda x: x, name=name)(last_tensor)

    # if doing variational AE, will need the sigma layer as well.
    if do_vae:
        mu_tensor = last_tensor

        # encoding layer (mirrors the mu branch, with 'sigma' layer names)
        if ae_type == 'dense':
            name = '%s_ae_sigma_enc_dense_%s' % (prefix, enc_size_str)
            last_tensor = KL.Dense(enc_size[0], name=name)(pre_enc_layer)

        else:
            if list(enc_size)[:-1] != list(input_shape)[:-1] and \
                    all([f is not None for f in input_shape[:-1]]) and \
                    all([f is not None for f in enc_size[:-1]]):

                assert len(
                    enc_size
                ) - 1 == 2, "Sorry, I have not yet implemented non-2D resizing..."
                name = '%s_ae_sigma_enc_conv' % (prefix)
                last_tensor = convL(enc_size[-1],
                                    conv_size,
                                    name=name,
                                    **conv_kwargs)(pre_enc_layer)

                name = '%s_ae_sigma_enc' % (prefix)
                resize_fn = lambda x: tf.image.resize_bilinear(
                    x, enc_size[:-1])
                last_tensor = KL.Lambda(resize_fn, name=name)(last_tensor)

            elif enc_size[
                    -1] is None:  # convolutional, but won't tell us bottleneck
                name = '%s_ae_sigma_enc' % (prefix)
                last_tensor = convL(pre_enc_layer.shape.as_list()[-1],
                                    conv_size,
                                    name=name,
                                    **conv_kwargs)(pre_enc_layer)
                # cannot use lambda, then mu and sigma will be same layer.
                # last_tensor = KL.Lambda(lambda x: x, name=name)(pre_enc_layer)

            else:
                name = '%s_ae_sigma_enc' % (prefix)
                last_tensor = convL(enc_size[-1],
                                    conv_size,
                                    name=name,
                                    **conv_kwargs)(pre_enc_layer)

        # encoding clean-up layers
        for layer_fcn in enc_lambda_layers:
            lambda_name = layer_fcn.__name__
            name = '%s_ae_sigma_%s' % (prefix, lambda_name)
            last_tensor = KL.Lambda(layer_fcn, name=name)(last_tensor)

        if batch_norm is not None:
            name = '%s_ae_sigma_bn' % (prefix)
            last_tensor = KL.BatchNormalization(axis=batch_norm,
                                                name=name)(last_tensor)

        # have a simple layer that does nothing to have a clear name before sampling
        name = '%s_ae_sigma' % (prefix)
        last_tensor = KL.Lambda(lambda x: x, name=name)(last_tensor)

        logvar_tensor = last_tensor

        # VAE sampling
        sampler = _VAESample().sample_z

        name = '%s_ae_sample' % (prefix)
        last_tensor = KL.Lambda(sampler,
                                name=name)([mu_tensor, logvar_tensor])

    if include_mu_shift_layer:
        # shift
        name = '%s_ae_sample_shift' % (prefix)
        last_tensor = layers.LocalBiasLayer(name=name)(last_tensor)

    # decoding layer
    if ae_type == 'dense':
        name = '%s_ae_%s_dec_flat_%s' % (prefix, ae_type, enc_size_str)
        last_tensor = KL.Dense(np.prod(input_shape), name=name)(last_tensor)

        # unflatten if dense method
        if len(input_shape) > 1:
            name = '%s_ae_%s_dec' % (prefix, ae_type)
            last_tensor = KL.Reshape(input_shape, name=name)(last_tensor)

    else:

        # undo the encoder's resize before the decoding convolution
        if list(enc_size)[:-1] != list(input_shape)[:-1] and \
                all([f is not None for f in input_shape[:-1]]) and \
                all([f is not None for f in enc_size[:-1]]):

            name = '%s_ae_mu_dec' % (prefix)
            resize_fn = lambda x: tf.image.resize_bilinear(x, input_shape[:-1])
            last_tensor = KL.Lambda(resize_fn, name=name)(last_tensor)

        name = '%s_ae_%s_dec' % (prefix, ae_type)
        last_tensor = convL(input_nb_feats,
                            conv_size,
                            name=name,
                            **conv_kwargs)(last_tensor)

    if batch_norm is not None:
        name = '%s_bn_ae_%s_dec' % (prefix, ae_type)
        last_tensor = KL.BatchNormalization(axis=batch_norm,
                                            name=name)(last_tensor)

    # create the model and return
    model = Model(inputs=input_tensor, outputs=[last_tensor], name=model_name)
    return model
def u_net_model(init_shape, final_size, lr_rate, req_result):
    """Build and compile a residual U-Net style encoder/decoder.

    :param init_shape: shape handed to the ``Input`` layer
    :param final_size: target size for ``tf.image.resize`` when ``req_result > 3``
    :param lr_rate: learning rate of the Adam optimizer
    :param req_result: forwarded to ``get_output``; values above 3 additionally
        enable the resize + residual-add stage
    :return: the compiled ``tf.keras.Model`` (mean-squared-error loss)
    """
    x_input = layers.Input(init_shape)

    # Currently you have a 720p set of images. Let's rescale
    x_rescale = tf.keras.layers.experimental.preprocessing.Rescaling(
        scale=1. / 255)(x_input)
    # full-resolution skip connection, consumed by the last transposed-conv block
    skip_5 = layers.Conv2D(filters=32, kernel_size=(1, 1),
                           strides=1)(x_rescale)

    # ENCODING
    # For the Main Path, we have to go from 720p to 360.
    x = residual_block(x_rescale, filters=[32, 32, 32], f=3, s=2)
    skip_4 = x

    # From 360 we need to go again to 180
    x = residual_block(x, filters=[32, 32, 32], f=3, s=2)
    skip_3 = x

    # From 180 to 90
    x = residual_block(x, filters=[32, 32, 32], f=3, s=2)
    skip_2 = x

    # From 90 to 45
    x = residual_block(x, filters=[32, 32, 32], f=3, s=2)
    skip_1 = x

    # From 45 to 9
    x = residual_block(x, filters=[16, 16, 16], f=3, s=5)
    skip_0 = x

    # FLATTEN AND DENSE LAYERS
    x = layers.Flatten()(x)
    x = layers.Dense(64)(x)
    x = layers.Dense(36)(x)

    # DECODING
    # Currently the shape is a flat 36
    x = layers.Reshape(target_shape=(9, 4, 1))(x)
    x = residual_block(x, filters=[64, 64, 64], f=1, s=1)
    # rearranged to (9, 16, 16) so it can be added to skip_0
    x = layers.Reshape(target_shape=(9, 16, 16))(x)
    x = layers.Add()([x, skip_0])
    x = layers.Activation('relu')(x)

    # Now from 9x16x16 we need to keep up-scaling using transposed convolution
    x = trans_conv_block(x, skip_1, s=5)
    # From 45x80 to 90x160
    x = trans_conv_block(x, skip_2, s=2)
    # From 90x160 to 180x320
    x = trans_conv_block(x, skip_3, s=2)
    # From 180x320 to 360x640
    x = trans_conv_block(x, skip_4, s=2)
    # From 360x640 to 720x1280
    x = trans_conv_block(x, skip_5, s=2)

    # GOING BEYOND RECONSTRUCTION
    x_out = get_output(x, req_result)
    # NOTE(review): assumes the whole resize + residual-add stage below belongs
    # to this condition — TODO confirm against the original layout.
    if req_result > 3:
        x_out = tf.image.resize(x_out,
                                size=final_size,
                                preserve_aspect_ratio=True)
        # bicubic up-sampling of the rescaled input, added to the network output
        x_ups = tf.image.resize(x_rescale,
                                size=final_size,
                                method=tf.image.ResizeMethod.BICUBIC,
                                preserve_aspect_ratio=True)
        x_out = layers.Add(dtype='float32')([x_out, x_ups])
        x_out = layers.Activation('relu', dtype='float32')(x_out)

    # Compile and view summary
    model = tf.keras.Model(inputs=x_input, outputs=x_out)
    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=lr_rate),
                  loss=tf.keras.losses.MeanSquaredError())
    model.summary()
    return model
def design_dnn(nb_features,
               input_shape,
               nb_levels,
               conv_size,
               nb_labels,
               feat_mult=1,
               pool_size=2,
               padding='same',
               activation='elu',
               final_layer='dense-sigmoid',
               conv_dropout=0,
               conv_maxnorm=0,
               nb_input_features=1,
               batch_norm=False,
               name=None,
               prefix=None,
               use_strided_convolution_maxpool=True,
               nb_conv_per_level=2):
    """
    "deep" cnn with dense or global max pooling layer @ end...

    Could use sequential...

    :param nb_features: number of convolution features at the first level
    :param input_shape: spatial shape of the input (without the feature axis)
    :param nb_levels: number of conv/pool levels
    :param conv_size: convolution kernel size
    :param nb_labels: number of outputs of the 'dense-softmax' head
    :param feat_mult: per-level multiplier of the feature count
    :param pool_size: pooling size (an int is expanded to all dimensions)
    :param padding: padding of convolutions and pooling
    :param activation: activation of the convolutions
    :param final_layer: one of 'dense-sigmoid', 'dense-tanh', 'dense-softmax',
        'myglobalmaxpooling', 'globalmaxpooling'
    :param conv_dropout: dropout rate applied before each convolution (0 = off)
    :param conv_maxnorm: max-norm kernel constraint (0 = off)
    :param nb_input_features: size of the input feature axis
    :param batch_norm: passed as ``axis`` to BatchNormalization in the
        'myglobalmaxpooling' head
    :param name: model name (defaults to 'model_1')
    :param prefix: prefix of all layer names (defaults to the model name)
    :param use_strided_convolution_maxpool: use a convolution with kernel
        ``pool_size`` in place of max pooling at the end of each level
    :param nb_conv_per_level: number of convolutions per level
    :return: the assembled ``Model``
    """

    model_name = name
    if model_name is None:
        model_name = 'model_1'
    if prefix is None:
        prefix = model_name

    ndims = len(input_shape)
    input_shape = tuple(input_shape)

    convL = getattr(KL, 'Conv%dD' % ndims)
    maxpool = KL.MaxPooling3D if len(input_shape) == 3 else KL.MaxPooling2D
    if isinstance(pool_size, int):
        pool_size = (pool_size, ) * ndims

    # kwargs for the convolution layer
    conv_kwargs = {'padding': padding, 'activation': activation}
    if conv_maxnorm > 0:
        conv_kwargs['kernel_constraint'] = maxnorm(conv_maxnorm)

    # initialize a dictionary
    enc_tensors = {}

    # first layer: input
    name = '%s_input' % prefix
    enc_tensors[name] = KL.Input(shape=input_shape + (nb_input_features, ),
                                 name=name)
    last_tensor = enc_tensors[name]

    # down arm:
    # add nb_levels of conv + ReLu + conv + ReLu. Pool after each of first nb_levels - 1 layers
    for level in range(nb_levels):
        for conv in range(nb_conv_per_level):
            if conv_dropout > 0:
                name = '%s_dropout_%d_%d' % (prefix, level, conv)
                enc_tensors[name] = KL.Dropout(conv_dropout)(last_tensor)
                last_tensor = enc_tensors[name]

            name = '%s_conv_%d_%d' % (prefix, level, conv)
            nb_lvl_feats = np.round(nb_features * feat_mult**level).astype(int)
            enc_tensors[name] = convL(nb_lvl_feats,
                                      conv_size,
                                      **conv_kwargs,
                                      name=name)(last_tensor)
            last_tensor = enc_tensors[name]

        # max pool
        if use_strided_convolution_maxpool:
            # NOTE(review): no `strides` argument is passed here, so this is a
            # convolution with kernel size pool_size rather than a strided one —
            # confirm whether down-sampling was intended.
            name = '%s_strided_conv_%d' % (prefix, level)
            enc_tensors[name] = convL(nb_lvl_feats,
                                      pool_size,
                                      **conv_kwargs,
                                      name=name)(last_tensor)
            last_tensor = enc_tensors[name]
        else:
            name = '%s_maxpool_%d' % (prefix, level)
            enc_tensors[name] = maxpool(pool_size=pool_size,
                                        name=name,
                                        padding=padding)(last_tensor)
            last_tensor = enc_tensors[name]

    # dense layer
    if final_layer == 'dense-sigmoid':

        name = "%s_flatten" % prefix
        enc_tensors[name] = KL.Flatten(name=name)(last_tensor)
        last_tensor = enc_tensors[name]

        name = '%s_dense' % prefix
        enc_tensors[name] = KL.Dense(1, name=name,
                                     activation="sigmoid")(last_tensor)

    elif final_layer == 'dense-tanh':

        name = "%s_flatten" % prefix
        enc_tensors[name] = KL.Flatten(name=name)(last_tensor)
        last_tensor = enc_tensors[name]

        name = '%s_dense' % prefix
        enc_tensors[name] = KL.Dense(1, name=name)(last_tensor)
        last_tensor = enc_tensors[name]

        # Omitting BatchNorm for now, it seems to have a cpu vs gpu problem
        # https://github.com/tensorflow/tensorflow/pull/8906
        # https://github.com/fchollet/keras/issues/5802

        # The original format string had two placeholders but a single
        # argument ('%s_%s_tanh' % prefix), which raised TypeError and made
        # this head unusable.
        name = '%s_dense_tanh' % prefix
        enc_tensors[name] = KL.Activation(activation="tanh",
                                          name=name)(last_tensor)

    elif final_layer == 'dense-softmax':

        name = "%s_flatten" % prefix
        enc_tensors[name] = KL.Flatten(name=name)(last_tensor)
        last_tensor = enc_tensors[name]

        name = '%s_dense' % prefix
        enc_tensors[name] = KL.Dense(nb_labels,
                                     name=name,
                                     activation="softmax")(last_tensor)

    # global max pooling layer
    elif final_layer == 'myglobalmaxpooling':

        name = '%s_batch_norm' % prefix
        enc_tensors[name] = KL.BatchNormalization(axis=batch_norm,
                                                  name=name)(last_tensor)
        last_tensor = enc_tensors[name]

        name = '%s_global_max_pool' % prefix
        enc_tensors[name] = KL.Lambda(_global_max_nd, name=name)(last_tensor)
        last_tensor = enc_tensors[name]

        name = '%s_global_max_pool_reshape' % prefix
        enc_tensors[name] = KL.Reshape((1, 1), name=name)(last_tensor)
        last_tensor = enc_tensors[name]

        # cannot do activation in lambda layer. Could code inside, but will do extra layer
        name = '%s_global_max_pool_sigmoid' % prefix
        enc_tensors[name] = KL.Conv1D(1,
                                      1,
                                      name=name,
                                      activation="sigmoid",
                                      use_bias=True)(last_tensor)

    elif final_layer == 'globalmaxpooling':

        name = '%s_conv_to_featmaps' % prefix
        enc_tensors[name] = KL.Conv3D(2, 1, name=name,
                                      activation="relu")(last_tensor)
        last_tensor = enc_tensors[name]

        name = '%s_global_max_pool' % prefix
        enc_tensors[name] = KL.GlobalMaxPooling3D(name=name)(last_tensor)
        last_tensor = enc_tensors[name]

        # cannot do activation in lambda layer. Could code inside, but will do extra layer
        name = '%s_global_max_pool_softmax' % prefix
        enc_tensors[name] = KL.Activation('softmax', name=name)(last_tensor)

    # the most recently named tensor is the model output for every head
    last_tensor = enc_tensors[name]

    # create the model
    model = Model(inputs=[enc_tensors['%s_input' % prefix]],
                  outputs=[last_tensor],
                  name=model_name)
    return model
# naming the model model_name = name if prefix is None: prefix = model_name # first layer: input name = '%s_input' % prefix if input_model is None: input_tensor = KL.Input(shape=input_shape, name=name) last_tensor = input_tensor else: input_tensor = input_model.inputs last_tensor = input_model.outputs if isinstance(last_tensor, list): last_tensor = last_tensor[0] last_tensor = KL.Reshape(input_shape, name='predicted_output')(last_tensor) # get deformed labels n_labels = input_shape[-1] if validation_on_real_images: labels_gt = KL.Input(shape=input_shape[:-1]+[1], name='labels_input') input_tensor = [input_tensor[0], labels_gt] else: labels_gt = input_model.get_layer('labels_out').output # convert gt labels to 0...N-1 values n_labels = segmentation_label_list.shape[0] _, lut = utils.rearrange_label_list(segmentation_label_list) labels_gt = KL.Lambda(lambda x: tf.gather(tf.convert_to_tensor(lut, dtype='int32'), tf.cast(x, dtype='int32')), name='metric_convert_labels')(labels_gt)
activation='relu'))(input_OD) shape_before_Maxpool = K.int_shape(x) x = TimeDistributed( layers.MaxPooling2D(pool_size=(2, 2), strides=None, padding='same'))(x) x = TimeDistributed( layers.Conv2D(32, 6, padding='same', activation='relu', strides=(2, 2)))(x) x = TimeDistributed( layers.MaxPooling2D(pool_size=(2, 2), strides=None, padding='same'))(x) x = TimeDistributed(layers.Conv2D(4, 6, padding='same', activation='relu'))(x) x = TimeDistributed(layers.Flatten())(x) encoder_bef_reshape = layers.Dense(encoding_dim, activation='relu')(x) encoder_output = layers.Reshape( (encoding_dim, num_cell, 1), input_shape=(num_cell, encoding_dim))(encoder_bef_reshape) # input time information time_input = layers.Input(shape=(1, num_cell, 1), name='time_input') concat = layers.concatenate([encoder_output, time_input], axis=1) output_size = encoding_dim # predicting part X = layers.Conv2D(64, 2, padding='same', activation='relu')(concat) X = layers.MaxPooling2D(pool_size=(2, 2), strides=None, padding='same')(X) X = layers.Conv2D(32, 2, padding='same', activation='relu')(X) X = layers.MaxPooling2D(pool_size=(2, 2), strides=None, padding='same')(X)
x = layers.Dense(32, activation='relu')(x) z_mean = layers.Dense(latent_dim, name='z_mean')(x) z_log_var = layers.Dense(latent_dim, name='z_log_var')(x) def sampling(args): z_mean, z_log_var = args epsilon = K.random_normal(shape=(K.shape(z_mean)[0], latent_dim), mean=0., stddev=1.) return z_mean + K.exp(z_log_var) * epsilon z = layers.Lambda(sampling)([z_mean, z_log_var]) decoder_input = layers.Input(K.int_shape(z)[1:]) x = layers.Dense(np.prod(shape_before_flattening[1:]), activation='relu')(decoder_input) x = layers.Reshape(shape_before_flattening[1:])(x) x = layers.Conv2DTranspose(32, 3, padding='same', activation='relu', strides=(2, 2))(x) x = layers.Conv2D(1, 3, padding='same', activation='sigmoid')(x) decoder = Model(decoder_input, x) z_decoded = decoder(z) class CustomVariationalLayer(keras.layers.Layer): def vae_loss(self, x, z_decoded): x = K.flatten(x) z_decoded = K.flatten(z_decoded) xent_loss = keras.metrics.binary_crossentropy(x, z_decoded) kl_loss = -5e-4 * K.mean(1 + z_log_var - K.square(z_mean) - K.exp(z_log_var), axis=-1) return K.mean(xent_loss + kl_loss) def call(self, inputs):
def CapsNet(input_shape, n_class, routings):
    """
    Assemble the capsule network and its reconstruction decoder.
    :param input_shape: data shape, 3d, [width, height, channels]
    :param n_class: number of classes
    :param routings: number of routing iterations
    :return: Two Keras Models, the first one used for training, and the second one for evaluation.
    """
    x = layers.Input(shape=input_shape)

    # Three 3x3 'valid' ReLU convolutions: (layer name, filters, stride).
    conv_specs = (('conv1', 64, 1), ('conv2', 128, 1), ('conv3', 256, 2))
    features = x
    for layer_name, n_filters, stride in conv_specs:
        features = layers.Conv2D(filters=n_filters,
                                 kernel_size=3,
                                 strides=stride,
                                 padding='valid',
                                 activation='relu',
                                 name=layer_name)(features)

    primarycaps = PrimaryCap(features,
                             dim_capsule=8,
                             n_channels=32,
                             kernel_size=9,
                             strides=2,
                             padding='valid')
    digitcaps = CapsuleLayer(num_capsule=n_class,
                             dim_capsule=16,
                             routings=routings,
                             channels=32,
                             name='digitcaps')(primarycaps)
    # Capsule lengths are the class outputs.
    out_caps = Length(name='capsnet')(digitcaps)

    # Decoder network
    y = layers.Input(shape=(n_class, ))
    # Masked with the true label (training) ...
    masked_by_y = Mask()([digitcaps, y])
    # ... or without a label input (evaluation).
    masked = Mask()(digitcaps)

    decoder = models.Sequential(name='decoder')
    decoder.add(
        Dense(input_dim=16 * n_class, activation="relu",
              output_dim=7 * 7 * 32))
    decoder.add(Reshape((7, 7, 32)))
    decoder.add(BatchNormalization(momentum=0.8))

    # Deconvolution stack: (filters, subsample, activation), all 3x3 'same'.
    deconv_specs = (
        (32, (1, 1), "relu"),
        (16, (2, 2), "relu"),
        (8, (2, 2), "relu"),
        (4, (1, 1), "relu"),
        (1, (1, 1), "sigmoid"),
    )
    for n_filters, step, act in deconv_specs:
        decoder.add(
            layers.Deconvolution2D(n_filters,
                                   3,
                                   3,
                                   subsample=step,
                                   border_mode='same',
                                   activation=act))
    decoder.add(layers.Reshape(target_shape=input_shape, name='out_recon'))

    # Models for training and evaluation (prediction)
    recon_train = decoder(masked_by_y)
    train_model = models.Model([x, y], [out_caps, recon_train])
    recon_eval = decoder(masked)
    eval_model = models.Model(x, [out_caps, recon_eval])
    return train_model, eval_model
# Generator: latent vector -> two dense blocks -> four stacked LSTMs -> flat sequence.
generatorInput = keras.Input(shape=(latentDim, ))
x = generatorInput

# Two Dropout -> Dense -> LeakyReLU blocks expanding to width * height values.
x = layers.Dropout(0.2)(x)
x = layers.Dense(width * height)(x)
x = layers.LeakyReLU()(x)
x = layers.Dropout(0.2)(x)
x = layers.Dense(width * height)(x)
x = layers.LeakyReLU()(x)

# View the flat vector as `height` time steps of `width` features for the LSTMs.
x = layers.Reshape((height, width))(x)
x = layers.LSTM(width, return_sequences=True)(x)
x = layers.LeakyReLU()(x)
x = layers.LSTM(width, return_sequences=True)(x)
x = layers.LeakyReLU()(x)
x = layers.LSTM(width, return_sequences=True)(x)
x = layers.LeakyReLU()(x)
x = layers.LSTM(width, return_sequences=True)(x)
x = layers.LeakyReLU()(x)

# Flatten back to one value per step. The element count at this point is
# height * width, so deriving the length (instead of the former literal 200)
# gives the same result whenever the old code was valid and keeps working
# for other sizes.
x = layers.Reshape((height * width, 1))(x)
import os
from keras.datasets import mnist
from keras.preprocessing import image

# region GAN generator network
latent_dim = 32  # size of the generator's input vector
height = 28
width = 28
channels = 1
channel_feature_map = 18  # base number of feature channels in the generator

generator_input = keras.Input(shape=(latent_dim, ))

# project the latent vector to a half-resolution feature map
x = layers.Dense(channel_feature_map * (width // 2) *
                 (height // 2))(generator_input)
x = layers.LeakyReLU()(x)
x = layers.Reshape((width // 2, height // 2, channel_feature_map))(x)

x = layers.Conv2D(channel_feature_map * 2, 5, padding='same')(x)
x = layers.LeakyReLU()(x)

# upsample to width * height
x = layers.Conv2DTranspose(channel_feature_map * 2,
                           4,
                           strides=2,
                           padding='same')(x)
x = layers.LeakyReLU()(x)

# two further same-padded convolutions at full resolution
x = layers.Conv2D(channel_feature_map * 2, 5, padding='same')(x)
x = layers.LeakyReLU()(x)
x = layers.Conv2D(channel_feature_map * 2, 5, padding='same')(x)
x = layers.LeakyReLU()(x)
def model_ContextWeighted(p, embedding_matrix, max_sent_len, n_out):
    """Build the context-weighted relation classification model.

    Every edge of a graph gets its own copy of the sentence, encoded by
    (optionally bidirectional) LSTMs over word and position embeddings. Each
    edge vector is then combined with an attention-weighted sum of the other
    edges' vectors before the per-edge softmax.

    :param p: parameter dict; reads 'dropout1', 'position_emb', 'rnn1_layers',
        'units1' and 'bidirectional'
    :param embedding_matrix: pre-trained word embedding matrix (kept frozen)
    :param max_sent_len: number of tokens per sentence
    :param n_out: number of output classes per edge
    :return: the compiled Keras model
    """
    print("Parameters:", p)

    # Take sentence encoded as indices and convert it to embeddings
    sentence_input = layers.Input(shape=(max_sent_len,), dtype='int32', name='sentence_input')
    # Repeat the input N times for each edge
    x = layers.RepeatVector(MAX_EDGES_PER_GRAPH)(sentence_input)
    word_embeddings = layers.wrappers.TimeDistributed(
        layers.Embedding(output_dim=embedding_matrix.shape[1],
                         input_dim=embedding_matrix.shape[0],
                         input_length=max_sent_len,
                         weights=[embedding_matrix],
                         mask_zero=True,
                         trainable=False))(x)
    word_embeddings = layers.Dropout(p['dropout1'])(word_embeddings)

    # Take token markers that identify entity positions, convert to position embeddings
    entity_markers = layers.Input(shape=(MAX_EDGES_PER_GRAPH, max_sent_len,), dtype='int8', name='entity_markers')
    pos_embeddings = layers.wrappers.TimeDistributed(
        layers.Embedding(output_dim=p['position_emb'],
                         input_dim=POSITION_VOCAB_SIZE,
                         input_length=max_sent_len,
                         mask_zero=True,
                         embeddings_regularizer=regularizers.l2(),
                         trainable=True))(entity_markers)

    # Merge word and position embeddings and apply the specified amount of RNN layers
    x = layers.concatenate([word_embeddings, pos_embeddings])
    for _ in range(p["rnn1_layers"] - 1):
        lstm_layer = layers.LSTM(p['units1'], return_sequences=True)
        if p['bidirectional']:
            lstm_layer = layers.Bidirectional(lstm_layer)
        x = layers.wrappers.TimeDistributed(lstm_layer)(x)
    lstm_layer = layers.LSTM(p['units1'], return_sequences=False)
    if p['bidirectional']:
        lstm_layer = layers.Bidirectional(lstm_layer)
    sentence_matrix = layers.wrappers.TimeDistributed(lstm_layer)(x)

    ### Attention over ghosts ###
    layers_to_concat = []
    num_units = p['units1'] * (2 if p['bidirectional'] else 1)
    num_context = MAX_EDGES_PER_GRAPH - 1
    for i in range(MAX_EDGES_PER_GRAPH):
        # The edge index is bound as a default argument in every lambda below:
        # a plain closure would look `i` up late and see its final value if the
        # Lambda function is ever executed again after the loop has advanced.

        # Compute a memory vector for the target entity pair
        sentence_vector = layers.Lambda(lambda l, i=i: l[:, i],
                                        output_shape=(num_units,))(sentence_matrix)
        target_sentence_memory = layers.Dense(num_units, activation="linear", use_bias=False)(sentence_vector)

        # Context = the vectors of all other edges
        if i == 0:
            context_vectors = layers.Lambda(lambda l, i=i: l[:, i+1:],
                                            output_shape=(num_context, num_units))(sentence_matrix)
        elif i == MAX_EDGES_PER_GRAPH - 1:
            context_vectors = layers.Lambda(lambda l, i=i: l[:, :i],
                                            output_shape=(num_context, num_units))(sentence_matrix)
        else:
            context_vectors = layers.Lambda(lambda l, i=i: K.concatenate([l[:, :i], l[:, i+1:]], axis=1),
                                            output_shape=(num_context, num_units))(sentence_matrix)

        # Compute the score between each memory and the memory of the target entity pair.
        # There is one score per context vector, i.e. MAX_EDGES_PER_GRAPH - 1
        # of them (the declared output shape used to say MAX_EDGES_PER_GRAPH).
        sentence_scores = layers.Lambda(lambda inputs: K.batch_dot(inputs[0], inputs[1], axes=(1, 2)),
                                        output_shape=(num_context,))([target_sentence_memory, context_vectors])
        sentence_scores = layers.Activation('softmax')(sentence_scores)

        # Compute the final vector by taking the weighted sum of context vectors and the target entity vector
        context_vector = layers.Lambda(lambda inputs: K.batch_dot(inputs[0], inputs[1], axes=(1, 1)),
                                       output_shape=(num_units,))([context_vectors, sentence_scores])
        edge_vector = layers.concatenate([sentence_vector, context_vector])
        edge_vector = layers.Reshape((1, num_units * 2))(edge_vector)
        layers_to_concat.append(edge_vector)
    edge_vectors = layers.concatenate(layers_to_concat, axis=1)

    # Apply softmax
    edge_vectors = layers.Dropout(p['dropout1'])(edge_vectors)
    main_output = layers.wrappers.TimeDistributed(layers.Dense(n_out, activation="softmax", name='main_output'))(edge_vectors)

    model = models.Model(inputs=[sentence_input, entity_markers], outputs=[main_output])

    optimizer = optimizers.Adam(lr=0.001)
    model.compile(optimizer=optimizer, loss=masked_categorical_crossentropy, metrics=['accuracy'])

    return model
def validate_rnn_self_text_self_cross(rnn_speech, rnn_text, train_y,
                                      hidden_lstm_speech, hidden_con,
                                      hidden_lstm_text, hidden_dim, cw, val_sp,
                                      bat_size, filename):
    """Train a two-modality (speech + text) BiLSTM model with cross attention.

    Each modality is encoded by a bidirectional LSTM. An attention vector is
    computed per modality, and the attention weights derived from one modality
    are applied to the LSTM outputs of the other one. The model is fitted for
    50 epochs; a checkpoint is written after every epoch.

    :param rnn_speech: speech features, indexed as [sample][step][feature]
    :param rnn_text: text features, indexed as [sample][step][feature]
    :param train_y: integer class labels (``max(train_y) + 1`` output units)
    :param hidden_lstm_speech: LSTM units of the speech encoder
    :param hidden_con: size of the attention projections
    :param hidden_lstm_text: LSTM units of the text encoder
    :param hidden_dim: size of the dense layers after attention
    :param cw: class weights passed to ``fit``
    :param val_sp: validation split passed to ``fit``
    :param bat_size: batch size
    :param filename: prefix of the checkpoint file names
    :return: None
    """
    speech_steps = len(rnn_speech[0])
    text_steps = len(rnn_text[0])

    ##### Speech BiLSTM-SA
    speech_input = Input(shape=(speech_steps, len(rnn_speech[0][0])),
                         dtype='float32')
    speech_layer = Bidirectional(
        LSTM(hidden_lstm_speech, return_sequences=True))(speech_input)
    speech_att = Dense(hidden_con, activation='tanh')(speech_layer)
    # The attention query input is fed with an all-zero array, one row per sample.
    speech_att_source = np.zeros((len(rnn_speech), hidden_con))
    speech_att_input = Input(shape=(hidden_con, ), dtype='float32')
    speech_att_vec = Dense(hidden_con, activation='relu')(speech_att_input)
    speech_att_vec = Lambda(lambda x: K.batch_dot(*x, axes=(1, 2)))(
        [speech_att_vec, speech_att])

    ##### Text BiLSTM-SA
    text_input = Input(shape=(text_steps, len(rnn_text[0][0])),
                       dtype='float32')
    text_layer = Bidirectional(LSTM(hidden_lstm_text,
                                    return_sequences=True))(text_input)
    text_att = Dense(hidden_con, activation='tanh')(text_layer)
    text_att_source = np.zeros((len(rnn_text), hidden_con))
    text_att_input = Input(shape=(hidden_con, ), dtype='float32')
    text_att_vec = Dense(hidden_con, activation='relu')(text_att_input)
    text_att_vec = Lambda(lambda x: K.batch_dot(*x, axes=(1, 2)))(
        [text_att_vec, text_att])

    ##### Cross attention: weights over speech steps come from the text
    ##### attention vector and vice versa.
    speech_att_vec = Dense(hidden_con, activation='softmax')(speech_att_vec)
    text_att_vec = Dense(hidden_con, activation='softmax')(text_att_vec)
    cross_speech_att_vec = Dense(speech_steps,
                                 activation='softmax')(text_att_vec)
    cross_text_att_vec = Dense(text_steps,
                               activation='softmax')(speech_att_vec)

    ##### Weighted sum of the speech LSTM outputs
    cross_speech_att_vec = layers.Reshape(
        (speech_steps, 1))(cross_speech_att_vec)
    speech_output = layers.multiply([cross_speech_att_vec, speech_layer])
    speech_output = Lambda(lambda x: K.sum(x, axis=1))(speech_output)
    speech_output = Dense(hidden_dim, activation='relu')(speech_output)

    ##### Weighted sum of the text LSTM outputs
    cross_text_att_vec = layers.Reshape((text_steps, 1))(cross_text_att_vec)
    text_output = layers.multiply([cross_text_att_vec, text_layer])
    text_output = Lambda(lambda x: K.sum(x, axis=1))(text_output)
    text_output = Dense(hidden_dim, activation='relu')(text_output)

    ##### Total output
    output = layers.concatenate([speech_output, text_output])
    output = Dense(hidden_dim, activation='relu')(output)
    output = Dropout(0.3)(output)
    output = Dense(hidden_dim, activation='relu')(output)
    output = Dropout(0.3)(output)
    main_output = Dense(int(max(train_y) + 1), activation='softmax')(output)

    # (A throw-away `Sequential()` used to be created here and was overwritten
    # immediately; only the functional model is needed.)
    model = Model(
        inputs=[speech_input, speech_att_input, text_input, text_att_input],
        outputs=[main_output])
    model.compile(optimizer=adam_half,
                  loss="sparse_categorical_crossentropy",
                  metrics=["accuracy"])

    filepath = filename + "-{epoch:02d}-{val_acc:.4f}.hdf5"
    checkpoint = ModelCheckpoint(filepath,
                                 monitor='val_acc',
                                 verbose=1,
                                 mode='max')
    callbacks_list = [metricsf1macro_4input, checkpoint]
    model.summary()

    model.fit([rnn_speech, speech_att_source, rnn_text, text_att_source],
              train_y,
              validation_split=val_sp,
              epochs=50,
              batch_size=bat_size,
              callbacks=callbacks_list,
              class_weight=cw)
def __init__(self, input_tensor, encoder, is_training, reuse):
    """Build the capsule decoder on top of ``input_tensor``.

    Four deconvolutional capsule layers up-sample the input; the first three
    are each followed by a skip connection to an encoder capsule layer. The
    result is reshaped, concatenated with ``encoder.conv1`` and passed through
    three 1x1 convolutions that produce the reconstruction.

    Sets ``self.upcap_1`` .. ``self.upcap_4`` (outputs of the deconvolutional
    capsule layers), ``self.out_depth`` and ``self.output``.

    :param input_tensor: capsule tensor to decode
    :param encoder: object exposing ``conv_cap_3``, ``conv_cap_2``,
        ``primary_caps``, ``conv1`` and ``input_tensor``
    :param is_training: not used in this constructor
    :param reuse: not used in this constructor
    """
    net = input_tensor

    with tf.variable_scope('Decoder'):
        # Layer 1 Up: Deconvolutional capsules, skip connection, convolutional capsules
        net = capsule_layers.DeconvCapsuleLayer(kernel_size=4,
                                                num_capsule=8,
                                                num_atoms=16,
                                                upsamp_type='deconv',
                                                scaling=2,
                                                padding='same',
                                                routings=3,
                                                name='deconv_cap_1')(net)
        self.upcap_1 = net

        net = layers.Concatenate(axis=-2,
                                 name='skip_1')([net, encoder.conv_cap_3])

        # Layer 2 Up: Deconvolutional capsules, skip connection, convolutional capsules
        net = capsule_layers.DeconvCapsuleLayer(kernel_size=4,
                                                num_capsule=4,
                                                num_atoms=8,
                                                upsamp_type='deconv',
                                                scaling=2,
                                                padding='same',
                                                routings=3,
                                                name='deconv_cap_2')(net)
        self.upcap_2 = net

        net = layers.Concatenate(axis=-2,
                                 name='skip_2')([net, encoder.conv_cap_2])

        # Layer 3 Up: Deconvolutional capsules, skip connection
        net = capsule_layers.DeconvCapsuleLayer(kernel_size=4,
                                                num_capsule=2,
                                                num_atoms=8,
                                                upsamp_type='deconv',
                                                scaling=2,
                                                padding='same',
                                                routings=3,
                                                name='deconv_cap_3')(net)
        self.upcap_3 = net

        net = layers.Concatenate(
            axis=-2, name='skip_3')([net, encoder.primary_caps])

        # Layer 4 Up: Deconvolutional capsules, skip connection
        net = capsule_layers.DeconvCapsuleLayer(kernel_size=4,
                                                num_capsule=1,
                                                num_atoms=16,
                                                upsamp_type='deconv',
                                                scaling=2,
                                                padding='same',
                                                routings=3,
                                                name='deconv_cap_4')(net)
        self.upcap_4 = net

        # Reconstruction - Reshape, skip connection + 3x conventional Conv2D layers
        _, H, W, C, D = net.get_shape()

        # drop the capsule axis (a single capsule is left after deconv_cap_4)
        net = layers.Reshape((H.value, W.value, D.value))(net)
        net = layers.Concatenate(axis=-1, name='skip_4')([net, encoder.conv1])

        net = layers.Conv2D(filters=64,
                            kernel_size=1,
                            padding='same',
                            kernel_initializer='he_normal',
                            activation='relu',
                            name='recon_1')(net)

        net = layers.Conv2D(filters=128,
                            kernel_size=1,
                            padding='same',
                            kernel_initializer='he_normal',
                            activation='relu',
                            name='recon_2')(net)

        # Use the static rank of the encoder input. `tf.rank(...)` returns a
        # Tensor, so the former `tf.rank(...) == 3` compared a Tensor with an
        # int instead of testing the rank, and the rank-3 case then failed on
        # `shape[3]`.
        if encoder.input_tensor.shape.ndims == 3:
            self.out_depth = 1
        else:
            self.out_depth = encoder.input_tensor.shape[3].value

        net = layers.Conv2D(filters=self.out_depth,
                            kernel_size=1,
                            padding='same',
                            kernel_initializer='he_normal',
                            activation='sigmoid',
                            name='out_recon')(net)

        self.output = net
def CapsNetR3(input_shape, n_class=2):
    """Assemble the CapsNetR3 training, evaluation and manipulation models.

    The network is a conventional ``conv1`` layer, a stack of convolutional
    capsule layers with strides 2/1/2/1/2/1, three deconvolutional capsule
    stages each followed by a skip concatenation, and a 1x1 ``seg_caps``
    layer with ``n_class`` capsules. ``caps_length`` is applied to the
    ``seg_caps`` output to obtain the segmentation output, which also feeds
    a small 1x1-convolution reconstruction decoder via ``Mask``.

    Args:
        input_shape: Shape passed to ``layers.Input`` (without batch). It is
            sliced and concatenated with a tuple, so it must be a tuple.
        n_class: Number of capsules in the final ``seg_caps`` layer.

    Returns:
        A tuple ``(train_model, eval_model, manipulate_model)``:
          * ``train_model``: inputs ``[image, label]``, outputs
            ``[out_seg, reconstruction]`` with the mask driven by the label.
          * ``eval_model``: input ``image``, outputs
            ``[out_seg, reconstruction]`` with ``Mask`` applied to
            ``out_seg`` alone.
          * ``manipulate_model``: inputs ``[image, label, noise]``, output is
            the reconstruction of ``out_seg + noise`` masked by the label.
    """

    def conv_caps(name, capsules, atoms, stride=1, kernel=5, routings=3):
        # Factory for a 'same'-padded convolutional capsule layer.
        return ConvCapsuleLayer(kernel_size=kernel, num_capsule=capsules,
                                num_atoms=atoms, strides=stride,
                                padding='same', routings=routings, name=name)

    def deconv_caps(name, capsules, atoms):
        # Factory for a 2x-upsampling deconvolutional capsule layer.
        return DeconvCapsuleLayer(kernel_size=4, num_capsule=capsules,
                                  num_atoms=atoms, upsamp_type='deconv',
                                  scaling=2, padding='same', routings=3,
                                  name=name)

    def skip(name, upsampled, lateral):
        # Skip connection: concatenate along axis -2.
        return layers.Concatenate(axis=-2, name=name)([upsampled, lateral])

    image = layers.Input(shape=input_shape)

    # ---- Encoder -----------------------------------------------------------
    # Conventional convolution, then reshape to 1 capsule x [filters] atoms.
    conv1 = layers.Conv2D(filters=16, kernel_size=5, strides=1,
                          padding='same', activation='relu',
                          name='conv1')(image)
    _, rows, cols, chans = conv1.get_shape()
    conv1_caps = layers.Reshape(
        (rows.value, cols.value, 1, chans.value))(conv1)

    # Primary capsules use a single routing iteration.
    primary = conv_caps('primarycaps', 2, 16, stride=2,
                        routings=1)(conv1_caps)

    enc_2_1 = conv_caps('conv_cap_2_1', 4, 16)(primary)
    enc_2_2 = conv_caps('conv_cap_2_2', 4, 32, stride=2)(enc_2_1)
    enc_3_1 = conv_caps('conv_cap_3_1', 8, 32)(enc_2_2)
    enc_3_2 = conv_caps('conv_cap_3_2', 8, 64, stride=2)(enc_3_1)
    enc_4_1 = conv_caps('conv_cap_4_1', 8, 32)(enc_3_2)

    # ---- Decoder (capsule upsampling path) ---------------------------------
    # Each stage: deconv capsules -> skip concat -> conv capsules. The second
    # layer of a stage is a ConvCapsuleLayer even though its name says
    # 'deconv'.
    dec_1_1 = deconv_caps('deconv_cap_1_1', 8, 32)(enc_4_1)
    merged_1 = skip('up_1', dec_1_1, enc_3_1)
    dec_1_2 = conv_caps('deconv_cap_1_2', 4, 32)(merged_1)

    dec_2_1 = deconv_caps('deconv_cap_2_1', 4, 16)(dec_1_2)
    merged_2 = skip('up_2', dec_2_1, enc_2_1)
    dec_2_2 = conv_caps('deconv_cap_2_2', 4, 16)(merged_2)

    dec_3_1 = deconv_caps('deconv_cap_3_1', 2, 16)(dec_2_2)
    merged_3 = skip('up_3', dec_3_1, conv1_caps)

    # 1x1 convolutional capsules: one capsule per class.
    seg_caps = conv_caps('seg_caps', n_class, 16, kernel=1)(merged_3)

    # caps_length is defined elsewhere; presumably it reduces each capsule to
    # its vector length -- TODO confirm, along with what axis=2 refers to.
    out_seg = caps_length(seg_caps, axis=2)

    # ---- Reconstruction branch ---------------------------------------------
    _, rows, cols, chans = out_seg.get_shape()
    seg_dims = (rows.value, cols.value, chans.value)

    # NOTE(review): the label input is hard-coded to 6 channels, not n_class;
    # confirm this matches the intended dataset.
    label = layers.Input(shape=input_shape[:-1] + (6, ))
    masked_by_label = Mask()([out_seg, label])  # label-driven, for training
    masked_auto = Mask()(out_seg)               # no label, for prediction

    def shared_decoder(mask_layer):
        # NOTE(review): every call instantiates new layer objects, so the
        # three decoders presumably do not share weights despite the name --
        # confirm whether that is intended.
        conv_opts = dict(kernel_size=1, padding='same',
                         kernel_initializer='he_normal')
        flat = layers.Reshape(seg_dims)(mask_layer)
        hidden_1 = layers.Conv2D(filters=64, activation='relu',
                                 name='recon_1', **conv_opts)(flat)
        hidden_2 = layers.Conv2D(filters=128, activation='relu',
                                 name='recon_2', **conv_opts)(hidden_1)
        return layers.Conv2D(filters=1, activation='sigmoid',
                             name='out_recon', **conv_opts)(hidden_2)

    # Models for training and evaluation (prediction).
    train_recon = shared_decoder(masked_by_label)
    train_model = models.Model(inputs=[image, label],
                               outputs=[out_seg, train_recon])

    eval_recon = shared_decoder(masked_auto)
    eval_model = models.Model(inputs=image, outputs=[out_seg, eval_recon])

    # Manipulation model: perturb out_seg with an extra noise input before
    # masking and reconstructing.
    noise = layers.Input(shape=seg_dims)
    perturbed = layers.Add()([out_seg, noise])
    masked_perturbed = Mask()([perturbed, label])
    manipulate_model = models.Model(inputs=[image, label, noise],
                                    outputs=shared_decoder(masked_perturbed))

    return train_model, eval_model, manipulate_model