# Denoising autoencoder on MNIST: noisy images in, clean images out.

# Hyperparameters are stored on the experiment config object.
config.encoding_dim = 32
config.epochs = 1

# Only the images are needed; the digit labels are discarded.
(x_train, _), (x_test, _) = mnist.load_data()

# NOTE(review): add_noise receives the arrays exactly as returned by
# load_data(), i.e. before the /255 rescale below. add_noise is not visible
# here -- confirm it performs its own scaling/clipping.
x_train_noisy, x_test_noisy = add_noise(x_train, x_test)

# Clean reconstruction targets, rescaled by 1/255 as float32.
x_train, x_test = (
    images.astype('float32') / 255. for images in (x_train, x_test)
)

# 28x28 image -> flat vector -> bottleneck of `encoding_dim` units
# -> 784 sigmoid units -> back to a 28x28 image.
model = Sequential([
    Flatten(input_shape=(28, 28)),
    Dense(config.encoding_dim, activation='relu'),
    Dense(784, activation='sigmoid'),
    Reshape((28, 28)),
])
model.compile(optimizer='adam', loss='mse')

# Train to map noisy inputs onto their clean counterparts.
model.fit(
    x_train_noisy,
    x_train,
    epochs=config.epochs,
    validation_data=(x_test_noisy, x_test),
    callbacks=[WandbKerasCallback()],
)

model.save("auto-denoise.h5")
def bidaff(num_words, embeddings_matrix, ce_loader, scope, embedding_dim=64):
    """Build and compile the question/answer/context attention classifier.

    Word and character embeddings of the question, answer and context are
    concatenated, passed through a highway layer, and the question/context
    streams are combined with context-to-query and query-to-context
    attention. The attended context is joined with an encoding of the answer
    and classified by a 2-way softmax.

    Args:
        num_words: Vocabulary size; the embedding tables are created with
            ``num_words + 1`` rows (index 0 is used for padding).
        embeddings_matrix: Initial weights for the (frozen) word embeddings.
        ce_loader: Character-embedding provider exposing ``get_num_words()``,
            ``get_embedding_len()`` and ``get_embeddings_matrix()``.
        scope: String appended to the embedding layer names.
        embedding_dim: Output dimension of the word embeddings.

    Returns:
        The compiled Keras ``Model`` taking the six inputs
        ``[q, a, c, q_char, a_char, c_char]``.

    Side effects:
        Writes a diagram of the model to ``2way_model.png``.
    """

    def make_word_embedding(seq_len, name_prefix):
        # Frozen word-level embedding table for one input stream.
        return Embedding(input_dim=num_words + 1,  # word 0 used for padding
                         output_dim=embedding_dim,
                         weights=[embeddings_matrix],
                         input_length=seq_len,
                         name=name_prefix + scope,
                         mask_zero=False,
                         trainable=False)

    def make_char_embedding(seq_len, name_prefix):
        # Frozen char-level embedding table over a flattened
        # (words * chars-per-word) index sequence.
        return Embedding(input_dim=ce_loader.get_num_words() + 1,
                         output_dim=ce_loader.get_embedding_len(),
                         weights=[ce_loader.get_embeddings_matrix()],
                         input_length=MAX_WORD_LEN * seq_len,
                         name=name_prefix + scope,
                         mask_zero=False,
                         trainable=False)

    # (batch, input_len) => (batch, input_len, embedding_dim)
    q_input = Input(shape=(QUESTION_LEN,), name="q_input")
    a_input = Input(shape=(ANSWER_LEN,), name="a_input")
    c_input = Input(shape=(CONTEXT_LEN,), name="c_input")
    q_char_input = Input(shape=(QUESTION_LEN * MAX_WORD_LEN,),
                         name="q_char_input")
    a_char_input = Input(shape=(ANSWER_LEN * MAX_WORD_LEN,),
                         name="a_char_input")
    c_char_input = Input(shape=(CONTEXT_LEN * MAX_WORD_LEN,),
                         name="c_char_input")

    # Word embedders.
    q_emb = make_word_embedding(QUESTION_LEN, "embedding_q2_")
    a_emb = make_word_embedding(ANSWER_LEN, "embedding_a2_")
    c_emb = make_word_embedding(CONTEXT_LEN, "embedding_c2_")

    # Char embedders.
    q_char_emb = make_char_embedding(QUESTION_LEN, "char_embedding_q2_")
    a_char_emb = make_char_embedding(ANSWER_LEN, "char_embedding_a2_")
    c_char_emb = make_char_embedding(CONTEXT_LEN, "char_embedding_c2_")

    q = q_emb(q_input)
    a = a_emb(a_input)
    c = c_emb(c_input)

    q_char = q_char_emb(q_char_input)
    a_char = a_char_emb(a_char_input)
    c_char = c_char_emb(c_char_input)

    # The reshapes below rely on the module constant matching the loader.
    assert(CHAR_EMBEDDINGS_DIM == ce_loader.get_embedding_len())

    # Unflatten the char sequences into (words, chars-per-word, char-dim).
    q_char = Reshape(
        (QUESTION_LEN, MAX_WORD_LEN, CHAR_EMBEDDINGS_DIM))(q_char)
    a_char = Reshape(
        (ANSWER_LEN, MAX_WORD_LEN, CHAR_EMBEDDINGS_DIM))(a_char)
    c_char = Reshape(
        (CONTEXT_LEN, MAX_WORD_LEN, CHAR_EMBEDDINGS_DIM))(c_char)

    # CharCNNs for char level embeddings.
    q_char = CharCNN(q_char, name="q_charcnn")
    a_char = CharCNN(a_char, name="a_charcnn")
    c_char = CharCNN(c_char, name="c_charcnn")

    # Concatenate GloVe word embeddings with char-level embeddings.
    q = Concatenate(axis=-1)([q, q_char])
    a = Concatenate(axis=-1)([a, a_char])
    c = Concatenate(axis=-1)([c, c_char])

    q = Dropout(0.2)(q)
    c = Dropout(0.2)(c)
    a = Dropout(0.2)(a)

    # Highway network. range(1, 2) yields a single index, so exactly one
    # highway layer is applied to each stream.
    for highway_index in range(1, 2):
        q = TimeDistributedHighway(
            q, 92, "highway_q_bidaff_{}".format(highway_index))
        a = TimeDistributedHighway(
            a, 92, "highway_a_bidaff_{}".format(highway_index))
        c = TimeDistributedHighway(
            c, 92, "highway_c_bidaff_{}".format(highway_index))

    # Contextual Embed Layer
    q_lstm = Bidirectional(
        LSTM(30, recurrent_dropout=0.15, return_sequences=True))(q)
    c_lstm = Bidirectional(
        LSTM(30, recurrent_dropout=0.15, return_sequences=True))(c)

    sim = Similarity()([c_lstm, q_lstm])

    # ************* Context-to-query attention. ***********************
    # Softmax over the last axis of the similarity matrix.
    col_softmax = Lambda(lambda x: K.softmax(x, axis=-1))(sim)
    # Product between softmax prob and each query vector.
    UT = Lambda(
        lambda x: K.batch_dot(x[0], x[1], axes=(2, 1)),
        output_shape=lambda x: x[0][:2] + x[1][2:],
    )([col_softmax, q_lstm])

    # ************* Query-to-context attention. ***********************
    # Max over the last axis, then softmax.
    line_softmax = Lambda(
        lambda x: K.max(x, axis=-1),
        output_shape=lambda x: (x[0], x[1]),
    )(sim)
    line_softmax = Lambda(lambda x: K.softmax(x, axis=-1))(line_softmax)
    # Make @line_softmax a matrix with 1 row.
    line_softmax = Lambda(
        lambda x: K.expand_dims(x, axis=1),
        output_shape=lambda x: (x[0], 1, x[1]),
    )(line_softmax)
    # Matrix multiplication.
    HT = Lambda(
        lambda x: K.batch_dot(x[0], x[1], axes=(2, 1)),
        output_shape=lambda x: x[0][:2] + x[1][2:],
    )([line_softmax, c_lstm])
    # Remove one extra row, then repeat the vector for every context step.
    HT = Lambda(
        lambda x: K.squeeze(x, axis=1),
        output_shape=lambda x: (x[0], x[2]),
    )(HT)
    HT = RepeatVector(CONTEXT_LEN)(HT)

    # ************ Combine attention vectors. ***********************
    combine = Concatenate(axis=-1)
    context_times_ut = Multiply()([c_lstm, UT])
    context_times_ht = Multiply()([c_lstm, HT])
    G = combine([c_lstm, UT, context_times_ut, context_times_ht])

    # Encode the answer to one vector and repeat it along the context.
    a_lstm = Bidirectional(LSTM(20, recurrent_dropout=0.15))(a)
    a_lstm = RepeatVector(CONTEXT_LEN)(a_lstm)

    # Classification head.
    cqa = Concatenate(axis=-1)([G, a_lstm])
    cqa = Dropout(0.2)(cqa)
    cqa = Bidirectional(LSTM(30, recurrent_dropout=0.15))(cqa)
    cqa = Dropout(0.25)(cqa)
    cqa = Dense(100, activation='relu')(cqa)
    cqa = Dropout(0.25)(cqa)
    output = Dense(2, activation='softmax')(cqa)

    model_inputs = [
        q_input,
        a_input,
        c_input,
        q_char_input,
        a_char_input,
        c_char_input,
    ]
    model = Model(inputs=model_inputs, outputs=[output])
    model.compile(loss=categorical_crossentropy,
                  optimizer='adam',
                  metrics=['accuracy'])
    plot_model(model, to_file='2way_model.png', show_shapes=True)
    return model
def Net(n_label, img_input, drop_rate=0.2):
    """Build the encoder / decoder / re-encoder segmentation graph.

    The left branch downsamples the input four times, the right branch
    upsamples back with skip connections and emits one ``n_label``-channel
    prediction per scale, and the recoding branch re-encodes those
    predictions into a final prediction.

    The output reshapes are hard-coded to 400x400, 200x200, 100x100 and
    50x50 positions, so the graph assumes a 400x400 spatial input.
    ``bn_axis`` is read from the enclosing module scope.

    Args:
        n_label: Number of output channels of every prediction head.
        img_input: Input tensor the network is built on.
        drop_rate: Rate of the ``SpatialDropout2D`` layers in the left and
            recoding branches (the right branch uses a fixed 0.1).

    Returns:
        ``[final_out, out1, out2, out3, out4]``: softmax activations named
        ``l0``..``l4``, each reshaped to ``(positions, n_label)``.
    """

    def conv3x3(tensor, width, label, strides=1, **extra):
        # 3x3 conv_block; `extra` only ever carries the optional `dila`.
        return conv_block(tensor, width, (3, 3), strides=strides,
                          name=label, **extra)

    def flat_softmax(tensor, side, label):
        # Per-position softmax over a (side*side, n_label) reshaped map.
        return Activation('softmax', name=label)(
            Reshape((side * side, n_label))(tensor))

    # ---------left branch -----
    # Four stages; each halves the resolution: 400 -> 200 -> 100 -> 50 -> 25.
    skips = []
    x = img_input
    for stage, width in enumerate((32, 64, 128, 256), start=1):
        x = conv3x3(x, width, 'L_conv{}-1'.format(stage))
        x = SpatialDropout2D(drop_rate)(x)
        skip = conv3x3(x, width, 'L_conv{}-2'.format(stage))
        skips.append(skip)
        x = conv3x3(skip, 32, 'L_conv{}-3'.format(stage), strides=2)
    L1, L2, L3, L4 = skips

    # Bottleneck at 25 with two dilated convolutions.
    x = conv3x3(x, 512, 'L_conv5-1')
    x = conv3x3(x, 512, 'L_conv5-2', dila=2)
    x = SpatialDropout2D(drop_rate)(x)
    x = conv3x3(x, 512, 'L_conv5-3', dila=2)
    L5 = conv3x3(x, 512, 'L_conv5-4')  # 25

    # ---------Right branch -----
    # Four stages; each doubles the resolution: 25 -> 50 -> 100 -> 200 -> 400.
    heads = []
    x = L5
    decoder_plan = ((256, L4), (128, L3), (64, L2), (32, L1))
    for stage, (width, skip) in enumerate(decoder_plan, start=1):
        up_name = 'R_conv{}-1'.format(stage)
        x = Deconv2D(width, kernel_size=2, strides=2, padding='same',
                     name=up_name)(x)
        x = BatchNormalization(axis=bn_axis, name=up_name + '_bn')(x)
        x = conv3x3(Concatenate(axis=-1)([x, skip]), width,
                    'R_conv{}-2'.format(stage))
        x = SpatialDropout2D(0.1)(x)
        x = conv3x3(x, width, 'R_conv{}-3'.format(stage))
        heads.append(Conv2D(n_label, (1, 1),
                            name='R_out{}'.format(stage))(x))
    R_out1, R_out2, R_out3, R_out4 = heads

    # ---------Recoding branch -----
    x = conv_block(R_out4, 32, (1, 1), strides=1, name='E_conv1-1')
    x = conv3x3(x, 32, 'E_conv1-2')
    x = SpatialDropout2D(drop_rate)(x)
    x = conv3x3(x, 32, 'E_conv1-3', strides=2)  # 400 -> 200

    # Stages 2-4 merge in the coarser right-branch predictions.
    side_plan = ((R_out3, 64, 'c1'), (R_out2, 128, 'c2'), (R_out1, 256, 'c3'))
    for stage, (side_logits, width, side_name) in enumerate(side_plan,
                                                            start=2):
        merge = Concatenate(axis=-1)
        side = conv_block(side_logits, width, (1, 1), strides=1,
                          name=side_name)
        x = conv3x3(merge([x, side]), width, 'E_conv{}-1'.format(stage))
        x = conv3x3(x, width, 'E_conv{}-2'.format(stage))
        x = SpatialDropout2D(drop_rate)(x)
        if stage < 4:
            # 200 -> 100 (stage 2), 100 -> 50 (stage 3)
            x = conv3x3(x, width, 'E_conv{}-3'.format(stage), strides=2)

    # The last stage stays at 50 and uses dilated convolutions instead.
    x = conv3x3(x, 256, 'E_conv4-3', dila=2)
    x = conv3x3(x, 256, 'E_conv4-4', dila=2)
    x = conv3x3(x, 256, 'E_conv4-5')  # 50

    x = global_context_block(x, channels=64)

    # -----------------------------------------
    final_out = Conv2D(n_label, (1, 1), name='final_out')(x)
    final_out = UpSampling2D(size=(8, 8))(final_out)  # 50 -> 400

    final_out = flat_softmax(final_out, 400, 'l0')
    out1 = flat_softmax(R_out4, 400, 'l1')
    out2 = flat_softmax(R_out3, 200, 'l2')
    out3 = flat_softmax(R_out2, 100, 'l3')
    out4 = flat_softmax(R_out1, 50, 'l4')

    return [final_out, out1, out2, out3, out4]
def _main(args):
    """Convert a Darknet ``.cfg`` / ``.weights`` pair into a Keras ``.h5`` model.

    The config is parsed section by section; for every supported section a
    Keras layer is created, and convolutional sections additionally consume
    their weights from the binary weights file in the order Darknet wrote
    them. The outputs of all ``yolo`` sections become the model outputs.

    Args:
        args: Parsed command-line namespace. The attributes read are
            ``config_path`` (must end in ``.cfg``), ``weights_path`` (must
            end in ``.weights``), ``output_path`` (must end in ``.h5``),
            ``fully_convolutional`` (build with unspecified height/width
            instead of the size in the ``net_0`` section) and ``plot_model``
            (also write ``<output_root>.png``).

    Raises:
        AssertionError: If one of the paths has the wrong extension.
        ValueError: On an unknown activation, an ``avgpool`` section with
            parameters, or an unsupported section type.

    Side effects:
        Writes the model to ``output_path``, optionally writes a plot, and
        prints progress information.
    """
    config_path = os.path.expanduser(args.config_path)
    weights_path = os.path.expanduser(args.weights_path)
    assert config_path.endswith('.cfg'), '{} is not a .cfg file'.format(
        config_path)
    assert weights_path.endswith(
        '.weights'), '{} is not a .weights file'.format(weights_path)

    output_path = os.path.expanduser(args.output_path)
    assert output_path.endswith(
        '.h5'), 'output path {} is not a .h5 file'.format(output_path)
    output_root = os.path.splitext(output_path)[0]

    # Load weights and config.
    print('Loading weights.')
    # The context manager guarantees the weights file is closed even when
    # config parsing or model construction raises part-way through.
    with open(weights_path, 'rb') as weights_file:
        # The file starts with a header of five int32 values (20 bytes).
        weights_header = np.ndarray(
            shape=(5, ), dtype='int32', buffer=weights_file.read(20))
        print('Weights Header: ', weights_header)
        # TODO: Check transpose flag when implementing fully connected layers.

        print('Parsing Darknet config.')
        unique_config_file = unique_config_sections(config_path)
        cfg_parser = configparser.ConfigParser()
        cfg_parser.read_file(unique_config_file)

        print('Creating Keras model.')
        if args.fully_convolutional:
            image_height, image_width = None, None
        else:
            image_height = int(cfg_parser['net_0']['height'])
            image_width = int(cfg_parser['net_0']['width'])
        prev_layer = Input(shape=(image_height, image_width, 3))
        # all_layers[i] is the output tensor of the i-th parsed layer;
        # `route` and `shortcut` sections index into this list.
        all_layers = [prev_layer]
        # Indices into all_layers of the tensors that become model outputs.
        outputs = []

        weight_decay = (float(cfg_parser['net_0']['decay'])
                        if 'net_0' in cfg_parser.sections() else 5e-4)
        # Number of float32 weights consumed from the file so far.
        count = 0

        for section in cfg_parser.sections():
            print('Parsing section {}'.format(section))
            if section.startswith('convolutional'):
                filters = int(cfg_parser[section]['filters'])
                size = int(cfg_parser[section]['size'])
                stride = int(cfg_parser[section]['stride'])
                pad = int(cfg_parser[section]['pad'])
                activation = cfg_parser[section]['activation']
                batch_normalize = 'batch_normalize' in cfg_parser[section]

                # Setting weights.
                # Darknet serializes convolutional weights as:
                # [bias/beta, [gamma, mean, variance], conv_weights]
                prev_layer_shape = K.int_shape(prev_layer)

                # TODO: This assumes channel last dim_ordering.
                weights_shape = (size, size, prev_layer_shape[-1], filters)
                darknet_w_shape = (filters, weights_shape[2], size, size)
                # np.prod replaces the np.product alias, which was removed
                # in NumPy 2.0.
                weights_size = np.prod(weights_shape)

                print('conv2d', 'bn'
                      if batch_normalize else '  ', activation, weights_shape)

                conv_bias = np.ndarray(
                    shape=(filters, ),
                    dtype='float32',
                    buffer=weights_file.read(filters * 4))
                count += filters

                if batch_normalize:
                    # Three float32 vectors of length `filters`.
                    bn_weights = np.ndarray(
                        shape=(3, filters),
                        dtype='float32',
                        buffer=weights_file.read(filters * 12))
                    count += 3 * filters

                    # TODO: Keras BatchNormalization mistakenly refers to var
                    # as std.
                    bn_weight_list = [
                        bn_weights[0],  # scale gamma
                        conv_bias,  # shift beta
                        bn_weights[1],  # running mean
                        bn_weights[2]  # running var
                    ]

                conv_weights = np.ndarray(
                    shape=darknet_w_shape,
                    dtype='float32',
                    buffer=weights_file.read(weights_size * 4))
                count += weights_size

                # DarkNet conv_weights are serialized Caffe-style:
                # (out_dim, in_dim, height, width)
                # We would like to set these to Tensorflow order:
                # (height, width, in_dim, out_dim)
                # TODO: Add check for Theano dim ordering.
                conv_weights = np.transpose(conv_weights, [2, 3, 1, 0])
                # With batch norm the bias is used as the BN beta instead of
                # being loaded into the convolution.
                conv_weights = [conv_weights] if batch_normalize else [
                    conv_weights, conv_bias
                ]

                # Handle activation: 'leaky' is added as a separate layer
                # after the convolution, 'linear' needs no layer at all.
                act_fn = None
                if activation not in ('leaky', 'linear'):
                    raise ValueError(
                        'Unknown activation function `{}` in section {}'.format(
                            activation, section))

                padding = 'same' if pad == 1 and stride == 1 else 'valid'
                # Adjust padding model for darknet: stride-2 convolutions
                # are padded on the top/left only and then run 'valid'.
                if stride == 2:
                    prev_layer = ZeroPadding2D(((1, 0), (1, 0)))(prev_layer)

                # Create Conv2D layer
                conv_layer = (Conv2D(
                    filters, (size, size),
                    strides=(stride, stride),
                    kernel_regularizer=l2(weight_decay),
                    use_bias=not batch_normalize,
                    weights=conv_weights,
                    activation=act_fn,
                    padding=padding))(prev_layer)

                if batch_normalize:
                    conv_layer = (BatchNormalization(
                        weights=bn_weight_list))(conv_layer)

                prev_layer = conv_layer

                if activation == 'linear':
                    all_layers.append(prev_layer)
                elif activation == 'leaky':
                    act_layer = LeakyReLU(alpha=0.1)(prev_layer)
                    prev_layer = act_layer
                    all_layers.append(act_layer)

            elif section.startswith('maxpool'):
                size = int(cfg_parser[section]['size'])
                stride = int(cfg_parser[section]['stride'])
                all_layers.append(
                    MaxPooling2D(
                        padding='same',
                        pool_size=(size, size),
                        strides=(stride, stride))(prev_layer))
                prev_layer = all_layers[-1]

            elif section.startswith('avgpool'):
                if cfg_parser.items(section) != []:
                    raise ValueError(
                        '{} with params unsupported.'.format(section))
                all_layers.append(GlobalAveragePooling2D()(prev_layer))
                prev_layer = all_layers[-1]

            elif section.startswith('route'):
                ids = [int(i) for i in cfg_parser[section]['layers'].split(',')]
                layers = [all_layers[i] for i in ids]
                if len(layers) > 1:
                    print('Concatenating route layers:', layers)
                    concatenate_layer = concatenate(layers)
                    all_layers.append(concatenate_layer)
                    prev_layer = concatenate_layer
                else:
                    skip_layer = layers[0]  # only one layer to route
                    all_layers.append(skip_layer)
                    prev_layer = skip_layer

            elif section.startswith('shortcut'):
                # Only the first entry of `from` is used.
                ids = [int(i) for i in cfg_parser[section]['from'].split(',')][0]
                activation = cfg_parser[section]['activation']
                shortcut = add([all_layers[ids], prev_layer])
                if activation == 'linear':
                    shortcut = Activation('linear')(shortcut)
                all_layers.append(shortcut)
                prev_layer = all_layers[-1]

            elif section.startswith('upsample'):
                stride = int(cfg_parser[section]['stride'])
                all_layers.append(
                    UpSampling2D(size=(stride, stride))(prev_layer))
                prev_layer = all_layers[-1]

            elif section.startswith('yolo'):
                classes = int(cfg_parser[section]['classes'])
                # Split the channel axis into 3 groups of
                # (4 + 1 + classes) values per grid cell.
                n1, n2 = int(prev_layer.shape[1]), int(prev_layer.shape[2])
                n3 = 3
                n4 = (4 + 1 + classes)
                yolo = Reshape((n1, n2, n3, n4))(prev_layer)
                all_layers.append(yolo)
                prev_layer = all_layers[-1]
                outputs.append(len(all_layers) - 1)

            elif section.startswith('net'):
                pass  # Configs not currently handled during model definition.

            else:
                raise ValueError(
                    'Unsupported section header type: {}'.format(section))

        # Create and save model.
        model = Model(inputs=all_layers[0],
                      outputs=[all_layers[i] for i in outputs])
        # summary() prints by itself and returns None, so it is not wrapped
        # in print().
        model.summary()
        model.save('{}'.format(output_path))
        print('Saved Keras model to {}'.format(output_path))

        # Check to see if all weights have been read.
        remaining_weights = len(weights_file.read()) / 4

    print('Read {} of {} from Darknet weights.'.format(count, count +
                                                       remaining_weights))
    if remaining_weights > 0:
        print('Warning: {} unused weights'.format(remaining_weights))

    if args.plot_model:
        plot(model, to_file='{}.png'.format(output_root), show_shapes=True)
        print('Saved model plot to {}.png'.format(output_root))
def __init__(
        self,
        layer_sizes,
        generator=None,
        n_samples=None,
        input_dim=None,
        aggregator=None,
        bias=True,
        dropout=0.,
        normalize="l2",
):
    """Configure the aggregator, normalization and reshape layers.

    Args:
        layer_sizes: Sequence with the output dimension of each layer.
        generator: Optional object from which the sample counts
            (``generator.generator.num_samples``) and the node feature size
            (``generator.generator.graph.node_feature_sizes()``) are read.
        n_samples: Per-layer neighbour sample counts; used together with
            ``input_dim`` when ``generator`` is not given.
        input_dim: Input feature size; used together with ``n_samples``
            when ``generator`` is not given.
        aggregator: Keras ``Layer`` subclass used to build the per-layer
            aggregators; defaults to ``MeanAggregator``.
        bias: Passed to every aggregator as ``bias``.
        dropout: Stored on the instance as ``self.dropout``.
        normalize: ``"l2"`` for L2 normalization over axis 2, or ``None`` /
            ``"none"`` for no normalization.

    Raises:
        TypeError: If ``aggregator`` is not a ``Layer`` subclass.
        ValueError: If ``normalize`` is not one of the supported values.
        RuntimeError: If the generator's graph has more than one node type,
            or if neither ``generator`` nor both ``n_samples`` and
            ``input_dim`` are supplied.
    """
    # Set the aggregator layer used in the model
    if aggregator is None:
        self._aggregator = MeanAggregator
    elif issubclass(aggregator, Layer):
        self._aggregator = aggregator
    else:
        raise TypeError("Aggregator should be a subclass of Keras Layer")

    # Set the normalization layer used in the model. This is the only place
    # the layer is assigned: an unconditional L2 assignment at the end of
    # this method used to overwrite the choice made here, which silently
    # disabled normalize=None / "none".
    if normalize == "l2":
        self._normalization = Lambda(lambda x: K.l2_normalize(x, axis=2))
    elif normalize is None or normalize == "none":
        self._normalization = Lambda(lambda x: x)
    else:
        raise ValueError(
            "Normalization should be either 'l2' or 'none'; received "
            "'{}'".format(normalize))

    # Get the input_dim and num_samples from the mapper if it is given
    # Use both the schema and head node type from the mapper
    # TODO: Refactor the horror of generator.generator.graph...
    if generator is not None:
        self.n_samples = generator.generator.num_samples
        feature_sizes = generator.generator.graph.node_feature_sizes()
        if len(feature_sizes) > 1:
            raise RuntimeError(
                "GraphSAGE called on graph with more than one node type.")
        # Exactly one entry at this point; take its feature size.
        self.input_feature_size = feature_sizes.popitem()[1]
    elif n_samples is not None and input_dim is not None:
        self.n_samples = n_samples
        self.input_feature_size = input_dim
    else:
        raise RuntimeError(
            "If mapper is not provided, n_samples and input_dim must be specified."
        )

    # Model parameters
    self.n_layers = len(self.n_samples)
    self.bias = bias
    self.dropout = dropout

    # Feature dimensions for each layer; list() lets layer_sizes be any
    # sequence (e.g. a tuple), not only a list.
    self.dims = [self.input_feature_size] + list(layer_sizes)

    # Aggregator functions for each layer: ReLU everywhere except the last
    # layer, which is linear.
    self._aggs = [
        self._aggregator(
            output_dim=self.dims[layer + 1],
            bias=self.bias,
            act="relu" if layer < self.n_layers - 1 else "linear",
        ) for layer in range(self.n_layers)
    ]

    # Sizes of the neighbours for each layer. max(1, ...) keeps the reshape
    # valid when a sample count is zero.
    self._neigh_reshape = [[
        Reshape((-1, max(1, self.n_samples[i]), self.dims[layer]))
        for i in range(self.n_layers - layer)
    ] for layer in range(self.n_layers)]
# Multi-width convolutional text classifier: shared embedding, one
# convolution + max-pool branch per filter height, branches concatenated.

# Data-derived sizes.
sequence_length = X.shape[1]
vocabulary_size = len(vocabulary_inv)

# Hyperparameters.
embedding_dim = 300
filter_sizes = [1, 2, 3, 4, 5, 6]
num_filters = 512
drop = 0.5
epochs = 20
batch_size = 30

print("Creating Model...")
inputs = Input(shape=(sequence_length,), dtype='int32')
embedding = Embedding(
    input_dim=vocabulary_size,
    output_dim=embedding_dim,
    input_length=sequence_length,
)(inputs)
# Add a trailing channel axis so the sequence can be fed to Conv2D.
reshape = Reshape((sequence_length, embedding_dim, 1))(embedding)

# One convolution per filter height; each kernel spans the full embedding
# width, so the output has a single column.
conv_branches = [
    Conv2D(
        num_filters,
        kernel_size=(height, embedding_dim),
        padding='valid',
        kernel_initializer='normal',
        activation='relu',
    )(reshape)
    for height in filter_sizes
]
conv_0, conv_1, conv_2, conv_3, conv_4, conv_5 = conv_branches

# Pool over the whole remaining sequence axis of each branch
# (sequence_length - height + 1 positions after a 'valid' convolution).
pooled_branches = [
    MaxPool2D(
        pool_size=(sequence_length - height + 1, 1),
        strides=(1, 1),
        padding='valid',
    )(branch)
    for height, branch in zip(filter_sizes, conv_branches)
]
(maxpool_0, maxpool_1, maxpool_2,
 maxpool_3, maxpool_4, maxpool_5) = pooled_branches

concatenated_tensor = Concatenate(axis=1)(pooled_branches)
flatten = Flatten()(concatenated_tensor)
def ssc_300(image_size, n_classes, l2_regularization=0.0005, min_scale=None, max_scale=None, scales=None, aspect_ratios_global=None, aspect_ratios_per_layer=[[1.0, 2.0, 0.5], [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0], [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0], [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0], [1.0, 2.0, 0.5], [1.0, 2.0, 0.5]], two_boxes_for_ar1=True, steps=[8, 16, 32, 64, 100, 300], offsets=None, subtract_mean=[123, 117, 104], divide_by_stddev=None, swap_channels=[2, 1, 0], predictors=[ 'conv4_3', 'fc7', 'conv6_2', 'conv7_2', 'conv8_2', 'conv9_2' ], hidden_size=[250, 250, 100], output_activation=False, lstm=False, condense_predictors=False): """ Build a Keras model with SSC300 architecture, see references. The base network is a reduced atrous VGG-16, extended by the SSD architecture, as described in the paper. Most of the arguments that this function takes are only needed for the anchor box layers. In case you're training the network. Note: Requires Keras v2.0 or later. Currently works only with the TensorFlow backend (v1.0 or later). References: https://arxiv.org/abs/1512.02325v5 :param tuple image_size: The input image size in the format `(height, width, channels)`. :param int n_classes: The number of positive classes, e.g. 20 for Pascal VOC, 80 for MS COCO. :param float l2_regularization: The L2-regularization rate. Applies to all convolutional layers. Set to zero to deactivate L2-regularization. :param float min_scale: The smallest scaling factor for the size of the anchor boxes as a fraction of the shorter side of the input images. :param float max_scale: The largest scaling factor for the size of the anchor boxes as a fraction of the shorter side of the input images. All scaling factors between the smallest and the largest will be linearly interpolated. 
Note that the second to last of the linearly interpolated scaling factors will actually be the scaling factor for the last predictor layer, while the last scaling factor is used for the second box for aspect ratio 1 in the last predictor layer if `two_boxes_for_ar1` is `True`. :param list scales: A list of floats containing scaling factors per convolutional predictor layer. This list must be one element longer than the number of predictor layers. The first `k` elements are the scaling factors for the `k` predictor layers, while the last element is used for the second box for aspect ratio 1 in the last predictor layer if `two_boxes_for_ar1` is `True`. This additional last scaling factor must be passed either way, even if it is not being used. If a list is passed, this argument overrides `min_scale` and `max_scale`. All scaling factors must be greater than zero. :param list aspect_ratios_global: The list of aspect ratios for which anchor boxes are to be generated. This list is valid for all prediction layers. :param list aspect_ratios_per_layer: A list containing one aspect ratio list for each prediction layer. This allows you to set the aspect ratios for each predictor layer individually, which is the case for the original SSD300 implementation. If a list is passed, it overrides `aspect_ratios_global`. :param bool two_boxes_for_ar1: Only relevant for aspect ratio lists that contain 1. Will be ignored otherwise. If `True`, two anchor boxes will be generated for aspect ratio 1. The first will be generated using the scaling factor for the respective layer, the second one will be generated using geometric mean of said scaling factor and next bigger scaling factor. :param list steps: `None` or a list with as many elements as there are predictor layers. The elements can be either ints/floats or tuples of two ints/floats. 
These numbers represent for each predictor layer how many pixels apart the anchor box center points should be vertically and horizontally along the spatial grid over the image. If the list contains ints/floats, then that value will be used for both spatial dimensions. If the list contains tuples of two ints/floats, then they represent `(step_height, step_width)`. If no steps are provided, then they will be computed such that the anchor box center points will form an equidistant grid within the image dimensions. :param list offsets: `None` or a list with as many elements as there are predictor layers. The elements can be either floats or tuples of two floats. These numbers represent for each predictor layer how many pixels from the top and left boarders of the image the top-most and left-most anchor box center points should be as a fraction of `steps`. The last bit is important: The offsets are not absolute pixel values, but fractions of the step size specified in the `steps` argument. If the list contains floats, then that value will be used for both spatial dimensions. If the list contains tuples of two floats, then they represent `(vertical_offset, horizontal_offset)`. If no offsets are provided, then they will default to 0.5 of the step size. :param list subtract_mean: `None` or an array-like object of integers or floating point values of any shape that is broadcast-compatible with the image shape. The elements of this array will be subtracted from the image pixel intensity values. For example, pass a list of three integers to perform per-channel mean normalization for color images. :param list divide_by_stddev: `None` or an array-like object of non-zero integers or floating point values of any shape that is broadcast-compatible with the image shape. The image pixel intensity values will be divided by the elements of this array. For example, pass a list of three integers to perform per-channel standard deviation normalization for color images. 
:param list swap_channels: Either `False` or a list of integers representing the desired order in which the input image channels should be swapped. :param list predictors: names of the convolutional layers used as predictors :param list hidden_size: number of neurons for the 3 hidden fully-connected layers :param bool output_activation: whether to include or not the softplus activation function after the hidden layers :param bool lstm: whether to add or not an LSTM cell on top of the hidden layer :param bool condense_predictors: whether to condense or not the predictors in a single prediction :return model: The Keras SSC300 model. """ n_predictor_layers = len( predictors ) # The number of predictor conv layers in the network is 6 for the original SSD300. l2_reg = l2_regularization # Make the internal name shorter. img_height, img_width, img_channels = image_size[0], image_size[ 1], image_size[2] ############################################################################ # Get a few exceptions out of the way. ############################################################################ if aspect_ratios_global is None and aspect_ratios_per_layer is None: raise ValueError( "`aspect_ratios_global` and `aspect_ratios_per_layer` cannot both be None. At least one needs to be specified." ) if aspect_ratios_per_layer: if len(aspect_ratios_per_layer) != n_predictor_layers: raise ValueError( "It must be either aspect_ratios_per_layer is None or len(aspect_ratios_per_layer) == {}, but len(aspect_ratios_per_layer) == {}." .format(n_predictor_layers, len(aspect_ratios_per_layer))) if (min_scale is None or max_scale is None) and scales is None: raise ValueError( "Either `min_scale` and `max_scale` or `scales` need to be specified." ) if scales: if len(scales) != n_predictor_layers + 1: raise ValueError( "It must be either scales is None or len(scales) == {}, but len(scales) == {}." 
.format(n_predictor_layers + 1, len(scales))) else: # If no explicit list of scaling factors was passed, compute the list of scaling factors from `min_scale` and `max_scale` scales = np.linspace(min_scale, max_scale, n_predictor_layers + 1) if len(hidden_size) != 3: raise ValueError( "3 hidden size values must be passed, but {} values were received." .format(len(hidden_size))) hidden_size = np.array(hidden_size) if np.any(hidden_size <= 0): raise ValueError( "All hidden sizes must be >0, but the sizes given are {}".format( hidden_size)) if (not (steps is None)) and (len(steps) != n_predictor_layers): raise ValueError( "You must provide at least one step value per predictor layer.") if (not (offsets is None)) and (len(offsets) != n_predictor_layers): raise ValueError( "You must provide at least one offset value per predictor layer.") ############################################################################ # Compute the anchor box parameters. ############################################################################ # Set the aspect ratios for each predictor layer. These are only needed for the anchor box layers. if aspect_ratios_per_layer: aspect_ratios = aspect_ratios_per_layer else: aspect_ratios = [aspect_ratios_global] * n_predictor_layers # Compute the number of boxes to be predicted per cell for each predictor layer. # We need this so that we know how many channels the predictor layers need to have. 
if aspect_ratios_per_layer: n_boxes = [] for ar in aspect_ratios_per_layer: if (1 in ar) & two_boxes_for_ar1: n_boxes.append(len(ar) + 1) # +1 for the second box for aspect ratio 1 else: n_boxes.append(len(ar)) else: # If only a global aspect ratio list was passed, then the number of boxes is the same for each predictor layer if (1 in aspect_ratios_global) & two_boxes_for_ar1: n_boxes = len(aspect_ratios_global) + 1 else: n_boxes = len(aspect_ratios_global) n_boxes = [n_boxes] * n_predictor_layers if steps is None: steps = [None] * n_predictor_layers if offsets is None: offsets = [None] * n_predictor_layers ############################################################################ # Define functions for the Lambda layers below. ############################################################################ def identity_layer(tensor): return tensor def input_mean_normalization(tensor): return tensor - np.array(subtract_mean) def input_stddev_normalization(tensor): return tensor / np.array(divide_by_stddev) def input_channel_swap(tensor): if len(swap_channels) == 3: return K.stack([ tensor[..., swap_channels[0]], tensor[..., swap_channels[1]], tensor[..., swap_channels[2]] ], axis=-1) elif len(swap_channels) == 4: return K.stack([ tensor[..., swap_channels[0]], tensor[..., swap_channels[1]], tensor[..., swap_channels[2]], tensor[..., swap_channels[3]] ], axis=-1) ############################################################################ # Build the network. ############################################################################ x = Input(shape=(img_height, img_width, img_channels)) # The following identity layer is only needed so that the subsequent lambda layers can be optional. 
x1 = Lambda(identity_layer, output_shape=(img_height, img_width, img_channels), name='identity_layer')(x) if not (subtract_mean is None): x1 = Lambda(input_mean_normalization, output_shape=(img_height, img_width, img_channels), name='input_mean_normalization')(x1) if not (divide_by_stddev is None): x1 = Lambda(input_stddev_normalization, output_shape=(img_height, img_width, img_channels), name='input_stddev_normalization')(x1) if swap_channels: x1 = Lambda(input_channel_swap, output_shape=(img_height, img_width, img_channels), name='input_channel_swap')(x1) conv1_1 = Conv2D(64, (3, 3), activation='relu', padding='same', kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='conv1_1')(x1) conv1_2 = Conv2D(64, (3, 3), activation='relu', padding='same', kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='conv1_2')(conv1_1) pool1 = MaxPooling2D(pool_size=(2, 2), strides=(2, 2), padding='same', name='pool1')(conv1_2) conv2_1 = Conv2D(128, (3, 3), activation='relu', padding='same', kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='conv2_1')(pool1) conv2_2 = Conv2D(128, (3, 3), activation='relu', padding='same', kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='conv2_2')(conv2_1) pool2 = MaxPooling2D(pool_size=(2, 2), strides=(2, 2), padding='same', name='pool2')(conv2_2) conv3_1 = Conv2D(256, (3, 3), activation='relu', padding='same', kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='conv3_1')(pool2) conv3_2 = Conv2D(256, (3, 3), activation='relu', padding='same', kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='conv3_2')(conv3_1) conv3_3 = Conv2D(256, (3, 3), activation='relu', padding='same', kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='conv3_3')(conv3_2) pool3 = MaxPooling2D(pool_size=(2, 2), strides=(2, 2), padding='same', name='pool3')(conv3_3) conv4_1 = Conv2D(512, (3, 3), activation='relu', padding='same', 
kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='conv4_1')(pool3) conv4_2 = Conv2D(512, (3, 3), activation='relu', padding='same', kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='conv4_2')(conv4_1) conv4_3 = Conv2D(512, (3, 3), activation='relu', padding='same', kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='conv4_3')(conv4_2) pool4 = MaxPooling2D(pool_size=(2, 2), strides=(2, 2), padding='same', name='pool4')(conv4_3) conv5_1 = Conv2D(512, (3, 3), activation='relu', padding='same', kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='conv5_1')(pool4) conv5_2 = Conv2D(512, (3, 3), activation='relu', padding='same', kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='conv5_2')(conv5_1) conv5_3 = Conv2D(512, (3, 3), activation='relu', padding='same', kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='conv5_3')(conv5_2) pool5 = MaxPooling2D(pool_size=(3, 3), strides=(1, 1), padding='same', name='pool5')(conv5_3) fc6 = Conv2D(1024, (3, 3), dilation_rate=(6, 6), activation='relu', padding='same', kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='fc6')(pool5) fc7 = Conv2D(1024, (1, 1), activation='relu', padding='same', kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='fc7')(fc6) conv6_1 = Conv2D(256, (1, 1), activation='relu', padding='same', kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='conv6_1')(fc7) conv6_1 = ZeroPadding2D(padding=((1, 1), (1, 1)), name='conv6_padding')(conv6_1) conv6_2 = Conv2D(512, (3, 3), strides=(2, 2), activation='relu', padding='valid', kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='conv6_2')(conv6_1) conv7_1 = Conv2D(128, (1, 1), activation='relu', padding='same', kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='conv7_1')(conv6_2) conv7_1 = ZeroPadding2D(padding=((1, 1), (1, 1)), name='conv7_padding')(conv7_1) conv7_2 = 
Conv2D(256, (3, 3), strides=(2, 2), activation='relu', padding='valid', kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='conv7_2')(conv7_1) conv8_1 = Conv2D(128, (1, 1), activation='relu', padding='same', kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='conv8_1')(conv7_2) conv8_2 = Conv2D(256, (3, 3), strides=(1, 1), activation='relu', padding='valid', kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='conv8_2')(conv8_1) conv9_1 = Conv2D(128, (1, 1), activation='relu', padding='same', kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='conv9_1')(conv8_2) conv9_2 = Conv2D(256, (3, 3), strides=(1, 1), activation='relu', padding='valid', kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='conv9_2')(conv9_1) # Feed conv4_3 into the L2 normalization layer conv4_3_norm = L2Normalization(gamma_init=20, name='conv4_3_norm')(conv4_3) conv_features = { 'conv4_3': conv4_3_norm, 'fc7': fc7, 'conv6_2': conv6_2, 'conv7_2': conv7_2, 'conv8_2': conv8_2, 'conv9_2': conv9_2 } predictor_layers = [] ### Build the predictor layers on top of the base network for predictor in predictors: flatten = Flatten(name='{}_flat'.format(predictor))( conv_features[predictor]) d1 = Dense(hidden_size[0], name='{}_d1'.format(predictor))(flatten) d1bn = BatchNormalization(name='{}_bn1'.format(predictor))(d1) r1 = Activation(activation='relu', name='{}_r1'.format(predictor))(d1bn) d2 = Dense(hidden_size[1], name='{}_d2'.format(predictor))(r1) d2bn = BatchNormalization(name='{}_bn2'.format(predictor))(d2) r2 = Activation(activation='relu', name='{}_r2'.format(predictor))(d2bn) d3 = Dense(hidden_size[2], name='{}_d3'.format(predictor))(r2) d3bn = BatchNormalization(name='{}_bn3'.format(predictor))(d3) r3 = Activation(activation='relu', name='{}_r3'.format(predictor))(d3bn) pred = Dense(n_classes, name='{}_pred'.format(predictor))(r3) predictor_layers.append(pred) # Concatenate the output of the different predictors 
# Output shape of `predictions`: (batch, n_predictors, n_classes) predictions = Concatenate(axis=1, name='predictions1')(predictor_layers) if output_activation: predictions = Activation(activation='softplus')(predictions) if lstm: predictions = Reshape((n_predictor_layers, n_classes), name='lstm_predictions_res')(predictions) predictions = Bidirectional(LSTM(20, return_sequences=False), name='lstm_predictions')(predictions) if condense_predictors: predictions = Dense(n_classes, name='predictions_condensed')(predictions) return Model(inputs=x, outputs=predictions)
# Create the TensorFlow session from the GPU options configured earlier and
# make it the session used by the Keras backend.
session = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))
K.set_session(session)

# Suppress warnings about wrong compilation of TensorFlow.
tf.logging.set_verbosity(tf.logging.ERROR)

noise_size = 100

## G
# The generator maps a noise vector of length `noise_size` to a feature map:
# a dense projection is reshaped to (4, 8, 256) and then passed through three
# upsample -> 5x5 conv -> batch norm -> LeakyReLU stages with a decreasing
# number of filters.
z = Input(shape=[noise_size])
G = Dense(8 * 4 * 256)(z)
G = BatchNormalization(momentum=0.9)(G)
G = LeakyReLU(alpha=0.2)(G)
G = Reshape((4, 8, 256))(G)

for n_filters in (128, 64, 32):
    G = UpSampling2D()(G)
    G = Conv2D(n_filters, (5, 5), padding='same')(G)
    G = BatchNormalization(momentum=0.9)(G)
    G = LeakyReLU(alpha=0.2)(G)
def ssd_300(image_size,
            n_classes,
            min_scale=None,
            max_scale=None,
            scales=None,
            aspect_ratios_global=None,
            aspect_ratios_per_layer=[[0.5, 1.0, 2.0],
                                     [1.0 / 3.0, 0.5, 1.0, 2.0, 3.0],
                                     [1.0 / 3.0, 0.5, 1.0, 2.0, 3.0],
                                     [1.0 / 3.0, 0.5, 1.0, 2.0, 3.0],
                                     [0.5, 1.0, 2.0],
                                     [0.5, 1.0, 2.0]],
            two_boxes_for_ar1=True,
            steps=None,
            offsets=None,
            limit_boxes=False,
            variances=[0.1, 0.1, 0.2, 0.2],
            coords='centroids',
            normalize_coords=False,
            subtract_mean=None,
            divide_by_stddev=None,
            swap_channels=False,
            return_predictor_sizes=False):
    '''
    Build a Keras model with SSD_300 architecture, see references.

    The base network is a reduced atrous VGG-16, extended by the SSD architecture,
    as described in the paper.

    In case you're wondering why this function has so many arguments: All arguments
    except the first two (`image_size` and `n_classes`) are only needed so that the
    anchor box layers can produce the correct anchor boxes. In case you're training
    the network, the parameters passed here must be the same as the ones used to set
    up `SSDBoxEncoder`. In case you're loading trained weights, the parameters passed
    here must be the same as the ones used to produce the trained weights.

    Some of these arguments are explained in more detail in the documentation of the
    `SSDBoxEncoder` class.

    Note: Requires Keras v2.0 or later. Currently works only with the
    TensorFlow backend (v1.0 or later).

    Arguments:
        image_size (tuple): The input image size in the format `(height, width, channels)`.
        n_classes (int): The number of categories for classification including
            the background class (i.e. the number of positive classes +1 for
            the background class).
        min_scale (float, optional): The smallest scaling factor for the size of the anchor
            boxes as a fraction of the shorter side of the input images. Defaults to `None`;
            must be given together with `max_scale` unless `scales` is passed.
        max_scale (float, optional): The largest scaling factor for the size of the anchor
            boxes as a fraction of the shorter side of the input images. All scaling factors
            between the smallest and the largest will be linearly interpolated. Note that the
            second to last of the linearly interpolated scaling factors will actually be the
            scaling factor for the last predictor layer, while the last scaling factor is used
            for the second box for aspect ratio 1 in the last predictor layer if
            `two_boxes_for_ar1` is `True`. Defaults to `None`; must be given together with
            `min_scale` unless `scales` is passed.
        scales (list, optional): A list of floats containing scaling factors per convolutional
            predictor layer. This list must be one element longer than the number of predictor
            layers. The first `k` elements are the scaling factors for the `k` predictor layers,
            while the last element is used for the second box for aspect ratio 1 in the last
            predictor layer if `two_boxes_for_ar1` is `True`. This additional last scaling
            factor must be passed either way, even if it is not being used. Defaults to `None`.
            If a non-empty list (or array) is passed, this argument overrides `min_scale` and
            `max_scale`. All scaling factors must be greater than zero.
        aspect_ratios_global (list, optional): The list of aspect ratios for which anchor boxes
            are to be generated. This list is valid for all prediction layers.
            Defaults to `None`.
        aspect_ratios_per_layer (list, optional): A list containing one aspect ratio list for
            each prediction layer. This allows you to set the aspect ratios for each predictor
            layer individually, which is the case for the original SSD300 implementation. If a
            list is passed, it overrides `aspect_ratios_global`. Defaults to the aspect ratios
            used in the original SSD300 architecture, i.e.:
                [[0.5, 1.0, 2.0],
                 [1.0/3.0, 0.5, 1.0, 2.0, 3.0],
                 [1.0/3.0, 0.5, 1.0, 2.0, 3.0],
                 [1.0/3.0, 0.5, 1.0, 2.0, 3.0],
                 [0.5, 1.0, 2.0],
                 [0.5, 1.0, 2.0]]
        two_boxes_for_ar1 (bool, optional): Only relevant for aspect ratio lists that contain 1.
            Will be ignored otherwise. If `True`, two anchor boxes will be generated for aspect
            ratio 1. The first will be generated using the scaling factor for the respective
            layer, the second one will be generated using geometric mean of said scaling factor
            and next bigger scaling factor. Defaults to `True`, following the original
            implementation.
        steps (list, optional): `None` or a list with as many elements as there are predictor
            layers. The elements can be either ints/floats or tuples of two ints/floats. These
            numbers represent for each predictor layer how many pixels apart the anchor box
            center points should be vertically and horizontally along the spatial grid over the
            image. If the list contains ints/floats, then that value will be used for both
            spatial dimensions. If the list contains tuples of two ints/floats, then they
            represent `(step_height, step_width)`. If no steps are provided, then they will be
            computed such that the anchor box center points will form an equidistant grid within
            the image dimensions. Defaults to `None`.
        offsets (list, optional): `None` or a list with as many elements as there are predictor
            layers. The elements can be either floats or tuples of two floats. These numbers
            represent for each predictor layer how many pixels from the top and left borders of
            the image the top-most and left-most anchor box center points should be as a
            fraction of `steps`. The last bit is important: The offsets are not absolute pixel
            values, but fractions of the step size specified in the `steps` argument. If the
            list contains floats, then that value will be used for both spatial dimensions. If
            the list contains tuples of two floats, then they represent
            `(vertical_offset, horizontal_offset)`. If no offsets are provided, then they will
            default to 0.5 of the step size. Defaults to `None`.
        limit_boxes (bool, optional): If `True`, limits box coordinates to stay within image
            boundaries. This would normally be set to `True`, but here it defaults to `False`,
            following the original implementation.
        variances (list, optional): A list of 4 floats >0 with scaling factors (actually it's
            not factors but divisors to be precise) for the encoded predicted box coordinates.
            A variance value of 1.0 would apply no scaling at all to the predictions, while
            values in (0,1) upscale the encoded predictions and values greater than 1.0
            downscale the encoded predictions. Defaults to `[0.1, 0.1, 0.2, 0.2]`, following
            the original implementation. The coordinate format must be 'centroids'.
        coords (str, optional): The box coordinate format to be used. Can be either 'centroids'
            for the format `(cx, cy, w, h)` (box center coordinates, width, and height) or
            'minmax' for the format `(xmin, xmax, ymin, ymax)`. Defaults to 'centroids',
            following the original implementation.
        normalize_coords (bool, optional): Set to `True` if the model is supposed to use
            relative instead of absolute coordinates, i.e. if the model predicts box coordinates
            within [0,1] instead of absolute coordinates. Defaults to `False`.
        subtract_mean (array-like, optional): `None` or an array-like object of integers or
            floating point values of any shape that is broadcast-compatible with the image
            shape. The elements of this array will be subtracted from the image pixel intensity
            values. For example, pass a list of three integers to perform per-channel mean
            normalization for color images. Defaults to `None`.
        divide_by_stddev (array-like, optional): `None` or an array-like object of non-zero
            integers or floating point values of any shape that is broadcast-compatible with
            the image shape. The image pixel intensity values will be divided by the elements
            of this array. For example, pass a list of three integers to perform per-channel
            standard deviation normalization for color images. Defaults to `None`.
        swap_channels (bool, optional): If `True` and the input has exactly three channels, the
            color channel order of the input images will be reversed, i.e. if the input color
            channel order is RGB, the color channels will be swapped to BGR. Note that the
            original Caffe implementation assumes BGR input. Defaults to `False`.
        return_predictor_sizes (bool, optional): If `True`, this function not only returns the
            model, but also a list containing the spatial dimensions of the predictor layers.
            This isn't strictly necessary since you can always get their sizes easily via the
            Keras API, but it's convenient and less error-prone to get them this way. They are
            only relevant for training anyway (SSDBoxEncoder needs to know the spatial
            dimensions of the predictor layers), for inference you don't need them.
            Defaults to `False`.

    Returns:
        model: The Keras SSD model.
        predictor_sizes: Only returned if `return_predictor_sizes` is `True`. A Numpy array
            containing the `(height, width)` portion of the output tensor shape for each
            convolutional predictor layer. During training, the generator function needs this
            in order to transform the ground truth labels into tensors of identical structure
            as the output tensors of the model, which is in turn needed for the cost function.

    Raises:
        ValueError: If both aspect ratio arguments are `None`, if neither
            `min_scale`/`max_scale` nor `scales` is given, if `aspect_ratios_per_layer`,
            `scales`, `steps` or `offsets` have the wrong number of elements, or if
            `variances` does not consist of 4 values that are all greater than zero.

    References:
        https://arxiv.org/abs/1512.02325v5
    '''

    # NOTE: The list defaults in the signature are kept as they are on purpose: passing `None`
    # for `aspect_ratios_per_layer` has a distinct meaning (fall back to `aspect_ratios_global`),
    # and neither default is modified in this function.

    n_predictor_layers = 6  # The number of predictor conv layers in the network is 6 for the original SSD300

    # Get a few exceptions out of the way first
    if aspect_ratios_global is None and aspect_ratios_per_layer is None:
        raise ValueError(
            "`aspect_ratios_global` and `aspect_ratios_per_layer` cannot both be None. At least one needs to be specified."
        )
    if aspect_ratios_per_layer:
        if len(aspect_ratios_per_layer) != n_predictor_layers:
            raise ValueError(
                "It must be either aspect_ratios_per_layer is None or len(aspect_ratios_per_layer) == {}, but len(aspect_ratios_per_layer) == {}."
                .format(n_predictor_layers, len(aspect_ratios_per_layer)))

    if (min_scale is None or max_scale is None) and scales is None:
        raise ValueError(
            "Either `min_scale` and `max_scale` or `scales` need to be specified."
        )
    # Test for "given and non-empty" explicitly instead of relying on truthiness, so that a
    # Numpy array of scaling factors is accepted as well (its truth value is ambiguous).
    if scales is not None and len(scales) > 0:
        if len(scales) != n_predictor_layers + 1:
            raise ValueError(
                "It must be either scales is None or len(scales) == {}, but len(scales) == {}."
                .format(n_predictor_layers + 1, len(scales)))
    else:
        # If no explicit list of scaling factors was passed, compute the list of scaling
        # factors from `min_scale` and `max_scale`
        scales = np.linspace(min_scale, max_scale, n_predictor_layers + 1)

    if len(variances) != 4:
        raise ValueError(
            "4 variance values must be pased, but {} values were received.".
            format(len(variances)))
    variances = np.array(variances)
    if np.any(variances <= 0):
        raise ValueError(
            "All variances must be >0, but the variances given are {}".format(
                variances))

    if (steps is not None) and (len(steps) != n_predictor_layers):
        raise ValueError(
            "You must provide at least one step value per predictor layer.")

    if (offsets is not None) and (len(offsets) != n_predictor_layers):
        raise ValueError(
            "You must provide at least one offset value per predictor layer.")

    # Set the aspect ratios for each predictor layer. These are only needed for the anchor box layers.
    if aspect_ratios_per_layer:
        aspect_ratios = aspect_ratios_per_layer
    else:
        aspect_ratios = [aspect_ratios_global] * n_predictor_layers

    # Compute the number of boxes to be predicted per cell for each predictor layer.
    # We need this so that we know how many channels the predictor layers need to have.
    # Aspect ratio 1 yields one additional box if `two_boxes_for_ar1` is set.
    n_boxes = [
        len(ar) + 1 if (1 in ar) and two_boxes_for_ar1 else len(ar)
        for ar in aspect_ratios
    ]

    if steps is None:
        steps = [None] * n_predictor_layers
    if offsets is None:
        offsets = [None] * n_predictor_layers

    # Input image format
    img_height, img_width, img_channels = image_size[0], image_size[
        1], image_size[2]
    input_shape = (img_height, img_width, img_channels)

    def conv_relu(filters, kernel_size, name, **kwargs):
        # ReLU convolution with He-normal initialization as used throughout the base network.
        kwargs.setdefault('padding', 'same')
        return Conv2D(filters,
                      kernel_size,
                      activation='relu',
                      kernel_initializer='he_normal',
                      name=name,
                      **kwargs)

    def max_pool(name, pool_size=(2, 2), strides=(2, 2)):
        # 'same'-padded max pooling as used throughout the base network.
        return MaxPooling2D(pool_size=pool_size,
                            strides=strides,
                            padding='same',
                            name=name)

    ### Build the actual network.

    x = Input(shape=input_shape)

    # The following identity layer is only needed so that the subsequent lambda layers can be optional.
    # The misspelled layer name is kept as is, because layer names are part of the model definition.
    x1 = Lambda(lambda z: z, output_shape=input_shape,
                name='idendity_layer')(x)
    if subtract_mean is not None:
        x1 = Lambda(lambda z: z - np.array(subtract_mean),
                    output_shape=input_shape,
                    name='input_mean_normalization')(x1)
    if divide_by_stddev is not None:
        x1 = Lambda(lambda z: z / np.array(divide_by_stddev),
                    output_shape=input_shape,
                    name='input_stddev_normalization')(x1)
    if swap_channels and (img_channels == 3):
        x1 = Lambda(lambda z: z[..., ::-1],
                    output_shape=input_shape,
                    name='input_channel_swap')(x1)

    conv1_1 = conv_relu(64, (3, 3), 'conv1_1')(x1)
    conv1_2 = conv_relu(64, (3, 3), 'conv1_2')(conv1_1)
    pool1 = max_pool('pool1')(conv1_2)

    conv2_1 = conv_relu(128, (3, 3), 'conv2_1')(pool1)
    conv2_2 = conv_relu(128, (3, 3), 'conv2_2')(conv2_1)
    pool2 = max_pool('pool2')(conv2_2)

    conv3_1 = conv_relu(256, (3, 3), 'conv3_1')(pool2)
    conv3_2 = conv_relu(256, (3, 3), 'conv3_2')(conv3_1)
    conv3_3 = conv_relu(256, (3, 3), 'conv3_3')(conv3_2)
    pool3 = max_pool('pool3')(conv3_3)

    conv4_1 = conv_relu(512, (3, 3), 'conv4_1')(pool3)
    conv4_2 = conv_relu(512, (3, 3), 'conv4_2')(conv4_1)
    conv4_3 = conv_relu(512, (3, 3), 'conv4_3')(conv4_2)
    pool4 = max_pool('pool4')(conv4_3)

    conv5_1 = conv_relu(512, (3, 3), 'conv5_1')(pool4)
    conv5_2 = conv_relu(512, (3, 3), 'conv5_2')(conv5_1)
    conv5_3 = conv_relu(512, (3, 3), 'conv5_3')(conv5_2)
    pool5 = max_pool('pool5', pool_size=(3, 3), strides=(1, 1))(conv5_3)

    fc6 = conv_relu(1024, (3, 3), 'fc6', dilation_rate=(6, 6))(pool5)

    fc7 = conv_relu(1024, (1, 1), 'fc7')(fc6)

    conv6_1 = conv_relu(256, (1, 1), 'conv6_1')(fc7)
    conv6_1 = ZeroPadding2D(padding=((1, 1), (1, 1)),
                            name='conv6_padding')(conv6_1)
    conv6_2 = conv_relu(512, (3, 3),
                        'conv6_2',
                        strides=(2, 2),
                        padding='valid')(conv6_1)

    conv7_1 = conv_relu(128, (1, 1), 'conv7_1')(conv6_2)
    conv7_1 = ZeroPadding2D(padding=((1, 1), (1, 1)),
                            name='conv7_padding')(conv7_1)
    conv7_2 = conv_relu(256, (3, 3),
                        'conv7_2',
                        strides=(2, 2),
                        padding='valid')(conv7_1)

    conv8_1 = conv_relu(128, (1, 1), 'conv8_1')(conv7_2)
    conv8_2 = conv_relu(256, (3, 3),
                        'conv8_2',
                        strides=(1, 1),
                        padding='valid')(conv8_1)

    conv9_1 = conv_relu(128, (1, 1), 'conv9_1')(conv8_2)
    conv9_2 = conv_relu(256, (3, 3),
                        'conv9_2',
                        strides=(1, 1),
                        padding='valid')(conv9_1)

    # Feed conv4_3 into the L2 normalization layer
    conv4_3_norm = L2Normalization(gamma_init=20,
                                   name='conv4_3_norm')(conv4_3)

    # The feature maps the predictor layers are attached to, together with the prefix used for
    # the names of all layers derived from them.
    feature_maps = [
        ('conv4_3_norm', conv4_3_norm),
        ('fc7', fc7),
        ('conv6_2', conv6_2),
        ('conv7_2', conv7_2),
        ('conv8_2', conv8_2),
        ('conv9_2', conv9_2),
    ]
    prefixes = [prefix for prefix, _ in feature_maps]

    ### Build the convolutional predictor layers on top of the base network

    # We predict `n_classes` confidence values for each box, hence the confidence predictors
    # have depth `n_boxes * n_classes`
    # Output shape of the confidence layers: `(batch, height, width, n_boxes * n_classes)`
    conf_layers = [
        Conv2D(n_boxes[i] * n_classes, (3, 3),
               padding='same',
               kernel_initializer='he_normal',
               name='{}_mbox_conf'.format(prefix))(feature_map)
        for i, (prefix, feature_map) in enumerate(feature_maps)
    ]
    # We predict 4 box coordinates for each box, hence the localization predictors have depth `n_boxes * 4`
    # Output shape of the localization layers: `(batch, height, width, n_boxes * 4)`
    loc_layers = [
        Conv2D(n_boxes[i] * 4, (3, 3),
               padding='same',
               kernel_initializer='he_normal',
               name='{}_mbox_loc'.format(prefix))(feature_map)
        for i, (prefix, feature_map) in enumerate(feature_maps)
    ]

    ### Generate the anchor boxes (called "priors" in the original Caffe/C++ implementation, so I'll keep their layer names)

    # Output shape of anchors: `(batch, height, width, n_boxes, 8)`
    priorbox_layers = [
        AnchorBoxes(img_height,
                    img_width,
                    this_scale=scales[i],
                    next_scale=scales[i + 1],
                    aspect_ratios=aspect_ratios[i],
                    two_boxes_for_ar1=two_boxes_for_ar1,
                    this_steps=steps[i],
                    this_offsets=offsets[i],
                    limit_boxes=limit_boxes,
                    variances=variances,
                    coords=coords,
                    normalize_coords=normalize_coords,
                    name='{}_mbox_priorbox'.format(prefix))(loc)
        for i, (prefix, loc) in enumerate(zip(prefixes, loc_layers))
    ]

    ### Reshape

    # Reshape the class predictions, yielding 3D tensors of shape `(batch, height * width * n_boxes, n_classes)`
    # We want the classes isolated in the last axis to perform softmax on them
    conf_reshaped = [
        Reshape((-1, n_classes),
                name='{}_mbox_conf_reshape'.format(prefix))(conf)
        for prefix, conf in zip(prefixes, conf_layers)
    ]
    # Reshape the box predictions, yielding 3D tensors of shape `(batch, height * width * n_boxes, 4)`
    # We want the four box coordinates isolated in the last axis to compute the smooth L1 loss
    loc_reshaped = [
        Reshape((-1, 4), name='{}_mbox_loc_reshape'.format(prefix))(loc)
        for prefix, loc in zip(prefixes, loc_layers)
    ]
    # Reshape the anchor box tensors, yielding 3D tensors of shape `(batch, height * width * n_boxes, 8)`
    priorbox_reshaped = [
        Reshape((-1, 8),
                name='{}_mbox_priorbox_reshape'.format(prefix))(priorbox)
        for prefix, priorbox in zip(prefixes, priorbox_layers)
    ]

    ### Concatenate the predictions from the different layers

    # Axis 0 (batch) and axis 2 (n_classes or 4, respectively) are identical for all layer predictions,
    # so we want to concatenate along axis 1, the number of boxes per layer
    # Output shape of `mbox_conf`: (batch, n_boxes_total, n_classes)
    mbox_conf = Concatenate(axis=1, name='mbox_conf')(conf_reshaped)

    # Output shape of `mbox_loc`: (batch, n_boxes_total, 4)
    mbox_loc = Concatenate(axis=1, name='mbox_loc')(loc_reshaped)

    # Output shape of `mbox_priorbox`: (batch, n_boxes_total, 8)
    mbox_priorbox = Concatenate(axis=1,
                                name='mbox_priorbox')(priorbox_reshaped)

    # The box coordinate predictions will go into the loss function just the way they are,
    # but for the class predictions, we'll apply a softmax activation layer first
    mbox_conf_softmax = Activation('softmax',
                                   name='mbox_conf_softmax')(mbox_conf)

    # Concatenate the class and box predictions and the anchors to one large predictions vector
    # Output shape of `predictions`: (batch, n_boxes_total, n_classes + 4 + 8)
    predictions = Concatenate(axis=2, name='predictions')(
        [mbox_conf_softmax, mbox_loc, mbox_priorbox])

    model = Model(inputs=x, outputs=predictions)

    if not return_predictor_sizes:
        return model

    # Get the spatial dimensions (height, width) of the predictor conv layers, we need them to
    # be able to generate the default boxes for the matching process outside of the model during training.
    # Note that the original implementation performs anchor box matching inside the loss function. We don't do that.
    # Instead, we'll do it in the batch generator function.
    # The spatial dimensions are the same for the confidence and localization predictors, so we just take those of the conf layers.
    predictor_sizes = np.array(
        [conf._keras_shape[1:3] for conf in conf_layers])
    return model, predictor_sizes
def new_lpcnet_model(frame_size=160, rnn_units1=384, rnn_units2=16, nb_used_features=38, training=False, use_gpu=True):
    """Build the LPCNet network and its encoder/decoder views.

    Three Keras models that share the same layer instances are created:

    * ``model``   maps ``[pcm, feat, pitch]`` to ``ulaw_prob``.
    * ``encoder`` maps ``[feat, pitch]`` to the conditioning features.
    * ``decoder`` maps ``[pcm, dec_feat, dec_state1, dec_state2]`` to
      ``[dec_ulaw_prob, state1, state2]`` so the two GRUs can be stepped
      with explicit initial states.

    Relies on the module-level names ``embed_size``, ``pcm_levels``,
    ``PCMInit`` and ``MDense``.

    Args:
        frame_size: Repeat factor applied to the conditioning features along
            axis 1 before they are concatenated with the sample embeddings.
        rnn_units1: Number of units of the first GRU (``gru_a``).
        rnn_units2: Number of units of the second GRU (``gru_b``).
        nb_used_features: Size of the last axis of the ``feat`` input.
        training: When true the feature convolutions use ``'valid'`` padding,
            otherwise ``'same'``.
        use_gpu: When true ``CuDNNGRU`` layers are used, otherwise plain
            ``GRU`` layers with sigmoid recurrent activation and
            ``reset_after=True``.

    Returns:
        Tuple ``(model, encoder, decoder)``. ``model`` additionally carries
        ``rnn_units1``, ``rnn_units2``, ``nb_used_features`` and
        ``frame_size`` as attributes.
    """
    # Width of the conditioning features; the decoder's `dec_feat` input must
    # match the output width of `feature_dense2`.
    cond_units = 128

    pcm = Input(shape=(None, 3))
    feat = Input(shape=(None, nb_used_features))
    pitch = Input(shape=(None, 1))
    dec_feat = Input(shape=(None, cond_units))
    dec_state1 = Input(shape=(rnn_units1,))
    dec_state2 = Input(shape=(rnn_units2,))

    padding = 'valid' if training else 'same'
    fconv1 = Conv1D(cond_units, 3, padding=padding, activation='tanh', name='feature_conv1')
    fconv2 = Conv1D(cond_units, 3, padding=padding, activation='tanh', name='feature_conv2')

    # Embed the three values per step of `pcm` and merge them into one vector.
    embed = Embedding(256, embed_size, embeddings_initializer=PCMInit(), name='embed_sig')
    cpcm = Reshape((-1, embed_size * 3))(embed(pcm))

    # The pitch input gets its own embedding and is appended to the features.
    pembed = Embedding(256, 64, name='embed_pitch')
    cat_feat = Concatenate()([feat, Reshape((-1, 64))(pembed(pitch))])

    cfeat = fconv2(fconv1(cat_feat))

    fdense1 = Dense(cond_units, activation='tanh', name='feature_dense1')
    fdense2 = Dense(cond_units, activation='tanh', name='feature_dense2')
    cfeat = fdense2(fdense1(cfeat))

    # Repeat every conditioning vector `frame_size` times along axis 1.
    rep = Lambda(lambda x: K.repeat_elements(x, frame_size, 1))

    if use_gpu:
        rnn = CuDNNGRU(rnn_units1, return_sequences=True, return_state=True, name='gru_a')
        rnn2 = CuDNNGRU(rnn_units2, return_sequences=True, return_state=True, name='gru_b')
    else:
        # `reset_after` is a boolean flag; the string 'true' only worked
        # because any non-empty string is truthy.
        rnn = GRU(rnn_units1, return_sequences=True, return_state=True,
                  recurrent_activation="sigmoid", reset_after=True, name='gru_a')
        rnn2 = GRU(rnn_units2, return_sequences=True, return_state=True,
                   recurrent_activation="sigmoid", reset_after=True, name='gru_b')

    rnn_in = Concatenate()([cpcm, rep(cfeat)])
    md = MDense(pcm_levels, activation='softmax', name='dual_fc')
    gru_out1, _ = rnn(rnn_in)
    gru_out2, _ = rnn2(Concatenate()([gru_out1, rep(cfeat)]))
    ulaw_prob = md(gru_out2)

    model = Model([pcm, feat, pitch], ulaw_prob)
    model.rnn_units1 = rnn_units1
    model.rnn_units2 = rnn_units2
    model.nb_used_features = nb_used_features
    model.frame_size = frame_size

    encoder = Model([feat, pitch], cfeat)

    # The decoder reuses the GRUs and the output layer, but takes the
    # conditioning features and the recurrent states as explicit inputs.
    dec_rnn_in = Concatenate()([cpcm, dec_feat])
    dec_gru_out1, state1 = rnn(dec_rnn_in, initial_state=dec_state1)
    dec_gru_out2, state2 = rnn2(Concatenate()([dec_gru_out1, dec_feat]), initial_state=dec_state2)
    dec_ulaw_prob = md(dec_gru_out2)

    decoder = Model([pcm, dec_feat, dec_state1, dec_state2], [dec_ulaw_prob, state1, state2])
    return model, encoder, decoder
def build_late_fusion():
    """Build the four-branch late-fusion classifier.

    One 2D-convolutional branch processes ``input_dc``; three identical
    time-distributed Conv1D + LSTM branches process ``input_t``, ``input_pm``
    and ``input_w``. Each branch ends in a 100-unit representation; the four
    are concatenated and fed to a softmax layer with ``len(activity_list)``
    outputs.

    Input shapes are derived from the module-level names ``window``,
    ``dc_frames_per_second``, ``feature_length``, ``pm_frames_per_second``
    and ``pm_frame_size``.

    Returns:
        The (uncompiled) Keras ``Model`` with inputs ordered
        ``[input_t, input_w, input_dc, input_pm]``. The model summary is
        printed as a side effect.
    """

    def dense_stack(tensor, sizes):
        """Apply Dense(relu) + BatchNormalization for every size in `sizes`."""
        for units in sizes:
            tensor = Dense(units, activation='relu')(tensor)
            tensor = BatchNormalization()(tensor)
        return tensor

    def sequence_branch(branch_input):
        """Per-time-step Conv1D feature extractor followed by an LSTM head."""
        h = branch_input
        for n_filters in (32, 64):
            h = TimeDistributed(Conv1D(n_filters, kernel_size=5, activation='relu'))(h)
            h = TimeDistributed(MaxPooling1D(pool_size=2))(h)
            h = TimeDistributed(BatchNormalization())(h)
        # Merge the last two axes so the LSTM sees one vector per time step.
        dims = K.int_shape(h)
        h = Reshape((dims[1], dims[2] * dims[3]))(h)
        h = LSTM(1200)(h)
        h = BatchNormalization()(h)
        return dense_stack(h, (600, 100))

    input_dc = Input(shape=(12, 16 * window * dc_frames_per_second, 1))
    input_t = Input(shape=(window, feature_length, 1))
    input_w = Input(shape=(window, feature_length, 1))
    input_pm = Input(shape=(window, pm_frames_per_second * pm_frame_size, 1))

    # 2D-convolutional branch for `input_dc`.
    x = input_dc
    for n_filters in (32, 64):
        x = Conv2D(n_filters, kernel_size=(3, 3), activation='relu')(x)
        x = MaxPooling2D(pool_size=2, data_format='channels_last')(x)
        x = BatchNormalization()(x)
    x = Flatten()(x)
    x = dense_stack(x, (1200, 600, 100))

    # Branches are built in the original order (t, pm, w) so automatically
    # generated layer names stay the same.
    y = sequence_branch(input_t)
    z = sequence_branch(input_pm)
    w = sequence_branch(input_w)

    c = concatenate([x, y, z, w])
    c = Dense(len(activity_list), activation='softmax')(c)

    model = Model(inputs=[input_t, input_w, input_dc, input_pm], outputs=c)
    model.summary()
    return model
main_input = Input(shape=(maxlen, ), dtype='int32') embedding_map = Embedding(output_dim=embedding_dims, input_dim=max_features, input_length=maxlen, W_regularizer=l2(reg_conf[0]))(main_input) ## convs = [] for index in range(embedding_dims): #print ("i:",index) t = Lambda(slice, output_shape=(maxlen, 1), arguments={'index': index}, name='slice_' + str(index + 1))(embedding_map) x = Reshape((maxlen, 1, 1))(t) #(batch, height, width, channels) #第一层conv and pooling x = Convolution2D(m1, w1, 1, border_mode='valid', subsample=(1, 1), activation='linear', dim_ordering='tf', W_regularizer=l2(reg_conf[1]), b_regularizer=l2(reg_conf[1]))(x) x = MaxPooling2D(pool_size=(2, 1), strides=(2, 1), border_mode='valid',
def build_keras_trainer(game, config):
    """Build and compile the policy/value network and wrap it in a trainer.

    Args:
        game (Game): Perfect information dynamics/game. Queried for the board
            size and the size of the action space.
        config (Config): Configuration loaded from a .json file. Network
            hyper-parameters are read from ``config.nn``; ``config.training``
            is handed to the trainer.

    Returns:
        KerasTrainer: Compiled Keras model with the outputs ``pi`` and
        ``value``, wrapped in a trainer object.

    Raises:
        ValueError: If ``config.nn["feature_extractor"]`` is not one of
            ``'agz'``, ``'avgpool'`` or ``'flatten'``.
    """
    conv_filters = config.nn["conv_filters"]
    conv_kernel = config.nn["conv_kernel"]
    conv_stride = config.nn["conv_stride"]
    residual_bottleneck = config.nn["residual_bottleneck"]
    residual_filters = config.nn["residual_filters"]
    residual_kernel = config.nn["residual_kernel"]
    residual_num = config.nn["residual_num"]
    feature_extractor = config.nn["feature_extractor"]
    dense_size = config.nn["dense_size"]
    loss = config.nn['loss']
    l2_reg = config.nn["l2_regularizer"]
    lr = config.nn['lr']
    momentum = config.nn['momentum']

    DATA_FORMAT = image_data_format()
    BOARD_HEIGHT, BOARD_WIDTH = game.getBoardSize()
    ACTION_SIZE = game.getActionSize()
    channels_first = DATA_FORMAT == 'channels_first'

    def conv_bn_relu(tensor, filters, kernel_size, strides=1, shortcut=None):
        """Conv2D -> BatchNorm -> (optional residual add) -> ReLU."""
        out = Conv2D(filters,
                     kernel_size=kernel_size,
                     strides=strides,
                     padding="same",
                     kernel_regularizer=l2(l2_reg),
                     data_format=DATA_FORMAT)(tensor)
        # Normalize over the channel axis of the active data format.
        out = BatchNormalization(axis=1 if channels_first else 3)(out)
        if shortcut is not None:
            out = add([out, shortcut])
        return Activation(activation='relu')(out)

    def bottleneck_block(tensor, filters, bottleneck, kernel_size):
        """1x1 reduce -> kxk conv -> 1x1 expand, with a shortcut from the input."""
        inner = conv_bn_relu(tensor, bottleneck, kernel_size=1, strides=1)
        inner = conv_bn_relu(inner, bottleneck, kernel_size, strides=1)
        return conv_bn_relu(inner, filters, kernel_size=1, strides=1, shortcut=tensor)

    def hidden_dense():
        """Fresh ReLU dense layer of `dense_size` units with L2 regularization."""
        return Dense(dense_size, activation='relu', kernel_regularizer=l2(l2_reg))

    # Boards come in as 2D arrays; add a single channel axis for the convolutions.
    boards_input = Input(shape=(BOARD_HEIGHT, BOARD_WIDTH))
    if channels_first:
        board_shape = (1, BOARD_HEIGHT, BOARD_WIDTH)
    else:
        board_shape = (BOARD_HEIGHT, BOARD_WIDTH, 1)
    x = Reshape(board_shape)(boards_input)

    # Input convolution
    if conv_filters > 0:
        x = conv_bn_relu(x, filters=conv_filters, kernel_size=conv_kernel, strides=conv_stride)

    # Tower of residual blocks
    if residual_filters > 0:
        if conv_filters != residual_filters:
            # Bring the number of filters in line with the residual blocks so
            # that the shortcut connections can be added element-wise.
            x = conv_bn_relu(x, filters=residual_filters, kernel_size=residual_kernel, strides=1)
        for _ in range(residual_num):
            x = bottleneck_block(x, residual_filters, residual_bottleneck, residual_kernel)

    # Final feature extractors
    if feature_extractor == "agz":
        # Separate 1x1 convolutions for each head; only the value head gets
        # an extra hidden dense layer.
        pi = Flatten()(conv_bn_relu(x, filters=2, kernel_size=1, strides=1))
        value = Flatten()(conv_bn_relu(x, filters=1, kernel_size=1, strides=1))
        value = hidden_dense()(value)
    elif feature_extractor == "avgpool":
        x = GlobalAveragePooling2D(data_format=DATA_FORMAT)(x)
        shared = hidden_dense()(x)
        pi = shared
        value = shared
    elif feature_extractor == "flatten":
        x = Flatten()(x)
        shared = hidden_dense()(x)
        pi = shared
        value = shared
    else:
        raise ValueError(
            "Unknown feature extractor! Possible values: 'agz', 'avgpool', 'flatten'"
        )

    # Heads
    pi = Dense(ACTION_SIZE, activation='softmax', kernel_regularizer=l2(l2_reg), name='pi')(pi)
    value = Dense(1, activation='tanh', kernel_regularizer=l2(l2_reg), name='value')(value)

    # Create and compile the model
    model = Model(inputs=boards_input, outputs=[pi, value])
    optimizer = SGD(lr=lr, momentum=momentum, nesterov=True)
    model.compile(loss=loss, optimizer=optimizer, metrics=['accuracy'])

    # Log model architecture
    model.summary(print_fn=lambda line: log.debug("%s", line))

    return KerasTrainer(model, config.training)
name='conv_tanh_1')(input_img) x = Conv2D(32, (3, 3), activation='tanh', padding='same', name='conv_tanh_2')(x) x = MaxPooling2D((2, 2), padding='same', name='maxpool_1')(x) x = Dropout((0.25), name='dropout_1')(x) x = Flatten(name='flat_1')(x) x = Dense(units=512, activation='tanh', name='dense_tanh_1')(x) x = Dropout((0.5), name='dropout_2')(x) encoded = Dense(units=latent_dim, activation='softmax', name='dens_softmax_1')(x) # at this point the representation is 101-dimensional x = Dense(units=512, activation='tanh')(encoded) x = Dense(units=131072)(x) x = Reshape((64, 64, 32), input_shape=(131072, ))(x) x = UpSampling2D((2, 2))(x) x = Conv2D(32, (3, 3), activation='tanh', padding='same')(x) x = Conv2D(32, (3, 3), activation='tanh', padding='same')(x) decoded = Conv2D(3, (3, 3), activation='sigmoid', padding='same')(x) autoencoder = Model(input_img, decoded) autoencoder.compile(optimizer='adadelta', loss='mse') print(autoencoder.summary()) autoencoder.fit_generator( tuple_generator(train_generator), steps_per_epoch=train_samples / batch_size, epochs=epochs, validation_data=tuple_generator(validation_generator), validation_steps=nb_validation_samples)
# Shape info needed to build Decoder Model shape = K.int_shape(x) # Generate a 16-dim latent vector x = Flatten()(x) latent = Dense(latent_dim, name='latent_vector')(x) # Instantiate Encoder Model encoder = Model(inputs, latent, name='encoder') encoder.summary() plot_model(encoder, to_file='encoder.png', show_shapes=True) # Build the Decoder Model latent_inputs = Input(shape=(latent_dim,), name='decoder_input') x = Dense(shape[1]*shape[2]*shape[3])(latent_inputs) x = Reshape((shape[1], shape[2], shape[3]))(x) # Stack of BN-ReLU-Transposed Conv2D-UpSampling2D blocks for i in range(2): x = BatchNormalization()(x) x = Activation('relu')(x) x = Conv2DTranspose(filters=filters, kernel_size=kernel_size, padding='same')(x) x = UpSampling2D()(x) filters //= 2 x = Conv2DTranspose(filters=1, kernel_size=kernel_size, padding='same')(x)
# Layer 22 x = Conv2D(1024, (3, 3), strides=(1, 1), padding='same', name='conv_22', use_bias=False)(x) x = BatchNormalization(name='norm_22')(x) x = LeakyReLU(alpha=0.1)(x) # Layer 23 x = Conv2D(BOX * (4 + 1 + CLASS), (1, 1), strides=(1, 1), padding='same', name='conv_23')(x) output = Reshape((GRID_H, GRID_W, BOX, 4 + 1 + CLASS))(x) # small hack to allow true_boxes to be registered when Keras build the model # for more information: https://github.com/fchollet/keras/issues/2790 output = Lambda(lambda args: args[0])([output, true_boxes]) model = Model([input_image, true_boxes], output) # In[ ]: model.summary() # # Load pretrained weights # **Load the weights originally provided by YOLO**
def _i3d_inception_module(x, tag, filters, channel_axis):
    """Build one Inception ("Mixed") module and return the concatenated output.

    `filters` holds, in order, the filter counts of: the 1x1 branch, the 1x1
    reduction and the 3x3 conv of branch 1, the 1x1 reduction and the 3x3
    conv of branch 2, and the 1x1 projection that follows the max-pool.
    `tag` (e.g. ``'3b'``) is spliced into every layer name.
    """
    n_0, n_1_reduce, n_1, n_2_reduce, n_2, n_3_proj = filters

    branch_0 = conv3d_bn(x, n_0, 1, 1, 1, padding='same', name='Conv3d_%s_0a_1x1' % tag)

    branch_1 = conv3d_bn(x, n_1_reduce, 1, 1, 1, padding='same', name='Conv3d_%s_1a_1x1' % tag)
    branch_1 = conv3d_bn(branch_1, n_1, 3, 3, 3, padding='same', name='Conv3d_%s_1b_3x3' % tag)

    branch_2 = conv3d_bn(x, n_2_reduce, 1, 1, 1, padding='same', name='Conv3d_%s_2a_1x1' % tag)
    branch_2 = conv3d_bn(branch_2, n_2, 3, 3, 3, padding='same', name='Conv3d_%s_2b_3x3' % tag)

    branch_3 = MaxPooling3D((3, 3, 3), strides=(1, 1, 1), padding='same',
                            name='MaxPool2d_%s_3a_3x3' % tag)(x)
    branch_3 = conv3d_bn(branch_3, n_3_proj, 1, 1, 1, padding='same', name='Conv3d_%s_3b_1x1' % tag)

    return layers.concatenate([branch_0, branch_1, branch_2, branch_3],
                              axis=channel_axis,
                              name='Mixed_%s' % tag)


def Inception_Inflated3d(include_top=True, weights=None, input_tensor=None, input_shape=None, dropout_prob=0.0, endpoint_logit=True, classes=400):
    """Instantiates the Inflated 3D Inception v1 architecture.

    Optionally loads weights pre-trained on Kinetics. Note that when using
    TensorFlow, for best performance you should set
    `image_data_format='channels_last'` in your Keras config at
    ~/.keras/keras.json. The data format convention used by the model is the
    one specified in your Keras config file.

    Note that the default input frame (image) size for this model is 224x224.

    # Arguments
        include_top: whether to include the classification layer at the top
            of the network.
        weights: one of `None` (random initialization), one of the names
            listed in `WEIGHTS_NAME` (the matching pre-trained weights are
            downloaded), or the path to a weights file to load.
        input_tensor: optional Keras tensor (i.e. output of `layers.Input()`)
            to use as image input for the model.
        input_shape: optional shape tuple, only to be specified if
            `include_top` is False (otherwise the input shape has to be
            `(NUM_FRAMES, 224, 224, 3)` with `channels_last` data format or
            `(NUM_FRAMES, 3, 224, 224)` with `channels_first` data format).
            It should have exactly 3 input channels. NUM_FRAMES should be no
            smaller than 8. The authors used 64 frames per example for
            training and testing on the Kinetics dataset. Width and height
            should be no smaller than 32. E.g. `(64, 150, 150, 3)` would be
            one valid value.
        dropout_prob: optional, dropout probability applied in the dropout
            layer after the global average pooling layer. 0.0 means no
            dropout is applied, 1.0 means dropout is applied to all features.
            Only useful when `include_top` is True.
        endpoint_logit: (boolean) optional. If True, the model's forward pass
            ends at the logits; if False, a softmax is applied on top of the
            logits. Ending at the logits is useful when combining the results
            of an RGB model and an optical flow model. Only useful when
            `include_top` is True.
        classes: optional number of classes to classify images into, only to
            be specified if `include_top` is True and no pre-trained weights
            from `WEIGHTS_NAME` are requested.

    # Returns
        A Keras model instance.

    # Raises
        ValueError: in case of invalid argument for `weights`,
            or invalid input shape.
    """
    if not (weights in WEIGHTS_NAME or weights is None or os.path.exists(weights)):
        raise ValueError('The `weights` argument should be either '
                         '`None` (random initialization) or %s' % str(WEIGHTS_NAME) + ' '
                         'or a valid path to a file containing `weights` values')

    if weights in WEIGHTS_NAME and include_top and classes != 400:
        raise ValueError('If using `weights` as one of these %s, with `include_top`'
                         ' as true, `classes` should be 400' % str(WEIGHTS_NAME))

    # Determine proper input shape
    input_shape = _obtain_input_shape(
        input_shape,
        default_frame_size=224,
        min_frame_size=32,
        default_num_frames=64,
        min_num_frames=8,
        data_format=K.image_data_format(),
        require_flatten=include_top,
        weights=weights)

    if input_tensor is None:
        img_input = Input(shape=input_shape)
    elif not K.is_keras_tensor(input_tensor):
        img_input = Input(tensor=input_tensor, shape=input_shape)
    else:
        img_input = input_tensor

    channels_first = K.image_data_format() == 'channels_first'
    channel_axis = 1 if channels_first else 4

    # Downsampling via convolution (spatial and temporal)
    x = conv3d_bn(img_input, 64, 7, 7, 7, strides=(2, 2, 2), padding='same', name='Conv3d_1a_7x7')

    # Downsampling (spatial only)
    x = MaxPooling3D((1, 3, 3), strides=(1, 2, 2), padding='same', name='MaxPool2d_2a_3x3')(x)
    x = conv3d_bn(x, 64, 1, 1, 1, strides=(1, 1, 1), padding='same', name='Conv3d_2b_1x1')
    x = conv3d_bn(x, 192, 3, 3, 3, strides=(1, 1, 1), padding='same', name='Conv3d_2c_3x3')

    # Downsampling (spatial only)
    x = MaxPooling3D((1, 3, 3), strides=(1, 2, 2), padding='same', name='MaxPool2d_3a_3x3')(x)

    # Mixed 3b, 3c
    for tag, filters in (('3b', (64, 96, 128, 16, 32, 32)),
                         ('3c', (128, 128, 192, 32, 96, 64))):
        x = _i3d_inception_module(x, tag, filters, channel_axis)

    # Downsampling (spatial and temporal)
    x = MaxPooling3D((3, 3, 3), strides=(2, 2, 2), padding='same', name='MaxPool2d_4a_3x3')(x)

    # Mixed 4b .. 4f
    for tag, filters in (('4b', (192, 96, 208, 16, 48, 64)),
                         ('4c', (160, 112, 224, 24, 64, 64)),
                         ('4d', (128, 128, 256, 24, 64, 64)),
                         ('4e', (112, 144, 288, 32, 64, 64)),
                         ('4f', (256, 160, 320, 32, 128, 128))):
        x = _i3d_inception_module(x, tag, filters, channel_axis)

    # Downsampling (spatial and temporal)
    x = MaxPooling3D((2, 2, 2), strides=(2, 2, 2), padding='same', name='MaxPool2d_5a_2x2')(x)

    # Mixed 5b, 5c
    for tag, filters in (('5b', (256, 160, 320, 32, 128, 128)),
                         ('5c', (384, 192, 384, 48, 128, 128))):
        x = _i3d_inception_module(x, tag, filters, channel_axis)

    if include_top:
        # Classification block
        # NOTE(review): the frame count is read from axis 1 and the result is
        # reshaped to (frames, classes), which matches the `channels_last`
        # layout only -- confirm before using `channels_first` with the top.
        x = AveragePooling3D((2, 7, 7), strides=(1, 1, 1), padding='valid', name='global_avg_pool')(x)
        x = Dropout(dropout_prob)(x)

        x = conv3d_bn(x, classes, 1, 1, 1, padding='same',
                      use_bias=True, use_activation_fn=False, use_bn=False,
                      name='Conv3d_6a_1x1')

        num_frames_remaining = int(x.shape[1])
        x = Reshape((num_frames_remaining, classes))(x)

        # logits (raw scores for each class), averaged over the remaining frames
        x = Lambda(lambda x: K.mean(x, axis=1, keepdims=False),
                   output_shape=lambda s: (s[0], s[2]))(x)

        if not endpoint_logit:
            x = Activation('softmax', name='prediction')(x)
    else:
        # Height and width sit at axes 2/3 for `channels_last` but at axes
        # 3/4 for `channels_first` (axis 1 holds the channels there).
        if channels_first:
            h = int(x.shape[3])
            w = int(x.shape[4])
        else:
            h = int(x.shape[2])
            w = int(x.shape[3])
        x = AveragePooling3D((2, h, w), strides=(1, 1, 1), padding='valid', name='global_avg_pool')(x)

    # create model
    model = Model(img_input, x, name='i3d_inception')

    # load weights
    if weights in WEIGHTS_NAME:
        # Position i in WEIGHTS_NAME selects the i-th variant below, whose
        # name is both the key into the URL tables and part of the file name.
        variants = ('rgb_kinetics_only',
                    'flow_kinetics_only',
                    'rgb_imagenet_and_kinetics',
                    'flow_imagenet_and_kinetics')
        for position, variant in enumerate(variants):
            if weights == WEIGHTS_NAME[position]:
                break
        else:
            raise ValueError('No pre-trained weights are registered for `weights=%s`' % str(weights))

        if include_top:
            weights_url = WEIGHTS_PATH[variant]
            model_name = 'i3d_inception_%s.h5' % variant
        else:
            weights_url = WEIGHTS_PATH_NO_TOP[variant]
            model_name = 'i3d_inception_%s_no_top.h5' % variant

        downloaded_weights_path = get_file(model_name, weights_url, cache_subdir='models')
        model.load_weights(downloaded_weights_path)

        if K.backend() == 'theano':
            layer_utils.convert_all_kernels_in_model(model)

        if K.image_data_format() == 'channels_first' and K.backend() == 'tensorflow':
            warnings.warn('You are using the TensorFlow backend, yet you '
                          'are using the Theano '
                          'image data format convention '
                          '(`image_data_format="channels_first"`). '
                          'For best performance, set '
                          '`image_data_format="channels_last"` in '
                          'your keras config '
                          'at ~/.keras/keras.json.')
    elif weights is not None:
        model.load_weights(weights)

    return model
y_train = sequence.pad_sequences(y_trj, maxlen = maxLen, dtype='float', padding = 'post', value=0.) else: maxLen = inputSize #MODEL SETUP #Setup 1D PCA-like time-lagged autoencoder input_shape = Input(shape=(maxLen,3)) #Encoder: inputLayer = Flatten()(input_shape) encoded = Dense(bottleneck)(inputLayer) #Decoder: decoded = Dense(maxLen*3)(encoded) reshaped = Reshape((maxLen,3))(decoded) #Mask the padded data. if(pad): decoded = Masking(0.)(decoded) #Compile model autoencoder = Model(input_shape,reshaped) autoencoder.compile(optimizer='adadelta', loss='mean_squared_error') #Train! training_start = time.time() autoencoder.fit(x_train,y_train,epochs=nEpochs, batch_size=batchSize) training_end = time.time() #See performance on training data
processed_batch = batch.astype('bool') return processed_batch def process_reward(self, reward): # return np.clip(reward, -1., 1.) return reward # Next, we build our model. We use the same model that was described by Mnih et al. (2015). input_shape = (num_zones, MAP_X, MAP_Y) print input_shape print env.micro.map.getMapState().shape assert input_shape == env.micro.map.getMapState().shape model = Sequential() model.add(Reshape((input_shape), input_shape=(WINDOW_LENGTH,) + input_shape)) if K.image_dim_ordering() == 'tf': print('tensorflow ordering') # (width, height, channels) model.add(Permute((2, 3, 1), input_shape=input_shape)) permute_shape = (MAP_X, MAP_Y, num_zones) elif K.image_dim_ordering() == 'th': # (channels, width, height) model.add(Permute((1, 2, 3), input_shape=input_shape)) permute_shape = (num_zones, MAP_X, MAP_Y) else: raise RuntimeError('Unknown image_dim_ordering.') model.add(Convolution2D(32, (8, 8), strides=(2, 2), padding='same')) model.add(Activation('relu')) model.add(Convolution2D(64, (4, 4), strides=(2, 2), padding='same'))
def kimCNN(embedding_output_size, imput_size, vocab_size, num_labels=5, loss='categorical_crossentropy',
           kernel_sizes=(5, 4, 3), num_filters=100):
    """
    Convolutional neural network model for sentence classification.

    The embedded input is convolved in parallel with one filter bank per
    entry of `kernel_sizes`; every bank is max-pooled over the whole
    sequence, the pooled features are concatenated, passed through dropout
    and classified with a softmax layer.

    Parameters
    ----------
    embedding_output_size: Dimension of the embedding space.
    imput_size: number of features (tokens) of the input.
    vocab_size: size of the vocabulary.
    num_labels: number of output labels.
    loss: loss passed to `model.compile`.
    kernel_sizes: heights (in tokens) of the parallel convolution filters;
        the default reproduces the original 5/4/3 configuration.
    num_filters: number of filters per kernel size.

    Returns
    -------
    compiled keras model

    Raises
    ------
    ValueError: if `kernel_sizes` is empty.
    """
    kernel_sizes = tuple(kernel_sizes)
    if not kernel_sizes:
        raise ValueError('`kernel_sizes` must contain at least one kernel size')

    print('Preparing embedding matrix.')
    embedding_layer = Embedding(input_dim=vocab_size,
                                output_dim=embedding_output_size,
                                input_length=imput_size,
                                trainable=True)

    print('Training model.')
    sequence_input = Input(shape=(imput_size,), dtype='int32')
    embedded_sequences = embedding_layer(sequence_input)
    print(embedded_sequences.shape)

    # Append a channel axis so the embeddings can be fed to Conv2D.
    embedded_sequences = Reshape((imput_size, embedding_output_size, 1))(embedded_sequences)

    # One conv + max-over-time pooling branch per kernel size. Each kernel
    # spans the full embedding width, so the conv output has
    # `imput_size - size + 1` rows, all of which the pooling window covers.
    pooled_branches = []
    for size in kernel_sizes:
        conv = Conv2D(num_filters, (size, embedding_output_size), activation='relu')(embedded_sequences)
        pooled_branches.append(MaxPooling2D((imput_size - size + 1, 1))(conv))

    # Concatenate the pooled features (a single branch needs no merge layer).
    if len(pooled_branches) > 1:
        alpha = concatenate(pooled_branches)
    else:
        alpha = pooled_branches[0]

    # flatten the pooled features
    alpha = Flatten()(alpha)

    # dropout
    alpha = Dropout(0.5)(alpha)

    # predictions
    preds = Dense(num_labels, activation='softmax')(alpha)

    # build and compile the model
    model = Model(sequence_input, preds)
    adadelta = optimizers.Adadelta()
    model.compile(loss=loss, optimizer=adadelta, metrics=['acc'])
    model.summary()

    return model
input_data_test = np.load('vectorized_883_400_test.npy') else: input_data = build_input('train', 400) input_data_test = build_input('test', 400) output_data = build_output('train') output_data_test = build_output('test') ## CNN classifier ###### from keras.layers import Conv1D, MaxPooling1D, Flatten, Dropout ##Building CNN model = Sequential() model.add( Embedding(int(np.max(input_data)), 200, input_length=input_data.shape[1])) model.add(Reshape((200, input_data.shape[1]))) model.add(Conv1D(50, 10)) model.add(MaxPooling1D(5)) model.add(Dropout(0.3)) model.add(Conv1D(50, 10)) model.add(MaxPooling1D(5)) model.add(Dropout(0.3)) model.add(Flatten()) model.add(Dense(100, activation='relu')) model.add(Dropout(0.3)) model.add(Dense(100, activation='relu')) model.add(Dropout(0.3)) model.add((Dense(output_data.shape[1], activation='sigmoid'))) ##Compilation and training model.compile(loss='binary_crossentropy', optimizer='nadam')
def myCrossLayer(nb_flow=2, map_height=16, map_width=8, nb_layers=3, window_len=12, nb_filter=64, external_dim=None, filter_size=3):
    """
    Build the final (uncompiled) model.

    One ``Input`` is created per time step of the window. Each input goes
    through ``dense_conv3D(...)``, is reshaped to a 1024-vector and passed
    through ``Lambda(expand_dim_backend)``. The per-step features are then
    handed, as a list, to ``add_densenet(...)`` whose result is the model
    output.

    :param nb_flow: number of measurements, also number of channels of each picture sample
    :param map_height: grid map height, here is 16
    :param map_width: grid map width, here is 8
    :param nb_layers: number of cnn layers (forwarded to ``dense_conv3D``; also
        scales the width of the external-feature Dense layer)
    :param window_len: number of time steps, i.e. number of per-step inputs created
    :param nb_filter: number of filters forwarded to ``dense_conv3D``
    :param external_dim: size of the external feature; ``None`` disables the
        external branch
    :param filter_size: forwarded to ``dense_conv3D`` as both ``nb_col`` and ``nb_row``
    :return: a keras ``Model`` whose inputs are the collected ``Input`` tensors
    """
    window_len_pic_fea = []
    main_inputs = []
    if external_dim == None:
        # Picture-only variant: one input and one CNN feature per time step.
        for i in range(window_len):
            inputs = Input(shape=(nb_flow, map_height, map_width))
            main_inputs.append(inputs)
            cnn_fea = dense_conv3D(nb_filter=nb_filter, nb_col=filter_size, nb_row=filter_size, padding='same', nb_layers=nb_layers, dense_units=1024, dropout_rate=0.5)(inputs)
            # cnn_fea_flatten = Reshape(([nb_layers * 1024]))(cnn_fea)
            cnn_fea_flatten = Reshape(([1024]))(cnn_fea)
            # cnn_fea_flatten = Dropout(rate=0.3)(cnn_fea_flatten)
            # cnn_fea_flatten = expand_dims(cnn_fea_flatten, axis=1)
            # presumably expand_dim_backend inserts a time axis (see the
            # commented-out expand_dims call above) -- TODO confirm
            cnn_fea_flatten = Lambda(expand_dim_backend)(cnn_fea_flatten)
            window_len_pic_fea.append(cnn_fea_flatten)
    # add external feature here
    # NOTE(review): when external_dim == 0 neither branch runs, so
    # add_densenet receives an empty list and the model has no inputs.
    if external_dim != None and external_dim > 0:
        for i in range(window_len):
            # todo : use two tensor to represent the data and meta_data respectively
            # NOTE(review): the shape below is a tuple nested in a tuple, which
            # does not look like a valid Input shape -- confirm against the
            # todo above before relying on this branch.
            inputs = Input(shape=((nb_flow, map_height, map_width), external_dim))
            main_inputs.append(inputs)
            # Both aliases refer to the very same tensor as `inputs`.
            inputs_0 = inputs
            inputs_1 = inputs
            cnn_fea = dense_conv3D(nb_filter=nb_filter, nb_col=filter_size, nb_row=filter_size, padding='same', nb_layers=nb_layers, dense_units=1024, dropout_rate=0.5)(inputs_0)
            # cnn_fea_flatten = Reshape(([nb_layers * 1024]))(cnn_fea)
            cnn_fea_flatten = Reshape(([1024]))(cnn_fea)
            # cnn_fea_flatten = Dropout(rate=0.3)(cnn_fea_flatten)
            # cnn_fea_flatten = expand_dims(cnn_fea_flatten, axis=1)
            cnn_fea_flatten = Lambda(expand_dim_backend)(cnn_fea_flatten)
            window_len_pic_fea.append(cnn_fea_flatten)
        # The external branch reuses the input of the LAST time step.
        # NOTE(review): that tensor is already in main_inputs, so it is
        # appended a second time here -- confirm this is intended.
        external_input = inputs_1
        # external_input = Input(shape=(external_dim,))
        main_inputs.append(external_input)
        # todo: change the code here
        embedding = Dense(nb_layers * 1024, activation='relu')(external_input)
        external_out = Lambda(expand_dim_backend)(embedding)
        # Append the external embedding to every per-step picture feature
        # along the last axis.
        new_concatenate_fea = []
        for pic_fea in window_len_pic_fea:
            tmp_con = Concatenate(axis=-1)([pic_fea, external_out])
            new_concatenate_fea.append(tmp_con)
        window_len_pic_fea = new_concatenate_fea
    # The list of per-step features is consumed as a whole by add_densenet.
    outputs = add_densenet(nb_flow=nb_flow, map_height=map_height, map_width=map_width)(window_len_pic_fea)
    # outputs = add_lstm(nb_flow=nb_flow, map_height=map_height, map_width=map_width)(window_len_pic_fea)
    # outputs = attention_after_LSTM(nb_flow=nb_flow, map_height=map_height,
    #                                map_width=map_width, window_len=window_len)(window_len_pic_fea)
    model = Model(inputs=main_inputs, outputs=outputs)
    return model
PReLU(alpha_initializer='zeros', alpha_regularizer=None, alpha_constraint=None, shared_axes=[1, 2])) # model.add(BatchNormalization()) model.add(Conv2D(c, (5, 5), padding='same', strides=1)) model.add( PReLU(alpha_initializer='zeros', alpha_regularizer=None, alpha_constraint=None, shared_axes=[1, 2])) # model.add(BatchNormalization(name='last')) model.add(Flatten(name='last')) model.add(ChannelNormalizer(sqrtk, name='normal')) model.add(ChannelNoise(std, name='noise')) model.add(Reshape([8, 8, c])) # Decoder model.add(Conv2DTranspose(32, (5, 5), padding='same', strides=1)) model.add( PReLU(alpha_initializer='zeros', alpha_regularizer=None, alpha_constraint=None, shared_axes=[1, 2])) # model.add(BatchNormalization()) model.add(Conv2DTranspose(32, (5, 5), padding='same', strides=1)) model.add( PReLU(alpha_initializer='zeros', alpha_regularizer=None, alpha_constraint=None, shared_axes=[1, 2]))
def __init__(self):
    """Create every layer of the CNN + LSTM classifier and wire the model.

    Four convolutional blocks (batch-norm, 3x3 convolutions, 2x2 pooling)
    are followed by a reshape to 8 time steps, two stacked LSTMs, dropout
    and a 4-way softmax. The layers are only instantiated here; they are
    connected by ``self.create_model()``, which is defined elsewhere in the
    class.
    """
    super(CNNLSTMModel, self).__init__()

    def conv3x3(filters, name):
        # 3x3, stride-1, ReLU convolution that keeps the spatial size.
        return Conv2D(filters, (3, 3), strides=(1, 1), activation='relu',
                      padding='same', name=name)

    def halving_pool(name):
        # 2x2 max pooling with stride 2.
        return MaxPooling2D((2, 2), strides=(2, 2), padding='same', name=name)

    def sequence_lstm():
        # 128-unit LSTM emitting the full sequence, L2-regularised kernel.
        return LSTM(128, return_sequences=True, return_state=False,
                    kernel_regularizer=regularizers.l2(0.01), stateful=False)

    class_count = 4
    image_shape = (128, 128, 1)
    # NOTE(review): with a (128, 128, 1) input, axis 2 is not the channel
    # axis -- confirm this is the intended normalisation axis.
    norm_axis = 2

    self.input_layer = Input(shape=image_shape)

    # Attribute assignment order is kept exactly: it determines the order
    # in which the layers are tracked and auto-named.

    # Block 1
    self.bn1 = BatchNormalization(axis=norm_axis)
    self.conv1 = conv3x3(64, 'conv1')
    self.pool1 = halving_pool('pool1')

    # Block 2
    self.bn2 = BatchNormalization(axis=norm_axis)
    self.conv2 = conv3x3(128, 'conv2')
    self.pool2 = halving_pool('pool2')

    # Block 3
    self.bn3 = BatchNormalization(axis=norm_axis)
    self.conv3_1 = conv3x3(256, 'conv3/conv3_1')
    self.conv3_2 = conv3x3(256, 'conv3/conv3_2')
    self.pool3 = halving_pool('pool3')

    # Block 4
    self.bn4 = BatchNormalization(axis=norm_axis)
    self.conv4_1 = conv3x3(512, 'conv4/conv4_1')
    self.conv4_2 = conv3x3(512, 'conv4/conv4_2')
    self.pool4 = halving_pool('pool4')

    # Recurrent head: 8 steps, remaining dimensions folded into features.
    self.reshape1 = Reshape((8, -1))
    self.lstm1 = sequence_lstm()
    self.lstm2 = sequence_lstm()
    self.dropout1 = Dropout(0.5)
    self.dense1 = Dense(class_count, activation='softmax')

    # Connect the layers, then register the graph endpoints on the model.
    graph_outputs = self.create_model()
    self.inputs = self.input_layer
    self.outputs = graph_outputs
    self.build(image_shape)
def CreateModel(self):
    '''
    Define the CNN + CTC acoustic model with the Keras functional API.

    Input layer: a sequence of feature vectors of shape
        (self.AUDIO_LENGTH, self.AUDIO_FEATURE_LENGTH, 1); the original
        design uses 200-dimensional features and a maximum utterance
        length of 1600 frames (about 16 s).
    Hidden layers: five blocks of two 3x3 convolutions plus max pooling
        (pool size 2 for the first three blocks, 1 for the last two),
        with dropout in between.
    Hidden layer: fully connected, 128 units.
    Output layer: fully connected with self.MS_OUTPUT_SIZE units followed
        by softmax.
    CTC layer: the CTC loss is computed inside a Lambda layer so that it
        can be used as the training loss.

    Returns:
        (model, model_data): ``model`` is the compiled training model
        (wrapped in ``ParallelModel``) whose single output is the CTC loss;
        ``model_data`` maps the audio input to the softmax predictions.
    '''

    def conv3x3(filters, use_bias=True):
        # 3x3 ReLU convolution shared by every block.
        return Conv2D(filters, (3, 3), use_bias=use_bias, activation='relu',
                      padding='same', kernel_initializer='he_normal')

    def pool(size):
        return MaxPooling2D(pool_size=size, strides=None, padding="valid")

    input_data = Input(name='the_input',
                       shape=(self.AUDIO_LENGTH, self.AUDIO_FEATURE_LENGTH, 1))

    # Block 1: 32 filters (the very first convolution has no bias).
    x = conv3x3(32, use_bias=False)(input_data)
    x = Dropout(0.05)(x)
    x = conv3x3(32)(x)
    x = pool(2)(x)
    x = Dropout(0.05)(x)

    # Block 2: 64 filters.
    x = conv3x3(64)(x)
    x = Dropout(0.1)(x)
    x = conv3x3(64)(x)
    x = pool(2)(x)
    x = Dropout(0.1)(x)

    # Block 3: 128 filters.
    x = conv3x3(128)(x)
    x = Dropout(0.15)(x)
    x = conv3x3(128)(x)
    x = pool(2)(x)
    x = Dropout(0.15)(x)

    # Block 4: 128 filters, pool size 1 (keeps the resolution).
    x = conv3x3(128)(x)
    x = Dropout(0.2)(x)
    x = conv3x3(128)(x)
    x = pool(1)(x)
    x = Dropout(0.2)(x)

    # Block 5: 128 filters, pool size 1, no dropout after pooling.
    x = conv3x3(128)(x)
    x = Dropout(0.2)(x)
    x = conv3x3(128)(x)
    x = pool(1)(x)

    # Fold (feature, channel) into one axis per time step. The target shape
    # is read from the pooled tensor instead of being hard-coded, so it
    # stays (200, 3200) for the 1600 x 200 default input and still works
    # when AUDIO_LENGTH / AUDIO_FEATURE_LENGTH are changed.
    _, time_steps, feature_bins, channels = K.int_shape(x)
    x = Reshape((time_steps, feature_bins * channels))(x)

    x = Dropout(0.3)(x)
    x = Dense(128, activation="relu", use_bias=True,
              kernel_initializer='he_normal')(x)
    x = Dropout(0.3)(x)
    x = Dense(self.MS_OUTPUT_SIZE, use_bias=True,
              kernel_initializer='he_normal')(x)

    y_pred = Activation('softmax', name='Activation0')(x)
    model_data = Model(inputs=input_data, outputs=y_pred)

    labels = Input(name='the_labels', shape=[self.label_max_string_length],
                   dtype='float32')
    input_length = Input(name='input_length', shape=[1], dtype='int64')
    label_length = Input(name='label_length', shape=[1], dtype='int64')

    # Keras doesn't currently support loss funcs with extra parameters
    # so CTC loss is implemented in a lambda layer
    loss_out = Lambda(self.ctc_lambda_func, output_shape=(1,), name='ctc')(
        [y_pred, labels, input_length, label_length])

    model = Model(inputs=[input_data, labels, input_length, label_length],
                  outputs=loss_out)
    model = ParallelModel(model, NUM_GPU)
    model.summary()

    # NOTE(review): 10e-8 equals 1e-7 -- confirm 1e-8 was not intended.
    opt = Adam(lr=0.001, beta_1=0.9, beta_2=0.999, decay=0.0, epsilon=10e-8)

    # The 'ctc' layer already outputs the loss value, so the Keras loss
    # function simply passes it through.
    model.compile(loss={'ctc': lambda y_true, y_pred: y_pred}, optimizer=opt)

    print('[*Info] Create Model Successful, Compiles Model Successful. ')
    return model, model_data
def _inception_module(x, tag, filters, channel_axis):
    """Build one I3D "Mixed" block and return the concatenated branches.

    ``tag`` (e.g. ``'3b'``) is embedded in every layer name. ``filters`` is
    ``(b0, b1_reduce, b1, b2_reduce, b2, b3)``: the filter counts of the
    1x1x1 branch, the two 1x1x1 -> 3x3x3 branches and the pooled branch.
    """
    b0, b1_reduce, b1, b2_reduce, b2, b3 = filters

    branch_0 = conv3d_bn(x, b0, 1, 1, 1, padding='same',
                         name='Conv3d_%s_0a_1x1' % tag)

    branch_1 = conv3d_bn(x, b1_reduce, 1, 1, 1, padding='same',
                         name='Conv3d_%s_1a_1x1' % tag)
    branch_1 = conv3d_bn(branch_1, b1, 3, 3, 3, padding='same',
                         name='Conv3d_%s_1b_3x3' % tag)

    branch_2 = conv3d_bn(x, b2_reduce, 1, 1, 1, padding='same',
                         name='Conv3d_%s_2a_1x1' % tag)
    branch_2 = conv3d_bn(branch_2, b2, 3, 3, 3, padding='same',
                         name='Conv3d_%s_2b_3x3' % tag)

    branch_3 = MaxPooling3D((3, 3, 3), strides=(1, 1, 1), padding='same',
                            name='MaxPool2d_%s_3a_3x3' % tag)(x)
    branch_3 = conv3d_bn(branch_3, b3, 1, 1, 1, padding='same',
                         name='Conv3d_%s_3b_1x1' % tag)

    return layers.concatenate([branch_0, branch_1, branch_2, branch_3],
                              axis=channel_axis, name='Mixed_%s' % tag)


def Inception_Inflated3d(include_top=True, weights=None, input_tensor=None,
                         input_shape=None, dropout_prob=0.0,
                         endpoint_logit=True, classes=400):
    """Instantiate the Inflated 3D Inception (I3D) network.

    Args:
        include_top: selects the classification head. When true, a fixed
            (2, 7, 7) average pool is used; otherwise the pool window is
            taken from the spatial size of the last feature map. Both heads
            end in a ``classes``-way 1x1x1 convolution averaged over the
            remaining positions.
        weights: ``None`` (random initialisation), one of ``WEIGHTS_NAME``,
            or the path of a weight file to load by layer name.
        input_tensor: optional tensor to use as the model input.
        input_shape: optional input shape, validated by
            ``_obtain_input_shape``.
        dropout_prob: dropout rate applied before the final convolution.
        endpoint_logit: when false, a softmax named ``'prediction'`` is
            appended; when true the raw logits are returned.
        classes: number of output classes; must be 400 when ``weights`` is
            one of ``WEIGHTS_NAME`` and ``include_top`` is true.

    Returns:
        A Keras ``Model`` named ``'i3d_inception'``.

    Raises:
        ValueError: for an invalid ``weights`` argument, or for
            ``classes != 400`` together with named weights and
            ``include_top``.
    """
    if not (weights in WEIGHTS_NAME or weights is None or os.path.exists(weights)):
        raise ValueError(
            'The `weights` argument should be either '
            '`None` (random initialization) or %s' % str(WEIGHTS_NAME) + ' '
            'or a valid path to a file containing `weights` values')

    if weights in WEIGHTS_NAME and include_top and classes != 400:
        raise ValueError(
            'If using `weights` as one of these %s, with `include_top`'
            ' as true, `classes` should be 400' % str(WEIGHTS_NAME))

    # Determine proper input shape
    input_shape = _obtain_input_shape(input_shape,
                                      default_frame_size=224,
                                      min_frame_size=32,
                                      default_num_frames=64,
                                      min_num_frames=8,
                                      data_format=K.image_data_format(),
                                      require_flatten=include_top,
                                      weights=weights)

    if input_tensor is None:
        img_input = Input(shape=input_shape)
    elif not K.is_keras_tensor(input_tensor):
        img_input = Input(tensor=input_tensor, shape=input_shape)
    else:
        img_input = input_tensor

    channel_axis = 1 if K.image_data_format() == 'channels_first' else 4

    # Downsampling via convolution (spatial and temporal)
    x = conv3d_bn(img_input, 64, 7, 7, 7, strides=(2, 2, 2), padding='same',
                  name='Conv3d_1a_7x7')

    # Downsampling (spatial only)
    x = MaxPooling3D((1, 3, 3), strides=(1, 2, 2), padding='same',
                     name='MaxPool2d_2a_3x3')(x)
    x = conv3d_bn(x, 64, 1, 1, 1, strides=(1, 1, 1), padding='same',
                  name='Conv3d_2b_1x1')
    x = conv3d_bn(x, 192, 3, 3, 3, strides=(1, 1, 1), padding='same',
                  name='Conv3d_2c_3x3')

    # Downsampling (spatial only)
    x = MaxPooling3D((1, 3, 3), strides=(1, 2, 2), padding='same',
                     name='MaxPool2d_3a_3x3')(x)

    # Mixed 3b, 3c
    x = _inception_module(x, '3b', (64, 96, 128, 16, 32, 32), channel_axis)
    x = _inception_module(x, '3c', (128, 128, 192, 32, 96, 64), channel_axis)

    # Downsampling (spatial and temporal)
    x = MaxPooling3D((3, 3, 3), strides=(2, 2, 2), padding='same',
                     name='MaxPool2d_4a_3x3')(x)

    # Mixed 4b .. 4f
    x = _inception_module(x, '4b', (192, 96, 208, 16, 48, 64), channel_axis)
    x = _inception_module(x, '4c', (160, 112, 224, 24, 64, 64), channel_axis)
    x = _inception_module(x, '4d', (128, 128, 256, 24, 64, 64), channel_axis)
    x = _inception_module(x, '4e', (112, 144, 288, 32, 64, 64), channel_axis)
    x = _inception_module(x, '4f', (256, 160, 320, 32, 128, 128), channel_axis)

    # Downsampling (spatial and temporal)
    x = MaxPooling3D((2, 2, 2), strides=(2, 2, 2), padding='same',
                     name='MaxPool2d_5a_2x2')(x)

    # Mixed 5b, 5c
    x = _inception_module(x, '5b', (256, 160, 320, 32, 128, 128), channel_axis)
    x = _inception_module(x, '5c', (384, 192, 384, 48, 128, 128), channel_axis)

    # NOTE(review): both heads index x.shape as (batch, frames, h, w, ...),
    # i.e. they assume 'channels_last' -- confirm before using
    # 'channels_first'.
    if include_top:
        # Classification block
        x = AveragePooling3D((2, 7, 7), strides=(1, 1, 1), padding='valid',
                             name='global_avg_pool')(x)
        print(x.shape)
        x = Dropout(dropout_prob)(x)

        x = conv3d_bn(x, classes, 1, 1, 1, padding='same', use_bias=True,
                      use_activation_fn=False, use_bn=False,
                      name='Conv3d_6a_1x1')
        print(x.shape)

        num_frames_remaining = int(x.shape[1])
        x = Reshape((num_frames_remaining, classes))(x)
        print(x.shape, num_frames_remaining)

        # logits (raw scores for each class), averaged over the frames
        x = Lambda(lambda x: K.mean(x, axis=1, keepdims=False),
                   output_shape=lambda s: (s[0], s[2]))(x)

        if not endpoint_logit:
            x = Activation('softmax', name='prediction')(x)
    else:
        # Pool over whatever spatial extent is left.
        h = int(x.shape[2])
        w = int(x.shape[3])
        x = AveragePooling3D((2, h, w), strides=(1, 1, 1), padding='valid',
                             name='global_avg_pool')(x)
        print('droput used')
        x = Dropout(dropout_prob)(x)

        x = conv3d_bn(x, classes, 1, 1, 1, padding='same', use_bias=True,
                      use_activation_fn=False, use_bn=False,
                      name='Conv3d_6a_1x1')

        x = Reshape((-1, classes))(x)

        # logits (raw scores for each class), averaged over axis 1
        x = Lambda(lambda x: K.mean(x, axis=1, keepdims=False),
                   output_shape=lambda s: (None, classes))(x)

        if not endpoint_logit:
            x = Activation('softmax', name='prediction')(x)

    # create model
    model = Model(img_input, x, name='i3d_inception')

    # load weights
    if weights in WEIGHTS_NAME:
        # The position of `weights` inside WEIGHTS_NAME selects the
        # pretrained variant; the key is looked up in the with-top or
        # no-top path table.
        variant_keys = ('rgb_kinetics_only',
                        'flow_kinetics_only',
                        'rgb_imagenet_and_kinetics',
                        'flow_imagenet_and_kinetics')
        path_table = WEIGHTS_PATH if include_top else WEIGHTS_PATH_NO_TOP
        model_weights_path = path_table[variant_keys[WEIGHTS_NAME.index(weights)]]

        model.load_weights(model_weights_path, by_name=True)

        if K.backend() == 'theano':
            layer_utils.convert_all_kernels_in_model(model)

        if K.image_data_format() == 'channels_first' and K.backend() == 'tensorflow':
            warnings.warn('You are using the TensorFlow backend, yet you '
                          'are using the Theano '
                          'image data format convention '
                          '(`image_data_format="channels_first"`). '
                          'For best performance, set '
                          '`image_data_format="channels_last"` in '
                          'your keras config '
                          'at ~/.keras/keras.json.')
    elif weights is not None:
        model.load_weights(weights, by_name=True)

    return model
def attentive_reader(num_words, embeddings_matrix, ce_loader, scope, embedding_dim=64):
    """Build and compile the attentive-reader answer classifier.

    Question, answer and context token ids are embedded with frozen
    embeddings initialised from ``embeddings_matrix`` and projected by a
    per-token tanh Dense layer. The question summary attends over the
    context BiLSTM states; the attended context is combined with the
    question, concatenated with the answer summary and classified into two
    classes. ``ce_loader`` is accepted but not used here.

    Side effect: the model graph is written to ``2way_model.png``.

    Returns the compiled Keras model with inputs
    ``[q_input, a_input, c_input]``.
    """

    def frozen_embedding(length, tag):
        # Non-trainable embedding; index 0 is reserved for padding.
        return Embedding(input_dim=num_words + 1,
                         output_dim=embedding_dim,
                         weights=[embeddings_matrix],
                         input_length=length,
                         name="embedding_" + tag + "_" + scope,
                         mask_zero=False,
                         trainable=False)

    # (batch, input_len) => (batch, input_len, embedding_dim)
    q_input = Input(shape=(QUESTION_LEN,), name="q_input")
    a_input = Input(shape=(ANSWER_LEN,), name="a_input")
    c_input = Input(shape=(CONTEXT_LEN,), name="c_input")

    question_embedder = frozen_embedding(QUESTION_LEN, "q")
    answer_embedder = frozen_embedding(ANSWER_LEN, "a")
    context_embedder = frozen_embedding(CONTEXT_LEN, "c")

    question = question_embedder(q_input)
    answer = answer_embedder(a_input)
    context = context_embedder(c_input)

    # Per-token projection to 300 dimensions.
    question = TimeDistributed(Dense(300, activation='tanh'))(question)
    answer = TimeDistributed(Dense(300, activation='tanh'))(answer)
    context = TimeDistributed(Dense(300, activation='tanh'))(context)

    # Question summary vector and per-token context states.
    question_vec = Bidirectional(LSTM(50, recurrent_dropout=0.35))(question)
    context_seq = Bidirectional(
        LSTM(50, recurrent_dropout=0.35, return_sequences=True))(context)

    # Attention scores: tanh(W_c * context_t + W_q * question) -> scalar.
    context_proj = TimeDistributed(
        Dense(200, activation=None, use_bias=False))(context_seq)
    question_proj = Dense(200, activation=None, use_bias=False)(question_vec)
    question_proj = RepeatVector(CONTEXT_LEN)(question_proj)
    scores = Add()([context_proj, question_proj])
    scores = TimeDistributed(Activation('tanh'))(scores)
    attention = TimeDistributed(
        Dense(1, activation=None, use_bias=False))(scores)

    # Softmax over the context positions, then back to a column vector.
    attention = Reshape((CONTEXT_LEN,))(attention)
    attention = Activation('softmax')(attention)
    attention = Reshape((CONTEXT_LEN, 1))(attention)

    # Attention-weighted sum of the context states.
    context_seq = Permute((2, 1))(context_seq)
    attended = Lambda(
        lambda pair: K.batch_dot(pair[0], pair[1]))([context_seq, attention])
    attended = Reshape((-1,))(attended)

    # Combine document attention and query (question).
    question_gate = Dense(450, activation=None, use_bias=False)(question_vec)
    attended = Dense(450, activation=None, use_bias=False)(attended)
    joint = Add()([attended, question_gate])
    joint = Activation('tanh')(joint)

    answer_vec = Bidirectional(LSTM(50, recurrent_dropout=0.25))(answer)

    # Classifier over [context/question representation, answer summary].
    hidden = concatenate([joint, answer_vec], axis=1)
    hidden = Dense(250, activation='relu')(hidden)
    hidden = Dropout(0.40)(hidden)
    hidden = Dense(250, activation='relu')(hidden)
    hidden = Dropout(0.15)(hidden)
    output = Dense(2, activation='softmax')(hidden)

    model = Model(inputs=[q_input, a_input, c_input], outputs=[output])
    model.compile(loss=categorical_crossentropy,
                  optimizer='adam',
                  metrics=['accuracy'])
    plot_model(model, to_file='2way_model.png', show_shapes=True)
    return model
pass print('directory_name: ' + directory_name) print('path_to_dir: ' + path_to_dir) # Model ## ====================================================================================================== print("Creating Model...") inputs = Input(shape=(sequence_length, ), dtype='int32') embedding = Embedding(input_dim=len(word_index) + 1, weights=[embedding_matrix], output_dim=embedding_dim, input_length=sequence_length, trainable=True)(inputs) dropout_1 = Dropout(drop)(embedding) reshape1 = Reshape((sequence_length, embedding_dim, 1))(dropout_1) # Conv1 conv_1 = Conv2D(num_filters, kernel_size=(filter_sizes[0], embedding_dim), use_bias=True, strides=1, padding='valid', activation=activation1, name='conv_1')(reshape1) maxpool_1 = MaxPool2D(pool_size=pool_size, strides=stride_size, padding=padding, name='pool_1')(conv_1) maxpool_1_reshape = Reshape( (int(maxpool_1.shape[1]), int(maxpool_1.shape[3]), 1), name='pool_1_reshaped')(maxpool_1)
def YOLOMODEL(path):
    """Build the full-YOLO detector and load its weights from ``path``.

    The backbone is 22 conv / batch-norm / LeakyReLU stages with a skip
    connection taken after stage 13 and merged back (through
    ``space_to_depth_x2``) before stage 22. A 1x1 convolution then produces
    ``nb_box * (4 + 1 + nb_class)`` values per grid cell.

    Relies on the module-level names ``input_size``, ``max_box_per_image``,
    ``nb_box``, ``nb_class`` and ``space_to_depth_x2``.

    Returns the Keras model taking ``[input_image, true_boxes]``.
    """

    def conv_stage(tensor, index, filters, kernel):
        # conv_<index> (no bias) -> norm_<index> -> LeakyReLU(0.1)
        tensor = Conv2D(filters, (kernel, kernel), strides=(1, 1),
                        padding='same', name='conv_%d' % index,
                        use_bias=False)(tensor)
        tensor = BatchNormalization(name='norm_%d' % index)(tensor)
        return LeakyReLU(alpha=0.1)(tensor)

    input_image = Input(shape=(input_size, input_size, 3))
    true_boxes = Input(shape=(1, 1, 1, max_box_per_image, 4))

    # Stages 1-13 as (filters, kernel size, 2x2 max-pool afterwards?).
    front_stages = [
        (32, 3, True),     # Layer 1
        (64, 3, True),     # Layer 2
        (128, 3, False),   # Layer 3
        (64, 1, False),    # Layer 4
        (128, 3, True),    # Layer 5
        (256, 3, False),   # Layer 6
        (128, 1, False),   # Layer 7
        (256, 3, True),    # Layer 8
        (512, 3, False),   # Layer 9
        (256, 1, False),   # Layer 10
        (512, 3, False),   # Layer 11
        (256, 1, False),   # Layer 12
        (512, 3, False),   # Layer 13
    ]
    x = input_image
    for index, (filters, kernel, pooled) in enumerate(front_stages, 1):
        x = conv_stage(x, index, filters, kernel)
        if pooled:
            x = MaxPooling2D(pool_size=(2, 2))(x)

    # Remember the stage-13 activations before the last down-sampling.
    skip_connection = x
    x = MaxPooling2D(pool_size=(2, 2))(x)

    # Stages 14-20 as (filters, kernel size).
    back_stages = [(1024, 3), (512, 1), (1024, 3), (512, 1),
                   (1024, 3), (1024, 3), (1024, 3)]
    for index, (filters, kernel) in enumerate(back_stages, 14):
        x = conv_stage(x, index, filters, kernel)

    # Layer 21: squeeze the skip branch and fold space into depth.
    skip_connection = conv_stage(skip_connection, 21, 64, 1)
    skip_connection = Lambda(space_to_depth_x2)(skip_connection)

    x = concatenate([skip_connection, x])

    # Layer 22
    x = conv_stage(x, 22, 1024, 3)

    feature_extractor = Model(input_image, x, name='FULLYOLO')
    features = feature_extractor(input_image)
    grid_h, grid_w = feature_extractor.get_output_shape_at(-1)[1:3]

    # make the object detection layer
    box_values = 4 + 1 + nb_class
    output = Conv2D(nb_box * box_values, (1, 1), strides=(1, 1),
                    padding='same', name='conv_23',
                    kernel_initializer='lecun_normal')(features)
    output = Reshape((grid_h, grid_w, nb_box, box_values))(output)
    # true_boxes is wired into the graph but the Lambda only forwards the
    # first element.
    output = Lambda(lambda args: args[0])([output, true_boxes])

    model = Model([input_image, true_boxes], output)

    # initialize the weights of the detection layer
    # presumably model.layers[-4] is 'conv_23' -- TODO confirm
    detection_layer = model.layers[-4]
    current = detection_layer.get_weights()
    cell_count = grid_h * grid_w
    new_kernel = np.random.normal(size=current[0].shape) / cell_count
    new_bias = np.random.normal(size=current[1].shape) / cell_count
    detection_layer.set_weights([new_kernel, new_bias])

    model.load_weights(path)
    # print(model.summary())
    return model
def make_generator(dense=True, labels_size=10, noise_dim=100):
    """Build the (uncompiled) generator network.

    The model consumes one flat vector of length ``noise_dim + labels_size``
    and produces a single-channel 28x28 image whose values lie in [-1, 1]
    (tanh output).  The output layout follows ``K.image_data_format()``:
    ``(1, 28, 28)`` for 'channels_first', ``(28, 28, 1)`` otherwise.

    NOTE(review): the extra ``labels_size`` entries are presumably a label
    encoding joined to the noise seed by the caller -- confirm at the call
    site; this function only fixes the total input length.

    Args:
        dense: When True, the seed first passes through an additional
            1024-unit Dense + LeakyReLU stage before being projected to
            ``128 * 7 * 7`` features.  When False, the seed is projected
            directly.
        labels_size: Number of input entries added on top of the noise
            vector.
        noise_dim: Length of the noise part of the input.  Defaults to 100,
            the value that was previously hard-coded.

    Returns:
        A ``Sequential`` model with the architecture:

            input   : noise_dim + labels_size
            dense   : 1024                    (only if ``dense``)
            dense   : 7 * 7 * 128
            reshape : 7 x 7 x 128
            deconv  : 14 x 14 x 128
            conv    : 14 x 14 x 64
            deconv  : 28 x 28 x 64
            conv    : 28 x 28 x 1 (tanh)
    """
    input_size = noise_dim + labels_size
    model = Sequential()

    # ---- Projection: (optional Dense 1024 + LeakyReLU) + Dense 128*7*7 ----
    if dense:
        model.add(Dense(1024, input_dim=input_size))
        model.add(LeakyReLU())
        model.add(Dense(128 * 7 * 7))
    else:
        model.add(Dense(128 * 7 * 7, input_dim=input_size))

    # ---- BatchNorm + LeakyReLU on the flat projection ----
    model.add(BatchNormalization())
    model.add(LeakyReLU())

    # ---- Reshape the flat features into a 7x7 map with 128 channels ----
    # The channel axis position depends on the backend image layout, and the
    # later BatchNormalization layers must normalise over that same axis.
    if K.image_data_format() == 'channels_first':
        # size: 128 x 7 x 7
        model.add(Reshape((128, 7, 7), input_shape=(128 * 7 * 7, )))
        bn_axis = 1
    else:
        # size: 7 x 7 x 128
        model.add(Reshape((7, 7, 128), input_shape=(128 * 7 * 7, )))
        bn_axis = -1

    # ---- Deconv (stride 2): 7x7 -> 14x14, 128 channels ----
    model.add(
        Conv2DTranspose(filters=128,
                        kernel_size=(5, 5),
                        strides=2,
                        padding='same'))
    model.add(BatchNormalization(axis=bn_axis))
    model.add(LeakyReLU())

    # ---- Conv: 14x14, 64 channels ----
    model.add(Convolution2D(64, (5, 5), padding='same'))
    model.add(BatchNormalization(axis=bn_axis))
    model.add(LeakyReLU())

    # ---- Deconv (stride 2): 14x14 -> 28x28, 64 channels ----
    model.add(Conv2DTranspose(64, (5, 5), strides=2, padding='same'))
    model.add(BatchNormalization(axis=bn_axis))
    model.add(LeakyReLU())

    # ---- Output conv: 28x28, 1 channel, tanh ----
    # Training inputs are expected to be normalised to [-1, 1], so tanh is
    # used to keep the generator's output in that same range.
    model.add(Convolution2D(1, (5, 5), padding='same', activation='tanh'))

    return model