def MixSymbolDemodulator(input_shape, filter_num, label_size, signal_type_shape, dropout=0.0):
    """Build a two-branch CNN symbol demodulator conditioned on a signal-type vector.

    Two parallel Conv2D branches (different kernel sizes / strides) each fuse
    the signal-type embedding into every time step, convolve again, pool, and
    map to a 16-dim feature. Branch features plus the raw signal-type vector
    are concatenated and classified with a softmax.

    Args:
        input_shape: shape of the raw-signal input tensor (without batch dim).
        filter_num: filters for each branch's first conv; second conv uses
            filter_num // 2.
        label_size: number of output classes.
        signal_type_shape: shape of the signal-type conditioning input.
        dropout: dropout rate used inside each branch.

    Returns:
        Keras Model mapping [signal, signal_type] -> class probabilities.
    """
    input = Input(shape=input_shape)
    signal_type = Input(shape=signal_type_shape)
    # Per-branch hyperparameters: first-conv kernel, second-conv kernel height,
    # first-conv stride. zip() pairs them branch-wise.
    kernel_sizes = [(3, 1), (16, 1)]
    kernel2_sizes = [4, 3]
    all_strides = [1, 2]
    outputs = list()
    for kernel_size, kernel2_size, strides in zip(kernel_sizes, kernel2_sizes, all_strides):
        conv1Output = Conv2D(filters=filter_num, kernel_size=kernel_size, strides=strides)(input)
        # Drop the singleton width axis: (batch, T, 1, C) -> (batch, T, C).
        # Assumes the conv leaves width == 1 — TODO confirm with input_shape.
        conv1Output = Reshape([
            conv1Output.get_shape().as_list()[1],
            conv1Output.get_shape().as_list()[3]
        ])(conv1Output)
        # conv1Output = BatchNormalization()(conv1Output)
        conv1Output = Activation('selu')(conv1Output)
        conv1Output = Dropout(dropout)(conv1Output)
        # Append the signal-type vector to every time step.
        conv1Output = Concatenate(axis=-1)([
            conv1Output,
            RepeatVector(conv1Output.get_shape().as_list()[1])(signal_type)
        ])
        # Re-add a channel axis so Conv2D can consume the fused features.
        conv1Output = Reshape([
            conv1Output.get_shape().as_list()[1],
            conv1Output.get_shape().as_list()[2], 1
        ])(conv1Output)
        # BUG FIX: `filter_num / 2` is a float under Python 3 (this file uses
        # f-strings, so it runs on py3); Conv2D filters must be an integer.
        conv2Output = Conv2D(
            filters=filter_num // 2,
            kernel_size=[kernel2_size, conv1Output.get_shape().as_list()[2]],
            strides=[2, 1])(conv1Output)
        conv2Output = Reshape([
            conv2Output.get_shape().as_list()[1],
            conv2Output.get_shape().as_list()[3], 1
        ])(conv2Output)
        # conv2Output = BatchNormalization()(conv2Output)
        conv2Output = Activation('selu')(conv2Output)
        pooling1Output = MaxPool2D((2, 1))(conv2Output)
        pooling1Output = Flatten()(pooling1Output)
        dropout2Output = Dropout(dropout)(pooling1Output)
        output = Dense(16, activation=relu)(dropout2Output)
        outputs.append(output)
    # The raw signal type is also fed directly into the classifier head.
    outputs.append(signal_type)
    output = Concatenate(axis=-1)(outputs)
    output = Dense(label_size, activation=softmax)(output)
    return Model([input, signal_type], output)
def create_convlstm_model(fingerprint_input, model_settings, is_training):
    """Build a ConvLSTM-based classifier graph over a flattened spectrogram.

    Args:
        fingerprint_input: flattened (batch, time*frequency) input tensor.
        model_settings: dict providing 'dct_coefficient_count',
            'spectrogram_length' and 'label_count'.
        is_training: when True, adds tf.nn.dropout nodes and returns the
            dropout placeholder alongside the output tensor.

    Returns:
        The output tensor, or (output, dropout_prob) when is_training.
    """
    tf.logging.info("conv lstm is used")
    if is_training:
        # Fed at train time; tf.nn.dropout's keep-probability placeholder.
        dropout_prob = tf.placeholder(tf.float32, name='dropout_prob')
    input_frequency_size = model_settings['dct_coefficient_count']
    input_time_size = model_settings['spectrogram_length']
    label_count = model_settings['label_count']
    # Split the time axis into 14 chunks for the ConvLSTM's sequence dim.
    # Assumes input_time_size is divisible by 14 — TODO confirm; int(/) would
    # silently truncate otherwise.
    x = Reshape((14, int(input_time_size / 14), input_frequency_size,
                 1))(fingerprint_input)
    x = ConvLSTM2D(64, (8, 20))(x)
    x = MaxPooling2D(pool_size=(3, 3), padding='same')(x)
    if is_training:
        x = tf.nn.dropout(x, dropout_prob)
    shape = x.get_shape().as_list()  #5d
    tf.logging.info("second layer shape:" + str(shape))
    x = Conv2D(64, (2, 10), activation='relu', padding='same')(x)
    x = MaxPooling2D(pool_size=(2, 2), padding='same')(x)
    if is_training:
        x = tf.nn.dropout(x, dropout_prob)
    x = Flatten()(x)
    # NOTE(review): 'tanh' rather than softmax/logits here — presumably the
    # caller applies its own loss over these activations; verify against caller.
    x = Dense(label_count, activation='tanh')(x)
    if is_training:
        return x, dropout_prob
    else:
        return x
def create_lstm_model(fingerprint_input, model_settings, is_training):
    """Build an LSTM classifier graph over a flattened spectrogram input.

    Args:
        fingerprint_input: flattened (batch, time*frequency) input tensor.
        model_settings: dict providing 'dct_coefficient_count',
            'spectrogram_length' and 'label_count'.
        is_training: when True, adds a tf.nn.dropout node and returns the
            dropout placeholder alongside the output tensor.

    Returns:
        The output tensor, or (output, dropout_prob) when is_training.
    """
    tf.logging.info("lstm is used")
    dropout_prob = None
    if is_training:
        # Placeholder so the dropout probability can be fed at train time.
        dropout_prob = tf.placeholder(tf.float32, name='dropout_prob')
    freq_bins = model_settings['dct_coefficient_count']
    time_steps = model_settings['spectrogram_length']
    num_labels = model_settings['label_count']
    # Un-flatten to (time, frequency) so the LSTM walks the time axis.
    net = Reshape((time_steps, freq_bins))(fingerprint_input)
    net = LSTM(1024)(net)
    if is_training:
        net = tf.nn.dropout(net, dropout_prob)
    shape = net.get_shape().as_list()  #5d
    tf.logging.info("second layer shape:" + str(shape))
    net = Dense(num_labels, activation='tanh')(net)
    return (net, dropout_prob) if is_training else net
def renet_module(X, hidden_size, receptive_filter_size=2, batch_size=1):
    """One ReNet block: sweep 4 directional LSTMs over patch rows/columns.

    Args:
        X: 4-D input tensor (batch, height, width, channels).
        hidden_size: hidden units per directional CuDNNLSTM.
        receptive_filter_size: patch size; output spatial dims are the input
            dims divided by this factor.
        batch_size: unused here; kept for interface compatibility.

    Returns:
        Tensor of shape (H // rf, W // rf, 4 * hidden_size) per batch item.
    """
    _, X_height, X_width, X_channel = X.get_shape()
    # BUG FIX: Python-2 `print x` statements are syntax errors under Python 3
    # (this file already uses f-strings elsewhere, so it targets py3).
    print(X_height)
    # rnn_input_layer rearranges patches into 4 directional scan sequences.
    vertical_rnn_inputs_fw, vertical_rnn_inputs_rev, horizontal_rnn_inputs_fw, horizontal_rnn_inputs_rev = \
        rnn_input_layer(receptive_filter_size)(X)
    renet1 = CuDNNLSTM(hidden_size, return_sequences=True)(vertical_rnn_inputs_fw)
    print(renet1.shape)
    renet2 = CuDNNLSTM(hidden_size, return_sequences=True)(vertical_rnn_inputs_rev)
    print(renet2.shape)
    renet3 = CuDNNLSTM(hidden_size, return_sequences=True)(horizontal_rnn_inputs_fw)
    renet4 = CuDNNLSTM(hidden_size, return_sequences=True)(horizontal_rnn_inputs_rev)
    # Stack the four directional feature maps channel-wise.
    renet_concat = Concatenate(axis=2)([renet1, renet2, renet3, renet4])
    print(renet_concat.shape)
    # BUG FIX: use floor division — `/` yields a float under Python 3 and
    # Reshape requires integer target dimensions.
    renet = Reshape((int(X_height) // receptive_filter_size,
                     int(X_width) // receptive_filter_size, -1))(renet_concat)
    print(renet.get_shape())
    return renet
def getModel(C, H, W, classes, h=None, mode="train"):
    """Build a VGG16-style classifier with a mode-dependent head.

    Channels-first input (C, H, W); `conv_bn_relu` is a project helper
    (conv + batch-norm + ReLU — presumably; verify its definition).

    Args:
        C, H, W: input channels, height, width.
        classes: number of output classes.
        h: spatial size for the final 'valid' conv; inferred from the feature
            map when None (non-dense modes only).
        mode: 'train' (flattened per-location softmax), 'test' (softmax
            reshaped back to a (classes, h, w) map) or 'dense' (global
            pooled fully-connected head).

    Returns:
        Keras Model from the image input to the mode-specific output.
    """
    inp = Input(shape=(C, H, W))
    out = conv_bn_relu(inp, 64, 3)
    out = conv_bn_relu(out, 64, 3)
    out = MaxPooling2D(pool_size=(2, 2))(out)
    out = conv_bn_relu(out, 128, 3)
    out = conv_bn_relu(out, 128, 3)
    out = MaxPooling2D(pool_size=(2, 2))(out)
    out = conv_bn_relu(out, 256, 3)
    out = conv_bn_relu(out, 256, 3)
    out = conv_bn_relu(out, 256, 3)
    out = MaxPooling2D(pool_size=(2, 2))(out)
    out = conv_bn_relu(out, 512, 3)
    out = conv_bn_relu(out, 512, 3)
    out = conv_bn_relu(out, 512, 3)
    if mode != "dense":
        if h is None:
            # `.value` on Dimension objects: TF1-style static shape access.
            _, c, h, w = [d.value for d in out.get_shape()]
        # Collapse the feature map to per-location class scores.
        out = conv_bn_relu(out, 512, h, padding='valid')
        out = conv_bn_relu(out, classes, 1, activation=None, padding='valid')
    if mode == "train":
        # (classes, h*w) -> (h*w, classes) -> softmax over classes, flattened.
        out = Reshape((classes, -1))(out)
        out = Permute((2, 1))(out)
        out = Activation("softmax")(out)
        out = Flatten()(out)
    if mode == "test":
        # Same softmax, but restored to a spatial (c, h, w) probability map.
        _, c, h, w = [d.value for d in out.get_shape()]
        out = Reshape((classes, -1))(out)
        out = Permute((2, 1))(out)
        out = Activation("softmax")(out)
        out = Permute((2, 1))(out)
        out = Reshape((c, h, w))(out)
    if mode == "dense":
        out = MaxPooling2D(pool_size=(2, 2))(out)
        out = Flatten()(out)
        out = Dense(512, activation='relu')(out)
        out = Dense(classes, activation='softmax')(out)
        #out = Permute((2,1))(out)
        #out = Activation("softmax")(out)
        ##out = Permute((2,1))(out)
        #out = Reshape((c,h,w))(out)
        #out = Flatten()(out)
        #out = Dense(512,activation="relu")(out)
        #out = Dense(classes, activation="softmax")(out)
    return Model(inp, out)
def MixSignalDecoder(input_shape, filter_num, kernel_size, strides, label_size, signal_type_shape, dropout=0.0):
    """Build a per-timestep CNN decoder conditioned on a signal-type vector.

    Two conv stages each fuse the signal-type embedding into every time step;
    the result is classified per time step via TimeDistributed dense layers.

    Args:
        input_shape: shape of the raw-signal input tensor (without batch dim).
        filter_num: filters for the first conv; the second uses filter_num // 2.
        kernel_size, strides: first-conv kernel and stride.
        label_size: number of per-timestep output classes.
        signal_type_shape: shape of the signal-type conditioning input.
        dropout: dropout rate applied after each conv stage.

    Returns:
        Keras Model mapping [signal, signal_type] -> per-timestep softmax.
    """
    input = Input(shape=input_shape)
    signal_type = Input(shape=signal_type_shape)
    conv1Output = Conv2D(filters=filter_num, kernel_size=kernel_size, strides=strides)(input)
    # Drop the singleton width axis: (batch, T, 1, C) -> (batch, T, C).
    conv1Output = Reshape([
        conv1Output.get_shape().as_list()[1],
        conv1Output.get_shape().as_list()[3]
    ])(conv1Output)
    # conv1Output = BatchNormalization()(reshapeOutput)
    conv1Output = Activation('selu')(conv1Output)
    conv1Output = Dropout(dropout)(conv1Output)
    # Append the signal-type vector to every time step.
    conv1Output = Concatenate(axis=-1)([
        conv1Output,
        RepeatVector(conv1Output.get_shape().as_list()[1])(signal_type)
    ])
    # Re-add a channel axis for the second Conv2D.
    conv1Output = Reshape([
        conv1Output.get_shape().as_list()[1],
        conv1Output.get_shape().as_list()[2], 1
    ])(conv1Output)
    conv2Output = ZeroPadding2D((1, 0))(conv1Output)
    # BUG FIX: `filter_num / 2` is a float under Python 3; Conv2D filters must
    # be an integer, so use floor division.
    conv2Output = Conv2D(filters=filter_num // 2,
                         kernel_size=[3, conv1Output.get_shape().as_list()[2]],
                         strides=[2, 1])(conv2Output)
    conv2Output = Reshape([
        conv2Output.get_shape().as_list()[1],
        conv2Output.get_shape().as_list()[3]
    ])(conv2Output)
    # conv2Output = BatchNormalization()(conv2Output)
    conv2Output = Activation('selu')(conv2Output)
    conv2Output = Dropout(dropout)(conv2Output)
    # Fuse the signal type again before the per-timestep classifier head.
    conv2Output = Concatenate(axis=-1)([
        conv2Output,
        RepeatVector(conv2Output.get_shape().as_list()[1])(signal_type)
    ])
    conv2Output = TimeDistributed(Dense(16, activation=relu))(conv2Output)
    conv2Output = TimeDistributed(Dense(label_size, activation=softmax))(conv2Output)
    return Model([input, signal_type], conv2Output)
def DeepCNN(input_shape, filter_num, kernel_size, strides, label_size, dropout=0.0):
    """Build a two-stage CNN with per-timestep classification.

    Args:
        input_shape: shape of the input tensor (without batch dim).
        filter_num: filters for the first conv; the second uses filter_num // 2.
        kernel_size, strides: first-conv kernel and stride.
        label_size: number of per-timestep output classes.
        dropout: dropout rate applied after each conv stage.

    Returns:
        Keras Model mapping the input to a per-timestep softmax.
    """
    input = Input(shape=input_shape)
    conv1Output = Conv2D(filters=filter_num, kernel_size=kernel_size, strides=strides)(input)
    # Drop the singleton width axis: (batch, T, 1, C) -> (batch, T, C).
    reshapeOutput = Reshape([
        conv1Output.get_shape().as_list()[1],
        conv1Output.get_shape().as_list()[3]
    ])(conv1Output)
    conv1Output = BatchNormalization()(reshapeOutput)
    conv1Output = Activation('relu')(conv1Output)
    conv1Output = Dropout(dropout)(conv1Output)
    # Re-add a channel axis for the second Conv2D.
    conv1Output = Reshape([
        conv1Output.get_shape().as_list()[1],
        conv1Output.get_shape().as_list()[2], 1
    ])(conv1Output)
    conv2Output = ZeroPadding2D((1, 0))(conv1Output)
    # BUG FIX: `filter_num / 2` is a float under Python 3; Conv2D filters must
    # be an integer, so use floor division.
    conv2Output = Conv2D(filters=filter_num // 2,
                         kernel_size=[3, filter_num],
                         strides=[2, 1])(conv2Output)
    conv2Output = Reshape([
        conv2Output.get_shape().as_list()[1],
        conv2Output.get_shape().as_list()[3]
    ])(conv2Output)
    conv2Output = BatchNormalization()(conv2Output)
    conv2Output = Activation('relu')(conv2Output)
    conv2Output = Dropout(dropout)(conv2Output)
    conv2Output = TimeDistributed(Dense(16, activation=relu))(conv2Output)
    conv2Output = TimeDistributed(Dense(label_size, activation=softmax))(conv2Output)
    return Model(input, conv2Output)
def Attentive_BconvLSTM_2d(x, g, inter_channel, data_format='channels_last'):
    """Attention gate: modulate feature map `x` by a mask derived from gating
    signal `g` via a bidirectional-in-time ConvLSTM over the stacked pair.

    Args:
        x: skip-connection feature map, assumed (batch, H, W, 2*inter_channel)
            — TODO confirm against callers.
        g: gating signal feature map from the coarser decoder level.
        inter_channel: channel count of the intermediate attention features.
        data_format: Keras data format for the 1x1 convolutions.

    Returns:
        `x` scaled element-wise by the learned sigmoid attention map.
    """
    print(x.shape)
    print(g.shape)
    # Project g to match x's channel count. f(?,g_height,g_width,inter_channel)
    g = Conv2D(inter_channel*2, [1, 1], strides=[1, 1], data_format=data_format)(g)
    # Add a length-1 "time" axis to each map so they can be stacked as a
    # 2-step sequence for the ConvLSTM.
    x = Reshape(target_shape=(1, x.get_shape().as_list()[1],
                              x.get_shape().as_list()[2], inter_channel*2))(x)
    # BUG FIX: the original applied this Reshape to `x` again (Reshape(...)(x)),
    # discarding the projected gating signal and concatenating two copies of x.
    # The target shape is computed from g's dims, so it must be applied to `g`.
    g = Reshape(target_shape=(1, g.get_shape().as_list()[1],
                              g.get_shape().as_list()[2], inter_channel*2))(g)
    merge = concatenate([x, g], axis=1)
    f = ConvLSTM2D(filters=inter_channel, kernel_size=(3, 3), padding='same',
                   return_sequences=False, go_backwards=True,
                   kernel_initializer='he_normal')(merge)
    f = Activation('relu')(f)
    # Collapse to a single-channel map, squashed to (0, 1) as attention rates.
    psi_f = Conv2D(1, [1, 1], strides=[1, 1], data_format=data_format)(f)
    rate = Activation('sigmoid')(psi_f)
    x = multiply([x, rate])
    # Remove the length-1 time axis again.
    att_x = Reshape(target_shape=(x.get_shape().as_list()[2],
                                  x.get_shape().as_list()[3],
                                  x.get_shape().as_list()[4]))(x)
    return att_x
def build_model():
    """Build and compile a 1-D convolutional autoencoder over flattened
    one-hot sequences of shape (MAXLEN * 21,).

    Returns:
        A compiled Keras Model (SGD, mean squared error).
    """
    flat_input = Input(shape=(MAXLEN * 21, ))
    h = Reshape((MAXLEN, 21))(flat_input)
    # Encoder-decoder stack; shapes are printed after every layer for
    # debugging, exactly as before.
    stack = [
        Conv1D(320, 7, padding='same'),
        MaxPooling1D(4, padding='same'),
        Conv1D(160, 7, padding='same'),
        Conv1D(320, 7, padding='same'),
        UpSampling1D(4),
        Conv1D(21, 7, activation='sigmoid', padding='same'),
    ]
    for layer in stack:
        h = layer(h)
        print(h.get_shape())
    # Flatten back to the input layout so the loss compares like with like.
    decoded = Reshape((MAXLEN * 21, ))(h)
    autoencoder = Model(flat_input, decoded)
    autoencoder.compile(optimizer='sgd', loss='mean_squared_error')
    autoencoder.summary()
    return autoencoder
# CRNN feature extractor + recurrent head for sequence recognition.
# NOTE(review): this uses Keras 1 API conventions (`Convolution2D(32, 3, 3)`,
# `init=` instead of `kernel_initializer=`) — confirm the installed Keras
# version before modernizing.
input_tensor = Input((height, width, 3))
x = input_tensor
# Four conv-conv-pool stages; the last stage pools only along height so the
# width (time) resolution is preserved for the recurrent layers.
for i in range(4):
    x = Convolution2D(32, 3, 3, activation='relu')(x)
    x = BatchNormalization(axis=-1)(x)
    x = Convolution2D(32, 3, 3, activation='relu')(x)
    x = BatchNormalization(axis=-1)(x)
    if i < 3:
        x = MaxPooling2D(pool_size=(2, 2))(x)
    else:
        x = MaxPooling2D(pool_size=(2, 1))(x)
conv_shape = x.get_shape()
# Fold (height, width, channels) into a (width, height*channels) sequence:
# width becomes the time axis for the GRUs.
x = Reshape(target_shape=(int(conv_shape[2]),
                          int(conv_shape[1] * conv_shape[3])))(x)
x = Dense(32, activation='relu')(x)
# Bidirectional recurrence built by hand: forward + backward GRU, summed.
gru_1 = GRU(rnn_size, return_sequences=True, init='he_normal', name='gru1')(x)
gru_1b = GRU(rnn_size, return_sequences=True, go_backwards=True,
             init='he_normal', name='gru1_b')(x)
gru1_merged = keras.layers.add([gru_1, gru_1b])
gru_2 = GRU(rnn_size, return_sequences=True, init='he_normal',
            name='gru2')(gru1_merged)
def perframe_sequence_trainer_noattn(
        conditioning_input_shapes,
        conditioning_input_names,
        input_gt_frames_shape,
        perframe_painter_model,
        seq_len,
        is_done_model=None,
        n_const_frames=1,
        do_output_disc_stack=False,
        n_prev_frames=None,
        n_prev_disc_frames=1,
        n_painter_frame_outputs=2,
):
    """Unroll a per-frame painter model over seq_len steps for training.

    Each step feeds the constant frame(s) plus a sliding window of previous
    (predicted) frames into `perframe_painter_model`, clips the predicted
    frames to [-1, 1], and accumulates every output along a trailing time
    axis. Optionally also builds discriminator input stacks and an "is done"
    classifier sequence.

    Returns:
        Keras Model over [conditioning inputs..., gt frames] producing the
        per-output prediction sequences (plus the discriminator stack when
        do_output_disc_stack).
    """
    if n_prev_frames is None:
        n_prev_frames = seq_len - 1
    # collect conditioning inputs, which should include last frame, prev
    # frames, prev attns
    if not isinstance(conditioning_input_shapes, list):
        conditioning_input_shapes = [conditioning_input_shapes]
    if conditioning_input_names is None:
        conditioning_input_names = [
            'cond_input_{}'.format(ii)
            for ii in range(len(conditioning_input_shapes))
        ]
    conditioning_inputs = []
    for ii, input_shape in enumerate(conditioning_input_shapes):
        conditioning_inputs.append(
            Input(input_shape, name=conditioning_input_names[ii]))
    input_gt_frames = Input(input_gt_frames_shape, name='input_gt_frames')
    inputs = conditioning_inputs + [input_gt_frames]
    prev_frames = conditioning_inputs[1:]
    gt_frames = Reshape(input_gt_frames_shape,
                        name='reshape_gt')(input_gt_frames)
    # first two frames of the input stack will always be first and last frame
    const_frames = conditioning_inputs[:n_const_frames]
    curr_prev_frames = conditioning_inputs[n_const_frames:n_const_frames +
                                           n_prev_frames]
    curr_prev_attn_maps = conditioning_inputs[n_const_frames + n_prev_frames:]
    # Tile the constant (target) frame along a new trailing time axis so it
    # can be stacked with per-step sequences. Assumes 4-D frames — TODO confirm.
    last_frame_seq = Lambda(
        lambda x: tf.tile(K.expand_dims(x, axis=-1), [1, 1, 1, 1, seq_len]),
        name='lambda_tile_slice_last_frame_seq')(const_frames[0])
    director_preds_seqs = []
    painter_preds_seqs = []
    for t in range(seq_len):
        # cvae
        painter_cond_inputs = const_frames + curr_prev_frames
        # provide the true frame as input to the autoencoding branch of the
        # painter; slices step t from the trailing time axis
        gt_frame = Lambda(
            lambda x: tf.gather(x, t, axis=-1),
            name='lambda_slice_gt_frames_t{}'.format(t))(gt_frames)
        painter_ae_inputs = painter_cond_inputs + [gt_frame]
        # TODO: painter network currently expects cond input first, then ae input
        painter_preds = perframe_painter_model(painter_ae_inputs +
                                               painter_cond_inputs)
        if not isinstance(painter_preds, list):
            painter_preds = [painter_preds]
        clipped_painter_frames = []
        for ppi in range(n_painter_frame_outputs):
            pred_frame = painter_preds[ppi]
            # TODO: get rid of hardcoding of range (e.g. if we are not normalizing)
            pred_frame = Lambda(
                lambda x: tf.clip_by_value(x, -1., 1.),
                name=F'lambda_clip_frame_{ppi}_t{t}')(pred_frame)
            clipped_painter_frames.append(pred_frame)
        painter_preds = clipped_painter_frames + painter_preds[
            n_painter_frame_outputs:]
        if n_prev_disc_frames > 0:
            # NOTE(review): this is a *list* slice; see the .get_shape() call
            # on it below, which looks like it only works for the intended
            # single-tensor case — verify.
            prev_frames = curr_prev_frames[-n_prev_disc_frames:]
        # Slide the window: drop the oldest frame, append the new prediction.
        curr_prev_frames = curr_prev_frames[1:] + [painter_preds[0]]
        for ppi, pp in enumerate(painter_preds):
            # give every prediction a time dimension, and concatenate it
            pp = Reshape(pp.get_shape().as_list()[1:] + [1],
                         name='reshape_t{}_pp{}'.format(t, ppi))(pp)
            if t == 0:
                painter_preds_seqs.append(pp)
            else:
                painter_preds_seqs[ppi] = Concatenate(
                    name='concat_t{}_pp{}'.format(t, ppi))(
                        [painter_preds_seqs[ppi], pp])
        ####### compile information needed for conditional discriminators #######
        if (do_output_disc_stack):
            if n_prev_disc_frames > 0:
                # NOTE(review): prev_frames is a list here, which has no
                # get_shape(); this branch presumably only ran with a single
                # tensor — confirm before relying on do_output_disc_stack.
                prev_frames = Reshape(
                    prev_frames.get_shape().as_list()[1:] + [1],
                    name='exptdim_t{}_prevframes'.format(t))(prev_frames)
                if t == 0:
                    prev_frames_seq = prev_frames
                else:
                    prev_frames_seq = Concatenate(
                        name='concat_t{}_prevframe'.format(t))(
                            [prev_frames_seq, prev_frames])
        ####### compile information needed for our "is done" classifier
        if is_done_model is not None:
            # completed painting, and current prediction
            is_done_inputs = Concatenate(axis=-1)(
                [const_frames[0], pred_frame])
            # run the classifier
            is_done_pred = is_done_model(is_done_inputs)
            # add a time dimension
            is_done_pred = Reshape(
                is_done_pred.get_shape().as_list()[1:] + [1],
                name='exptdim_t{}_isdone'.format(t))(is_done_pred)
            if t == 0:
                is_done_preds_seq = is_done_pred
            else:
                is_done_preds_seq = Concatenate(
                    name='concat_t{}_isdone'.format(t),
                    axis=-1)([is_done_preds_seq, is_done_pred])
    outputs = director_preds_seqs + painter_preds_seqs
    # if we are using a discriminator, output the discriminator input stacks
    # at the end so we can evaluate the scores
    if do_output_disc_stack:
        disc_inputs = [last_frame_seq]
        if n_prev_disc_frames > 0:
            disc_inputs.append(prev_frames_seq)
        # discriminator on attention map
        # NOTE(review): director_preds_seqs is never appended to in this
        # "noattn" variant, so [0] would raise IndexError — confirm this path
        # is unused or fix upstream.
        director_disc_stack = Concatenate(
            axis=-2, name='concat_director_disc_stack')(
                disc_inputs + [director_preds_seqs[0]])
        outputs += [director_disc_stack]
    return Model(inputs=inputs,
                 outputs=outputs,
                 name='seqlen{}_perframe_trainer_model'.format(seq_len))
def DeepCNNSeq2Seq(filter_num, kernel_size, strides, output_dim, output_length, label_size, hidden_dim=None, input_shape=None, batch_size=None, batch_input_shape=None, input_dim=None, input_length=None, depth=1, dropout=0.0, unroll=False, stateful=False, model_type='simple'):
    """Build a two-stage CNN front end feeding a seq2seq encoder/decoder.

    Args:
        filter_num: filters for the first conv; second conv uses filter_num // 2.
        kernel_size, strides: first-conv kernel and stride.
        output_dim, output_length: seq2seq decoder output width and length.
        label_size: per-timestep class count for the final softmax.
        hidden_dim, batch_size, batch_input_shape, input_dim, input_length,
        depth, unroll, stateful: forwarded to the seq2seq constructors.
        input_shape: shape of the raw input tensor (without batch dim).
        dropout: dropout rate for the conv stages and the seq2seq.
        model_type: 'attention' selects AttentionSeq2Seq, anything else
            selects SimpleSeq2Seq.

    Returns:
        Keras Model mapping the input to per-timestep class probabilities.
    """
    input = Input(shape=input_shape)
    conv1Output = Conv2D(filters=filter_num, kernel_size=kernel_size, strides=strides)(input)
    # Drop the singleton width axis: (batch, T, 1, C) -> (batch, T, C).
    reshapeOutput = Reshape([
        conv1Output.get_shape().as_list()[1],
        conv1Output.get_shape().as_list()[3]
    ])(conv1Output)
    conv1Output = BatchNormalization()(reshapeOutput)
    conv1Output = Activation('relu')(conv1Output)
    conv1Output = Dropout(dropout)(conv1Output)
    # Re-add a channel axis for the second Conv2D.
    conv1Output = Reshape([
        conv1Output.get_shape().as_list()[1],
        conv1Output.get_shape().as_list()[2], 1
    ])(conv1Output)
    # BUG FIX: `filter_num / 2` is a float under Python 3; Conv2D filters must
    # be an integer, so use floor division.
    conv2Output = Conv2D(filters=filter_num // 2,
                         kernel_size=[3, filter_num],
                         strides=[2, 1])(conv1Output)
    conv2Output = Reshape([
        conv2Output.get_shape().as_list()[1],
        conv2Output.get_shape().as_list()[3]
    ])(conv2Output)
    conv2Output = BatchNormalization()(conv2Output)
    conv2Output = Activation('relu')(conv2Output)
    conv2Output = Dropout(dropout)(conv2Output)
    # pool1Output = MaxPool2D((conv1Output.get_shape().as_list()[1], 1))(conv1Output)
    # reshapeOutput = Reshape([pool1Output.get_shape().as_list()[3], 1])(pool1Output)
    if model_type == 'attention':
        encoder, decoder = AttentionSeq2Seq(
            output_dim,
            output_length,
            batch_size=batch_size,
            input_shape=tuple(conv2Output.get_shape().as_list()[1:]),
            input_length=input_length,
            input_dim=input_dim,
            hidden_dim=hidden_dim,
            depth=depth,
            unroll=unroll,
            stateful=stateful,
            dropout=dropout)
    else:
        encoder, decoder = SimpleSeq2Seq(
            output_dim, output_length, hidden_dim,
            tuple(conv2Output.get_shape().as_list()[1:]), batch_size,
            batch_input_shape, input_dim, input_length, depth, dropout,
            unroll, stateful)
    seq2seqEncoderOutput = encoder(conv2Output)
    seq2seqDecoderOutput = decoder(seq2seqEncoderOutput)
    output = TimeDistributed(Dense(label_size, activation=softmax))(seq2seqDecoderOutput)
    return Model(input, output)
def perframe_sequence_tester( perframe_tester_model, img_shape=(50, 50, 3), seq_len=40, latent_shape=(5, ), n_const_frames=1, n_prev_frames=1, ): inputs = [ Input(img_shape, name='input_last_frame'), Input(img_shape, name='input_prev_frame') ] # first two frames of the input stack will always be first and last frame const_frames = inputs[:n_const_frames] curr_prev_frames = inputs[n_const_frames:n_const_frames + n_prev_frames] # a dummy input that enables us to do sampling of the latent input in the network z_dummy = Input(latent_shape, name='input_z_dummy') inputs += [z_dummy] painter_preds_seqs = [] for t in range(seq_len): # assumes frame prediction is always the first pred, others might be KL painter_cond_inputs = const_frames + curr_prev_frames z_samp = Lambda(sampling_sigma1, name=f'lambda_z_sampling_frame{t}')(z_dummy) painter_preds = perframe_tester_model(painter_cond_inputs + [z_samp]) if not isinstance(painter_preds, list): painter_preds = [painter_preds] else: painter_preds = [painter_preds[0] ] # cvae painter might return transformed, delta clipped_painter_frames = [] for ppi in range(len(painter_preds)): pred_frame = painter_preds[ppi] pred_frame = Lambda( lambda x: tf.clip_by_value(x, -1., 1.), name=F'lambda_clip_frame_{ppi}_t{t}')(pred_frame) clipped_painter_frames.append(pred_frame) painter_preds = clipped_painter_frames # shift previous frames to make room for our new painter prediction curr_prev_frames = curr_prev_frames[1:] + [painter_preds[0]] for ppi, pp in enumerate(painter_preds): # give every prediction a time dimension, and concatenate it pp = Reshape(pp.get_shape().as_list()[1:] + [1], name='reshape_t{}_pp{}'.format(t, ppi))(pp) if t == 0: painter_preds_seqs.append(pp) else: painter_preds_seqs[ppi] = Concatenate( name='concat_t{}_pp{}'.format(t, ppi))( [painter_preds_seqs[ppi], pp]) return Model(inputs=inputs, outputs=painter_preds_seqs, name='seqlen{}_model'.format(seq_len))
def perframe_sampling_sequence_trainer_noattn(
        conditioning_input_shapes,
        conditioning_input_names,
        perframe_painter_model,
        seq_len,
        n_prev_frames=1,
        n_const_frames=1,
        n_prev_disc_frames=1,
        n_const_disc_frames=1,
        n_painter_frame_outputs=2,
        painter_latent_shape=None,
        make_painter_disc_stack=False,
):
    """Unroll a per-frame painter over seq_len steps, sampling from its prior.

    Like perframe_sequence_trainer_noattn, but instead of feeding ground-truth
    frames it feeds a dummy latent input to the painter each step (CVAE prior
    sampling). Optionally assembles discriminator input stacks.

    Returns:
        Keras Model over the conditioning inputs (+ dummy latent) producing
        the prediction sequences (plus the discriminator stack when
        make_painter_disc_stack).
    """
    if n_prev_frames is None:
        n_prev_frames = seq_len - 1
    if not isinstance(conditioning_input_shapes, list):
        conditioning_input_shapes = [conditioning_input_shapes]
    if conditioning_input_names is None:
        conditioning_input_names = [
            'cond_input_{}'.format(ii)
            for ii in range(len(conditioning_input_shapes))
        ]
    conditioning_inputs = []
    for ii, input_shape in enumerate(conditioning_input_shapes):
        conditioning_inputs.append(
            Input(input_shape, name=conditioning_input_names[ii]))
    inputs = [ci for ci in conditioning_inputs]
    # these inputs are required so we can use keras sampling..still havent
    # figured out how to do it without an initial input first
    if painter_latent_shape is not None:
        # if the painter uses a CVAE
        dummy_z_p_input = Input(painter_latent_shape, name='input_z_p_dummy')
        inputs += [dummy_z_p_input]
    # first two frames of the input stack will always be first and last frame
    const_frames = conditioning_inputs[:n_const_frames]
    curr_prev_frames = conditioning_inputs[n_const_frames:n_const_frames +
                                           n_prev_frames]
    curr_prev_attn_maps = conditioning_inputs[n_const_frames + n_prev_frames:]
    # Tile the constant (target) frame along a new trailing time axis.
    # Assumes 4-D frames — TODO confirm.
    last_frame_seq = Lambda(
        lambda x: tf.tile(K.expand_dims(x, axis=-1), [1, 1, 1, 1, seq_len]),
        name='lambda_tile_slice_last_frame_seq')(const_frames[0])
    painter_preds_seqs = []
    painter_deltas_seq = []
    for t in range(seq_len):
        ####### compile information needed for conditional discriminators #######
        if (make_painter_disc_stack) and n_prev_disc_frames > 0:
            # both discriminators will probably require prev frames
            # TODO: remove hardcoding that assumes 1 const frame
            prev_frames = Reshape(
                const_frames[0].get_shape().as_list()[1:] + [1],
                name='exptdim_t{}_prevframes'.format(t))(curr_prev_frames[-1])
            if t == 0:
                prev_frames_seq = prev_frames
            else:
                prev_frames_seq = Concatenate(
                    name='concat_t{}_prevframe'.format(t))(
                        [prev_frames_seq, prev_frames])
        # cvae
        painter_cond_inputs = const_frames + curr_prev_frames
        # sample from the painter's prior instead
        # NOTE(review): dummy_z_p_input is only defined when
        # painter_latent_shape is not None — this raises NameError otherwise;
        # confirm callers always pass a latent shape.
        painter_preds = perframe_painter_model(painter_cond_inputs +
                                               [dummy_z_p_input])
        if not isinstance(painter_preds, list):
            painter_preds = [painter_preds]
        # assumes frame prediction is always the first pred, others might be KL
        clipped_painter_frames = []
        for ppi in range(n_painter_frame_outputs):
            curr_pred_frame = painter_preds[ppi]
            # Derive a readable layer-name suffix from the tensor's scope name.
            curr_pred_name = os.path.basename(
                os.path.dirname(curr_pred_frame.name))
            # TODO: get rid of hardcoding of range (e.g. if we are not normalizing)
            curr_pred_frame = Lambda(
                lambda x: tf.clip_by_value(x, -1., 1.),
                name=F'clip_pp{ppi}_{curr_pred_name}_t{t}')(curr_pred_frame)
            clipped_painter_frames.append(curr_pred_frame)
        painter_preds = clipped_painter_frames + painter_preds[
            n_painter_frame_outputs:]
        curr_pred_frame = painter_preds[0]
        # Slide the window: drop the oldest frame, append the new prediction.
        curr_prev_frames = curr_prev_frames[1:] + [curr_pred_frame]
        ########### compile predictions into sequences in time ######################
        # hacky, but if the painter predicts a delta, we only want the first
        # few recon outputs (the transformed frame) and we can ignore the
        # following output (the transform/delta)
        for ppi, pp in enumerate(painter_preds[:n_painter_frame_outputs]):
            # give every prediction a time dimension, and concatenate it
            ppn = os.path.basename(os.path.dirname(pp.name))
            pp = Reshape(pp.get_shape().as_list()[1:] + [1],
                         name='reshape_t{}_pp{}'.format(t, ppi))(pp)
            if t == 0:
                painter_preds_seqs.append(pp)
            else:
                painter_preds_seqs[ppi] = Concatenate(
                    name='concat_t{}_pp-{}'.format(t, ppn))(
                        [painter_preds_seqs[ppi], pp])
    outputs = painter_preds_seqs
    # if we are using a discriminator, output the discriminator input stacks
    # at the end so we can evaluate the scores
    if make_painter_disc_stack:
        disc_inputs = []
        if n_const_disc_frames > 0:
            disc_inputs.append(last_frame_seq)
        if n_prev_disc_frames > 0:
            disc_inputs.append(prev_frames_seq)
        painter_disc_stack = Concatenate(
            axis=-2, name='concat_painter_disc_stack')(
                disc_inputs + [painter_preds_seqs[0]])
        outputs += [painter_disc_stack]
    return Model(
        inputs=inputs,
        outputs=outputs,
        name='seqlen{}_perframe_sampling_trainer_model'.format(seq_len))
def ssd_300(image_size, n_classes, mode='training', l2_regularization=0.0005, min_scale=None, max_scale=None, scales=None, aspect_ratios_global=None, aspect_ratios_per_layer=[[1.0, 2.0, 0.5], [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0], [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0], [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0], [1.0, 2.0, 0.5], [1.0, 2.0, 0.5]], two_boxes_for_ar1=True, steps=[8, 16, 32, 64, 100, 300], offsets=None, clip_boxes=False, variances=[0.1, 0.1, 0.2, 0.2], coords='centroids', normalize_coords=True, subtract_mean=[123, 117, 104], divide_by_stddev=None, swap_channels=[2, 1, 0], confidence_thresh=0.01, iou_threshold=0.45, top_k=200, nms_max_output_size=400, return_predictor_sizes=False): ''' Xây dựng model SSD300 với keras. Base network được sử dụng là VGG16. Chú ý: Yêu cầu Keras>=v2.0; TensorFlow backend>=v1.0. Arguments: image_size (tuple): Kích thước image input `(height, width, channels)`. n_classes (int): Số classes, chẳng hạn 20 cho Pascal VOC dataset, 80 cho MS COCO dataset. mode (str, optional): Một trong những dạng 'training', 'inference' và 'inference_fast'. 'training' mode: Đầu ra của model là raw prediction tensor. 'inference' và 'inference_fast' modes: raw predictions được decoded thành tọa độ đã được filtered thông qua threshold. l2_regularization (float, optional): L2-regularization rate. Áp dụng cho toàn bộ các convolutional layers. min_scale (float, optional): Nhân tố scaling nhỏ nhất cho các size của anchor boxes. Tỷ lệ này được tính trên so sánh với cạnh ngắn hơn của hình ảnh input. max_scale (float, optional): Nhân tố scale lớn nhất cho các size của anchor boxes. scales (list, optional): List các số floats chứa các nhân tố scaling của các convolutional predictor layer. List này phải lớn hơn số lượng các predictor layers là 1 để sử dụng cho trường hợp aspect ratio = 1 sẽ tính thêm next scale. Trong TH sử dụng scales thì interpolate theo min_scale và max_scale để tính list scales sẽ không được sử dụng. 
aspect_ratios_global (list, optional): List của các aspect ratios mà các anchor boxes được tạo thành. List này được áp dụng chung trên toàn bộ các prediction layers. aspect_ratios_per_layer (list, optional): List của các list aspect ratio cho mỗi một prediction layer. Nếu được truyền vào sẽ override `aspect_ratios_global`. two_boxes_for_ar1 (bool, optional): Chỉ áp dụng khi aspect ratio lists chứa 1. Sẽ bị loại bỏ trong các TH khác. Nếu `True`, 2 anchor boxes sẽ được tạo ra ứng với aspect ratio = 1. anchor box đầu tiên tạo thành bằng cách sử scale, anchor box thứ 2 được tạo thành bằng trung bình hình học của scale và next scale. steps (list, optional): `None` hoặc là list với rất nhiều các phần tử có số lượng bằng với số lượng layers. Mỗi phần tử đại diện cho mỗi một predictor layer có bao nhiêu pixels khoảng cách giữa các tâm của anchor box. steps có thể gồm 2 số đại diện cho (step_width, step_height). nếu không có steps nào được đưa ra thì chúng ta sẽ tính để cho khoảng các giữa các tâm của anchor box là bằng nhau offsets (list, optional): None hoặc là các con số đại diện cho mỗi một predictor layer bao nhiêu pixels từ góc trên và bên trái mở rộng của ảnh clip_boxes (bool, optional): Nếu `True`, giới hạn tọa độ các anchor box để nằm trong boundaries của image. variances (list, optional): Một list gồm 4 số floats >0. Một anchor box offset tương ứng với mỗi tọa độ sẽ được chi cho giá trị variance tương ứng. coords (str, optional): Tọa độ của box được sử dụng bên trong model (chẳng hạn, nó không là input format của ground truth labels). Có thể là dạng 'centroids' format `(cx, cy, w, h)` (box center coordinates, width, and height), 'minmax' format `(xmin, xmax, ymin, ymax)`, hoặc 'corners' format `(xmin, ymin, xmax, ymax)`. normalize_coords (bool, optional): Được đặt là `True` nếu model được giả định sử dụng tọa độ tương đối thay vì tuyệt đối coordinates, chẳng hạn nếu model dự báo tọa độ box nằm trong [0, 1] thay vì tọa độ tuyệt đối. 
subtract_mean (array-like, optional): `None` hoặc một array object với bất kì shape nào mà dạng mở rộng phù hợp với shape của ảnh. Gía trị của nó được bớt đi từ độ lớn pixel của ảnh. The elements of this array will be Chẳng hạn truyền vào một list gồm 3 số nguyên để tính toán trung bình chuẩn hóa cho các kênh của ảnh. divide_by_stddev (array-like, optional): `None` hoặc một array object. Tương tự như subtract_mean nhưng được chia cho từ độ lớn của ảnh để tính chuẩn hóa. swap_channels (list, optional): Là `False` hoặc một list các số nguyên biểu diễn thứ tự kì vọng mà trong đó đầu vào các channels của ảnh có thể được hoán đổi. confidence_thresh (float, optional): Một số float nằm trong khoảng [0,1), là ngưỡng tin cậy nhỏ nhất trong phân loại của một lớp xảy ra. iou_threshold (float, optional): Một float nằm trong khoảng [0,1]. Tất cả các boxes có chỉ số Jaccard similarity lớn hơn hoặc bằng `iou_threshold` sẽ được xem xét là chứa vệt thể bên trong nó. top_k (int, optional): Điểm dự báo cáo nhất được giữ trong mỗi batch item sau bước non-maximum suppression stage. nms_max_output_size (int, optional): Số lượng lớn nhất các dự báo sẽ được chuyển qua bước NMS stage. return_predictor_sizes (bool, optional): Nếu `True`, hàm số này sẽ không chỉ trả về mô hình, mà còn trả về một list chứa các chiều của predictor layers. Returns: model: The Keras SSD300 model. predictor_sizes (optional): Một numpy array chứa các phần `(height, width)` của output tensor shape tương ứng với mỗi convolutional predictor layer. References: https://arxiv.org/abs/1512.02325v5 ''' n_predictor_layers = 6 # Số lượng các preductor convolutional layers trong network là 6 cho original SSD300. n_classes += 1 # Số lượng classes, + 1 để tính thêm background class. l2_reg = l2_regularization # tham số chuẩn hóa của norm chuẩn l2. img_height, img_width, img_channels = image_size[0], image_size[ 1], image_size[2] ############################################################################ # Một số lỗi ngoại lệ. 
############################################################################ if aspect_ratios_global is None and aspect_ratios_per_layer is None: raise ValueError( "`aspect_ratios_global` and `aspect_ratios_per_layer` cannot both be None. At least one needs to be specified." ) if aspect_ratios_per_layer: if len(aspect_ratios_per_layer) != n_predictor_layers: raise ValueError( "It must be either aspect_ratios_per_layer is None or len(aspect_ratios_per_layer) == {}, but len(aspect_ratios_per_layer) == {}." .format(n_predictor_layers, len(aspect_ratios_per_layer))) # Tạo list scales if (min_scale is None or max_scale is None) and scales is None: raise ValueError( "Either `min_scale` and `max_scale` or `scales` need to be specified." ) if scales: if len(scales) != n_predictor_layers + 1: raise ValueError( "It must be either scales is None or len(scales) == {}, but len(scales) == {}." .format(n_predictor_layers + 1, len(scales))) else: scales = np.linspace(min_scale, max_scale, n_predictor_layers + 1) if len(variances) != 4: raise ValueError( "4 variance values must be pased, but {} values were received.". format(len(variances))) variances = np.array(variances) if np.any(variances <= 0): raise ValueError( "All variances must be >0, but the variances given are {}".format( variances)) if (not (steps is None)) and (len(steps) != n_predictor_layers): raise ValueError( "You must provide at least one step value per predictor layer.") if (not (offsets is None)) and (len(offsets) != n_predictor_layers): raise ValueError( "You must provide at least one offset value per predictor layer.") ############################################################################ # Tính các tham số của anchor box. ############################################################################ # Thiết lập aspect ratios cho mỗi predictor layer (chỉ cần thiết cho tính toán anchor box layers). 
if aspect_ratios_per_layer: aspect_ratios = aspect_ratios_per_layer else: aspect_ratios = [aspect_ratios_global] * n_predictor_layers # Tính số lượng boxes được dự báo / 1 cell cho mỗi predictor layer. # Chúng ta cần biết bao nhiêu channels các predictor layers cần có. if aspect_ratios_per_layer: n_boxes = [] for ar in aspect_ratios_per_layer: if (1 in ar) & two_boxes_for_ar1: n_boxes.append(len(ar) + 1) # +1 cho trường hợp aspect ratio = 1 else: n_boxes.append(len(ar)) else: # Nếu chỉ 1 global aspect ratio list được truyền vào thì số lượng boxes là như nhau cho mọi layers. if (1 in aspect_ratios_global) & two_boxes_for_ar1: n_boxes = len(aspect_ratios_global) + 1 else: n_boxes = len(aspect_ratios_global) n_boxes = [n_boxes] * n_predictor_layers if steps is None: steps = [None] * n_predictor_layers if offsets is None: offsets = [None] * n_predictor_layers ############################################################################ # Xác định các hàm số cho Lambda layers bên dưới. ############################################################################ def identity_layer(tensor): return tensor def input_mean_normalization(tensor): return tensor - np.array(subtract_mean) def input_stddev_normalization(tensor): return tensor / np.array(divide_by_stddev) def input_channel_swap(tensor): if len(swap_channels) == 3: return K.stack([ tensor[..., swap_channels[0]], tensor[..., swap_channels[1]], tensor[..., swap_channels[2]] ], axis=-1) elif len(swap_channels) == 4: return K.stack([ tensor[..., swap_channels[0]], tensor[..., swap_channels[1]], tensor[..., swap_channels[2]], tensor[..., swap_channels[3]] ], axis=-1) ############################################################################ # Bước 1: Xây dựng network. 
############################################################################ x = Input(shape=(img_height, img_width, img_channels)) x1 = Lambda(identity_layer, output_shape=(img_height, img_width, img_channels), name='identity_layer')(x) if not (subtract_mean is None): x1 = Lambda(input_mean_normalization, output_shape=(img_height, img_width, img_channels), name='input_mean_normalization')(x1) if not (divide_by_stddev is None): x1 = Lambda(input_stddev_normalization, output_shape=(img_height, img_width, img_channels), name='input_stddev_normalization')(x1) if swap_channels: x1 = Lambda(input_channel_swap, output_shape=(img_height, img_width, img_channels), name='input_channel_swap')(x1) ############################################################################ # Bước 1.1: Tính toán base network là mạng VGG16 ############################################################################ conv1_1 = Conv2D(64, (3, 3), activation='relu', padding='same', kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='conv1_1')(x1) conv1_2 = Conv2D(64, (3, 3), activation='relu', padding='same', kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='conv1_2')(conv1_1) pool1 = MaxPooling2D(pool_size=(2, 2), strides=(2, 2), padding='same', name='pool1')(conv1_2) conv2_1 = Conv2D(128, (3, 3), activation='relu', padding='same', kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='conv2_1')(pool1) conv2_2 = Conv2D(128, (3, 3), activation='relu', padding='same', kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='conv2_2')(conv2_1) pool2 = MaxPooling2D(pool_size=(2, 2), strides=(2, 2), padding='same', name='pool2')(conv2_2) conv3_1 = Conv2D(256, (3, 3), activation='relu', padding='same', kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='conv3_1')(pool2) conv3_2 = Conv2D(256, (3, 3), activation='relu', padding='same', kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='conv3_2')(conv3_1) 
conv3_3 = Conv2D(256, (3, 3), activation='relu', padding='same', kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='conv3_3')(conv3_2) pool3 = MaxPooling2D(pool_size=(2, 2), strides=(2, 2), padding='same', name='pool3')(conv3_3) conv4_1 = Conv2D(512, (3, 3), activation='relu', padding='same', kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='conv4_1')(pool3) conv4_2 = Conv2D(512, (3, 3), activation='relu', padding='same', kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='conv4_2')(conv4_1) conv4_3 = Conv2D(512, (3, 3), activation='relu', padding='same', kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='conv4_3')(conv4_2) pool4 = MaxPooling2D(pool_size=(2, 2), strides=(2, 2), padding='same', name='pool4')(conv4_3) conv5_1 = Conv2D(512, (3, 3), activation='relu', padding='same', kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='conv5_1')(pool4) conv5_2 = Conv2D(512, (3, 3), activation='relu', padding='same', kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='conv5_2')(conv5_1) conv5_3 = Conv2D(512, (3, 3), activation='relu', padding='same', kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='conv5_3')(conv5_2) pool5 = MaxPooling2D(pool_size=(3, 3), strides=(1, 1), padding='same', name='pool5')(conv5_3) ############################################################################ # Bước 1.2: Áp dụng các convolutional filter có kích thước (3 x 3) để tính toán ra features map. 
############################################################################ fc6 = Conv2D(1024, (3, 3), dilation_rate=(6, 6), activation='relu', padding='same', kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='fc6')(pool5) print('fully connected 6: ', fc6.get_shape()) fc7 = Conv2D(1024, (1, 1), activation='relu', padding='same', kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='fc7')(fc6) print('fully connected 7: ', fc7.get_shape()) conv6_1 = Conv2D(256, (1, 1), activation='relu', padding='same', kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='conv6_1')(fc7) conv6_1 = ZeroPadding2D(padding=((1, 1), (1, 1)), name='conv6_padding')(conv6_1) conv6_2 = Conv2D(512, (3, 3), strides=(2, 2), activation='relu', padding='valid', kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='conv6_2')(conv6_1) print('conv6_2: ', conv6_2.get_shape()) conv7_1 = Conv2D(128, (1, 1), activation='relu', padding='same', kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='conv7_1')(conv6_2) conv7_1 = ZeroPadding2D(padding=((1, 1), (1, 1)), name='conv7_padding')(conv7_1) conv7_2 = Conv2D(256, (3, 3), strides=(2, 2), activation='relu', padding='valid', kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='conv7_2')(conv7_1) print('conv7_2: ', conv7_2.get_shape()) conv8_1 = Conv2D(128, (1, 1), activation='relu', padding='same', kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='conv8_1')(conv7_2) conv8_2 = Conv2D(256, (3, 3), strides=(1, 1), activation='relu', padding='valid', kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='conv8_2')(conv8_1) print('conv8_2: ', conv8_2.get_shape()) conv9_1 = Conv2D(128, (1, 1), activation='relu', padding='same', kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='conv9_1')(conv8_2) conv9_2 = Conv2D(256, (3, 3), strides=(1, 1), activation='relu', padding='valid', kernel_initializer='he_normal', 
kernel_regularizer=l2(l2_reg), name='conv9_2')(conv9_1) print('conv9_2: ', conv9_2.get_shape()) # Feed conv4_3 vào the L2 normalization layer conv4_3_norm = L2Normalization(gamma_init=20, name='conv4_3_norm')(conv4_3) print('conv4_3_norm.shape: ', conv4_3_norm.get_shape()) ############################################################################ # Bước 1.3: Xác định output phân phối xác suất theo các classes ứng với mỗi một default bounding box. ############################################################################ ### Xây dựng các convolutional predictor layers tại top của base network # Chúng ta dự báo các giá trị confidence cho mỗi box, do đó confidence predictors có độ sâu `n_boxes * n_classes` # Đầu ra của confidence layers có shape: `(batch, height, width, n_boxes * n_classes)` conv4_3_norm_mbox_conf = Conv2D( n_boxes[0] * n_classes, (3, 3), padding='same', kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='conv4_3_norm_mbox_conf')(conv4_3_norm) print('conv4_3_norm_mbox_conf.shape: ', conv4_3_norm_mbox_conf.get_shape()) fc7_mbox_conf = Conv2D(n_boxes[1] * n_classes, (3, 3), padding='same', kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='fc7_mbox_conf')(fc7) print('fc7_mbox_conf.shape: ', fc7_mbox_conf.get_shape()) conv6_2_mbox_conf = Conv2D(n_boxes[2] * n_classes, (3, 3), padding='same', kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='conv6_2_mbox_conf')(conv6_2) conv7_2_mbox_conf = Conv2D(n_boxes[3] * n_classes, (3, 3), padding='same', kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='conv7_2_mbox_conf')(conv7_2) conv8_2_mbox_conf = Conv2D(n_boxes[4] * n_classes, (3, 3), padding='same', kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='conv8_2_mbox_conf')(conv8_2) conv9_2_mbox_conf = Conv2D(n_boxes[5] * n_classes, (3, 3), padding='same', kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='conv9_2_mbox_conf')(conv9_2) 
print('conv9_2_mbox_conf: ', conv9_2_mbox_conf.get_shape()) ############################################################################ # Bước 1.4: Xác định output các tham số offset của default bounding boxes tương ứng với mỗi cell trên các features map. ############################################################################ # Chúng ta dự báo 4 tọa độ cho mỗi box, do đó localization predictors có độ sâu `n_boxes * 4` # Output shape của localization layers: `(batch, height, width, n_boxes * 4)` conv4_3_norm_mbox_loc = Conv2D(n_boxes[0] * 4, (3, 3), padding='same', kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='conv4_3_norm_mbox_loc')(conv4_3_norm) print('conv4_3_norm_mbox_loc: ', conv4_3_norm_mbox_loc.get_shape()) fc7_mbox_loc = Conv2D(n_boxes[1] * 4, (3, 3), padding='same', kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='fc7_mbox_loc')(fc7) conv6_2_mbox_loc = Conv2D(n_boxes[2] * 4, (3, 3), padding='same', kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='conv6_2_mbox_loc')(conv6_2) conv7_2_mbox_loc = Conv2D(n_boxes[3] * 4, (3, 3), padding='same', kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='conv7_2_mbox_loc')(conv7_2) conv8_2_mbox_loc = Conv2D(n_boxes[4] * 4, (3, 3), padding='same', kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='conv8_2_mbox_loc')(conv8_2) conv9_2_mbox_loc = Conv2D(n_boxes[5] * 4, (3, 3), padding='same', kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='conv9_2_mbox_loc')(conv9_2) print('conv9_2_mbox_loc: ', conv9_2_mbox_loc.get_shape()) ############################################################################ # Bước 1.5: Tính toán các AnchorBoxes làm cơ sở để dự báo offsets cho các predicted bounding boxes bao quan vật thể ############################################################################ ### Khởi tạo các anchor boxes (được gọi là "priors" trong code gốc Caffe/C++ của mô hình) # Shape output của 
anchors: `(batch, height, width, n_boxes, 8)` conv4_3_norm_mbox_priorbox = AnchorBoxes( img_height, img_width, this_scale=scales[0], next_scale=scales[1], aspect_ratios=aspect_ratios[0], two_boxes_for_ar1=two_boxes_for_ar1, this_steps=steps[0], this_offsets=offsets[0], clip_boxes=clip_boxes, variances=variances, coords=coords, normalize_coords=normalize_coords, name='conv4_3_norm_mbox_priorbox')(conv4_3_norm_mbox_loc) print('conv4_3_norm_mbox_priorbox: ', conv4_3_norm_mbox_priorbox.get_shape()) fc7_mbox_priorbox = AnchorBoxes(img_height, img_width, this_scale=scales[1], next_scale=scales[2], aspect_ratios=aspect_ratios[1], two_boxes_for_ar1=two_boxes_for_ar1, this_steps=steps[1], this_offsets=offsets[1], clip_boxes=clip_boxes, variances=variances, coords=coords, normalize_coords=normalize_coords, name='fc7_mbox_priorbox')(fc7_mbox_loc) print('fc7_mbox_priorbox: ', fc7_mbox_priorbox.get_shape()) conv6_2_mbox_priorbox = AnchorBoxes( img_height, img_width, this_scale=scales[2], next_scale=scales[3], aspect_ratios=aspect_ratios[2], two_boxes_for_ar1=two_boxes_for_ar1, this_steps=steps[2], this_offsets=offsets[2], clip_boxes=clip_boxes, variances=variances, coords=coords, normalize_coords=normalize_coords, name='conv6_2_mbox_priorbox')(conv6_2_mbox_loc) print('conv6_2_mbox_priorbox: ', conv6_2_mbox_priorbox.get_shape()) conv7_2_mbox_priorbox = AnchorBoxes( img_height, img_width, this_scale=scales[3], next_scale=scales[4], aspect_ratios=aspect_ratios[3], two_boxes_for_ar1=two_boxes_for_ar1, this_steps=steps[3], this_offsets=offsets[3], clip_boxes=clip_boxes, variances=variances, coords=coords, normalize_coords=normalize_coords, name='conv7_2_mbox_priorbox')(conv7_2_mbox_loc) print('conv7_2_mbox_priorbox: ', conv7_2_mbox_priorbox.get_shape()) conv8_2_mbox_priorbox = AnchorBoxes( img_height, img_width, this_scale=scales[4], next_scale=scales[5], aspect_ratios=aspect_ratios[4], two_boxes_for_ar1=two_boxes_for_ar1, this_steps=steps[4], this_offsets=offsets[4], 
clip_boxes=clip_boxes, variances=variances, coords=coords, normalize_coords=normalize_coords, name='conv8_2_mbox_priorbox')(conv8_2_mbox_loc) print('conv8_2_mbox_priorbox: ', conv8_2_mbox_priorbox.get_shape()) conv9_2_mbox_priorbox = AnchorBoxes( img_height, img_width, this_scale=scales[5], next_scale=scales[6], aspect_ratios=aspect_ratios[5], two_boxes_for_ar1=two_boxes_for_ar1, this_steps=steps[5], this_offsets=offsets[5], clip_boxes=clip_boxes, variances=variances, coords=coords, normalize_coords=normalize_coords, name='conv9_2_mbox_priorbox')(conv9_2_mbox_loc) print('conv9_2_mbox_priorbox: ', conv9_2_mbox_priorbox.get_shape()) ############################################################################ # Bước 2: Reshape lại các output tensor shape ############################################################################ ############################################################################ # Bước 2.1: Reshape output của class predictions ############################################################################ # Reshape các class predictions, trả về 3D tensors có shape `(batch, height * width * n_boxes, n_classes)` # Chúng ta muốn các classes là tách biệt nhau trên last axis để tính softmax trên chúng. 
conv4_3_norm_mbox_conf_reshape = Reshape( (-1, n_classes), name='conv4_3_norm_mbox_conf_reshape')(conv4_3_norm_mbox_conf) fc7_mbox_conf_reshape = Reshape( (-1, n_classes), name='fc7_mbox_conf_reshape')(fc7_mbox_conf) conv6_2_mbox_conf_reshape = Reshape( (-1, n_classes), name='conv6_2_mbox_conf_reshape')(conv6_2_mbox_conf) conv7_2_mbox_conf_reshape = Reshape( (-1, n_classes), name='conv7_2_mbox_conf_reshape')(conv7_2_mbox_conf) conv8_2_mbox_conf_reshape = Reshape( (-1, n_classes), name='conv8_2_mbox_conf_reshape')(conv8_2_mbox_conf) conv9_2_mbox_conf_reshape = Reshape( (-1, n_classes), name='conv9_2_mbox_conf_reshape')(conv9_2_mbox_conf) print('conv4_3_norm_mbox_conf_reshape: ', conv4_3_norm_mbox_conf_reshape.get_shape()) print('fc7_mbox_conf_reshape: ', fc7_mbox_conf_reshape.get_shape()) print('conv9_2_mbox_conf_reshape: ', conv9_2_mbox_conf_reshape.get_shape()) print('conv9_2_mbox_conf_reshape: ', conv9_2_mbox_conf_reshape.get_shape()) print('conv9_2_mbox_conf_reshape: ', conv9_2_mbox_conf_reshape.get_shape()) ############################################################################ # Bước 2.2: Reshape output của bounding box predictions ############################################################################ # Reshape các box predictions, trả về 3D tensors có shape `(batch, height * width * n_boxes, 4)` # Chúng ta muốn 4 tọa độ box là tách biệt nhau trên last axis để tính hàm smooth L1 loss conv4_3_norm_mbox_loc_reshape = Reshape( (-1, 4), name='conv4_3_norm_mbox_loc_reshape')(conv4_3_norm_mbox_loc) fc7_mbox_loc_reshape = Reshape((-1, 4), name='fc7_mbox_loc_reshape')(fc7_mbox_loc) conv6_2_mbox_loc_reshape = Reshape( (-1, 4), name='conv6_2_mbox_loc_reshape')(conv6_2_mbox_loc) conv7_2_mbox_loc_reshape = Reshape( (-1, 4), name='conv7_2_mbox_loc_reshape')(conv7_2_mbox_loc) conv8_2_mbox_loc_reshape = Reshape( (-1, 4), name='conv8_2_mbox_loc_reshape')(conv8_2_mbox_loc) conv9_2_mbox_loc_reshape = Reshape( (-1, 4), 
name='conv9_2_mbox_loc_reshape')(conv9_2_mbox_loc) print('conv4_3_norm_mbox_loc_reshape: ', conv4_3_norm_mbox_loc_reshape.get_shape()) print('fc7_mbox_loc_reshape: ', fc7_mbox_loc_reshape.get_shape()) print('conv6_2_mbox_loc_reshape: ', conv6_2_mbox_loc_reshape.get_shape()) print('conv7_2_mbox_loc_reshape: ', conv7_2_mbox_loc_reshape.get_shape()) print('conv8_2_mbox_loc_reshape: ', conv8_2_mbox_loc_reshape.get_shape()) print('conv9_2_mbox_loc_reshape: ', conv9_2_mbox_loc_reshape.get_shape()) ############################################################################ # Bước 2.3: Reshape output của anchor box ############################################################################ # Reshape anchor box tensors, trả về 3D tensors có shape `(batch, height * width * n_boxes, 8)` conv4_3_norm_mbox_priorbox_reshape = Reshape( (-1, 8), name='conv4_3_norm_mbox_priorbox_reshape')(conv4_3_norm_mbox_priorbox) fc7_mbox_priorbox_reshape = Reshape( (-1, 8), name='fc7_mbox_priorbox_reshape')(fc7_mbox_priorbox) conv6_2_mbox_priorbox_reshape = Reshape( (-1, 8), name='conv6_2_mbox_priorbox_reshape')(conv6_2_mbox_priorbox) conv7_2_mbox_priorbox_reshape = Reshape( (-1, 8), name='conv7_2_mbox_priorbox_reshape')(conv7_2_mbox_priorbox) conv8_2_mbox_priorbox_reshape = Reshape( (-1, 8), name='conv8_2_mbox_priorbox_reshape')(conv8_2_mbox_priorbox) conv9_2_mbox_priorbox_reshape = Reshape( (-1, 8), name='conv9_2_mbox_priorbox_reshape')(conv9_2_mbox_priorbox) print('conv4_3_norm_mbox_priorbox_reshape: ', conv4_3_norm_mbox_priorbox_reshape.get_shape()) print('fc7_mbox_priorbox_reshape: ', fc7_mbox_priorbox_reshape.get_shape()) print('conv6_2_mbox_priorbox_reshape: ', conv6_2_mbox_priorbox_reshape.get_shape()) print('conv7_2_mbox_priorbox_reshape: ', conv7_2_mbox_priorbox_reshape.get_shape()) print('conv8_2_mbox_priorbox_reshape: ', conv8_2_mbox_priorbox_reshape.get_shape()) print('conv9_2_mbox_priorbox_reshape: ', conv9_2_mbox_priorbox_reshape.get_shape()) ### Concatenate các predictions từ 
các layers khác nhau ############################################################################ # Bước 3: Concatenate các boxes trên layers ############################################################################ ############################################################################ # Bước 3.1: Concatenate confidence output box ############################################################################ # Axis 0 (batch) và axis 2 (n_classes hoặc 4) là xác định duy nhất cho toàn bộ các predictions layer # nên chúng ta muốn concatenate theo axis 1, số lượng các boxes trên layer # Output shape của `mbox_conf`: (batch, n_boxes_total, n_classes) mbox_conf = Concatenate(axis=1, name='mbox_conf')([ conv4_3_norm_mbox_conf_reshape, fc7_mbox_conf_reshape, conv6_2_mbox_conf_reshape, conv7_2_mbox_conf_reshape, conv8_2_mbox_conf_reshape, conv9_2_mbox_conf_reshape ]) print('mbox_conf.shape: ', mbox_conf.get_shape()) ############################################################################ # Bước 3.2: Concatenate location output box ############################################################################ # Output shape của `mbox_loc`: (batch, n_boxes_total, 4) mbox_loc = Concatenate(axis=1, name='mbox_loc')([ conv4_3_norm_mbox_loc_reshape, fc7_mbox_loc_reshape, conv6_2_mbox_loc_reshape, conv7_2_mbox_loc_reshape, conv8_2_mbox_loc_reshape, conv9_2_mbox_loc_reshape ]) print('mbox_loc.shape: ', mbox_loc.get_shape()) ############################################################################ # Bước 3.3: Concatenate anchor output box ############################################################################ # Output shape của `mbox_priorbox`: (batch, n_boxes_total, 8) mbox_priorbox = Concatenate(axis=1, name='mbox_priorbox')([ conv4_3_norm_mbox_priorbox_reshape, fc7_mbox_priorbox_reshape, conv6_2_mbox_priorbox_reshape, conv7_2_mbox_priorbox_reshape, conv8_2_mbox_priorbox_reshape, conv9_2_mbox_priorbox_reshape ]) print('mbox_priorbox.shape: ', 
mbox_priorbox.get_shape()) ############################################################################ # Bước 4: Tính toán output ############################################################################ ############################################################################ # Bước 4.1 : Xây dựng các hàm loss function cho confidence ############################################################################ # tọa độ của box predictions sẽ được truyền vào hàm loss function, # nhưng cho các dự báo lớp, chúng ta sẽ áp dụng một hàm softmax activation layer đầu tiên mbox_conf_softmax = Activation('softmax', name='mbox_conf_softmax')(mbox_conf) # Concatenate các class và box predictions và the anchors thành một large predictions vector # Đầu ra của `predictions`: (batch, n_boxes_total, n_classes + 4 + 8) predictions = Concatenate(axis=2, name='predictions')( [mbox_conf_softmax, mbox_loc, mbox_priorbox]) print('predictions.shape: ', predictions.get_shape()) if mode == 'training': model = Model(inputs=x, outputs=predictions) elif mode == 'inference': decoded_predictions = DecodeDetections( confidence_thresh=confidence_thresh, iou_threshold=iou_threshold, top_k=top_k, nms_max_output_size=nms_max_output_size, coords=coords, normalize_coords=normalize_coords, img_height=img_height, img_width=img_width, name='decoded_predictions')(predictions) model = Model(inputs=x, outputs=decoded_predictions) elif mode == 'inference_fast': decoded_predictions = DecodeDetectionsFast( confidence_thresh=confidence_thresh, iou_threshold=iou_threshold, top_k=top_k, nms_max_output_size=nms_max_output_size, coords=coords, normalize_coords=normalize_coords, img_height=img_height, img_width=img_width, name='decoded_predictions')(predictions) model = Model(inputs=x, outputs=decoded_predictions) else: raise ValueError( "`mode` must be one of 'training', 'inference' or 'inference_fast', but received '{}'." 
.format(mode)) if return_predictor_sizes: predictor_sizes = np.array([ conv4_3_norm_mbox_conf._keras_shape[1:3], fc7_mbox_conf._keras_shape[1:3], conv6_2_mbox_conf._keras_shape[1:3], conv7_2_mbox_conf._keras_shape[1:3], conv8_2_mbox_conf._keras_shape[1:3], conv9_2_mbox_conf._keras_shape[1:3] ]) return model, predictor_sizes else: return model
def build_model(self, args):
    """Build and compile a binary classifier and store it in ``self.model``.

    The feature-extraction head is selected by ``self.modelDesign``
    ('cnn1d', 'cnn2d' or 'lstm'); the input shape is taken from
    ``self.data['train']['X']``.  All designs share the same FC tail and a
    single sigmoid output trained with binary cross-entropy.

    Parameters
    ----------
    args : namespace providing ``nCpu`` (int; >0 restricts the TF thread
        pool) and ``dropFrac`` (dropout fraction for FC/LSTM layers).
    """
    # based on https://keras.io/getting-started/functional-api-guide/
    start = time.time()
    # CPUs are used via a "device" which is just a threadpool
    if args.nCpu > 0:
        import tensorflow as tf
        tf.Session(config=tf.ConfigProto(
            intra_op_parallelism_threads=args.nCpu))
        print('restrict CPU count to ', args.nCpu)
    dropFrac = args.dropFrac
    sh1 = self.data['train']['X'].shape
    print('build_model inp1:', sh1, 'design=', self.modelDesign)

    if self.modelDesign == 'cnn1d':  # . . . . . . . . . . . . . . .
        xa = Input(shape=(sh1[1], ), name='inp1d')
        h = Reshape((sh1[1], 1))(xa)
        kernel = 5
        pool_len = 3  # how much time_bins get reduced per pooling
        cnnDim = [2, 4]
        print(' cnn1Dim:', cnnDim)
        for i, dim in enumerate(cnnDim):
            h = Conv1D(dim, kernel, activation='relu', padding='valid',
                       name='cnn%d_d%d_k%d' % (i, dim, kernel))(h)
            # BUGFIX: 'pool_length' is the Keras 1 keyword; Keras 2 uses
            # 'pool_size' (the MaxPool2D branch below already does).
            h = MaxPool1D(pool_size=pool_len, name='pool_%d' % (i))(h)
            print('cnn 1d', i, h.get_shape())
        h = Flatten(name='to_1d')(h)

    if self.modelDesign == 'cnn2d':  # . . . . . . . . . . . . . . .
        xa = Input(shape=(sh1[1], sh1[2], ), name='inp2d')
        h = Reshape((sh1[1], sh1[2], 1))(xa)
        kernel = 3
        pool_len = 2  # how much time_bins get reduced per pooling
        cnnDim = [4, 8]
        print(' cnn2Dim:', cnnDim)
        for i, dim in enumerate(cnnDim):
            h = Conv2D(dim, kernel, activation='relu', padding='valid',
                       name='cnn%d_d%d_k%d' % (i, dim, kernel))(h)
            h = MaxPool2D(pool_size=pool_len, name='pool_%d' % (i))(h)
            print('cnn 2d', i, h.get_shape())
        h = Flatten(name='to_1d')(h)

    if self.modelDesign == 'lstm':  # . . . . . . . . . . . . . . .
        # BUGFIX: the original used 'h'/'xa' here without ever defining them,
        # so the 'lstm' design raised NameError.  Feed the raw sequence
        # directly; assumes X is 3D (batch, time, features) -- TODO confirm
        # against the data pipeline.
        xa = Input(shape=(sh1[1], sh1[2]), name='inp_lstm')
        h = xa
        lstmDim = 10
        recDropFrac = 0.5 * dropFrac
        print(' lstmDim:', lstmDim)
        h = LSTM(lstmDim, activation='tanh',
                 recurrent_dropout=recDropFrac, dropout=dropFrac,
                 name='lstmA_%d' % lstmDim, return_sequences=True)(h)
        h = LSTM(lstmDim, activation='tanh',
                 recurrent_dropout=recDropFrac, dropout=dropFrac,
                 name='lstmB_%d' % lstmDim, return_sequences=False)(h)

    print('pre FC=>', h.get_shape())
    h = Dropout(dropFrac, name='dropFC')(h)
    # .... FC layers COMMON to all designs
    fcDim = [10, 5]
    for i, dim in enumerate(fcDim):
        h = Dense(dim, activation='relu', name='fc%d' % i)(h)
        h = Dropout(dropFrac, name='drop%d' % i)(h)
        print('fc', i, h.get_shape())
    y = Dense(1, activation='sigmoid', name='sigmoid')(h)
    lossName = 'binary_crossentropy'
    optimizerName = 'adam'
    print('build_model: loss=', lossName, ' optName=', optimizerName,
          ' out:', y.get_shape())
    # full model
    model = Model(inputs=xa, outputs=y)
    model.compile(optimizer=optimizerName, loss=lossName,
                  metrics=['accuracy'])
    self.model = model
    model.summary()  # will print
    print('model size=%.1fK compiled elaT=%.1f sec' %
          (model.count_params() / 1000., time.time() - start))
biLSTM_Input = Reshape((sequence_length,embedding_dim))(model1) left_branch = LSTM(300,input_shape = (40,300),return_sequences='True',input_length=40)(biLSTM_Input) right_branch = LSTM(300,input_shape=(40,300),return_sequences='True',input_length=40,go_backwards=True)(biLSTM_Input) print "left_branch.get_shape()",left_branch.get_shape() print "right_branch.get_shape()",right_branch.get_shape() lstm_merged = merge([left_branch,right_branch],mode='ave') lstm_merged = Reshape([40,300,1])(lstm_merged) lstm_merged = Dropout(0.2)(lstm_merged) graph_in_temp = merge([model1, model2,lstm_merged],mode='concat',concat_axis=-1) graph_in = Reshape((40,300,3))(graph_in_temp) print graph_in.get_shape() conv_11 = Convolution2D(nb_filter=feature_map, nb_row=filter_sizes[0], nb_col=col_size, border_mode='valid', activation='relu')(graph_in) conv_22 = Convolution2D(nb_filter=feature_map, nb_row=filter_sizes[1], nb_col=col_size, border_mode='valid', activation='relu')(graph_in) conv_33 = Convolution2D(nb_filter=feature_map, nb_row=filter_sizes[2], nb_col=col_size, border_mode='valid', activation='relu')(graph_in) conv_11 = MaxPooling2D(pool_size=(int(conv_11.get_shape()[1]),int(conv_11.get_shape()[2])))(conv_11) conv_22 = MaxPooling2D(pool_size=(int(conv_22.get_shape()[1]),int(conv_22.get_shape()[2])))(conv_22) conv_33 = MaxPooling2D(pool_size=(int(conv_33.get_shape()[1]),int(conv_33.get_shape()[2])))(conv_33) conva = merge([conv_11, conv_22, conv_33], mode='concat',concat_axis=-1) conva = Dropout(dropout_prob[1])(conva) print conva.get_shape()
def googleNet_n(x,
                data_format='channels_last',
                num_classes=24,
                num_layers=[1, 1, 2, 1],
                features=[1, 1, 1, 1, 1]):
    """GoogLeNet-style network over a raw signal and its FFT.

    Builds two parallel conv stems -- one on the raw input ``x`` and one on
    its FFT (computed via the external ``tf_fft``) -- fuses them on the
    channel axis, then stacks inception blocks and returns the output of
    the external ``out_tower`` head.

    Parameters:
        x: input tensor; reshaped with the module-level global ``in_shp``,
            so x is assumed to match ``in_shp`` -- TODO confirm.
        data_format: passed to the stem Conv2D layers only.
        num_classes: NOTE(review): currently unused in this body.
        num_layers: repeat counts for the four conv/inception stages.
        features: per-stage channel multipliers.

    NOTE(review): the list default arguments are mutable and shared across
    calls; harmless here because they are never mutated.
    """
    # FFT branch input (tf_fft is an external helper; semantics not visible here)
    xft = Lambda(lambda v: tf_fft(v))(x)
    # give the raw signal a trailing channel axis
    x = Reshape(in_shp + (1, ), input_shape=in_shp)(x)
    # --- stem on the raw signal ---
    x = Conv2D(filters=64 * features[0],
               kernel_size=[2, 7],
               strides=[2, 2],
               data_format=data_format,
               padding='same',
               activation='relu')(x)
    x = MaxPooling2D([1, 3], strides=[1, 2], padding='same')(x)
    for dep in range(num_layers[0]):
        x = Conv2D(filters=192 * features[1],
                   kernel_size=[1, 3],
                   strides=[1, 1],
                   padding='same',
                   activation='relu')(x)
    x = MaxPooling2D([1, 3], strides=[1, 2], padding='same')(x)
    # --- matching stem on the FFT branch (narrower first kernel: [2, 4]) ---
    xft = Reshape(in_shp + (1, ), input_shape=in_shp)(xft)
    xft = Conv2D(filters=64 * features[0],
                 kernel_size=[2, 4],
                 strides=[2, 2],
                 data_format=data_format,
                 padding='same',
                 activation='relu')(xft)
    xft = MaxPooling2D([1, 3], strides=[1, 2], padding='same')(xft)
    for dep in range(num_layers[0]):
        xft = Conv2D(filters=192 * features[1],
                     kernel_size=[1, 3],
                     strides=[1, 1],
                     padding='same',
                     activation='relu')(xft)
    xft = MaxPooling2D([1, 3], strides=[1, 2], padding='same')(xft)
    print(x.get_shape(), xft.get_shape())
    # fuse the raw and FFT branches on the channel axis
    x = keras.layers.concatenate([x, xft], axis=3)
    print(x.get_shape())
    # inception stage 2 (inception/out_tower are external helpers)
    for dep in range(num_layers[1]):
        x = inception(x,
                      height=2,
                      fs=np.array([32, 32, 32, 32, 32]) * features[2],
                      tw_tower=True)
    x = MaxPooling2D([1, 3], strides=2, padding='same')(x)
    # inception stage 3, with residual connections
    for dep in range(num_layers[2]):
        x = inception(x,
                      height=2,
                      fs=np.array([48, 96, 48, 96, 96]) * features[3],
                      with_residual=True)
    # (disabled intermediate head / extra stage kept for reference)
    #out_mid = out_tower(x, dr=0.3)
    #for dep in range(num_layers[3]):
    #    x = inception(x, height=2, fs=np.array([48,96,48,96,96])*features[4], with_residual=True)
    x = MaxPooling2D([2, 3], strides=2, padding='same')(x)
    # final inception stage
    for dep in range(num_layers[3]):
        x = inception(x,
                      height=1,
                      fs=np.array([32, 32, 32, 32, 32]) * features[4])
    # classification head; regularization comes from the module-level CLI args
    out = out_tower(x, dr=0.5, reg=args.confireg)
    #out = Average()([out_mid, out_late])
    return out
# Toy probe of a Conv2D -> Reshape pipeline over a (1, 10, 10) match matrix.
max_len_1 = 10
max_len_2 = 10
embed_size = 50
# mm = cal_match_matrix(query, doc, max_len_1, max_len_2, glove, embed_size)


def test(mm, bin_num=20):
    """Sum u + v over every (u, v) pair found in mm[:, 1]."""
    total = 0
    for pair in mm[:, 1]:
        u, v = pair
        total += u + v
    return total


inputs = Input(shape=(1, 10, 10))
conv2d = Conv2D(filters=5,
                kernel_size=3,
                data_format='channels_first',
                padding='valid',
                activation='relu')(inputs)
print(conv2d.get_shape())
# fconv2d = Flatten()(conv2d)
# print(fconv2d.get_shape())
fconv2d = Reshape((2, -1))(conv2d)
print(fconv2d.get_shape())
# a = Lambda(lambda x: test(x))(conv2d)
# print(a.get_shape())
def old_model(self, input_dim=(320, 320), input_channels=1, output_channels=4,
              drop_out=0.0, batch_Norm=True, USE_BIAS=False,
              interpolation='bilinear', fullbottle=False):
    """U-Net with a trainable VGG16 encoder and an upsampling decoder.

    Skip connections are taken from the five VGG16 conv blocks; the first
    input channel is concatenated back in just before the 1x1 softmax head.

    Parameters
    ----------
    input_dim : (height, width) of the input image.
    input_channels : NOTE(review): unused -- the input is hard-wired to
        3 channels because the VGG16 encoder expects RGB.
    output_channels : number of classes in the per-pixel softmax output.
    drop_out, batch_Norm : forwarded to self.double_conv2D blocks.
    USE_BIAS : kept for interface compatibility (was used by the removed
        Convolution2DTranspose / SubpixelConv2D upsampling alternatives).
    interpolation : UpSampling2D interpolation mode.
    fullbottle : if True, add an extra bn_conv2D at the bottleneck.

    Returns
    -------
    keras Model named "unet_vgg16".
    """
    n_filters = 64
    inputs = Input((input_dim[0], input_dim[1], 3))
    # get VGG16 encoder and keep all of it trainable
    vgg16 = VGG16(input_tensor=inputs, include_top=False)
    for l in vgg16.layers:
        l.trainable = True
    # vgg16.summary()
    # (the original also called vgg16(inputs) here and immediately discarded
    #  the result; that dead call was removed)
    # skip-connection tensors from each encoder block
    block1_conv2 = vgg16.get_layer("block1_conv2").output
    block2_conv2 = vgg16.get_layer("block2_conv2").output
    block3_conv3 = vgg16.get_layer("block3_conv3").output
    block4_conv3 = vgg16.get_layer("block4_conv3").output
    block5_conv3 = vgg16.get_layer("block5_conv3").output
    out_vgg16 = vgg16.get_layer("block5_pool").output
    # --mid convolutions (bottleneck)--
    convMid_1 = self.double_conv2D(n_filters * 16, 3, out_vgg16,
                                   batch_norm=batch_Norm, dropout=drop_out)
    if fullbottle:
        convMid_1 = self.bn_conv2D(n_filters * 16, 3, convMid_1)
    # ------- up path ----------
    upconv_1 = UpSampling2D((2, 2), interpolation=interpolation)(convMid_1)
    conca_1 = concatenate([upconv_1, block5_conv3], axis=self.axis)
    conv_1 = self.double_conv2D(n_filters * 8, 3, conca_1,
                                batch_norm=batch_Norm, dropout=drop_out)
    conv_1 = self.bn_conv2D(n_filters * 8, 3, conv_1)
    upconv_2 = UpSampling2D((2, 2), interpolation=interpolation)(conv_1)
    conca_2 = concatenate([upconv_2, block4_conv3], axis=self.axis)
    conv_2 = self.double_conv2D(n_filters * 8, 3, conca_2,
                                batch_norm=batch_Norm, dropout=drop_out)
    conv_2 = self.bn_conv2D(n_filters * 8, 3, conv_2)
    upconv_3 = UpSampling2D((2, 2), interpolation=interpolation)(conv_2)
    conca_3 = concatenate([upconv_3, block3_conv3], axis=self.axis)
    conv_3 = self.double_conv2D(n_filters * 4, 3, conca_3,
                                batch_norm=batch_Norm, dropout=drop_out)
    conv_3 = self.bn_conv2D(n_filters * 4, 3, conv_3)
    upconv_4 = UpSampling2D((2, 2), interpolation=interpolation)(conv_3)
    conca_4 = concatenate([upconv_4, block2_conv2], axis=self.axis)
    conv_4 = self.double_conv2D(n_filters * 2, 3, conca_4,
                                batch_norm=batch_Norm, dropout=drop_out)
    upconv_5 = UpSampling2D((2, 2), interpolation=interpolation)(conv_4)
    conca_5 = concatenate([upconv_5, block1_conv2], axis=self.axis)
    conv_5 = self.double_conv2D(n_filters, 3, conca_5,
                                batch_norm=batch_Norm, dropout=drop_out)
    # BUGFIX: the reshape target was hard-coded to (512, 512, 1), which
    # contradicts the input_dim parameter (default (320, 320)) and breaks
    # for any other size; derive the shape from input_dim instead.
    in_c = Reshape((input_dim[0], input_dim[1], 1))(
        Lambda(lambda x: x[:, :, :, 0])(inputs))
    print(in_c.get_shape())
    print(inputs.get_shape())
    conca_6 = concatenate([conv_5, in_c], axis=self.axis)
    out = Conv2D(output_channels, (1, 1))(conca_6)
    out = Activation('softmax')(out)
    # Keras 2 keyword names (inputs/outputs), consistent with build_model
    model = Model(inputs=inputs, outputs=out, name="unet_vgg16")
    return model
def ResNetGenerator(c):
    """Build the encoder/decoder ResNet generator as a Keras model.

    The raw 1-D signal is compressed through ``c.n_compress_block``
    residual stages, decompressed through mirror stages with skip
    connections, then projected back to a 1-D signal with a tanh output.
    """
    print('#' * 10, ' Create generator ', '#' * 10)

    def stage_filters(stage):
        # Filter spec shared by every block of a given stage.
        base = stage ** 2 * c.convo_size
        return [base, base, 4 * base]

    skips = {}
    signal_input = Input(shape=(c.audio_size, ))
    t = Reshape([c.audio_size, 1])(signal_input)

    # Initial strided convolution before the residual stages.
    t = Conv1D(64, 7, strides=2, padding='same', name='conv1')(t)
    t = BatchNormalization(name='bn_conv1')(t)
    t = LeakyReLU(alpha=0.3)(t)

    print('COMPRESSION')
    print(c.audio_size, '-> ', t.get_shape().as_list())
    for stage in range(1, c.n_compress_block + 1):
        skips[stage] = t  # remember pre-stage tensor for the decoder
        print(t.get_shape().as_list(), '-> ', end='')
        t = conv_block(t, 3, stage_filters(stage), stage=stage, block='a')
        print(t.get_shape().as_list())
        t = identity_block(t, 3, stage_filters(stage), stage=stage, block='b')
        t = identity_block(t, 3, stage_filters(stage), stage=stage, block='c')

    for k, v in skips.items():
        print(k, v)
    print('\nAfter compression', t, '\n')

    # DECOMPRESS: walk the stages back up, merging in the saved skips.
    print('DECOMPRESSION')
    for stage in range(c.n_compress_block, 0, -1):
        print(stage, end=' ')
        print(t.get_shape().as_list(), '-> ', end='')
        t = deconv_block(t, 3, stage_filters(stage), stage=stage,
                         block='a_incr')
        print(t.get_shape().as_list())
        t = Concatenate(axis=2)([skips[stage], t])
        t = identity_block(t, 3, stage_filters(stage), stage=stage,
                           block='b_incr')
        t = identity_block(t, 3, stage_filters(stage), stage=stage,
                           block='c_incr')

    # One extra decompression pass after the loop; note it reuses the
    # loop's leftover `stage` value (1) for its filter sizes, exactly as
    # the original did.
    print(t.get_shape().as_list(), '-> ', end='')
    t = deconv_block(t, 3, stage_filters(stage), stage=42, block='a_incr')
    t = identity_block(t, 3, stage_filters(stage), stage=42, block='c_incr')
    print(t.get_shape().as_list())

    # Project back to a single channel and flatten to a 1-D signal.
    t = Conv1D(1, 1, strides=1, padding='same')(t)
    t = Reshape((-1, ))(t)
    signal_output = Activation('tanh')(t)
    print('Recovered tensor', signal_output)

    # Create model.
    return Model(signal_input, signal_output)
def create_model(num_input_channels, vocab_sizes, output_size_embedding,
                 input_shapes, num_LSTM_layers, num_LSTM_units,
                 num_dense_layers, num_dense_units, learning_rate,
                 reg_param, inp_optim):
    """Build and compile a multi-channel embedding + LSTM + dense classifier.

    Each categorical channel gets its own Embedding and LSTM stack; the
    per-channel outputs are concatenated and fed through a dense head
    ending in a softmax.

    Args:
        num_input_channels: number of categorical input channels.
        vocab_sizes: per-channel vocabulary size (Embedding input_dim).
        output_size_embedding: embedding dimension shared by all channels.
        input_shapes: per-channel shapes; input_shapes[i][1] is the
            sequence length of channel i.
        num_LSTM_layers: per-channel LSTM stack depth.
        num_LSTM_units: per-channel list of LSTM widths.
        num_dense_layers: number of dense layers in the head.
        num_dense_units: width of each dense layer.
        learning_rate: Adam learning rate.
        reg_param: L2 regularization strength on dense kernels.
        inp_optim: unused; kept for interface compatibility.

    Side effects:
        Writes the architecture to "model.json" and prints a summary.

    Returns:
        The compiled keras Model.  Its inputs are the categorical channels
        plus one trailing (1, 1) numeric input (`input1`) that is currently
        not wired into the graph (its concatenation is commented out).
    """
    outputs = []
    inputs = []
    squeezed = []

    # Input layers: one (seq_len, 1) tensor per categorical channel.
    for ch in range(num_input_channels):
        inputs.append(Input(shape=(input_shapes[ch][1], 1),
                            name="input" + str(ch + 1)))
        print(inputs[ch].shape)

    # Drop the trailing singleton axis so embeddings see (seq_len,).
    for ch in range(num_input_channels):
        squeezed.append(
            Lambda(lambda x: keras.backend.squeeze(x, axis=-1))(inputs[ch]))
        print(squeezed[ch].shape)

    # One embedding per channel.
    for ch in range(num_input_channels):
        outputs.append(Embedding(input_dim=vocab_sizes[ch],
                                 output_dim=output_size_embedding,
                                 input_length=input_shapes[ch][1])(squeezed[ch]))

    # Per-channel LSTM stacks: every layer except the last one returns
    # sequences (replaces the original four-way if/elif ladder, which
    # reduced to exactly this rule).
    for ch in range(num_input_channels):
        depth = num_LSTM_layers[ch]
        for layer in range(depth):
            last = (layer == depth - 1)
            outputs[ch] = LSTM(num_LSTM_units[ch][layer], activation='relu',
                               return_sequences=not last)(outputs[ch])
    print("LSTM Layers Constructed")

    # Concatenate the per-channel LSTM outputs.
    # BUG FIX: the original pairwise loop never assigned `output` when
    # num_input_channels == 1, raising NameError downstream.
    if num_input_channels == 1:
        output = outputs[0]
    else:
        output = concatenate([outputs[0], outputs[1]])
        for ch in range(2, num_input_channels):
            output = concatenate([output, outputs[ch]])

    print(output.get_shape().as_list())
    print("Create Dense Layers")

    # The reshape/flatten pair keeps the tensor compatible with the
    # (currently disabled) time-axis concatenation of `input1` below.
    output = Reshape((-1, 1))(output)
    print(output.get_shape().as_list())
    input1 = Input(shape=(1, 1))  # extra numeric input, not wired in yet
    # output = concatenate([output, input1], axis=1)  # add inputs to LSTM outputs
    output = Flatten()(output)

    # Dense head: relu + batch-norm layers, softmax on the final layer.
    for d in range(num_dense_layers):
        if d == num_dense_layers - 1:
            output = Dense(num_dense_units[d], activation='softmax',
                           kernel_regularizer=regularizers.l2(reg_param))(output)
        else:
            output = Dense(num_dense_units[d], activation='relu',
                           kernel_regularizer=regularizers.l2(reg_param))(output)
            output = BatchNormalization()(output)
    print("Done")

    inputs.append(input1)
    model = Model(inputs=inputs, outputs=output)
    optim = Adam(lr=learning_rate, clipnorm=1., amsgrad=True)
    model.compile(optimizer=optim, loss="categorical_crossentropy",
                  metrics=["accuracy"])

    # Persist the architecture alongside the run.
    model_json = model.to_json()
    with open("model.json", "w") as json_file:
        json_file.write(model_json)
    model.summary()
    return model
def draw_capsnet_model(hyper_param, embedding_matrix=None, verbose=True):
    """ Input: hyper parameters dictionary
    Construct: input layers : x , x_pos(o), x_captialization(o)
    embedding matrix : use_glove or randomly initialize
    conv1 : first convolution layer
    primarycaps : conv2 and squash function applied
    ner_caps : make 8 ner capsules of specified dim
    out_pred : calc length of 8 ner capsules as 8 prob. predictions over 8 ner classes
    Returns: if decoding/reconstruction disabled --> a single keras.models.Model object
    if decoding/reconstruction enabled --> three keras.models.Model objects
    """
    maxlen = hyper_param['maxlen']

    # --- input layer(s) ---
    x = Input(shape=(maxlen, ), name='x')
    if hyper_param['use_pos_tags']:
        x_pos = Input(shape=(maxlen, hyper_param['poslen']), name='x_pos')
    if hyper_param['use_capitalization_info']:
        x_capital = Input(shape=(maxlen, hyper_param['capitallen']),
                          name='x_capital')

    # --- embedding matrix: pre-trained GloVe or randomly initialized ---
    if hyper_param['use_glove']:
        embed = Embedding(hyper_param['max_features'],
                          hyper_param['embed_dim'],
                          weights=[embedding_matrix],
                          input_length=maxlen,
                          trainable=hyper_param['allow_glove_retrain'])(x)
    else:
        embed = Embedding(hyper_param['max_features'],
                          hyper_param['embed_dim'],
                          input_length=maxlen,
                          embeddings_initializer="random_uniform")(x)

    # Concatenate optional feature streams onto the embeddings, preserving
    # the [embed, x_pos, x_capital] order.  (Replaces the if/elif ladder
    # whose final branch was a no-op `embed = embed`.)
    extra_feats = []
    if hyper_param['use_pos_tags']:
        extra_feats.append(x_pos)
    if hyper_param['use_capitalization_info']:
        extra_feats.append(x_capital)
    if extra_feats:
        embed = Concatenate(axis=-1)([embed] + extra_feats)

    # Optional dropout on the embeddings.
    if hyper_param['embed_dropout'] > 0.0:
        embed = SpatialDropout1D(hyper_param['embed_dropout'])(embed)

    # --- feed embeddings into conv1 ---
    conv1 = Conv1D(filters=hyper_param['conv1_filters'],
                   kernel_size=hyper_param['conv1_kernel_size'],
                   strides=hyper_param['conv1_strides'],
                   padding=hyper_param['conv1_padding'],
                   activation='relu', name='conv1')(embed)

    # --- primary capsules (2D variant needs conv1 reshaped to rank 4) ---
    if hyper_param['use_2D_primarycaps']:
        convShape = conv1.get_shape().as_list()
        conv1 = Reshape((convShape[1], convShape[2], 1))(conv1)
        primaryCapLayer = PrimaryCap
    else:
        primaryCapLayer = PrimaryCap1D
    primarycaps = primaryCapLayer(
        conv1,
        dim_capsule=hyper_param['primarycaps_dim_capsule'],
        n_channels=hyper_param['primarycaps_n_channels'],
        kernel_size=hyper_param['primarycaps_kernel_size'],
        strides=hyper_param['primarycaps_strides'],
        padding=hyper_param['primarycaps_padding'])

    # --- NER capsules, one per class, with dynamic routing ---
    ner_caps = CapsuleLayer(num_capsule=hyper_param['ner_classes'],
                            dim_capsule=hyper_param['ner_capsule_dim'],
                            routings=hyper_param['num_dynamic_routing_passes'],
                            name='nercaps')(primarycaps)

    # Capsule lengths serve as per-class probability predictions.
    out_pred = Length(name='out_pred')(ner_caps)

    if verbose:
        print("x", x.get_shape())
        if hyper_param['use_pos_tags']:
            print("x_pos", x_pos.get_shape())
        if hyper_param['use_capitalization_info']:
            print("x_capital", x_capital.get_shape())
        print("embed", embed.get_shape())
        print("conv1", conv1.get_shape())
        print("primarycaps", primarycaps.get_shape())
        print("ner_caps", ner_caps.get_shape())
        print("out_pred", out_pred.get_shape())

    if hyper_param['use_decoder']:
        decoder_y_cat = Input(shape=(hyper_param['ner_classes'], ),
                              name='decoder_y_cat')
        # True label masks during training; capsule with maximal length is
        # used for prediction.
        masked_by_y = Mask(name='masked_by_y')([ner_caps, decoder_y_cat])
        masked = Mask()(ner_caps)

        # Decoder used during training (mask from true labels).
        train_decoder_dense1 = Dense(
            hyper_param['decoder_feed_forward_1'], activation='relu',
            input_dim=hyper_param['ner_capsule_dim'] * hyper_param['ner_classes'],
            name='train_decoder_dense1')(masked_by_y)
        train_decoder_dense1_dropout = Dropout(
            hyper_param['decoder_dropout'])(train_decoder_dense1)
        train_decoder_dense2 = Dense(
            hyper_param['decoder_feed_forward_2'], activation='relu',
            name='train_decoder_dense2')(train_decoder_dense1_dropout)
        train_decoder_dense2_dropout = Dropout(
            hyper_param['decoder_dropout'])(train_decoder_dense2)
        train_decoder_output = Dense(
            hyper_param['embed_dim'], activation=None,
            name='train_decoder_output')(train_decoder_dense2_dropout)

        # Decoder used for evaluation/prediction (mask from longest capsule).
        eval_decoder_dense1 = Dense(
            hyper_param['decoder_feed_forward_1'], activation='relu',
            input_dim=hyper_param['ner_capsule_dim'] * hyper_param['ner_classes'],
            name='eval_decoder_dense1')(masked)
        eval_decoder_dense2 = Dense(
            hyper_param['decoder_feed_forward_2'], activation='relu',
            name='eval_decoder_dense2')(eval_decoder_dense1)
        eval_decoder_output = Dense(
            hyper_param['embed_dim'], activation=None,
            name='eval_decoder_output')(eval_decoder_dense2)

        if verbose:
            print("Decoder model enabled for GloVe vector deconstruction...")
            print("decoder_y_cat", decoder_y_cat.get_shape())
            print("masked_by_y", masked_by_y.get_shape())
            print("train_decoder_dense1", train_decoder_dense1.get_shape())
            print("train_decoder_dense1_dropout",
                  train_decoder_dense1_dropout.get_shape())
            print("train_decoder_dense2", train_decoder_dense2.get_shape())
            print("train_decoder_dense2_dropout",
                  train_decoder_dense2_dropout.get_shape())
            print("train_decoder_output", train_decoder_output.get_shape())
            print("masked", masked.get_shape())
            print("eval_decoder_dense1", eval_decoder_dense1.get_shape())
            print("eval_decoder_dense2", eval_decoder_dense2.get_shape())
            print("eval_decoder_output", eval_decoder_output.get_shape())

    # --- construct input list in fixed [x, x_pos, x_capital] order ---
    input_list = [x]
    if hyper_param['use_pos_tags']:
        input_list.append(x_pos)
    if hyper_param['use_capitalization_info']:
        input_list.append(x_capital)

    # BUG FIX: was `== False`, which disagreed with the truthiness test
    # used above to *build* the decoder -- a falsy non-False value (e.g.
    # None) would have skipped decoder construction yet still taken the
    # decoder-enabled return path and hit a NameError.
    if not hyper_param['use_decoder']:
        print("decoder/reconstruction DISabled")
        print("returning 1 model")
        return Model(inputs=input_list, outputs=[out_pred])
    else:
        train_model = Model(inputs=input_list + [decoder_y_cat],
                            outputs=[out_pred, train_decoder_output])
        eval_model = Model(inputs=input_list,
                           outputs=[out_pred, eval_decoder_output])
        print("decoder/reconstruction enabled")
        print("returning a list of 2 models: train_model, eval_model")
        return train_model, eval_model