elif emo_attr == 'Val': Label_mean = loadmat('./NormTerm/val_norm_means.mat')['normal_para'][0][0] Label_std = loadmat('./NormTerm/val_norm_stds.mat')['normal_para'][0][0] # Regression Task test_file_path, test_file_tar = getPaths(label_dir, split_set='Test', emo_attr=emo_attr) #test_file_path, test_file_tar = getPaths(label_dir, split_set='Validation', emo_attr=emo_attr) # Setting Online Prediction Model Graph (predict sentence by sentence rather than a data batch) time_step = 62 # same as the number of frames within a chunk (i.e., m) feat_num = 130 # number of LLDs features if atten_type == 'GatedVec': # Input Layer inputs = Input((time_step, feat_num)) cnn_inputs = Permute((2, 1))(inputs) # cnn1: [128, 128] encode = Conv1D(filters=128, kernel_size=3, strides=1, dilation_rate=1, data_format='channels_first')(cnn_inputs) encode = BatchNormalization()(encode) encode = Activation('relu')(encode) encode = Conv1D(filters=128, kernel_size=3, strides=1, dilation_rate=1, data_format='channels_first')(encode) encode = BatchNormalization()(encode) encode = Activation('relu')(encode) # cnn2: [64, 64] encode = Conv1D(filters=64, kernel_size=3, strides=1, dilation_rate=1, data_format='channels_first')(encode) encode = BatchNormalization()(encode) encode = Activation('relu')(encode) encode = Conv1D(filters=64, kernel_size=3, strides=1, dilation_rate=1, data_format='channels_first')(encode) encode = BatchNormalization()(encode) encode = Activation('relu')(encode) # cnn3: [32]
(WINDOW_LENGTH, ) + (56, ), (WINDOW_LENGTH, ) + (60, ), (WINDOW_LENGTH, ) + (55, ), (WINDOW_LENGTH, ) + (56, ), (WINDOW_LENGTH, ) + (60, ), (WINDOW_LENGTH, ) + (55, ), (WINDOW_LENGTH, ) + (56, ), (WINDOW_LENGTH, ) + (60, ), (WINDOW_LENGTH, ) + (55, ), (WINDOW_LENGTH, ) + (56, ), (WINDOW_LENGTH, ) + (60, ), (WINDOW_LENGTH, ) + (55, ), (WINDOW_LENGTH, ) + (56, )] models = [] for i in range(model_number): models.append(Sequential()) if K.image_dim_ordering() == 'tf': # (n, channels) for i in range(model_number): models[i].add(Permute((2, 1), input_shape=input_shape[i])) elif K.image_dim_ordering() == 'th': # (channels, n) for i in range(model_number): models[i].add(Permute((1, 2), input_shape=input_shape[i])) else: raise RuntimeError('Unknown image_dim_ordering.') for i in range(model_number): models[i].add(Flatten()) models[i].add(Dense(128)) models[i].add(Activation('relu')) models[i].add(Dense(128)) models[i].add(Activation('relu')) models[i].add(Dense(128)) models[i].add(Activation('relu'))
def get_net():
    """Build and compile a channels-first U-Net for per-pixel classification.

    The architecture is driven by module-level settings: ``N_channels``
    (input channels), ``img_h`` / ``img_w`` (input size) and ``C`` (number
    of output classes).  Four encoder levels (32, 64, 128 and 256 filters)
    are each followed by 2x2 max pooling, a 512-filter bottleneck sits at
    the bottom, and the decoder upsamples and concatenates the matching
    encoder features on the channel axis (axis 1).  Because of the four
    poolings, ``img_h`` and ``img_w`` need to be multiples of 16 for the
    skip connections to line up.

    Every spatial layer is given ``data_format='channels_first'``
    explicitly: the input shape, the concatenation axis and the final
    reshape are all hard-wired to that layout, so the model must not
    silently follow a different global Keras image data format.

    Returns:
        A Keras ``Model`` compiled with Adam (lr=1e-4), categorical
        cross-entropy and an accuracy metric.  Its output has shape
        ``(img_h * img_w, C)`` per sample, with a softmax taken over the
        ``C`` class scores of each pixel.
    """
    data_format = 'channels_first'

    def conv3x3(tensor, filters):
        # 3x3 same-padded ReLU convolution with He-normal initialisation.
        return Conv2D(filters, (3, 3), activation='relu', padding='same',
                      kernel_initializer='he_normal',
                      data_format=data_format)(tensor)

    def upsample_and_merge(tensor, skip):
        # Double the spatial size, then stack the encoder features behind
        # the upsampled ones on the channel axis.
        upsampled = UpSampling2D(size=(2, 2),
                                 data_format=data_format)(tensor)
        return concatenate([upsampled, skip], axis=1)

    inputs = Input(shape=(N_channels, img_h, img_w))

    # Contracting path: blocks 1-4, each two convolutions plus a pooling.
    skips = []
    x = inputs
    for block_index, filters in enumerate((32, 64, 128, 256), start=1):
        x = conv3x3(conv3x3(x, filters), filters)
        skips.append(x)
        x = MaxPooling2D((2, 2), strides=(2, 2), data_format=data_format,
                         name='block%d_pool' % block_index)(x)

    # Block 5: bottleneck.
    x = conv3x3(conv3x3(x, 512), 512)

    # Expanding path: the three deeper levels use two convolutions each.
    for filters in (256, 128, 64):
        x = upsample_and_merge(x, skips.pop())
        x = conv3x3(conv3x3(x, filters), filters)

    # The top level uses a single 32-filter convolution.
    x = upsample_and_merge(x, skips.pop())
    x = conv3x3(x, 32)

    # 1x1 convolution down to one score map per class.
    scores = Conv2D(C, (1, 1), activation='relu',
                    kernel_initializer='he_normal',
                    data_format=data_format)(x)

    # (C, h, w) -> (C, h*w) -> (h*w, C) so the softmax runs over classes.
    flat = Reshape((C, img_h * img_w))(scores)
    per_pixel = Permute((2, 1))(flat)
    probabilities = Activation('softmax')(per_pixel)

    # Keras 2 names these arguments `inputs` / `outputs`; the singular
    # `input=` / `output=` spellings are the legacy Keras 1 keywords.
    model = Model(inputs=inputs, outputs=probabilities)
    model.compile(optimizer=Adam(lr=1.0e-4),
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    return model
def MusicTaggerCRNN(weights='msd', input_tensor=None, include_top=True):
    '''Instantiate the MusicTaggerCRNN architecture.

    Optionally loads weights pre-trained on the Million Song Dataset.
    The dimension ordering convention used by the model is the one
    returned by `K.image_dim_ordering()`, i.e. the one specified in your
    Keras config file; weight files exist for both orderings.

    The expected input is a mel-spectrogram of shape `(1, 96, 1366)`
    ("th" ordering) or `(96, 1366, 1)` (any other ordering).

    As a side effect, the architecture is written as indented JSON to
    `Music_Tagger.json` in the current working directory on every call.

    # Arguments
        weights: one of `None` (random initialization)
            or "msd" (pre-training on Million Song Dataset).
        input_tensor: optional Keras tensor (i.e. output of
            `layers.Input()`) to use as the mel-spectrogram input.
        include_top: whether to include the 1 fully-connected
            layer (output layer) at the top of the network.
            If False, the network outputs 32-dim features.

    # Returns
        A Keras model instance.

    # Raises
        ValueError: if `weights` is neither `None` nor "msd".
    '''
    if weights not in {'msd', None}:
        raise ValueError('The `weights` argument should be either '
                         '`None` (random initialization) or `msd` '
                         '(pre-training on Million Song Dataset).')

    dim_ordering = K.image_dim_ordering()
    theano_layout = dim_ordering == 'th'

    # Input shape and the axes used by the batch-normalisation layers.
    if theano_layout:
        input_shape, channel_axis, time_axis = (1, 96, 1366), 1, 3
    else:
        input_shape, channel_axis, time_axis = (96, 1366, 1), 3, 2

    if input_tensor is None:
        melgram_input = Input(shape=input_shape)
    elif K.is_keras_tensor(input_tensor):
        melgram_input = input_tensor
    else:
        melgram_input = Input(tensor=input_tensor, shape=input_shape)

    # Input block: pad the time axis from 1366 to 1440 frames.
    x = ZeroPadding2D(padding=(0, 37))(melgram_input)
    x = BatchNormalization(axis=time_axis, name='bn_0_freq')(x)

    # Four conv blocks; layer names must stay fixed because the
    # pre-trained weights are loaded by name.
    conv_blocks = (
        ('conv1', 'bn1', 'pool1', 64, (2, 2)),
        ('conv2', 'bn2', 'pool2', 128, (3, 3)),
        ('conv3', 'bn3', 'pool3', 128, (4, 4)),
        ('conv4', 'bn4', 'pool4', 128, (4, 4)),
    )
    for conv_name, bn_name, pool_name, n_filters, pool in conv_blocks:
        x = Convolution2D(n_filters, 3, 3, border_mode='same',
                          name=conv_name)(x)
        x = BatchNormalization(axis=channel_axis, mode=0, name=bn_name)(x)
        x = ELU()(x)
        x = MaxPooling2D(pool_size=pool, strides=pool, name=pool_name)(x)

    # Reshape to a (time steps, features) sequence for the recurrent part.
    if theano_layout:
        x = Permute((3, 1, 2))(x)
    x = Reshape((15, 128))(x)

    # GRU block 1, 2, output
    x = GRU(32, return_sequences=True, name='gru1')(x)
    x = GRU(32, return_sequences=False, name='gru2')(x)
    if include_top:
        x = Dense(50, activation='sigmoid', name='output')(x)

    model = Model(melgram_input, x)

    # Dump the architecture for inspection.
    with open("Music_Tagger.json", "w") as model_json:
        architecture = json.loads(model.to_json())
        model_json.write(json.dumps(architecture, indent=4))

    if weights is None:
        return model

    # Pick the weight file matching the active dimension ordering.
    if dim_ordering == 'tf':
        weights_path = get_file(
            'music_tagger_crnn_weights_tf_kernels_tf_dim_ordering.h5',
            TF_WEIGHTS_PATH,
            cache_subdir='models')
    else:
        weights_path = get_file(
            'music_tagger_crnn_weights_tf_kernels_th_dim_ordering.h5',
            TH_WEIGHTS_PATH,
            cache_subdir='models')
    model.load_weights(weights_path, by_name=True)
    if K.backend() == 'theano':
        convert_all_kernels_in_model(model)
    return model
def L2X(datatype="mnist", train=True):
    """Build the selector/classifier pair, train or load it, and score x_val.

    A selector network maps the input to ``input_shape * num_groups``
    logits, which ``Sample_Concrete`` turns into samples.  These are
    reshaped to ``(input_shape, num_groups)`` and transposed to
    ``(num_groups, input_shape)``.  The input is tiled once per group and
    multiplied with the samples; only the first group's masked input
    (index 0 on the group axis) is fed to the classifier.

    Data comes from ``create_data()``; ``num_groups`` and ``BATCH_SIZE``
    are module-level settings.

    Args:
        datatype: name used to build the weight file path
            ``models/<datatype>/L2X.hdf5``.
        train: when true, compile and fit the model for 2 epochs while
            checkpointing the best ``val_acc``; otherwise load the weights
            from that path by layer name.

    Returns:
        A 6-tuple ``(groups, seconds_after_training, seconds_until_trained,
        scores, x_val, y_val)``.  ``groups`` is ``compute_groups(scores)``
        and ``scores`` is the selector output predicted for ``x_val``.
        Timing starts after the data is created; when ``train`` is false
        the third entry is 0 and the second covers the whole run.
    """
    x_train, y_train, x_val, y_val, input_shape = create_data()
    started_at = time.time()
    trained_at = started_at
    print(input_shape)
    activation = 'relu'

    def regularized_dense(units, layer_name, act=activation):
        # Dense layer with the L2 weight penalty shared by all named layers.
        return Dense(units, activation=act, name=layer_name,
                     kernel_regularizer=regularizers.l2(1e-3))

    # P(S|X): the selector that learns which features matter.
    model_input = Input(shape=(input_shape, ), dtype='float32')
    selector = regularized_dense(256, 's/dense1')(model_input)
    selector = regularized_dense(256, 's/dense2')(selector)
    logits = Dense(input_shape * num_groups)(selector)

    k = 1
    tau = 0.1
    samples = Sample_Concrete(tau, k, input_shape, num_groups,
                              name='sample')(logits)
    samples = Reshape((input_shape, num_groups))(samples)
    samples = Permute((2, 1))(samples)

    def tiled_shape(input_shapes):
        # (batch, dim) -> (batch, num_groups, dim)
        return tuple((input_shapes[0], num_groups, input_shapes[1]))

    # Repeat the raw input once per group so it can be masked group-wise.
    tile_layer = Lambda(
        lambda x: tf.tile(tf.expand_dims(x, axis=-2), [1, num_groups, 1]),
        output_shape=tiled_shape)
    masked_groups = Multiply()([tile_layer(model_input), samples])

    # Keep only the first group's masked input for the classifier.
    take_first = Lambda(lambda x: x[:, 0, :],
                        output_shape=lambda x: tuple((x[0], x[2])))
    new_model_input = take_first(masked_groups)
    # (Earlier experiments flattened the groups or used a batched matrix
    # product between samples and input instead of this selection.)
    print('heihei', new_model_input.shape)

    # q(X_S): the variational family, a small classifier on the masked input.
    hidden = regularized_dense(32, 'dense1')(new_model_input)
    hidden = BatchNormalization()(hidden)  # batchnorm for stability
    hidden = regularized_dense(16, 'dense2')(hidden)
    hidden = BatchNormalization()(hidden)
    preds = regularized_dense(10, 'dense4', act='softmax')(hidden)

    model = Model(model_input, preds)
    model.summary()

    weights_path = 'models/{}/L2X.hdf5'.format(datatype)
    if not train:
        model.load_weights(weights_path, by_name=True)
    else:
        model.compile(loss='categorical_crossentropy',
                      optimizer=optimizers.Adam(lr=1e-3),
                      metrics=['acc'])
        best_checkpoint = ModelCheckpoint(weights_path, monitor='val_acc',
                                          verbose=1, save_best_only=True,
                                          mode='max')
        model.fit(x_train, y_train,
                  validation_data=(x_val, y_val),
                  callbacks=[best_checkpoint],
                  epochs=2,
                  batch_size=BATCH_SIZE)
        trained_at = time.time()

    # The selector on its own yields the per-group scores; for now
    # `samples` is a matrix rather than a vector.
    pred_model = Model(model_input, samples)
    pred_model.compile(loss=None, optimizer='rmsprop', metrics=[None])
    scores = pred_model.predict(x_val, verbose=1, batch_size=BATCH_SIZE)

    median_ranks = compute_groups(scores)
    seconds_after_training = time.time() - trained_at
    seconds_until_trained = trained_at - started_at
    return (median_ranks, seconds_after_training, seconds_until_trained,
            scores, x_val, y_val)
def attention_3d_block(inputs, time_steps=3):
    """Re-weight a sequence tensor with a learned soft attention mask.

    The two non-batch axes of ``inputs`` are swapped, a softmax ``Dense``
    layer with ``time_steps`` units is applied along the last axis, and the
    result is swapped back and multiplied element-wise with ``inputs``.

    Args:
        inputs: Keras tensor with two non-batch axes (the block applies
            ``Permute((2, 1))`` to it).  Its first non-batch axis should
            have length ``time_steps`` so that the attention weights and
            the inputs have matching shapes in the final multiply.
        time_steps: number of units of the attention ``Dense`` layer.
            Defaults to 3, the value that used to be hard-coded here.

    Returns:
        The element-wise product of ``inputs`` and the attention weights.
    """
    # Transpose so the Dense layer acts along the original first axis.
    a = Permute((2, 1))(inputs)
    # Fully connected layer with `time_steps` units; the softmax
    # normalises the weights along that axis.
    a = Dense(time_steps, activation='softmax')(a)
    # Transpose back to the layout of `inputs`.
    a_probs = Permute((2, 1))(a)
    # Element-wise product (not a matrix product): scale each input value
    # by its attention weight.
    output_attention_mul = Multiply()([inputs, a_probs])
    return output_attention_mul
# Tail of a Sequential model definition; `model`, `nb_filters`,
# `pool_size_1`, `pool_size_2`, `kernel_size_2` and `nb_classes` are
# defined earlier in the script.
model.add(Activation('tanh'))
pool1 = AveragePooling2D(pool_size=pool_size_1)
model.add(pool1)

# Second convolution stage: same-padded convolution, tanh, average pooling.
conv2 = Convolution2D(nb_filters, kernel_size_2[0], kernel_size_2[1],
                      border_mode='same')
model.add(conv2)
model.add(Activation('tanh'))
pool2 = AveragePooling2D(pool_size=pool_size_2)
model.add(pool2)

# Swap the first two non-batch axes, then flatten to a single vector.
# NOTE(review): the flattened size 200 * 40 * 59 is hard-coded and has to
# match the shape produced by the layers above -- confirm if any of the
# kernel/pool sizes or the input size change.
model.add(Permute((2, 1, 3)))
model.add(Reshape((200 * 40 * 59, )))

# Classifier head: one hidden layer of 128 tanh units, then a softmax
# over `nb_classes` outputs.
model.add(Dense(128))
model.add(Activation('tanh'))
model.add(Dense(nb_classes))
model.add(Activation('softmax'))
model.summary()

# Save a diagram of the architecture (absolute, machine-specific path).
plot(model,
     to_file='/home/xcat/experiment/BCI2008/ds1a/model/1*128sgd.png',
     show_shapes=True)

# Optimizer choice: plain SGD with default settings; the alternatives
# below were tried and left disabled.
#opt = RMSprop()
#opt = Adam(lr=0.002)
opt = SGD()
def get_net():
    """Build and compile a channels-last U-Net for per-pixel classification.

    The architecture is driven by module-level settings: ``img_h`` /
    ``img_w`` (input size), ``N_channels`` (input channels) and ``C``
    (number of output classes).  The encoder has four levels (64, 128, 256
    and 512 filters) with 2x2 max pooling, followed by a 1024-filter
    bottleneck; dropout (0.5) is applied on the 512- and 1024-filter
    levels.  Each decoder level upsamples, applies a 2x2 convolution and
    concatenates the matching encoder features on the channel axis
    (axis 3).  Because of the four poolings, ``img_h`` and ``img_w`` need
    to be multiples of 16 for the skip connections to line up.

    Returns:
        A Keras ``Model`` compiled with Adam (lr=1e-4), categorical
        cross-entropy and an accuracy metric.  Its output has shape
        ``(img_h * img_w, C)`` per sample, with a softmax taken over the
        ``C`` class scores of each pixel.
    """
    def conv(tensor, filters, kernel_size=3):
        # Same-padded ReLU convolution with He-normal initialisation.
        return Conv2D(filters, kernel_size, activation='relu',
                      padding='same',
                      kernel_initializer='he_normal')(tensor)

    def up_and_merge(tensor, skip, filters):
        # Upsample x2, apply a 2x2 convolution, then put the encoder
        # features in front of the result on the channel axis.
        upsampled = UpSampling2D(size=(2, 2))(tensor)
        up = conv(upsampled, filters, kernel_size=2)
        return concatenate([skip, up], axis=3)

    inputs = Input(shape=(img_h, img_w, N_channels))

    # Contracting path.
    conv1 = conv(conv(inputs, 64), 64)
    pool1 = MaxPooling2D(pool_size=(2, 2))(conv1)
    conv2 = conv(conv(pool1, 128), 128)
    pool2 = MaxPooling2D(pool_size=(2, 2))(conv2)
    conv3 = conv(conv(pool2, 256), 256)
    pool3 = MaxPooling2D(pool_size=(2, 2))(conv3)
    conv4 = conv(conv(pool3, 512), 512)
    drop4 = Dropout(0.5)(conv4)
    pool4 = MaxPooling2D(pool_size=(2, 2))(drop4)

    # Bottleneck.
    conv5 = conv(conv(pool4, 1024), 1024)
    drop5 = Dropout(0.5)(conv5)

    # Expanding path.
    merge6 = up_and_merge(drop5, drop4, 512)
    conv6 = conv(conv(merge6, 512), 512)
    merge7 = up_and_merge(conv6, conv3, 256)
    conv7 = conv(conv(merge7, 256), 256)
    merge8 = up_and_merge(conv7, conv2, 128)
    conv8 = conv(conv(merge8, 128), 128)
    merge9 = up_and_merge(conv8, conv1, 64)
    conv9 = conv(conv(merge9, 64), 64)
    conv9 = conv(conv9, 2)

    # 1x1 convolution down to one score per class and pixel.
    conv10 = Conv2D(C, (1, 1), activation='relu',
                    kernel_initializer='he_normal')(conv9)

    # conv10 is channels-last, i.e. (img_h, img_w, C).  Flatten only the
    # two spatial axes so each row keeps the C scores of a single pixel.
    # Reshaping to (C, img_h * img_w) and permuting, as in the
    # channels-first variant of this network, would mix scores of
    # different pixels into the same softmax.
    per_pixel = Reshape((img_h * img_w, C))(conv10)
    activation = Activation('softmax')(per_pixel)

    # Keras 2 names these arguments `inputs` / `outputs`; the singular
    # `input=` / `output=` spellings are the legacy Keras 1 keywords.
    model = Model(inputs=inputs, outputs=activation)
    model.compile(optimizer=Adam(lr=1.0e-4),
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    return model
def isensee2017_model(input_shape=(4, 128, 128, 128), n_base_filters=16,
                      depth=5, dropout_rate=0.3, n_segmentation_levels=3,
                      n_labels=1, optimizer=Adam,
                      initial_learning_rate=5e-4,
                      loss_function=dice_coefficient_loss,
                      activation_name="sigmoid", summation=False, **kargs):
    """
    Build and compile a 2D variant of the segmentation network proposed by
    Isensee et al. for the BRATS 2017 competition:
    https://www.cbica.upenn.edu/sbia/Spyridon.Bakas/MICCAI_BraTS/MICCAI_BraTS_2017_proceedings_shortPapers.pdf

    The network is highly similar to the model proposed by Kayalibay et al.
    "CNN-based Segmentation of Medical Imaging Data", 2017:
    https://arxiv.org/pdf/1701.03056.pdf

    The input is permuted with (3, 1, 2) before the first convolution and
    the activated output is permuted back with (2, 3, 1); feature maps are
    concatenated on axis 1 in between.

    :param input_shape: shape passed to the Keras ``Input`` layer.
    :param n_base_filters: filters on the first level; level ``i`` uses
        ``2**i * n_base_filters``.
    :param depth: number of levels in the context (encoder) pathway.
    :param dropout_rate: dropout rate handed to the context modules.
    :param n_segmentation_levels: localization levels (counted from the
        top) that get a 1x1 segmentation convolution.
    :param n_labels: number of filters of each segmentation convolution.
    :param optimizer: optimizer class, instantiated with
        ``lr=initial_learning_rate``.
    :param initial_learning_rate: learning rate given to the optimizer.
    :param loss_function: loss used to compile the model.
    :param activation_name: activation applied to the final output.
    :param summation: when true, the segmentation layers are summed from
        the deepest level upwards, upsampling between levels; otherwise
        only the top-level segmentation layer is used.
    :param kargs: accepted for call compatibility and otherwise unused.
    :return: the compiled Keras model.
    """
    metrics = ['binary_accuracy', vod_coefficient]
    if loss_function != dice_coefficient_loss:
        metrics.append(dice_coefficient)

    inputs = Input(input_shape)
    inputs_p = Permute((3, 1, 2))(inputs)

    # Context pathway: every level after the first downsamples with a
    # strided convolution, then adds a residual context module.
    level_filters = [(2**level) * n_base_filters for level in range(depth)]
    level_output_layers = []
    current_layer = inputs_p
    for level, n_level_filters in enumerate(level_filters):
        if level == 0:
            in_conv = create_convolution_block(current_layer,
                                               n_level_filters)
        else:
            in_conv = create_convolution_block(current_layer,
                                               n_level_filters,
                                               strides=(2, 2))
        context_output_layer = create_context_module(
            in_conv, n_level_filters, dropout_rate=dropout_rate)
        current_layer = Add()([in_conv, context_output_layer])
        level_output_layers.append(current_layer)

    # Localization pathway: walk back up from the second deepest level.
    segmentation_layers = []
    for level in reversed(range(depth - 1)):
        up_sampling = create_up_sampling_module(current_layer,
                                                level_filters[level])
        merged = concatenate([level_output_layers[level], up_sampling],
                             axis=1)
        current_layer = create_localization_module(merged,
                                                   level_filters[level])
        if level < n_segmentation_levels:
            # Prepend so that index 0 is always the top level.
            segmentation_layers.insert(
                0, Conv2D(n_labels, (1, 1))(current_layer))

    if not summation:
        output_layer = segmentation_layers[0]
    else:
        # Deep supervision: accumulate from the deepest segmentation
        # level, upsampling after every level except the top one.
        output_layer = None
        for level in range(n_segmentation_levels - 1, -1, -1):
            segmentation_layer = segmentation_layers[level]
            if output_layer is None:
                output_layer = segmentation_layer
            else:
                output_layer = Add()([output_layer, segmentation_layer])
            if level > 0:
                output_layer = UpSampling2D(size=(2, 2))(output_layer)

    activation_block = Activation(activation_name)(output_layer)
    activation_block = Permute((2, 3, 1))(activation_block)

    model = Model(inputs=inputs, outputs=activation_block)
    model.compile(optimizer=optimizer(lr=initial_learning_rate),
                  loss=loss_function, metrics=metrics)
    return model
def basic_crnn_2d(rows, cols, channels, num_classes):
    """Build and compile a 2D convolutional-recurrent classifier.

    Five convolution blocks (convolution, batch normalisation, ReLU, 3x3
    max pooling with strides (2, 1)) are followed by two bidirectional
    CuDNN GRU layers and a sigmoid output layer.  Intermediate tensor
    shapes are printed while the graph is built.

    The sequence reshape is hard-coded to ``(126, 5 * 32)``: after the
    five poolings the second spatial axis must still be 126 long and the
    first one must have shrunk to 5, so ``cols`` has to be 126 and
    ``rows`` has to lie between 129 and 160.

    The optimizer ``opt`` and the metric list ``acc_dcf_metric_list`` are
    read from module scope.

    Args:
        rows: size of the first spatial axis of the input.
        cols: size of the second spatial axis of the input.
        channels: number of input channels.
        num_classes: number of sigmoid output units.

    Returns:
        The compiled Keras ``Model`` taking ``input_1`` as its only input.
    """
    pool_size = (3, 3)
    activ = 'relu'
    # (filters, kernel size) of the five convolution blocks.
    conv_blocks = (
        (16, (7, 7)),
        (32, (5, 5)),
        (32, (3, 3)),
        (32, (3, 3)),
        (32, (3, 3)),
    )

    input_1 = Input(shape=[rows, cols, channels])
    # The original referenced an undefined name `row` here, which raised
    # NameError.  This second input is created but not wired into the
    # model below.
    input_2 = Input(shape=[rows, cols, channels])
    # Single-argument print(...) calls behave the same on Python 2 and 3.
    print(input_1.shape)
    print(input_2.shape)

    x = input_1
    for n_filters, kernel_size in conv_blocks:
        x = Conv2D(n_filters, kernel_size=kernel_size, padding='same')(x)
        x = BatchNormalization()(x)
        x = Activation(activ)(x)
        # Stride 2 on the first spatial axis only; the second keeps its
        # length so it can serve as the sequence axis afterwards.
        x = MaxPooling2D(pool_size, strides=(2, 1), padding='same')(x)
        print(x.shape)

    # Make the second spatial axis the sequence axis and fold the
    # remaining axes into the feature vector of each step.
    x = Permute((2, 1, 3))(x)
    x = Reshape((126, 5 * 32))(x)
    print(x.shape)

    x = Bidirectional(CuDNNGRU(126, return_sequences=True))(x)
    x = Bidirectional(CuDNNGRU(126, return_sequences=False))(x)
    print(x.shape)

    #x = Dropout(0.25) (x)
    final = Dense(num_classes)(x)
    outputs = Activation('sigmoid', name='target')(final)

    model = Model([input_1], [outputs])
    model.compile(optimizer=opt, loss=['binary_crossentropy'],
                  metrics=acc_dcf_metric_list)
    return model
def build_model(embeddings):
    """Build and compile the relation-classification network.

    The sentence is represented by concatenating word, two position, tag
    and character-level features per token.  That representation is
    scaled by an input-attention term computed from the words and the two
    entities, passed through a word-level convolution and pooled piecewise
    using the segment input.  The pooled vector is concatenated with the
    flattened lexical features (both entities and their contexts) and
    classified with a softmax layer of ``NB_RELATIONS`` units.

    Sizes such as ``SEQUENCE_LEN``, ``WORD_LEN``, ``ENTITY_LEN``,
    ``WORD_EMBED_SIZE`` and ``WORD_REPRE_SIZE`` are module-level constants.

    Args:
        embeddings: mapping with the keys ``"word_embeddings"``,
            ``"position_embeddings_1"``, ``"position_embeddings_2"``,
            ``"tag_embeddings"`` and ``"char_embeddings"``.  Each value is
            a 2-D matrix used as the initial weights of an ``Embedding``
            layer; word and character embeddings are frozen.

    Returns:
        A Keras ``Model`` compiled with Adam and sparse categorical
        cross-entropy.  Its inputs are, in order: words, pos1, pos2, e1,
        e2, e1 context, e2 context, tags, chars, segs.
    """
    def int_input(*dims):
        # Integer index input with the given per-sample shape.
        return Input(shape=list(dims), dtype='int32')

    def pretrained(matrix, **options):
        # Embedding layer initialised from a pre-computed matrix.
        return Embedding(matrix.shape[0], matrix.shape[1],
                         weights=[matrix], **options)

    def conv_settings():
        # Fresh activation/initialiser settings shared by all convolutions.
        return dict(activation="relu",
                    kernel_initializer=TruncatedNormal(stddev=0.1),
                    bias_initializer=Constant(0.1))

    def entity_summary(entity_embedded):
        # Collapse an entity into one vector and repeat it for each token.
        conved = Conv1D(filters=WORD_EMBED_SIZE, kernel_size=ENTITY_LEN,
                        padding="valid", **conv_settings())(entity_embedded)
        flat = Reshape([WORD_EMBED_SIZE])(conved)
        return RepeatVector(SEQUENCE_LEN)(flat)

    # input representation features
    words_input = int_input(SEQUENCE_LEN)
    pos1_input = int_input(SEQUENCE_LEN)
    pos2_input = int_input(SEQUENCE_LEN)
    tags_input = int_input(SEQUENCE_LEN)
    chars_input = int_input(SEQUENCE_LEN, WORD_LEN)
    segs_input = Input(shape=[SEQUENCE_LEN, 3], dtype='float32')

    # lexical features (L1-L4)
    e1_input = int_input(ENTITY_LEN)
    e2_input = int_input(ENTITY_LEN)
    e1context_input = int_input(2)
    e2context_input = int_input(2)

    # One frozen word-embedding layer is shared by every word-index input.
    words_embed = pretrained(embeddings["word_embeddings"], trainable=False)
    words = words_embed(words_input)
    e1 = words_embed(e1_input)
    e2 = words_embed(e2_input)
    e1context = words_embed(e1context_input)
    e2context = words_embed(e2context_input)

    lexical_features = [Flatten()(embedded)
                        for embedded in (e1, e2, e1context, e2context)]

    # position, tag and character embeddings
    pos1 = pretrained(embeddings["position_embeddings_1"])(pos1_input)
    pos2 = pretrained(embeddings["position_embeddings_2"])(pos2_input)
    tags = pretrained(embeddings["tag_embeddings"])(tags_input)
    chars = pretrained(embeddings["char_embeddings"],
                       trainable=False)(chars_input)

    # character-level convolution, pooled by CharLevelPooling
    char_feature = Conv2D(filters=NB_FILTERS_CHAR,
                          kernel_size=(1, WINDOW_SIZE_CHAR),
                          padding="same", **conv_settings())(chars)
    char_feature = CharLevelPooling()(char_feature)

    # input representation
    input_repre = Concatenate()([words, pos1, pos2, tags, char_feature])
    input_repre = Dropout(DROPOUT)(input_repre)

    # input attention
    e1_repeat = entity_summary(e1)
    e2_repeat = entity_summary(e2)
    attention_in = Concatenate()([words, e1_repeat, e2_repeat])
    # NOTE(review): the softmax of this Dense layer is taken over its last
    # axis, which has size 1, so every weight comes out as 1.0 and the
    # Multiply below leaves input_repre unchanged.  A softmax over the
    # sequence axis was probably intended -- confirm before changing, as
    # it alters what the model learns.
    alpha = Dense(1, activation="softmax")(attention_in)
    alpha = Reshape([SEQUENCE_LEN])(alpha)
    alpha = RepeatVector(WORD_REPRE_SIZE)(alpha)
    alpha = Permute([2, 1])(alpha)
    input_repre = Multiply()([input_repre, alpha])

    # word-level convolution followed by piecewise max pooling
    input_conved = Conv1D(filters=NB_FILTERS_WORD,
                          kernel_size=WINDOW_SIZE_WORD,
                          padding="same", **conv_settings())(input_repre)
    input_pooled = PiecewiseMaxPool()([input_conved, segs_input])

    # fully connected
    output = Concatenate()([input_pooled] + lexical_features)
    output = Dropout(DROPOUT)(output)
    output = Dense(
        units=NB_RELATIONS,
        activation="softmax",
        kernel_initializer=TruncatedNormal(stddev=0.1),
        bias_initializer=Constant(0.1),
        kernel_regularizer='l2',
        bias_regularizer='l2',
    )(output)

    model_inputs = [words_input, pos1_input, pos2_input, e1_input,
                    e2_input, e1context_input, e2context_input,
                    tags_input, chars_input, segs_input]
    model = Model(inputs=model_inputs, outputs=[output])
    model.compile(loss="sparse_categorical_crossentropy",
                  metrics=["accuracy"], optimizer='adam')
    return model
Merge([story_encoder_m, question_encoder], mode="dot", dot_axes=[2, 2])) # encode story into vector space of question # output dim: (None, story_maxlen, query_maxlen) story_encoder_c = Sequential() story_encoder_c.add( Embedding(input_dim=vocab_size, output_dim=question_maxlen, input_length=story_maxlen)) story_encoder_c.add(Dropout(0.3)) # combine match and story vectors. # Output dim: (None, query_maxlen, story_maxlen) response = Sequential() response.add(Merge([match, story_encoder_c], mode="sum")) response.add(Permute((2, 1))) ## combine response and question vectors and do logistic regression answer = Sequential() answer.add(Merge([response, question_encoder], mode="concat", concat_axis=-1)) answer.add(LSTM(LSTM_OUTPUT_SIZE)) answer.add(Dropout(0.3)) answer.add(Dense(vocab_size)) answer.add(Activation("softmax")) answer.compile(optimizer="rmsprop", loss="categorical_crossentropy", metrics=["accuracy"]) answer.fit([Xs_train, Xq_train, Xs_train], Y_train,
def attention(inputs, length):
    """Scale `inputs` element-wise by a learned softmax attention mask.

    The two non-batch axes of `inputs` are swapped, a softmax `Dense`
    layer with `length` units is applied, and the result is swapped back
    before being multiplied with `inputs`.

    Args:
        inputs: Keras tensor with two non-batch axes.
        length: number of units of the attention `Dense` layer.

    Returns:
        The element-wise product of `inputs` and the attention weights.
    """
    transposed = Permute((2, 1))(inputs)
    scores = Dense(length, activation='softmax')(transposed)
    weights = Permute((2, 1))(scores)
    gate = Multiply()
    return gate([inputs, weights])
input_encoder_c.add(Dropout(0.3)) # embed the question into a sequence of vectors question_encoder = Sequential() question_encoder.add( Embedding(input_dim=vocab_size, output_dim=64, input_length=query_maxlen)) question_encoder.add(Dropout(0.3)) input_encoded_m = input_encoder_m(input_sequence) input_encoded_c = input_encoder_c(input_sequence) question_encoded = question_encoder(question) match = dot([input_encoded_m, question_encoded], axes=(2, 2)) match = Activation('softmax')(match) response = add([match, input_encoded_c]) response = Permute((2, 1))(response) answer = concatenate([response, question_encoded]) answer = LSTM(32)(answer) # (samples, 32) answer = Dropout(0.3)(answer) answer = Dense(vocab_size)(answer) answer = Activation('softmax')(answer) # building the model model = Model([input_sequence, question], answer) model.compile(optimizer='rmsprop', loss='categorical_crossentropy', metrics=['accuracy']) # train model.fit([inputs_train, queries_train],
def objective(dropout_val, dropout_layers, lstm_val, batch_size, create_save_plot = False):
    """Build, train and return a memory-network model for one hyper-parameter set.

    The vocabulary size, the story/query lengths and the train/test arrays
    are read from module scope.  Training always runs for 300 epochs and
    checkpoints weights to 'weights.hdf5'.

    Args:
        dropout_val: dropout rate used by every Dropout layer.
        dropout_layers: number of Dropout layers added in the loop after
            the LSTM; converted with int().
        lstm_val: number of LSTM units; converted with int().
        batch_size: training batch size; converted with int().
        create_save_plot: when true, plot training and validation accuracy
            per epoch and save the figure to 'accuracy_epochs.png'.

    Returns:
        A ``(model, history)`` tuple: the trained Keras model and the
        History object returned by ``model.fit``.
    """
    # placeholders
    input_sequence = Input((story_maxlen,))
    question = Input((query_maxlen,))

    # encoders
    # embed the input sequence into a sequence of vectors
    input_encoder_m = Sequential()
    input_encoder_m.add(Embedding(input_dim=vocab_size,
                                  output_dim=64))
    input_encoder_m.add(Dropout(dropout_val))
    # output: (samples, story_maxlen, embedding_dim)

    # embed the input into a sequence of vectors of size query_maxlen
    input_encoder_c = Sequential()
    input_encoder_c.add(Embedding(input_dim=vocab_size,
                                  output_dim=query_maxlen))
    input_encoder_c.add(Dropout(dropout_val))
    # output: (samples, story_maxlen, query_maxlen)

    # embed the question into a sequence of vectors
    question_encoder = Sequential()
    question_encoder.add(Embedding(input_dim=vocab_size,
                                   output_dim=64,
                                   input_length=query_maxlen))
    question_encoder.add(Dropout(dropout_val))
    # output: (samples, query_maxlen, embedding_dim)

    # encode input sequence and questions (which are indices)
    # to sequences of dense vectors
    input_encoded_m = input_encoder_m(input_sequence)
    input_encoded_c = input_encoder_c(input_sequence)
    question_encoded = question_encoder(question)

    # compute a 'match' between the first input vector sequence
    # and the question vector sequence
    # shape: `(samples, story_maxlen, query_maxlen)`
    match = dot([input_encoded_m, question_encoded], axes=(2, 2))
    match = Activation('softmax')(match)

    # add the match matrix with the second input vector sequence
    response = add([match, input_encoded_c])  # (samples, story_maxlen, query_maxlen)
    response = Permute((2, 1))(response)  # (samples, query_maxlen, story_maxlen)

    # concatenate the match matrix with the question vector sequence
    answer = concatenate([response, question_encoded])

    # the original paper uses a matrix multiplication for this reduction step.
    # we choose to use a RNN instead.
    answer = LSTM(int(lstm_val))(answer)  # (samples, int(lstm_val))

    # regularization: `dropout_layers` Dropout layers from the loop ...
    for i in range((int)(dropout_layers)):
        answer = Dropout(dropout_val)(answer)
    # ... plus one Dropout layer that is always applied.
    # NOTE(review): this gives dropout_layers + 1 Dropout layers in total --
    # confirm the extra layer is intended.
    answer = Dropout(dropout_val)(answer)
    answer = Dense(vocab_size)(answer)  # (samples, vocab_size)
    # we output a probability distribution over the vocabulary
    answer = Activation('softmax')(answer)

    # build the final model
    model = Model([input_sequence, question], answer)
    model.compile(optimizer='rmsprop', loss='categorical_crossentropy',
                  metrics=['accuracy'])

    # train; checkpoint the weights only, with save_best_only and period = 10
    callback = [ModelCheckpoint('weights.hdf5', save_best_only=True, save_weights_only=True, period = 10)]
    history = model.fit([inputs_train, queries_train], answers_train,
                        batch_size=int(batch_size),
                        epochs=300,
                        validation_data=([inputs_test, queries_test], answers_test),
                        callbacks = callback)

    print(history.history.keys())
    # summarize history for accuracy
    # NOTE(review): the 'acc' / 'val_acc' keys depend on the Keras version;
    # newer releases record 'accuracy' / 'val_accuracy' -- compare with the
    # keys printed above.
    if create_save_plot:
        plt.plot(history.history['acc'])
        plt.plot(history.history['val_acc'])
        plt.title('model accuracy')
        plt.ylabel('accuracy')
        plt.xlabel('epoch')
        plt.legend(['train', 'test'], loc='upper left')
        plt.savefig('accuracy_epochs.png', dpi=200)

    return model, history
def get_model_lstm_conv2d(input_profile_names, target_profile_names, scalar_input_names,
                          actuator_names, lookbacks, lookahead, profile_length,
                          std_activation, **kwargs):
    """Build a Conv2D/ConvLSTM2D model that predicts target profiles.

    Profile histories are stacked as channels and convolved along the profile
    axis; actuator time series go through Dense, Conv1D and LSTM layers; the
    two branches are added and decoded by one convolutional head per target.

    Args:
        input_profile_names: Names of the input profiles. Each gets an Input
            layer named ``input_<name>``.
        target_profile_names: Names of the predicted profiles. Each gets an
            output layer named ``target_<name>``.
        scalar_input_names: Not used by this architecture.
        actuator_names: Names of the actuator signals. Each gets an Input
            layer named ``input_<name>``.
        lookbacks: Mapping from signal name to its number of past time steps.
        lookahead: Number of future actuator time steps.
        profile_length: Number of points in each profile.
        std_activation: Activation used by the hidden layers.
        **kwargs: Not used by this architecture.

    Returns:
        A Keras ``Model`` whose inputs are the profile inputs followed by the
        actuator inputs and whose outputs are the target profiles.
    """
    # All profile (resp. actuator) inputs share a single shape in this
    # architecture, so size them by the longest lookback configured for the
    # group, as get_model_conv2d does.
    # NOTE(review): callers presumably feed every signal with this common
    # length -- confirm against the data generator.
    profile_lookback = max([0] + [lookbacks[sig] for sig in input_profile_names])
    actuator_lookback = max([0] + [lookbacks[sig] for sig in actuator_names])

    profile_inshape = (profile_lookback, profile_length)
    actuator_inshape = (actuator_lookback + lookahead,)
    num_profiles = len(input_profile_names)
    num_targets = len(target_profile_names)
    num_actuators = len(actuator_names)
    max_channels = 32

    profile_inputs = []
    profiles = []
    for i in range(num_profiles):
        profile_inputs.append(
            Input(profile_inshape, name='input_' + input_profile_names[i]))
        profiles.append(Reshape((profile_lookback, profile_length, 1))
                        (profile_inputs[i]))
    profiles = Concatenate(axis=-1)(profiles)
    # shape = (lookback, length, channels=num_profiles)
    profiles = Conv2D(filters=int(num_profiles*max_channels/8), kernel_size=(1, 5),
                      strides=(1, 1), padding='same',
                      activation=std_activation)(profiles)
    profiles = Conv2D(filters=int(num_profiles*max_channels/4), kernel_size=(1, 10),
                      strides=(1, 1), padding='same',
                      activation=std_activation)(profiles)
    profiles = Conv2D(filters=int(num_profiles*max_channels), kernel_size=(1, 15),
                      strides=(1, 1), padding='same',
                      activation=std_activation)(profiles)
    # shape = (lookback, length, channels)
    if profile_lookback > 1:
        # Collapse the time axis with a convolutional LSTM.
        profiles = Reshape((profile_lookback, 1, profile_length,
                            int(num_profiles*max_channels)))(profiles)
        # shape = (lookback, 1, length, channels)
        profiles = ConvLSTM2D(filters=int(num_profiles*max_channels),
                              kernel_size=(10, 1), strides=(1, 1), padding='same',
                              activation=std_activation,
                              recurrent_activation='hard_sigmoid')(profiles)
        # shape = (1, length, channels)
        profiles = Reshape((profile_length,
                            int(num_profiles*max_channels)))(profiles)
        # shape = (length, channels)
    else:
        profiles = Conv2D(filters=int(num_profiles*max_channels), kernel_size=(1, 10),
                          strides=(1, 1), padding='same',
                          activation=std_activation)(profiles)
        profiles = Reshape((profile_length,
                            int(num_profiles*max_channels)))(profiles)
        # shape = (length, channels)

    actuator_inputs = []
    actuators = []
    for i in range(num_actuators):
        actuator_inputs.append(
            Input(actuator_inshape, name='input_' + actuator_names[i]))
        actuators.append(
            Reshape((actuator_lookback + lookahead, 1))(actuator_inputs[i]))
    actuators = Concatenate(axis=-1)(actuators)
    # shape = (time, num_actuators)
    actuators = Dense(units=int(num_profiles*max_channels/8),
                      activation=std_activation)(actuators)
    actuators = Conv1D(filters=int(num_profiles*max_channels/4), kernel_size=3,
                       strides=1, padding='causal',
                       activation=std_activation)(actuators)
    actuators = LSTM(units=int(num_profiles*max_channels), activation=std_activation,
                     recurrent_activation='hard_sigmoid')(actuators)
    actuators = Reshape((int(num_profiles*max_channels), 1))(actuators)
    # shape = (channels, 1)
    actuators = Dense(units=profile_length, activation=std_activation)(actuators)
    actuators = Dense(units=profile_length, activation=None)(actuators)
    # shape = (channels, profile_length)
    actuators = Permute(dims=(2, 1))(actuators)
    # shape = (profile_length, channels)

    merged = Add()([profiles, actuators])
    merged = Reshape((1, profile_length,
                      int(num_profiles*max_channels)))(merged)
    # shape = (1, length, channels)

    # One convolutional decoding head per target profile.
    prof_act = []
    for i in range(num_targets):
        prof_act.append(Conv2D(filters=max_channels, kernel_size=(1, 15),
                               strides=(1, 1), padding='same',
                               activation=std_activation)(merged))
        # shape = (1, length, max_channels)
        prof_act[i] = Conv2D(filters=int(max_channels/4), kernel_size=(1, 15),
                             strides=(1, 1), padding='same',
                             activation=std_activation)(prof_act[i])
        prof_act[i] = Conv2D(filters=int(max_channels/8), kernel_size=(1, 10),
                             strides=(1, 1), padding='same',
                             activation=std_activation)(prof_act[i])
        prof_act[i] = Conv2D(filters=1, kernel_size=(1, 5), strides=(1, 1),
                             padding='same', activation=None)(prof_act[i])
        # shape = (1, length, 1)
        prof_act[i] = Reshape((profile_length,),
                              name='target_' + target_profile_names[i])(prof_act[i])

    model = Model(inputs=profile_inputs + actuator_inputs, outputs=prof_act)
    return model
parser.add_argument('--env-name', type=str, default='BreakoutDeterministic-v4')
parser.add_argument('--weights', type=str, default=None)
args = parser.parse_args()

# Declare the Gym environment and seed it for reproducibility.
env = gym.make(args.env_name)
np.random.seed(123)
env.seed(123)
nb_actions = env.action_space.n

# Build the model.
input_shape = (WINDOW_LENGTH,) + INPUT_SHAPE
model = Sequential()
if K.image_dim_ordering() == 'tf':
    # (width, height, channels)
    model.add(Permute((2, 3, 1), input_shape=input_shape))
elif K.image_dim_ordering() == 'th':
    # (channels, width, height)
    model.add(Permute((1, 2, 3), input_shape=input_shape))
else:
    raise RuntimeError('Unknown image_dim_ordering.')
model.add(Conv2D(filters=32, kernel_size=(8, 8), strides=(4, 4), activation="relu",
                 input_shape=(*INPUT_SHAPE, WINDOW_LENGTH)))
model.add(Conv2D(filters=64, kernel_size=(4, 4), strides=(2, 2), activation="relu"))
model.add(Conv2D(filters=64, kernel_size=(3, 3), strides=(1, 1), activation="relu"))
model.add(Flatten())
model.add(Dense(512, activation="relu"))
model.add(Dense(nb_actions, activation="linear"))
# summary() prints the table itself and returns None, so it is not wrapped in
# print(), which would emit an extra "None" line.
model.summary()

# memory = SequentialMemory(limit=1000000, window_length=WINDOW_LENGTH)
def get_model_conv2d(input_profile_names, target_profile_names, scalar_input_names,
                     actuator_names, lookbacks, lookahead, profile_length,
                     std_activation, **kwargs):
    """Build a fully convolutional model that predicts target profiles.

    Each signal may have its own lookback; shorter histories are zero-padded
    at the front to the longest lookback of their group. Profiles are stacked
    as channels and convolved, actuators (past + future) and optional scalars
    go through a Dense/LSTM stack, and the branches are added and decoded by
    one convolutional head per target.

    Args:
        input_profile_names: Names of the input profiles. Each gets an Input
            layer named ``input_<name>``.
        target_profile_names: Names of the predicted profiles.
        scalar_input_names: Names of scalar signals. Each gets an Input layer
            named ``input_<name>``. May be empty.
        actuator_names: Names of the actuators. Each gets two Input layers,
            ``input_past_<name>`` and ``input_future_<name>``.
        lookbacks: Mapping from signal name to its number of past time steps.
        lookahead: Number of future actuator time steps.
        profile_length: Number of points in each profile.
        std_activation: Activation used by the hidden layers.
        **kwargs: Optional settings. ``max_channels`` (default 32) scales the
            number of filters; a truthy ``predict_mean`` replaces each output
            profile by its global average.

    Returns:
        A Keras ``Model`` whose inputs are ordered as profile inputs, past
        actuator inputs, future actuator inputs and, if any, scalar inputs.
    """
    # Longest lookback per signal group (0 when the group is empty).
    max_profile_lookback = max([0] + [lookbacks[sig] for sig in input_profile_names])
    max_actuator_lookback = max([0] + [lookbacks[sig] for sig in actuator_names])
    max_scalar_lookback = max([0] + [lookbacks[sig] for sig in scalar_input_names])

    num_profiles = len(input_profile_names)
    num_scalars = len(scalar_input_names)
    max_channels = kwargs.get('max_channels', 32)
    full_channels = int(num_profiles*max_channels)

    def encode_time_series(series):
        """Map a (time, signals) tensor to a (profile_length, channels) tensor."""
        for divisor in (8, 4, 2):
            series = Dense(units=int(num_profiles*max_channels/divisor),
                           activation=std_activation)(series)
        series = LSTM(units=full_channels, activation=std_activation,
                      recurrent_activation='hard_sigmoid')(series)
        series = Reshape((full_channels, 1))(series)
        # shape = (channels, 1)
        for divisor in (4, 2):
            series = Dense(units=int(profile_length/divisor),
                           activation=std_activation)(series)
        series = Dense(units=profile_length, activation=None)(series)
        # shape = (channels, profile_length)
        return Permute(dims=(2, 1))(series)
        # shape = (profile_length, channels)

    # Profile branch.
    profile_inputs = []
    profiles = []
    for name in input_profile_names:
        profile_input = Input((lookbacks[name], profile_length),
                              name='input_' + name)
        profile_inputs.append(profile_input)
        # Front-pad shorter histories so every profile has the same lookback.
        padded = ZeroPadding1D(
            padding=(max_profile_lookback - lookbacks[name], 0))(profile_input)
        profiles.append(
            Reshape((max_profile_lookback, profile_length, 1))(padded))
    profiles = Concatenate(axis=-1)(profiles)
    # shape = (lookback, length, channels=num_profiles)
    for channel_divisor, kernel_divisor in ((8, 12), (4, 8), (2, 6), (1, 4)):
        profiles = Conv2D(filters=int(num_profiles*max_channels/channel_divisor),
                          kernel_size=(1, int(profile_length/kernel_divisor)),
                          strides=(1, 1), padding='same',
                          activation=std_activation)(profiles)
    # shape = (lookback, length, channels)
    if max_profile_lookback > 1:
        # Collapse the time axis: a 'valid' convolution spanning the whole
        # lookback leaves a single row.
        profiles = Conv2D(filters=full_channels,
                          kernel_size=(max_profile_lookback, 1),
                          strides=(1, 1), padding='valid',
                          activation=std_activation)(profiles)
    profiles = Reshape((profile_length, full_channels))(profiles)
    # shape = (length, channels)

    # Optional scalar branch.
    if num_scalars > 0:
        scalar_inputs = []
        scalars = []
        for name in scalar_input_names:
            scalar_input = Input((lookbacks[name],), name='input_' + name)
            scalar_inputs.append(scalar_input)
            series = Reshape((lookbacks[name], 1))(scalar_input)
            scalars.append(ZeroPadding1D(
                padding=(max_scalar_lookback - lookbacks[name], 0))(series))
        scalars = Concatenate(axis=-1)(scalars)
        # shape = (time, num_scalars)
        scalars = encode_time_series(scalars)

    # Actuator branch: past and future values are joined along time.
    actuator_future_inputs = []
    actuator_past_inputs = []
    actuators = []
    for name in actuator_names:
        future_input = Input((lookahead,), name='input_future_' + name)
        past_input = Input((lookbacks[name],), name='input_past_' + name)
        actuator_future_inputs.append(future_input)
        actuator_past_inputs.append(past_input)
        series = Concatenate(axis=1)([past_input, future_input])
        series = Reshape((lookbacks[name] + lookahead, 1))(series)
        actuators.append(ZeroPadding1D(
            padding=(max_actuator_lookback - lookbacks[name], 0))(series))
    actuators = Concatenate(axis=-1)(actuators)
    # shape = (time, num_actuators)
    actuators = encode_time_series(actuators)

    if num_scalars > 0:
        merged = Add()([profiles, actuators, scalars])
    else:
        merged = Add()([profiles, actuators])
    merged = Reshape((1, profile_length, full_channels))(merged)
    # shape = (1, length, channels)

    # One convolutional decoding head per target profile.
    prof_act = []
    for name in target_profile_names:
        head = Conv2D(filters=max_channels,
                      kernel_size=(1, int(profile_length/4)),
                      strides=(1, 1), padding='same',
                      activation=std_activation)(merged)
        # shape = (1, length, max_channels)
        for channel_divisor, kernel_divisor in ((2, 8), (4, 6), (8, 4)):
            head = Conv2D(filters=int(max_channels/channel_divisor),
                          kernel_size=(1, int(profile_length/kernel_divisor)),
                          strides=(1, 1), padding='same',
                          activation=std_activation)(head)
        head = Conv2D(filters=1, kernel_size=(1, int(profile_length/4)),
                      strides=(1, 1), padding='same', activation=None)(head)
        # shape = (1, length, 1)
        if kwargs.get('predict_mean'):
            head = GlobalAveragePooling2D()(head)
        else:
            head = Reshape((profile_length,), name='target_' + name)(head)
        prof_act.append(head)

    model_inputs = profile_inputs + actuator_past_inputs + actuator_future_inputs
    if num_scalars > 0:
        model_inputs += scalar_inputs
    model = Model(inputs=model_inputs, outputs=prof_act)
    return model
def build_dmn_model(num_story_sentences,
                    story_sentence_length,
                    question_length,
                    answer_length,
                    embedding_matrix,
                    recur_size=256,
                    recurrent_layers=1,
                    iterations=3,
                    gate_dense_size=128,
                    use_mem_gru=False,
                    gate_supervision=True,
                    return_att_model=False,
                    reuse_ep_encoder_state=False,
                    apply_attention_to_hidden_state=True):
    """
    Build and return a Dynamic Memory Network. For details on this
    architecture, please see: https://arxiv.org/abs/1506.07285.

    Args:
        num_story_sentences: The number of sentences in the story.
        story_sentence_length: The number of tokens in each of those sentences.
        question_length: The number of tokens in each question.
        answer_length: The number of tokens in each answer.
        embedding_matrix: A numpy matrix with shape
            `(vocab_size, embedding_size)`.
        recur_size: The size of the hidden space used by the recurrent layers.
        recurrent_layers: The number of stacked recurrent layers to use.
        iterations: The number of passes the episode generating GRU makes over
            the input.
        gate_dense_size: The number of hidden units in the dense network that
            generates the attention gate weights.
        use_mem_gru: Whether or not to consolidate the episodic memories into
            a memory vector that is retained after each iteration. This is how
            they did it in the paper, but I found the network converged faster
            without it. Defaults to `False`.
        gate_supervision: Whether to build the network so that the gate
            weights of the final iteration are part of the output and loss
            function for the training model. If this option is used, the
            correct gate weights must be passed in during training. Defaults
            to `True`.
        return_att_model: Return an additional model that can be used to get
            the attention weights during prediction. This lets you
            debug/visualize the attention gate weights on arbitrary inputs.
            Defaults to `False`.
        reuse_ep_encoder_state: If `True`, the episode generating GRU starts
            each iteration from the state it ended the previous iteration
            with; if `False`, its state is reset to the initial state on every
            iteration. Defaults to `False`.
        apply_attention_to_hidden_state: If `True`, attention weights are
            applied to the hidden states of the episode generating GRU between
            timesteps. If a sentence has an attention weight of 1, then the
            hidden state resulting from that timestep is passed forward
            unmodified. If its weight is 0, then the hidden state from the
            previous timestep is passed forward and the sentence at the
            current timestep has no impact on the network. Values between 0
            and 1 will have intermediate effects. If `False`, then the
            attention weights are applied to the sentence vectors directly
            before they're fed to the episode generating GRU. Defaults to
            `True` as this was the architecture used in the original paper.

    Returns:
        (list): list containing:
            train_model: The Keras model used to train the weights.
            encoder_model: The Keras model used to encode input during
                prediction.
            decoder_model: The Keras model used to decode input during
                prediction.
            attention_model: Optionally, a model that can output the attention
                gate weights for arbitrary input.
    """
    story_sent_inputs = [
        Input(shape=(story_sentence_length, ), name=f'story_sentence_{i}_input')
        for i in range(num_story_sentences)
    ]
    question_input = Input(shape=(question_length, ), name='question_input')
    decoder_input = Input(shape=(answer_length, ), name='decoder_input')

    embedding_lookup = Embedding(embedding_matrix.shape[0],
                                 embedding_matrix.shape[1],
                                 weights=[embedding_matrix],
                                 trainable=False,
                                 name='embedding_lookup')

    #
    # Setup reused layers.
    #

    # Only the last layer of a stack returns a single vector; the layers
    # below it return full sequences to feed the next layer.
    encoders = []
    for i in range(recurrent_layers):
        return_seq = i != recurrent_layers - 1
        encoders.append(
            GRU(recur_size,
                return_sequences=return_seq,
                return_state=True,
                name=f'encoder_{i}'))

    ep_encoders = []
    ep_gru_class = EpisodicGRU if apply_attention_to_hidden_state else GRU
    for i in range(recurrent_layers):
        return_seq = i != recurrent_layers - 1
        ep_encoders.append(
            ep_gru_class(recur_size,
                         return_sequences=return_seq,
                         return_state=True,
                         name=f'ep_encoder_{i}'))

    if use_mem_gru:
        mem_gru = GRU(recur_size)

    decoders = []
    for i in range(recurrent_layers):
        decoders.append(
            GRU(recur_size,
                return_state=True,
                return_sequences=True,
                name=f'decoder_{i}'))

    gate_dense1 = Dense(gate_dense_size, activation='tanh')
    gate_dense2 = Dense(1, activation='sigmoid')  # Sigmoid makes more sense.
    word_predictor = Dense(embedding_matrix.shape[0],
                           activation='softmax',
                           name='word_predictor')
    flatten = Flatten()
    repeat_recur_size_times = RepeatVector(recur_size)
    permute = Permute((2, 1))
    repeat_num_sents_times = RepeatVector(num_story_sentences)
    repeat_once = RepeatVector(1)

    #
    # Encode input.
    #

    # Encode story sents.
    initial_state = None
    encoded_sents = []
    for sent in story_sent_inputs:
        x = embedding_lookup(sent)
        for encoder in encoders:
            x, state = encoder(x, initial_state)
            if initial_state is None:
                # The first call reveals the state shape; build an explicit
                # all-zeros state from it and reuse it everywhere below.
                initial_state = K.zeros_like(state)
        encoded_sents.append(x)

    # Merge encoded sents.
    reshaped_sents = [repeat_once(sent) for sent in encoded_sents]
    merged_sents = concatenate(reshaped_sents, axis=1)

    # Encode question.
    x = embedding_lookup(question_input)
    for encoder in encoders:
        x, _ = encoder(x, initial_state=initial_state)
    question_vector = x

    #
    # Generate memory vector.
    #

    # One memory vector per recurrent layer, all starting as the question.
    per_layer_memory_vectors = [question_vector] * recurrent_layers
    per_layer_episodes = [initial_state] * len(ep_encoders)

    question_vectors = repeat_num_sents_times(question_vector)
    pointwise1 = layers.multiply([merged_sents, question_vectors])
    delta1 = layers.subtract([merged_sents, question_vectors])
    delta1 = layers.multiply([delta1, delta1])

    attention_outputs = []
    for iteration in range(iterations):
        # Calculate the weights for the gate vectors.
        memory_vectors = repeat_num_sents_times(per_layer_memory_vectors[-1])
        pointwise2 = layers.multiply([merged_sents, memory_vectors])
        delta2 = layers.subtract([merged_sents, memory_vectors])
        delta2 = layers.multiply([delta2, delta2])
        gate_feature_vectors = concatenate(
            [pointwise1, pointwise2, delta1, delta2])
        x = gate_dense1(gate_feature_vectors)
        attention_weights = gate_dense2(x)
        # Shape: (None, num_story_sents, 1)
        flattened_attention_weights = flatten(attention_weights)
        # Shape: (None, num_story_sents)
        attention_outputs.append(flattened_attention_weights)

        if apply_attention_to_hidden_state:
            ep_encoder_input = concatenate([merged_sents, attention_weights],
                                           axis=-1)
        else:
            # Broadcast each sentence's weight across the recurrent dimension.
            # RepeatVector needs the 2-D flattened weights, not the 3-D output
            # of the gate's hidden layer.
            x = repeat_recur_size_times(flattened_attention_weights)
            # Shape: (None, recur_size, num_story_sents)
            gate_weights = permute(x)
            # Shape: (None, num_story_sents, recur_size)
            ep_encoder_input = layers.multiply([merged_sents, gate_weights])

        new_per_layer_episodes = []
        x = ep_encoder_input
        for ep_encoder, prev_ep in zip(ep_encoders, per_layer_episodes):
            state = prev_ep if reuse_ep_encoder_state else initial_state
            x, episode = ep_encoder(x, initial_state=state)
            new_per_layer_episodes.append(episode)
        per_layer_episodes = new_per_layer_episodes

        # TODO: If you're using the multi-layer and mem_gru options
        # together, the way the mem_gru is reused here is not ideal.
        if use_mem_gru:
            new_per_layer_memory_vectors = []
            for memory_vector, episode in zip(per_layer_memory_vectors,
                                              per_layer_episodes):
                episode = repeat_once(episode)
                memory_vector = mem_gru(episode, initial_state=memory_vector)
                new_per_layer_memory_vectors.append(memory_vector)
            per_layer_memory_vectors = new_per_layer_memory_vectors
        else:
            per_layer_memory_vectors = per_layer_episodes

    # Decode answer.
    repeated_question = RepeatVector(answer_length)(question_vector)
    x = embedding_lookup(decoder_input)
    x = concatenate([x, repeated_question])
    for decoder, memory_vector in zip(decoders, per_layer_memory_vectors):
        x, _ = decoder(x, initial_state=memory_vector)
    answer_prediction = word_predictor(x)

    #
    # Build models.
    #

    # Build training model.
    inputs = story_sent_inputs + [question_input, decoder_input]
    outputs = [answer_prediction]
    losses = ['sparse_categorical_crossentropy']
    if gate_supervision:
        outputs.append(attention_outputs[-1])
        losses.append('binary_crossentropy')
    train_model = Model(inputs=inputs, outputs=outputs)
    train_model.compile(loss=losses, optimizer='rmsprop', metrics=['accuracy'])

    # Build encoder model.
    inputs = story_sent_inputs + [question_input]
    outputs = per_layer_memory_vectors + [question_vector]
    encoder_model = Model(inputs=inputs, outputs=outputs)

    # Build decoder model. It decodes one token at a time, so the decoder
    # states are explicit inputs and outputs.
    decoder_prev_predict_input = Input(shape=(1, ),
                                       name='decoder_prev_predict_input')
    decoder_question_input = Input(shape=(recur_size, ),
                                   name='decoder_question_input')
    decoder_state_inputs = [
        Input(shape=(recur_size, ), name=f'decoder_state_input_{i}')
        for i in range(recurrent_layers)
    ]
    x = embedding_lookup(decoder_prev_predict_input)
    repeated_question = repeat_once(decoder_question_input)
    x = concatenate([x, repeated_question])
    decoder_states = []
    for decoder, decoder_state_input in zip(decoders, decoder_state_inputs):
        x, decoder_state = decoder(x, initial_state=decoder_state_input)
        decoder_states.append(decoder_state)
    x = word_predictor(x)
    inputs = [decoder_prev_predict_input, decoder_question_input
              ] + decoder_state_inputs
    outputs = [x] + decoder_states
    decoder_model = Model(inputs=inputs, outputs=outputs)

    models = [train_model, encoder_model, decoder_model]

    # Build attention model.
    if return_att_model:
        inputs = story_sent_inputs + [question_input]
        att_model = Model(inputs=inputs, outputs=attention_outputs)
        models.append(att_model)

    return models
def get_brats_nets(n_channels, filters_list, kernel_size_list, nlabels, dense_size, drop=0.5):
    """Build four segmentation networks on one shared input, plus a joint model.

    The four networks are a CNN (convolutions + dense layers), an FCNN
    (convolutions + 1x1x1 convolutions), a U-Net (convolutions + transposed
    convolutions with skip concatenations) and a UCNN (the U-shaped stack
    followed by a dense classifier). Every model is compiled with adadelta and
    categorical cross-entropy.

    Args:
        n_channels: Number of channels of the (channels-first) input patch.
        filters_list: Number of filters of each convolutional block.
        kernel_size_list: Kernel size of each convolutional block.
        nlabels: Number of output labels.
        dense_size: Width of the hidden dense / 1x1x1 convolutional layer.
        drop: Dropout rate applied after every (de)convolution.

    Returns:
        The tuple ``(nets, unet, cnn, fcnn, ucnn)`` where ``nets`` outputs the
        predictions of the four other models.
    """
    # Init
    n_blocks = len(filters_list)
    patch_side = n_blocks * 2 + 3
    input_shape = (n_channels, patch_side, patch_side, patch_side)
    inputs = Input(shape=input_shape, name='seg_inputs')

    def conv_block(tensor, filters, kernel_size):
        """Convolution followed by dropout."""
        conv = Conv3D(filters, kernel_size=kernel_size, activation='relu',
                      data_format='channels_first')
        return Dropout(drop)(conv(tensor))

    def deconv_block(tensor, filters, kernel_size):
        """Transposed convolution followed by dropout."""
        deconv = Conv3DTranspose(filters, kernel_size=kernel_size,
                                 activation='relu',
                                 data_format='channels_first')
        return Dropout(drop)(deconv(tensor))

    def compiled_model(outputs, **compile_kwargs):
        """Wrap `outputs` into a model on the shared input and compile it."""
        net = Model(inputs=inputs, outputs=outputs)
        net.compile(optimizer='adadelta', loss='categorical_crossentropy',
                    metrics=['accuracy'], **compile_kwargs)
        return net

    # Four independent convolutional encoders over the same input.
    cnn_tensor = fcnn_tensor = unet_tensor = ucnn_tensor = inputs
    unet_list = []
    ucnn_list = []
    for filters, kernel_size in zip(filters_list, kernel_size_list):
        cnn_tensor = conv_block(cnn_tensor, filters, kernel_size)
        fcnn_tensor = conv_block(fcnn_tensor, filters, kernel_size)
        unet_tensor = conv_block(unet_tensor, filters, kernel_size)
        unet_list.append(unet_tensor)
        ucnn_tensor = conv_block(ucnn_tensor, filters, kernel_size)
        ucnn_list.append(ucnn_tensor)

    # > Convolutional only stuff
    # CNN
    cnn_flat = Flatten()(cnn_tensor)
    cnn_tensor = Dense(dense_size)(cnn_flat)
    cnn_tensor = Dense(nlabels)(cnn_tensor)

    # FCNN
    fcnn_tensor = Conv3D(dense_size, kernel_size=(1, 1, 1),
                         data_format='channels_first')(fcnn_tensor)
    fcnn_tensor = Conv3D(nlabels, kernel_size=(1, 1, 1),
                         data_format='channels_first')(fcnn_tensor)
    fcnn_tensor = Reshape((nlabels, -1))(fcnn_tensor)
    fcnn_tensor = Permute((2, 1))(fcnn_tensor)

    # > U-stuff
    # The first deconvolution has no skip connection and no dropout.
    unet_tensor = Conv3DTranspose(filters_list[-1],
                                  kernel_size=kernel_size_list[-1],
                                  activation='relu',
                                  data_format='channels_first')(unet_tensor)
    ucnn_tensor = Conv3DTranspose(filters_list[-1],
                                  kernel_size=kernel_size_list[-1],
                                  activation='relu',
                                  data_format='channels_first')(ucnn_tensor)
    # Walk back up the encoder, skipping its last block, and concatenate the
    # matching encoder output before each deconvolution.
    skip_levels = zip(filters_list[-2::-1], kernel_size_list[-2::-1],
                      unet_list[-2::-1], ucnn_list[-2::-1])
    for filters, kernel_size, unet_skip, ucnn_skip in skip_levels:
        unet_joined = concatenate([unet_skip, unet_tensor], axis=1)
        unet_tensor = deconv_block(unet_joined, filters, kernel_size)
        ucnn_joined = concatenate([ucnn_skip, ucnn_tensor], axis=1)
        ucnn_tensor = deconv_block(ucnn_joined, filters, kernel_size)

    unet_tensor = Conv3D(nlabels, kernel_size=(1, 1, 1),
                         data_format='channels_first')(unet_tensor)
    unet_tensor = Reshape((nlabels, -1))(unet_tensor)
    unet_tensor = Permute((2, 1))(unet_tensor)

    ucnn_tensor = Conv3D(nlabels, kernel_size=(1, 1, 1),
                         data_format='channels_first')(ucnn_tensor)
    ucnn_flat = Flatten()(ucnn_tensor)
    ucnn_tensor = Dense(nlabels)(ucnn_flat)

    unet_out = Activation('softmax', name='unet_seg')(unet_tensor)
    cnn_out = Activation('softmax', name='cnn_seg')(cnn_tensor)
    fcnn_out = Activation('softmax', name='fcnn_seg')(fcnn_tensor)
    ucnn_out = Activation('softmax', name='ucnn_seg')(ucnn_tensor)

    unet = compiled_model(unet_out)
    cnn = compiled_model(cnn_out)
    fcnn = compiled_model(fcnn_out)
    ucnn = compiled_model(ucnn_out)

    # Instead of having the 4 independent networks trained independently, we
    # also make them share weights and update them with a global network.
    nets_outputs = [unet(inputs), cnn(inputs), fcnn(inputs), ucnn(inputs)]
    nets = compiled_model(nets_outputs, loss_weights=[2, 2, 2, 2])

    return nets, unet, cnn, fcnn, ucnn
def memLstm_custom_model(hparams, context, context_mask, utterances, kb, kb_flag, kb_mask):
    """Wire up the multi-hop memory-LSTM response-selection graph.

    The context is encoded with one LSTM run forward and on the reversed
    sequence; candidate utterances and KB entries are encoded separately,
    concatenated, and refined over ``hparams.hops`` attention hops against the
    context encodings.  A context summary (attention over context positions,
    plus an attention-weighted KB summary) is finally scored against every
    candidate utterance.

    Args:
        hparams: Hyper-parameter object.  Reads ``embedding_path``,
            ``vocab_size``, ``memn2n_embedding_dim``, ``memn2n_rnn_dim``,
            ``max_context_len``, ``max_utterance_len``, ``max_kb_len``,
            ``num_utterance_options``, ``num_kb_options``, ``amplify_val``
            and ``hops``.
        context: Keras tensor of context token ids.
        context_mask: Keras tensor that is *added* to attention scores over
            context positions (presumably large negative values on padding --
            TODO confirm against the caller).
        utterances: Keras tensor of candidate-utterance token ids.
        kb: Keras tensor of KB-entry token ids.
        kb_flag: Keras tensor; only its shape is printed here.
        kb_mask: Keras tensor added to the KB attention scores.

    Returns:
        Tuple ``(probs, context_attention, responses_attention, kb_attention)``
        of Keras tensors; ``probs`` is the softmax over candidate utterances,
        the other three expose the attention weights under fixed layer names.

    Raises:
        ValueError: if ``hparams.hops`` is smaller than 1 (at least one hop is
            needed to produce the attention outputs).
    """
    if hparams.hops < 1:
        raise ValueError("hparams.hops must be >= 1, got %r" % (hparams.hops, ))

    print("context_shape: ", context._keras_shape)
    print("utterances_shape: ", utterances._keras_shape)
    print("context_mask: ", context_mask._keras_shape)
    print("kb_flag shape: ", kb_flag._keras_shape)

    # Use embedding matrix pretrained by Gensim
    embeddings_W = np.load(hparams.embedding_path)
    print("embeddings_W: ", embeddings_W.shape)

    ################################## Define Regular Layers ##################################
    # Frozen embeddings, one layer per input kind (they differ only in input_length).
    # Utterances Embedding (Output shape: NUM_OPTIONS(100) x BATCH_SIZE(?) x LEN_SEQ(160) x EMBEDDING_DIM(300))
    embedding_context_layer = Embedding(
        input_dim=hparams.vocab_size,
        output_dim=hparams.memn2n_embedding_dim,
        weights=[embeddings_W],
        input_length=hparams.max_context_len,
        mask_zero=True,
        trainable=False)
    embedding_utterance_layer = Embedding(
        input_dim=hparams.vocab_size,
        output_dim=hparams.memn2n_embedding_dim,
        weights=[embeddings_W],
        input_length=hparams.max_utterance_len,
        mask_zero=True,
        trainable=False)
    embedding_kb_layer = Embedding(
        input_dim=hparams.vocab_size,
        output_dim=hparams.memn2n_embedding_dim,
        weights=[embeddings_W],
        input_length=hparams.max_kb_len,
        mask_zero=True,
        trainable=False)

    # Define LSTM Context encoder 1 (returns the full sequence and its states)
    LSTM_A = LSTM(hparams.memn2n_rnn_dim,
                  input_shape=(hparams.max_context_len,
                               hparams.memn2n_embedding_dim),
                  use_bias=True,
                  unit_forget_bias=True,
                  return_state=True,
                  return_sequences=True)
    # Define LSTM Utterances encoder
    LSTM_B = LSTM(hparams.memn2n_rnn_dim,
                  input_shape=(hparams.max_utterance_len,
                               hparams.memn2n_embedding_dim),
                  use_bias=True,
                  unit_forget_bias=True,
                  return_state=False,
                  return_sequences=False)
    # Define LSTM KBs
    LSTM_K = LSTM(hparams.memn2n_rnn_dim,
                  input_shape=(hparams.max_kb_len,
                               hparams.memn2n_embedding_dim),
                  use_bias=True,
                  unit_forget_bias=True,
                  return_state=False,
                  return_sequences=False)

    # Define Dense layers to transform utterances / KB encodings
    Matrix_utterances = Dense(
        hparams.memn2n_rnn_dim,
        use_bias=False,
        kernel_initializer=keras.initializers.TruncatedNormal(
            mean=0.0, stddev=1.0, seed=None),
        input_shape=(hparams.memn2n_rnn_dim, ))
    Matrix_kb = Dense(
        hparams.memn2n_rnn_dim,
        use_bias=False,
        kernel_initializer=keras.initializers.TruncatedNormal(
            mean=0.0, stddev=1.0, seed=None),
        input_shape=(hparams.memn2n_rnn_dim, ))
    # Define Dense layers producing scalar scores (softmax is applied later)
    Dense_2 = Dense(
        1,
        use_bias=False,
        kernel_initializer=keras.initializers.TruncatedNormal(
            mean=0.0, stddev=1.0, seed=None),
        input_shape=(hparams.memn2n_rnn_dim, ))
    Dense_3 = Dense(
        1,
        use_bias=False,
        kernel_initializer=keras.initializers.TruncatedNormal(
            mean=0.0, stddev=1.0, seed=None),
        input_shape=(2, ))

    ################################## Define Custom Layers ##################################
    # NOTE(review): custom_repeat_layer, Sum, Sum2, Normalize and the
    # GetFirst*/GetLast* slicers below are never applied in this function.
    # They are kept because deleting them would presumably shift the
    # auto-generated Lambda layer names -- confirm before removing.
    # Define repeat element layer
    custom_repeat_layer = Lambda(
        lambda x: K.repeat_elements(x, hparams.max_context_len, 1))
    custom_repeat_layer2 = Lambda(lambda x: K.repeat_elements(
        x, hparams.num_utterance_options + hparams.num_kb_options, 1))
    # Expand dimension layer
    expand_dim_layer = Lambda(lambda x: K.expand_dims(x, axis=1))
    # Amplify layer
    amplify_layer = Lambda(lambda x: x * hparams.amplify_val)
    # Define Softmax layer
    softmax_layer = Lambda(lambda x: K.softmax(Masking()(x), axis=-1))
    softmax_layer2 = Lambda(lambda x: K.softmax(Masking()(x), axis=1))
    # Define Stack & Concat layers
    Stack = Lambda(lambda x: K.stack(x, axis=1))
    # Naming tensors (identity layers that only give the outputs fixed names)
    kb_attention_layer = Lambda(lambda x: x, name='kb_attention')
    responses_attention_layer = Lambda(lambda x: x, name='responses_attention')
    context_attention_layer = Lambda(lambda x: x, name='context_attention')
    # Concat = Lambda(lambda x: K.concatenate(x, axis=1))
    # Sum up last dimension
    Sum = Lambda(lambda x: K.sum(x, axis=-1))
    Sum2 = Lambda(lambda x: K.sum(x, axis=1))
    # Normalize layer
    Normalize = Lambda(lambda x: K.l2_normalize(x, axis=-1))
    # Define tensor slice layer
    GetFirstHalfTensor = Lambda(lambda x: x[:, :, :hparams.memn2n_rnn_dim])
    GetLastHalfTensor = Lambda(lambda x: x[:, :, hparams.memn2n_rnn_dim:])
    GetFirstTensor = Lambda(lambda x: x[:, 0, :])
    GetLastTensor = Lambda(lambda x: x[:, -1, :])
    GetUtterancesTensor = Lambda(
        lambda x: x[:, :hparams.num_utterance_options, :])
    GetKbTensor = Lambda(lambda x: x[:, hparams.num_utterance_options:, :])
    GetReverseTensor = Lambda(lambda x: K.reverse(x, axes=1))

    ################################## Apply layers ##################################
    # Prepare Masks: the context mask is repeated once per candidate
    # (utterances + KB entries) so it can be added to the attention scores.
    utterances_mask = Reshape((1, hparams.max_context_len))(context_mask)
    utterances_mask = custom_repeat_layer2(utterances_mask)
    context_mask = Reshape((hparams.max_context_len, 1))(context_mask)
    kb_mask = Reshape((1, hparams.num_kb_options))(kb_mask)

    # Context Embedding: (BATCH_SIZE(?) x CONTEXT_LEN x EMBEDDING_DIM)
    context_embedded = embedding_context_layer(context)
    print("context_embedded: ", context_embedded._keras_shape)
    print("context_embedded (history): ", context_embedded._keras_history, '\n')
    # Skip this?
    # context_embedded = Concatenate(axis=-1)([context_embedded, context_speaker])

    # Utterances Embedding: (BATCH_SIZE(?) x NUM_OPTIONS x UTTERANCE_LEN x EMBEDDING_DIM)
    utterances_embedded = TimeDistributed(
        embedding_utterance_layer,
        input_shape=(hparams.num_utterance_options,
                     hparams.max_utterance_len))(utterances)
    print("Utterances_embedded: ", utterances_embedded._keras_shape)
    print("Utterances_embedded (history): ",
          utterances_embedded._keras_history, '\n')

    # KB embedding: (? x NUM_KB_OPTIONS x MAX_KB_LEN x EMBEDDING_DIM)
    kb_embedded = TimeDistributed(
        embedding_kb_layer,
        input_shape=(hparams.num_kb_options, hparams.max_kb_len))(kb)
    print("KB_embedded: ", kb_embedded._keras_shape)
    print("KB_embedded: (history)", kb_embedded._keras_history, '\n')

    # Encode context A: (BATCH_SIZE(?) x CONTEXT_LEN x RNN_DIM)
    # The same LSTM is applied to the sequence and to its reversal; the final
    # hidden/cell states it also returns are not used.
    all_context_encoded_Forward, _, _ = LSTM_A(context_embedded)
    all_context_encoded_Backward, _, _ = LSTM_A(
        Masking()(GetReverseTensor(context_embedded)))
    # Flip the backward encoding back into the original time order.
    all_context_encoded_Backward = Masking()(
        GetReverseTensor(all_context_encoded_Backward))
    print("all_context_encoded_Forward: ",
          all_context_encoded_Forward._keras_shape)
    print("all_context_encoded_Forward (history): ",
          all_context_encoded_Forward._keras_history)
    print("all_context_encoded_Backward: ",
          all_context_encoded_Backward._keras_shape)
    print("all_context_encoded_Backward (history): ",
          all_context_encoded_Backward._keras_history, '\n')
    all_context_encoded_Bidir_sum = Add()(
        [all_context_encoded_Forward, all_context_encoded_Backward])

    # Encode utterances B: (BATCH_SIZE(?) x NUM_OPTIONS(100) x RNN_DIM)
    all_utterances_encoded_B = TimeDistributed(
        LSTM_B,
        input_shape=(hparams.num_utterance_options,
                     hparams.max_utterance_len,
                     hparams.memn2n_embedding_dim))(utterances_embedded)
    all_utterances_encoded_B = TimeDistributed(
        Matrix_utterances,
        input_shape=(hparams.num_utterance_options,
                     hparams.memn2n_rnn_dim))(all_utterances_encoded_B)
    print("all_utterances_encoded_B: ", all_utterances_encoded_B._keras_shape)
    print("all_utterances_encoded_B: (history)",
          all_utterances_encoded_B._keras_history, '\n')

    # Encode KB entries K: (BATCH_SIZE(?) x NUM_KB_OPTIONS x RNN_DIM)
    all_kb_encoded_K = TimeDistributed(
        LSTM_K,
        input_shape=(hparams.num_kb_options,
                     hparams.max_kb_len,
                     hparams.memn2n_embedding_dim))(kb_embedded)
    all_kb_encoded_K = TimeDistributed(
        Matrix_kb,
        input_shape=(hparams.num_kb_options,
                     hparams.memn2n_rnn_dim))(all_kb_encoded_K)
    print("all_kb_encoded_K: ", all_kb_encoded_K._keras_shape)
    print("all_kb_encoded_K: (history)", all_kb_encoded_K._keras_history, '\n')

    # Stack all utterances and kb options: (? x (NUM_OPTIONS+NUM_KBs) x RNN_DIM)
    all_utterances_kb_encoded = Concatenate(axis=1)(
        [all_utterances_encoded_B, all_kb_encoded_K])
    print("all_utterances_kb_encoded: ",
          all_utterances_kb_encoded._keras_shape)
    print("all_utterances_kb_encoded: (history)",
          all_utterances_kb_encoded._keras_history, '\n')

    responses_attention = []
    kb_attention = []
    for i in range(hparams.hops):
        print(str(i + 1) + 'th hop:')

        # 1st Attention & Weighted Sum
        # between the candidates ((NUM_OPTIONS + NUM_KB) x RNN_DIM) and the
        # summed forward+backward context encoding (CONTEXT_LEN x RNN_DIM),
        # then Softmax.
        # (Output shape: BATCH_SIZE(?) x (NUM_OPTIONS + NUM_KB) x CONTEXT_LEN)
        attention_Forward = Dot(axes=[2, 2])(
            [all_utterances_kb_encoded, all_context_encoded_Bidir_sum])
        attention_Forward = amplify_layer(attention_Forward)
        attention_Forward = Add()([attention_Forward, utterances_mask])
        attention_Forward = softmax_layer(attention_Forward)
        print("attention_Forward: ", attention_Forward._keras_shape)
        print("attention_Forward: (history)", attention_Forward._keras_history)

        # Weighted sum of the context encoding according to the attention.
        # (Output shape: BATCH_SIZE(?) x (NUM_OPTIONS + NUM_KB) x RNN_DIM)
        weighted_sum_Forward = Dot(axes=[2, 1])(
            [attention_Forward, all_context_encoded_Bidir_sum])
        print("weighted_sum: ", weighted_sum_Forward._keras_shape)
        print("weighted_sum: (history)",
              weighted_sum_Forward._keras_history, '\n')

        # Residual update of the candidate encodings.
        all_utterances_kb_encoded = Add()(
            [weighted_sum_Forward, all_utterances_kb_encoded])

        # 2nd Attention & Weighted Sum
        # between the updated candidates and the backward context encoding
        # (CONTEXT_LEN x RNN_DIM), then Softmax.
        # (Output shape: BATCH_SIZE(?) x (NUM_OPTIONS + NUM_KB) x CONTEXT_LEN)
        attention_Backward = Dot(axes=[2, 2])(
            [all_utterances_kb_encoded, all_context_encoded_Backward])
        attention_Backward = amplify_layer(attention_Backward)
        attention_Backward = Add()([attention_Backward, utterances_mask])
        attention_Backward = softmax_layer(attention_Backward)
        print("attention_Backward: ", attention_Backward._keras_shape)
        print("attention_Backward: (history)",
              attention_Backward._keras_history)

        # (Output shape: BATCH_SIZE(?) x (NUM_OPTIONS + NUM_KB) x RNN_DIM)
        weighted_sum_Backward = Dot(axes=[2, 1])(
            [attention_Backward, all_context_encoded_Backward])
        print("weighted_sum_Backward: ", weighted_sum_Backward.shape)
        print("weighted_sum_Backward: (history)",
              weighted_sum_Backward._keras_history, '\n')

        all_utterances_kb_encoded = Add()(
            [weighted_sum_Backward, all_utterances_kb_encoded])

        # Keep this hop's attention maps, split into the utterance rows and
        # the KB rows, with forward/backward stacked on a new axis 1.
        att_responses_Forward = expand_dim_layer(
            GetUtterancesTensor(attention_Forward))
        att_responses_Backward = expand_dim_layer(
            GetUtterancesTensor(attention_Backward))
        att_kb_Forward = expand_dim_layer(GetKbTensor(attention_Forward))
        att_kb_Backward = expand_dim_layer(GetKbTensor(attention_Backward))
        merge_responses = Concatenate(axis=1)(
            [att_responses_Forward, att_responses_Backward])
        merge_kb = Concatenate(axis=1)([att_kb_Forward, att_kb_Backward])
        responses_attention.append(merge_responses)
        kb_attention.append(merge_kb)
        print("repsonses_attention[i]:", merge_responses._keras_shape)
        print("repsonses_attention[i]: (history)",
              merge_responses._keras_history)
        print("kb_attention[i]:", merge_kb._keras_shape)
        print("kb_attention[i]: (history)", merge_kb._keras_history, '\n')
        # (A former experiment swapped the forward and backward context
        # encodings between hops; it is disabled.)

    print("hop ended")

    # split encoded utterances & kb
    all_utterances_encoded_B = GetUtterancesTensor(all_utterances_kb_encoded)
    all_kb_encoded_K = GetKbTensor(all_utterances_kb_encoded)
    print("all_utterances_encoded_B: ", all_utterances_encoded_B._keras_shape)
    print("all_utterances_encoded_B: (history)",
          all_utterances_encoded_B._keras_history, '\n')
    print("all_kb_encoded_K: ", all_kb_encoded_K._keras_shape)
    print("all_kb_encoded_K: (history)", all_kb_encoded_K._keras_history, '\n')

    ############# Attention to Context #############
    # NOTE(review): the scores below are computed from the forward / backward
    # encodings, while both weighted sums are taken over the bidirectional
    # sum -- confirm this asymmetry is intended.
    # (Output shape: ? x MAX_CONTEXT_LEN x 1)
    attention_Forward_wrt_context = TimeDistributed(
        Dense_2,
        input_shape=(hparams.max_context_len,
                     hparams.memn2n_rnn_dim))(all_context_encoded_Forward)
    attention_Forward_wrt_context = amplify_layer(
        attention_Forward_wrt_context)
    attention_Forward_wrt_context = Add()(
        [attention_Forward_wrt_context, context_mask])
    attention_Forward_wrt_context = softmax_layer2(
        attention_Forward_wrt_context)
    print("attention_Forward_wrt_context: ",
          attention_Forward_wrt_context._keras_shape)
    print("attention_Forward_wrt_context: (history)",
          attention_Forward_wrt_context._keras_history)

    # (Output shape: ? x 1 x RNN_DIM)
    weighted_sum_Forward_wrt_context = Dot(axes=[1, 1])(
        [attention_Forward_wrt_context, all_context_encoded_Bidir_sum])
    print("weighted_sum_Forward_wrt_context: ",
          weighted_sum_Forward_wrt_context._keras_shape)
    print("weighted_sum_Forward_wrt_context: (history)",
          weighted_sum_Forward_wrt_context._keras_history, '\n')

    # (Output shape: ? x MAX_CONTEXT_LEN x 1)
    attention_Backward_wrt_context = TimeDistributed(
        Dense_2,
        input_shape=(hparams.max_context_len,
                     hparams.memn2n_rnn_dim))(all_context_encoded_Backward)
    attention_Backward_wrt_context = amplify_layer(
        attention_Backward_wrt_context)
    attention_Backward_wrt_context = Add()(
        [attention_Backward_wrt_context, context_mask])
    attention_Backward_wrt_context = softmax_layer2(
        attention_Backward_wrt_context)
    print("attention_Backward_wrt_context: ",
          attention_Backward_wrt_context._keras_shape)
    print("attention_Backward_wrt_context: (history)",
          attention_Backward_wrt_context._keras_history)

    # (Output shape: ? x 1 x RNN_DIM)
    weighted_sum_Backward_wrt_context = Dot(axes=[1, 1])(
        [attention_Backward_wrt_context, all_context_encoded_Bidir_sum])
    print("weighted_sum_Backward_wrt_context: ",
          weighted_sum_Backward_wrt_context._keras_shape)
    print("weighted_sum_Backward_wrt_context: (history)",
          weighted_sum_Backward_wrt_context._keras_history, '\n')

    att_Forward_wrt_context = Reshape(
        (1, hparams.max_context_len))(attention_Forward_wrt_context)
    att_Backward_wrt_context = Reshape(
        (1, hparams.max_context_len))(attention_Backward_wrt_context)
    context_attention = Concatenate(axis=1)(
        [att_Forward_wrt_context, att_Backward_wrt_context])

    context_encoded_AplusC = Add()(
        [weighted_sum_Forward_wrt_context, weighted_sum_Backward_wrt_context])
    # context_encoded_AplusC = Reshape((1,hparams.memn2n_rnn_dim))(context_encoded_AplusC)
    print("context_encoded_AplusC: ", context_encoded_AplusC.shape)
    print("context_encoded_AplusC: (history)",
          context_encoded_AplusC._keras_history, '\n')

    # (output shape: ? x 1 x NUM_KB_OPTIONS)
    kb_score = Dot(axes=[2, 2])([context_encoded_AplusC, all_kb_encoded_K])
    kb_score = amplify_layer(kb_score)
    kb_score = Add()([kb_score, kb_mask])
    kb_score = softmax_layer(kb_score)
    print("kb_score: ", kb_score._keras_shape)
    print("kb_score: (history)", kb_score._keras_history)

    # (output shape: ? x 1 x RNN_DIM)
    kb_weighted_sum = Dot(axes=[2, 1])([kb_score, all_kb_encoded_K])
    print("kb_weighted_sum: ", kb_weighted_sum._keras_shape)
    print("kb_weighted_sum: (history)", kb_weighted_sum._keras_history, '\n')

    ########## Normal Sum or Weighted Sum between context and external knowledge ##########
    ### Normal Sum ###
    # context_encoded_AplusCplusKB = Add()([context_encoded_AplusC,
    #                                       kb_weighted_sum])
    ### Weighted Sum ###
    # Stack the two summaries, then let Dense_3 mix them per RNN dimension.
    context_encoded_AplusCplusKB = Concatenate(axis=1)(
        [context_encoded_AplusC, kb_weighted_sum])
    context_encoded_AplusCplusKB = Permute(
        (2, 1),
        input_shape=(2, hparams.memn2n_rnn_dim))(context_encoded_AplusCplusKB)
    print("context_encoded_AplusCplusKB: ", context_encoded_AplusCplusKB.shape)
    print("context_encoded_AplusCplusKB: (history)",
          context_encoded_AplusCplusKB._keras_history, '\n')
    context_encoded_AplusCplusKB = TimeDistributed(
        Dense_3,
        input_shape=(hparams.memn2n_rnn_dim, 2))(context_encoded_AplusCplusKB)
    context_encoded_AplusCplusKB = Permute(
        (2, 1),
        input_shape=(hparams.memn2n_rnn_dim, 1))(context_encoded_AplusCplusKB)
    print("context_encoded_AplusCplusKB: ", context_encoded_AplusCplusKB.shape)
    print("context_encoded_AplusCplusKB: (history)",
          context_encoded_AplusCplusKB._keras_history, '\n')

    # (Output shape: ? x 1 x NUM_OPTIONS(100))
    logits = Dot(axes=[2, 2])(
        [context_encoded_AplusCplusKB, all_utterances_encoded_B])
    logits = Reshape((hparams.num_utterance_options, ))(logits)
    print("logits: ", logits.shape)
    print("logits: (history)", logits._keras_history, '\n')

    # Softmax over the candidate scores
    # (Output shape: BATCH_SIZE(?) x NUM_OPTIONS(100))
    probs = Activation('softmax', name='probs')(logits)
    print("probs: ", probs.shape)
    print("final History: ", probs._keras_history, '\n')

    # Return probabilities(likelihoods) of each of utterances
    # Those will be used to calculate the loss ('sparse_categorical_crossentropy')
    if hparams.hops == 1:
        # A single hop still gets a hop axis so the output rank is constant.
        responses_attention = expand_dim_layer(responses_attention[0])
        kb_attention = expand_dim_layer(kb_attention[0])
    else:
        responses_attention = Stack(responses_attention)
        kb_attention = Stack(kb_attention)

    context_attention = context_attention_layer(context_attention)
    responses_attention = responses_attention_layer(responses_attention)
    kb_attention = kb_attention_layer(kb_attention)
    print("context_attention:", context_attention._keras_shape)
    print("repsonses_attention:", responses_attention._keras_shape)
    print("kb_attention:", kb_attention._keras_shape)

    return probs, context_attention, responses_attention, kb_attention
merged = Merge([passage_net, question_net], mode='dot') # merged = Merge([passage_net, question_net], mode='cos') print("merged layer shape:", question_net.layers[-1].output_shape) model = Sequential() model.add(merged) # model.add(MyLayer(400)) # model.add(Reshape((1, 400, 400))) # model.add(Permute((0, 2, 1))) # model.add(MaxPooling2D(pool_size=(1, 25), border_mode='valid')) # model.add(AveragePooling2D(pool_size=(1, 4), border_mode='valid')) # model.add(Permute((0, 2, 1))) model.add(Permute((2, 1))) model.add(MaxPooling1D(pool_length=25, stride=None, border_mode='valid')) model.add(AveragePooling1D(pool_length=10, stride=None, border_mode='valid')) model.add(Permute((2, 1))) model.add(Flatten()) model.add(Dense(MAX_PASSAGE_LENGTH, activation='tanh')) #significantly improved accuracy # model.add(Dropout(.2)) model.add(Dense(MAX_PASSAGE_LENGTH, activation='softmax')) plot(model, to_file='model.png', show_shapes=True) # train a 1D convnet with global maxpooling model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['acc'])
def attention_3d_block(inputs, time_steps=30):
    """Re-weight a 3-D sequence tensor with a learned softmax attention map.

    The two non-batch axes are swapped, a ``Dense`` softmax layer of width
    ``time_steps`` is applied along the (now last) former axis 1, the axes are
    swapped back, and the result is multiplied element-wise with ``inputs``.

    Args:
        inputs: Keras tensor with two non-batch axes (presumably
            ``(batch, time_steps, features)`` -- TODO confirm with callers).
            Its axis 1 must have length ``time_steps`` so that the attention
            map has the same shape as ``inputs`` for the final ``Multiply``.
        time_steps: Width of the attention ``Dense`` layer.  Defaults to 30,
            the value that used to be hard-coded.

    Returns:
        Keras tensor of the same shape as ``inputs``: the inputs scaled by the
        attention probabilities.
    """
    # (batch, d1, d2) -> (batch, d2, d1): Dense acts on the last axis.
    swapped = Permute((2, 1))(inputs)
    scores = Dense(time_steps, activation='softmax')(swapped)
    # Back to the layout of `inputs`.
    a_probs = Permute((2, 1))(scores)
    return Multiply()([inputs, a_probs])
print(x_test.shape[0], 'test samples') # convert class vectors to binary class matrices y_train = keras.utils.to_categorical(y_train, num_classes) y_test = keras.utils.to_categorical(y_test, num_classes) y_ext = keras.utils.to_categorical(y_ext, num_classes) model = Sequential() model.add( Conv2D(16, kernel_size=(3, 3), activation='relu', strides=2, input_shape=input_shape)) model.add(Conv2D(32, (3, 3), strides=2, activation='relu')) model.add(Permute([1, 2, 3])) model.add(Reshape((6 * 6 * 32, ))) model.add(Dense(num_classes, activation='softmax')) model.compile(loss=keras.losses.categorical_crossentropy, optimizer=keras.optimizers.Adadelta(), metrics=['accuracy']) model.fit(x_train, y_train, batch_size=batch_size, epochs=epochs, verbose=1, validation_data=(x_test, y_test)) model.fit(x_ext, y_ext,
# encode input sequence and questions (which are indices) # to sequences of dense vectors input_encoded_m = input_encoder_m(input_sequence) input_encoded_c = input_encoder_c(input_sequence) question_encoded = question_encoder(question) # compute a 'match' between the first input vector sequence # and the question vector sequence # shape: `(samples, story_maxlen, query_maxlen)` match = dot([input_encoded_m, question_encoded], axes=(2, 2)) match = Activation('softmax')(match) # add the match matrix with the second input vector sequence response = add([match, input_encoded_c]) # (samples, story_maxlen, query_maxlen) response = Permute((2, 1))(response) # (samples, query_maxlen, story_maxlen) # # concatenate the match matrix with the question vector sequence answer = concatenate([response, question_encoded]) # # # the original paper uses a matrix multiplication for this reduction step. # # # we choose to use a RNN instead. answer = LSTM(32)(answer) # (samples, 32) answer = Dropout(0.3)(answer) answer = Dense(vocab_size ,activation='softmax')(answer) # (samples, vocab_size) # we output a probability distribution over the vocabulary #answer = Activation('softmax')(answer) # one regularization layer -- more would probably be needed. # answer1 = Dropout(0.3)(answer) # answer1 = Dense(vocab_size)(answer1) # (samples, vocab_size) # # we output a probability distribution over the vocabulary
def fcn32_blank(image_size=512):
    """Build an untrained ("blank") FCN-32-style ``Sequential`` model.

    The network stacks five ``convblock`` groups, two convolutional
    "fully connected" layers (fc6, fc7), a 21-filter scoring layer, one
    stride-2 deconvolution and a final crop that removes the margin gained
    by the deconvolution.

    Only the Sequential implementation exists; a functional-API version is
    described at gist.github.com/EncodeTS/6bbe8cb8bebad7a672f0d872561782d9.

    Args:
        image_size: Height and width of the square input; the input shape is
            declared as ``(3, image_size, image_size)``.

    Returns:
        The assembled, uncompiled ``Sequential`` model.

    Raises:
        AssertionError: if the deconvolution margin is not a positive, even
            number of pixels.
    """
    withDO = False  # no effect during evaluation but useful for fine-tuning

    mdl = Sequential()

    # First layer is a dummy-permutation = Identity to specify input shape
    mdl.add(Permute((1, 2, 3),
                    input_shape=(3, image_size, image_size)))  # WARNING : axis 0 is the sample dim

    # (filters, block index, bits) for each convolutional block, in order.
    for filters, block, bits in ((64, 1, 2), (128, 2, 2), (256, 3, 3),
                                 (512, 4, 3), (512, 5, 3)):
        for l in convblock(filters, block, bits=bits):
            mdl.add(l)

    mdl.add(Convolution2D(4096, 7, 7, border_mode='same', activation='relu',
                          name='fc6'))  # WARNING border
    if withDO:
        mdl.add(Dropout(0.5))
    mdl.add(Convolution2D(4096, 1, 1, border_mode='same', activation='relu',
                          name='fc7'))  # WARNING border
    if withDO:
        mdl.add(Dropout(0.5))

    # WARNING : model decapitation i.e. remove the classifier step of VGG16 (usually named fc8)
    mdl.add(Convolution2D(21, 1, 1,
                          border_mode='same',  # WARNING : zero or same ? does not matter for 1x1
                          activation='relu',
                          name='score_fr'))

    convsize = mdl.layers[-1].output_shape[2]
    deconv_output_size = (convsize - 1) * 2 + 4  # INFO: =34 when images are 512x512
    mdl.add(Deconvolution2D(21, 4, 4,
                            output_shape=(None, 21, deconv_output_size,
                                          deconv_output_size),
                            subsample=(2, 2),
                            border_mode='valid',  # WARNING : valid, same or full ?
                            activation=None,
                            name='score2'))

    extra_margin = deconv_output_size - convsize * 2  # INFO: =2 when images are 512x512
    assert (extra_margin > 0)
    assert (extra_margin % 2 == 0)
    # Integer division: plain `/` yields floats on Python 3, which are not
    # valid crop sizes. The margin is even (asserted above), so nothing is lost.
    crop = extra_margin // 2
    mdl.add(Cropping2D(cropping=((crop, crop),
                                 (crop, crop))))  # INFO : cropping as deconv gained pixels
    return mdl
def get_resnet_model(save_path, model_res=1024, image_size=256, depth=2, size=0, activation='elu', loss='logcosh', optimizer='adam'):
    """Load the model stored at ``save_path`` or build a fresh ResNet-based one.

    When ``save_path`` exists it is loaded and returned as is.  Otherwise an
    ImageNet-initialised ResNet backbone (chosen by ``size``) is followed by
    ``depth`` TreeConnect-style blocks of locally connected layers and a final
    reshape to ``(model_scale, 512)``, and the model is compiled.

    Args:
        save_path: Path of a saved model; loaded if it exists.
        model_res: Resolution used to derive ``model_scale``
            (``int(2 * (log2(model_res) - 1))``, e.g. 1024 -> 18).
        image_size: Side length of the square RGB input.
        depth: Number of locally connected blocks; negative values become 1.
        size: Backbone selector: ``<= 0`` ResNet50, ``1`` ResNet50V2,
            ``2`` ResNet101V2, ``>= 3`` ResNet152V2.
        activation: Activation used by the added layers.
        loss: Loss passed to ``compile``.
        optimizer: Optimizer passed to ``compile``.

    Returns:
        The loaded model, or the newly built and compiled ``Model``.
    """
    # Reuse an existing model when there is one.
    if os.path.exists(save_path):
        print('Loading model')
        return load_model(save_path)

    print('Building model')
    model_scale = int(2*(math.log(model_res,2)-1))  # For example, 1024 -> 18
    input_shape = (image_size, image_size, 3)

    # --- backbone -----------------------------------------------------------
    if size <= 0:
        from keras.applications.resnet50 import ResNet50
        resnet = ResNet50(include_top=False, pooling=None, weights='imagenet',
                          input_shape=input_shape)
    else:
        from keras_applications.resnet_v2 import ResNet50V2, ResNet101V2, ResNet152V2
        # The V2 constructors all take the same arguments.
        v2_args = dict(include_top=False, pooling=None, weights='imagenet',
                       input_shape=input_shape,
                       backend=keras.backend, layers=keras.layers,
                       models=keras.models, utils=keras.utils)
        if size == 1:
            resnet = ResNet50V2(**v2_args)
        elif size == 2:
            resnet = ResNet101V2(**v2_args)
        elif size >= 3:
            resnet = ResNet152V2(**v2_args)

    # --- work out layer dimensions -----------------------------------------
    layer_size = model_scale*8*8*8
    if is_square(layer_size):
        layer_l = layer_r = int(math.sqrt(layer_size)+0.5)
    else:
        half_log = math.log(math.sqrt(layer_size),2)
        layer_l = 2**math.ceil(half_log)
        layer_r = layer_size // layer_l
    layer_l, layer_r = int(layer_l), int(layer_r)

    inp = Input(shape=input_shape)
    x = resnet(inp)

    if depth < 0:
        depth = 1

    # --- squeeze the backbone output into a 2-D (rows, cols) tensor ----------
    if size <= 0:
        x = Conv2D(model_scale*8, 1, activation=activation)(x)  # scale down
        x = Reshape((layer_r, layer_l))(x)
    elif size <= 1:
        x = Conv2D(model_scale*8*4, 1, activation=activation)(x)  # scale down a little
        x = Reshape((layer_r*2, layer_l*2))(x)
    elif size == 2:
        x = Conv2D(1024, 1, activation=activation)(x)  # scale down a bit
        x = Reshape((256, 256))(x)
    else:
        x = Reshape((256, 512))(x)  # all weights used

    # --- TreeConnect inspired layers instead of dense layers ----------------
    # See https://github.com/OliverRichter/TreeConnect/blob/master/cifar.py
    previous_block = None
    blocks_left = depth
    while blocks_left > 0:
        x = LocallyConnected1D(layer_r, 1, activation=activation)(x)
        x = Permute((2, 1))(x)
        x = LocallyConnected1D(layer_l, 1, activation=activation)(x)
        x = Permute((2, 1))(x)
        if previous_block is not None:
            x = Add()([x, previous_block])  # add skip connection
        previous_block = x
        blocks_left -= 1

    x = Reshape((model_scale, 512))(x)  # train against all dlatent values

    # By default: adam optimizer, logcosh used for loss.
    model = Model(inputs=inp,outputs=x)
    model.compile(loss=loss, metrics=[], optimizer=optimizer)
    return model
def model(is_training=True, img_shape=(32, 256, 1), num_classes=11, max_label_length=26):
    """Build a CRNN (conv stack -> two bidirectional LSTMs -> softmax) with CTC loss.

    Two models sharing the same layers are created and both are summarised on
    stdout: the base model mapping an image to per-timestep class
    probabilities, and the training model that additionally takes the labels
    and sequence lengths and outputs the CTC loss.

    Args:
        is_training: Return the CTC training model when true, otherwise the
            base prediction model.
        img_shape: ``(height, width, channels)`` of the input picture.
        num_classes: Width of the softmax output layer.
        max_label_length: Length of the ``y_true`` label input.

    Returns:
        The training model if ``is_training`` else the base model.
    """
    initializer = keras.initializers.he_normal()
    picture_height, picture_width, picture_channel = img_shape

    def conv_bn_relu(tensor, idx, filters, kernel=(3, 3), bn_args=None, **conv_args):
        # One Conv2D -> BatchNormalization -> ReLU stage named by its index.
        tensor = Conv2D(filters, kernel, strides=(1, 1), padding="same",
                        kernel_initializer=initializer, use_bias=True,
                        name='conv2d_%d' % idx, **conv_args)(tensor)
        tensor = BatchNormalization(name='BN_%d' % idx, **(bn_args or {}))(tensor)
        return Activation("relu", name='relu_%d' % idx)(tensor)

    # ---- CNN part: VGG-like stack of 7 convolutions -----------------------
    # Shape comments assume the default img_shape.
    inputs = Input(shape=(picture_height, picture_width, picture_channel),
                   name='pic_inputs')  # H x W x 1: 32*256*1

    x = conv_bn_relu(inputs, 1, 64)  # 32*256*64
    x = MaxPooling2D(pool_size=(2, 2), strides=2, padding='valid',
                     name='maxpl_1')(x)  # 16*128*64

    x = conv_bn_relu(x, 2, 128)  # 16*128*128
    x = MaxPooling2D(pool_size=(2, 2), strides=2, padding='valid',
                     name='maxpl_2')(x)  # 8*64*128

    x = conv_bn_relu(x, 3, 256)  # 8*64*256
    x = conv_bn_relu(x, 4, 256)  # 8*64*256
    # From here on only the height is pooled; the width (time axis) is kept.
    x = MaxPooling2D(pool_size=(2, 1), strides=(2, 1), name='maxpl_3')(x)  # 4*64*256

    x = conv_bn_relu(x, 5, 512, bn_args={'axis': -1})  # 4*64*512
    x = conv_bn_relu(x, 6, 512, bn_args={'axis': -1})  # 4*64*512
    x = MaxPooling2D(pool_size=(2, 1), strides=(2, 1), name='maxpl_4')(x)  # 2*64*512

    # The 7th convolution carries its own relu in addition to relu_7.
    x = conv_bn_relu(x, 7, 512, kernel=(2, 2), activation='relu')  # 2*64*512
    conv_output = MaxPooling2D(pool_size=(2, 1), name="conv_output")(x)  # 1*64*512

    # ---- Map2Sequence part --------------------------------------------------
    x = Permute((2, 3, 1), name='permute')(conv_output)  # 64*512*1
    rnn_input = TimeDistributed(Flatten(), name='for_flatten_by_time')(x)  # 64*512

    # ---- RNN part -------------------------------------------------------------
    y = Bidirectional(LSTM(256, kernel_initializer=initializer,
                           return_sequences=True),
                      merge_mode='sum', name='LSTM_1')(rnn_input)  # 64*512
    y = BatchNormalization(name='BN_8')(y)
    y = Bidirectional(LSTM(256, kernel_initializer=initializer,
                           return_sequences=True),
                      name='LSTM_2')(y)  # 64*512

    # (idea noted by the author: try skipping the RNN layers)
    # y_pred is used for evaluation and for later test-time detection.
    # The backend's ctc_loss implementation does not apply softmax itself,
    # so the softmax must be applied here.
    y_pred = Dense(num_classes, activation='softmax', name='y_pred')(y)  # 64*11

    base_model = keras.models.Model(inputs=inputs, outputs=y_pred)
    print('BASE_MODEL: ')
    base_model.summary()

    # ---- Transcription part (CTC_loss part) ---------------------------------
    y_true = Input(shape=[max_label_length], name='y_true')
    y_pred_length = Input(shape=[1], name='y_pred_length')
    y_true_length = Input(shape=[1], name='y_true_length')
    ctc_inputs = [y_true, y_pred, y_pred_length, y_true_length]
    ctc_loss_output = Lambda(ctc_loss_layer, output_shape=(1, ),
                             name='ctc_loss_output')(ctc_inputs)

    full_model = keras.models.Model(
        inputs=[y_true, inputs, y_pred_length, y_true_length],
        outputs=ctc_loss_output)
    print("FULL_MODEL: ")
    full_model.summary()

    return full_model if is_training else base_model
############################################################################################################ ############################# CNN RESNET 50 #################################### ############################################################################################################ input1 = Input(shape=(1, 29, 112, 112)) pad1 = ZeroPadding3D((1, 3, 3))(input1) conv1 = Conv3D(64, (5, 7, 7), name="conv1", strides=(1, 2, 2), padding="valid")(pad1) B1 = BatchNormalization(axis=1)(conv1) act1 = Activation('relu')(B1) padm1 = ZeroPadding3D((0, 1, 1))(act1) m1 = MaxPooling3D((1, 3, 3), strides=(1, 2, 2))(padm1) perm1 = Permute(dims=(2, 1, 3, 4))(m1) Flat1 = Reshape((27, 64 * 28 * 28))(perm1) lin1 = TimeDistributed(Dense(384))(Flat1) B_lin1 = BatchNormalization(axis=-1)(lin1) act_lin1 = Activation('relu')(B_lin1) lin2 = TimeDistributed(Dense(384))(act_lin1) B_lin2 = BatchNormalization(axis=-1)(lin2) act_lin2 = Activation('relu')(B_lin2) lin3 = TimeDistributed(Dense(256))(act_lin2) B_lin3 = BatchNormalization(axis=-1)(lin3) act_lin3 = Activation('relu')(B_lin3) conv2 = Conv1D(512, 5, name="conv2", strides=2, padding="valid")(act_lin3)
d_input1 = Input(shape=(que_pad, )) d_input2 = Input(shape=(1, )) #,mask_zero=True con = concatenate([d_input1, d_input2], axis=1) d_emb = Embedding(num_words + 1, dim, input_length=(que_pad + 1))(con) t1 = Lambda(slice1)(d_emb) t2 = Lambda(slice2)(d_emb) activations = GRU(dim, return_sequences=True)(t1) attention_weight = TimeDistributed(Dense(1, activation='tanh'))(activations) attention_weight = Flatten()(attention_weight) attention_weight = Activation('softmax')(attention_weight) attention = RepeatVector(dim)(attention_weight) attention = Permute([2, 1])(attention) sent_representation = multiply([activations, attention]) sent_representation = Lambda(lambda xin: K.sum(xin, axis=-2), output_shape=(dim, ))(sent_representation) activations2 = GRU(dim, return_sequences=True)(t2) attention_weight2 = TimeDistributed(Dense(1, activation='tanh'))(activations2) attention_weight2 = Flatten()(attention_weight2) attention_weight2 = Activation('softmax')(attention_weight2) attention2 = RepeatVector(dim)(attention_weight2) attention2 = Permute([2, 1])(attention2) sent_representation2 = multiply([activations2, attention2]) sent_representation2 = Lambda(lambda xin: K.sum(xin, axis=-2), output_shape=(dim, ))(sent_representation2) final_represent = concatenate([sent_representation, sent_representation2],