def __depthwise_conv_block(input, pointwise_conv_filters, alpha, depth_multiplier=1, strides=(1, 1), id=1):
    """Build one depthwise-separable convolution unit.

    A 3x3 depthwise convolution is followed by a 1x1 pointwise convolution;
    each convolution is followed by batch normalization and a ReLU capped
    at 6.

    Args:
        input: Tensor fed to the depthwise convolution.
        pointwise_conv_filters: Base filter count of the pointwise
            convolution; multiplied by ``alpha`` and truncated to an int.
        alpha: Width multiplier applied to ``pointwise_conv_filters``.
        depth_multiplier: Forwarded to the depthwise convolution.
        strides: Strides of the depthwise convolution.
        id: Integer embedded in every layer name of this block.

    Returns:
        The output tensor of the last activation.
    """
    if K.image_data_format() == 'channels_first':
        bn_axis = 1
    else:
        bn_axis = -1
    n_pointwise = int(pointwise_conv_filters * alpha)

    # Depthwise stage.
    depthwise = DepthwiseConvolution2D(
        kernel_size=(3, 3),
        padding='same',
        depth_multiplier=depth_multiplier,
        strides=strides,
        use_bias=False,
        name='conv_dw_%d' % id,
    )
    out = depthwise(input)
    out = BatchNormalization(axis=bn_axis, name='conv_dw_%d_bn' % id)(out)
    out = Activation(lambda t: relu(t, max_value=6),
                     name='conv_dw_%d_relu' % id)(out)

    # Pointwise stage.
    pointwise = Convolution2D(
        n_pointwise,
        (1, 1),
        padding='same',
        use_bias=False,
        strides=(1, 1),
        name='conv_pw_%d' % id,
    )
    out = pointwise(out)
    out = BatchNormalization(axis=bn_axis, name='conv_pw_%d_bn' % id)(out)
    out = Activation(lambda t: relu(t, max_value=6),
                     name='conv_pw_%d_relu' % id)(out)
    return out
def call(self, inputs):
    """Fuse two inputs by projected element-wise product and sum pooling.

    Args:
        inputs: Pair ``(x, y)`` of tensors to fuse.

    Returns:
        The pooled tensor after signed square root and L2 normalization.
    """
    x, y = inputs
    # NOTE(review): the two projections are assumed to be backend dot
    # products of each input with its weight matrix — confirm.
    x = K.dot(x, self.x_weights)  # batch, (pool_k*pool_o)
    y = K.dot(y, self.y_weights)  # batch, (pool_k*pool_o)
    out = self.ewmultiply([x, y])  # batch, (pool_k*pool_o)
    out = self.reshape(out)  # batch, pool_k, pool_o
    out = K.sum(out, axis=2)  # batch, pool_o
    # Signed square root: sqrt of the positive part minus sqrt of the
    # negated negative part.
    out = K.sqrt(relu(out)) - K.sqrt(relu(-out))
    out = K.l2_normalize(out)  # batch, pool_o
    return out
def step(self, x, states):
    """Run one decoding step of the attention recurrent cell.

    Args:
        x: Step input; not read by this method.
        states: Pair ``(ytm, stm)`` holding the previous output and the
            previous hidden state.

    Returns:
        ``(at, [yt, st])`` when ``self.return_probabilities`` is set,
        otherwise ``(yt, [yt, st])``, where ``at`` are the attention
        weights, ``yt`` the new output and ``st`` the new hidden state.
    """
    ytm, stm = states

    # NOTE(review): every projection below is assumed to be a backend dot
    # product, and the operand feeding each weight matrix (ytm -> W_*,
    # stm -> U_*, context -> C_*) is an assumption — confirm.

    # Repeat the hidden state to the length of the sequence.
    _stm = K.repeat(stm, self.timesteps)

    # Multiply the weight matrix with the repeated hidden state.
    _Wxstm = K.dot(_stm, self.W_a)

    # Attention energies: how much each timestep contributes to this one.
    # NOTE(review): the squashing function applied before V_a is assumed to
    # be tanh — confirm.
    et = K.dot(activations.tanh(_Wxstm + self._uxpb),
               K.expand_dims(self.V_a))
    at = K.exp(et)
    at_sum = K.sum(at, axis=1)
    at_sum_repeated = K.repeat(at_sum, self.timesteps)
    at /= at_sum_repeated  # vector of size (batchsize, timesteps, 1)

    # Context vector: attention-weighted sum over the input sequence.
    context = K.squeeze(K.batch_dot(at, self.x_seq, axes=1), axis=1)

    # "r" gate.
    rt = activations.sigmoid(
        K.dot(ytm, self.W_r)
        + K.dot(stm, self.U_r)
        + K.dot(context, self.C_r)
        + self.b_r)

    # "z" gate.
    zt = activations.sigmoid(
        K.dot(ytm, self.W_z)
        + K.dot(stm, self.U_z)
        + K.dot(context, self.C_z)
        + self.b_z)

    # Proposal hidden state.
    s_tp = activations.relu(
        K.dot(ytm, self.W_p)
        + K.dot((rt * stm), self.U_p)
        + K.dot(context, self.C_p)
        + self.b_p)

    # New hidden state: interpolate between previous state and proposal.
    st = (1 - zt) * stm + zt * s_tp

    yt = activations.softmax(
        K.dot(ytm, self.W_o)
        + K.dot(stm, self.U_o)
        + K.dot(context, self.C_o)
        + self.b_o)

    if self.return_probabilities:
        return at, [yt, st]
    else:
        return yt, [yt, st]
def model_land_mark_detection_inception(input_shape):
    """Build the inception-style network with a 30-unit output layer.

    The network is a two-convolution stem, nine inception blocks with two
    interleaved stride-2 max-pools, average pooling, and two dense layers.
    Every activation is a ReLU capped at 96.

    Args:
        input_shape: Shape passed to the model's ``Input`` layer.

    Returns:
        The (uncompiled) Keras ``Model``.
    """
    fmt = 'channels_last'

    # Inception stages in network order; ``None`` marks a stride-2 max-pool.
    stages = [
        ('a', {'1x1': 64, '3x3_reduce': 96, '3x3': 128, '5x5_reduce': 16, '5x5': 32, 'pool_proj': 32}),
        ('b', {'1x1': 128, '3x3_reduce': 128, '3x3': 192, '5x5_reduce': 32, '5x5': 96, 'pool_proj': 64}),
        None,
        ('c', {'1x1': 192, '3x3_reduce': 96, '3x3': 208, '5x5_reduce': 16, '5x5': 48, 'pool_proj': 64}),
        ('d', {'1x1': 160, '3x3_reduce': 112, '3x3': 224, '5x5_reduce': 24, '5x5': 64, 'pool_proj': 64}),
        ('e', {'1x1': 128, '3x3_reduce': 128, '3x3': 256, '5x5_reduce': 24, '5x5': 64, 'pool_proj': 64}),
        ('f', {'1x1': 112, '3x3_reduce': 144, '3x3': 288, '5x5_reduce': 32, '5x5': 64, 'pool_proj': 64}),
        ('g', {'1x1': 256, '3x3_reduce': 160, '3x3': 320, '5x5_reduce': 32, '5x5': 128, 'pool_proj': 128}),
        None,
        ('h', {'1x1': 256, '3x3_reduce': 160, '3x3': 320, '5x5_reduce': 32, '5x5': 128, 'pool_proj': 128}),
        ('r', {'1x1': 384, '3x3_reduce': 192, '3x3': 384, '5x5_reduce': 48, '5x5': 128, 'pool_proj': 128}),
    ]

    x_train_input = Input(input_shape)
    net = x_train_input

    # Stem: conv -> capped ReLU -> stride-1 max-pool, twice.
    stem = ((64, (7, 7), (2, 2)), (192, (3, 3), (1, 1)))
    for n_filters, kernel, conv_strides in stem:
        net = Conv2D(n_filters, kernel, strides=conv_strides, data_format=fmt)(net)
        net = Activation(activation=lambda t: relu(t, max_value=96))(net)
        net = MaxPooling2D(pool_size=3, strides=1, data_format=fmt)(net)

    for stage in stages:
        if stage is None:
            net = MaxPooling2D(pool_size=3, strides=2, data_format=fmt)(net)
            continue
        block_name, dict_c = stage
        net = inception_block(net, name=block_name, channels=dict_c)

    # Classifier head.
    net = AveragePooling2D(pool_size=7, strides=1, data_format=fmt)(net)
    net = Flatten()(net)
    net = Dense(1024, activation=lambda t: relu(t, max_value=96))(net)
    net = Dropout(0.4)(net)
    net = Dense(30, activation=lambda t: relu(t, max_value=96))(net)

    return Model(inputs=x_train_input, outputs=net)
def identity_block_2D(self, X, filters, kernel_size, stage, block):
    """Residual identity block: three Conv2D+BN stages plus a skip connection.

    Args:
        X: Input data/tensor.
        filters: List of 3 ints, the filter counts of the three Conv2D layers.
        kernel_size: Int, side length of the middle Conv2D kernel.
        stage: Stage descriptor used in the layer names.
        block: Block descriptor used in the layer names.

    Returns:
        The tensor obtained by adding the input to the third stage's output
        and applying relu.
    """
    shortcut = X
    n_first, n_middle, n_last = filters
    middle_kernel = (kernel_size, kernel_size)
    suffix = '_Stage_' + str(stage) + '_Block_' + str(block)
    conv_name = 'Conv2D' + suffix
    BN_name = 'BN2D' + suffix

    def conv_bn(tensor, n_filters, kernel, pad, tag):
        # One convolution followed by batch normalization over the last axis.
        tensor = Conv2D(filters=n_filters, kernel_size=kernel,
                        strides=(1, 1), padding=pad,
                        kernel_initializer='glorot_uniform',
                        name=conv_name + tag)(tensor)
        return BatchNormalization(axis=-1, name=BN_name + tag)(tensor)

    # first block
    out = relu(conv_bn(X, n_first, (1, 1), 'valid', '_a'))
    # middle block
    out = relu(conv_bn(out, n_middle, middle_kernel, 'same', '_b'))
    # last block: the activation is applied only after the skip connection
    out = conv_bn(out, n_last, (1, 1), 'valid', '_c')
    out = Add()([out, shortcut])
    return relu(out)
def global_context_block(x):
    """Global context (GC) block.

    Uses a high-dimensional matrix product
    ``[n, 1, c, hw] * [n, 1, hw, 1] = [n, 1, c, 1]`` to pool a context
    vector, transforms it, and adds it back onto the input.

    :parameter x: input layer or tensor
    """
    _, _, _, channels = x.get_shape().as_list()

    # Features flattened over the spatial axes.
    feats = Reshape((-1, channels))(x)  # [N, H*W, C]
    feats = tf.transpose(feats, perm=[0, 2, 1])  # [N,C,H*W]
    feats = tf.expand_dims(feats, axis=1)

    # Attention mask over spatial positions.
    mask = Conv2D(filters=1, kernel_size=(1, 1))(x)
    mask = Reshape((-1, 1))(mask)
    mask = softmax(mask, axis=1)  # [N, H*W, 1]
    mask = tf.transpose(mask, [0, 2, 1])
    mask = tf.expand_dims(mask, axis=-1)

    pooled = tf.matmul(feats, mask)  # [N,1,c,1]
    pooled = Reshape((1, 1, channels))(pooled)

    # Bottleneck transform of the pooled context.
    transformed = Conv2D(int(channels / 8), (1, 1))(pooled)
    transformed = LayerNormalization()(transformed)
    transformed = relu(transformed)
    transformed = Conv2D(channels, (1, 1))(transformed)

    return x + transformed
def test_relu():
    """relu should return the values from get_standard_values() unchanged."""
    placeholder = K.placeholder(ndim=2)
    relu_fn = K.function([placeholder], [activations.relu(placeholder)])

    expected = get_standard_values()
    outputs = relu_fn([expected])
    assert_allclose(outputs[0], expected, rtol=1e-05)
def gcnet_layer(inputs):
    """Global-context layer built from Keras layers and Lambda wrappers.

    A context vector is pooled from ``inputs`` using a softmax mask over the
    flattened spatial axis, passed through a bottleneck transform, and added
    back onto ``inputs``.

    Args:
        inputs: Tensor whose static shape unpacks into four dimensions.

    Returns:
        The result of ``add([inputs, context_transform])``.
    """
    _, _, _, channels = inputs.get_shape().as_list()

    # Features flattened over the spatial axes.
    feats = Reshape((-1, channels))(inputs)  # [N, H*W, C]
    feats = Lambda(transpose)(feats)  # [N,C,H*W]
    feats = Lambda(expand_dims1)(feats)

    # Attention mask over spatial positions.
    mask = Conv2D(filters=1, kernel_size=(1, 1))(inputs)
    mask = Reshape((-1, 1))(mask)
    mask = softmax(mask, axis=1)  # [N, H*W, 1]
    mask = Lambda(transpose)(mask)
    mask = Lambda(expand_dims2)(mask)

    pooled = Lambda(matmul)([feats, mask])  # [N,1,c,1]
    pooled = Reshape((1, 1, channels))(pooled)

    # Bottleneck transform of the pooled context.
    transformed = Conv2D(int(channels / 8), (1, 1))(pooled)
    transformed = LayerNormalization()(transformed)
    transformed = relu(transformed)
    transformed = Conv2D(channels, (1, 1))(transformed)

    return add([inputs, transformed])
def call(self, x):
    """Apply the patch-based convolution to ``[signal, patches_idx, conv_kernel]``.

    Args:
        x: List whose first three entries are the signal, the gather indices
            of the patches, and the per-patch convolution kernel.

    Returns:
        The convolved tensor after bias, optional relu, optional max-pooling
        and optional upsampling.
    """
    assert isinstance(x, list)

    # Optionally max-pool the signal before gathering patches.
    if self.tree_spacing > 0:
        signal = pool_input_kd(x[0], self.tree_spacing, pool_mode='max')
    else:
        signal = x[0]
    patches_idx, conv_kernel = x[1], x[2]

    gathered = tf.gather_nd(signal, patches_idx)
    projected = tf.einsum('bvprn,bvpc->bvcrn', conv_kernel, gathered)
    out = tf.einsum('ijrn,bvjrn->bvi', self.kernel_weights, projected)
    out = tf.nn.bias_add(out, self.biases)

    if self.with_relu:
        out = activations.relu(out)
    if self.max_pool > 0:
        out = pool_input_kd(out, self.max_pool, pool_mode='max')
    if self.keep_num_points and self.strides > 0:
        out = kd_tree_upsample(out, self.strides + self.max_pool)
    return out
def _inverted_res_block(inputs, expansion, stride, alpha, filters, block_id, skip_connection, rate=1):
    """Inverted residual block: optional expand, depthwise, then project.

    Args:
        inputs: Input tensor; its ``_keras_shape[-1]`` is taken as the
            channel count.
        expansion: Channel multiplier of the 1x1 expansion convolution.
        stride: Strides of the depthwise convolution.
        alpha: Width multiplier applied to ``filters``.
        filters: Base number of output filters.
        block_id: Block index; a falsy value skips the expansion stage and
            uses the un-numbered name prefix.
        skip_connection: When truthy, the input is added to the output.
        rate: Dilation rate of the depthwise convolution.

    Returns:
        The block's output tensor.
    """
    in_channels = inputs._keras_shape[-1]
    pointwise_filters = _make_divisible(int(filters * alpha), 8)

    out = inputs
    if not block_id:
        prefix = 'expanded_conv_'
    else:
        prefix = 'expanded_conv_{}_'.format(block_id)
        # Expand
        out = Conv2D(expansion * in_channels, kernel_size=1, padding='same',
                     use_bias=False, activation=None,
                     name=prefix + 'expand')(out)
        out = BatchNormalization(epsilon=1e-3, momentum=0.999,
                                 name=prefix + 'expand_BN')(out)
        out = Activation(relu6, name=prefix + 'expand_relu')(out)

    # Depthwise
    out = DepthwiseConv2D(kernel_size=3, strides=stride, activation=None,
                          use_bias=False, padding='same',
                          dilation_rate=(rate, rate),
                          name=prefix + 'depthwise')(out)
    out = BatchNormalization(epsilon=1e-3, momentum=0.999,
                             name=prefix + 'depthwise_BN')(out)
    out = Lambda(lambda t: relu(t, max_value=6.),
                 name=prefix + 'depthwise_relu')(out)

    # Project (linear: no activation after the final batch norm)
    out = Conv2D(pointwise_filters, kernel_size=1, padding='same',
                 use_bias=False, activation=None,
                 name=prefix + 'project')(out)
    out = BatchNormalization(epsilon=1e-3, momentum=0.999,
                             name=prefix + 'project_BN')(out)

    if not skip_connection:
        return out
    return Add(name=prefix + 'add')([inputs, out])
def call(self, x, mask=None):
    """Compute per-head attention weights for ``x``.

    Args:
        x: Input tensor.
        mask: Unused.

    Returns:
        The attention tensor, with ``self.softmax`` applied while the last
        two axes are swapped and the original axis order restored afterwards.
    """
    # NOTE(review): both projections are assumed to be backend dot products
    # (x with self.W, then the hidden activations with self.A) — confirm.
    h = Activation.relu(K.dot(x, self.W))  # size (batch, seq_len, phone_len, h_dim)
    h = K.dot(h, self.A)  # size (batch, seq_len, phone_len, num_heads)
    # Swap the last two axes so self.softmax sees the other axis last,
    # then swap back.
    att = K.permute_dimensions(h, (0, 1, 3, 2))
    att = self.softmax(att)
    att = K.permute_dimensions(att, (0, 1, 3, 2))
    return att
def _create_critic_model(self, model_name, hidden, lr):
    """Build and compile the critic network.

    The state and action inputs are concatenated and passed through one
    linear Dense layer plus leaky ReLU (slope 0.01) per entry of ``hidden``,
    followed by a single linear output unit.

    Args:
        model_name: Name given to the Keras model.
        hidden: Sequence of hidden layer sizes, applied in order.
        lr: Learning rate for the Adam optimizer.

    Returns:
        Tuple ``(model, state_input, action_input)``.
    """
    state_input = Input(shape=[self.__state_size], name='state_inputs')
    action_input = Input(
        shape=[self.__action_size + self.__action_param_size],
        name='action_inputs')
    layer = concatenate([state_input, action_input], name='input_layer')

    # Every hidden layer has the same structure, so a single loop covers
    # the first layer as well.
    for layer_size in hidden:
        layer = Dense(layer_size, activation='linear',
                      name='dense' + str(layer_size) + '_layer')(layer)
        layer = Activation(lambda x: relu(x, alpha=0.01),
                           name='activation_' + str(layer_size))(layer)

    output_layer = Dense(1, activation='linear', name='q_values_layer')(layer)
    model = Model(inputs=[state_input, action_input], outputs=output_layer,
                  name=model_name)
    #model = multi_gpu_model(model, gpus=3)
    model.compile(loss='mse',
                  optimizer=Adam(lr=lr, beta_1=FLAGS.momentum,
                                 beta_2=FLAGS.momentum2,
                                 clipnorm=FLAGS.clip_grad, decay=0.0,
                                 epsilon=0.00000001))
    logging.debug(model_name + ' model:')
    # summary() returns None, so route its lines into the debug log through
    # print_fn instead of logging the return value.
    model.summary(print_fn=logging.debug)
    return model, state_input, action_input
def f(x):
    """Apply relu to ``x`` and then a 1-D convolution.

    The convolution uses ``filter_count``, ``kernel_size`` and
    ``l2_reg_convo`` from the enclosing scope.

    Args:
        x: Input tensor.

    Returns:
        The output tensor of the convolution.
    """
    # Lambda takes a callable and must itself be applied to the tensor;
    # handing it relu(x) would pass a tensor where a function is expected
    # and leave a layer object (not a tensor) as the convolution input.
    activated = Lambda(lambda t: relu(t))(x)
    convLayer = Conv1D(
        filter_count,
        kernel_size,
        kernel_initializer=RandomNormal(mean=0.0, stddev=0.001),
        kernel_regularizer=keras.regularizers.l2(l2_reg_convo))(activated)
    return convLayer
def test_relu():
    """Check relu on the standard values and with two ``max_value`` caps."""
    x = K.placeholder(ndim=2)
    f = K.function([x], [activations.relu(x)])

    test_values = get_standard_values()
    result = f([test_values])[0]
    assert_allclose(result, test_values, rtol=1e-05)

    # Test max_value
    test_values = np.array([[0.5, 1.5]], dtype=K.floatx())
    f = K.function([x], [activations.relu(x, max_value=1.)])
    result = f([test_values])[0]
    assert np.max(result) <= 1.

    # Test max_value == 6.
    # The cap must actually be 6 here; with a cap of 1 the assertion below
    # would pass without exercising this case.
    test_values = np.array([[0.5, 6.]], dtype=K.floatx())
    f = K.function([x], [activations.relu(x, max_value=6.)])
    result = f([test_values])[0]
    assert np.max(result) <= 6.
def testRelu():
    """Check relu on the standard values and with two ``max_value`` caps."""
    # testing rectified linear units
    x = K.placeholder(ndim=2)
    f = K.function([x], [activations.relu(x)])
    testValues = getStdValues()
    result = f([testValues])[0]
    assert_allclose(result, testValues, rtol=1e-05)

    # testing max_value
    testValues = np.array([[0.5, 1.5]], dtype=K.floatx())
    f = K.function([x], [activations.relu(x, max_value=1.)])
    result = f([testValues])[0]
    assert np.max(result) <= 1.

    # testing max_value == 6.
    # The cap must actually be 6 here; with a cap of 1 the assertion below
    # would pass without exercising this case.
    testValues = np.array([[0.5, 6.]], dtype=K.floatx())
    f = K.function([x], [activations.relu(x, max_value=6.)])
    result = f([testValues])[0]
    assert np.max(result) <= 6.
def inverted_res_block(self, inputs, expansion, stride, alpha, filters, block_id, skip_connection, rate=1, eps=1e-3):
    """Inverted residual block: optional expand, depthwise, then project.

    Args:
        inputs: Input tensor; its ``_keras_shape[-1]`` is taken as the
            channel count.
        expansion: Channel multiplier of the 1x1 expansion convolution.
        stride: Strides of the depthwise convolution.
        alpha: Width multiplier applied to ``filters``.
        filters: Base number of output filters.
        block_id: A falsy value skips the expansion stage.
        skip_connection: When truthy, the input is added to the output.
        rate: Dilation rate of the depthwise convolution.
        eps: Epsilon used by every batch-normalization layer in the block.

    Returns:
        The block's output tensor.
    """
    in_channels = inputs._keras_shape[-1]
    pointwise_conv_filters = int(filters * alpha)
    pointwise_filters = make_divisible(pointwise_conv_filters, 8)
    x = inputs

    if block_id:
        # Expand
        x = Conv2D(expansion * in_channels, kernel_size=1, padding='same',
                   use_bias=False, activation=None)(x)
        # eps is honoured here instead of a hard-coded value; its default
        # keeps the previous epsilon of 1e-3.
        x = BatchNormalization(epsilon=eps, momentum=0.999)(x)
        x = Lambda(lambda x: relu(x, max_value=6.))(x)

    # Depthwise
    x = DepthwiseConv2D(kernel_size=3, strides=stride, activation=None,
                        use_bias=False, padding='same',
                        dilation_rate=(rate, rate))(x)
    x = BatchNormalization(epsilon=eps, momentum=0.999)(x)
    x = Lambda(lambda x: relu(x, max_value=6.))(x)

    # Project (no activation after the final batch norm)
    x = Conv2D(pointwise_filters, kernel_size=1, padding='same',
               use_bias=False, activation=None)(x)
    x = BatchNormalization(epsilon=eps, momentum=0.999)(x)

    if skip_connection:
        return Add()([inputs, x])
    return x
def call(self, x):
    """Compute an attention map from a gating signal and a feature map.

    Args:
        x: List ``[g, x]`` holding the gating tensor and the feature tensor.

    Returns:
        The sigmoid attention map, bilinearly upsampled by a factor of 2.
    """
    assert isinstance(x, list)
    gate, features = x

    # NOTE(review): these layers are instantiated on every call, so each
    # invocation creates fresh layer objects — confirm this is intended.
    feat_proj = Conv2D(self.output_dim, (1, 1))(features)
    feat_proj = MaxPooling2D((2, 2))(feat_proj)
    gate_proj = Conv2D(self.output_dim, (1, 1))(gate)

    combined = relu(feat_proj + gate_proj)
    attention = Conv2D(1, (1, 1), activation='sigmoid')(combined)
    return UpSampling2D((2, 2), interpolation='bilinear')(attention)
def call(self, input, mask=None):
    """Compute the (unnormalized) policy from three value tensors.

    Args:
        input: Sequence whose first three entries are the previous,
            previous-previous and current values.
        mask: Unused.

    Returns:
        ``exp`` of the second affine layer's output.
    """
    prev_values = input[0]
    prev_prev_values = input[1]
    current_values = input[2]
    s_input = K.concatenate(
        [prev_values, prev_prev_values, current_values], axis=1)
    # NOTE(review): both affine layers are assumed to use backend dot
    # products (s_input with W_S1, then s with W_S2) — confirm.
    s = relu(K.dot(s_input, self.W_S1) + self.b_S1)
    policy = K.exp(K.dot(s, self.W_S2) + self.b_S2)
    return policy
def call(self, inputs):
    """Run all attention heads over the graph and merge their outputs.

    Args:
        inputs: Pair ``[X, A]`` of node features and adjacency matrix.

    Returns:
        The activated, concatenated or averaged (per
        ``self.attn_heads_reduction``) head outputs.
    """
    X = inputs[0]  # Node features (N x F)
    A = inputs[1]  # Adjacency matrix (N x N)

    # NOTE(review): every projection below is assumed to be a backend dot
    # product, and the first operand of each (X, features, dropout_attn)
    # is an assumption where it was not visible — confirm.
    outputs = []
    for head in range(self.attn_heads):
        kernel = self.kernels[head]  # W in the paper (F x F')
        # Attention kernel a in the paper (2F' x 1)
        attention_kernel = self.attn_kernels[head]

        # Compute inputs to attention network
        features = K.dot(X, kernel)  # (N x F')

        # Compute feature combinations
        # Note: [[a_1], [a_2]]^T [[Wh_i], [Wh_2]] = [a_1]^T [Wh_i] + [a_2]^T [Wh_j]
        attn_for_self = K.dot(features, attention_kernel[0])  # (N x 1), [a_1]^T [Wh_i]
        attn_for_neighs = K.dot(features, attention_kernel[1])  # (N x 1), [a_2]^T [Wh_j]

        # Attention head a(Wh_i, Wh_j) = a^T[[Wh_i], [Wh_j]]
        dense = attn_for_self + K.transpose(attn_for_neighs)  # (N x N) via broadcasting

        # Add nonlinearity (leaky relu with slope 0.2)
        dense = activations.relu(dense, alpha=0.2)

        # Add a large negative value where A is 0 so those entries vanish
        # in the softmax (Vaswani et al., 2017)
        mask = -10e9 * (1.0 - A)
        dense += mask

        # Apply softmax to get attention coefficients
        dense = K.softmax(dense)  # (N x N)

        # Apply dropout to features and attention coefficients
        dropout_attn = Dropout(self.dropout_rate)(dense)  # (N x N)
        dropout_feat = Dropout(self.dropout_rate)(features)  # (N x F')

        # Linear combination with neighbors' features
        node_features = K.dot(dropout_attn, dropout_feat)  # (N x F')

        if self.use_bias:
            node_features = K.bias_add(node_features, self.biases[head])

        # Add output of attention head to final output
        outputs.append(node_features)

    # Aggregate the heads' output according to the reduction method
    if self.attn_heads_reduction == 'concat':
        output = K.concatenate(outputs)  # (N x KF')
    else:
        output = K.mean(K.stack(outputs), axis=0)  # (N x F')

    output = self.activation(output)
    return output
def call(self, x):
    """Apply the layer's kernel, optional bias and optional activation.

    Args:
        x: Input tensor.

    Returns:
        The projected tensor; relu or sigmoid is applied when
        ``self.activation`` is ``'relu'`` or ``'sigmoid'``, and no activation
        is applied for any other value.
    """
    # NOTE(review): the projection is assumed to be a backend dot product
    # of the input with self.kernel — confirm.
    output = K.dot(x, self.kernel)
    if self.use_bias:
        output = K.bias_add(output, self.bias, data_format="channels_last")
    if self.activation == 'relu':
        output = activations.relu(output)
    elif self.activation == 'sigmoid':
        output = activations.sigmoid(output)
    return output
def test_relu(self):
    """relu keeps values drawn from [0, 1) and zeroes values drawn from [-1, 0)."""
    placeholder = backend.placeholder(ndim=2)
    relu_fn = backend.function([placeholder], [activations.relu(placeholder)])
    shape = (2, 5)

    # Positive inputs pass through unchanged.
    positive_values = np.random.random(shape)
    self.assertAllClose(relu_fn([positive_values])[0], positive_values,
                        rtol=1e-05)

    # Negative inputs are clamped to zero.
    negative_values = np.random.uniform(-1, 0, shape)
    self.assertAllClose(relu_fn([negative_values])[0], np.zeros(shape),
                        rtol=1e-05)
def get_initial_state(self, inputs):
    """Build the initial ``[y0, s0]`` state list for the recurrence.

    Args:
        inputs: Input tensor; the code sums it over axes 1 and 2, so it is
            treated as (samples, timesteps, input_dims).

    Returns:
        ``[y0, s0]`` where ``y0`` is an all-zero tensor tiled to
        ``self.output_dim`` columns and ``s0`` is the initial hidden state.
    """
    # Apply the matrix on the first time step to get the initial s0.
    # NOTE(review): the projection is assumed to be a backend dot product
    # of inputs[:, 0] with self.W_s — confirm.
    s0 = activations.relu(K.dot(inputs[:, 0], self.W_s))

    # Following keras.layers.recurrent: build a zero tensor of shape
    # (batchsize, output_dim) from the input.
    y0 = K.zeros_like(inputs)  # (samples, timesteps, input_dims)
    y0 = K.sum(y0, axis=(1, 2))  # (samples, )
    y0 = K.expand_dims(y0)  # (samples, 1)
    y0 = K.tile(y0, [1, self.output_dim])

    return [y0, s0]
def build_residual_bn_api(num_channels, inception_input, inception_num):
    """Residual unit: two Conv1D+BN stages plus a 1x1 Conv1D shortcut.

    Args:
        num_channels: Filter count of all three convolutions.
        inception_input: Tensor fed to both the main and the shortcut branch.
        inception_num: Index embedded in the convolution layer names.

    Returns:
        relu of the sum of the main branch and the shortcut branch.
    """
    # Main-branch layers are created up front, in the same order as applied.
    first_conv = Conv1D(num_channels, kernel_size=2, padding='same',
                        name=f'res_{inception_num}_x1_conv1d')
    first_bn = BatchNormalization()
    second_conv = Conv1D(num_channels, kernel_size=2, padding='same',
                         name=f'res_{inception_num}_x2_conv1d')
    second_bn = BatchNormalization()

    main = activations.relu(first_bn(first_conv(inception_input)))
    main = second_bn(second_conv(main))

    # Shortcut branch: 1x1 convolution of the block input.
    shortcut_conv = layers.Conv1D(num_channels, kernel_size=1,
                                  name=f'res_{inception_num}_x3_conv1d')
    shortcut = shortcut_conv(inception_input)

    return activations.relu(main + shortcut)
def calc_action(self, values):
    """Compute the action to perform and the policy it was derived from.

    Args:
        values: Mapping with keys ``'stack_current'``, ``'stack_prev'`` and
            ``'input_current'``; the columns from ``self.hidden_dim`` onward
            of each entry are concatenated as the policy input.

    Returns:
        Tuple ``(action, policy)``; ``action`` is 1 where
        ``policy[:, 0] <= policy[:, 1]`` and 0 elsewhere.
    """
    # Compute the action that has to be performed.
    s_input = K.concatenate([
        values['stack_current'][:, self.hidden_dim:],
        values['stack_prev'][:, self.hidden_dim:],
        values['input_current'][:, self.hidden_dim:]
    ], axis=1)
    # NOTE(review): both affine layers are assumed to use backend dot
    # products (s_input with W_S1, then s with W_S2) — confirm.
    s = relu(K.dot(s_input, self.W_S1) + self.b_S1)
    policy = K.exp(K.dot(s, self.W_S2) + self.b_S2)
    action = TS.switch(TS.le(policy[:, 0], policy[:, 1]), 1, 0)
    return action, policy
def mae_mse_combined_loss(y_true, y_pred):
    """Combine two thresholded losses selected by ``loss_type``.

    Both tensors are clamped from below at 1/3 for the first term and at
    2/3 for the second term. ``loss_type`` (from the enclosing scope) has
    the form ``"<first>+<second>"`` with each part ``'mse'`` or ``'mae'``.
    The second term is weighted by ``infarction_weight`` and the sum is
    divided by ``restrict_chn``.

    Args:
        y_true: Ground-truth tensor.
        y_pred: Predicted tensor.

    Returns:
        ``(loss1 + loss2 * infarction_weight) / restrict_chn``.

    Raises:
        ValueError: If ``loss_type`` does not name two supported losses.
    """
    loss_fns = {'mse': mean_squared_error, 'mae': mean_absolute_error}
    loss_types = loss_type.split('+')
    # Fail with a clear message instead of leaving loss1/loss2 unbound.
    if len(loss_types) < 2 or any(name not in loss_fns
                                  for name in loss_types[:2]):
        raise ValueError(
            'loss_type must look like "mse+mae" (each part mse or mae), '
            'got %r' % (loss_type,))

    # max(v, t) written as relu(v - t) + t
    y_true_myo = relu(y_true - 1.0 / 3.0) + 1.0 / 3.0
    y_pred_myo = relu(y_pred - 1.0 / 3.0) + 1.0 / 3.0
    y_true_myi = relu(y_true - 2.0 / 3.0) + 2.0 / 3.0
    y_pred_myi = relu(y_pred - 2.0 / 3.0) + 2.0 / 3.0

    loss1 = loss_fns[loss_types[0]](y_true_myo, y_pred_myo)
    loss2 = loss_fns[loss_types[1]](y_true_myi, y_pred_myi)
    return (loss1 + loss2 * infarction_weight) / restrict_chn
def build_model():
    """Build and compile the single-LSTM regression model.

    The model is an LSTM(100) over ``(timeSteps, 1)`` inputs, a Dropout
    layer, and a one-unit linear Dense output, compiled with SGD and MSE.

    Returns:
        The compiled ``Sequential`` model.
    """
    model = models.Sequential()
    # Pass the activation functions themselves; calling them with no
    # arguments raises because they require an input tensor.
    model.add(
        LSTM(100, activation=activations.relu, input_shape=(timeSteps, 1)))
    # NOTE(review): a dropout rate of 1 drops every unit — confirm the
    # intended rate.
    model.add(Dropout(1))
    #model.add(layers.Dense(20, activation='relu'))
    model.add(layers.Dense(1, activation=activations.linear))
    print("made it2")
    model.compile(optimizer=tf.keras.optimizers.SGD(learning_rate=0.001),
                  loss=tf.keras.losses.mse,
                  metrics=['acc'])
    return model
def test_relu():
    '''
    Relu implementation doesn't depend on the value being
    a theano variable. Testing ints, floats and theano tensors.
    '''
    placeholder = K.placeholder(ndim=2)
    relu_fn = K.function([placeholder], [activations.relu(placeholder)])

    expected = get_standard_values()
    outputs = relu_fn([expected])

    # The test values contain no negatives, so relu must be the identity.
    assert_allclose(outputs[0], expected, rtol=1e-05)
def custom_layer_model_stacked_B(input_shape, n_filters, use_bias):
    """Build the stacked EdgeToEdge -> EdgeToEdge -> EdgeToNode -> NodeToGraph model.

    Args:
        input_shape: Shape passed to the ``Input`` layer.
        n_filters: Sequence of four filter counts, one per custom layer.
        use_bias: Sequence of four bias flags, one per custom layer.

    Returns:
        The (uncompiled) Keras ``Model``.
    """
    # test my Keras layer
    X_input = Input(shape=input_shape, name='input_layer')
    E2E1 = EdgeToEdge(n_filters[0], use_bias[0])(X_input)
    # Feed the activated tensor into the second EdgeToEdge layer; the relu
    # output would otherwise be computed and then discarded.
    # NOTE(review): relu is applied as a plain function rather than a layer
    # — confirm the Keras version in use accepts this inside a Model.
    E2E1_act = relu(E2E1)
    E2E2 = EdgeToEdge(n_filters[1], use_bias[1])(E2E1_act)
    E2N = EdgeToNode(n_filters[2], use_bias[2])(E2E2)
    N2G = NodeToGraph(n_filters[3], use_bias[3])(E2N)
    model = Model(inputs=X_input, outputs=N2G)
    #model = Model(inputs = X_input, outputs= E2E2)
    return model
def get_unet(arg1, trainable=True):
    """Build the multi-scale convolutional model.

    Four convolutions with growing kernels are densely concatenated into a
    3-channel map ``k``; the output is ``relu(k * inputs - k + 1)``.

    Args:
        arg1: When equal to 1 the model outputs the final relu tensor,
            otherwise it outputs the first convolution.
        trainable: ``trainable`` flag of the first four convolutions.

    Returns:
        The (uncompiled) Keras ``Model``.
    """
    inputs = Input(shape=(None, None, 3))

    conv1 = Conv2D(3, (1, 1), kernel_initializer='random_normal',
                   activation='relu', trainable=trainable)(inputs)
    conv2 = Conv2D(3, (3, 3), kernel_initializer='random_normal',
                   activation='relu', padding='same',
                   trainable=trainable)(conv1)
    merged_12 = concatenate([conv1, conv2], axis=-1)
    conv3 = Conv2D(3, (5, 5), activation='relu',
                   kernel_initializer='truncated_normal', padding='same',
                   trainable=trainable)(merged_12)
    merged_23 = concatenate([conv2, conv3], axis=-1)
    conv4 = Conv2D(3, (7, 7), activation='relu',
                   kernel_initializer='random_normal', padding='same',
                   trainable=trainable)(merged_23)
    merged_all = concatenate([conv1, conv2, conv3, conv4], axis=-1)

    # This last convolution is always trainable, whatever `trainable` is.
    k_map = Conv2D(3, (3, 3), activation='relu',
                   kernel_initializer='truncated_normal', padding='same',
                   trainable=True)(merged_all)
    print(inputs.shape, k_map.shape)

    # out = relu(k * inputs - k + 1)
    scaled = keras.layers.Multiply()([k_map, inputs])
    shifted = keras.layers.Subtract()([scaled, k_map])
    shifted = Lambda(lambda x: 1 + x)(shifted)
    out_layer = Lambda(lambda x: relu(x))(shifted)

    chosen_output = out_layer if arg1 == 1 else conv1
    return Model(inputs=inputs, outputs=chosen_output)
def __conv_block(input, filters, alpha, kernel=(3, 3), strides=(1, 1)):
    """Initial block: convolution, batch normalization and ReLU capped at 6.

    Args:
        input: Tensor fed to the convolution.
        filters: Base filter count; multiplied by ``alpha`` and truncated
            to an int.
        alpha: Width multiplier applied to ``filters``.
        kernel: Convolution kernel size.
        strides: Convolution strides.

    Returns:
        The output tensor of the activation.
    """
    if K.image_data_format() == 'channels_first':
        bn_axis = 1
    else:
        bn_axis = -1
    n_filters = int(filters * alpha)

    conv = Convolution2D(n_filters, kernel, padding='same', use_bias=False,
                         strides=strides, name='conv1')
    out = conv(input)
    out = BatchNormalization(axis=bn_axis, name='conv1_bn')(out)
    return Activation(lambda t: relu(t, max_value=6), name='conv1_relu')(out)
def clipped_relu(x):
    """Return ``relu(x)`` with its output capped at 20."""
    upper_bound = 20
    return relu(x, max_value=upper_bound)
def _add_model(self):
    """Assemble and compile the answer-scoring model; store it on ``self.graph``.

    Reads hyper-parameters from ``self.model_config['PHM']``, adds
    sentence-level, word-level and sliding-window score nodes to
    ``self.nodes``, combines them into a prediction layer named ``y_hat``
    and compiles the resulting ``Model``. Which optional branches are built
    is controlled by the ``use_*`` / ``top_n_wordbyword`` config flags.
    Statement order matters: later nodes read entries of ``nodes`` written
    earlier in this method.
    """
    nodes = self.nodes
    config = self.model_config['PHM']
    p = config['dropout_p']
    mlp_l2 = config['l2']
    D = config['mlp_output_dim']
    # Leaky relu whose slope comes from the config.
    activation = lambda x: relu(x, alpha=config['leaky_alpha'])
    # SENTENCE LEVEL
    # answer plus question
    nodes['question_encoding_repeated'] = RepeatVector(self.answer_size)(nodes['question_encoding'])
    nodes['answer_plus_question'] = merge([nodes['answer_encoding'], nodes['question_encoding_repeated']], mode='sum')
    # story mlp and dropout
    ninputs, noutputs = ['story_encoding1'], ['story_encoding_mlp']
    for ngram in config['ngram_inputs']:
        ninputs.append('story_encoding1_%sgram' % ngram)
        noutputs.append('story_encoding_mlp_%sgram' % ngram)
    # One shared MLP is applied to the story encoding and to each n-gram variant.
    story_encoding_mlp = NTimeDistributed(Dense(D, init="identity", activation=activation, W_regularizer=l2(mlp_l2), trainable=config['trainable_story_encoding_mlp']))
    for input, output in zip(ninputs, noutputs):
        nodes[output] = story_encoding_mlp(self._get_node(input))
    qa_encoding_mlp = NTimeDistributed(Dense(D, init="identity", activation=activation, W_regularizer=l2(mlp_l2), trainable=config['trainable_answer_plus_question_mlp']))
    nodes['answer_plus_question_mlp'] = qa_encoding_mlp(nodes['answer_plus_question'])
    nodes['story_encoding_mlp_dropout'] = Dropout(p)(nodes['story_encoding_mlp'])
    nodes['answer_plus_question_mlp_dropout'] = Dropout(p)(nodes['answer_plus_question_mlp'])
    # norm
    unit_layer = UnitNormalization()
    nodes['story_encoding_mlp_dropout_norm'] = unit_layer(nodes['story_encoding_mlp_dropout'])
    nodes['answer_plus_question_norm'] = unit_layer(nodes['answer_plus_question_mlp_dropout'])
    # cosine
    nodes['story_dot_answer'] = merge([nodes['story_encoding_mlp_dropout_norm'], nodes['answer_plus_question_norm']], mode='dot', dot_axes=[2, 2])
    # WORD LEVEL
    # story mlps for word score and distance score
    trainable_word_mlp = self.model_config['PHM']['trainable_word_mlp']
    if trainable_word_mlp:
        story_word_dense = NTimeDistributed(
            Dense(D, init="identity", activation=activation, W_regularizer=l2(mlp_l2), trainable=trainable_word_mlp), first_n=3)
        # q mlps for word and distance scores
        q_or_a_word_dense = NTimeDistributed(
            Dense(D, init="identity", activation=activation, W_regularizer=l2(mlp_l2), trainable=trainable_word_mlp), first_n=3)
    else:
        # Without a trainable word MLP both word transforms share one linear activation layer.
        linear_activation = Activation('linear')
        story_word_dense = linear_activation
        q_or_a_word_dense = linear_activation
    ninputs, noutputs = [], []
    # Each tuple is (True or config flag name, input node name, output node name).
    tpls = [(True, 'story_word_embedding1', 'story_word_mlp'),
            ('use_slide_window_inside_sentence', 'reordered_story_word_embedding', 'reordered_story_word_mlp'),
            ('use_slide_window_word', 'story_attentive_word_embedding', 'story_attentive_word_embedding_mlp'),
            ('use_slide_window_reordered_word', 'reordered_story_attentive_word_embedding', 'reordered_story_attentive_word_embedding_mlp')
            ]
    for tpl in tpls:
        a, b, c = tpl
        if a is True or config[a]:
            ninputs.append(b)
            noutputs.append(c)
            # NOTE(review): nesting of this n-gram branch under the flag check is assumed — confirm.
            if b in ['reordered_story_word_embedding', 'story_word_embedding1']:
                for ngram in config['ngram_inputs']:
                    ninputs.append('%s_%sgram' % (b, ngram))
                    noutputs.append('%s_%sgram' % (c, ngram))
    for input, output in zip(ninputs, noutputs):
        nodes[output] = story_word_dense(self._get_node(input))
    inputs = ['question_word_embedding', 'answer_word_embedding', 'qa_word_embedding']
    outputs = ['question_word_mlp', 'answer_word_mlp', 'qa_word_mlp']
    for input, output in zip(inputs, outputs):
        nodes[output] = q_or_a_word_dense(self._get_node(input))
    # SIMILARITY MATRICES
    # first for word scores
    # cosine similarity matrix based on sentence and q
    nodes['sim_matrix_q'] = WordByWordMatrix(is_q=True)([nodes['story_word_mlp'], nodes['question_word_mlp']])
    # cosine similarity matrix based on sentence and a
    nodes['sim_matrix_a'] = WordByWordMatrix()([nodes['story_word_mlp'], nodes['answer_word_mlp']])
    # WORD-BY-WORD SCORES
    # q
    nodes['s_q_wbw_score'] = WordByWordScores(trainable=False, is_q=True, alpha=1., threshold=0.15, wordbyword_merge_type=config['wordbyword_merge_type'], )([nodes['sim_matrix_q'], nodes['__w_question_wbw']])
    # a
    nodes['s_a_wbw_score'] = WordByWordScores(trainable=False, alpha=1., threshold=0.15, wordbyword_merge_type=config['wordbyword_merge_type'], )(
        [nodes['sim_matrix_a'], nodes['__w_answer_wbw']])
    # mean
    nodes['story_dot_answer_words'] = GeneralizedMean(mean_type=config['mean_type'], trainable=config['trainable_story_dot_answer_words']) \
        ([nodes['s_q_wbw_score'], nodes['s_a_wbw_score']])
    # SLIDING WINDOW INSIDE SENTENCE
    if config['use_slide_window_inside_sentence']:
        # q+a mlp for word score
        # construct cosine similarity matrix based on sentence and qa, for word score
        _inputs = [nodes['reordered_story_word_mlp'], nodes['qa_word_mlp']]
        nodes['wordbyword_slide_sum_within_sentence'] = \
            WordByWordSlideSumInsideSentence(len(_inputs), window_size=config['window_size_word_inside'], alpha=config['alpha_slide_window_word_inside'], use_gaussian_window=config['use_gaussian_window_word_inside'], gaussian_std=config['gaussian_sd_word_inside'], trainable=config['trainable_slide_window_word_inside'])(_inputs)
    # COMBINE LEVELS
    # sum word-based and sentence-based similarity scores
    inputs = ['story_dot_answer_words', 'story_dot_answer']
    if config['use_slide_window_sentence']:
        inputs.append('story_dot_answer_slide')
        nodes["story_dot_answer_slide"] = SlideSum(alpha=config['alpha_slide_window'], use_gaussian_window=config['use_gaussian_window'], trainable=config['trainable_slide_window'])(
            nodes['story_dot_answer'])
    if config['use_slide_window_inside_sentence']:
        inputs.append('wordbyword_slide_sum_within_sentence')
    if self.model_config['PHM']['use_depend_score']:
        # SENTENCE-QA DEPENDENCY LEVEL
        inputs.append('lcc_score_matrix')
        nodes['lcc_score_matrix'] = DependencyDistanceScore(config['alpha_depend_score'])(
            self._get_node('input_dep'))
    # sum scores from different component of the model on sentence level.
    # sentence level score merge
    layers_s_input = [nodes[x] for x in inputs]
    # Every sentence-level component starts with weight 1.
    weights_s = [1.] * len(layers_s_input)
    nodes['word_plus_sent_sim'] = Combination(len(layers_s_input), input_dim=3, weights=weights_s, combination_type=config['sentence_ensemble'], trainable=config['trainable_sentence_ensemble'])(layers_s_input)
    # extract max over sentences
    nodes['story_dot_answer_max'] = TimeDistributedMerge(mode='max', axis=1)(nodes['word_plus_sent_sim'])
    # word sliding window
    word_sliding_window_output = ['story_dot_answer_max']
    if config['use_slide_window_word']:
        # q+a mlp for word score
        # construct cosine similarity matrix based on sentence and qa, for word score
        temp_inputs = [nodes['story_attentive_word_embedding_mlp'], nodes['qa_word_mlp']]
        if config['use_qa_idf']:
            temp_inputs.append(nodes['__w_question_answer'])
        nodes['wordbyword_slide_sum'] = WordByWordSlideSum(len(temp_inputs), window_size=config['window_size_word'], alpha=config['alpha_slide_window_word'], use_gaussian_window=config['use_gaussian_window_word'], gaussian_std=config['gaussian_sd_word'], trainable=config['trainable_slide_window_word'])(
            temp_inputs)
        word_sliding_window_output.append('wordbyword_slide_sum')
    if config['use_slide_window_reordered_word']:
        # q+a mlp for word score
        # construct cosine similarity matrix based on sentence and qa, for word score
        temp_inputs = [nodes['reordered_story_attentive_word_embedding_mlp'], nodes['qa_word_mlp']]
        if config['use_qa_idf']:
            temp_inputs.append(nodes['__w_question_answer'])
        nodes['reordered_wordbyword_slide_sum'] = WordByWordSlideSum(len(temp_inputs), window_size=config[
            'window_size_reordered_word'], alpha=config[
            'alpha_slide_window_reordered_word'], use_gaussian_window=config[
            'use_gaussian_window_reordered_word'], gaussian_std=config[
            'gaussian_sd_reordered_word'], trainable=config[
            'trainable_slide_window_reordered_word'])(
            temp_inputs
        )
        word_sliding_window_output.append('reordered_wordbyword_slide_sum')
    # Extract top_n sentence for each answer
    if config['top_n_wordbyword']:
        layers_name = ['word_plus_sent_sim', 'story_word_embedding1', 'qa_word_embedding', '__w_question_answer']
        layers = [nodes[x] for x in layers_name]
        top_n_name = 'top_n_wordbyword'
        nodes[top_n_name] = TopNWordByWord(top_n=config['top_n'], nodes=nodes, use_sum=config['top_n_use_sum'], trainable=True)(layers)
        word_sliding_window_output.append(top_n_name)
    ngram_output = [self._add_ngram_network(ngram, story_encoding_mlp) for ngram in config['ngram_inputs']]
    # final score merge
    layers_input = [nodes[x] for x in word_sliding_window_output + ngram_output]
    weights = [1.] * len(layers_input)
    # Re-assigns 1. to the trailing n-gram weights; with the all-ones
    # initialisation above this leaves the list unchanged.
    for i in range(len(ngram_output)):
        weights[-i - 1] = 1.
    # The string literal below is disabled code kept as a no-op expression statement.
    """ # also aggregate scores that were already aggregated on sentence level. sentence_level_weight = 0.1 for layer_name in sentence_level_merge_layers: layer_max = layer_name + "_max" if layer_max not in nodes: add_node(TimeDistributedMergeEnhanced(mode='max'), layer_max, input=layer_name) layers_input.append(nodes[layer_max]) weights.append(sentence_level_weight)"""
    nodes['story_dot_answer_combined_max'] = Combination(len(layers_input), weights=weights, combination_type=config['answer_ensemble'], trainable=config['trainable_answer_ensemble'])(
        layers_input)
    # apply not-switch
    input_mul = self._get_node('input_negation_questions')
    nodes['story_dot_answer_max_switch'] = merge([nodes['story_dot_answer_combined_max'], input_mul], mode='mul')
    # The final activation is linear for the ranking loss and softmax otherwise.
    activation_final = Activation('linear', name='y_hat') \
        if self.model_config['optimizer']['loss'] == 'ranking_loss' else Activation(
        'softmax', name='y_hat')
    prediction = activation_final(nodes['story_dot_answer_max_switch'])
    inputs = self.inputs_nodes.values()
    model = Model(input=inputs, output=prediction)
    optimizer = self._get_optimizer()
    model.compile(loss=self._get_loss_dict(), optimizer=optimizer, metrics={'y_hat': 'accuracy'})
    self.graph = model