def call(self, x):
    """Return softmax attention weights for `x`, reshaped to `(-1, 15, 1)`.

    `x` is projected through `self.kernel`, the projection is summed over
    its last axis, and the resulting scores are normalised with softmax.
    """
    scores = K.sum(K.dot(x, self.kernel), axis=-1)
    weights = K.softmax(scores)
    # NOTE(review): 15 is hard-coded; presumably the sequence length -- confirm.
    return K.reshape(weights, (-1, 15, 1))
def call(self, x, mask=None):
    """Pool `x` along axis 1 using softmax weights from `self.feedforward`.

    `mask` is accepted for interface compatibility but is not used.
    """
    # feedforward emits a trailing singleton axis; drop it before softmax.
    scores = K.squeeze(self.feedforward(x), axis=-1)
    attention = K.softmax(scores)
    return K.batch_dot(x, attention, axes=1)
def attention(inputs, attention_size, time_major=False, return_alphas=False):
    """Reduce RNN/Bi-RNN outputs to a single vector with an attention vector.

    The idea was proposed by Z. Yang et al., "Hierarchical Attention Networks
    for Document Classification", 2016:
    http://www.aclweb.org/anthology/N16-1174.
    Variable notation is inherited from the article.

    Args:
        inputs: The attention inputs; the outputs (not the final state) of an
            RNN/Bi-RNN layer.
            For an RNN this is a `Tensor` shaped
            `[batch_size, max_time, cell.output_size]` when
            `time_major == False` (default), or
            `[max_time, batch_size, cell.output_size]` when
            `time_major == True`.
            For a bidirectional RNN this is a tuple `(outputs_fw, outputs_bw)`
            of forward and backward output `Tensor`s, shaped
            `[batch_size, max_time, cell_fw.output_size]` /
            `[batch_size, max_time, cell_bw.output_size]` when
            `time_major == False` (default), or
            `[max_time, batch_size, cell_fw.output_size]` /
            `[max_time, batch_size, cell_bw.output_size]` when
            `time_major == True`.
        attention_size: Linear size of the attention weights.
        time_major: The shape format of the `inputs` tensors. If true they
            must be shaped `[max_time, batch_size, depth]`; if false,
            `[batch_size, max_time, depth]`. Most TensorFlow data is
            batch-major, so that is the default.
        return_alphas: Whether to also return the attention coefficients
            (useful for visualisation).

    Returns:
        The attention output `Tensor`, shaped `[batch_size, cell.output_size]`
        for an RNN or
        `[batch_size, cell_fw.output_size + cell_bw.output_size]` for a
        bidirectional RNN. When `return_alphas` is true, a tuple
        `(output, alphas)` with `alphas` shaped `[batch_size, max_time]`.
    """
    if isinstance(inputs, tuple):
        # Bi-RNN: concatenate the forward and the backward RNN outputs.
        inputs = tf.concat(inputs, 2)

    if time_major:
        # (T,B,D) => (B,T,D). Uses the public tf.transpose rather than the
        # non-public tf.array_ops module path.
        inputs = tf.transpose(inputs, [1, 0, 2])

    # D value - hidden size of the RNN layer (TF1 Dimension API).
    hidden_size = inputs.shape[2].value

    # Trainable parameters
    w_omega = tf.Variable(
        tf.random_normal([hidden_size, attention_size], stddev=0.1))
    b_omega = tf.Variable(tf.random_normal([attention_size], stddev=0.1))
    u_omega = tf.Variable(tf.random_normal([attention_size], stddev=0.1))

    with tf.name_scope('v'):
        # Fully connected layer with non-linear activation applied to each of
        # the B*T timestamps; `v` is (B,T,D)*(D,A)=(B,T,A), A=attention_size.
        v = tf.tanh(tf.tensordot(inputs, w_omega, axes=1) + b_omega)

    # Each timestamp's size-A vector from `v` is reduced with the `u` vector.
    vu = tf.tensordot(v, u_omega, axes=1, name='vu')  # (B,T) shape
    alphas = K.softmax(vu)  # (B,T) shape

    # The (Bi-)RNN output is reduced with the attention vector: (B,D) shape.
    output = K.sum(inputs * tf.expand_dims(alphas, -1), 1)

    if return_alphas:
        return output, alphas
    return output
# Soft value iteration expressed as a Keras-backend graph: the reward vector
# is fed through `rk`, and the graph yields the soft policy and Q-values.
gamma = 0.90  # discount factor
beta = 10.0  # softmax sharpness applied to the Q-values

rk = K.placeholder(len(r))
rfk = K.dot(K.constant(matmap), K.reshape(rk, (-1, 1)))
rffk = K.reshape(rfk, (-1, 1))
v = K.reshape(rfk, (-1, 1))

for _ in range(50):
    # One Q column per action; each action has its own matrix in `mattrans`.
    q0, q1, q2, q3, q4 = [
        K.dot(K.constant(mattrans[action]), v) for action in range(5)
    ]
    Q = K.concatenate([q0, q1, q2, q3, q4])
    pi = K.softmax(beta * Q)
    v = rffk + gamma * K.reshape(K.sum(Q * pi, axis=1), (-1, 1))

planner = K.function([rk], [pi, Q])

r = np.array([0, -1, -1, -1, 10])
piout, Qout = planner([r])


def findpol(grid, pi, r, c):
    """Write the most probable action for cell (r, c) into `grid`.

    Cells whose value is not the sentinel 6 are left untouched. When several
    actions tie for the maximum probability, the highest index wins.
    """
    if grid[r][c] != 6:
        return
    state = r * ncols + c
    maxprob = max(pi[state, :])
    a = 6
    for ana in range(5):
        if pi[state, ana] == maxprob:
            a = ana
    grid[r][c] = a
def compute_loss(self, inputs):
    """Return the mean categorical cross-entropy of softmax(y_pred) vs y_true.

    `inputs` is a pair `(y_true, y_pred)`; `y_pred` is passed through softmax
    before the cross-entropy is taken.
    """
    y_true, y_pred = inputs
    probabilities = K.softmax(y_pred)
    per_sample = K.categorical_crossentropy(y_true, probabilities)
    return K.mean(per_sample)
def get_target_ranks(num_users=200, num_words=5000, mask=False,
                     user_data_ratio=0., save_probs=False):
    """Save rank results of the target dialogue model for train and test users.

    Loads the per-user Cornell movie data, appends each training user's
    held-out texts, restores the trained model weights and calls
    `save_users_rank_results` for the training users (member_label=1) and
    the test users (member_label=0).
    """
    (user_src_texts, user_trg_texts,
     test_user_src_texts, test_user_trg_texts,
     src_vocabs, trg_vocabs) = load_cornell_movie_by_user(
        num_users, num_words, test_on_user=True,
        user_data_ratio=user_data_ratio)

    train_users = sorted(user_src_texts.keys())
    test_users = sorted(test_user_src_texts.keys())

    # A ratio strictly between 0 and 1 selects the "_dr" directory and model.
    partial_data = 0. < user_data_ratio < 1.

    save_dir = OUTPUT_PATH + 'target_{}{}/'.format(
        num_users, '_dr' if partial_data else '')
    if not os.path.exists(save_dir):
        os.mkdir(save_dir)

    model_path = 'cornell_movie_dialog'
    if partial_data:
        model_path += '_dr{}'.format(user_data_ratio)

    # Extend every training user's texts in place with their held-out data.
    heldout_src_texts, heldout_trg_texts = load_train_users_heldout_data(
        train_users, src_vocabs, trg_vocabs)
    for user in train_users:
        user_src_texts[user] += heldout_src_texts[user]
        user_trg_texts[user] += heldout_trg_texts[user]

    model = build_dialogue_model(Vs=num_words, Vt=num_words, mask=mask,
                                 drop_p=0.)
    model.load_weights(MODEL_PATH + '{}_{}.h5'.format(model_path, num_users))

    src_input_var, trg_input_var = model.inputs
    prediction = model.output
    # The label placeholder is an input of prob_fn but does not feed into
    # the prediction itself.
    trg_label_var = K.placeholder((None, None), dtype='float32')
    prediction = K.softmax(prediction)
    prob_fn = K.function(
        [src_input_var, trg_input_var, trg_label_var, K.learning_phase()],
        [prediction])

    groups = (
        (train_users, user_src_texts, user_trg_texts, 1),
        (test_users, test_user_src_texts, test_user_trg_texts, 0),
    )
    for users, src_texts, trg_texts, label in groups:
        save_users_rank_results(users=users,
                                save_probs=save_probs,
                                user_src_texts=src_texts,
                                user_trg_texts=trg_texts,
                                src_vocabs=src_vocabs,
                                trg_vocabs=trg_vocabs,
                                cross_domain=False,
                                prob_fn=prob_fn,
                                save_dir=save_dir,
                                member_label=label)
# Build and train a two-output model; targets are keyed by the output names
# "recon" and "classacc" (presumably reconstruction and classification heads
# -- confirm against the layer definitions).
model = Model(inputs=inp1, outputs=[x1, x2])
model.compile(optimizer=opt,
              loss=losses,
              loss_weights=lossWeights,
              metrics=["accuracy", "mse"])
history = model.fit(x_train, {
    "recon": x_train,
    "classacc": y_train
},
                    validation_data=(x_test, {
                        "recon": x_test,
                        "classacc": y_test
                    }),
                    epochs=num_epochs,
                    verbose=1)

# Softmax of the learned logits of layer 'tinyLayerE', as a NumPy array.
probabilities = K.get_value(K.softmax(
    model.get_layer('tinyLayerE').logits))

# Greedy one-to-one assignment over the logits matrix: repeatedly take the
# global maximum, mark it in `dl`, then overwrite its row and column so
# neither is picked again.
dl = np.zeros(model.get_layer('tinyLayerE').logits.shape)
p = K.get_value(model.get_layer('tinyLayerE').logits)
for j in range(dl.shape[0]):
    ind = np.argmax(p, axis=None)  # flat index of the current maximum
    x = ind // dl.shape[1]  # row of the maximum
    y = ind % dl.shape[1]  # column of the maximum
    dl[x][y] = 1
    # NOTE(review): masking with -1 assumes remaining logits exceed -1 -- confirm.
    p[x] = -np.ones(dl.shape[1])
    p[:, y] = -np.ones(dl.shape[0])
# Column index selected for each row of the 0/1 assignment matrix.
indices = K.get_value(K.argmax(dl))

# Persist the training history as CSV; the body of the `with` block lies
# beyond the visible excerpt.
hist_df = pd.DataFrame(history.history)
hist_csv_file = rd + ds + "_" + str(nfeat) + "_" + str(ii) + "_history.csv"
with open(hist_csv_file, mode='w') as f:
def __call__(self, tensor):
    """Return the softmax of `tensor` divided by `self.temperature`."""
    scaled = tensor / self.temperature
    return K.softmax(scaled)
def identity_loss_v3(y_true, y_pred):
    """Cross-entropy between group-averaged targets and predictions.

    Both tensors are reshaped to `(-1, select, 30)` and averaged over axis 1;
    the averaged predictions go through softmax before the cross-entropy.
    """
    group_shape = (-1, select, 30)
    y_true_reshaped = K.mean(K.reshape(y_true, group_shape), axis=1)
    pred_group_mean = K.mean(K.reshape(y_pred, group_shape), axis=1)
    y_pred_reshaped = K.softmax(pred_group_mean)
    # NOTE(review): prediction is passed first, target second; the expected
    # order depends on the Keras version in use -- confirm.
    final_val = K.mean(
        K.categorical_crossentropy(y_pred_reshaped, y_true_reshaped))
    # Adding y_pred * 0 broadcasts the scalar loss to y_pred's shape.
    return final_val + y_pred * 0
def calculate_attention_weight(confidences, att_preds):
    """Sum the softmax of each element-wise `confidence * att_pred` product.

    The two sequences are paired positionally; each pair gets its own
    `Lambda` layer and the results are combined with `Add`.
    """
    softmaxes = [
        Lambda(lambda pair: K.softmax(pair[0] * pair[1]))(
            [confidence, att_pred])
        for confidence, att_pred in zip(confidences, att_preds)
    ]
    return Add()(softmaxes)
def call(self, x):
    """Return softmax(x . W_s + b_s)."""
    logits = K.dot(x, self.W_s) + self.b_s
    return K.softmax(logits)
def temperature_softmax(x):
    """Return the softmax of `x` divided by the module-level temperature `T`."""
    scaled = x / T
    return K.softmax(scaled)
def main():
    """Train a VAE with categorical latents on MNIST and plot decoded samples."""
    # Load the MNIST dataset (the labels are unpacked but not used).
    (x_train, y_train_), (x_test, y_test_) = mnist.load_data()

    image_size = x_train.shape[1]
    image_batch_shape = [-1, image_size, image_size, 1]
    x_train = np.reshape(x_train, image_batch_shape)
    x_test = np.reshape(x_test, image_batch_shape)
    x_train = x_train.astype('float32') / 255
    x_test = x_test.astype('float32') / 255

    # Network parameters
    input_shape = (image_size, image_size, 1)
    batch_size = 100
    kernel_size = 3
    filters = 16
    # The latent code is assumed to be `num_latents` random variables, each
    # taking one of `classes_per_latent` values.
    num_latents = 32
    classes_per_latent = 10
    epochs = 30

    # Encoder
    x_in = Input(shape=input_shape)
    encoded = x_in
    for _ in range(2):
        filters *= 2
        encoded = Conv2D(filters=filters,
                         kernel_size=kernel_size,
                         activation='relu',
                         strides=2,
                         padding='same')(encoded)

    # Remember the feature-map shape; the decoder needs it.
    shape = K.int_shape(encoded)

    encoded = Flatten()(encoded)
    encoded = Dense(32, activation='relu')(encoded)
    logits = Dense(num_latents * classes_per_latent)(encoded)
    logits = Reshape((num_latents, classes_per_latent))(logits)

    class GumbelSoftmax(Layer):
        """Gumbel-Softmax reparameterisation layer."""

        def __init__(self, tau=1., **kwargs):
            super(GumbelSoftmax, self).__init__(**kwargs)
            self.tau = K.variable(tau)

        def call(self, inputs):
            # NOTE: the Gumbel-noise / temperature path is disabled in this
            # version; `self.tau` is still updated by the trainer callback
            # but is not read here. The output is a plain softmax.
            return K.softmax(inputs, -1)

    gumbel_softmax = GumbelSoftmax()
    z_sample = gumbel_softmax(logits)

    # Decoder (the generator part), built as a stand-alone model first and
    # then applied to the sampled latent.
    latent_inputs = Input(shape=(num_latents, classes_per_latent))
    decoded = Reshape((num_latents * classes_per_latent, ))(latent_inputs)
    decoded = Dense(32, activation='relu')(decoded)
    decoded = Dense(shape[1] * shape[2] * shape[3],
                    activation='relu')(decoded)
    decoded = Reshape((shape[1], shape[2], shape[3]))(decoded)
    for _ in range(2):
        decoded = Conv2DTranspose(filters=filters,
                                  kernel_size=kernel_size,
                                  activation='relu',
                                  strides=2,
                                  padding='same')(decoded)
        filters //= 2
    outputs = Conv2DTranspose(filters=1,
                              kernel_size=kernel_size,
                              activation='sigmoid',
                              padding='same')(decoded)

    # Stand-alone decoder model
    decoder = Model(latent_inputs, outputs)
    x_out = decoder(z_sample)

    # Full model
    vae = Model(x_in, x_out)

    # xent_loss is the reconstruction loss, kl_loss is the KL term.
    xent_loss = K.sum(K.binary_crossentropy(x_in, x_out), axis=[1, 2, 3])
    p = K.clip(K.softmax(logits, -1), K.epsilon(), 1 - K.epsilon())
    # With a uniform prior the KL term reduces to the negative entropy.
    kl_loss = K.sum(p * K.log(p), axis=[1, 2])
    vae_loss = K.mean(xent_loss + kl_loss)

    # add_loss attaches the custom loss to the model.
    vae.add_loss(vae_loss)
    vae.compile(optimizer='rmsprop')
    vae.summary()

    class Trainer(Callback):
        """Anneals the layer's tau from max_tau towards min_tau per batch."""

        def __init__(self):
            self.max_tau = 1.
            self.min_tau = 0.01
            # Remaining distance above min_tau; shrinks by 0.1% each batch.
            self._tau = self.max_tau - self.min_tau

        def on_batch_begin(self, batch, logs=None):
            K.set_value(gumbel_softmax.tau, self.min_tau + self._tau)
            self._tau *= 0.999

        def on_epoch_begin(self, epoch, logs=None):
            current_tau = K.eval(gumbel_softmax.tau)
            print('epoch: %s, tau: %.5f' % (epoch + 1, current_tau))

    trainer = Trainer()
    vae.fit(x_train,
            shuffle=True,
            epochs=epochs,
            batch_size=batch_size,
            validation_data=(x_test, None),
            callbacks=[trainer])

    # Decode random one-hot latent codes and tile them into one image.
    n = 15  # figure with 15x15 digits
    digit_size = 28
    figure = np.zeros((digit_size * n, digit_size * n))
    for row in range(n):
        for col in range(n):
            one_hot = np.zeros((1, num_latents, classes_per_latent))
            for latent_idx in range(num_latents):
                class_idx = np.random.choice(classes_per_latent)
                one_hot[0, latent_idx, class_idx] = 1
            x_decoded = decoder.predict(one_hot)
            digit = x_decoded[0].reshape(digit_size, digit_size)
            top, left = row * digit_size, col * digit_size
            figure[top:top + digit_size, left:left + digit_size] = digit

    plt.figure(figsize=(10, 10))
    plt.imshow(figure, cmap='Greys_r')
    plt.show()
# Frozen, pre-initialised embedding of the context tokens (no zero-masking).
contextEmbd = Embedding(
    output_dim=EMBEDDING_DIM,
    input_dim=vocab_size,
    weights=[embedding_matrix],
    input_length=context_maxlen,
    trainable=False,
)(context_input)

# Sequence encoders for the question and the context.
Q = Bidirectional(GRU(128, return_sequences=True))(questionEmbd)
D = Bidirectional(GRU(128, return_sequences=True))(contextEmbd)

# Collapse the question to one vector and repeat it for every context step.
Q1 = Bidirectional(GRU(160, return_sequences=False))(Q)
Qh1 = RepeatVector(context_maxlen)(Q1)

DQ = merge([Qh1, D], mode='concat', name='merge1')
D1 = SimpleAttention2(320, 320, return_sequences=True)(DQ)

# Answer-begin pointer: per-step score, flattened, softmax over positions.
output1 = TimeDistributed(Dense(1, activation='sigmoid'))(D1)  # batchsize, len, 1
output1reshap = Reshape((context_maxlen,))(output1)
answerPtrBegin_output = Lambda(lambda x: K.softmax(x))(output1reshap)  # batchsize, len

# Answer-end pointer: conditioned on D1 and the repeated begin distribution.
D1merge = merge(
    [D1, RepeatVector(context_maxlen)(answerPtrBegin_output)],
    mode='concat',
    name='merge2',
)
output2 = TimeDistributed(Dense(1, activation='sigmoid'))(D1merge)
output2reshape = Reshape((context_maxlen,))(output2)
answerPtrEnd_output = Lambda(lambda x: K.softmax(x))(output2reshape)

model = Model(
    input=[context_input, question_input],
    output=[answerPtrBegin_output, answerPtrEnd_output],
)
rms = optimizers.RMSprop(lr=0.0001)
model.compile(
    optimizer=rms,
    loss='categorical_crossentropy',
    loss_weights=[.04, 0.04],
    metrics=['accuracy'],
)
model.summary()
# checkpoint
def call(self, inputs):
    """Pool `inputs` along axis 1 with learned additive attention.

    Scores are `softmax(tanh(inputs . W + b) . V)` taken over axis 1, and the
    result is the score-weighted sum of `inputs` over that axis.

    The column view of `self.V` is kept in a local variable. The previous
    code assigned the reshaped tensor back to `self.V`, replacing the layer's
    weight attribute with a derived tensor on every call.
    """
    v_column = K.reshape(self.V, (-1, 1))
    hidden = K.tanh(K.dot(inputs, self.W) + self.b)
    score = K.softmax(K.dot(hidden, v_column), axis=1)
    outputs = K.sum(score * inputs, axis=1)
    return outputs
def gradient_descent(self, sess, models):
    """Build the attack graph for `models` and return a solver closure.

    The graph optimises an additive `modifier` on a start image so that every
    model in `models` is fooled, while penalising per-pixel deviation from
    the target image beyond `tau`.

    Args:
        sess: TensorFlow session used to run the graph.
        models: Non-empty sequence of callables mapping an image tensor to
            class scores. The scores returned by `doit` come from
            `models[0]`.

    Returns:
        `doit(oimgs, labs, starts, tt, CONST)`, which returns
        `(scores, origscores, nimg, CONST)`, or `None` when the perturbation
        is already too large on the first constant tried.
    """

    def compare(outputs, labels):
        y = np.argmax(labels)
        pred = np.argmax(outputs)
        if self.TARGETED:
            return pred == y
        return pred != y

    shape = (1, FLAGS.IMAGE_ROWS, FLAGS.IMAGE_COLS, FLAGS.NUM_CHANNELS)

    # the variable to optimize over
    modifier = tf.Variable(np.zeros(shape, dtype=np.float32))

    tau = tf.placeholder(tf.float32, [])
    simg = tf.placeholder(tf.float32, shape)
    timg = tf.placeholder(tf.float32, shape)
    tlab = tf.placeholder(tf.float32, (1, FLAGS.NUM_CLASSES))
    const = tf.placeholder(tf.float32, [])

    newimg = tf.clip_by_value(simg + modifier, 0, 1)

    def margin_loss(scores):
        """Hinge loss on the gap between the labelled and best other class."""
        real = tf.reduce_sum(tlab * scores)
        other = tf.reduce_max((1 - tlab) * scores - (tlab * 10000))
        if self.TARGETED:
            # if targeted, optimize for making the other class most likely
            return tf.maximum(0.0, other - real + self.CONFIDENCE)
        # if untargeted, optimize for making this class least likely.
        return tf.maximum(0.0, real - other + self.CONFIDENCE)

    # The first model supplies the reported scores; every model contributes
    # a loss term and a softmax prediction that must be fooled.
    output = models[0](newimg)
    preds = [K.softmax(output)]
    orig_output = models[0](timg)
    loss1 = margin_loss(output)
    for extra_model in models[1:]:
        output_tmp = extra_model(newimg)
        preds.append(K.softmax(output_tmp))
        loss1 += margin_loss(output_tmp)

    # sum up the losses
    loss2 = tf.reduce_sum(tf.maximum(0.0, tf.abs(newimg - timg) - tau))
    loss = const * loss1 + loss2

    # setup the adam optimizer and keep track of variables we're creating
    start_vars = set(x.name for x in tf.global_variables())
    optimizer = tf.train.AdamOptimizer(self.LEARNING_RATE)
    train = optimizer.minimize(loss, var_list=[modifier])

    end_vars = tf.global_variables()
    new_vars = [x for x in end_vars if x.name not in start_vars]
    init = tf.variables_initializer(var_list=[modifier] + new_vars)

    def doit(oimgs, labs, starts, tt, CONST):
        prev_scores = None
        imgs = np.array(oimgs)
        starts = np.array(starts)

        def make_feed(c):
            return {
                timg: imgs,
                tlab: labs,
                tau: tt,
                simg: starts,
                const: c,
                K.learning_phase(): 0
            }

        # Defined up front so the final evaluation below works even when
        # the loop never runs (CONST already >= LARGEST_CONST).
        feed_dict = make_feed(CONST)

        # initialize the variables
        sess.run(init)
        while CONST < self.LARGEST_CONST:
            # try solving for each value of the constant
            feed_dict = make_feed(CONST)
            for step in range(self.MAX_ITERATIONS):
                # perform the update step
                _, works, linf = sess.run([train, loss, loss2],
                                          feed_dict=feed_dict)

                # it worked
                # NOTE(review): `step == CONST - 1` compares a step index
                # with the constant; possibly meant MAX_ITERATIONS - 1.
                # Left unchanged -- confirm.
                if works < .0001 * CONST and (self.ABORT_EARLY
                                              or step == CONST - 1):
                    # Every model must be fooled. The previous code iterated
                    # over `len(outputs)`, which raises TypeError.
                    works = all(
                        compare(sess.run(pred, feed_dict=feed_dict), labs)
                        for pred in preds)
                    if works:
                        scores, origscores, nimg = sess.run(
                            (output, orig_output, newimg),
                            feed_dict=feed_dict)
                        return scores, origscores, nimg, CONST

            # we didn't succeed, increase constant and try again
            if linf >= 0.1 * self.EPS:
                # perturbation is too large
                if prev_scores is None:
                    return prev_scores
                return prev_scores, prev_origscores, prev_nimg, CONST

            # didn't reach target confidence
            CONST *= self.const_factor
            prev_scores, prev_origscores, prev_nimg = sess.run(
                (output, orig_output, newimg), feed_dict=feed_dict)

        scores, origscores, nimg = sess.run((output, orig_output, newimg),
                                            feed_dict=feed_dict)
        return scores, origscores, nimg, CONST

    return doit
def get_shadow_ranks(exp_id=0, num_users=200, num_words=5000, mask=False,
                     cross_domain=False, rnn_fn='lstm', h=128, emb_h=128,
                     rerun=False):
    """Save rank results of a shadow dialogue model for its users.

    Loads the shadow training-user list and the matching data (cross-domain
    or in-domain), restores the shadow model weights and calls
    `save_users_rank_results` for the shadow training users (member_label=1)
    and the shadow test users (member_label=0).
    """
    shadow_user_path = 'shadow_users{}_{}_{}_{}.npz'.format(
        exp_id, rnn_fn, num_users, 'cd' if cross_domain else '')
    shadow_train_users = np.load(MODEL_PATH + shadow_user_path)['arr_0']
    shadow_train_users = list(shadow_train_users)
    # Prints the same text as the former Python 2 `print a, b` statement,
    # but is also valid Python 3.
    print('%s %s' % (shadow_user_path, shadow_train_users))

    save_dir = OUTPUT_PATH + 'shadow_exp{}_{}/'.format(exp_id, num_users)
    if not os.path.exists(save_dir):
        os.mkdir(save_dir)

    if cross_domain:
        loaded = load_cross_domain_shadow_user_data(
            shadow_train_users, num_users, num_words)
    else:
        loaded = load_shadow_user_data(
            shadow_train_users, num_users, num_words)
    (user_src_texts, user_trg_texts,
     test_user_src_texts, test_user_trg_texts,
     src_vocabs, trg_vocabs) = loaded

    shadow_test_users = sorted(test_user_src_texts.keys())

    model_path = '{}_shadow_exp{}_{}_{}.h5'.format(
        'ubuntu_dialog' if cross_domain else 'cornell_movie_dialog',
        exp_id, rnn_fn, num_users)
    model = build_dialogue_model(Vs=num_words, Vt=num_words, mask=mask,
                                 drop_p=0., h=h, demb=emb_h, rnn_fn=rnn_fn)
    model.load_weights(MODEL_PATH + model_path)

    src_input_var, trg_input_var = model.inputs
    prediction = model.output
    # The label placeholder is an input of prob_fn but does not feed into
    # the prediction itself.
    trg_label_var = K.placeholder((None, None), dtype='float32')
    prediction = K.softmax(prediction)
    prob_fn = K.function(
        [src_input_var, trg_input_var, trg_label_var, K.learning_phase()],
        [prediction])

    save_users_rank_results(users=shadow_train_users,
                            rerun=rerun,
                            user_src_texts=user_src_texts,
                            user_trg_texts=user_trg_texts,
                            src_vocabs=src_vocabs,
                            trg_vocabs=trg_vocabs,
                            cross_domain=cross_domain,
                            prob_fn=prob_fn,
                            save_dir=save_dir,
                            member_label=1)
    save_users_rank_results(users=shadow_test_users,
                            rerun=rerun,
                            user_src_texts=test_user_src_texts,
                            user_trg_texts=test_user_trg_texts,
                            src_vocabs=src_vocabs,
                            trg_vocabs=trg_vocabs,
                            cross_domain=cross_domain,
                            prob_fn=prob_fn,
                            save_dir=save_dir,
                            member_label=0)
def softmax_by_string(t):
    """Apply softmax separately to each of the `NUM_STRINGS` slices of axis 1.

    Slice `t[:, i, :]` is normalised on its own and the results are stacked
    back along axis 1, so the output has the same layout as `t`.
    """
    # The previously computed, never-used K.shape(t) has been dropped.
    string_sm = [
        K.expand_dims(K.softmax(t[:, i, :]), axis=1)
        for i in range(NUM_STRINGS)
    ]
    return K.concatenate(string_sm, axis=1)
def memLstm_custom_model(hparams, context, context_mask, utterances):
    """Build the multi-hop attention graph that scores candidate utterances.

    The context is encoded by one LSTM applied both to the sequence and to
    its reversal; each candidate utterance is encoded by a second LSTM and
    refined over `hparams.hops` attention hops against the context encodings.
    The final logits are dot products between an attention-pooled context
    vector and the refined utterance encodings.

    Args:
        hparams: Hyper-parameter object; the attributes read here are
            `embedding_path`, `vocab_size`, `memn2n_embedding_dim`,
            `memn2n_rnn_dim`, `max_context_len`, `max_utterance_len`,
            `num_utterance_options`, `amplify_val` and `hops`.
        context: Keras tensor of context token ids.
        context_mask: Keras tensor that is reshaped and added to the
            attention scores before softmax (presumably an additive mask
            with large negative values at padded positions -- confirm).
        utterances: Keras tensor of candidate-utterance token ids.

    Returns:
        Tuple `(probs, context_attention, responses_attention,
        responses_dot)` of Keras tensors.
    """
    print("context_shape: ", context._keras_shape)
    print("utterances_shape: ", utterances._keras_shape)
    print("context_mask: ", context_mask._keras_shape)

    # Use embedding matrix pretrained by Gensim
    embeddings_W = np.load(hparams.embedding_path)
    print("embeddings_W: ", embeddings_W.shape)

    ################################## Define Regular Layers ##################################
    # Utterances Embedding (Output shape: NUM_OPTIONS(100) x BATCH_SIZE(?) x LEN_SEQ(160) x EMBEDDING_DIM(300))
    # Both embedding layers are initialised from the same matrix and frozen.
    embedding_context_layer = Embedding(
        input_dim=hparams.vocab_size,
        output_dim=hparams.memn2n_embedding_dim,
        weights=[embeddings_W],
        input_length=hparams.max_context_len,
        mask_zero=True,
        trainable=False)
    embedding_utterance_layer = Embedding(
        input_dim=hparams.vocab_size,
        output_dim=hparams.memn2n_embedding_dim,
        weights=[embeddings_W],
        input_length=hparams.max_utterance_len,
        mask_zero=True,
        trainable=False)

    # Define LSTM Context encoder 1
    # Returns the full sequence plus the final hidden and cell states.
    LSTM_A = LSTM(hparams.memn2n_rnn_dim,
                  input_shape=(hparams.max_context_len,
                               hparams.memn2n_embedding_dim + 2),
                  use_bias=True,
                  unit_forget_bias=True,
                  return_state=True,
                  return_sequences=True)

    # Define LSTM Utterances encoder
    # Returns only the final output for each utterance.
    LSTM_B = LSTM(hparams.memn2n_rnn_dim,
                  input_shape=(hparams.max_utterance_len,
                               hparams.memn2n_embedding_dim),
                  use_bias=True,
                  unit_forget_bias=True,
                  return_state=False,
                  return_sequences=False)
    # The bare string below is disabled code kept as a no-op statement.
    ''' # Define LSTM Context encoder 2 LSTM_C = LSTM(hparams.memn2n_rnn_dim, input_shape=(hparams.max_context_len, hparams.memn2n_embedding_dim+2), unit_forget_bias=True, return_state=False, return_sequences=True) '''

    # Define Dense layer to transform utterances
    Dense_1 = Dense(hparams.memn2n_rnn_dim,
                    use_bias=False,
                    kernel_initializer=keras.initializers.TruncatedNormal(
                        mean=0.0, stddev=1.0, seed=None),
                    input_shape=(hparams.memn2n_rnn_dim, ))

    # Define Dense layer to do softmax
    # Produces one score per context step; the same layer is reused for the
    # forward and the backward context encodings further down.
    Dense_2 = Dense(1,
                    use_bias=False,
                    kernel_initializer=keras.initializers.TruncatedNormal(
                        mean=0.0, stddev=1.0, seed=None),
                    input_shape=(hparams.memn2n_rnn_dim, ))

    ################################## Define Custom Layers ##################################
    # NOTE: several of the Lambda layers below are never applied in this
    # function. They are left in place because constructing an unnamed layer
    # consumes an auto-generated layer name.
    # Define repeat element layer
    custom_repeat_layer = Lambda(
        lambda x: K.repeat_elements(x, hparams.max_context_len, 1))
    custom_repeat_layer2 = Lambda(
        lambda x: K.repeat_elements(x, hparams.num_utterance_options, 1))

    # Expand dimension layer
    expand_dim_layer = Lambda(lambda x: K.expand_dims(x, axis=1))

    # Amplify layer
    # Scales attention scores by hparams.amplify_val before softmax.
    amplify_layer = Lambda(lambda x: x * hparams.amplify_val)

    # Define Softmax layer
    # softmax_layer normalises over the last axis, softmax_layer2 over axis 1.
    softmax_layer = Lambda(lambda x: K.softmax(Masking()(x), axis=-1))
    softmax_layer2 = Lambda(lambda x: K.softmax(Masking()(x), axis=1))

    # Define Stack & Concat layers
    Stack = Lambda(lambda x: K.stack(x, axis=1))

    # Naming tensors
    # Identity layers whose only purpose is to give the outputs fixed names.
    responses_dot_layer = Lambda(lambda x: x, name='responses_dot')
    responses_attention_layer = Lambda(lambda x: x, name='responses_attention')
    context_attention_layer = Lambda(lambda x: x, name='context_attention')
    # Concat = Lambda(lambda x: K.concatenate(x, axis=1))

    # Sum up last dimension
    Sum = Lambda(lambda x: K.sum(x, axis=-1))
    Sum2 = Lambda(lambda x: K.sum(x, axis=1))

    # Normalize layer
    Normalize = Lambda(lambda x: K.l2_normalize(x, axis=-1))

    # Define tensor slice layer
    GetFirstHalfTensor = Lambda(lambda x: x[:, :, :hparams.memn2n_rnn_dim])
    GetFirstTensor = Lambda(lambda x: x[:, 0, :])
    GetLastHalfTensor = Lambda(lambda x: x[:, :, hparams.memn2n_rnn_dim:])
    GetLastTensor = Lambda(lambda x: x[:, -1, :])
    GetReverseTensor = Lambda(lambda x: K.reverse(x, axes=1))

    ################################## Apply layers ##################################
    # Prepare Masks
    # utterances_mask: context mask repeated once per candidate utterance.
    # context_mask: the same mask with a trailing singleton axis.
    utterances_mask = Reshape((1, hparams.max_context_len))(context_mask)
    utterances_mask = custom_repeat_layer2(utterances_mask)
    context_mask = Reshape((hparams.max_context_len, 1))(context_mask)

    # Context Embedding: (BATCH_SIZE(?) x CONTEXT_LEN x EMBEDDING_DIM)
    context_embedded = embedding_context_layer(context)
    print("context_embedded: ", context_embedded.shape)
    print("context_embedded (history): ", context_embedded._keras_history,
          '\n')
    # Skip this?
    # context_embedded = Concatenate(axis=-1)([context_embedded, context_speaker])

    # Utterances Embedding: (BATCH_SIZE(?) x NUM_OPTIONS x UTTERANCE_LEN x EMBEDDING_DIM)
    utterances_embedded = TimeDistributed(
        embedding_utterance_layer,
        input_shape=(hparams.num_utterance_options,
                     hparams.max_utterance_len))(utterances)
    print("Utterances_embedded: ", utterances_embedded.shape)
    print("Utterances_embedded (history): ",
          utterances_embedded._keras_history, '\n')

    # Encode context A: (BATCH_SIZE(?) x CONTEXT_LEN x RNN_DIM)
    # LSTM_A is applied twice, so the forward and backward passes share
    # weights; the backward pass reads the reversed embedding sequence.
    all_context_encoded_Forward,\
    all_context_encoded_Forward_h,\
    all_context_encoded_Forward_c = LSTM_A(context_embedded)

    all_context_encoded_Backward,\
    all_context_encoded_Backward_h,\
    all_context_encoded_Backward_c = LSTM_A(Masking()(GetReverseTensor(context_embedded)))#,
    #initial_state=[all_context_encoded_Forward_h, all_context_encoded_Forward_c])
    # Reverse the backward outputs again so both encodings share time order.
    all_context_encoded_Backward = Masking()(
        GetReverseTensor(all_context_encoded_Backward))

    # print("context_encoded_A: ", len(context_encoded_A))
    print("all_context_encoded_Forward: ", all_context_encoded_Forward.shape)
    print("all_context_encoded_Forward (history): ",
          all_context_encoded_Forward._keras_history)
    print("all_context_encoded_Backward: ", all_context_encoded_Backward.shape)
    print("all_context_encoded_Backward (history): ",
          all_context_encoded_Backward._keras_history, '\n')

    # Define bi-directional
    # The two directions are combined by element-wise addition.
    all_context_encoded_Bidir = Add()(
        [all_context_encoded_Forward, all_context_encoded_Backward])

    # Encode utterances B: (BATCH_SIZE(?) x NUM_OPTIONS(100) x RNN_DIM)
    all_utterances_encoded_B = TimeDistributed(
        LSTM_B,
        input_shape=(hparams.num_utterance_options, hparams.max_utterance_len,
                     hparams.memn2n_embedding_dim))(utterances_embedded)
    all_utterances_encoded_B = TimeDistributed(
        Dense_1,
        input_shape=(hparams.num_utterance_options,
                     hparams.memn2n_rnn_dim))(all_utterances_encoded_B)
    print("all_utterances_encoded_B: ", all_utterances_encoded_B.shape)
    print("all_utterances_encoded_B: (history)",
          all_utterances_encoded_B._keras_history, '\n')

    # Per-hop attention maps and raw dot products, collected for output.
    responses_attention = []
    responses_dot = []
    for i in range(hparams.hops):
        print(str(i + 1) + 'th hop:')
        # 1st Attention & Weighted Sum
        # between Utterances_B(NUM_OPTIONS x RNN_DIM) and Contexts_encoded_Forward(CONTEXT_LEN x RNN_DIM)
        # and apply Softmax
        # (Output shape: BATCH_SIZE(?) x NUM_OPTIONS(100) x CONTEXT_LEN)
        attention_Forward = Dot(axes=[2, 2])(
            [all_utterances_encoded_B, all_context_encoded_Forward])
        dot_Forward = attention_Forward  # raw scores, before scaling/masking
        attention_Forward = amplify_layer(attention_Forward)
        attention_Forward = Add()([attention_Forward, utterances_mask])
        attention_Forward = softmax_layer(attention_Forward)
        print("attention_Forward: ", attention_Forward.shape)
        print("attention_Forward: (history)",
              attention_Forward._keras_history)

        # between Attention(NUM_OPTIONS x CONTEXT_LEN) and Contexts_A(CONTEXT_LEN x RNN_DIM)
        # equivalent to weighted sum of Contexts_A according to Attention
        # (Output shape: BATCH_SIZE(?) x NUM_OPTIONS(100) x RNN_DIM)
        # The weights come from the forward encoding, but the values summed
        # are the combined (forward + backward) encoding.
        weighted_sum_Forward = Dot(axes=[2, 1])(
            [attention_Forward, all_context_encoded_Bidir])
        print("weighted_sum: ", weighted_sum_Forward.shape)
        print("weighted_sum: (history)", weighted_sum_Forward._keras_history,
              '\n')

        # (Output shape: ? x NUM_OPTIONS(100) x RNN_DIM)
        # Residual update of the utterance encodings.
        all_utterances_encoded_B = Add()(
            [weighted_sum_Forward, all_utterances_encoded_B])

        # 2nd Attention & Weighted Sum
        # between Utterances_B(NUM_OPTIONS x RNN_DIM) and Contexts_encoded_Backward(CONTEXT_LEN x RNN_DIM)
        # and apply Softmax
        # (Output shape: BATCH_SIZE(?) x NUM_OPTIONS(100) x CONTEXT_LEN)
        attention_Backward = Dot(axes=[2, 2])(
            [all_utterances_encoded_B, all_context_encoded_Backward])
        dot_Backward = attention_Backward  # raw scores, before scaling/masking
        attention_Backward = amplify_layer(attention_Backward)
        attention_Backward = Add()([attention_Backward, utterances_mask])
        attention_Backward = softmax_layer(attention_Backward)
        print("attention_Backward: ", attention_Backward.shape)
        print("attention_Backward: (history)",
              attention_Backward._keras_history)

        # between Attention(NUM_OPTIONS x CONTEXT_LEN) and Contexts_A(CONTEXT_LEN x RNN_DIM)
        # equivalent to weighted sum of Contexts_A according to Attention
        # (Output shape: BATCH_SIZE(?) x NUM_OPTIONS(100) x RNN_DIM)
        weighted_sum_Backward = Dot(axes=[2, 1])(
            [attention_Backward, all_context_encoded_Bidir])
        print("weighted_sum_Backward: ", weighted_sum_Backward.shape)
        print("weighted_sum_Backward: (history)",
              weighted_sum_Backward._keras_history, '\n')

        # (Output shape: ? x NUM_OPTIONS(100) x RNN_DIM)
        all_utterances_encoded_B = Add()(
            [weighted_sum_Backward, all_utterances_encoded_B])

        # Add a leading axis so forward/backward maps can be concatenated
        # into one (2, NUM_OPTIONS, CONTEXT_LEN) record for this hop.
        dot_Forward = Reshape((1, hparams.num_utterance_options,
                               hparams.max_context_len))(dot_Forward)
        dot_Backward = Reshape((1, hparams.num_utterance_options,
                                hparams.max_context_len))(dot_Backward)
        att_Forward = expand_dim_layer(attention_Forward)
        att_Backward = expand_dim_layer(attention_Backward)

        merge_dots = Concatenate(axis=1)([dot_Forward, dot_Backward])
        merge_responses = Concatenate(axis=1)([att_Forward, att_Backward])

        responses_dot.append(merge_dots)
        responses_attention.append(merge_responses)
        print("repsonses_attention[i]:", merge_responses._keras_shape)

        if i < hparams.hops - 1:
            continue
            # The bare string below is unreachable, disabled code.
            ''' temp = all_context_encoded_Forward all_context_encoded_Forward = all_context_encoded_Backward all_context_encoded_Backward = temp '''
        else:
            print("hop ended")

    ############# Attention to Context #############
    # NOTE(review): this section is placed after the hop loop; the original
    # indentation was not recoverable -- confirm.
    # (Output shape: ? x MAX_CONTEXT_LEN x 1)
    attention_Forward_wrt_context =\
    TimeDistributed(Dense_2,
                    input_shape=(hparams.max_context_len,
                                hparams.memn2n_rnn_dim))(all_context_encoded_Forward)
    attention_Forward_wrt_context = amplify_layer(
        attention_Forward_wrt_context)
    attention_Forward_wrt_context = Add()(
        [attention_Forward_wrt_context, context_mask])
    attention_Forward_wrt_context = softmax_layer2(
        attention_Forward_wrt_context)

    # (Output shape: ? x 1 x RNN_DIM)
    weighted_sum_Forward_wrt_context = Dot(axes=[1, 1])(
        [attention_Forward_wrt_context, all_context_encoded_Bidir])

    # (Output shape: ? x MAX_CONTEXT_LEN x 1)
    attention_Backward_wrt_context =\
    TimeDistributed(Dense_2,
                    input_shape=(hparams.max_context_len,
                                hparams.memn2n_rnn_dim))(all_context_encoded_Backward)
    attention_Backward_wrt_context = amplify_layer(
        attention_Backward_wrt_context)
    attention_Backward_wrt_context = Add()(
        [attention_Backward_wrt_context, context_mask])
    attention_Backward_wrt_context = softmax_layer2(
        attention_Backward_wrt_context)

    # (Output shape: ? x 1 x RNN_DIM)
    weighted_sum_Backward_wrt_context = Dot(axes=[1, 1])(
        [attention_Backward_wrt_context, all_context_encoded_Bidir])

    att_Forward_wrt_context = Reshape(
        (1, hparams.max_context_len))(attention_Forward_wrt_context)
    att_Backward_wrt_context = Reshape(
        (1, hparams.max_context_len))(attention_Backward_wrt_context)
    context_attention = Concatenate(axis=1)(
        [att_Forward_wrt_context, att_Backward_wrt_context])

    # Pooled context vector: sum of the forward- and backward-weighted sums.
    context_encoded_AplusC = Add()([
        weighted_sum_Forward_wrt_context, weighted_sum_Backward_wrt_context
    ])
    #context_encoded_A = Dense_1(context_encoded_A)
    context_encoded_AplusC = Reshape(
        (1, hparams.memn2n_rnn_dim))(context_encoded_AplusC)
    print("context_encoded_AplusC: ", context_encoded_AplusC.shape)
    print("context_encoded_AplusC: (history)",
          context_encoded_AplusC._keras_history, '\n')

    # (Output shape: ? x 1 x NUM_OPTIONS(100))
    logits = Dot(axes=[2, 2])(
        [context_encoded_AplusC, all_utterances_encoded_B])
    logits = Reshape((hparams.num_utterance_options, ))(logits)
    print("logits: ", logits.shape)
    print("logits: (history)", logits._keras_history, '\n')

    # Softmax layer for probability of each of Dot products in previous layer
    # Softmaxing logits (Output shape: BATCH_SIZE(?) x NUM_OPTIONS(100))
    probs = Activation('softmax', name='probs')(logits)
    print("probs: ", probs.shape)
    print("final History: ", probs._keras_history, '\n')

    # Return probabilities(likelihoods) of each of utterances
    # Those will be used to calculate the loss ('sparse_categorical_crossentropy')
    # With a single hop the one record is reshaped to carry a hop axis of
    # size 1; otherwise the per-hop records are stacked along axis 1.
    if hparams.hops == 1:
        responses_dot = Reshape((1, 2, hparams.num_utterance_options,
                                 hparams.max_context_len))(responses_dot[0])
        responses_attention = Reshape(
            (1, 2, hparams.num_utterance_options,
             hparams.max_context_len))(responses_attention[0])
    else:
        responses_dot = Stack(responses_dot)
        responses_attention = Stack(responses_attention)

    # Route the auxiliary outputs through the named identity layers.
    responses_dot = responses_dot_layer(responses_dot)
    responses_attention = responses_attention_layer(responses_attention)
    context_attention = context_attention_layer(context_attention)
    print("repsonses_attention:", responses_attention._keras_shape)
    print("context_attention:", context_attention._keras_shape)

    return probs, context_attention, responses_attention, responses_dot
def soft_logloss(y_true, y_pred):
    """Log-loss between temperature-softened targets and soft predictions.

    Only the columns from index `nb_classes` onward are used: those of
    `y_true` are treated as logits and softened with `temperature`; those of
    `y_pred` are passed to `logloss` as they are.
    """
    target_logits = y_true[:, nb_classes:]
    y_pred_soft = y_pred[:, nb_classes:]
    y_soft = K.softmax(target_logits / temperature)
    return logloss(y_soft, y_pred_soft)
def call(self, x):
    """Return softmax(x . kernel)."""
    projected = K.dot(x, self.kernel)
    return K.softmax(projected)
def call(self, inputs):
    """Multi-head scaled dot-product attention.

    Args:
        inputs: List `[q, k, v]`, optionally followed by `v_mask` and then
            `q_mask`. Per the original note, masks have shape
            `[batch_size, seq_len]` or `[batch_size, seq_len, 1]`.

    Returns:
        Attention output reshaped to `(-1, q_len, self.out_dim)` with
        `q_mask` applied multiplicatively.

    Side effects:
        Stores the attention matrix in `self.a` and, when an explicit
        `mask_right` matrix is used, the additive mask in `self.mask`.
    """
    q, k, v = inputs[:3]
    v_mask, q_mask = None, None
    if len(inputs) > 3:
        v_mask = inputs[3]
    if len(inputs) > 4:
        q_mask = inputs[4]

    # Linear projections
    qw = self.reuse(self.q_dense, q)
    kw = self.reuse(self.k_dense, k)
    vw = self.reuse(self.v_dense, v)

    # Split the projections into heads
    qw = K.reshape(qw, (-1, K.shape(qw)[1], self.heads, self.key_size))
    kw = K.reshape(kw, (-1, K.shape(kw)[1], self.heads, self.key_size))
    vw = K.reshape(vw, (-1, K.shape(vw)[1], self.heads, self.size_per_head))

    # Move the head axis in front of the sequence axis
    qw = K.permute_dimensions(qw, (0, 2, 1, 3))
    kw = K.permute_dimensions(kw, (0, 2, 1, 3))
    vw = K.permute_dimensions(vw, (0, 2, 1, 3))

    # Attention scores, scaled by sqrt(key_size)
    a = K.batch_dot(qw, kw, [3, 3]) / self.key_size**0.5
    # Bring the key axis to position 1 for to_mask, then restore the layout.
    a = K.permute_dimensions(a, (0, 3, 2, 1))
    a = to_mask(a, v_mask, 'add')
    a = K.permute_dimensions(a, (0, 3, 2, 1))

    # Apply the right-side mask only when one was actually requested. The
    # previous condition used `or`, which is always true, so False/None
    # wrongly fell into the explicit-matrix branch.
    if (self.mask_right is not False) and (self.mask_right is not None):
        if self.mask_right is True:
            # Mask out everything above the diagonal.
            ones = K.ones_like(a[:1, :1])
            mask = (ones - K.tf.matrix_band_part(ones, -1, 0)) * 1e10
            a = a - mask
        else:
            # mask_right is an externally supplied 0/1 matrix,
            # shape=[q_len, k_len]
            mask = (1 - K.constant(self.mask_right)) * 1e10
            mask = K.expand_dims(K.expand_dims(mask, 0), 0)
            self.mask = mask
            a = a - mask

    a = K.softmax(a)
    self.a = a

    # Weighted sum of values, heads merged back into one feature axis
    o = K.batch_dot(a, vw, [3, 2])
    o = K.permute_dimensions(o, (0, 2, 1, 3))
    o = K.reshape(o, (-1, K.shape(o)[1], self.out_dim))
    o = to_mask(o, q_mask, 'mul')
    return o
def custom_softmax(x):
    """Apply softmax across the last axis of a 4-D tensor.

    The three leading axes are collapsed into one so the softmax runs on a
    2-D ``(rows, num_classes)`` tensor, after which the original leading
    dimensions are restored.
    """
    dims = K.shape(x)
    d0, d1, d2 = dims[0], dims[1], dims[2]
    flat = K.reshape(x, (d0 * d1 * d2, num_classes))
    flat = K.softmax(flat)
    return K.reshape(flat, (d0, d1, d2, num_classes))
def my_model(opt, word_index, embedding_matrix):
    """Build the attention-over-attention aspect/sentiment classifier.

    Args:
        opt: Mapping of hyper-parameters. Keys read here are
            'max_sentence_length', 'lstm_hidden_size', 'drop_rate',
            'dense_hidden_size', 'reg_rate' and 'leakyRelu_alpha'.
        word_index: Mapping from word to embedding row index; used to look
            up the aspect label words.
        embedding_matrix: Pre-trained embedding weights (frozen).

    Returns:
        A Keras ``Model`` mapping the sentence input to a tensor reshaped to
        ``(batch, N_SENTI * N_ASPECT)``, where a softmax has been applied
        over the sentiment axis separately for every aspect.
    """
    hidden_size = opt['lstm_hidden_size']
    drop_rate = opt['drop_rate']

    def bilstm(layer_name):
        # Both encoders share the same configuration, only the name differs.
        return Bidirectional(LSTM(hidden_size,
                                  return_sequences=True,
                                  dropout=drop_rate,
                                  recurrent_dropout=drop_rate),
                             name=layer_name)

    def aspect_ids(_):
        # Constant row of word ids, one per aspect label.
        return tf.constant([[word_index[w] for w in aspect_label_id.keys()]])

    # sen = [batch, max_sentence_length]
    sen = Input(shape=(opt['max_sentence_length'], ), name='Sentence')
    # asp = [1, N_ASPECT]
    asp = Lambda(aspect_ids, name='Aspect')([])
    batch_size = K.shape(sen)[0]

    def repeat_for_batch(encoded):
        # Tile the single encoded aspect block once per batch element.
        tiled = tf.tile(tf.reshape(encoded, (-1, )), [batch_size])
        return tf.reshape(tiled, (batch_size, N_ASPECT, 2 * hidden_size))

    def per_aspect_softmax(scores):
        # Softmax over sentiments within each aspect, then flatten again.
        grouped = tf.reshape(scores, (batch_size, N_ASPECT, N_SENTI))
        return tf.reshape(K.softmax(grouped),
                          (batch_size, N_SENTI * N_ASPECT))

    # Embedding module (frozen pre-trained vectors)
    E = Embedding(*embedding_matrix.shape,
                  trainable=False,
                  embeddings_initializer=Constant(embedding_matrix),
                  name='WordVec')

    # BiLSTM module
    # asen = [batch_size, max_sentence_len, 2*lstm_hidden_size]
    sentence_encoder = bilstm('BLSTM-Sen')
    asen = sentence_encoder(E(sen))
    # aasp = [1, N_ASPECT, 2*lstm_hidden_size]
    aspect_encoder = bilstm('BLSTM-Asp')
    aasp = aspect_encoder(E(asp))
    # aasp = [batch, N_ASPECT, 2*lstm_hidden_size]
    aasp = Lambda(repeat_for_batch, name='Repeat')(aasp)

    # AOA module
    # X = [batch_size, max_sentence_len, N_ASPECT]
    X = Dot(-1, name='Project')([asen, aasp])
    # attn = [batch_size, max_sentence_len, N_ASPECT], column-wise softmax
    attn = Softmax(1, name='Within-Aspect')(X)
    # X = [batch_size, N_ASPECT, 2*lstm_hidden_size]
    X = Dot(1, name='Attention')([attn, asen])
    X = Dropout(drop_rate, name='Dropout')(X)
    # X = [batch_size, N_ASPECT * 2 * lstm_hidden_size]
    X = Flatten(name='Flatten')(X)

    # Prediction module
    # X = [batch, dense_hidden_size]
    X = Dense(opt['dense_hidden_size'],
              kernel_regularizer=regularizers.l2(opt['reg_rate']),
              name='Asp-Senti-Clf-1')(X)
    X = LeakyReLU(alpha=opt['leakyRelu_alpha'], name='LeakyReLU')(X)
    # X = [batch, N_SENTI * N_ASPECT]
    X = Dense(N_SENTI * N_ASPECT,
              kernel_regularizer=regularizers.l2(opt['reg_rate']),
              name='Asp-Senti-Clf-2')(X)
    # X = [batch, N_SENTI * N_ASPECT], softmax-normalised per aspect
    X = Lambda(per_aspect_softmax, name='Aspect-Softmax')(X)

    return Model(inputs=sen, outputs=X)
def MultiHeadsAttModel(self, In_agent, In_neighbor, l=5, d=128, dv=16, dout=128, nv=8, suffix=-1):
    """Multi-head attention of every agent over its neighbors.

    Args:
        In_agent: Agent features; reshaped here to
            ``(num_agents, 1, d)`` per sample.
        In_neighbor: Neighbor selection tensor that is batch-dotted with the
            tiled agent features (assumed [batch, agent, neighbor, agent] -
            TODO confirm against the caller).
        l: Unused.
        d: Feature size of ``In_agent``.
        dv: Size of each attention head.
        dout: Units of the final dense layer.
        nv: Number of attention heads.
        suffix: Integer appended to the names of the dense layers.

    Returns:
        Tuple ``(out, att_record)``: ``out`` is the output of the final
        ``Dense(dout)`` layer and ``att_record`` is the attention reshaped to
        ``(num_agents, nv, num_neighbors)`` per sample.
    """
    n_agents = self.num_agents
    n_neighbors = self.num_neighbors
    heads_first = (0, 1, 4, 2, 3)

    def relu_dense(units, name_template):
        # All dense layers here share activation and initializer.
        return Dense(units,
                     activation='relu',
                     kernel_initializer='random_normal',
                     name=name_template % suffix)

    def split_heads(tensor, length):
        # [batch,agent,length,dv*nv] -> [batch,agent,length,dv,nv]
        #                            -> [batch,agent,nv,length,dv]
        tensor = Reshape((n_agents, length, dv, nv))(tensor)
        return Lambda(
            lambda t: K.permute_dimensions(t, heads_first))(tensor)

    def attention_weights(pair):
        return K.softmax(batch_dot(pair[0], pair[1], axes=[4, 4]))

    def weighted_head_mean(pair):
        return K.mean(batch_dot(pair[0], pair[1]), axis=2)

    # Agent representation: [batch,agent,dim] -> [batch,agent,1,dim]
    agent_repr = Reshape((n_agents, 1, d))(In_agent)

    # Neighbor representation:
    # [batch,agent,dim] -> (tile) [batch,agent,agent,dim]
    neighbor_repr = RepeatVector3D(n_agents)(In_agent)
    # [batch,agent,neighbor,agent] x [batch,agent,agent,dim]
    #   -> [batch,agent,neighbor,dim]
    neighbor_repr = Lambda(lambda x: batch_dot(x[0], x[1]))(
        [In_neighbor, neighbor_repr])

    # Attention computation (multi-head).
    # [batch,agent,1,dim] -> [batch,agent,nv,1,dv]
    agent_repr_head = relu_dense(dv * nv, 'agent_repr_%d')(agent_repr)
    agent_repr_head = split_heads(agent_repr_head, 1)

    # [batch,agent,neighbor,dim] -> [batch,agent,nv,neighbor,dv]
    neighbor_repr_head = relu_dense(dv * nv,
                                    'neighbor_repr_%d')(neighbor_repr)
    neighbor_repr_head = split_heads(neighbor_repr_head, n_neighbors)

    # [batch,agent,nv,1,dv] x [batch,agent,nv,neighbor,dv]
    #   -> [batch,agent,nv,1,neighbor]
    att = Lambda(attention_weights)([agent_repr_head, neighbor_repr_head])
    # [batch,agent,nv,1,neighbor] -> [batch,agent,nv,neighbor]
    att_record = Reshape((n_agents, nv, n_neighbors))(att)

    # Second embedding of the neighbors, used as the attention values.
    neighbor_hidden_repr_head = relu_dense(
        dv * nv, 'neighbor_hidden_repr_%d')(neighbor_repr)
    neighbor_hidden_repr_head = split_heads(neighbor_hidden_repr_head,
                                            n_neighbors)

    # Attend, average over the heads and project to the output size.
    out = Lambda(weighted_head_mean)([att, neighbor_hidden_repr_head])
    out = Reshape((n_agents, dv))(out)
    out = relu_dense(dout, 'MLP_after_relation_%d')(out)
    return out, att_record
def _softmax_last_axis(logits):
    """Return a numerically stable softmax of `logits` over its last axis."""
    logits = np.asarray(logits)
    exps = np.exp(logits - np.max(logits, axis=-1, keepdims=True))
    return exps / np.sum(exps, axis=-1, keepdims=True)


def _predict_and_accumulate(model, box_test, origins, multiloss, score,
                            score_num):
    """Predict one batch and add each patch's probabilities into `score`."""
    patch_test_mask = model.predict(box_test, verbose=0)
    if multiloss:
        # Multi-output model: the third output is the one that is scored.
        patch_test_mask = patch_test_mask[2]
    # NOTE(review): the original formatting was lost; the softmax is assumed
    # to apply whether or not `multiloss` is set - confirm.
    # Done in NumPy so no new backend graph ops are created per batch.
    patch_test_mask = _softmax_last_axis(patch_test_mask)
    print ('predict finish')

    img_deps, img_rows, img_cols = box_test.shape[1:4]
    # Only the first len(origins) entries are real patches; the rest of a
    # partially filled batch is zero padding and is ignored.
    for i, (dep0, row0, col0) in enumerate(origins):
        score[dep0:dep0 + img_deps,
              row0:row0 + img_rows,
              col0:col0 + img_cols, :] += patch_test_mask[i]
        score_num[dep0:dep0 + img_deps,
                  row0:row0 + img_rows,
                  col0:col0 + img_cols, :] += 1


def predict_window_mulgpu(model, batch, imgs_test, img_deps, img_rows, img_cols, multiloss):
    """Sliding-window prediction over a 3-D volume with fixed-size batches.

    Patches of size ``(img_deps, img_rows, img_cols)`` are taken with a stride
    of two thirds of the patch size (the last patch on each axis is shifted
    back so it ends at the volume border). Patches are predicted ``batch`` at
    a time and the two-class softmax outputs are averaged where patches
    overlap.

    Args:
        model: Object with ``predict(x, verbose=0)``. Must return an array of
            shape ``(batch, img_deps, img_rows, img_cols, 2)``, or a sequence
            whose third element has that shape when ``multiloss`` is true.
        batch: Number of patches per ``predict`` call. The last, partially
            filled batch is zero-padded to this size so the batch shape
            never changes.
        imgs_test: Volume indexed as ``[depth, row, col]``.
        img_deps, img_rows, img_cols: Patch size along each axis.
        multiloss: Whether ``model.predict`` returns multiple outputs.

    Returns:
        Array of shape ``imgs_test.shape[:3]`` with the averaged score of
        class index 1.

    Raises:
        ValueError: If the volume is smaller than the patch on any axis.
    """
    x, y, z = imgs_test.shape[:3]
    if x < img_deps or y < img_rows or z < img_cols:
        raise ValueError(
            'volume %r is smaller than the patch size %r'
            % ((x, y, z), (img_deps, img_rows, img_cols)))

    # Floor division keeps the strides integral on Python 2 and Python 3.
    window_deps = (img_deps // 3) * 2
    window_rows = (img_rows // 3) * 2
    window_cols = (img_cols // 3) * 2

    score = np.zeros((x, y, z, 2), dtype='float32')
    score_num = np.zeros((x, y, z, 2), dtype='int16')

    batch_shape = (batch, img_deps, img_rows, img_cols, 1)
    box_test = np.zeros(batch_shape, dtype="float32")
    origins = []  # (dep, row, col) start of every patch queued in box_test
    num = 0

    for deps in range(0, x - img_deps + window_deps, window_deps):
        print (deps)
        # Clamp each axis independently so the patch never leaves the volume.
        dep0 = min(deps, x - img_deps)
        for rows in range(0, y - img_rows + window_rows, window_rows):
            row0 = min(rows, y - img_rows)
            for cols in range(0, z - img_cols + window_cols, window_cols):
                col0 = min(cols, z - img_cols)

                box_test[len(origins), :, :, :, 0] = imgs_test[
                    dep0:dep0 + img_deps,
                    row0:row0 + img_rows,
                    col0:col0 + img_cols]
                origins.append((dep0, row0, col0))

                if len(origins) == batch:
                    num = num + 1
                    print ('num: ', num)
                    print ('box:', box_test.shape)
                    _predict_and_accumulate(model, box_test, origins,
                                            multiloss, score, score_num)
                    origins = []
                    box_test = np.zeros(batch_shape, dtype="float32")

    # Flush the patches left over when the patch count is not a multiple of
    # `batch`; otherwise the voxels they cover would be divided 0/0 below.
    if origins:
        num = num + 1
        print ('num: ', num)
        print ('box:', box_test.shape)
        _predict_and_accumulate(model, box_test, origins, multiloss, score,
                                score_num)

    score = score / (score_num)
    score2 = score[:, :, :, 1]
    return score2
def call(self, x):
    """Attention-weighted sum of ``x`` over axis 1.

    A score is computed for each position as ``tanh(x . W + b)``, the scores
    are softmax-normalised, and ``x`` is summed over axis 1 using those
    weights.
    """
    scores = K.tanh(K.dot(x, self.W) + self.b)
    scores = K.squeeze(scores, axis=-1)
    # Restore the trailing axis so the weights broadcast against x.
    weights = K.expand_dims(K.softmax(scores), axis=-1)
    return K.sum(x * weights, axis=1)
def call(self, inputs):
    """Attend from a text representation over image feature-map locations.

    Args:
        inputs: Triple ``(text, img, masks)``. When ``masks`` is not None it
            is stored on ``self.masks`` and multiplied into the raw scores
            before the softmax.

    Returns:
        List ``[text, q, k, v, s, scores, beta, o]``: the text input, the
        query/key/value projections, the raw scores, the softmax attention
        map, the (optionally masked) scores and the attended output.

    Side effects:
        Records the static shapes of the tensors above on ``self.*_sh``.
    """
    def flatten_spatial(fmap):
        # Merge height and width into one axis: [bs, h, w, c] -> [bs, h*w, c]
        height, width, channels = (int(fmap.shape[1]), int(fmap.shape[2]),
                                   int(fmap.shape[3]))
        return kl.Reshape(target_shape=(height * width, channels))(fmap)

    def static_shape(tensor):
        return tuple(tensor.shape.as_list())

    text, img, masks = inputs
    if masks is not None:
        self.masks = masks

    # Query from the text, key and value from the image (1x1 convolutions).
    query = kl.Dense(self.filters_q, use_bias=True)(text)
    query = kl.Activation(activation)(query)

    key = kl.Conv2D(filters=self.filters_k,
                    strides=(1, 1),
                    kernel_size=(1, 1),
                    padding='same')(img)
    key = kl.Activation(activation)(key)

    value = kl.Conv2D(filters=self.filters_v,
                      strides=(1, 1),
                      kernel_size=(1, 1),
                      padding='same')(img)
    value = kl.Activation(activation)(value)

    # Raw scores between the query and every spatial location: [bs, N, M]
    key_transposed = K.permute_dimensions(flatten_spatial(key), (0, 2, 1))
    raw_scores = K.batch_dot(query, key_transposed)

    if self.masks is None:
        beta = raw_scores
    else:
        beta = kl.Multiply()([raw_scores, self.masks])

    # Attention map over the last axis.
    scores = K.softmax(beta, axis=-1)

    # Attended image features: [bs, N, C]
    attended = K.batch_dot(scores, flatten_spatial(value))

    self.text_sh = static_shape(text)
    self.q_sh = static_shape(query)
    self.k_sh = static_shape(key)
    self.v_sh = static_shape(value)
    self.s_sh = static_shape(raw_scores)
    self.scores_sh = static_shape(scores)
    self.beta_sh = static_shape(beta)
    self.o_sh = static_shape(attended)

    return [text, query, key, value, raw_scores, scores, beta, attended]
def yolo_head(feats, anchors, num_classes):
    """Turn final-layer features into bounding-box parameters.

    Parameters
    ----------
    feats : tensor
        Final convolutional layer features (channels last).
    anchors : array-like
        Anchor box widths and heights.
    num_classes : int
        Number of target classes.

    Returns
    -------
    box_xy : tensor
        x, y box predictions offset by the grid cell index and divided by
        the grid dimensions.
    box_wh : tensor
        w, h box predictions scaled by the anchors and divided by the grid
        dimensions.
    box_confidence : tensor
        Sigmoid objectness estimate for each box.
    box_class_probs : tensor
        Softmax class distribution for each box.
    """
    num_anchors = len(anchors)
    # Broadcastable as batch, height, width, num_anchors, box_params.
    anchors_tensor = K.reshape(K.variable(anchors), [1, 1, 1, num_anchors, 2])

    # Grid size is read dynamically so the model can stay fully
    # convolutional (assumes channels last).
    grid_shape = K.shape(feats)[1:3]
    grid_h, grid_w = grid_shape[0], grid_shape[1]

    # Build the per-cell index grid. In YOLO the height index is the
    # innermost iteration.
    row_index = K.tile(K.arange(0, stop=grid_h), [grid_w])
    col_index = K.expand_dims(K.arange(0, stop=grid_w), 0)
    col_index = K.tile(col_index, [grid_h, 1])
    col_index = K.flatten(K.transpose(col_index))
    cell_index = K.transpose(K.stack([row_index, col_index]))
    cell_index = K.reshape(cell_index, [1, grid_h, grid_w, 1, 2])
    cell_index = K.cast(cell_index, K.dtype(feats))

    # Split the channel axis into (anchor, box parameters + class scores).
    feats = K.reshape(
        feats, [-1, grid_h, grid_w, num_anchors, num_classes + 5])
    grid_dims = K.cast(K.reshape(grid_shape, [1, 1, 1, 1, 2]),
                       K.dtype(feats))

    box_confidence = K.sigmoid(feats[..., 4:5])
    box_class_probs = K.softmax(feats[..., 5:])

    # Adjust predictions to each spatial grid point and anchor size.
    box_xy = (K.sigmoid(feats[..., :2]) + cell_index) / grid_dims
    box_wh = K.exp(feats[..., 2:4]) * anchors_tensor / grid_dims

    return box_xy, box_wh, box_confidence, box_class_probs
def DARC1(y_true, y_pred):
    """Cross-entropy on softmaxed predictions plus a DARC1 penalty.

    The penalty is ``alpha`` times the largest per-column sum (over axis 0)
    of the absolute raw predictions.
    """
    alpha = 0.001
    probabilities = K.softmax(y_pred)
    cross_entropy = K.categorical_crossentropy(y_true, probabilities)
    column_abs_sums = K.sum(K.abs(y_pred), axis=0)
    penalty = K.max(column_abs_sums)
    return cross_entropy + alpha * penalty