def soft_attention_alignment(input_1, input_2):
    """Align text representation with neural soft attention"""
    attention = Dot(axes=-1)([input_1, input_2])
    w_att_1 = Lambda(lambda x: softmax(x, axis=1),
                     output_shape=unchanged_shape)(attention)
    w_att_2 = Permute((2, 1))(Lambda(lambda x: softmax(x, axis=2),
                                     output_shape=unchanged_shape)(attention))
    in1_aligned = Dot(axes=1)([w_att_1, input_1])
    in2_aligned = Dot(axes=1)([w_att_2, input_2])
    return in1_aligned, in2_aligned
def __gumbelSample(self, latent):
    U = K.in_train_phase(
        K.log(-K.log(K.random_uniform(K.shape(latent)) + 1e-20) + 1e-20),
        0.0)
    y = latent - U
    y = softmax(K.reshape(y, (-1,) + self.latent_shape) / self.tau)
    return K.reshape(y, (-1, self.latent_units))
def call(self, x):
    """
    x:  N x D1 x D2
    W1: D1 x d1
    W2: D2 x d2
    W:  D2 x D2
    """
    # first mode projection
    x = nmodeproduct(x, self.W1, 1)  # N x d1 x D2
    # enforce a constant (1/D2) on the diagonal of W
    W = self.W - self.W * K.eye(self.in_shape[2], dtype='float32') + K.eye(
        self.in_shape[2], dtype='float32') / self.in_shape[2]
    # calculate attention
    attention = Activations.softmax(nmodeproduct(x, W, 2), axis=-1)  # N x d1 x D2
    # apply attention
    x = self.alpha * x + (1.0 - self.alpha) * x * attention
    # second mode projection
    x = nmodeproduct(x, self.W2, 2)
    # bias add
    x = x + self.bias
    if self.output_dim[1] == 1:
        x = K.squeeze(x, axis=-1)
    return x
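# The layer above relies on an `nmodeproduct` helper that is not defined in this
# snippet. A minimal NumPy sketch of the usual mode-n tensor-matrix product
# (contract mode `mode` of x against the first axis of W, keeping the new axis in
# place) is shown below; this is an assumption about the intended helper, not its
# actual code.
import numpy as np

def nmodeproduct_demo(x, W, mode):
    # x: (..., D_mode, ...), W: (D_mode, d); the result has d at position `mode`
    y = np.tensordot(x, W, axes=([mode], [0]))
    return np.moveaxis(y, -1, mode)

# e.g. x of shape (N, D1, D2) with W1 of shape (D1, d1) gives (N, d1, D2),
# matching the "N x d1 x D2" comment in the layer above.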
def sampling(self, logits):
    # TODO: should it be logits or log(softmax(logits))? From the paper
    # (Cat. reparam.) it looks like the latter!
    # U = K.random_uniform(K.shape(logits), 0, 1)
    # y = logits - K.log(-K.log(U + 1e-20) + 1e-20)  # logits + gumbel noise
    # y = K.reshape(y, (-1, self.N, self.M))
    q_y = K.reshape(logits, (-1, self.N, self.M))
    q_y = softmax(q_y)
    log_q_y = K.log(q_y + 1e-20)
    U = K.random_uniform(K.shape(log_q_y), 0, 1)
    y = log_q_y - K.log(-K.log(U + 1e-20) + 1e-20)  # log-prob + gumbel noise
    z = softmax(y / self.tau)
    z = K.reshape(z, (-1, self.N * self.M))
    return z
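# Hypothetical NumPy sketch (not part of the snippet above) illustrating why the
# TODO is largely moot: log(softmax(logits)) differs from the raw logits only by a
# per-row constant (the logsumexp), and the softmax applied after adding Gumbel
# noise is invariant to such per-row shifts, so both choices yield exactly the same
# sample for the same uniform noise.
import numpy as np

def gumbel_softmax_demo(logits, tau=1.0, seed=0):
    rng = np.random.default_rng(seed)
    u = rng.uniform(size=logits.shape)
    g = -np.log(-np.log(u + 1e-20) + 1e-20)                          # Gumbel(0, 1) noise
    log_q = logits - np.log(np.exp(logits).sum(-1, keepdims=True))   # log softmax
    z_from_logits = np.exp((logits + g) / tau)
    z_from_logits /= z_from_logits.sum(-1, keepdims=True)
    z_from_log_q = np.exp((log_q + g) / tau)
    z_from_log_q /= z_from_log_q.sum(-1, keepdims=True)
    return np.allclose(z_from_logits, z_from_log_q)                  # True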
def call(self, inputs):
    inputs_x = inputs[0]
    inputs_y = inputs[1]

    input_length = K.sum(inputs_x**2., axis=1, keepdims=True)**0.5
    input_length /= self.s**0.5
    input_length += 0.0001
    kernel_length = K.sum(self.kernel**2., axis=0, keepdims=True)**0.5
    kernel_length /= self.s**0.5
    kernel_length += 0.0001

    inputs_norm = inputs_x / input_length
    kernel_norm = self.kernel / kernel_length

    label_onehot = inputs_y
    negative_mask = tf.fill([self.units, self.units], 1.) - tf.eye(
        self.units)  # shape = [#spk, #spk]

    loss_BS = K.mean(
        tf.matmul(
            kernel_norm,
            kernel_norm,
            adjoint_a=True  # transpose the first matrix
        ) * negative_mask)

    inner_output = K.dot(inputs_x, self.kernel)
    softmax_output = softmax(inner_output)
    loss_s = K.categorical_crossentropy(inputs_y, softmax_output)

    final_loss = loss_s + loss_BS
    return final_loss
def call(self, x):
    # Calling updates
    updates = []
    updates.append((self.sample_sum, self.sample_sum + 1))
    updates.append(
        (self.epoch_nr, self.sample_sum / self.samples_per_epoch))
    updates.append(
        (self.tau,
         K.max([
             self.tau_init * K.exp(-self.anneal_rate * self.epoch_nr),
             self.min_temperature
         ])))
    # These updates will be called after each sample.
    self.add_update(updates, x)

    U = K.random_uniform(K.shape(x), 0, 1)
    # Logits + Gumbel noise
    y = x - K.log(-K.log(U + K.epsilon()) + K.epsilon())
    y = softmax(
        K.reshape(y, (self.batch_size, self.nr_of_samples,
                      self.softmax_size)) / self.tau)
    if self.transpose:
        y = K.permute_dimensions(y, (0, 2, 1))
    return y
def test_temporal_softmax(self):
    x = backend.placeholder(shape=(2, 2, 3))
    f = backend.function([x], [activations.softmax(x)])
    test_values = np.random.random((2, 2, 3)) * 10
    result = f([test_values])[0]
    expected = _ref_softmax(test_values[0, 0])
    self.assertAllClose(result[0, 0], expected, rtol=1e-05)
def user_rep():
    openfile = open("Training_Body_Title_user.p", "rb")
    x = pickle.load(openfile)
    repre = {}
    count = 0
    use = 0
    for y in x:
        tag_string = y['tags'].encode('utf-8')
        # print tag_string
        tag_list = utils.get_tag_list(tag_string)
        tag_enc = get_tag_encoding(tag_list)
        count += 1
        print count
        try:
            repre[user_id[y['OwnerUserId']]] += tag_enc
        except:
            try:
                repre[user_id[y['OwnerUserId']]] = np.zeros(len(tag_dict))
                repre[user_id[y['OwnerUserId']]] += tag_enc
            except:
                # use += 1
                # print use
                continue
    for key in repre:
        repre[key] = softmax(repre[key])
        print repre[key].shape
    return repre
def predict_txt(east_detect, img_path, txt_path, pixel_threshold, quiet=False):
    img = image.load_img(img_path)
    d_wight, d_height = resize_image(img, cfg.max_predict_img_size)
    scale_ratio_w = d_wight / img.width
    scale_ratio_h = d_height / img.height
    img = img.resize((d_wight, d_height), Image.NEAREST).convert('RGB')
    img = image.img_to_array(img)
    img = preprocess_input(img, mode='tf')
    x = np.expand_dims(img, axis=0)
    y = east_detect.predict(x)
    y = np.squeeze(y, axis=0)
    # activate the output layer
    y[:, :, :4] = softmax(y[:, :, :4])
    y[:, :, 4:6] = sigmoid(y[:, :, 4:6])
    cond = np.greater_equal(y[:, :, 0], pixel_threshold)
    activation_pixels = np.where(cond)
    quad_scores, quad_after_nms = nms(y, activation_pixels)
    txt_items = []
    for score, geo in zip(quad_scores, quad_after_nms):
        if np.amin(score) > 0:
            rescaled_geo = geo / [scale_ratio_w, scale_ratio_h]
            rescaled_geo_list = np.reshape(rescaled_geo, (8,)).tolist()
            txt_item = ','.join(map(str, rescaled_geo_list))
            txt_items.append(txt_item + '\n')
        elif not quiet:
            print('quad invalid with vertex num less than 4.')
    if cfg.predict_write2txt and len(txt_items) > 0:
        with open(txt_path, 'w') as f_txt:
            f_txt.writelines(txt_items)
def multiplicative_self_attention(units, n_hidden=None, n_output_features=None,
                                  activation=None):
    """ Compute multiplicative self attention for time series of vectors
        (with batch dimension) using the formula:
        score(h_i, h_j) = <W_1 h_i, W_2 h_j>,
        where W_1 and W_2 are learnable matrices
        with dimensionality [n_hidden, n_input_features]

    Args:
        units: tf tensor with dimensionality [batch_size, time_steps, n_input_features]
        n_hidden: number of units in the hidden representation of the similarity measure
        n_output_features: number of features in the output dense layer
        activation: activation at the output

    Returns:
        output: self-attended tensor with dimensionality
            [batch_size, time_steps, n_output_features]
    """
    n_input_features = K.int_shape(units)[2]
    if n_hidden is None:
        n_hidden = n_input_features
    if n_output_features is None:
        n_output_features = n_input_features
    exp1 = Lambda(lambda x: expand_tile(x, axis=1))(units)
    exp2 = Lambda(lambda x: expand_tile(x, axis=2))(units)
    queries = Dense(n_hidden)(exp1)
    keys = Dense(n_hidden)(exp2)
    scores = Lambda(lambda x: K.sum(queries * x, axis=3, keepdims=True))(keys)
    attention = Lambda(lambda x: softmax(x, axis=2))(scores)
    mult = Multiply()([attention, exp1])
    attended_units = Lambda(lambda x: K.sum(x, axis=2))(mult)
    output = Dense(n_output_features, activation=activation)(attended_units)
    return output
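# Hypothetical NumPy sketch (the names h, W1, W2 are illustrative and not taken
# from the snippet above) of the multiplicative score it computes for a single
# sequence: score(h_i, h_j) = <W1 h_i, W2 h_j>, followed by a softmax over j.
import numpy as np

def multiplicative_scores_demo(h, W1, W2):
    # h: [time_steps, n_input_features]; W1, W2: [n_hidden, n_input_features]
    q = h @ W1.T                              # [time_steps, n_hidden]
    k = h @ W2.T                              # [time_steps, n_hidden]
    scores = q @ k.T                          # scores[i, j] = <W1 h_i, W2 h_j>
    e = np.exp(scores - scores.max(axis=1, keepdims=True))
    return e / e.sum(axis=1, keepdims=True)   # each row of attention sums to 1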
def additive_self_attention(units, n_hidden=None, n_output_features=None,
                            activation=None):
    """ Compute additive self attention for time series of vectors
        (with batch dimension) using the formula:
        score(h_i, h_j) = <v, tanh(W_1 h_i + W_2 h_j)>,
        where v is a learnable vector of n_hidden dimensionality,
        and W_1 and W_2 are learnable [n_hidden, n_input_features] matrices

    Args:
        units: tf tensor with dimensionality [batch_size, time_steps, n_input_features]
        n_hidden: number of units in the hidden representation of the similarity measure
        n_output_features: number of features in the output dense layer
        activation: activation at the output

    Returns:
        output: self-attended tensor with dimensionality
            [batch_size, time_steps, n_output_features]
    """
    n_input_features = K.int_shape(units)[2]
    if n_hidden is None:
        n_hidden = n_input_features
    if n_output_features is None:
        n_output_features = n_input_features
    exp1 = Lambda(lambda x: expand_tile(x, axis=1))(units)
    exp2 = Lambda(lambda x: expand_tile(x, axis=2))(units)
    units_pairs = Concatenate(axis=3)([exp1, exp2])
    query = Dense(n_hidden, activation="tanh")(units_pairs)
    attention = Dense(1, activation=lambda x: softmax(x, axis=2))(query)
    attended_units = Lambda(lambda x: K.sum(attention * x, axis=2))(exp1)
    output = Dense(n_output_features, activation=activation)(attended_units)
    return output
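# Hypothetical NumPy sketch (h, W1, W2, v are illustrative names) of the additive
# score used above for a single sequence: score(h_i, h_j) = <v, tanh(W1 h_i + W2 h_j)>,
# normalised with a softmax over j.
import numpy as np

def additive_scores_demo(h, W1, W2, v):
    # h: [time_steps, n_input], W1, W2: [n_hidden, n_input], v: [n_hidden]
    a = h @ W1.T                                            # [T, n_hidden]
    b = h @ W2.T                                            # [T, n_hidden]
    scores = np.tanh(a[:, None, :] + b[None, :, :]) @ v     # [T, T]
    e = np.exp(scores - scores.max(axis=1, keepdims=True))
    return e / e.sum(axis=1, keepdims=True)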
def __init__(self, max_len_input, dense_size=10):
    self.max_len_input = max_len_input
    self.dense_1 = Dense(dense_size, activation='tanh', name='AttentionDense_1')
    self.dense_2 = Dense(1, name='AttentionDense_2')
    self.concatenate = Concatenate(axis=-1, name='AttentionConcat')
    self.repeatvector = RepeatVector(max_len_input, name='AttentionRepeat')
    self.dot = Dot(axes=1, name='AttentionDot')
    self.softmax_over_time = Lambda(lambda x: softmax(x, axis=1),
                                    name='AttentionSoftMaxOverTime')
def _dynamic_routing(self, u_hat, b_ij):
    for i in range(self.iterations):
        c_ij = softmax(b_ij, axis=1)
        s_j = K.batch_dot(c_ij, u_hat, [2, 2])
        v_j = squash(s_j)
        if i < self.iterations - 1:
            b_ij += K.batch_dot(v_j, u_hat, [2, 3])
    return v_j
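# The routing loop above relies on a `squash` helper that is not defined in this
# snippet. A minimal sketch of the standard capsule-network squashing nonlinearity
# follows; this is an assumption about the intended helper, not its actual code.
from keras import backend as K

def squash(vectors, axis=-1):
    # scales each vector to length ||s||^2 / (1 + ||s||^2) while keeping its direction
    s_squared_norm = K.sum(K.square(vectors), axis=axis, keepdims=True)
    scale = s_squared_norm / (1 + s_squared_norm) / K.sqrt(s_squared_norm + K.epsilon())
    return scale * vectors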
def call(self, x, mask=None):
    energy = K.squeeze(self.layer(x), 2)
    p_matrix = softmax(energy)
    if mask is not None:
        mask = self.squash_mask(mask)
        p_matrix = make_safe(p_matrix * mask)  # remove unwanted items
        p_matrix = p_matrix / K.sum(p_matrix, axis=-1, keepdims=True)  # renormalize
    return make_safe(p_matrix)
def call(self, inputs):
    inputs_x = inputs[0]
    inputs_y = inputs[1]

    input_length = K.sum(inputs_x**2., axis=1, keepdims=True)**0.5
    input_length /= self.s**0.5
    input_length += 0.0001
    kernel_length = K.sum(self.kernel**2., axis=0, keepdims=True)**0.5
    kernel_length /= self.s**0.5
    kernel_length += 0.0001

    inputs_norm = inputs_x / input_length
    kernel_norm = self.kernel / kernel_length

    # label_onehot = tf.one_hot(tf.reshape(inputs_y, [-1]), self.units)
    label_onehot = inputs_y  # shape = [#batch_sample, #spk]
    negative_mask = tf.fill([self.units, self.units], 1.) - tf.eye(
        self.units)  # shape = [#spk, #spk]
    negative_mask2 = tf.fill([self.num_batch, self.units],
                             1.) - label_onehot  # shape = [#batch_sample, #spk]

    loss_BS = K.mean(
        tf.matmul(
            kernel_norm,
            kernel_norm,
            adjoint_a=True  # transpose the first matrix
        ) * negative_mask)

    if self.with_H:
        cos_output = K.dot(inputs_norm, kernel_norm)
        cos_target = K.sum(cos_output * label_onehot, axis=1, keepdims=True)
        cos_diff = K.exp(cos_output - cos_target) * negative_mask2
        hard_negatives, _ = tf.nn.top_k(cos_diff, k=self.negative_k, sorted=False)
        loss_H = K.mean(K.log(1. + hard_negatives), axis=1)
        final_loss = loss_H + loss_BS
    else:
        inner_output = K.dot(inputs_x, self.kernel)
        softmax_output = softmax(inner_output)
        # loss_s = K.sparse_categorical_crossentropy(inputs_y, softmax_output)
        loss_s = K.categorical_crossentropy(inputs_y, softmax_output)
        final_loss = loss_s + loss_BS

    return final_loss
def step(self, x, states):
    print('step : ')
    ytm, stm = states

    # repeat the hidden state to the length of the sequence
    _stm = K.repeat(stm, self.timesteps)

    # now multiply the weight matrix with the repeated hidden state
    _Wxstm = K.dot(_stm, self.W_a)

    # calculate the attention probabilities
    # this relates how much other timesteps contributed to this one.
    et = K.dot(activations.tanh(_Wxstm + self._uxpb),
               K.expand_dims(self.V_a))
    at = K.exp(et)
    at_sum = K.sum(at, axis=1)
    at_sum_repeated = K.repeat(at_sum, self.timesteps)
    at /= at_sum_repeated  # vector of size (batchsize, timesteps, 1)

    # calculate the context vector
    context = K.squeeze(K.batch_dot(at, self.x_seq, axes=1), axis=1)

    # ~~~> calculate new hidden state
    # first calculate the "r" gate:
    rt = activations.sigmoid(
        K.dot(ytm, self.W_r) + K.dot(stm, self.U_r) +
        K.dot(context, self.C_r) + self.b_r)

    # now calculate the "z" gate
    zt = activations.sigmoid(
        K.dot(ytm, self.W_z) + K.dot(stm, self.U_z) +
        K.dot(context, self.C_z) + self.b_z)

    # calculate the proposal hidden state:
    s_tp = activations.tanh(
        K.dot(ytm, self.W_p) + K.dot((rt * stm), self.U_p) +
        K.dot(context, self.C_p) + self.b_p)

    # new hidden state:
    st = (1 - zt) * stm + zt * s_tp

    yt = activations.softmax(
        K.dot(ytm, self.W_o) + K.dot(stm, self.U_o) +
        K.dot(context, self.C_o) + self.b_o)

    if self.return_probabilities:
        return at, [yt, st]
    else:
        return yt, [yt, st]
def test_softmax_invalid():
    """Test for the expected exception behaviour on invalid input."""
    x = K.placeholder(ndim=1)

    # One dimensional arrays are supposed to raise a value error
    with pytest.raises(ValueError):
        f = K.function([x], [activations.softmax(x)])
def get_sentence_vector(input):
    atten_vec = Dense(1)(input)
    atten_vec = Lambda(lambda x: softmax(x, axis=1),
                       output_shape=unchanged_shape)(atten_vec)
    atten_vec = Lambda(lambda x: K.squeeze(x, axis=-1),
                       output_shape=vec_output_shape)(atten_vec)
    sen_vector = Dot(axes=1)([atten_vec, input])
    return sen_vector
def step(self, x, states):
    ytm, stm = states

    # repeat the hidden state to the length of the sequence
    _stm = K.repeat(stm, self.timesteps)

    # now multiply the weight matrix with the repeated hidden state
    _Wxstm = K.dot(_stm, self.W_a)

    # calculate the attention probabilities
    # this relates how much other timesteps contributed to this one.
    et = K.dot(activations.tanh(_Wxstm + self._uxpb),
               K.expand_dims(self.V_a))
    at = K.exp(et)
    at_sum = K.sum(at, axis=1)
    at_sum_repeated = K.repeat(at_sum, self.timesteps)
    at /= at_sum_repeated  # vector of size (batchsize, timesteps, 1)

    # calculate the context vector
    context = K.squeeze(K.batch_dot(at, self.x_seq, axes=1), axis=1)

    # ~~~> calculate new hidden state
    # first calculate the "r" gate:
    rt = activations.sigmoid(
        K.dot(ytm, self.W_r) + K.dot(stm, self.U_r) +
        K.dot(context, self.C_r) + self.b_r)

    # now calculate the "z" gate
    zt = activations.sigmoid(
        K.dot(ytm, self.W_z) + K.dot(stm, self.U_z) +
        K.dot(context, self.C_z) + self.b_z)

    # calculate the proposal hidden state:
    s_tp = activations.tanh(
        K.dot(ytm, self.W_p) + K.dot((rt * stm), self.U_p) +
        K.dot(context, self.C_p) + self.b_p)

    # new hidden state:
    st = (1 - zt) * stm + zt * s_tp

    yt = activations.softmax(
        K.dot(ytm, self.W_o) + K.dot(stm, self.U_o) +
        K.dot(context, self.C_o) + self.b_o)

    if self.return_probabilities:
        return at, [yt, st]
    else:
        return yt, [yt, st]
def call(self, x, mask=None):
    x_Ws1 = K.tanh(K.dot(x, self.W_s1))
    H = K.dot(x_Ws1, self.W_s2)
    A = softmax(H, axis=1)
    A_reshape = K.permute_dimensions(A, pattern=[0, 2, 1])
    M = K.batch_dot(A_reshape, x, axes=(2, 1))
    if self.return_attention_vector:
        return [A_reshape, M]
    return M
def __init__(self, model):
    """
    Keras classifier wrapper.
    Note that the wrapped classifier should output logits.
    """
    layer_id = len(model.layers) - 2
    self.model = Model(inputs=model.layers[0].input,
                       outputs=model.layers[layer_id].output)
    self.softmax = Sequential()
    self.softmax.add(Lambda(lambda X: softmax(X, axis=1), input_shape=(10,)))
def test_softmax_3d_axis_tuple(self):
    x = backend.placeholder(ndim=3)
    f = backend.function([x], [activations.softmax(x, axis=(1, 2))])
    test_values = np.random.random((2, 3, 5))
    result = f([test_values])[0]
    expected = np.zeros((2, 3, 5))
    for i in range(2):
        expected[i, :, :] = _ref_softmax(test_values[i, :, :])
    self.assertAllClose(result, expected, rtol=1e-05)
def test_softmax_2d_axis_0(self):
    x = backend.placeholder(ndim=2)
    f = backend.function([x], [activations.softmax(x, axis=0)])
    test_values = np.random.random((2, 5))
    result = f([test_values])[0]
    expected = np.zeros((2, 5))
    for i in range(5):
        expected[:, i] = _ref_softmax(test_values[:, i])
    self.assertAllClose(result, expected, rtol=1e-05)
def call(self, inputs):
    x = [K.expand_dims(v, axis=-1) for v in inputs]
    x = K.concatenate(x, axis=-1)
    x = K.permute_dimensions(x, pattern=[0, 1, 3, 2])
    weights = K.tanh(K.dot(x, self.W1) + K.dot(self.vm, self.W2))
    weights = K.dot(weights, K.transpose(self.vm))
    weights = softmax(weights, axis=-2)
    outputs = K.sum(x * weights, axis=-2)
    return outputs
def gumbel_loss(x, x_hat):
    q_y = K.reshape(logits_y, (-1, N, M))
    q_y = softmax(q_y)
    log_q_y = K.log(q_y + 1e-20)
    kl_tmp = q_y * (log_q_y - K.log(1.0 / M))
    KL = K.sum(kl_tmp, axis=(1, 2))
    elbo = latent_dim * bce(x, x_hat) - KL
    # elbo = latent_dim * mse(x, x_hat) - KL
    return elbo
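# Hypothetical NumPy sketch (the argument names are illustrative) of the KL term
# used above: for q = softmax(logits), KL(q || Uniform(M)) = sum_m q_m * (log q_m - log(1/M)),
# which is zero when q is uniform and grows as q concentrates on fewer categories.
import numpy as np

def kl_to_uniform_demo(logits, M):
    q = np.exp(logits - logits.max(-1, keepdims=True))
    q /= q.sum(-1, keepdims=True)
    return np.sum(q * (np.log(q + 1e-20) - np.log(1.0 / M)), axis=-1)

# kl_to_uniform_demo(np.zeros(4), 4) -> ~0.0 (uniform distribution over 4 categories)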
def build(self):
    def tensor_product(x):
        a = x[0]
        b = x[1]
        y = K.batch_dot(a, b, axis=1)
        y = K.einsum('ijk, ikl->ijl', a, b)
        return y

    # here "query" and "doc" are the names
    query = Input(name='query', shape=(self.config['text1_maxlen'],))
    show_layer_info('Input', query)
    doc = Input(name='doc',
                shape=(self.config['text1_maxlen'], self.config['bin_num']))
    show_layer_info('Input', doc)

    embedding = Embedding(self.config['vocab_size'],
                          self.config['embed_size'],
                          weights=[self.config['embed']],
                          trainable=False)

    q_embed = embedding(query)
    show_layer_info('Embedding', q_embed)
    q_w = Dense(1, kernel_initializer=self.initializer_gate,
                use_bias=False)(q_embed)
    show_layer_info('Dense', q_w)
    q_w = Lambda(lambda x: softmax(x, axis=1),
                 output_shape=(self.config['text1_maxlen'],))(q_w)
    show_layer_info('Lambda-softmax', q_w)

    z = doc
    # z = Dropout(rate=self.config['dropout_rate'])(z)
    # show_layer_info('Dropout', z)
    for i in range(self.config['num_layers'] - 1):
        dense_layer = Dense(self.config['hidden_sizes'][i],
                            kernel_initializer=self.initializer_fc)
        z = dense_layer(z)
        z = Activation('tanh')(z)
        show_layer_info('Dense', z)
    dense_layer2 = Dense(
        self.config['hidden_sizes'][self.config['num_layers'] - 1],
        kernel_initializer=self.initializer_fc)
    z = dense_layer2(z)
    show_layer_info('Dense', z)
    z = Permute((2, 1))(z)
    show_layer_info('Permute', z)
    z = Reshape((self.config['text1_maxlen'],))(z)
    show_layer_info('z shape', z)
    q_w = Reshape((self.config['text1_maxlen'],))(q_w)
    show_layer_info('q_w shape', q_w)

    out_ = Dot(axes=[1, 1])([z, q_w])
    if self.config['target_mode'] == 'classification':
        out_ = Dense(2, activation='softmax')(out_)
    show_layer_info('Dense', out_)

    model = Model(inputs=[query, doc], outputs=[out_])
    return model
def minus_soft_attention_alignment(input_1, input_2):
    """Align text representation with neural soft attention"""
    attention = SimilarityMatrix()([input_1, input_2])
    w_att_1 = Lambda(lambda x: softmax(x, axis=1),
                     output_shape=unchanged_shape)(attention)
    in1_aligned = Dot(axes=1)([w_att_1, input_1])
    return in1_aligned
def build(self):
    query = Input(name='query', shape=(self.config['text1_maxlen'],))
    show_layer_info('Input', query)
    doc = Input(name='doc', shape=(self.config['text2_maxlen'],))
    show_layer_info('Input', doc)

    embedding = Embedding(self.config['vocab_size'],
                          self.config['embed_size'],
                          weights=[self.config['embed']],
                          trainable=self.embed_trainable)

    q_embed = embedding(query)
    show_layer_info('Embedding', q_embed)
    d_embed = embedding(doc)
    show_layer_info('Embedding', d_embed)
    mm = Dot(axes=[2, 2], normalize=True)([q_embed, d_embed])
    show_layer_info('Dot', mm)

    # compute term gating
    w_g = Dense(1)(q_embed)
    show_layer_info('Dense', w_g)
    g = Lambda(lambda x: softmax(x, axis=1),
               output_shape=(self.config['text1_maxlen'],))(w_g)
    show_layer_info('Lambda-softmax', g)
    g = Reshape((self.config['text1_maxlen'],))(g)
    show_layer_info('Reshape', g)

    mm_k = Lambda(lambda x: K.tf.nn.top_k(
        x, k=self.config['topk'], sorted=True)[0])(mm)
    show_layer_info('Lambda-topk', mm_k)

    for i in range(self.config['num_layers']):
        mm_k = Dense(self.config['hidden_sizes'][i],
                     activation='softplus',
                     kernel_initializer='he_uniform',
                     bias_initializer='zeros')(mm_k)
        show_layer_info('Dense', mm_k)

    mm_k_dropout = Dropout(rate=self.config['dropout_rate'])(mm_k)
    show_layer_info('Dropout', mm_k_dropout)
    mm_reshape = Reshape((self.config['text1_maxlen'],))(mm_k_dropout)
    show_layer_info('Reshape', mm_reshape)

    mean = Dot(axes=[1, 1])([mm_reshape, g])
    show_layer_info('Dot', mean)

    if self.config['target_mode'] == 'classification':
        out_ = Dense(2, activation='softmax')(mean)
    elif self.config['target_mode'] in ['regression', 'ranking']:
        out_ = Reshape((1,))(mean)
    show_layer_info('Dense', out_)

    model = Model(inputs=[query, doc], outputs=out_)
    model.summary()
    return model
def __init__(self, model, num_classes=10):
    """
    Keras classifier wrapper.
    Note that the wrapped classifier should output logits.
    """
    self.model = model
    self.softmax = Sequential()
    self.softmax.add(
        Lambda(lambda X: softmax(X, axis=1), input_shape=(num_classes,)))
def soft_attention_alignment(input_1, input_2):
    """Align text representation with neural soft attention"""
    attention = Dot(axes=-1)([input_1, input_2])
    w_att = Lambda(lambda x: softmax(x, axis=1),
                   output_shape=unchanged_shape)(attention)
    in_aligned = Dot(axes=1)([w_att, input_1])
    return in_aligned, w_att
def call(self, encoder_outputs, dec_output, mask=None):
    w1_e = self.W1(encoder_outputs)
    w2_d = self.W2(dec_output)
    tanh_output = tanh(w1_e + w2_d)
    v_dot_tanh = self.V(tanh_output)
    if mask is not None:
        v_dot_tanh += (mask * -1e9)
    attention_weights = softmax(v_dot_tanh, axis=1)
    att_shape = K.shape(attention_weights)
    return K.reshape(attention_weights, (att_shape[0], att_shape[1]))
def call(self, inputs):
    (CAR_sent1_vec, CAR_sent2_vec, CAR_sent3_vec, CAR_sent4_vec,
     c1_vec, c2_vec, c3_vec, c4_vec) = inputs

    Wh1 = K.dot(CAR_sent1_vec, self.kernel)  # (b, s, 2h)
    Wh2 = K.dot(CAR_sent2_vec, self.kernel)
    Wh3 = K.dot(CAR_sent3_vec, self.kernel)
    Wh4 = K.dot(CAR_sent4_vec, self.kernel)

    bh1 = K.dot(CAR_sent1_vec, self.bias)  # (b, s, 1)
    bh2 = K.dot(CAR_sent2_vec, self.bias)
    bh3 = K.dot(CAR_sent3_vec, self.bias)
    bh4 = K.dot(CAR_sent4_vec, self.bias)

    u1 = K.expand_dims(c1_vec, axis=2)  # (b, 2h) -> (b, 2h, 1)
    u2 = K.expand_dims(c2_vec, axis=2)
    u3 = K.expand_dims(c3_vec, axis=2)
    u4 = K.expand_dims(c4_vec, axis=2)

    u1_Wh1 = K.batch_dot(Wh1, u1, axes=[2, 1])  # (b, s, 1)
    u2_Wh2 = K.batch_dot(Wh2, u2, axes=[2, 1])
    u3_Wh3 = K.batch_dot(Wh3, u3, axes=[2, 1])
    u4_Wh4 = K.batch_dot(Wh4, u4, axes=[2, 1])

    attn_1 = softmax(u1_Wh1 + bh1, axis=1)  # (b, s, 1)
    attn_2 = softmax(u2_Wh2 + bh2, axis=1)
    attn_3 = softmax(u3_Wh3 + bh3, axis=1)
    attn_4 = softmax(u4_Wh4 + bh4, axis=1)

    attn1_h = CAR_sent1_vec * attn_1  # (b, s, 2h)
    attn2_h = CAR_sent2_vec * attn_2
    attn3_h = CAR_sent3_vec * attn_3
    attn4_h = CAR_sent4_vec * attn_4

    P1 = K.sum(attn1_h, axis=1)  # (b, s, 2h) -> (b, 2h)
    P2 = K.sum(attn2_h, axis=1)
    P3 = K.sum(attn3_h, axis=1)
    P4 = K.sum(attn4_h, axis=1)

    return [P1, P2, P3, P4]
def step(self, inputs, states):
    input_shape = self.input_spec[0].shape
    states = states[:-self._num_constants]
    en_seq = states[-1]
    _, [h, c] = super(PointerLSTM, self).call(inputs, states[:-1])
    dec_seq = K.repeat(h, input_shape[1])
    Eij = K.dot(self.W1, en_seq)
    Dij = K.dot(self.W2, dec_seq)
    U = self.vt * tanh(Eij + Dij)
    U = K.squeeze(U, 2)
    pointer = softmax(U)
    return pointer, [h, c]
def __init__(self, classifier_path):
    """
    Keras classifier wrapper.
    Note that the wrapped classifier should output logits.

    classifier_path: Path to Keras classifier file.
    """
    self.path = classifier_path
    self.model = load_model(classifier_path)
    self.softmax = Sequential()
    self.softmax.add(
        Lambda(lambda X: softmax(X, axis=1), input_shape=(10,)))
def test_softmax():
    # Test using a reference implementation of softmax
    def softmax(values):
        m = np.max(values)
        e = np.exp(values - m)
        return e / np.sum(e)

    x = K.placeholder(ndim=2)
    f = K.function([x], [activations.softmax(x)])
    test_values = get_standard_values()

    result = f([test_values])[0]
    expected = softmax(test_values)
    assert_allclose(result, expected, rtol=1e-05)
def test_softmax_3d():
    """Test using a reference implementation of softmax."""
    def softmax(values, axis):
        m = np.max(values, axis=axis, keepdims=True)
        e = np.exp(values - m)
        return e / np.sum(e, axis=axis, keepdims=True)

    x = K.placeholder(ndim=3)
    f = K.function([x], [activations.softmax(x, axis=1)])
    test_values = get_standard_values()[:, :, np.newaxis].copy()

    result = f([test_values])[0]
    expected = softmax(test_values, axis=1)
    assert_allclose(result, expected, rtol=1e-05)
def build(self):
    query = Input(name='query', shape=(self.config['text1_maxlen'],))
    show_layer_info('Input', query)
    doc = Input(name='doc', shape=(self.config['text2_maxlen'],))
    show_layer_info('Input', doc)

    embedding = Embedding(self.config['vocab_size'],
                          self.config['embed_size'],
                          weights=[self.config['embed']],
                          trainable=self.embed_trainable)

    q_embed = embedding(query)
    show_layer_info('Embedding', q_embed)
    d_embed = embedding(doc)
    show_layer_info('Embedding', d_embed)
    mm = Dot(axes=[2, 2], normalize=True)([q_embed, d_embed])
    show_layer_info('Dot', mm)

    # compute term gating
    w_g = Dense(1)(q_embed)
    show_layer_info('Dense', w_g)
    g = Lambda(lambda x: softmax(x, axis=1),
               output_shape=(self.config['text1_maxlen'],))(w_g)
    show_layer_info('Lambda-softmax', g)
    g = Reshape((self.config['text1_maxlen'],))(g)
    show_layer_info('Reshape', g)

    mm_k = Lambda(lambda x: K.tf.nn.top_k(
        x, k=self.config['topk'], sorted=True)[0])(mm)
    show_layer_info('Lambda-topk', mm_k)

    for i in range(self.config['num_layers']):
        mm_k = Dense(self.config['hidden_sizes'][i],
                     activation='softplus',
                     kernel_initializer='he_uniform',
                     bias_initializer='zeros')(mm_k)
        show_layer_info('Dense', mm_k)

    mm_k_dropout = Dropout(rate=self.config['dropout_rate'])(mm_k)
    show_layer_info('Dropout', mm_k_dropout)
    mm_reshape = Reshape((self.config['text1_maxlen'],))(mm_k_dropout)
    show_layer_info('Reshape', mm_reshape)

    mean = Dot(axes=[1, 1])([mm_reshape, g])
    show_layer_info('Dot', mean)

    if self.config['target_mode'] == 'classification':
        out_ = Dense(2, activation='softmax')(mean)
    elif self.config['target_mode'] in ['regression', 'ranking']:
        out_ = Reshape((1,))(mean)
    show_layer_info('Dense', out_)

    model = Model(inputs=[query, doc], outputs=out_)
    return model
def build(self):
    def tensor_product(x):
        a = x[0]
        b = x[1]
        y = K.batch_dot(a, b, axis=1)
        y = K.einsum('ijk, ikl->ijl', a, b)
        return y

    query = Input(name='query', shape=(self.config['text1_maxlen'],))
    show_layer_info('Input', query)
    doc = Input(name='doc',
                shape=(self.config['text1_maxlen'], self.config['bin_num']))
    show_layer_info('Input', doc)

    embedding = Embedding(self.config['vocab_size'],
                          self.config['embed_size'],
                          weights=[self.config['embed']],
                          trainable=False)

    q_embed = embedding(query)
    show_layer_info('Embedding', q_embed)
    q_w = Dense(1, kernel_initializer=self.initializer_gate,
                use_bias=False)(q_embed)
    show_layer_info('Dense', q_w)
    q_w = Lambda(lambda x: softmax(x, axis=1),
                 output_shape=(self.config['text1_maxlen'],))(q_w)
    show_layer_info('Lambda-softmax', q_w)

    z = doc
    z = Dropout(rate=self.config['dropout_rate'])(z)
    show_layer_info('Dropout', z)
    for i in range(self.config['num_layers'] - 1):
        z = Dense(self.config['hidden_sizes'][i],
                  kernel_initializer=self.initializer_fc)(z)
        z = Activation('tanh')(z)
        show_layer_info('Dense', z)
    z = Dense(self.config['hidden_sizes'][self.config['num_layers'] - 1],
              kernel_initializer=self.initializer_fc)(z)
    show_layer_info('Dense', z)
    z = Permute((2, 1))(z)
    show_layer_info('Permute', z)
    z = Reshape((self.config['text1_maxlen'],))(z)
    show_layer_info('Reshape', z)
    q_w = Reshape((self.config['text1_maxlen'],))(q_w)
    show_layer_info('Reshape', q_w)

    out_ = Dot(axes=[1, 1])([z, q_w])
    if self.config['target_mode'] == 'classification':
        out_ = Dense(2, activation='softmax')(out_)
    show_layer_info('Dense', out_)

    model = Model(inputs=[query, doc], outputs=[out_])
    return model
def step(self, x_input, states):
    input_shape = self.input_spec[0].shape
    en_seq = states[-1]
    _, [h, c] = super(PointerLSTM, self).step(x_input, states[:-1])

    # vt * tanh(W1 * e + W2 * d)
    dec_seq = K.repeat(h, input_shape[1])
    # dec_seq = K.repeat(h, 2)
    print('dec_seq')
    print(dec_seq)
    Eij = time_distributed_dense(en_seq, self.W1, output_dim=1)
    Dij = time_distributed_dense(dec_seq, self.W2, output_dim=1)
    U = self.vt * tanh(Eij + Dij)
    print('U')
    print(U)
    U = K.squeeze(U, 2)
    print('U squeezed')
    print(U)

    # make probability tensor
    pointer = softmax(U)
    return pointer, [h, c]
def masked_softmax(logits):
    # logits are [batch_size, output_dim]
    x = select(
        tf.tile(tf.equal(output_mask[None, :], 1.0), [tf.shape(logits)[0], 1]),
        logits,
        -1e32 * tf.ones_like(logits))
    return activations.softmax(x)
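# Hypothetical NumPy sketch of the masking trick used above (`select` corresponds
# to tf.where in newer TensorFlow, and `output_mask` is a closure variable there):
# positions where the mask is 0 receive a very large negative logit, so their
# softmax probability is effectively zero while the remaining entries renormalise.
import numpy as np

def masked_softmax_demo(logits, mask):
    x = np.where(mask.astype(bool), logits, -1e32)
    e = np.exp(x - x.max(axis=-1, keepdims=True))
    return e / e.sum(axis=-1, keepdims=True)

# masked_softmax_demo(np.array([[1., 2., 3.]]), np.array([1., 0., 1.]))
# -> roughly [[0.12, 0.00, 0.88]]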
def test_time_distributed_softmax():
    x = K.placeholder(shape=(1, 1, 5))
    f = K.function([x], [activations.softmax(x)])
    test_values = get_standard_values()
    test_values = np.reshape(test_values, (1, 1, np.size(test_values)))
    f([test_values])[0]
def softvaxaxis2(x):
    return softmax(x, axis=2)