def step(self, x, states):
    """Advance the attention decoder by one step.

    ``states`` carries the previous output and the previous hidden state.
    ``x`` is not read here; the attended sequence is ``self.x_seq`` and its
    projection ``self._uxpb``.

    Returns ``(at, [yt, st])`` when ``self.return_probabilities`` is truthy,
    otherwise ``(yt, [yt, st])``.
    """
    prev_output, prev_hidden = states

    # Tile the previous hidden state over every timestep and project it.
    tiled_hidden = K.repeat(prev_hidden, self.timesteps)
    projected_hidden = K.dot(tiled_hidden, self.W_a)

    # Attention energies, then a hand-written normalisation over axis 1
    # (how much each timestep contributes to this step).
    energies = K.dot(activations.tanh(projected_hidden + self._uxpb),
                     K.expand_dims(self.V_a))
    weights = K.exp(energies)
    normaliser = K.repeat(K.sum(weights, axis=1), self.timesteps)
    weights = weights / normaliser  # (batchsize, timesteps, 1)

    # Context vector: attention-weighted combination of the input sequence.
    context = K.squeeze(K.batch_dot(weights, self.x_seq, axes=1), axis=1)

    def _affine(w, u, c, b, hidden):
        # Shared pre-activation: previous output + hidden + context + bias.
        return K.dot(prev_output, w) + K.dot(hidden, u) + K.dot(context, c) + b

    # GRU-style gates and proposal state.
    reset_gate = activations.sigmoid(
        _affine(self.W_r, self.U_r, self.C_r, self.b_r, prev_hidden))
    update_gate = activations.sigmoid(
        _affine(self.W_z, self.U_z, self.C_z, self.b_z, prev_hidden))
    proposal = activations.tanh(
        _affine(self.W_p, self.U_p, self.C_p, self.b_p,
                reset_gate * prev_hidden))

    # New hidden state interpolates between the old state and the proposal.
    new_hidden = (1 - update_gate) * prev_hidden + update_gate * proposal

    # The output distribution is computed from the *previous* hidden state.
    new_output = activations.softmax(
        _affine(self.W_o, self.U_o, self.C_o, self.b_o, prev_hidden))

    if self.return_probabilities:
        return weights, [new_output, new_hidden]
    return new_output, [new_output, new_hidden]
def step(self, x, states):
    r"""Run one step of the attention cell.

    The previous hidden state of the decoder is rebuilt from
    ``states = [z, s_p]`` (previous update gate and previous proposal state).

    Alignment model:
        waStm1 = W_a \dot s_{t-1}
        uaHt = U_a \dot h_t
        tmp = tanh(waStm1 + uaHt)
        e_ij = V_a^T * tmp
        vector of length = timestep is: u_t = softmax(e_tj)

    Args:
        x: Step input, used as ``atm1`` in the gate computations.
        states: ``[ztm1, s_tpm1]``.

    Returns:
        ``(at, [zt, s_tp])`` when ``self.return_probabilities`` is truthy,
        otherwise ``(yt, [zt, s_tp])``.
    """
    # The docstring is a raw string so that "\dot" is not parsed as an
    # (invalid) escape sequence.
    atm1 = x
    ztm1, s_tpm1 = states

    # old hidden state:
    # shape (batchsize, units)
    stm1 = (1 - ztm1) * self.stm2 + ztm1 * s_tpm1

    # shape (batchsize, timesteps, units)
    _stm = K.repeat(stm1, self.timesteps)

    # shape (batchsize, timesteps, output_dim)
    _Wxstm = K.dot(_stm, self.W_a)

    # calculate the attention probabilities:
    # self._uxpb has shape (batchsize, timesteps, output_dim)
    # V_a has shape (output_dim, )
    # after K.expand_dims it is (output_dim, 1)
    # therefore et has shape (batchsize, timesteps, 1)
    et = K.dot(activations.tanh(_Wxstm + self._uxpb),
               K.expand_dims(self.V_a))
    at = K.exp(et)
    at_sum = K.sum(at, axis=1)
    at_sum_repeated = K.repeat(at_sum, self.timesteps)
    at /= at_sum_repeated  # vector of shape (batchsize, timesteps, 1)

    # reset gate:
    rt = activations.sigmoid(
        K.dot(atm1, self.W_r)
        + K.dot(stm1, self.U_r)
        + self.b_r)

    # update gate:
    zt = activations.sigmoid(
        K.dot(atm1, self.W_z)
        + K.dot(stm1, self.U_z)
        + self.b_z)

    # proposal hidden state:
    s_tp = activations.tanh(
        K.dot(atm1, self.W_p)
        + K.dot((rt * stm1), self.U_p)
        + self.b_p)

    # NOTE(review): `at` is already normalised over timesteps above, and
    # softmax is applied here with its default axis - confirm this is intended.
    yt = activations.softmax(at)

    if self.return_probabilities:
        return at, [zt, s_tp]
    else:
        return yt, [zt, s_tp]
def step(self, x, states):
    """Compute one decoding step with additive attention and a GRU update.

    ``states`` is ``[previous output, previous hidden state]``; ``x`` is
    unused. Returns the attention weights as the step output when
    ``self.return_probabilities`` is truthy, else the softmax output. The new
    states are always ``[yt, st]``.
    """
    y_prev, s_prev = states

    # Repeat the hidden state to the sequence length, then multiply by W_a.
    s_repeated = K.repeat(s_prev, self.timesteps)
    s_projected = K.dot(s_repeated, self.W_a)

    # Attention probabilities: exp(energy) divided by its sum over timesteps.
    scores = K.dot(activations.tanh(s_projected + self._uxpb),
                   K.expand_dims(self.V_a))
    exp_scores = K.exp(scores)
    exp_total = K.repeat(K.sum(exp_scores, axis=1), self.timesteps)
    attn = exp_scores / exp_total  # vector of size (batchsize, timesteps, 1)

    # Context vector.
    ctx = K.squeeze(K.batch_dot(attn, self.x_seq, axes=1), axis=1)

    def _pre_activation(w, u, c, b, state):
        # previous output, a state term, the context and a bias, summed
        return K.dot(y_prev, w) + K.dot(state, u) + K.dot(ctx, c) + b

    # "r" gate, "z" gate and proposal hidden state.
    r_gate = activations.sigmoid(
        _pre_activation(self.W_r, self.U_r, self.C_r, self.b_r, s_prev))
    z_gate = activations.sigmoid(
        _pre_activation(self.W_z, self.U_z, self.C_z, self.b_z, s_prev))
    s_candidate = activations.tanh(
        _pre_activation(self.W_p, self.U_p, self.C_p, self.b_p,
                        r_gate * s_prev))

    # New hidden state.
    s_new = (1 - z_gate) * s_prev + z_gate * s_candidate

    # Output uses the previous hidden state, not s_new.
    y_new = activations.softmax(
        _pre_activation(self.W_o, self.U_o, self.C_o, self.b_o, s_prev))

    step_output = attn if self.return_probabilities else y_new
    return step_output, [y_new, s_new]
def call(self, inputs, mask=None):
    """Apply a gated (highway-style) update that mixes in relational context.

    ``inputs[0]`` is the feature matrix and ``inputs[1]`` the context tensor
    (annotated in the original as ``n_nodes, n_rel, dim``). ``mask`` is
    accepted but not used.

    The context is weighted by ``self.V_carry`` / ``self.V`` and pooled over
    axes 1 and 2: by max when ``self.mean == 0``, otherwise by sum divided by
    ``self.mean``.
    """
    features = inputs[0]
    context = inputs[1]

    def _pooled(weights):
        # Broadcast-multiply the context with the weights, then pool.
        weighted = context[:, :, :, None] * weights[None, :, :, :]
        if self.mean == 0:
            return K.max(weighted, axis=(1, 2))
        return K.sum(weighted, axis=(1, 2)) / self.mean

    # Carry gate: sigmoid(x . W_carry + pooled context [+ b_carry]).
    gate = K.dot(features, self.W_carry)
    gate = gate + _pooled(self.V_carry)
    if self.bias:
        gate = gate + self.b_carry
    gate = activations.sigmoid(gate)

    # Candidate: activation(x . W + pooled context [+ b]).
    pooled_context = _pooled(self.V)
    candidate = K.dot(features, self.W) + pooled_context
    if self.bias:
        candidate = candidate + self.b
    candidate = self.activation(candidate)

    # Gate between the candidate and the untouched input features.
    return gate * candidate + (1 - gate) * features
def predict_txt(east_detect, img_path, txt_path, pixel_threshold, quiet=False):
    """Run the detector on one image and optionally write the quads to a file.

    The image is resized to the size returned by ``resize_image``, passed
    through ``east_detect.predict``, activated, thresholded on channel 0 and
    filtered by ``nms``. Each surviving quad is scaled back to the original
    image size and formatted as one comma-separated line of 8 numbers.

    Lines are written to ``txt_path`` only when ``cfg.predict_write2txt`` is
    truthy and at least one quad was kept. Nothing is returned.
    """
    source_img = image.load_img(img_path)
    target_w, target_h = resize_image(source_img, cfg.max_predict_img_size)
    ratio_w = target_w / source_img.width
    ratio_h = target_h / source_img.height

    resized = source_img.resize((target_w, target_h), Image.NEAREST).convert('RGB')
    pixels = preprocess_input(image.img_to_array(resized), mode='tf')
    batch = np.expand_dims(pixels, axis=0)
    prediction = np.squeeze(east_detect.predict(batch), axis=0)

    # Activate the output layer: softmax on channels 0-3, sigmoid on 4-5.
    prediction[:, :, :4] = softmax(prediction[:, :, :4])
    prediction[:, :, 4:6] = sigmoid(prediction[:, :, 4:6])

    above_threshold = np.greater_equal(prediction[:, :, 0], pixel_threshold)
    active_pixels = np.where(above_threshold)
    quad_scores, quad_after_nms = nms(prediction, active_pixels)

    txt_items = []
    for score, geo in zip(quad_scores, quad_after_nms):
        if np.amin(score) > 0:
            # Undo the resize so coordinates refer to the original image.
            coords = np.reshape(geo / [ratio_w, ratio_h], (8,)).tolist()
            txt_items.append(','.join(map(str, coords)) + '\n')
        elif not quiet:
            print('quad invalid with vertex num less then 4.')

    if cfg.predict_write2txt and len(txt_items) > 0:
        with open(txt_path, 'w') as f_txt:
            f_txt.writelines(txt_items)
def get_disp(x):
    """
    Take the first two entries along the last axis of a 4-D tensor, squash
    them with a sigmoid and scale by 0.3.

    :param x: Tensor to slice
    :return: Single tensor holding both slices (described in the original as
        the left and right disparity maps)
    """
    begin = [0, 0, 0, 0]
    size = [-1, -1, -1, 2]  # -1 keeps the full extent of that axis
    first_two = tf.slice(x, begin, size)
    return 0.3 * sigmoid(first_two)
def call(self, inputs):
    """Score (image, hashtag) index pairs.

    Column 0 of ``inputs`` indexes the image embedding and bias; column 1
    indexes the hashtag embedding and bias.

    Returns:
        ``sigmoid(dot(img, hashtag) + img_bias + hashtag_bias)``.
    """
    img_ids = inputs[:, 0]
    hashtag_ids = inputs[:, 1]

    img_matrix = self.img_embedding(img_ids)
    img_bias = self.img_bias(img_ids)
    hashtag_matrix = self.hashtag_embedding(hashtag_ids)
    hashtag_bias = self.hashtag_bias(hashtag_ids)

    raw_preds = Dot(axes=1)([img_matrix, hashtag_matrix])
    preds_with_bias = Add()([raw_preds, img_bias, hashtag_bias])

    # Fix: the sigmoid used to be applied to `raw_preds`, which dropped both
    # bias terms from the output.
    preds = sigmoid(preds_with_bias)
    return preds
def _split_and_apply_activations(self, controller_output):
    """
    Split the controller output into the NTM output and the per-head
    addressing instructions, then apply the activations.

    Returns a triple ``(ntm_output, controller_instructions_read,
    controller_instructions_write)``. ``ntm_output`` is a tensor. The other
    two are lists with one tuple per head: ``(k, beta, g, shift, gamma)`` for
    read heads, and the same five values followed by
    ``(erase_vector, add_vector)`` for write heads.

    Activations as implemented below:
      * ntm_output: ``self.activation``
      * k, add_vector: tanh
      * beta: hard_sigmoid + 0.5
      * g: sigmoid
      * shift: softmax
      * gamma: 1 + 9 * sigmoid
      * erase_vector: hard_sigmoid
    """
    # Top-level split along axis 1: output | all read heads | all write heads.
    read_width = self.read_heads * self.controller_read_head_emitting_dim
    write_width = self.write_heads * self.controller_write_head_emitting_dim
    ntm_output, read_block, write_block = tf.split(
        controller_output,
        np.asarray([self.output_dim, read_width, write_width]),
        axis=1)

    # One equal-sized slice per head.
    read_heads_raw = tf.split(read_block, self.read_heads, axis=1)
    write_heads_raw = tf.split(write_block, self.write_heads, axis=1)

    # Per-head field layout: k, beta, g, shift(3), gamma [, erase, add].
    read_layout = np.asarray([self.m_depth, 1, 1, 3, 1])
    write_layout = np.asarray(
        [self.m_depth, 1, 1, 3, 1, self.m_depth, self.m_depth])
    read_fields = [tf.split(head, read_layout, axis=1)
                   for head in read_heads_raw]
    write_fields = [tf.split(head, write_layout, axis=1)
                    for head in write_heads_raw]

    ntm_output = self.activation(ntm_output)

    def _addressing(k, beta, g, shift, gamma):
        # Activations shared by read and write heads.
        return (tanh(k), hard_sigmoid(beta) + 0.5, sigmoid(g),
                softmax(shift), 1 + 9 * sigmoid(gamma))

    controller_instructions_read = [
        _addressing(*fields) for fields in read_fields]
    controller_instructions_write = [
        _addressing(k, beta, g, shift, gamma)
        + (hard_sigmoid(erase_vector), tanh(add_vector))
        for (k, beta, g, shift, gamma, erase_vector, add_vector)
        in write_fields]

    return (ntm_output, controller_instructions_read,
            controller_instructions_write)
def make_func(self, layer_name, model=None):
    """Build a backend function mapping the model input to a layer's output.

    When ``model`` is falsy it is obtained from ``self.load_ufcn()``. For the
    layer named ``'convolution2d_5'`` a sigmoid is applied on top of the layer
    output; every other layer is returned as-is.
    """
    if not model:
        model = self.load_ufcn()
    from keras import backend as K

    layer_output = model.get_layer(layer_name).output
    if layer_name == 'convolution2d_5':
        from keras.activations import sigmoid
        layer_output = sigmoid(layer_output)

    return K.function([model.input], [layer_output])
def FM_modeling(self):
    """Assemble the FM model: sigmoid(first-order + second-order terms).

    Takes the first return value of ``self.FM_1st_order()`` and
    ``self.FM_2nd_order()``, adds them, applies a sigmoid, and wraps the
    result in a ``Model`` over ``self.inputs``. The model summary is printed
    before the model is returned.
    """
    first_order, _ = self.FM_1st_order()
    second_order, _ = self.FM_2nd_order()

    summed = Add()([first_order, second_order])
    squashed = Lambda(lambda x: activations.sigmoid(x))(summed)

    fm_model = Model(inputs=self.inputs, outputs=squashed)
    fm_model.summary()
    return fm_model
def convert_output(self, output):
    """Split a flat prediction into boxes, confidences and class scores.

    The second axis of ``output`` is read as three consecutive segments:
    ``Sx*Sy*4*B`` box values, ``Sx*Sy*B`` confidences, then the class values.
    Boxes and confidences go through a sigmoid; classes through a softmax
    over axis 3. All three results are evaluated with ``K.eval`` before
    being returned.
    """
    print(output.shape, self.Sx, self.Sy, 4 * self.B)

    cells = self.Sx * self.Sy
    bbox_end = cells * 4 * self.B
    conf_end = cells * 5 * self.B
    grid = (-1, self.Sy, self.Sx)

    bbox_tensor = sigmoid(
        K.reshape(output[:, :bbox_end], grid + (self.B, 4)))
    confidence_tensor = sigmoid(
        K.reshape(output[:, bbox_end:conf_end], grid + (self.B,)))
    class_tensor = softmax(
        K.reshape(output[:, conf_end:], grid + (self.C,)), axis=3)

    return (K.eval(bbox_tensor), K.eval(confidence_tensor),
            K.eval(class_tensor))
def testSigmoid():
    """Check activations.sigmoid against the vectorised reference sigmoid."""
    placeholder = K.placeholder(ndim=2)
    run_sigmoid = K.function([placeholder], [activations.sigmoid(placeholder)])

    testValues = getStdValues()
    actual = run_sigmoid([testValues])[0]
    expected = np.vectorize(referenceSigmoid)(testValues)

    assert_allclose(actual, expected, rtol=1e-05)
def call(self, x):
    """Apply the dense transform: ``x . kernel`` [+ bias], then activation.

    Only the string values ``'relu'`` and ``'sigmoid'`` of
    ``self.activation`` trigger an activation; any other value leaves the
    result linear.
    """
    projected = K.dot(x, self.kernel)
    if self.use_bias:
        projected = K.bias_add(projected, self.bias,
                               data_format="channels_last")

    if self.activation == 'relu':
        return activations.relu(projected)
    if self.activation == 'sigmoid':
        return activations.sigmoid(projected)
    return projected
def call(self, inputs):
    """Gate four candidate vectors with P and score each against P.

    ``inputs`` unpacks to ``P, C1, C2, C3, C4``. Every candidate is gated by
    ``sigmoid(P.GateWeight_P + C.GateWeight_C + GateBias)``, then scored as
    ``batch_dot(C', P.OutWeight) + C'.OutBias``. The four scores are
    concatenated along axis 1.

    Shape notes carried over from the original annotations: P is (b, 2h),
    each C is (b, 4h), each score is (b, 1) and the result is (b, 4).
    """
    P, C1, C2, C3, C4 = inputs
    candidates = (C1, C2, C3, C4)

    # Gate pre-activations: the P term is shared, the C term is per candidate.
    p_gate_term = K.dot(P, self.GateWeight_P)  # (b, 2h) -> (b, 4h)
    c_gate_terms = [K.dot(c, self.GateWeight_C) for c in candidates]

    # GateBias broadcasts across the batch dimension here.
    gates = [sigmoid(p_gate_term + c_term + self.GateBias)
             for c_term in c_gate_terms]
    gated = [c * g for c, g in zip(candidates, gates)]  # (b, 4h) each

    p_out_term = K.dot(P, self.OutWeight)  # (b, 2h) -> (b, 4h)
    dot_scores = [K.batch_dot(c_dash, p_out_term, axes=[1, 1])
                  for c_dash in gated]  # (b, 1) each
    bias_scores = [K.dot(c_dash, self.OutBias) for c_dash in gated]

    scores = [d + b for d, b in zip(dot_scores, bias_scores)]
    return K.concatenate(scores, axis=1)  # (b, 4)
def call(self, x):
    """Highway transform: ``T * activation(x.W + b) + (1 - T) * x``.

    ``T = sigmoid(x.W_carry + b_carry)`` is the transform gate. Both bias
    terms are added only when ``self.bias`` is truthy.
    """
    # Transform gate.
    gate_logits = K.dot(x, self.W_carry)
    if self.bias:
        gate_logits = gate_logits + self.b_carry
    transform_weight = activations.sigmoid(gate_logits)

    # Candidate activation.
    pre_activation = K.dot(x, self.W)
    if self.bias:
        pre_activation = pre_activation + self.b
    transformed = self.activation(pre_activation) * transform_weight

    # Whatever the gate does not transform is carried through unchanged.
    return transformed + (1 - transform_weight) * x
def call(self, x):
    """Blend a transformed input with the raw input using a learned gate.

    The gate is ``sigmoid(x . W_carry [+ b_carry])``. The output is
    ``gate * activation(x . W [+ b]) + (1 - gate) * x``. The biases are used
    only when ``self.bias`` is truthy.
    """
    use_bias = self.bias

    carry_logits = K.dot(x, self.W_carry)
    if use_bias:
        carry_logits = carry_logits + self.b_carry
    gate = activations.sigmoid(carry_logits)

    hidden = K.dot(x, self.W)
    if use_bias:
        hidden = hidden + self.b
    gated_hidden = self.activation(hidden) * gate

    carried_input = (1 - gate) * x
    return gated_hidden + carried_input
def train_step(self, last):
    """Run one optimisation step on ``self.Z``.

    A batch ``s`` is drawn uniformly from [-10, 10) with shape
    ``(batch_size, vector_size)``, mapped through ``self.Z`` and then through
    ``self.G`` (called with ``training=False``). The loss is
    ``self.loss(sigmoid(summarize(G(Z(s)))), s, G(Z(s)))``. Only the
    trainable variables of ``self.Z`` are updated.

    Args:
        last: Unused; kept for interface compatibility.
    """
    with tf.GradientTape() as tape:
        s = tf.random.uniform([self.batch_size, self.vector_size],
                              minval=-10, maxval=10)
        z = self.Z(s)
        Goz = self.G(z, training=False)
        I = sigmoid(summarize(Goz))
        loss = self.loss(I, s, Goz)

    variables = self.Z.trainable_variables
    gradient = tape.gradient(loss, variables)
    # Fix: apply_gradients takes an iterable of (gradient, variable) pairs.
    # The variables used to be passed as a second positional argument.
    self.Z.optimizer.apply_gradients(zip(gradient, variables))
def call(self, inputs, **kwargs):
    """Return the sigmoid label and a gradient-based penalty term.

    ``inputs`` is ``(data_input, data_logits)``. The gradient of the logits
    with respect to the input is flattened per sample and its norm taken
    along axis 1. The penalty is
    ``norm**2 * (sigmoid(logits) - self.discrimination_label)**2``.

    Returns:
        ``[data_label, penalty]``.
    """
    data_input, data_logits = inputs
    batch_size = shape(data_input)[0]

    data_gradient = gradients(data_logits, data_input)[0]
    flat_gradient = reshape(data_gradient, [batch_size, -1])
    gradient_norm = norm(flat_gradient, axis=1, keep_dims=True)

    data_label = sigmoid(data_logits)
    label_gap = data_label - self.discrimination_label
    penalty = square(gradient_norm) * square(label_gap)

    return [data_label, penalty]
def step(self, x, states):
    """One attention-decoder step: attend, update the hidden state, emit.

    ``states`` unpacks to the previous output and previous hidden state;
    ``x`` is not used. Returns ``(at, [yt, st])`` when
    ``self.return_probabilities`` is truthy, else ``(yt, [yt, st])``.
    """
    last_output, last_hidden = states

    # Additive attention over self.x_seq, normalised along axis 1.
    hidden_tiled = K.repeat(last_hidden, self.timesteps)
    hidden_term = K.dot(hidden_tiled, self.W_a)
    energy = K.dot(activations.tanh(hidden_term + self._uxpb),
                   K.expand_dims(self.V_a))
    alpha = K.exp(energy)
    alpha_total = K.repeat(K.sum(alpha, axis=1), self.timesteps)
    alpha = alpha / alpha_total

    context = K.squeeze(K.batch_dot(alpha, self.x_seq, axes=1), axis=1)

    def _combine(w, u, c, b, hidden):
        # output term + hidden term + context term + bias
        return (K.dot(last_output, w) + K.dot(hidden, u)
                + K.dot(context, c) + b)

    reset = activations.sigmoid(
        _combine(self.W_r, self.U_r, self.C_r, self.b_r, last_hidden))
    update = activations.sigmoid(
        _combine(self.W_z, self.U_z, self.C_z, self.b_z, last_hidden))
    candidate = activations.tanh(
        _combine(self.W_p, self.U_p, self.C_p, self.b_p,
                 reset * last_hidden))

    hidden = (1 - update) * last_hidden + update * candidate

    # The emitted distribution is conditioned on the previous hidden state.
    output = activations.softmax(
        _combine(self.W_o, self.U_o, self.C_o, self.b_o, last_hidden))

    if self.return_probabilities:
        return alpha, [output, hidden]
    return output, [output, hidden]
def call(self, inputs, **kwargs):
    """Graph convolution: ``activation(A_ . (X . kernel) + bias)``.

    ``inputs[0]`` is the adjacency tensor ``A`` and ``inputs[1]`` the feature
    tensor ``X``. With ``self.learn_pqr`` set, ``A`` is rescaled as
    ``D (A + q I) D`` using ``p = sigmoid(self.p)`` and
    ``q = sigmoid(self.q)``; otherwise ``A`` is used unchanged.
    """
    adjacency = inputs[0]
    features = inputs[1]
    dims = int(adjacency.shape[1])

    if not self.learn_pqr:
        normalised = adjacency
    else:
        p = activations.sigmoid(self.p)
        q = activations.sigmoid(self.q)

        identity = K.eye(dims)
        degree = K.sum(adjacency, axis=1)

        # Zero the identity entries where the batch-summed degree diagonal
        # is zero.
        degree_diag_sum = K.sum(K.tf.matrix_diag(degree), axis=0)
        zeros = K.tf.zeros_like(degree_diag_sum)
        identity = K.tf.where(K.equal(degree_diag_sum, zeros), zeros, identity)

        k_vec = p * K.ones_like(degree) + (1 - p) * degree
        # NOTE(review): sqrt followed by pow(-.5) gives k ** -0.25; confirm
        # whether k ** -0.5 was intended.
        k_inv_root = K.pow(K.sqrt(k_vec), -.5)
        # Replace infinities (from k == 0) with zeros.
        k_clean = K.tf.where(K.tf.is_inf(k_inv_root),
                             K.tf.zeros_like(k_inv_root),
                             k_inv_root)

        scaling = K.tf.matrix_diag(k_clean)
        normalised = K.batch_dot(
            K.batch_dot(scaling, (adjacency + q * identity)), scaling)

    projected = K.dot(features, self.kernel)
    out = K.batch_dot(normalised, projected) + self.bias
    # NOTE(review): the result of this check is discarded, so it may never
    # run in graph mode - confirm.
    K.tf.verify_tensor_all_finite(out, "out contains infs")
    return self.activation(out)
def step(self, x, states):
    """Run one decoder step with additive attention and a GRU-style update.

    Args:
        x: Step input; not used.
        states: ``[y_prev, s_prev]``, the previous output and hidden state.

    Returns:
        ``(a_current, [y_current, s_current])`` when
        ``self.return_attention_weights`` is truthy, otherwise
        ``(y_current, [y_current, s_current])``.
    """
    y_prev, s_prev = states

    s_all = K.repeat(s_prev, self.timesteps)
    Wa_s_all = K.dot(s_all, self.W_a)
    et = K.dot(activations.tanh(Wa_s_all + self.uh),
               K.expand_dims(self.V_a))

    # Fix: `et` has shape (batch_size, timesteps, 1), so softmax over the
    # default last axis (size 1) returned all ones. Normalise over the
    # timestep axis so the weights form a distribution over the sequence.
    a_current = activations.softmax(et, axis=1)
    context = K.squeeze(K.batch_dot(a_current, self.x_seq, axes=1), axis=1)

    # reset gate
    r_current = activations.sigmoid(
        K.dot(y_prev, self.W_r)
        + K.dot(s_prev, self.U_r)
        + K.dot(context, self.C_r)
        + self.b_r)

    # update gate
    z_current = activations.sigmoid(
        K.dot(y_prev, self.W_z)
        + K.dot(s_prev, self.U_z)
        + K.dot(context, self.C_z)
        + self.b_z)

    # candidate state (s tilde)
    s_tilde = activations.tanh(
        K.dot(y_prev, self.W_c)
        + K.dot((r_current * s_prev), self.U_c)
        + K.dot(context, self.C_c)
        + self.b_c)

    s_current = (1 - z_current) * s_prev + z_current * s_tilde

    # output, computed from the new hidden state
    y_current = activations.sigmoid(
        K.dot(y_prev, self.W_o)
        + K.dot(s_current, self.U_o)
        + K.dot(context, self.C_o)
        + self.b_o)

    if self.return_attention_weights:
        return a_current, [y_current, s_current]
    else:
        return y_current, [y_current, s_current]
def test_sigmoid(self):
    """Compare activations.sigmoid with a numerically stable reference."""

    def ref_sigmoid(x):
        # Each branch only exponentiates a non-positive number.
        if x >= 0:
            return 1 / (1 + np.exp(-x))
        exp_x = np.exp(x)
        return exp_x / (1 + exp_x)

    reference = np.vectorize(ref_sigmoid)

    placeholder = backend.placeholder(ndim=2)
    run_sigmoid = backend.function(
        [placeholder], [activations.sigmoid(placeholder)])

    test_values = np.random.random((2, 5))
    actual = run_sigmoid([test_values])[0]
    self.assertAllClose(actual, reference(test_values), rtol=1e-05)
def calc_reduced_value(self, values):
    """Compute the new value for the REDUCE operation.

    The value is derived from the two most recent stack vectors,
    ``values['stack_current']`` and ``values['stack_prev']``. In each vector
    the first ``hidden_dim`` columns are used as the cell part and the
    remaining columns as the hidden part.

    Returns the concatenation ``[c, h]`` along axis 1.
    """
    d = self.hidden_dim
    current = values['stack_current']
    previous = values['stack_prev']

    # Hidden parts of both stack entries, side by side.
    joined_hidden = K.concatenate([current[:, d:], previous[:, d:]], axis=1)
    pre_activation = K.dot(joined_hidden, self.W_R) + self.b_R

    # First 4*d columns are sigmoid gates, the remainder is a tanh candidate.
    gates = sigmoid(pre_activation[:, :4 * d])
    candidate = tanh(pre_activation[:, 4 * d:])

    c = gates[:, d:2 * d] * current[:, :d] + \
        gates[:, 2 * d:3 * d] * previous[:, :d] + \
        gates[:, :d] * candidate
    h = gates[:, 3 * d:] * c

    return K.concatenate([c, h], axis=1)
def call(self, inputs, mask=None):
    """Gated update of node features using masked neighbour averages.

    ``inputs`` is ``[x, rel, rel_mask]``: the feature matrix, the neighbour
    index tensor (annotated as ``n_nodes, n_rel, n_neigh``) and a mask whose
    slices ``[:, 0]`` and ``[:, 1]`` act as multiplier and divisor. ``mask``
    is accepted but not used.

    For each node and relation type the neighbours' features are gathered,
    masked, summed and divided by the summed divisor mask. That context then
    feeds a highway-style gate, pooled over axes 1 and 2 by max when
    ``self.mean == 0``, otherwise by sum divided by ``self.mean``.
    """
    x, rel, rel_mask = inputs[0], inputs[1], inputs[2]
    mask_mul = rel_mask[:, 0]
    mask_div = rel_mask[:, 1]

    # nodes, relation types, neighbours per relation type
    n_nodes, n_rel, n_neigh = rel.shape
    dim = x.shape[-1]

    # context = sum(all neighbours with the same relation to the node)
    gathered = x[rel.flatten()].reshape([n_nodes, n_rel, n_neigh, dim])
    masked = gathered * mask_mul[:, :, :, None]
    # -> context: n_nodes, n_rel, dim
    context = K.sum(masked, axis=-2) / K.sum(mask_div, axis=-1)[:, :, None]

    def _pooled(weights):
        # Weight the context, then pool over axes 1 and 2.
        weighted = context[:, :, :, None] * weights[None, :, :, :]
        if self.mean == 0:
            return K.max(weighted, axis=(1, 2))
        return K.sum(weighted, axis=(1, 2)) / self.mean

    # Carry gate.
    gate = K.dot(x, self.W_carry)
    gate = gate + _pooled(self.V_carry)
    if self.bias:
        gate = gate + self.b_carry
    gate = activations.sigmoid(gate)

    # Candidate hidden state.
    pooled_context = _pooled(self.V)
    h = K.dot(x, self.W) + pooled_context
    if self.bias:
        h = h + self.b
    h = self.activation(h)

    return gate * h + (1 - gate) * x
def test_sigmoid():
    """Check activations.sigmoid against a numerically stable reference.
    """

    def ref_sigmoid(x):
        # Each branch only exponentiates a non-positive number.
        if x >= 0:
            return 1 / (1 + np.exp(-x))
        exp_x = np.exp(x)
        return exp_x / (1 + exp_x)

    reference = np.vectorize(ref_sigmoid)

    placeholder = K.placeholder(ndim=2)
    run_sigmoid = K.function([placeholder], [activations.sigmoid(placeholder)])

    test_values = get_standard_values()
    actual = run_sigmoid([test_values])[0]
    assert_allclose(actual, reference(test_values), rtol=1e-05)
def call(self, inputs, mask=None):
    """Highway-style update with a linear relational term.

    ``inputs[0]`` is the feature matrix and ``inputs[1]`` the relation
    tensor. ``mask`` is accepted but not used.

    gate = sigmoid(x.W_carry + rel.V_carry [+ b_carry])
    h    = activation(x.W + rel.V [+ b])
    out  = gate * h + (1 - gate) * x
    """
    features, rel = inputs[0], inputs[1]

    # Carry gate.
    gate_logits = K.dot(features, self.W_carry)
    gate_logits = gate_logits + K.dot(rel, self.V_carry)
    if self.bias:
        gate_logits = gate_logits + self.b_carry
    gate = activations.sigmoid(gate_logits)

    # Candidate.
    rel_term = K.dot(rel, self.V)
    candidate = K.dot(features, self.W) + rel_term
    if self.bias:
        candidate = candidate + self.b
    candidate = self.activation(candidate)

    return gate * candidate + (1 - gate) * features
# Reshape the samples into 3-D arrays (sequences, steps, features/targets).
# NOTE(review): the test arrays use the same leading dimension as the training
# arrays (se_data_len - divided_point) - confirm this is intended.
x_train = np.array(x_train).reshape(
    (se_data_len - divided_point, len(refs[0]) - 1, p))
y_train = np.array(y_train).reshape(
    (se_data_len - divided_point, len(refs[0]) - 1, 1))
x_test = np.array(x_test).reshape(
    (se_data_len - divided_point, len(refs[0]), p))
y_test = np.array(y_test).reshape(
    (se_data_len - divided_point, len(refs[0]), 1))

# Build the model: a SimpleRNN followed by a single-unit Dense layer. Both use
# a sigmoid shifted down by 0.5 as their activation.
model = Sequential()
model.add(
    SimpleRNN(units=no_neural,
              return_sequences=True,
              input_shape=(None, p),
              activation=lambda x: sigmoid(x) - 0.5))
model.add(Dense(units=1, activation=lambda x: sigmoid(x) - 0.5))
model.compile(loss='mean_squared_error',
              optimizer='RMSprop',
              metrics=['mae', 'mse'])
model.summary()

# Train (fit) with the early-stopping callback, then evaluate on the training
# data.
record = model.fit(
    x=x_train,
    y=y_train,
    epochs=no_epoch,
    batch_size=batch_size,
    callbacks=[
        utl.EarlyStoppingByMSE(monitor='mean_squared_error', value=minloss)
    ])
eva = model.evaluate(x=x_train, y=y_train, batch_size=batch_size)
def call(self, x):
    """Compute a sigmoid gate from the mean of ``x`` over axes 1 and 2.

    The pooled tensor goes through ``fc0``, ``self.activation`` and ``fc1``,
    then a sigmoid. Two singleton axes are inserted after the first axis of
    the result before it is returned.
    """
    pooled = tf.math.reduce_mean(x, axis=[1, 2])
    hidden = self.activation(self.fc0(pooled))
    gate = sigmoid(self.fc1(hidden))
    return gate[:, None, None]
# 1000 evenly spaced net-input values in [-5, 5], as float32
netnp = np.linspace(-5.0, 5.0, num=1000, dtype='float32')
# the same values as a TensorFlow tensor
nettf = tf.convert_to_tensor(netnp)

# Every plot below follows one pattern: apply an activation to the tensor,
# then call eval() on both tensors to obtain concrete values for plotting.

# linear
acttf = kact.linear(nettf)
plt_act(nettf.eval(), acttf.eval(), 'linear activation function')

# rectified linear (relu)
acttf = kact.relu(nettf)
plt_act(nettf.eval(), acttf.eval(), 'rectified linear (relu)')

# sigmoid
acttf = kact.sigmoid(nettf)
plt_act(nettf.eval(), acttf.eval(), 'sigmoid')

# hard sigmoid
acttf = kact.hard_sigmoid(nettf)
plt_act(nettf.eval(), acttf.eval(), 'hard sigmoid')

# tanh
acttf = kact.tanh(nettf)
plt_act(nettf.eval(), acttf.eval(), 'tanh')

# softsign
acttf = kact.softsign(nettf)
plt_act(nettf.eval(), acttf.eval(), 'softsign')

# close the TensorFlow session
def comput_Ra(x):
    """Combine two discriminator outputs into a relativistic-average score.

    Args:
        x: Pair ``(d_output1, d_output2)`` of backend tensors.

    Returns:
        ``sigmoid(0.5 * ((d1 - mean(d2)) + (d2 - mean(d1))))``.
    """
    d_output1, d_output2 = x
    real_loss = d_output1 - K.mean(d_output2)
    fake_loss = d_output2 - K.mean(d_output1)
    # Fix: add with the tensor `+` operator instead of np.add. NumPy either
    # fails on symbolic tensors or converts eager tensors to arrays, which
    # takes the result out of the graph.
    return sigmoid(0.5 * (real_loss + fake_loss))
def step(self, x, states):
    """Run one decoding step with attention and token feedback.

    ``states`` is ``[ytm, stm, timestep]`` plus the previous attention when
    ``self.is_monotonic`` is set. ``x`` is not used. The previous token is
    embedded, recurrent dropout is applied in the training phase when
    ``self.recurrent_dropout`` lies strictly between 0 and 1, and a GRU-style
    update follows.

    Returns ``(output, next_states)``. ``output`` is the attention weights
    when ``self.return_probabilities`` is truthy, else the softmax output.
    ``next_states`` is ``[ys, st, timestep + 1]``, with the attention weights
    appended in monotonic mode.
    """
    if self.is_monotonic:
        prev_token, prev_hidden, timestep, previous_attention = states
    else:
        prev_token, prev_hidden, timestep = states

    prev_embedded = self.embedding_sublayer(K.cast(prev_token, 'int32'))

    rate = self.recurrent_dropout
    if rate is not None and 0. < rate < 1.:
        prev_hidden = K.in_train_phase(K.dropout(prev_hidden, rate),
                                       prev_hidden)
        prev_embedded = K.in_train_phase(K.dropout(prev_embedded, rate),
                                         prev_embedded)

    energies = self._compute_energy(prev_hidden)
    if self.is_monotonic:
        attention = self._compute_probabilities(energies, previous_attention)
    else:
        attention = self._compute_probabilities(energies)

    # Context vector.
    context = K.squeeze(K.batch_dot(attention, self.x_seq, axes=1), axis=1)

    def _affine(w, u, c, b, hidden):
        # embedded token term + hidden term + context term + bias
        return (K.dot(prev_embedded, w) + K.dot(hidden, u)
                + K.dot(context, c) + b)

    # "r" gate, "z" gate and proposal hidden state.
    reset_gate = activations.sigmoid(
        _affine(self.W_r, self.U_r, self.C_r, self.b_r, prev_hidden))
    update_gate = activations.sigmoid(
        _affine(self.W_z, self.U_z, self.C_z, self.b_z, prev_hidden))
    proposal = activations.tanh(
        _affine(self.W_p, self.U_p, self.C_p, self.b_p,
                reset_gate * prev_hidden))

    # New hidden state.
    new_hidden = (1 - update_gate) * prev_hidden + update_gate * proposal

    # The output distribution is computed from the new hidden state.
    probs = activations.softmax(
        _affine(self.W_o, self.U_o, self.C_o, self.b_o, new_hidden))

    # Token fed to the next step: the ground truth during training when
    # teacher forcing is on, otherwise the arg-max prediction.
    predicted = K.argmax(probs, axis=-1)
    if self.use_teacher_forcing:
        next_token = K.flatten(
            K.in_train_phase(self.y_true[:, timestep[0]], predicted))
    else:
        next_token = K.flatten(predicted)

    output = attention if self.return_probabilities else probs

    next_states = [next_token, new_hidden, timestep + 1]
    if self.is_monotonic:
        next_states.append(attention)
    return output, next_states
def sce_criterion(logit, label):
    """Return the mean sigmoid cross-entropy between ``logit`` and ``label``.

    Args:
        logit: Unnormalised scores.
        label: Target values, passed as the first argument of
            ``binary_crossentropy``.

    Returns:
        ``mean(binary_crossentropy(label, sigmoid(logit)))``.
    """
    # Fix: the sigmoid belongs on the logits so that the cross-entropy
    # receives probabilities. It used to be applied to the loss value itself.
    return k.mean(binary_crossentropy(label, sigmoid(logit)))