def call(self, ensemble_logits, logits):
    """Temperature-scaled distillation loss between an ensemble and one model.

    Args:
        ensemble_logits: outputs from the ensemble
            (batch x ensembles x classes).
        logits: predicted outputs from our model (batch x classes).

    Returns:
        Scalar cost tensor (float64).
    """
    # Lazily fall back to the initial temperature on first use.
    if self.temp is None:
        self.temp = self.init_temp

    # Convert values to the appropriate type.
    logits = tf.cast(logits, dtype=tf.float64)
    ensemble_logits = tf.cast(ensemble_logits, dtype=tf.float64)

    # Probabilities via softmax over classes, adjusted for temperature.
    ensemble_probs = softmax(ensemble_logits / self.temp, axis=2)
    PN_probs = softmax(logits / self.temp, axis=1)

    # Aggregate the teacher predictions over the ensemble axis.
    # NOTE(review): this is a sum, not a mean, so the target is scaled by the
    # number of ensemble members -- confirm that this is intended.
    ensemble_probs_mean = reduce_sum(ensemble_probs, axis=1)

    # Cross-entropy between teacher and student, scaled by T**2.
    # The previous code raised log(PN_probs) to the power T**2, which flips
    # the sign of the loss for even powers and yields NaN for non-integer
    # powers because log-probabilities are negative.
    cost = reduce_mean(-ensemble_probs_mean * log(PN_probs)) * (self.temp**2)
    return cost
def call(self, y_true, y_pred):
    """Evaluate the dice loss.

    Uses the binary formulation if ``y_pred.shape[-1] == 1`` and the
    n-class formulation otherwise.

    Args:
        y_true: Ground truth, BHW1.
        y_pred: Prediction, BHWC.

    Returns:
        Scalar tensor: one minus the batch mean of the per-sample dice score.
    """
    n_classes = y_pred.shape[-1]
    if n_classes == 1:
        probs = tf.sigmoid(y_pred)
        # Match dtypes so integer masks can be multiplied with probabilities.
        target = tf.cast(y_true, probs.dtype)
    else:
        probs = nn.softmax(y_pred)
        # tf.one_hot needs integer indices; dropping the trailing singleton
        # axis first yields a BHWC target directly.
        labels = tf.cast(y_true[..., 0], tf.int32)
        # The target must stay channels-last: the prediction is BHWC, so the
        # former transpose to BCHW misaligned the two tensors element-wise.
        target = tf.one_hot(labels, n_classes, dtype=probs.dtype)

    # Reduce over everything but the batch axis; this avoids reshaping with a
    # static batch size, which is None for dynamically shaped inputs.
    reduce_axes = list(range(1, len(y_pred.shape)))
    intersection = tf.reduce_sum(probs * target, axis=reduce_axes)
    sums = tf.reduce_sum(probs + target, axis=reduce_axes)

    # Add the smoothing term once to numerator and denominator so the score
    # stays within [0, 1]; doubling it in the numerator let the score exceed 1.
    score = (2.0 * intersection + self.smooth) / (sums + self.smooth)
    return 1.0 - tf.reduce_mean(score)
def eval_rln_ngh(self, adj_mat, combined_ngh):
    """Score relations for the hybrid neighbourhood.

    Evaluates the importance of relations used to form the hybrid
    neighbourhood (social/temporal + static/spatial); the source notes this is
    the same mechanism used for choosing the best neighbourhood as
    (SGTV, 2019).

    Args:
        adj_mat: relation scores passed to ``nn.softmax``.
        combined_ngh: accepted but not used here.

    Returns:
        The result of ``nn.softmax(adj_mat)``.
    """
    return nn.softmax(adj_mat)
def eval_rln_ngh(self, adj_mat, combined_ngh):
    """Score relations for the hybrid neighbourhood.

    Evaluates the importance of relations used to form the hybrid
    neighbourhood (social/temporal + static/spatial).

    Args:
        adj_mat: relation scores passed to ``nn.softmax``.
        combined_ngh: accepted but not used here.

    Returns:
        The result of ``nn.softmax(adj_mat)``.
    """
    return nn.softmax(adj_mat)
def call(self, x, **kwargs):
    """Apply the layer: content term plus position term.

    Args:
        x: input tensor; its static shape is unpacked into four dimensions,
            of which the second and third are used as spatial sizes.
        **kwargs: accepted for interface compatibility, not used.

    Returns:
        Tensor rearranged back to ``b hh ww (h v)``.
    """
    _, height, width, _ = x.get_shape().as_list()
    n_intra, n_heads = self.u, self.heads

    # Projections, then normalisation of queries and values.
    queries = self.to_q(x)
    keys = self.to_k(x)
    values = self.to_v(x)
    queries = self.norm_q(queries)
    values = self.norm_v(values)

    # Flatten the spatial grid and split the channel groups.
    queries = Rearrange('b hh ww (h k) -> b h k (hh ww)', h=n_heads)(queries)
    keys = Rearrange('b hh ww (u k) -> b u k (hh ww)', u=n_intra)(keys)
    values = Rearrange('b hh ww (u v) -> b u v (hh ww)', u=n_intra)(values)

    # Softmax over the last (flattened position) axis of the keys.
    keys = nn.softmax(keys)

    # Content term.
    content_lambda = einsum('b u k m, b u v m -> b k v', keys, values)
    content_out = einsum('b h k n, b k v -> b n h v', queries, content_lambda)

    # Position term: convolution over local contexts, or a gathered
    # relative-position embedding.
    if self.local_contexts:
        values = Rearrange('b u v (hh ww) -> b v hh ww u', hh=height, ww=width)(values)
        position_lambda = self.pos_conv(values)
        position_lambda = Rearrange('b v h w k -> b v k (h w)')(position_lambda)
        position_out = einsum('b h k n, b v k n -> b n h v', queries, position_lambda)
    else:
        rel_pos_emb = tf.gather_nd(self.rel_pos_emb, self.rel_pos)
        position_lambda = einsum('n m k u, b u v m -> b n k v', rel_pos_emb, values)
        position_out = einsum('b h k n, b n k v -> b n h v', queries, position_lambda)

    combined = content_out + position_out
    return Rearrange('b (hh ww) h v -> b hh ww (h v)', hh = height, ww = width)(combined)
def measure_sequences_lstm(model, n_generate, prompt, skews, orig):
    """Generate (or replay) tokens and sum their unskewed log-probabilities.

    Args:
        model: object whose ``predict`` is called once per prompt column and
            once per generated step.
        n_generate: number of generation steps.
        prompt: 2-D array; columns are fed to the model one at a time.
        skews: one skew setting per sequence, passed to ``skew_distribution``.
        orig: ``None`` to sample new tokens from the skewed distributions;
            otherwise a 2-D array whose successive columns supply the token
            indices to use at each step.

    Returns:
        Array of length ``len(skews)`` with the summed log-probabilities,
        measured on the distributions before skewing.
    """
    # Feed all but the last prompt column through the model.
    for i in trange(prompt.shape[1] - 1):
        model.predict(prompt[:, i])

    # The last item of the prompt is saved so that it can be used to
    # generate the first prediction.
    last_tokens = prompt[:, -1]
    log_prob_sums = np.zeros(len(skews))

    for _ in trange(n_generate):
        logits = model.predict(last_tokens)[:, -1, :]
        Ps = softmax(logits).numpy()
        basePs = np.array(Ps)  # copy taken before skewing

        if orig is None:
            # Skew each row, then sample one index per row.
            for i, P in enumerate(Ps):
                Ps[i] = skew_distribution(P, skews[i])
            ixs = np.array([np.random.choice(len(P), p=P) for P in Ps])
            # Sanity check: skewing is expected to change the distributions.
            assert not np.array_equal(Ps, basePs)
        else:
            # Take indexes from orig, consuming one column per step.
            ixs = orig[:, 0]
            orig = orig[:, 1:]

        # For measurement, we use the log probs PRIOR to sampling.
        log_probs = [np.log(basePs[i, ix]) for i, ix in enumerate(ixs)]
        log_prob_sums += log_probs

        # Only the newest tokens are ever fed back, so keep just those rather
        # than re-copying the whole history with np.vstack on every step.
        last_tokens = np.asarray(ixs)

    return log_prob_sums
def train_layers(self, train_x, train_y, test_x, test_y):
    """Build the conv + dense classifier, train it and plot the cost curve.

    Args:
        train_x: training inputs, sliced into mini-batches along axis 0.
        train_y: training targets, sliced the same way.
        test_x: test inputs used for the final accuracy report.
        test_y: test targets used for the final accuracy report.

    Prints the test accuracy and shows a matplotlib plot of the cost history.
    """
    opt = self.opt
    batch_size = opt['batch_size']

    X = tf.placeholder(tf.float32,
                       shape=[
                           None, opt['bands'], opt['frames'],
                           opt['num_channels']
                       ])
    Y = tf.placeholder(tf.float32, shape=[None, opt['n_classes']])

    # Build the graph on the placeholder (not on train_x) so that the fed
    # batches and the test set actually flow through the network.
    conv_layer = self.apply_convolution(X, opt['k_size'],
                                        opt['num_channels'], opt['depth'])
    shape = conv_layer.get_shape().as_list()

    # Flatten to 2-D (batch, features) so it can be multiplied with a 2-D
    # weight matrix; the feature size is the product of the last three dims.
    flat_dim = shape[1] * shape[2] * shape[3]
    conv_flat = tf.reshape(conv_layer, [-1, flat_dim])

    f_weights = self.weight_variable([flat_dim, opt['num_hidden']])
    f_biases = self.bias_variable([opt['num_hidden']])
    f = nn.sigmoid(tf.add(tf.matmul(conv_flat, f_weights), f_biases))

    out_weights = self.weight_variable([opt['num_hidden'], opt['n_classes']])
    out_biases = self.bias_variable([opt['n_classes']])
    out = nn.softmax(tf.matmul(f, out_weights) + out_biases)

    # Cross-entropy averaged over the batch.
    cost = tf.reduce_mean(
        -tf.reduce_sum(Y * tf.log(out), reduction_indices=[1]))
    optimizer = tf.train.AdamOptimizer(
        learning_rate=opt['learning_rate']).minimize(cost)

    correct_pred = tf.equal(tf.argmax(out, 1), tf.argmax(Y, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

    # A plain list avoids the uninitialised first element np.empty produced.
    cost_history = []
    # Guard against a zero modulus when the data holds exactly one batch.
    n_offsets = max(train_y.shape[0] - batch_size, 1)

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        # NOTE(review): `training_epochs` is not defined in this function; it
        # is assumed to be provided at module level -- confirm.
        for i in range(training_epochs):
            offset = (i * batch_size) % n_offsets
            batch_x = train_x[offset:(offset + batch_size), :, :, :]
            batch_y = train_y[offset:(offset + batch_size), :]
            _, loss = sess.run([optimizer, cost],
                               feed_dict={
                                   X: batch_x,
                                   Y: batch_y
                               })
            cost_history.append(loss)

        print(
            'Test accuracy: ',
            round(sess.run(accuracy, feed_dict={
                X: test_x,
                Y: test_y
            }), 3))

    plt.figure(figsize=(15, 10))
    plt.plot(cost_history)
    y_max = np.max(cost_history) if cost_history else 1.0
    plt.axis([0, training_epochs, 0, y_max])
    plt.show()
def _attention(self, lstm_outputs):
    """Combine per-step LSTM outputs into one attention-weighted vector.

    Args:
        lstm_outputs: list of tensors, one per time step; each is multiplied
            with a ``[lstm_units, output_units]`` weight matrix.

    Returns:
        Sum over time steps of each output scaled by its attention weight.
    """
    with tf.variable_scope('Attention', initializer=self.init):
        weights = tf.get_variable('weights',
                                  [self.lstm_units, self.output_units],
                                  regularizer=self.regularizer)
        biases = tf.get_variable('biases', [1, self.output_units])
        u_w = tf.get_variable('u_w', [self.output_units, 1])

        # One unnormalised score column per time step.
        scores = []
        for step_output in lstm_outputs:
            hidden_rep = nn.tanh(
                tf.add(tf.matmul(step_output, weights), biases))
            scores.append(tf.matmul(hidden_rep, u_w))

        # list -> tensor, then normalise across the time-step axis.
        # `axis` replaces the deprecated `dim` keyword of nn.softmax.
        scores = nn.softmax(tf.concat(scores, axis=1), axis=-1)

        # tensor -> list of per-step weight vectors.
        step_weights = tf.unstack(scores, axis=1)

        # Reshaping each weight to a column lets broadcasting scale every
        # unit of the step output, replacing the manual tile-by-concat.
        outputs = [
            tf.multiply(tf.reshape(weight, [-1, 1]), step_output)
            for weight, step_output in zip(step_weights, lstm_outputs)
        ]
        return tf.add_n(outputs)
def attention(query, key, value):
    """Scaled dot-product attention.

    Scores are ``query @ key^T`` divided by the square root of the size of the
    last axis of ``key``, normalised with a softmax over the last axis, and
    used to weight ``value``.

    Args:
        query: query tensor.
        key: key tensor; its last-axis size sets the scaling factor.
        value: value tensor.

    Returns:
        The attention-weighted combination of ``value``.
    """
    key_depth = cast(shape(key)[-1], float32)
    scaled_logits = matmul(query, key, transpose_b=True) / math.sqrt(key_depth)
    attention_weights = softmax(scaled_logits, axis=-1)
    return matmul(attention_weights, value)
def sample_logits(logits, skews):
    """Sample one index per row from skewed softmax distributions.

    Args:
        logits: 2-D logits, one row per sequence.
        skews: one skew setting per row, passed to ``skew_distribution``.

    Returns:
        Tuple ``(ixs, log_probs)``: the sampled indices as an array, and a
        list with the log of the skewed probability of each sampled index.
    """
    Ps = softmax(logits).numpy()

    # Replace every row by its skewed version.
    for i, P in enumerate(Ps):
        Ps[i] = skew_distribution(P, skews[i])

    ixs = np.array([np.random.choice(len(P), p=P) for P in Ps])
    return ixs, [np.log(Ps[i, ix]) for i, ix in enumerate(ixs)]
def detectionWorker(self, image):
    """Classify one image and report the top label keyed by its score.

    Args:
        image: image accepted by ``img_to_array``.

    Returns:
        Single-entry dict mapping ``int(top_score * 10000)`` to the label of
        the highest-scoring class.
    """
    # Wrap the image in a batch of one before predicting.
    batch = expand_dims(img_to_array(image), 0)
    class_scores = softmax(self.model.predict(batch)[0])

    top_score = np.max(class_scores)
    top_label = self.labels[np.argmax(class_scores)]
    return {int(top_score * 10000): top_label}
def softmax(inputs, axis=-1):
    """Softmax activation.

    Thin wrapper that forwards to ``nn.softmax``.

    Parameters
    ----------
    inputs:
        Input tensor.
    axis:
        Axis along which the softmax normalization is applied
        (defaults to the last axis).
    """
    normalized = nn.softmax(inputs, axis=axis)
    return normalized
def call(self, features, hidden):
    """Additive attention over ``features`` conditioned on ``hidden``.

    Args:
        features: tensor attended over; weights are normalised along axis 1.
        hidden: state that is given an extra axis 1 before being combined
            with the projected features.

    Returns:
        Tuple ``(context_vector, attention_weights)``.
    """
    hidden_expanded = expand_dims(hidden, 1)
    combined = self.W1(features) + self.W2(hidden_expanded)
    attention_weights = nn.softmax(self.V(nn.tanh(combined)), axis=1)

    # Weighted sum of the features along axis 1.
    context_vector = reduce_sum(attention_weights * features, axis=1)
    return context_vector, attention_weights
def __init__(self, data, mode, lstm_units, hidden_units, output_units, init, beta=0.0001, keep_prob=0.7):
    """Build the RNN -> attention -> fully-connected graph.

    Args:
        data: object exposing ``input`` and, outside test mode, ``label``.
        mode: when equal to ``'test'`` no loss or accuracy ops are built.
        lstm_units: stored on the instance for the sub-builders.
        hidden_units: stored on the instance for the sub-builders.
        output_units: stored on the instance for the sub-builders.
        init: stored on the instance for the sub-builders.
        beta: scale of the L2 regularizer.
        keep_prob: stored on the instance for the sub-builders.
    """
    self.data = data
    self.mode = mode
    self.lstm_units = lstm_units
    self.hidden_units = hidden_units
    self.output_units = output_units
    self.init = init
    self.beta = beta
    self.keep_prob = keep_prob
    self.regularizer = layers.l2_regularizer(scale=self.beta)

    with tf.name_scope('RNN'):
        lstm_outputs = self._rnn(data.input)
    with tf.name_scope('Attention'):
        attention_output = self._attention(lstm_outputs)

    logits = self._full_connected(attention_output)
    self.pred = nn.softmax(logits)
    self.pred_label = tf.argmax(logits, 1)

    # Test mode only needs the prediction ops.
    if self.mode == 'test':
        return

    with tf.name_scope('Label'):
        label = tf.identity(data.label, 'label')

    with tf.name_scope('Loss'):
        cross_entropy = nn.softmax_cross_entropy_with_logits(logits=logits,
                                                             labels=label)
        normal_loss = tf.reduce_mean(cross_entropy)
        # The regularization losses come back as a list.
        reg_losses = tf.get_collection(GraphKeys.REGULARIZATION_LOSSES)
        self.loss = tf.add(normal_loss, tf.add_n(reg_losses), name='loss')
        tf.summary.scalar('loss', self.loss)

    with tf.name_scope('Acc'):
        is_correct = tf.equal(tf.argmax(logits, 1), tf.argmax(label, 1))
        self.accuracy = tf.reduce_mean(tf.cast(is_correct, tf.float32),
                                       name='acc')
        tf.summary.scalar('accuracy', self.accuracy)
def call(self, q, val):
    """Additive attention of the query ``q`` over the values ``val``.

    Args:
        q: hidden value; given an extra axis 1 so it can be combined
            with the projected ``val``.
        val: values attended over; weights are normalised along axis 1.

    Returns:
        Tuple ``(context_vec, attention_w)``.
    """
    # Give the hidden value a time axis so the shapes line up.
    query_expanded = tf.expand_dims(q, 1)
    combined = nn.tanh(self.W1(query_expanded) + self.W2(val))
    attention_w = nn.softmax(self.V(combined), axis=1)

    # Weighted sum of the values along axis 1.
    context_vec = tf.reduce_sum(attention_w * val, axis=1)
    return context_vec, attention_w
def predict_sign(sign_path, model):
    """Predict the sign shown in the image at ``sign_path``.

    Args:
        sign_path: path handed to ``load_img``; the image is loaded at 30x30.
        model: object whose ``predict`` receives a batch of one image.

    Returns:
        A sentence naming the highest-scoring entry of
        ``df_signnames['SignName']``.
    """
    # Load the image, convert it to an array and wrap it in a batch of one.
    image = load_img(sign_path, target_size=(30, 30))
    batch = expand_dims(img_to_array(image), 0)

    class_scores = nn.softmax(model.predict(batch)[0])
    best_index = np.argmax(class_scores)

    sign_name = df_signnames['SignName'][best_index]
    return "It's Probably the '{}' sign!".format(sign_name)
def forward_pass(self, input_data):
    """Run the network on a batch and return class probabilities.

    Args:
        input_data: batch of inputs; values are divided by 255 first.

    Returns:
        Softmax output with one row per sample.
    """
    input_data = input_data / 255

    pre_inception = InceptionNet._pre_inception_layer(
        input_data, self.pre_inception_filters)
    inception_1 = InceptionNet._inception_layer(pre_inception,
                                                self.inception_1_filters)
    inception_2 = InceptionNet._inception_layer(inception_1,
                                                self.inception_2_filters)
    post_inception_pool = InceptionNet._max_pool(inception_2, (3, 3), (2, 2))

    inception_3 = InceptionNet._inception_layer(post_inception_pool,
                                                self.inception_3_filters)
    inception_4 = InceptionNet._inception_layer(inception_3,
                                                self.inception_4_filters)
    inception_5 = InceptionNet._inception_layer(inception_4,
                                                self.inception_5_filters)
    inception_6 = InceptionNet._inception_layer(inception_5,
                                                self.inception_6_filters)
    inception_7 = InceptionNet._inception_layer(inception_6,
                                                self.inception_7_filters)
    post_inception_pool_2 = InceptionNet._max_pool(inception_7, (3, 3),
                                                   (2, 2))

    inception_8 = InceptionNet._inception_layer(post_inception_pool_2,
                                                self.inception_8_filters)
    inception_9 = InceptionNet._inception_layer(inception_8,
                                                self.inception_9_filters)
    post_inception_pool_3 = nn.avg_pool(inception_9, (7, 7),
                                        strides=4,
                                        padding="SAME")

    # Flatten per sample first, then transpose to (features, batch).
    # Reshaping the batch-major flattened data straight to [1024, batch]
    # interleaved features from different samples whenever batch > 1.
    per_sample = tf.reshape(post_inception_pool_3, [-1, 1024])
    flatten_layer = tf.transpose(per_sample)

    relu_layer = nn.relu(
        tf.matmul(tf.transpose(self.relu_weights), flatten_layer) +
        self.relu_bias)
    # NOTE(review): dropout is applied on every call, including inference --
    # confirm this is intended.
    dropout_layer = nn.dropout(relu_layer, .4)
    linear_layer = tf.matmul(tf.transpose(self.linear_weights),
                             dropout_layer) + self.linear_bias

    # Transpose back to (batch, classes) before the softmax.
    return nn.softmax(tf.transpose(nn.tanh(linear_layer)))
def predict(self, image):
    """Classify ``image`` as one of several flower types.

    Args:
        image: PIL image accepted by ``img_to_array``.

    Returns:
        Tuple ``(predicted_class, confidence)`` where ``confidence`` is the
        top class probability expressed as a percentage.
    """
    # PIL image -> array -> batch containing that single array.
    batch = expand_dims(img_to_array(image), 0)

    # Logits for the only batch element, converted to class probabilities.
    class_probs = softmax(self.cnn.predict(batch)[0])

    best_index = np.argmax(class_probs)
    predicted_class = self.CLASS_NAMES[best_index]
    confidence = float(100 * np.max(class_probs))
    return predicted_class, confidence
def build_net(sess):
    """Build a small three-conv-layer classifier and initialise its variables.

    Args:
        sess: session used to run the variable initialiser.

    Returns:
        Tuple ``(y, x_hold, y_hold, keep_prob)``: the softmax output, the
        flat-input placeholder, the 2-unit label placeholder and the dropout
        keep-probability placeholder.
    """
    in_len = 32
    in_dep = 1

    x_hold = tf.placeholder(tf.float32, shape=[None, in_dep * in_len * in_len])
    y_hold = tf.placeholder(tf.float32, shape=[None, 2])
    keep_prob = tf.placeholder(tf.float32)

    xt = tf.reshape(x_hold, [-1, in_len, in_len, in_dep])

    # Layer 1 - 5x5 convolution
    w1 = tfac.weight([5, 5, in_dep, 4])
    b1 = tfac.bias([4])
    o1 = nn.relu(nn.conv2d(xt, w1, strides=[1, 2, 2, 1], padding='VALID') + b1)

    # Layer 2 - 3x3 convolution
    w2 = tfac.weight([3, 3, 4, 16])
    b2 = tfac.bias([16])
    o2 = nn.relu(nn.conv2d(o1, w2, strides=[1, 2, 2, 1], padding='VALID') + b2)

    # Layer 3 - 3x3 convolution
    w3 = tfac.weight([3, 3, 16, 32])
    b3 = tfac.bias([32])
    o3 = nn.relu(nn.conv2d(o2, w3, strides=[1, 1, 1, 1], padding='VALID') + b3)

    # 32 -> 14 -> 6 -> 4 spatially under the VALID convolutions above,
    # with 32 output channels.
    dim = 32 * 4 * 4

    # Fully connected layer - 600 units
    of = tf.reshape(o3, [-1, dim])
    w4 = tfac.weight([dim, 600])
    b4 = tfac.bias([600])
    o4 = nn.relu(tf.matmul(of, w4) + b4)
    o4 = nn.dropout(o4, keep_prob)

    # Output softmax layer - 2 units
    w5 = tfac.weight([600, 2])
    b5 = tfac.bias([2])
    y = nn.softmax(tf.matmul(o4, w5) + b5)

    # tf.initialize_all_variables is deprecated in favour of
    # tf.global_variables_initializer.
    sess.run(tf.global_variables_initializer())

    return y, x_hold, y_hold, keep_prob
def __call__(self, v, u):
    """
    One attention step: weight the spatial positions of `v` using `u` and add
    the attended summary back onto `u`.

    Input:
    - v: N x D x H x W
    - u: N x D
    Returns:
    - next_u: N x D

    Side effect: stores the attention map in `self.attention_maps`.
    """
    # NOTE(review): `shape` is invoked like a function here; on tensors where
    # `shape` is a plain attribute this raises -- confirm the expected type.
    N, K = v.shape(0), self.hidden_dim
    D, H, W = v.shape(1), v.shape(2), v.shape(3)
    v_proj = self.Wv(v)  # N x K x H x W
    u_proj = self.Wu(u)  # N x K
    # Give u_proj the same layout as v_proj so the two can be added.
    u_proj_expand = u_proj.reshape(N, K, 1, 1).expand(N, K, H, W)
    h = nn.tanh(v_proj + u_proj_expand)
    # Softmax over the flattened H * W positions, reshaped back to a map.
    p = nn.softmax(self.Wp(h).reshape(N, H * W)).reshape(N, 1, H, W)
    self.attention_maps = tf.identity(p.data)
    # NOTE(review): after `.sum(2)` a 4-D input has only three axes left, so
    # `.sum(3)` looks out of range -- verify the intended reduction axes.
    v_tilde = (p.expand_as(v) * v).sum(2).sum(3).reshape(N, D)
    next_u = u + v_tilde
    return next_u
def call(self, x, hidden, enc_output):
    """Run one decoding step with attention over the encoder output.

    Args:
        x: decoder input passed through the embedding layer.
        hidden: state used as the attention query.
        enc_output: encoder outputs attended over along axis 1.

    Returns:
        Tuple ``(x, state, attention_weights)``: the output of the final
        layer, the GRU state and the attention weights.
    """
    # Attention
    query = tf.expand_dims(hidden, 1)
    score = self.Verdict(tanh(self.W1(enc_output) + self.W2(query)))
    attention_weights = softmax(score, axis=1)
    context_vector = tf.reduce_sum(attention_weights * enc_output, axis=1)

    # Forward pass: prepend the context to the embedded input.
    embedded = self.embedding(x)
    gru_input = tf.concat([tf.expand_dims(context_vector, 1), embedded],
                          axis=-1)
    output, state = self.gru(gru_input)

    # Merge the leading axes before the final layer.
    flat_output = tf.reshape(output, (-1, output.shape[2]))
    return self.final_output(flat_output), state, attention_weights
def call(self, ensemble_logits, logits):
    """Distillation cost with exponentiated model logits as concentrations.

    Args:
        ensemble_logits: outputs from our ensemble
            (batch x ensembles x classes).
        logits: predicted outputs from our model (batch x classes).

    Returns:
        Scalar cost: batch mean multiplied by ``self.temp**2``.
    """
    temp = self.temp
    smooth = self.smooth_val

    model_logits = tf.cast(logits, dtype=tf.float64)
    teacher_logits = tf.cast(ensemble_logits, dtype=tf.float64)

    alphas = exp(model_logits / temp)
    precision = reduce_sum(alphas, axis=1)  # sum over classes

    teacher_probs = softmax(teacher_logits / temp, axis=2)  # over classes

    # Smooth for numerical stability: subtract the uniform value
    # (1 / number of classes), scale down, add it back.
    uniform = 1 / (tf.shape(teacher_probs)[2])
    teacher_probs = self.tp_scaling * (teacher_probs - uniform) + uniform

    # Mean of the log-probabilities over the ensemble members.
    mean_log_probs = reduce_mean(log(teacher_probs + smooth), axis=1)

    # Sum of lgamma over classes minus lgamma of the precision.
    lgamma_sum = reduce_sum(lgamma(alphas + smooth), axis=1)
    target_independent_term = lgamma_sum - lgamma(precision + smooth)

    # Negative sum over classes.
    target_dependent_term = -reduce_sum((alphas - 1.) * mean_log_probs,
                                        axis=1)

    per_sample_cost = target_dependent_term + target_independent_term
    return reduce_mean(per_sample_cost) * (temp**2)  # mean over the batch
def __init__(self, learning_rate=0.01, scope='policy_estimator'):
    """Build the policy estimator graph inside ``scope``.

    Args:
        learning_rate: learning rate handed to the Adam optimizer.
        scope: name of the variable scope that wraps all ops.
    """
    with tf.variable_scope(scope):
        self.state = tf.placeholder(tf.int32, [], 'state')
        self.action = tf.placeholder(dtype=tf.int32, name='action')
        self.target = tf.placeholder(dtype=tf.float32, name='target')

        # Table look-up estimator: a one-hot state fed to one linear layer.
        one_hot_state = tf.one_hot(self.state, int(OBSERVATION_SPACE))
        state_batch = tf.expand_dims(one_hot_state, 0)
        self.output_layer = layers.fully_connected(
            inputs=state_batch,
            num_outputs=ACTION_SPACE,
            activation_fn=None,
            weights_initializer=tf.zeros_initializer
        )

        probabilities = nn.softmax(self.output_layer)
        self.action_probs = tf.squeeze(probabilities)
        self.picked_action_probs = tf.gather(self.action_probs, self.action)

        # Loss and train op
        negative_log_prob = -tf.log(self.picked_action_probs)
        self.loss = negative_log_prob * self.target

        self.optimizer = train.AdamOptimizer(learning_rate=learning_rate)
        step = train.get_global_step()
        self.train_op = self.optimizer.minimize(self.loss, global_step=step)
def get_action(self, inputs):
    """Return the index of the highest-probability action for ``inputs``.

    The output of ``self.get_policy`` is passed through a softmax and the
    arg-max of the resulting array is returned.
    """
    action_probs = nn.softmax(self.get_policy(inputs)).numpy()
    return action_probs.argmax()
def testing(self, param_list, attributes, dataset_loc, saved_model_path, k = 10, only_k = False, given_image = None, actual_class = None):
    """Evaluate a saved model on the test set or on one given image.

    Args:
        param_list: parameters; ``param_list['batch_size']`` sets the test
            batch size, and the whole mapping is forwarded to
            ``display_k_preds``.
        attributes: accepted but not used here.
        dataset_loc: forwarded to ``display_k_preds``.
        saved_model_path: checkpoint path; ``<path>.meta`` holds the graph.
        k: number of random test samples to predict and display.
        only_k: when true, skip the full test-set accuracy pass.
        given_image: when not ``None``, predict only this image.
        actual_class: label of ``given_image``.
    """

    def to_network_input(image):
        # Resize to the network's input size and convert to unsigned bytes.
        resized = skimage.transform.resize(image, (self.input_dim, self.input_dim), mode = 'constant')
        return img_as_ubyte(resized)

    # Make a new tf graph to load the saved model into.
    loaded_graph = tf.Graph()

    if given_image is None:
        # Batch mode.
        with tf.Session(graph = loaded_graph) as sess:
            # Load the graph definition and restore the variables.
            imported_graph = tf.train.import_meta_graph(saved_model_path + '.meta')
            imported_graph.restore(sess, saved_model_path)

            # Tensors needed to run the session.
            imported_input = loaded_graph.get_tensor_by_name('input_data:0')
            imported_labels = loaded_graph.get_tensor_by_name('labels:0')
            imported_logits = loaded_graph.get_tensor_by_name('logits:0')
            imported_accuracy = loaded_graph.get_tensor_by_name('accuracy:0')

            if not only_k:
                # Report the average accuracy over all test batches.
                batch_test_accuracy = 0
                image_batch_size = param_list['batch_size']
                batch_passes = 0
                for (image_batch, label_batch) in cifar10_utils.load_resized_and_preprocessed_train_or_test_batch(None, image_batch_size, 'test'):
                    batch_test_accuracy += sess.run(imported_accuracy, feed_dict = {imported_input: image_batch, imported_labels: label_batch})
                    batch_passes += 1
                print('Test accuracy: ' + str(batch_test_accuracy / batch_passes))

            # Now test on k random test samples.
            # The context manager closes the file handle, which the bare
            # open() call leaked.
            # NOTE(review): pickle.load executes arbitrary code from the file;
            # only load test sets produced by this project.
            with open('preprocessed_test_set.p', mode = "rb") as test_set_file:
                test_features, test_labels = pickle.load(test_set_file)

            # Pair images with labels so random.sample() picks k distinct
            # pairs; zip() returns an iterator
            # (https://docs.python.org/3.3/library/functions.html#zip), hence
            # the list() here and the tuple() around the unzip below.
            zipped_data = list(zip(np.array(test_features), test_labels))
            k_imgs, k_labels = tuple(zip(*random.sample(zipped_data , k)))

            # Convert the sampled images to the network's input size.
            converted_imgs = [to_network_input(image) for image in k_imgs]

            # Run predictions on these k images using the trained logits.
            random_k_preds = sess.run(softmax(imported_logits), feed_dict = {imported_input: np.array(converted_imgs), imported_labels: k_labels})
            print('k random preds: ')
            print(random_k_preds)

            # Display the predicted probabilities along with the images.
            self.display_k_preds(k, k_imgs, k_labels, random_k_preds, param_list, dataset_loc)
    else:
        # Single-image mode: resize, then add a leading batch axis to both
        # the image and its label, e.g. (32,32,3) -> (1,32,32,3).
        new_img = to_network_input(given_image)
        new_img = np.expand_dims(new_img, axis = 0)
        actual_class = np.expand_dims(actual_class, axis = 0)

        with tf.Session(graph = loaded_graph) as sess:
            # Load the graph definition and restore the variables.
            imported_graph = tf.train.import_meta_graph(saved_model_path + '.meta')
            imported_graph.restore(sess, saved_model_path)

            # Tensors needed to run the session.
            imported_input = loaded_graph.get_tensor_by_name('input_data:0')
            imported_labels = loaded_graph.get_tensor_by_name('labels:0')
            imported_logits = loaded_graph.get_tensor_by_name('logits:0')

            pred = sess.run(softmax(imported_logits), feed_dict = {imported_input: new_img, imported_labels: np.array(actual_class)})
            self.display_k_preds(1, new_img, np.array(actual_class), pred, param_list, dataset_loc)
def build_pred(output, vocab_size, seq_size):
    """Turn raw outputs into per-position vocabulary probabilities.

    Applies a softmax over the last axis of ``output`` and reshapes the
    result to ``[-1, seq_size, vocab_size]``.
    """
    probabilities = nn.softmax(output)
    return tf.reshape(probabilities, [-1, seq_size, vocab_size])
def call(self, x, params, day_len, news_len, training=False):
    """Hierarchical attention forward pass (words -> news -> days).

    Args:
        x: word indices; sliced to the longest day/news lengths in the batch
            and passed through the embedding layer.
        params: extra per-news features concatenated to the news vectors.
        day_len: number of valid news items per day.
        news_len: number of valid words per news item.
        training: enables dropout and is forwarded to the GRU.

    Returns:
        Output of the final fully-connected layer.
    """
    # Trim the padding down to the longest sequences in this batch.
    max_dlen = np.max(day_len)
    max_nlen = np.max(news_len)
    x = x[:, :, :max_dlen, :max_nlen]
    params = params[:, :, :max_dlen, :]
    news_len = news_len[:, :, :max_dlen]

    # (batch_size, days, max_daily_news, max_news_words)
    # -> (batch_size, days, max_daily_news, max_news_words, embedding_dim)
    x = self.embedding(x)

    # Handle variable-length news word sequences.
    mask = tf.sequence_mask(news_len, maxlen=max_nlen, dtype=tf.float32)
    mask = tf.expand_dims(mask, axis=4)
    x *= mask

    # Word-level attention
    # x: (batch_size, days, max_daily_news, max_news_words, embedding_dim)
    # n: (batch_size, days, max_daily_news, embedding_dim)
    word_att = self.word_att(x)
    n = nn.softmax(word_att, axis=3) * x
    n = tf.reduce_sum(n, axis=3)

    # Handle variable-length day news sequences.
    mask = tf.sequence_mask(day_len, maxlen=max_dlen, dtype=tf.float32)
    mask = tf.expand_dims(mask, axis=3)
    n *= mask

    # Keep the cast result; it was previously computed and discarded.
    n = tf.cast(n, dtype=tf.float32)
    n_params = tf.concat([n, params], axis=3)

    # News-level attention
    news_att = self.news_att(n_params)
    d = nn.softmax(news_att, axis=2) * n_params
    d = tf.reduce_sum(d, axis=2)

    # Sequence modeling
    gru = self.bi_gru(d, training=training)

    # Temporal attention: normalise over the same axis that is reduced
    # (axis 1), matching the word- and news-level attention above. The
    # former axis=2 softmax did not normalise across the reduced axis.
    temp_att = self.temp_att(gru)
    v = nn.softmax(temp_att, axis=1) * gru
    v = tf.reduce_sum(v, axis=1)

    # Discriminative Network (MLP)
    v = self.fc0(v)
    if training:
        v = self.dropout(v)
    v = self.fc1(v)
    if training:
        v = self.dropout(v)
    return self.fc_out(v)
def call(self, x):
    """Exchange information between the aspect and polarity hidden states.

    Each stream attends over the other one and the attended summary is added
    to the stream's own hidden states.

    Args:
        x: list ``[aspect_hidden, polarity_hidden]``.

    Returns:
        List ``[aspect_hidden, polarity_hidden]`` with both entries reshaped
        to ``[-1, max_sentence_size, g_hidden_size]``.
    """
    assert isinstance(x, list)
    aspect_hidden, polarity_hidden = x
    max_len = self.config.max_sentence_size

    def cross_scores(query_hidden, key_hidden, g_tensor, g_vector, share_k):
        # Score every query position against every key position through
        # `share_k` shared projections, then collapse those with g_vector.
        batch = shape(query_hidden)[0]

        g_matrix = reshape(g_tensor, shape=[self.g_hidden_size, -1])
        g_matrix = tile(expand_dims(g_matrix, axis=0),
                        multiples=stack([batch, 1, 1]))

        shared = matmul(query_hidden, g_matrix)
        shared = reshape(shared,
                         shape=[-1, max_len * share_k, self.g_hidden_size])
        shared = tanh(matmul(shared, transpose(key_hidden, [0, 2, 1])))
        shared = reshape(shared, [-1, max_len, share_k, max_len])
        shared = transpose(shared, [0, 1, 3, 2])
        shared = reshape(shared, shape=[-1, max_len * max_len, share_k])

        g_vec = tile(expand_dims(g_vector, axis=0),
                     multiples=stack([batch, 1, 1]))
        shared = matmul(shared, g_vec)
        return reshape(shared, shape=[-1, max_len, max_len])

    aspect_vector = cross_scores(aspect_hidden, polarity_hidden,
                                 self.G_aspect_polarity, self.G_vector_aspect,
                                 self.cross_share_k)
    # The polarity stream is the query here; the previous code projected
    # aspect_hidden again, so polarity never took part in its own attention
    # scores.
    polarity_vector = cross_scores(polarity_hidden, aspect_hidden,
                                   self.G_polarity_aspect,
                                   self.G_vector_polarity,
                                   self.config.cross_share_k)

    # Get attention vector
    aspect_attention_vector = nn.softmax(aspect_vector)
    polarity_attention_vector = nn.softmax(polarity_vector)

    aspect_hidden_v = matmul(aspect_attention_vector, polarity_hidden)
    polarity_hidden_v = matmul(polarity_attention_vector, aspect_hidden)

    # Residual update of both streams.
    aspect_hidden = aspect_hidden + aspect_hidden_v
    polarity_hidden = polarity_hidden + polarity_hidden_v

    aspect_hidden = reshape(aspect_hidden,
                            shape=[-1, max_len, self.g_hidden_size])
    polarity_hidden = reshape(polarity_hidden,
                              shape=[-1, max_len, self.g_hidden_size])
    return [aspect_hidden, polarity_hidden]
def call(self, inputs, training=False, mask=None):
    """Forward pass: two ReLU layers followed by a softmax output layer.

    ``training`` and ``mask`` are accepted but not used.
    """
    hidden = relu(self.fc1(inputs))
    hidden = relu(self.fc2(hidden))
    return softmax(self.fc3(hidden))
# tf.Tensor( # [[0.26894143 0.5 0.73105854] # [0.5 0.5 0.73105854]], shape=(2, 3), dtype=float32) # tf.Tensor( # [[0.26894143 0.5 0.73105854] # [0.5 0.5 0.7310586 ]], shape=(2, 3), dtype=float32) print(_tanh(x)) print(nn.tanh(x)) # tf.Tensor( # [[-0.7615942 0. 0.7615941] # [ 0. 0. 0.7615941]], shape=(2, 3), dtype=float32) # tf.Tensor( # [[-0.7615942 0. 0.7615942] # [ 0. 0. 0.7615942]], shape=(2, 3), dtype=float32) print(_softmax(x)) print(nn.softmax(x)) # tf.Tensor( # [[0.09003059 0.24472848 0.66524094] # [0.21194156 0.21194156 0.57611686]], shape=(2, 3), dtype=float32) # tf.Tensor( # [[0.09003057 0.24472848 0.6652409 ] # [0.21194157 0.21194157 0.57611686]], shape=(2, 3), dtype=float32) print(_silu(x)) print(nn.silu(x)) # tf.Tensor( # [[-0.26894143 0. 0.73105854] # [ 0. 0. 0.7310586 ]], shape=(2, 3), dtype=float32) # tf.Tensor( # [[-0.26894143 0. 0.73105854] # [ 0. 0. 0.7310586 ]], shape=(2, 3), dtype=float32)