def scaled_dotproduct_attention(queries, keys, num_unit=None, num_heads=0,
                                dropout_rate=0, is_training=True, causality=False,
                                scope="scaled_att", reuse=None):
    # num_heads is unused here: this is the single-head variant.
    with tf.variable_scope(scope, reuse=reuse):
        if num_unit is None:
            num_unit = queries.get_shape().as_list()[-1]
        # Linear projections
        Q = tf.layers.dense(queries, num_unit, activation=tf.nn.relu)
        K = tf.layers.dense(keys, num_unit, activation=tf.nn.relu)
        V = tf.layers.dense(keys, num_unit, activation=tf.nn.relu)
        # Scaled dot product
        outputs = tf.matmul(Q, tf.transpose(K, [0, 2, 1]))
        outputs = outputs / (K.get_shape().as_list()[-1]**0.5)
        # Mask padded key positions so their attention scores become very small
        key_masks = tf.sign(tf.abs(tf.reduce_sum(keys, axis=-1)))
        key_masks = tf.tile(tf.expand_dims(key_masks, 1), [1, tf.shape(queries)[1], 1])
        paddings = tf.ones_like(outputs) * (-2**32 + 1)
        outputs = tf.where(tf.equal(key_masks, 0), paddings, outputs)
        # Causal mask: hide future positions from the model
        if causality:
            diag_vals = tf.ones_like(outputs[0, :, :])
            tril = tf.contrib.linalg.LinearOperatorTril(diag_vals).to_dense()
            masks = tf.tile(tf.expand_dims(tril, 0), [tf.shape(outputs)[0], 1, 1])
            paddings = tf.ones_like(masks) * (-2**32 + 1)
            outputs = tf.where(tf.equal(masks, 0), paddings, outputs)
        outputs = tf.nn.softmax(outputs)
        # Query mask: zero out rows that correspond to padded queries
        query_masks = tf.sign(tf.abs(tf.reduce_sum(queries, axis=-1)))
        query_masks = tf.tile(tf.expand_dims(query_masks, -1), [1, 1, tf.shape(keys)[1]])
        outputs *= query_masks
        outputs = tf.layers.dropout(outputs, rate=dropout_rate,
                                    training=tf.convert_to_tensor(is_training))
        # Weighted sum of the values
        outputs = tf.matmul(outputs, V)
        # outputs += queries
        outputs = normalize(outputs)
    return outputs
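# NOTE: `normalize` is called above but not defined in this excerpt. A minimal
# layer-normalization sketch of what it presumably does (an assumption, not
# necessarily the original implementation):
def normalize(inputs, epsilon=1e-8, scope="ln", reuse=None):
    with tf.variable_scope(scope, reuse=reuse):
        params_shape = inputs.get_shape()[-1:]
        # Normalize over the last (feature) axis
        mean, variance = tf.nn.moments(inputs, [-1], keep_dims=True)
        beta = tf.get_variable("beta", params_shape, initializer=tf.zeros_initializer())
        gamma = tf.get_variable("gamma", params_shape, initializer=tf.ones_initializer())
        normalized = (inputs - mean) / ((variance + epsilon) ** 0.5)
        return gamma * normalized + beta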
def accuracy_fn(inference_fn, inputs, labels):
    prediction = tf.nn.softmax(inference_fn(inputs))
    correct_pred = tf.equal(tf.argmax(prediction, 1), labels)
    return tf.reduce_mean(tf.cast(correct_pred, tf.float32))
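# Usage sketch (names here are illustrative, not from the original example):
# `net` is any inference function returning logits, and `test_labels` holds
# integer class ids, so they compare directly against tf.argmax's output.
accuracy = accuracy_fn(net, test_images, test_labels)
print("Test accuracy:", sess.run(accuracy))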
# Training
for i in range(1, num_steps + 1):
    _, d, idx = sess.run([train_op, avg_distance, cluster_idx],
                         feed_dict={X: full_data_x})
    if i % 10 == 0 or i == 1:
        print("Step %i, Avg Distance: %f" % (i, d))
# Assign a label to each centroid:
# using the labels seen during training, accumulate the label counts of all
# samples assigned to each centroid
counts = np.zeros(shape=(k, num_classes))
for i in range(len(idx)):
    counts[idx[i]] += mnist.train.labels[i]
# Assign the most frequent label to each centroid
labels_map = [np.argmax(c) for c in counts]
labels_map = tf.convert_to_tensor(labels_map)
# Lookup: map each centroid id to its label
cluster_label = tf.nn.embedding_lookup(labels_map, cluster_idx)
# Compute accuracy
correct_prediction = tf.equal(cluster_label, tf.cast(tf.argmax(Y, 1), tf.int32))
accuracy_op = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
# Test the model
test_x, test_y = mnist.test.images, mnist.test.labels
print("Test Accuracy:", sess.run(accuracy_op, feed_dict={X: test_x, Y: test_y}))
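# To see the centroid-labelling trick in isolation: counts[c] accumulates the
# one-hot labels of every sample assigned to centroid c, so argmax picks the
# majority class. A tiny standalone illustration with made-up numbers:
import numpy as np
counts_demo = np.array([[1, 9, 0],    # centroid 0: mostly class 1
                        [7, 2, 1]])   # centroid 1: mostly class 0
print([int(np.argmax(c)) for c in counts_demo])  # -> [1, 0]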
    # (This excerpt sits inside the per-GPU "tower" loop, where i indexes the
    # GPU and _x/_y are that tower's slice of the batch.)
    logits_test = conv_net(_x, num_classes, dropout, reuse=True, is_training=False)
    # Define loss and optimizer on logits_train so that dropout takes effect
    loss_op = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(logits=logits_train, labels=_y))
    optimizer = tf.train.AdamOptimizer(learning_rate)
    grads = optimizer.compute_gradients(loss_op)
    # Compute accuracy on only one of the GPUs
    if i == 0:
        # Evaluate model (with the test logits, so dropout is disabled)
        correct_pred = tf.equal(tf.argmax(logits_test, 1), tf.argmax(_y, 1))
        accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
    reuse_vars = True
    tower_grads.append(grads)

tower_grads = average_gradients(tower_grads)
train_op = optimizer.apply_gradients(tower_grads)
init = tf.global_variables_initializer()
with tf.Session() as sess:
    sess.run(init)
    for step in range(1, num_steps + 1):
        batch_x, batch_y = mnist.train.next_batch(batch_size * num_gpus)
        ts = time.time()
        sess.run(train_op, feed_dict={X: batch_x, Y: batch_y})
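# `average_gradients` is called above but not shown in this excerpt. The usual
# implementation (as in the TensorFlow multi-GPU tutorials) averages each
# variable's gradient across towers; a sketch along those lines:
def average_gradients(tower_grads):
    average_grads = []
    # tower_grads is a list (one entry per GPU) of lists of (gradient, variable) pairs
    for grad_and_vars in zip(*tower_grads):
        grads = [tf.expand_dims(g, 0) for g, _ in grad_and_vars]
        grad = tf.reduce_mean(tf.concat(grads, 0), 0)
        # All towers share the variable, so take it from the first tower
        average_grads.append((grad, grad_and_vars[0][1]))
    return average_grads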
sess.run(init)
for epoch in range(train_epochs):
    avg_cost = 0.
    total_batch = int(mnist.train.num_examples / batch_size)
    # Loop over all batches
    for i in range(total_batch):
        batch_xs, batch_ys = mnist.train.next_batch(batch_size)
        _, c = sess.run([optimizer, cost], feed_dict={x: batch_xs, y: batch_ys})
        # Accumulate the average loss
        avg_cost += c / total_batch
    if (epoch + 1) % display_step == 0:
        print("Epoch:", '%04d' % (epoch + 1), "cost=", "{:.9f}".format(avg_cost))
print("Optimization Finished!")
correct_prediction = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
# Compute accuracy
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
print("Accuracy:", accuracy.eval({x: mnist.test.images, y: mnist.test.labels}))
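# The placeholders and ops this loop assumes (x, y, pred, cost, optimizer) are
# not part of the excerpt. A sketch of the standard logistic-regression graph
# they presumably come from (an assumption based on the names used above):
x = tf.placeholder(tf.float32, [None, 784])   # MNIST images, flattened
y = tf.placeholder(tf.float32, [None, 10])    # one-hot labels
W = tf.Variable(tf.zeros([784, 10]))
b = tf.Variable(tf.zeros([10]))
pred = tf.nn.softmax(tf.matmul(x, W) + b)
cost = tf.reduce_mean(-tf.reduce_sum(y * tf.log(pred), reduction_indices=1))
optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost)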
def accuracy_fn(inference_fn, inputs, labels):
    prediction = tf.nn.softmax(inference_fn(inputs))
    # Check whether the predictions equal the labels
    correct_pred = tf.equal(tf.argmax(prediction, 1), labels)
    return tf.reduce_mean(tf.cast(correct_pred, tf.float32))
                                            dtype=tf.float32)
    return tf.matmul(outputs[-1], weights['out']) + biases['out']

logits = RNN(X, weights, biases)
prediction = tf.nn.softmax(logits)
# Define loss and optimizer
loss_op = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=Y))
optimizer = tf.train.GradientDescentOptimizer(learning_rate)
train_op = optimizer.minimize(loss_op)
# Evaluate the model
correct_pred = tf.equal(tf.argmax(prediction, 1), tf.argmax(Y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_pred, dtype=tf.float32))
init = tf.global_variables_initializer()
with tf.Session() as sess:
    sess.run(init)
    for step in range(1, training_step + 1):
        batch_x, batch_y = mnist.train.next_batch(batch_size)
        # Reshape the data to (batch_size, timesteps, num_input)
        batch_x = batch_x.reshape((batch_size, timesteps, num_input))
        # Run the optimizer first
        sess.run(train_op, feed_dict={X: batch_x, Y: batch_y})
        if step % display_step == 0 or step == 1:
            loss, acc = sess.run([loss_op, accuracy],
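# The head of the RNN function is cut off above; the `dtype=tf.float32)` line
# is the tail of its static_rnn call. A sketch of how that function typically
# opens in the TF 1.x recurrent-network examples (names like num_hidden are
# assumptions, for context only):
def RNN(x, weights, biases):
    # Unstack (batch, timesteps, num_input) into a list of `timesteps` tensors
    x = tf.unstack(x, timesteps, 1)
    lstm_cell = tf.contrib.rnn.BasicLSTMCell(num_hidden, forget_bias=1.0)
    outputs, states = tf.contrib.rnn.static_rnn(lstm_cell, x,
                                                dtype=tf.float32)
    return tf.matmul(outputs[-1], weights['out']) + biases['out']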
# Requires the contrib random-forest ops:
from tensorflow.contrib.tensor_forest.python import tensor_forest
from tensorflow.python.ops import resources

X = tf.placeholder(tf.float32, shape=[None, num_features])
Y = tf.placeholder(tf.float32, shape=[None])
hparams = tensor_forest.ForestHParams(num_classes=num_classes,
                                      num_features=num_features,
                                      num_trees=num_trees,
                                      max_nodes=max_nodes).fill()
# Build the random forest
forest_graph = tensor_forest.RandomForestGraphs(hparams)
# Get the training graph and loss
train_op = forest_graph.training_graph(X, Y)
loss_op = forest_graph.training_loss(X, Y)
# Measure accuracy
infer_op, _, _ = forest_graph.inference_graph(X)
correct_prediction = tf.equal(tf.argmax(infer_op, 1), tf.cast(Y, tf.int64))
accuracy_op = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
init_vars = tf.group(
    tf.global_variables_initializer(),
    resources.initialize_resources(resources.shared_resources()))
sess = tf.Session()
sess.run(init_vars)
for i in range(1, num_steps + 1):
    batch_x, batch_y = mnist.train.next_batch(batch_size)
    _, l = sess.run([train_op, loss_op], feed_dict={X: batch_x, Y: batch_y})
    if i % 50 == 0 or i == 1:
        acc = sess.run(accuracy_op, feed_dict={X: batch_x, Y: batch_y})
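# The excerpt stops inside the logging branch; such a run typically ends by
# evaluating on the held-out test set, e.g. (a sketch, not shown in the original):
test_x, test_y = mnist.test.images, mnist.test.labels
print("Test Accuracy:", sess.run(accuracy_op, feed_dict={X: test_x, Y: test_y}))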
def multihead_attention(queries, keys, num_units=None, num_heads=8,
                        dropout_rate=0, is_training=True, causality=False,
                        scope="multihead_attention", reuse=None):
    # num_heads must be positive and divide num_units; 8 is a common default
    with tf.variable_scope(scope, reuse=reuse):
        if num_units is None:
            num_units = queries.get_shape().as_list()[-1]
        # Linear projections
        Q = tf.layers.dense(queries, num_units, activation=tf.nn.relu)
        K = tf.layers.dense(keys, num_units, activation=tf.nn.relu)
        V = tf.layers.dense(keys, num_units, activation=tf.nn.relu)
        # Split into heads along the feature axis, then stack along the batch axis
        Q_ = tf.concat(tf.split(Q, num_heads, axis=2), axis=0)
        K_ = tf.concat(tf.split(K, num_heads, axis=2), axis=0)
        V_ = tf.concat(tf.split(V, num_heads, axis=2), axis=0)
        # Scaled dot product
        outputs = tf.matmul(Q_, tf.transpose(K_, [0, 2, 1]))
        outputs = outputs / (K_.get_shape().as_list()[-1]**0.5)
        # Key mask: hide padded key positions
        key_masks = tf.sign(tf.abs(tf.reduce_sum(keys, axis=-1)))
        key_masks = tf.tile(key_masks, [num_heads, 1])
        key_masks = tf.tile(tf.expand_dims(key_masks, 1), [1, tf.shape(queries)[1], 1])
        paddings = tf.ones_like(outputs) * (-2**32 + 1)
        outputs = tf.where(tf.equal(key_masks, 0), paddings, outputs)
        # Causal mask: hide future positions
        if causality:
            diag_vals = tf.ones_like(outputs[0, :, :])
            tril = tf.contrib.linalg.LinearOperatorTril(diag_vals).to_dense()
            masks = tf.tile(tf.expand_dims(tril, 0), [tf.shape(outputs)[0], 1, 1])
            paddings = tf.ones_like(masks) * (-2**32 + 1)
            outputs = tf.where(tf.equal(masks, 0), paddings, outputs)
        outputs = tf.nn.softmax(outputs)
        # Query mask: zero out rows for padded queries
        query_masks = tf.sign(tf.abs(tf.reduce_sum(queries, axis=-1)))
        query_masks = tf.tile(query_masks, [num_heads, 1])
        query_masks = tf.tile(tf.expand_dims(query_masks, -1), [1, 1, tf.shape(keys)[1]])
        outputs *= query_masks
        outputs = tf.layers.dropout(outputs, rate=dropout_rate,
                                    training=tf.convert_to_tensor(is_training))
        # Weighted sum of the values
        outputs = tf.matmul(outputs, V_)
        # Restore shape: undo the head split
        outputs = tf.concat(tf.split(outputs, num_heads, axis=0), axis=2)
        # Residual connection and layer normalization
        outputs += queries
        outputs = normalize(outputs)
    return outputs
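# On TF 1.x builds where tf.contrib.linalg.LinearOperatorTril has been removed,
# an equivalent lower-triangular causal mask can be built with
# tf.matrix_band_part; a sketch of a drop-in replacement for the tril line:
tril = tf.matrix_band_part(diag_vals, -1, 0)  # keep the lower triangle, zero the rest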
cross_entropy = -tf.reduce_sum(y_ * tf.log(y))  # cross-entropy loss
train_step = tf.train.GradientDescentOptimizer(0.01).minimize(
    cross_entropy)  # train with gradient descent, minimizing the cross-entropy loss
init = tf.global_variables_initializer()
for i in range(1000):
    batch = mnist.train.next_batch(50)  # load a mini-batch of the dataset
    train_step.run(feed_dict={x: batch[0], y_: batch[1]})
print("Training finished..")
"""
This is put particularly well: tf.argmax is a very useful function that gives
the index of the largest value of a tensor along some axis. Since the label
vectors consist of 0s and 1s, the index of the maximum value 1 is exactly the
class label. For example, tf.argmax(y, 1) returns the label the model predicts
for any input x, while tf.argmax(y_, 1) is the true label, and tf.equal checks
whether the prediction matches the truth (matching index positions mean a match).
"""
correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
print(accuracy.eval(feed_dict={x: mnist.test.images, y_: mnist.test.labels}))
# Plain softmax regression only gets us so far
"""
Now switch to a CNN for training and recognition
"""
# We first need to create many W and b variables. Since we use ReLU neurons, a
# good practice is to initialize the biases with a small positive value
def weight_variable(shape):
    initial = tf.truncated_normal(shape, stddev=0.1)
    return tf.Variable(initial)
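# The comment above says to initialize biases with a small positive constant;
# the natural companion helper (as in the classic MNIST deep tutorial) would be:
def bias_variable(shape):
    initial = tf.constant(0.1, shape=shape)
    return tf.Variable(initial)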
    batch_size = tf.shape(outputs)[0]
    # Index of each sample's last valid output in the flattened
    # (batch_size * seq_max_len, n_hidden) tensor
    index = tf.range(0, batch_size) * seq_max_len + (seqlen - 1)
    outputs = tf.gather(tf.reshape(outputs, [-1, n_hidden]), index)
    return tf.matmul(outputs, weights['out']) + biases['out']

pred = dynamicRNN(x, seqlen, weights, biases)
cost = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits_v2(logits=pred, labels=y))
optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost)
correct_pred = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
init = tf.global_variables_initializer()
with tf.Session() as sess:
    sess.run(init)
    for step in range(1, training_steps + 1):
        batch_x, batch_y, batch_seqlen = trainset.next(batch_size)
        sess.run(optimizer, feed_dict={
            x: batch_x,
            y: batch_y,
            seqlen: batch_seqlen
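# The gather trick in isolation: after flattening outputs to shape
# (batch_size * seq_max_len, n_hidden), row b * seq_max_len + (seqlen[b] - 1)
# is sample b's last valid timestep. A tiny numeric illustration:
import numpy as np
seq_max_len_demo = 4
seqlen_demo = np.array([2, 4, 1])           # true lengths of 3 samples
index_demo = np.arange(3) * seq_max_len_demo + (seqlen_demo - 1)
print(index_demo)  # -> [1 7 8]: rows holding the last valid outputs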
b = tf.Variable(tf.zeros([10]), name='Bias')
# Build the model, wrapping each group of ops in a scope so TensorBoard can
# visualize the graph cleanly
with tf.name_scope('Model'):
    pred = tf.nn.softmax(tf.matmul(x, W) + b)
with tf.name_scope('Loss'):
    cost = tf.reduce_mean(-tf.reduce_sum(y * tf.log(pred), reduction_indices=1))
with tf.name_scope('SGD'):
    optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost)
with tf.name_scope('Accuracy'):
    acc = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
    acc = tf.reduce_mean(tf.cast(acc, tf.float32))
init = tf.global_variables_initializer()
tf.summary.scalar("loss", cost)
tf.summary.scalar("accuracy", acc)
merged_summary_op = tf.summary.merge_all()
with tf.Session() as sess:
    sess.run(init)
    summary_writer = tf.summary.FileWriter(logs_path, graph=tf.get_default_graph())
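    # The excerpt ends right after the writer is created; summaries only reach
    # TensorBoard once they are evaluated and written each step. A sketch of
    # the usual continuation (loop variable names are assumptions):
    for epoch in range(training_epochs):
        batch_xs, batch_ys = mnist.train.next_batch(batch_size)
        _, c, summary = sess.run([optimizer, cost, merged_summary_op],
                                 feed_dict={x: batch_xs, y: batch_ys})
        summary_writer.add_summary(summary, epoch)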