Example #1
0
 def nce_loss(self,true_logits,sampled_logits):
     """为NCE loss构图"""
     opts = self._options
     true_xent = tf.nn.sigmoid_cross_entropy_with_logits(
         true_logits,tf.ones_like(true_logits)
     )
     sampled_xent = tf.nn.sigmoid_cross_entropy_with_logits(
         sampled_logits,tf.zeros_like(sampled_logits)
     )
     # nce loss 是真实值和噪声(采样词)的贡献和在batch中的平均值
     nce_loss_tensor = (tf.reduce_sum(true_xent)+tf.reduce_sum(sampled_xent)) / opts.batch_size
     return nce_loss_tensor
def scaled_dotproduct_attention(queries,
                                keys,
                                num_unit=None,
                                num_heads=0,
                                dropout_rate=0,
                                is_tranining=True,
                                causality=False,
                                scope="scaled_att",
                                reuse=None):
    with tf.variable_scope(scope, reuse=reuse):
        if num_unit is None:
            num_unit = queries.get_shape().as_list[-1]
        # 线性变换
        Q = tf.layers.dense(queries, num_unit, activation=tf.nn.relu)
        K = tf.layers.dense(keys, num_unit, activation=tf.nn.relu)
        V = tf.layers.dense(keys, num_unit, activation=tf.nn.relu)

        outputs = tf.matmul(Q, tf.transpose(K, [0, 2, 1]))
        outputs = outputs / (K.get_shape().as_list()[-1]**0.5)

        # 对填充的部分进行mask,这些位置att score变得极小,
        key_masks = tf.sign(tf.abs(tf.reduce_sum(keys, axis=-1)))
        key_masks = tf.tile(tf.expand_dims(key_masks, 1),
                            [1, tf.shape(queries)[1], 1])

        paddings = tf.ones_like(outputs) * (-2**32 + 1)
        outputs = tf.where(tf.equal(key_masks, 0), paddings, outputs)

        # 一个mask操作,对模型屏蔽未来信息
        if causality:
            diag_vals = tf.ones_like(outputs[0, :, :])
            tril = tf.contrib.linalg.LinearOperatorTril(diag_vals).to_dense()
            masks = tf.tile(tf.expand_dims(tril, 0),
                            [tf.shape(outputs)[0], 1, 1])

            paddings = tf.ones_like(masks) * (-2**32 + 1)
            outputs = tf.where(tf.equal(masks, 0), paddings, outputs)
        outputs = tf.nn.softmax(outputs)
        # Query mask
        query_masks = tf.sign(tf.abs(tf.reduce_sum(queries, axis=-1)))
        query_masks = tf.tile(tf.expand_dims(query_masks, -1),
                              [1, 1, tf.shape(keys)[1]])
        outputs *= query_masks
        outputs = tf.layers.dropout(
            outputs,
            rate=dropout_rate,
            training=tf.convert_to_tensor(is_tranining))
        # 加权平均
        outputs = tf.matmul(outputs, V)
        #
        outputs += queries
        outputs = normalize(outputs)
    return outputs
Example #3
0
    def forward(self,examples,labels):
        """建立前向传播图"""
        opts = self._options
        # 声明所有需要的变量
        # embeddings :[vocab-size,emb_size]
        init_width = 0.5 / opts.emb_dim

        emb = tf.Variable(
            tf.random_uniform([opts.vocab_size,opts.emb_dim], -init_width,init_width),name = "emb")
        self._emb = emb

        # softmax_weights:[vocab_size,emb_dim]
        sm_w_t = tf.Variable(
            tf.zeros([opts.vocab_size,opts.emb_dim]),name="sm_w_t")
        # softmax bias:[emd_dim]
        sm_b = tf.Variable(
            tf.zeros([opts.vocab_size]),name="sm_b")

        # global step:scalar
        self.global_step = tf.Variable(0,name="global_step")

        # 候选采样计算nce loss的节点
        labels_matrix = tf.reshape(
            tf.cast(labels,dtype=tf.int64),[opts.batch_size,1])
        # 负采样
        sampled_ids, _,_ = (tf.nn.fixed_unigram_candidate_sampler(
            true_classes=labels_matrix,
            num_true=1,
            num_sampled=opts.num_samples,
            unique=True,
            range_max=opts.vocab_size,
            distortion=0.75,
            unigrams=opts.vocab_counts.tolist()))

        # 样本的嵌入:[batch_size,emb_dim]
        example_emb = tf.nn.embedding_lookup(emb,examples)

        # 标签的权重w:[batch_size,emb_dim]
        true_w = tf.nn.embedding_lookup(sm_w_t,labels)
        # 标签的偏差b :[batch_size,1]
        true_b = tf.nn.embedding_lookup(sm_b,labels)

        # 采样样本的ids的权重(Weights for sampled ids):[num_sampled,emb_dim]
        sampled_w = tf.nn.embedding_lookup(sm_w_t, sampled_ids)
        # 采样样本的 bias :[num_sampled,1]
        sampled_b = tf.nn.embedding_lookup(sm_b,sampled_ids)

        # True logits:[batch_size,1]
        true_logits = tf.reduce_sum(tf.multiply(example_emb,true_w),1) + true_b

        # 采样样本预测值 sampled logits:[batch_size,num_sampled]
        sampled_b_vec = tf.reshape(sampled_b,[opts.num_samples])
        sampled_logits = tf.matmul(example_emb,
                                   sampled_w,
                                   transpose_b=True) + sampled_b_vec
        return true_logits,sampled_logits
    def __init__(self, is_training, config):
        self.batch_size = batch_size = config.batch_size  # batch_size
        self.num_steps = num_steps = config.num_steps  #
        size = config.hidden_size  # 隐藏层
        vocab_size = config.vocab_size  # 词表size
        # 输入占位符
        self._input_data = tf.placeholder(tf.int32, [batch_size, num_steps])
        self._targets = tf.placeholder(tf.int32, [batch_size, num_steps])

        lstm_cell = rnn_cell.BasicLSTMCell(size, forget_bias=0.0)
        if is_training and config.keep_prob < 1:
            lstm_cell = rnn_cell.DropoutWrapper(
                lstm_cell, output_keep_prob=config.keep_prob)
        cell = rnn_cell.MultiRNNCell([lstm_cell] * config.num_layers)

        self._initial_state = cell.zero_state(batch_size, tf.float32)

        with tf.device("/cpu:0"):
            embedding = tf.get_variable("embedding", [vocab_size, size])
            inputs = tf.nn.embedding_lookup(embedding, self._input_data)

        if is_training and config.keep_prob < 1:
            inputs = tf.nn.dropout(inputs, config.keep_prob)

        outputs = []
        states = []
        state = self._initial_state
        with tf.variable_scope("RNN"):
            for time_step in range(num_steps):
                if time_step > 0:
                    tf.get_variable_scope().reuse_variables()
                (cell_output, state) = cell(inputs[:, time_step, :], state)
                outputs.append(cell_output)
                states.append(state)

        output = tf.reshape(tf.concat(outputs, 1), [-1, size])
        softmax_w = tf.get_variable("softmax_w", [size, vocab_size])
        softmax_b = tf.get_variable("softmax_b", [vocab_size])
        logits = tf.matmul(output, softmax_w) + softmax_b
        loss = tf.contrib.legacy_seq2seq.sequence_loss_by_example(
            [logits], [tf.reshape(self._targets, [-1])],
            [tf.ones([batch_size * num_steps])], vocab_size)
        self._cost = cost = tf.reduce_sum(loss) / batch_size
        self._final_state = states[-1]

        if not is_training:
            return
        self._lr = tf.Variable(0.0, trainable=False)
        tvars = tf.trainable_variables()
        grads, _ = tf.clip_by_global_norm(tf.gradients(cost, tvars),
                                          config.max_grad_norm)
        optimizer = tf.train.GradientDescentOptimizer(self.lr)
        self._train_op = optimizer.apply_gradients(zip(grads, tvars))
Example #5
0
def evaluation(logits, labels):
    """Evaluate the quality of the logits at predicting the label.

  Args:
    logits: Logits tensor, float - [batch_size, NUM_CLASSES].
    labels: Labels tensor, int32 - [batch_size], with values in the
      range [0, NUM_CLASSES).

  Returns:
    A scalar int32 tensor with the number of examples (out of batch_size)
    that were predicted correctly.
  """
    # For a classifier model, we can use the in_top_k Op.
    # It returns a bool tensor with shape [batch_size] that is true for
    # the examples where the label is in the top k (here k=1)
    # of all logits for that example.
    correct = tf.nn.in_top_k(logits, labels, 1)
    # Return the number of true entries.
    return tf.reduce_sum(tf.cast(correct, tf.int32))
import numpy as np
import tensorflower as tf

#
from tensorflower.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("/tmp/data/", one_hot=True)

Xtr, Ytr = mnist.train.next_batch(5000)
Xte, Yte = mnist.test.next_batch(200)

xtr = tf.placeholder("float", [None, 784])
xte = tf.placeholder("float", [784])

# 使用l1距离计算最近邻
# 计算L1距离,损失函数
distance = tf.reduce_sum(tf.abs(tf.add(xtr, tf.negative(xte))),
                         reduction_indices=1)
# 预测:找到最小距离索引,
pred = tf.arg_min(distance, 0)

accuracy = 0.

init = tf.global_variables_initializer()

with tf.Session() as sess:

    sess.run(init)
    for i in range(len(Xte)):
        # 获取最近邻数据
        nn_idx = sess.run(pred, feed_dict={xtr: Xtr, xte: Xte[i, :]})

        # 获取最近邻类别标签,并且和真正的标签进行比较
Example #7
0
# 为每个batch计算平均的nce loss
loss_op = tf.reduce_mean(
    tf.nn.nce_loss(weights=nce_weights,
                   biases=nce_biases,
                   labels=Y,
                   inputs=X_embed,
                   num_sampled=num_sampled,
                   num_classes=vocab_size))

# 定义优化器,优化器的作用是求导,反向传播。
optimizer = tf.train.GradientDescentOptimizer(learning_rate)
train_op = optimizer.minimize(loss_op)

# 验证/评价
# 计算输入数据embedding 和每个embedding向量的cosine相似度
X_embed_norm = X_embed / tf.sqrt(tf.reduce_sum(tf.square(X_embed)))
embedding_norm = embedding / tf.sqrt(
    tf.reduce_sum(tf.square(embedding), 1, keep_dims=True))
cosine_sim_op = tf.matmul(X_embed_norm, embedding_norm, transpose_b=True)

init = tf.global_variables_initializer()

with tf.Session() as sess:
    sess.run(init)

    x_test = np.array([word2id[w] for w in eval_words])

    average_loss = 0

    for step in range(1, num_steps + 1):
        batch_x, batch_y = next_batch(batch_size, num_skips, skip_window)
Example #8
0
def word2vec_basic(log_dir):
    # 创建tensorboard的可视化目录
    if not os.path.exists(log_dir):
        os.makedirs(log_dir)

    # 第一步,下载数据
    url = 'http://mattmahoney.net/dc/'

    def maybe_download(filename, expected_bytes, sha256=None):
        local_filename = os.path.join(gettempdir(), filename)
        if not os.path.exists(local_filename):
            local_filename, _ = urllib.request.urlretrieve(
                url + filename, local_filename)
        statinfo = os.stat(local_filename)

        if sha256 and _hash_file(local_filename) != sha256:
            raise Exception('Failed to verify ' + local_filename +
                            ' due to hash '
                            'mismatch. Can you get to it with a browser?')

        if statinfo.st_size == expected_bytes:
            print("found and verified", filename)
        else:
            print(statinfo.st_size)
            raise Exception('Failed to verify ' + local_filename +
                            '. Can you get to it with a browser?')
        return local_filename

    filename = maybe_download(
        'text8.zip',
        31344016,
        sha256=
        'a6640522afe85d1963ad56c05b0ede0a0c000dddc9671758a6cc09b7a38e5232')

    # 数据转为List<String>
    def read_data(filename):
        with zipfile.ZipFile(filename) as f:
            data = tf.compat.as_str(f.read(f.namelist()[0])).split()
        return data

    vocabulary = read_data(filename)
    print('data_size', len(vocabulary))

    # 第二步,建词典并且把罕见词替换成UNK
    vocabulary_size = 50000

    def build_dataset(words, n_words):

        count = [['UNK', -1]]
        count.extend(collections.Counter(words).most_common(n_words - 1))
        dictionary = {word: index for index, (word, _) in enumerate(count)}
        data = []
        unk_count = 0
        for word in words:
            index = dictionary.get(word, 0)
            if index == 0:  # dictionary['UNK']
                unk_count += 1
            data.append(index)
        count[0][1] = unk_count
        reversed_dictionary = dict(zip(dictionary.values(), dictionary.keys()))
        return data, count, dictionary, reversed_dictionary

    # data: 词表中的所有的词的id
    # count: 单词和出现次数的map
    # dictionary: 单词-->index 的映射
    # reverse_dictionary:index -->单词
    data, count, dictionary, reversed_dictionary = build_dataset(
        vocabulary, vocabulary_size)
    del vocabulary
    print('Most common words (+UNK)', count[:5])
    print('Sample data', data[:10],
          [reversed_dictionary[i] for i in data[:10]])

    # 针对skip-gram模型生成batch数据
    def generate_batch(batch_size, num_skips, skip_window):
        global data_index
        assert batch_size % num_skips == 0
        assert num_skips <= 2 * skip_window
        batch = np.ndarray(shape=(batch_size), dtype=np.int32)
        labels = np.ndarray(shape=(batch_size, 1), dtype=np.int32)
        # skip的范围
        span = 2 * skip_window + 1
        buffer = collections.deque(maxlen=span)
        if data_index + span > len(data):
            data_index = 0
        buffer.extend(data[data_index:data_index + span])  # 向后取一个窗口内的结果
        data_index += span
        for i in range(batch_size // num_skips):
            context_words = [w for w in range(span) if w != skip_window]
            words_to_use = random.sample(context_words, num_skips)
            for j, context_words in enumerate(words_to_use):
                batch[i * num_skips + j] = buffer[skip_window]
                labels[i * num_skips + j, 0] = buffer[context_words]
            if data_index == len(data):
                buffer.extend(data[0:span])
                data_index = span
            else:
                buffer.append(data[data_index])
                data_index += 1
        # Backtrack a little bit to avoid skipping words in the end of a batch
        data_index = (data_index - span) % len(data)
        return batch, labels

    batch, labels = generate_batch(batch_size=8, num_skips=2, skip_window=1)
    for i in range(8):
        print(batch[i], reversed_dictionary[batch[i]], '->', labels[i, 0],
              reversed_dictionary[labels[i, 0]])

    # 建立并且训练模型

    batch_size = 128
    embedding_size = 128  # 词向量维度
    skip_window = 1  # 考虑左右几个单词
    num_skips = 2  # 复用输入生成标签的次数
    num_sampled = 64  # 负样本数量

    # 采样一个样本的近邻作为随机验证机,将验证集样本限制为 较低id的单词,是比较高频的构造词汇
    # 这三个变量用作显示模型准确率,不影响计算。
    valid_size = 16  # 用于评估相似性的随机单词集合
    valid_window = 100  #
    valid_examples = np.random.choice(valid_window, valid_size, replace=False)

    graph = tf.Graph()

    with graph.as_default():

        # 输入数据
        with tf.name_scope('input'):
            train_inputs = tf.placeholder(tf.int32, shape=[batch_size])
            train_labels = tf.placeholder(tf.int32, shape=[batch_size, 1])
            valid_dataset = tf.constant(valid_examples, dtype=tf.int32)

        # 操作op和变量variables 固定在CPU上。
        with tf.device('/cpu:0'):
            with tf.name_scope('embeddings'):
                embeddings = tf.Variable(
                    tf.random_uniform([vocabulary_size, embedding_size], -1.0,
                                      1.0))
                embed = tf.nn.embedding_lookup(embeddings, train_inputs)

            # 构造NCE损失的变量
            with tf.name_scope('weights'):
                nce_weights = tf.Variable(
                    tf.truncated_normal([vocabulary_size, embedding_size],
                                        stddev=1.0 /
                                        math.sqrt(embedding_size)))

            with tf.name_scope('biases'):
                nce_biases = tf.Variable(tf.zeros([vocabulary_size]))

        # 计算该批次的平均nce损失,当评估损失的时候,自动绘制一个新的负样本。
        with tf.name_scope('loss'):
            loss = tf.reduce_mean(
                tf.nn.nce_loss(weights=nce_weights,
                               biases=nce_biases,
                               labels=train_labels,
                               inputs=embed,
                               num_sampled=num_sampled,
                               num_classes=vocabulary_size))
        # 汇总损失
        tf.summary.scalar('loss', loss)

        # 构造SGD
        with tf.name_scope('opytimizer'):
            optimizer = tf.train.GradientDescentOptimizer(1.0).minimize(loss)
        # 计算小批次样本和所有样本之间的余弦相似度
        norm = tf.sqrt(tf.reduce_sum(tf.square(embeddings), 1, keep_dims=True))
        normalized_embeddings = embeddings / norm
        valid_embeddings = tf.nn.embedding_lookup(normalized_embeddings,
                                                  valid_dataset)
        similarity = tf.matmul(valid_embeddings,
                               normalized_embeddings,
                               transpose_b=True)

        # merge all summary
        merged = tf.summary.merge_all()

        init = tf.global_variables_initializer()

        saver = tf.train.Saver()

    # 开始训练
    num_steps = 1000001

    with tf.compat.v1.Session(graph=graph) as session:
        # 写入摘要
        writer = tf.summary.FileWriter(log_dir, session.graph)

        init.run()
        print('inited..')
        average_loss = 0
        for step in range(num_steps):
            batch_inputs, batch_labels = generate_batch(
                batch_size, num_skips, skip_window)
            feed_dict = {
                train_inputs: batch_inputs,
                train_labels: batch_labels
            }
            # 定义元变量
            run_metadata = tf.RunMetadata()

            _, summary, loss_val = session.run([optimizer, merged, loss],
                                               feed_dict=feed_dict,
                                               run_metadata=run_metadata)
            average_loss += loss_val

            writer.add_summary(summary, step)

            if step == (num_steps - 1):
                writer.add_run_metadata(run_metadata, 'step%d' % step)

            if step % 2000 == 0:
                if step > 0:
                    average_loss /= 2000
                    # 平均损失是对最近的2000个批次样本的估计。
                print('Average loss at step ', step, ': ', average_loss)
                average_loss = 0

            if step % 10000 == 0:
                sim = similarity.eval()
                for i in range(valid_size):
                    valid_word = reversed_dictionary[valid_examples[i]]
                    top_k = 8
                    nearest = (-sim[i, :]).argsort()[1:top_k + 1]
                    log_str = 'Nearest to %s:' % valid_word

                    print(
                        log_str, ', '.join([
                            reversed_dictionary[nearest[k]]
                            for k in range(top_k)
                        ]))
        final_embeddings = normalized_embeddings.eval()

        # 写下embedding的相应标签
        with open(log_dir + '/metadata.tsv', 'w') as f:
            for i in range(vocabulary_size):
                f.write(reversed_dictionary[i] + '\n')

        # 保存checkpoint
        saver.save(session, os.path.join(log_dir, 'model.ckpt'))

        # 配置Tensorboard
        config = projector.ProjectorConfig()
        embedding_conf = config.embeddings.add()
        embedding_conf.tensor_name = embeddings.name
        embedding_conf.metadata_path = os.path.join(log_dir, 'metadata.tsv')
        projector.visualize_embeddings(writer, config)
    writer.close()

    # Step 6: Visualize the embeddings.

    # pylint: disable=missing-docstring
    # Function to draw visualization of distance between embeddings.
    def plot_with_labels(low_dim_embs, labels, filename):
        assert low_dim_embs.shape[0] >= len(
            labels), 'More labels than embeddings'
        plt.figure(figsize=(18, 18))  # in inches
        for i, label in enumerate(labels):
            x, y = low_dim_embs[i, :]
            plt.scatter(x, y)
            plt.annotate(label,
                         xy=(x, y),
                         xytext=(5, 2),
                         textcoords='offset points',
                         ha='right',
                         va='bottom')

        plt.savefig(filename)

    try:
        # pylint: disable=g-import-not-at-top
        from sklearn.manifold import TSNE
        import matplotlib.pyplot as plt

        tsne = TSNE(perplexity=30,
                    n_components=2,
                    init='pca',
                    n_iter=5000,
                    method='exact')
        plot_only = 500
        low_dim_embs = tsne.fit_transform(final_embeddings[:plot_only, :])
        labels = [reversed_dictionary[i] for i in xrange(plot_only)]
        plot_with_labels(low_dim_embs, labels,
                         os.path.join(gettempdir(), 'tsne.png'))

    except ImportError as ex:
        print(
            'Please install sklearn, matplotlib, and scipy to show embeddings.'
        )
        print(ex)
Example #9
0
train_epochs = 25
batch_size = 100
display_step = 1

# 定义placeholder
x = tf.placeholder(tf.float32, [None, 784])
y = tf.placeholder(tf.float32, [None, 10])

# weights bias
W = tf.Variable(tf.zeros([784, 10]))
b = tf.Variable(tf.zeros([10]))

pred = tf.nn.softmax(tf.matmul(x, W) + b)

# 最小化交叉熵
cost = tf.reduce_mean(-tf.reduce_sum(y * tf.log(pred), reduction_indices=1))
# 梯度下降
optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost)

# 初始化所有参数
init = tf.global_variables_initializer()

with tf.Session() as sess:
    # 运行初始化
    sess.run(init)

    for epoch in range(train_epochs):
        avg_cost = 0.
        total_batch = int(mnist.train.num_examples / batch_size)
        # 循环所有的batchs
        for i in range(total_batch):
from com.hiekn.tensorflow.Mnist_TF_work_1_2 import input_data
import tensorflower as tf

mnist = input_data.read_data_sets('MNIST_data',one_hot=True)
sess = tf.InteractiveSession()

x = tf.placeholder("float",shape=[None,784])
y_ = tf.placeholder("float",shape=[None,10])

W = tf.Variable(tf.zeros([784,10]))
b = tf.Variable(tf.zeros([10]))
sess.run(tf.initialize_all_variables())

y = tf.nn.softmax(tf.matmul(x,W)+b)
cross_entropy = -tf.reduce_sum(y_*tf.log(y))

train_step = tf.train.GradientDescentOptimizer(0.01).minimize(cross_entropy)

for i in range(1000):
    batch = mnist.train.next_batch(50)
    train_step.run(feed_dict={x:batch[0],y_:batch[1]})

correct_prediction = tf.equal(tf.argmax(y,1),tf.argmax(y_,1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction,"float"))

print(accuracy.eval(feed_dict={x: mnist.test.images, y_: mnist.test.labels}))



def multihead_attention(queries,
                        keys,
                        num_units=None,
                        num_heads=0,
                        dropout_rate=0,
                        is_training=True,
                        causality=False,
                        scope="multihead_attention",
                        reuse=None):
    with tf.variable_scope(scope, reuse=reuse):
        if num_units is None:
            num_units = queries.get_shape().as_list()[-1]

        # linear projection
        Q = tf.layers.dense(queries, num_units, activation=tf.nn.relu)
        K = tf.layers.dense(keys, num_units, activation=tf.nn.relu)
        V = tf.layers.dense(keys, num_units, activation=tf.nn.relu)
        # split and concat
        Q_ = tf.concat(tf.split(Q, num_heads, axis=2), axis=0)
        K_ = tf.concat(tf.split(K, num_heads, axis=2), axis=0)
        V_ = tf.concat(tf.split(V, num_heads, axis=2), axis=0)

        outputs = tf.matmul(Q_, tf.transpose(K_, [0, 2, 1]))
        outputs = outputs / (K_.get_shape().as_list()[-1]**0.5)
        # mask
        key_masks = tf.sign(tf.abs(tf.reduce_sum(keys, axis=-1)))
        key_masks = tf.tile(key_masks, [num_heads, 1])
        key_masks = tf.tile(tf.expand_dims(key_masks, 1),
                            [1, tf.shape(queries)[1], 1])

        paddings = tf.ones_like(outputs) * (-2**32 + 1)
        outputs = tf.where(tf.equal(key_masks, 0), paddings, outputs)

        # masked from future
        if causality:
            diag_vals = tf.ones_like(outputs[0, :, :])
            tril = tf.contrib.linalg.LinearOperatorTril(diag_vals).to_dense()
            masks = tf.tile(tf.expand_dims(tril, 0),
                            [tf.shape(outputs)[0], 1, 1])

            paddings = tf.ones_like(masks) * (-2**32 + 1)
            outputs = tf.where(tf.equal(masks, 0), paddings, outputs)
        outputs = tf.nn.softmax(outputs)

        # query mask
        query_masks = tf.sign(tf.abs(tf.reduce_sum(queries, axis=-1)))
        query_masks = tf.tile(query_masks, [num_heads, 1])
        query_masks = tf.tile(tf.expand_dims(query_masks, -1),
                              [1, 1, tf.shape(keys)[1]])
        outputs *= query_masks

        outputs = tf.layers.dropout(outputs,
                                    rate=dropout_rate,
                                    training=tf.convert_to_tensor(is_training))

        outputs = tf.matmul(outputs, V_)
        # restore shape
        outputs = tf.concat(tf.split(outputs, num_heads, axis=0), axis=2)

        outputs += queries
        outputs = normalize(outputs)
    return outputs
print("开始下载数据集..")
mnist = input_data.read_data_sets('MNIST_data', one_hot=True)
print("下载完毕..")
sess = tf.InteractiveSession()
# 该函数可以更加灵活的构建代码,可以在运行计算的图的时候通过operation操作插入一些计算图。

x = tf.placeholder("float", shape=[None, 784])
y_ = tf.placeholder("float", shape=[None, 10])  # 占位符

W = tf.Variable(tf.zeros([784, 10]))
b = tf.Variable(tf.zeros([10]))  # 变量,跟占位符一样作为额外的输入量
sess.run(tf.initialize_all_variables())
y = tf.nn.softmax(tf.matmul(x, W) + b)  # 使用softmax计算每个分类的概率

cross_entropy = -tf.reduce_sum(y_ * tf.log(y))  # 交叉熵

train_step = tf.train.GradientDescentOptimizer(0.01).minimize(
    cross_entropy)  # 训练使用最小梯度下降,且最小化交叉熵loss
init = tf.global_variables_initializer()
for i in range(1000):
    batch = mnist.train.next_batch(50)  # load  mini-batchsize dataset
    train_step.run(feed_dict={x: batch[0], y_: batch[1]})
print("训练结束..")
"""
这段表达特别好:tf.argmax 是一个非常有用的函数,它能给出某个tensor对象在某一维上的其数据最大值所在的索引值。
由于标签向量是由0,1组成,因此最大值1所在的索引位置就是类别标签,比如tf.argmax(y,1)返回的是模型对于任一输入x预测到的标签值,
而 tf.argmax(y_,1) 代表正确的标签,我们可以用 tf.equal 来检测我们的预测是否真实标签匹配(索引位置一样表示匹配)。
"""
correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
accuarcy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
Example #13
0
def evaluation(logits, labels):
    correct = tf.nn.in_top_k(predictions=logits, targets=labels, k=1)
    return tf.reduce_sum(input_tensor=tf.cast(correct, tf.int32))
    3.465, 1.65, 2.904, 2.42, 2.94, 1.3
])

n_samples = train_x.shape[0]

X = tf.placeholder(tf.float32)
Y = tf.placeholder(tf.float32)

W = tf.Variable(rng.randn(), name="weights")
b = tf.Variable(rng.randn(), name="bias")

# 创建一个线性模型
pred = tf.add(tf.multiply(X, W), b)

# 设置损失函数(均方损失函数)
cost = tf.reduce_sum(tf.pow(pred - Y, 2)) / (2 * n_samples)

# 梯度下降
optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost)

# init
init = tf.global_variables_initializer()

# 开始训练
with tf.Session() as sess:
    # 运行初始化
    sess.run(init)

    # 填充所有训练数据
    for epoch in range(training_epochs):
        for (x, y) in zip(train_x, train_y):
def mean_square_fn(model_fn, inputs, labels):
    return tf.reduce_sum(tf.pow(model_fn(inputs) - labels,
                                2)) / (2 * n_samples)