Ejemplo n.º 1
0
def inference(images, hidden1_units, hidden2_units):
    """Build a two-hidden-layer fully connected classifier up to the logits.

    Each layer lives in its own name scope (``hidden1``, ``hidden2``,
    ``softmax_linear``) and owns a ``weights`` and a ``biases`` variable.

    Args:
        images: Input tensor; it is multiplied by an
            ``[IMAGE_PIXELS, hidden1_units]`` weight matrix.
        hidden1_units: Width of the first hidden layer.
        hidden2_units: Width of the second hidden layer.

    Returns:
        The logits tensor of the final linear layer (``NUM_CLASSES`` columns).
        No softmax is applied here.
    """
    # First hidden layer.
    with tf.compat.v1.name_scope('hidden1'):
        # The name belongs to the Variable, not to the initial-value op;
        # previously the variable silently got the default name here, unlike
        # the 'softmax_linear' layer below.
        weights = tf.Variable(
            tf.random.truncated_normal(
                [IMAGE_PIXELS, hidden1_units],
                stddev=1.0 / math.sqrt(float(IMAGE_PIXELS))),
            name='weights')
        biases = tf.Variable(tf.zeros([hidden1_units]), name='biases')
        hidden1 = tf.nn.relu(tf.matmul(images, weights) + biases)

    # Second hidden layer.
    with tf.compat.v1.name_scope('hidden2'):
        weights = tf.Variable(
            tf.random.truncated_normal(
                [hidden1_units, hidden2_units],
                stddev=1.0 / math.sqrt(float(hidden1_units))),
            name='weights')
        biases = tf.Variable(tf.zeros([hidden2_units]), name='biases')
        hidden2 = tf.nn.relu(tf.matmul(hidden1, weights) + biases)

    # Final linear layer producing the logits that feed a softmax.
    with tf.compat.v1.name_scope('softmax_linear'):
        weights = tf.Variable(
            tf.random.truncated_normal(
                [hidden2_units, NUM_CLASSES],
                stddev=1.0 / math.sqrt(float(hidden2_units))),
            name='weights')
        biases = tf.Variable(tf.zeros([NUM_CLASSES]), name='biases')
        logits = tf.matmul(hidden2, weights) + biases

    return logits
Ejemplo n.º 2
0
def encoder(x):
    """Encode ``x`` with two sigmoid-activated dense layers.

    Parameters are read from the module-level ``weights`` and ``biases``
    dictionaries (keys ``encoder_h1``/``encoder_b1`` and
    ``encoder_h2``/``encoder_b2``).

    Returns:
        The activation of the second layer.
    """
    first_affine = tf.add(tf.matmul(x, weights['encoder_h1']),
                          biases['encoder_b1'])
    first_hidden = tf.nn.sigmoid(first_affine)

    second_affine = tf.add(tf.matmul(first_hidden, weights['encoder_h2']),
                           biases['encoder_b2'])
    return tf.nn.sigmoid(second_affine)
Ejemplo n.º 3
0
def neural_network(x):
    """Apply three stacked affine layers to ``x`` and return the result.

    No activation function is applied between the layers. Parameters come
    from the module-level ``weights`` / ``biases`` dictionaries (keys
    ``h1``/``b1``, ``h2``/``b2`` and ``out``/``out``).
    """
    # First hidden fully connected layer.
    hidden_a = tf.add(tf.matmul(x, weights['h1']), biases['b1'])
    # Second hidden fully connected layer.
    hidden_b = tf.add(tf.matmul(hidden_a, weights['h2']), biases['b2'])
    # Output layer.
    return tf.matmul(hidden_b, weights['out']) + biases['out']
Ejemplo n.º 4
0
def multilayer_perceptron(x, weights, biases):
    """Two ReLU hidden layers followed by a linear output layer.

    Args:
        x: Input tensor.
        weights: Mapping with keys ``'h1'``, ``'h2'`` and ``'out'``.
        biases: Mapping with keys ``'b1'``, ``'b2'`` and ``'out'``.

    Returns:
        The output of the final linear layer (no activation).
    """
    # First hidden layer: affine transform, then ReLU.
    hidden_1 = tf.nn.relu(tf.add(tf.matmul(x, weights['h1']), biases['b1']))
    # Second hidden layer: affine transform, then ReLU.
    hidden_2 = tf.nn.relu(
        tf.add(tf.matmul(hidden_1, weights['h2']), biases['b2']))
    # Linear output layer.
    return tf.matmul(hidden_2, weights['out']) + biases['out']
def scaled_dotproduct_attention(queries,
                                keys,
                                num_unit=None,
                                num_heads=0,
                                dropout_rate=0,
                                is_tranining=True,
                                causality=False,
                                scope="scaled_att",
                                reuse=None):
    """Scaled dot-product attention with masking, dropout and a residual.

    Args:
        queries: 3-D tensor; axis 1 is the query position axis.
        keys: 3-D tensor; axis 1 is the key position axis. The values are
            also projected from ``keys``.
        num_unit: Width of the Q/K/V projections. Defaults to the static size
            of the last axis of ``queries``.
        num_heads: Unused; kept so existing callers keep working.
        dropout_rate: Dropout rate applied to the attention weights.
        is_tranining: Whether dropout is active (the parameter's spelling is
            part of the public interface and is kept as is).
        causality: When true, each query position may only attend to key
            positions at or before it.
        scope: Variable scope name.
        reuse: Passed to ``tf.variable_scope``.

    Returns:
        ``normalize(attention_output + queries)``, where ``normalize`` is a
        helper defined elsewhere in this module.
    """
    with tf.variable_scope(scope, reuse=reuse):
        if num_unit is None:
            # as_list is a method: it must be called before indexing
            # (the previous `as_list[-1]` raised TypeError).
            num_unit = queries.get_shape().as_list()[-1]
        # Linear projections.
        Q = tf.layers.dense(queries, num_unit, activation=tf.nn.relu)
        K = tf.layers.dense(keys, num_unit, activation=tf.nn.relu)
        V = tf.layers.dense(keys, num_unit, activation=tf.nn.relu)

        # Raw attention scores, scaled by sqrt of the projection width.
        outputs = tf.matmul(Q, tf.transpose(K, [0, 2, 1]))
        outputs = outputs / (K.get_shape().as_list()[-1]**0.5)

        # Key mask: key positions whose feature vector sums to zero are
        # treated as padding and get a very large negative score.
        key_masks = tf.sign(tf.abs(tf.reduce_sum(keys, axis=-1)))
        key_masks = tf.tile(tf.expand_dims(key_masks, 1),
                            [1, tf.shape(queries)[1], 1])

        paddings = tf.ones_like(outputs) * (-2**32 + 1)
        outputs = tf.where(tf.equal(key_masks, 0), paddings, outputs)

        # Causal mask: hide future positions from the model.
        if causality:
            diag_vals = tf.ones_like(outputs[0, :, :])
            # Lower-triangular matrix of ones (diagonal included). band_part
            # avoids the version-specific tf.contrib.linalg operator class.
            tril = tf.matrix_band_part(diag_vals, -1, 0)
            masks = tf.tile(tf.expand_dims(tril, 0),
                            [tf.shape(outputs)[0], 1, 1])

            paddings = tf.ones_like(masks) * (-2**32 + 1)
            outputs = tf.where(tf.equal(masks, 0), paddings, outputs)
        outputs = tf.nn.softmax(outputs)

        # Query mask: zero the attention rows of padded query positions.
        query_masks = tf.sign(tf.abs(tf.reduce_sum(queries, axis=-1)))
        query_masks = tf.tile(tf.expand_dims(query_masks, -1),
                              [1, 1, tf.shape(keys)[1]])
        outputs *= query_masks
        outputs = tf.layers.dropout(
            outputs,
            rate=dropout_rate,
            training=tf.convert_to_tensor(is_tranining))
        # Weighted sum of the values.
        outputs = tf.matmul(outputs, V)
        # Residual connection.
        # NOTE(review): assumes num_unit is compatible with the last axis of
        # queries so the addition is well-formed; confirm with callers.
        outputs += queries
        outputs = normalize(outputs)
    return outputs
Ejemplo n.º 6
0
    def forward(self, examples, labels):
        """Build the forward graph of the skip-gram model.

        Creates the embedding matrix, the output-layer weights and bias and
        the global step, draws negative samples with a unigram sampler, and
        computes logits for the true labels and for the sampled candidates.

        Args:
            examples: Ids looked up in the embedding matrix.
            labels: Ids of the true context words; reshaped to
                ``[batch_size, 1]`` for the sampler.

        Returns:
            Tuple ``(true_logits, sampled_logits)``.
        """
        options = self._options

        # Embedding matrix [vocab_size, emb_dim], uniform in +/- 0.5/emb_dim.
        bound = 0.5 / options.emb_dim
        embedding_matrix = tf.Variable(
            tf.random_uniform([options.vocab_size, options.emb_dim],
                              -bound, bound),
            name="emb")
        self._emb = embedding_matrix

        # Output-layer weights, stored transposed: [vocab_size, emb_dim].
        out_weights = tf.Variable(
            tf.zeros([options.vocab_size, options.emb_dim]), name="sm_w_t")
        # Output-layer bias: [vocab_size].
        out_bias = tf.Variable(tf.zeros([options.vocab_size]), name="sm_b")

        # Scalar step counter.
        self.global_step = tf.Variable(0, name="global_step")

        # The candidate sampler expects int64 labels of shape [batch_size, 1].
        label_column = tf.reshape(
            tf.cast(labels, dtype=tf.int64), [options.batch_size, 1])

        # Negative sampling from the distorted unigram distribution.
        negative_ids, _, _ = tf.nn.fixed_unigram_candidate_sampler(
            true_classes=label_column,
            num_true=1,
            num_sampled=options.num_samples,
            unique=True,
            range_max=options.vocab_size,
            distortion=0.75,
            unigrams=options.vocab_counts.tolist())

        # Embeddings of the input examples.
        input_vectors = tf.nn.embedding_lookup(embedding_matrix, examples)

        # Output weights and biases of the true labels.
        label_weights = tf.nn.embedding_lookup(out_weights, labels)
        label_bias = tf.nn.embedding_lookup(out_bias, labels)

        # Output weights and biases of the sampled (negative) ids.
        negative_weights = tf.nn.embedding_lookup(out_weights, negative_ids)
        negative_bias = tf.nn.embedding_lookup(out_bias, negative_ids)

        # True logits: row-wise dot product plus the label bias.
        row_dot = tf.reduce_sum(tf.multiply(input_vectors, label_weights), 1)
        true_logits = row_dot + label_bias

        # Sampled logits: every example against every sampled candidate.
        negative_bias_row = tf.reshape(negative_bias, [options.num_samples])
        sampled_logits = tf.matmul(
            input_vectors, negative_weights,
            transpose_b=True) + negative_bias_row
        return true_logits, sampled_logits
Ejemplo n.º 7
0
    def __init__(self, is_training, config):
        """Build the unrolled LSTM language-model graph.

        Args:
            is_training: When truthy, dropout is applied (if
                ``config.keep_prob < 1``) and the training op is created.
            config: Object providing ``batch_size``, ``num_steps``,
                ``hidden_size``, ``vocab_size``, ``keep_prob``,
                ``num_layers`` and ``max_grad_norm``.
        """
        self.batch_size = batch_size = config.batch_size
        self.num_steps = num_steps = config.num_steps
        hidden_size = config.hidden_size
        vocab_size = config.vocab_size

        # Integer placeholders for the input ids and their targets.
        self._input_data = tf.placeholder(tf.int32, [batch_size, num_steps])
        self._targets = tf.placeholder(tf.int32, [batch_size, num_steps])

        # One LSTM cell, optionally wrapped in dropout, repeated per layer.
        base_cell = rnn_cell.BasicLSTMCell(hidden_size, forget_bias=0.0)
        if is_training and config.keep_prob < 1:
            base_cell = rnn_cell.DropoutWrapper(
                base_cell, output_keep_prob=config.keep_prob)
        stacked_cell = rnn_cell.MultiRNNCell(
            [base_cell] * config.num_layers)

        self._initial_state = stacked_cell.zero_state(batch_size, tf.float32)

        # The embedding lookup is pinned to the CPU.
        with tf.device("/cpu:0"):
            embedding = tf.get_variable(
                "embedding", [vocab_size, hidden_size])
            inputs = tf.nn.embedding_lookup(embedding, self._input_data)

        if is_training and config.keep_prob < 1:
            inputs = tf.nn.dropout(inputs, config.keep_prob)

        # Unroll the network by hand; variables are created on the first
        # step and reused on every later one.
        step_outputs = []
        state_history = []
        state = self._initial_state
        with tf.variable_scope("RNN"):
            for step in range(num_steps):
                if step > 0:
                    tf.get_variable_scope().reuse_variables()
                step_output, state = stacked_cell(inputs[:, step, :], state)
                step_outputs.append(step_output)
                state_history.append(state)

        # Flatten all steps to rows of width hidden_size and project them
        # onto the vocabulary.
        flat_output = tf.reshape(tf.concat(step_outputs, 1),
                                 [-1, hidden_size])
        softmax_w = tf.get_variable("softmax_w", [hidden_size, vocab_size])
        softmax_b = tf.get_variable("softmax_b", [vocab_size])
        logits = tf.matmul(flat_output, softmax_w) + softmax_b

        # Per-token loss with every token weighted 1.
        flat_targets = tf.reshape(self._targets, [-1])
        token_weights = tf.ones([batch_size * num_steps])
        token_loss = tf.contrib.legacy_seq2seq.sequence_loss_by_example(
            [logits], [flat_targets], [token_weights], vocab_size)
        cost = tf.reduce_sum(token_loss) / batch_size
        self._cost = cost
        self._final_state = state_history[-1]

        if not is_training:
            return

        # Training only: plain SGD on globally norm-clipped gradients.
        self._lr = tf.Variable(0.0, trainable=False)
        trainable = tf.trainable_variables()
        clipped_grads, _ = tf.clip_by_global_norm(
            tf.gradients(cost, trainable), config.max_grad_norm)
        # NOTE(review): `self.lr` is presumably an accessor for `self._lr`
        # defined elsewhere in the class; confirm.
        optimizer = tf.train.GradientDescentOptimizer(self.lr)
        self._train_op = optimizer.apply_gradients(
            zip(clipped_grads, trainable))
Ejemplo n.º 8
0
def RNN(x, weights, biases):
    """Run an LSTM over ``x`` and project its last output linearly.

    Args:
        x: Input tensor, unstacked along axis 1 into ``timesteps`` tensors
            (``timesteps`` and ``num_hidden`` are module-level settings).
        weights: Mapping with key ``'out'``.
        biases: Mapping with key ``'out'``.

    Returns:
        The final time step's output multiplied by ``weights['out']`` plus
        ``biases['out']``.
    """
    # static_rnn takes a list with one tensor per time step.
    step_inputs = tf.unstack(x, timesteps, 1)

    cell = rnn.BasicLSTMCell(num_hidden, forget_bias=1.0)

    step_outputs, _ = rnn.static_rnn(cell, step_inputs, dtype=tf.float32)

    # Linear activation on the last output of the recurrence.
    final_output = step_outputs[-1]
    return tf.matmul(final_output, weights['out']) + biases['out']
Ejemplo n.º 9
0
    def build_eval_graph(self):
        """Build the evaluation graph (analogy and nearest-word lookups).

        An analogy task predicts a fourth word d from three words a, b, c
        (e.g. a=italy, b=rome, c=france should give d=paris). The three
        placeholders are fed vectors of word ids, one entry per analogy in
        the batch. The placeholders and result tensors are stored on
        ``self`` for later use.
        """
        # Placeholders for the word ids of a, b and c.
        ids_a = tf.placeholder(dtype=tf.int32)
        ids_b = tf.placeholder(dtype=tf.int32)
        ids_c = tf.placeholder(dtype=tf.int32)

        # Embeddings L2-normalised along axis 1.
        unit_emb = tf.nn.l2_normalize(self._emb, 1)

        vec_a = tf.gather(unit_emb, ids_a)
        vec_b = tf.gather(unit_emb, ids_b)
        vec_c = tf.gather(unit_emb, ids_c)

        # The predicted vector is c + (b - a).
        offset = vec_b - vec_a
        predicted = vec_c + offset

        # Dot product of the prediction with every normalised embedding.
        scores = tf.matmul(predicted, unit_emb, transpose_b=True)

        # Keep the four best-scoring words per analogy.
        _, top_ids = tf.nn.top_k(scores, 4)

        # Nearest-neighbour lookup for arbitrary word ids.
        query_ids = tf.placeholder(dtype=tf.int32)
        query_vecs = tf.gather(unit_emb, query_ids)
        query_scores = tf.matmul(query_vecs, unit_emb, transpose_b=True)
        neighbour_count = min(1000, self._options.vocab_size)
        neighbour_vals, neighbour_ids = tf.nn.top_k(
            query_scores, neighbour_count)

        self._analogy_a = ids_a
        self._analogy_b = ids_b
        self._analogy_c = ids_c
        self._analogy_pred_idx = top_ids
        self._nearby_word = query_ids
        self._nearby_val = neighbour_vals
        self._nearby_idx = neighbour_ids
Ejemplo n.º 10
0
def inference(images, hidden1_units, hidden2_units):
    """Build the MNIST model up to the point where it yields logits.

  Args:
    images: Images placeholder (the network input).
    hidden1_units: Size of the first hidden layer.
    hidden2_units: Size of the second hidden layer.

  Returns:
    softmax_linear: Output tensor with the computed logits.
  """
    # First hidden layer.
    with tf.name_scope('hidden1'):
        # truncated_normal(shape, mean, stddev) draws from a normal
        # distribution and re-draws any value more than two standard
        # deviations away from the mean.
        initial_w1 = tf.truncated_normal(
            [IMAGE_PIXELS, hidden1_units],
            stddev=1.0 / math.sqrt(float(IMAGE_PIXELS)))
        weights = tf.Variable(initial_w1, name='weights')
        biases = tf.Variable(tf.zeros([hidden1_units]), name='biases')
        hidden1 = tf.nn.relu(tf.matmul(images, weights) + biases)

    # Second hidden layer.
    with tf.name_scope('hidden2'):
        initial_w2 = tf.truncated_normal(
            [hidden1_units, hidden2_units],
            stddev=1.0 / math.sqrt(float(hidden1_units)))
        weights = tf.Variable(initial_w2, name='weights')
        biases = tf.Variable(tf.zeros([hidden2_units]), name='biases')
        hidden2 = tf.nn.relu(tf.matmul(hidden1, weights) + biases)

    # Linear output layer.
    with tf.name_scope('softmax_linear'):
        initial_w3 = tf.truncated_normal(
            [hidden2_units, NUM_CLASSES],
            stddev=1.0 / math.sqrt(float(hidden2_units)))
        weights = tf.Variable(initial_w3, name='weights')
        biases = tf.Variable(tf.zeros([NUM_CLASSES]), name='biases')
        logits = tf.matmul(hidden2, weights) + biases

    return logits
def conv_network(x, weights, biases, dropout):
    """Two conv + max-pool stages, a ReLU dense layer with dropout, output.

    Args:
        x: Input tensor, reshaped here to ``[-1, 28, 28, 1]``.
        weights: Mapping with keys ``'wc1'``, ``'wc2'``, ``'wd1'``, ``'out'``.
        biases: Mapping with keys ``'bc1'``, ``'bc2'``, ``'bd1'``, ``'out'``.
        dropout: Forwarded unchanged as the second positional argument of
            ``tf.nn.dropout``.

    Returns:
        The output of the final affine layer (no activation).
    """
    # Reshape the input to 4-D: [batch, height, width, depth].
    images = tf.reshape(x, shape=[-1, 28, 28, 1])

    # First convolution followed by max pooling (conv2d / maxpool2d are
    # helpers defined elsewhere in the module).
    stage_1 = conv2d(images, weights['wc1'], biases['bc1'])
    stage_1 = maxpool2d(stage_1, k=2)

    # Second convolution followed by max pooling.
    stage_2 = conv2d(stage_1, weights['wc2'], biases['bc2'])
    stage_2 = maxpool2d(stage_2, k=2)

    # Flatten to the input width expected by the dense layer.
    flat_width = weights['wd1'].get_shape().as_list()[0]
    dense = tf.reshape(stage_2, [-1, flat_width])
    dense = tf.nn.relu(
        tf.add(tf.matmul(dense, weights['wd1']), biases['bd1']))

    # Dropout, then the output layer.
    dense = tf.nn.dropout(dense, dropout)
    return tf.add(tf.matmul(dense, weights['out']), biases['out'])
Ejemplo n.º 12
0
def BiRNN(x, weights, biases):
    """Run a bidirectional LSTM over ``x`` and project its last output.

    Args:
        x: Input tensor, unstacked along axis 1 into ``timesteps`` tensors
            (``timesteps`` and ``num_hidden`` are module-level settings).
        weights: Mapping with key ``'out'``.
        biases: Mapping with key ``'out'``.

    Returns:
        The last time step's output multiplied by ``weights['out']`` plus
        ``biases['out']``.
    """
    x = tf.unstack(x, timesteps, 1)
    lstm_fw_cell = rnn.BasicLSTMCell(num_hidden, forget_bias=1.0)
    lstm_bw_cell = rnn.BasicLSTMCell(num_hidden, forget_bias=1.0)

    # static_bidirectional_rnn takes single forward/backward cells.
    # stack_bidirectional_rnn, used before, requires *lists* of cells and
    # rejected these arguments in both the try and the except branch.
    # The graph is built once; retrying the call after a failure would
    # create the ops a second time.
    result = rnn.static_bidirectional_rnn(lstm_fw_cell,
                                          lstm_bw_cell,
                                          x,
                                          dtype=tf.float32)
    # The call returns (outputs, fw_state, bw_state); a bare list of outputs
    # (as very old releases returned) is accepted as well.
    outputs = result[0] if isinstance(result, tuple) else result

    return tf.matmul(outputs[-1], weights['out']) + biases['out']
Ejemplo n.º 13
0
def dynamicRNN(x, seqlen, weights, biases):
    """Run an LSTM over variable-length sequences and project the last
    valid output of each sequence.

    Args:
        x: Input tensor, unstacked along axis 1 into ``seq_max_len`` tensors
            (``seq_max_len`` and ``n_hidden`` are module-level settings).
        seqlen: Per-sample sequence lengths, passed to ``static_rnn`` as
            ``sequence_length``.
        weights: Mapping with key ``'out'``.
        biases: Mapping with key ``'out'``.

    Returns:
        For every sample, the output at step ``seqlen - 1`` multiplied by
        ``weights['out']`` plus ``biases['out']``.
    """
    step_inputs = tf.unstack(x, seq_max_len, 1)

    cell = tf.contrib.rnn.BasicLSTMCell(n_hidden)

    step_outputs, _ = tf.contrib.rnn.static_rnn(cell,
                                                step_inputs,
                                                dtype=tf.float32,
                                                sequence_length=seqlen)

    # The per-step list is stacked on a new leading axis and transposed so
    # that the first axis indexes samples and the second indexes steps.
    stacked = tf.transpose(tf.stack(step_outputs), [1, 0, 2])

    # Row of each sample's last valid step once the first two axes are
    # flattened: sample * seq_max_len + (length - 1).
    num_samples = tf.shape(stacked)[0]
    row_starts = tf.range(0, num_samples) * seq_max_len
    last_rows = row_starts + (seqlen - 1)

    flat_outputs = tf.reshape(stacked, [-1, n_hidden])
    last_outputs = tf.gather(flat_outputs, last_rows)

    return tf.matmul(last_outputs, weights['out']) + biases['out']
# Evaluate the sum and product of `a` and `b` in a session.
# NOTE(review): `a` and `b` are not defined in the visible part of this
# snippet; they are expected to come from earlier lines - confirm.
with tf.Session() as sess:
    print("Add constant: %i" % sess.run(a+b))
    print("multy constant: %i" % sess.run(a*b))

# The same arithmetic with placeholders: values are supplied at run time
# through feed_dict.
a = tf.placeholder(tf.int16)
b = tf.placeholder(tf.int16)

add = tf.add(a,b)
mul = tf.multiply(a,b)

with tf.Session() as sess:
    print("add with variables: %i" % sess.run(add,feed_dict={a:2,b:3}))
    print("multi with variables: %i" % sess.run(mul,feed_dict={a:3,b:4}))

# Matrix multiplication: a 1x2 matrix times a 2x1 matrix.
matrix1 = tf.constant([[3.,3.]])
matrix2 = tf.constant([[2.],[2.]])

product = tf.matmul(matrix1,matrix2)

with tf.Session() as sess:
    res = sess.run(product)
    print(res)






Ejemplo n.º 15
0
# First convolution layer: convolution + bias, ReLU, then pooling.
# NOTE(review): x_image, W_conv1, b_conv1, y_ and the helpers conv2d,
# max_pool_2x2, weight_variable and bias_variable are defined outside the
# visible part of this snippet.
h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1) + b_conv1)
h_pool1 = max_pool_2x2(h_conv1)

# Second convolution layer.
W_conv2 = weight_variable([5, 5, 32, 64])
b_conv2 = bias_variable([64])

h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)
h_pool2 = max_pool_2x2(h_conv2)

# Fully connected (dense) layer.
w_fc1 = weight_variable([7 * 7 * 64, 1024])
b_fc1 = bias_variable([1024])

# Flatten the pooled feature maps to rows of 7*7*64 values.
h_pool2_flat = tf.reshape(h_pool2, [-1, 7 * 7 * 64])
h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, w_fc1) + b_fc1)
# Dropout; keep_prob is fed at run time.
keep_prob = tf.placeholder("float")
h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)

# Output layer.
w_fc2 = weight_variable([1024, 10])
b_fc2 = bias_variable([10])
# Softmax over the 10 outputs.
y_conv = tf.nn.softmax(tf.matmul(h_fc1_drop, w_fc2) + b_fc2)
# Loss: cross entropy summed over all elements.
cross_entropy = -tf.reduce_sum(y_ * tf.log(y_conv))
# Optimisation method: Adam with learning rate 1e-4.
train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)
# Element-wise check whether the predicted class equals the labelled class.
correct_prediction = tf.equal(tf.argmax(y_conv, 1), tf.argmax(y_, 1))
Ejemplo n.º 16
0
def matpow(M, n):
    """Return ``M`` raised to the ``n``-th matrix power.

    Args:
        M: Square matrix accepted by ``tf.matmul``.
        n: Exponent. Values below 1 are treated like 1, i.e. ``M`` itself is
            returned (unchanged behaviour for that case).

    Returns:
        ``M`` multiplied by itself so that the result is ``M ** n``.
        Previously the recursion stopped one step too late and produced
        ``M ** (n + 1)`` for every ``n >= 1``.
    """
    result = M
    remaining = n
    # n - 1 multiplications turn M into M ** n; iterating also avoids deep
    # recursion for large exponents.
    while remaining > 1:
        result = tf.matmul(M, result)
        remaining -= 1
    return result
Ejemplo n.º 17
0
                   biases=nce_biases,
                   labels=Y,
                   inputs=X_embed,
                   num_sampled=num_sampled,
                   num_classes=vocab_size))

# Optimizer: computes the gradients and applies the back-propagation update.
optimizer = tf.train.GradientDescentOptimizer(learning_rate)
train_op = optimizer.minimize(loss_op)

# Evaluation.
# Cosine similarity between the input embeddings and every embedding vector.
# NOTE(review): X_embed is divided by the norm over ALL of its elements (no
# axis is given), whereas `embedding` is normalised per row - confirm this is
# intended.
X_embed_norm = X_embed / tf.sqrt(tf.reduce_sum(tf.square(X_embed)))
embedding_norm = embedding / tf.sqrt(
    tf.reduce_sum(tf.square(embedding), 1, keep_dims=True))
cosine_sim_op = tf.matmul(X_embed_norm, embedding_norm, transpose_b=True)

# Op that initialises all variables.
init = tf.global_variables_initializer()

with tf.Session() as sess:
    sess.run(init)

    x_test = np.array([word2id[w] for w in eval_words])

    average_loss = 0

    for step in range(1, num_steps + 1):
        batch_x, batch_y = next_batch(batch_size, num_skips, skip_window)
        # 训练
        _, loss = sess.run([train_op, loss_op],
                           feed_dict={
Ejemplo n.º 18
0
# Basic tensor operations with TensorFlow's eager API (no Session needed).
# numpy is used below (np.float32, np.array) and is imported here so the
# snippet is self-contained.
import numpy as np
# The module is named `tensorflow`; `tensorflower` cannot be imported.
import tensorflow as tf
import tensorflow.contrib.eager as tfe

# Enable the eager API.
print("set eager mode..")
tfe.enable_eager_execution()

# Define constant tensors.
print("define constant tensors..")
a = tf.constant(2)
b = tf.constant(3)

# Operations are evaluated immediately, without a Session.
print("Running oprations without Session")
c = a + b
print("a+b= %i" % c)
d = a * b
print("a*b=%i" % d)

# Tensors and NumPy arrays can be mixed in the same operation.
print("mixing op with Tensors and Numpy arrays")
a = tf.constant([[2., 1.], [1., 0.]], dtype=np.float32)
b = np.array([[3., 0.], [5., 1.]], dtype=np.float32)
c = a + b
print("a+b=%s" % c)
d = tf.matmul(a, b)
print("a*b=%s" % d)

# Element-wise iteration over the tensor.
print("Iterate through Tensor 'a'")
for i in range(a.shape[0]):
    for j in range(a.shape[1]):
        print(a[i][j])
Ejemplo n.º 19
0
def word2vec_basic(log_dir):
    """Train a skip-gram word2vec model on text8 and log it for TensorBoard.

    The function downloads (if needed) and verifies the text8 archive,
    builds a 50,000-word vocabulary, trains embeddings with an NCE loss,
    writes summaries, a checkpoint and projector metadata under ``log_dir``
    and finally tries to save a t-SNE plot of the first 500 vocabulary ids
    to the system temp directory.

    Relies on two module-level names defined outside this function:
    ``data_index`` (the corpus cursor used by ``generate_batch``) and
    ``_hash_file``.

    Args:
        log_dir: Directory for the TensorBoard summaries, the checkpoint and
            ``metadata.tsv``; created when it does not exist.

    Raises:
        Exception: If the downloaded archive fails the hash or size check.
    """
    # Create the directory for the TensorBoard data.
    if not os.path.exists(log_dir):
        os.makedirs(log_dir)

    # Step 1: download the data.
    url = 'http://mattmahoney.net/dc/'

    def maybe_download(filename, expected_bytes, sha256=None):
        """Download ``filename`` if absent and verify its hash and size."""
        local_filename = os.path.join(gettempdir(), filename)
        if not os.path.exists(local_filename):
            local_filename, _ = urllib.request.urlretrieve(
                url + filename, local_filename)
        statinfo = os.stat(local_filename)

        if sha256 and _hash_file(local_filename) != sha256:
            raise Exception('Failed to verify ' + local_filename +
                            ' due to hash '
                            'mismatch. Can you get to it with a browser?')

        if statinfo.st_size == expected_bytes:
            print("found and verified", filename)
        else:
            print(statinfo.st_size)
            raise Exception('Failed to verify ' + local_filename +
                            '. Can you get to it with a browser?')
        return local_filename

    filename = maybe_download(
        'text8.zip',
        31344016,
        sha256=
        'a6640522afe85d1963ad56c05b0ede0a0c000dddc9671758a6cc09b7a38e5232')

    def read_data(filename):
        """Return the first file in the zip archive as a list of words."""
        with zipfile.ZipFile(filename) as f:
            data = tf.compat.as_str(f.read(f.namelist()[0])).split()
        return data

    vocabulary = read_data(filename)
    print('data_size', len(vocabulary))

    # Step 2: build the dictionary and replace rare words with UNK.
    vocabulary_size = 50000

    def build_dataset(words, n_words):
        """Map ``words`` to ids, keeping the ``n_words - 1`` most common."""
        count = [['UNK', -1]]
        count.extend(collections.Counter(words).most_common(n_words - 1))
        dictionary = {word: index for index, (word, _) in enumerate(count)}
        data = []
        unk_count = 0
        for word in words:
            index = dictionary.get(word, 0)
            if index == 0:  # dictionary['UNK']
                unk_count += 1
            data.append(index)
        count[0][1] = unk_count
        reversed_dictionary = dict(zip(dictionary.values(), dictionary.keys()))
        return data, count, dictionary, reversed_dictionary

    # data: the corpus as a list of word ids
    # count: [word, occurrences] entries, UNK first
    # dictionary: word -> id
    # reversed_dictionary: id -> word
    data, count, dictionary, reversed_dictionary = build_dataset(
        vocabulary, vocabulary_size)
    del vocabulary  # The raw word list is no longer needed.
    print('Most common words (+UNK)', count[:5])
    print('Sample data', data[:10],
          [reversed_dictionary[i] for i in data[:10]])

    def generate_batch(batch_size, num_skips, skip_window):
        """Produce one (batch, labels) pair for the skip-gram model."""
        global data_index
        assert batch_size % num_skips == 0
        assert num_skips <= 2 * skip_window
        batch = np.ndarray(shape=(batch_size), dtype=np.int32)
        labels = np.ndarray(shape=(batch_size, 1), dtype=np.int32)
        # Window size: skip_window words on each side plus the centre word.
        span = 2 * skip_window + 1
        buffer = collections.deque(maxlen=span)
        if data_index + span > len(data):
            data_index = 0
        buffer.extend(data[data_index:data_index + span])  # Fill one window.
        data_index += span
        for i in range(batch_size // num_skips):
            context_words = [w for w in range(span) if w != skip_window]
            words_to_use = random.sample(context_words, num_skips)
            # The loop variable no longer shadows the `context_words` list.
            for j, context_word in enumerate(words_to_use):
                batch[i * num_skips + j] = buffer[skip_window]
                labels[i * num_skips + j, 0] = buffer[context_word]
            if data_index == len(data):
                buffer.extend(data[0:span])
                data_index = span
            else:
                buffer.append(data[data_index])
                data_index += 1
        # Backtrack a little bit to avoid skipping words in the end of a batch
        data_index = (data_index - span) % len(data)
        return batch, labels

    batch, labels = generate_batch(batch_size=8, num_skips=2, skip_window=1)
    for i in range(8):
        print(batch[i], reversed_dictionary[batch[i]], '->', labels[i, 0],
              reversed_dictionary[labels[i, 0]])

    # Build and train the model.

    batch_size = 128
    embedding_size = 128  # Dimension of the embedding vectors.
    skip_window = 1  # How many words to consider left and right.
    num_skips = 2  # How many times to reuse an input to generate a label.
    num_sampled = 64  # Number of negative examples to sample.

    # A random validation set is drawn from the lowest word ids, which are
    # the most frequent words. These three values are only used to display
    # nearest neighbours; they do not affect the computation.
    valid_size = 16  # Number of random words to evaluate similarity on.
    valid_window = 100  # Validation ids are drawn from [0, valid_window).
    valid_examples = np.random.choice(valid_window, valid_size, replace=False)

    graph = tf.Graph()

    with graph.as_default():

        # Input data.
        with tf.name_scope('input'):
            train_inputs = tf.placeholder(tf.int32, shape=[batch_size])
            train_labels = tf.placeholder(tf.int32, shape=[batch_size, 1])
            valid_dataset = tf.constant(valid_examples, dtype=tf.int32)

        # Ops and variables pinned to the CPU.
        with tf.device('/cpu:0'):
            with tf.name_scope('embeddings'):
                embeddings = tf.Variable(
                    tf.random_uniform([vocabulary_size, embedding_size], -1.0,
                                      1.0))
                embed = tf.nn.embedding_lookup(embeddings, train_inputs)

            # Variables for the NCE loss.
            with tf.name_scope('weights'):
                nce_weights = tf.Variable(
                    tf.truncated_normal([vocabulary_size, embedding_size],
                                        stddev=1.0 /
                                        math.sqrt(embedding_size)))

            with tf.name_scope('biases'):
                nce_biases = tf.Variable(tf.zeros([vocabulary_size]))

        # Average NCE loss for the batch; a new sample of negative labels is
        # drawn each time the loss is evaluated.
        with tf.name_scope('loss'):
            loss = tf.reduce_mean(
                tf.nn.nce_loss(weights=nce_weights,
                               biases=nce_biases,
                               labels=train_labels,
                               inputs=embed,
                               num_sampled=num_sampled,
                               num_classes=vocabulary_size))
        # Record the loss as a scalar summary.
        tf.summary.scalar('loss', loss)

        # SGD optimizer with learning rate 1.0.
        with tf.name_scope('opytimizer'):
            optimizer = tf.train.GradientDescentOptimizer(1.0).minimize(loss)
        # Cosine similarity between the validation words and all embeddings.
        # `keepdims` replaces the deprecated `keep_dims` spelling.
        norm = tf.sqrt(tf.reduce_sum(tf.square(embeddings), 1, keepdims=True))
        normalized_embeddings = embeddings / norm
        valid_embeddings = tf.nn.embedding_lookup(normalized_embeddings,
                                                  valid_dataset)
        similarity = tf.matmul(valid_embeddings,
                               normalized_embeddings,
                               transpose_b=True)

        # Merge all summaries.
        merged = tf.summary.merge_all()

        init = tf.global_variables_initializer()

        saver = tf.train.Saver()

    # Start training.
    num_steps = 1000001

    with tf.compat.v1.Session(graph=graph) as session:
        # Writer for the summaries.
        writer = tf.summary.FileWriter(log_dir, session.graph)

        init.run()
        print('inited..')
        average_loss = 0
        for step in range(num_steps):
            batch_inputs, batch_labels = generate_batch(
                batch_size, num_skips, skip_window)
            feed_dict = {
                train_inputs: batch_inputs,
                train_labels: batch_labels
            }
            # Metadata container for this run.
            run_metadata = tf.RunMetadata()

            _, summary, loss_val = session.run([optimizer, merged, loss],
                                               feed_dict=feed_dict,
                                               run_metadata=run_metadata)
            average_loss += loss_val

            writer.add_summary(summary, step)

            # Only the last step's run metadata is written.
            if step == (num_steps - 1):
                writer.add_run_metadata(run_metadata, 'step%d' % step)

            if step % 2000 == 0:
                if step > 0:
                    average_loss /= 2000
                    # The average loss is an estimate over the last 2000
                    # batches.
                print('Average loss at step ', step, ': ', average_loss)
                average_loss = 0

            if step % 10000 == 0:
                sim = similarity.eval()
                for i in range(valid_size):
                    valid_word = reversed_dictionary[valid_examples[i]]
                    top_k = 8
                    # Index 0 of the ranking is skipped.
                    nearest = (-sim[i, :]).argsort()[1:top_k + 1]
                    log_str = 'Nearest to %s:' % valid_word

                    print(
                        log_str, ', '.join([
                            reversed_dictionary[nearest[k]]
                            for k in range(top_k)
                        ]))
        final_embeddings = normalized_embeddings.eval()

        # Write the label of every embedding row.
        with open(log_dir + '/metadata.tsv', 'w') as f:
            for i in range(vocabulary_size):
                f.write(reversed_dictionary[i] + '\n')

        # Save a checkpoint.
        saver.save(session, os.path.join(log_dir, 'model.ckpt'))

        # Configure the TensorBoard embedding projector.
        config = projector.ProjectorConfig()
        embedding_conf = config.embeddings.add()
        embedding_conf.tensor_name = embeddings.name
        embedding_conf.metadata_path = os.path.join(log_dir, 'metadata.tsv')
        projector.visualize_embeddings(writer, config)
    writer.close()

    # Step 6: Visualize the embeddings.

    # pylint: disable=missing-docstring
    # Function to draw visualization of distance between embeddings.
    def plot_with_labels(low_dim_embs, labels, filename):
        assert low_dim_embs.shape[0] >= len(
            labels), 'More labels than embeddings'
        plt.figure(figsize=(18, 18))  # in inches
        for i, label in enumerate(labels):
            x, y = low_dim_embs[i, :]
            plt.scatter(x, y)
            plt.annotate(label,
                         xy=(x, y),
                         xytext=(5, 2),
                         textcoords='offset points',
                         ha='right',
                         va='bottom')

        plt.savefig(filename)

    try:
        # pylint: disable=g-import-not-at-top
        from sklearn.manifold import TSNE
        import matplotlib.pyplot as plt

        tsne = TSNE(perplexity=30,
                    n_components=2,
                    init='pca',
                    n_iter=5000,
                    method='exact')
        plot_only = 500
        low_dim_embs = tsne.fit_transform(final_embeddings[:plot_only, :])
        # `range`, like everywhere else in this function: `xrange` is not a
        # builtin on Python 3.
        labels = [reversed_dictionary[i] for i in range(plot_only)]
        plot_with_labels(low_dim_embs, labels,
                         os.path.join(gettempdir(), 'tsne.png'))

    except ImportError as ex:
        print(
            'Please install sklearn, matplotlib, and scipy to show embeddings.'
        )
        print(ex)
Ejemplo n.º 20
0
# Hyper-parameters.
learning_rate = 0.01
train_epochs = 25
batch_size = 100
display_step = 1

# Placeholders for the inputs (784 features) and the labels (10 classes).
x = tf.placeholder(tf.float32, [None, 784])
y = tf.placeholder(tf.float32, [None, 10])

# Model weights and bias, initialised to zero.
W = tf.Variable(tf.zeros([784, 10]))
b = tf.Variable(tf.zeros([10]))

# Predicted class probabilities.
pred = tf.nn.softmax(tf.matmul(x, W) + b)

# Cost to minimise: cross entropy, summed per sample and averaged.
cost = tf.reduce_mean(-tf.reduce_sum(y * tf.log(pred), reduction_indices=1))
# Gradient descent.
optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost)

# Op that initialises all variables.
init = tf.global_variables_initializer()

with tf.Session() as sess:
    # Run the initialisation.
    sess.run(init)

    # NOTE(review): the epoch loop is cut short in this snippet; only the
    # reset of the running average cost is visible.
    for epoch in range(train_epochs):
        avg_cost = 0.
def logistic_regerssion(inputs):
    """Return the linear scores ``inputs x W + b``; no softmax is applied.

    ``W`` and ``b`` are module-level variables.
    """
    weighted = tf.matmul(inputs, W)
    return weighted + b
# The module is named `tensorflow` (as in the import on the next line);
# `tensorflower` cannot be imported.
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data

print("开始下载数据集..")
mnist = input_data.read_data_sets('MNIST_data', one_hot=True)
print("下载完毕..")
# An InteractiveSession allows a more flexible code structure: operations
# can be added to the graph and run while the session is already open.
sess = tf.InteractiveSession()

x = tf.placeholder("float", shape=[None, 784])
y_ = tf.placeholder("float", shape=[None, 10])  # Placeholder for the labels.

W = tf.Variable(tf.zeros([784, 10]))
b = tf.Variable(tf.zeros([10]))  # Variables: the trainable model parameters.
y = tf.nn.softmax(tf.matmul(x, W) + b)  # Softmax probability of each class.

cross_entropy = -tf.reduce_sum(y_ * tf.log(y))  # Cross entropy.

# Train with gradient descent, minimising the cross-entropy loss.
train_step = tf.train.GradientDescentOptimizer(0.01).minimize(
    cross_entropy)
# Initialise the variables through the `init` op. It was previously created
# but never run, while the deprecated tf.initialize_all_variables() did the
# actual initialisation.
init = tf.global_variables_initializer()
sess.run(init)
for i in range(1000):
    batch = mnist.train.next_batch(50)  # Load one mini-batch of 50 samples.
    train_step.run(feed_dict={x: batch[0], y_: batch[1]})
print("训练结束..")
# A particularly clear explanation: tf.argmax is a very useful function that
# returns the index of the largest value of a tensor along a given axis.
# Because the label vectors consist of 0s and 1s, the index of the largest
# value (the 1) is the class label. For example, tf.argmax(y, 1) is the label
# the model predicts for each input x, while tf.argmax(y_, 1) is the correct
# label; tf.equal can then check whether the prediction matches the true
# label (equal indices mean a match).
Ejemplo n.º 23
0
def inference(images):
    """Build the forward pass of the convolutional classifier.

    Layer order: conv1 -> pool1 -> norm1 -> conv2 -> norm2 -> pool2 ->
    local3 -> local4 -> softmax_linear.

    Args:
        images: Input tensor for the first convolution, whose kernel expects
            3 input channels. The flattening step reshapes to
            ``FLAGS.batch_size`` rows, so the batch size must match it.

    Returns:
        The output of the ``softmax_linear`` layer with ``NUM_CLASSES``
        columns. Despite the layer name, no softmax is applied here: the
        values are the raw linear scores.
    """
    # First convolution: 5x5 kernels, 3 -> 64 channels, followed by ReLU.
    with tf.variable_scope('conv1') as scope:
        filters = _variable_with_weight_decay(
            'weights', shape=[5, 5, 3, 64], stddev=1e-4, wd=0.0)
        convolved = tf.nn.conv2d(
            images, filters, [1, 1, 1, 1], padding='SAME')
        offsets = _variable_on_cpu(
            'biases', [64], tf.constant_initializer(0.0))
        pre_activation = tf.nn.bias_add(convolved, offsets)
        conv1 = tf.nn.relu(pre_activation, name=scope.name)
        _activation_summary(conv1)

    # First max-pooling: 3x3 window, stride 2.
    pool1 = tf.nn.max_pool(
        conv1,
        ksize=[1, 3, 3, 1],
        strides=[1, 2, 2, 1],
        padding='SAME',
        name="pool1")

    # Local response normalisation after the first pooling.
    norm1 = tf.nn.lrn(
        pool1, 4, bias=1.0, alpha=0.001 / 9.0, beta=0.75, name='norm1')

    # Second convolution: 5x5 kernels, 64 -> 64 channels, followed by ReLU.
    with tf.variable_scope('conv2') as scope:
        filters = _variable_with_weight_decay(
            'weights', shape=[5, 5, 64, 64], stddev=1e-4, wd=0.0)
        convolved = tf.nn.conv2d(
            norm1, filters, [1, 1, 1, 1], padding='SAME')
        offsets = _variable_on_cpu(
            'biases', [64], tf.constant_initializer(0.1))
        pre_activation = tf.nn.bias_add(convolved, offsets)
        conv2 = tf.nn.relu(pre_activation, name=scope.name)
        _activation_summary(conv2)

    # Second block normalises first and pools afterwards.
    norm2 = tf.nn.lrn(
        conv2, 4, bias=1.0, alpha=0.001 / 9.0, beta=0.75, name='norm2')
    pool2 = tf.nn.max_pool(
        norm2,
        ksize=[1, 3, 3, 1],
        strides=[1, 2, 2, 1],
        padding='SAME',
        name='pool2')

    # First fully connected layer (ReLU) on the flattened pooling output.
    with tf.variable_scope('local3') as scope:
        # Multiply every non-batch dimension together to get the row width.
        flat_size = 1
        for extent in pool2.get_shape().as_list()[1:]:
            flat_size *= extent
        flattened = tf.reshape(pool2, [FLAGS.batch_size, flat_size])
        fc_weights = _variable_with_weight_decay(
            'weights', shape=[flat_size, 384], stddev=0.04, wd=0.004)
        fc_biases = _variable_on_cpu(
            'biases', [384], tf.constant_initializer(0.1))
        pre_activation = tf.matmul(flattened, fc_weights) + fc_biases
        local3 = tf.nn.relu(pre_activation, name=scope.name)
        _activation_summary(local3)

    # Second fully connected layer (ReLU): 384 -> 192 units.
    with tf.variable_scope('local4') as scope:
        fc_weights = _variable_with_weight_decay(
            'weights', shape=[384, 192], stddev=0.04, wd=0.004)
        fc_biases = _variable_on_cpu(
            'biases', [192], tf.constant_initializer(0.1))
        pre_activation = tf.matmul(local3, fc_weights) + fc_biases
        local4 = tf.nn.relu(pre_activation, name=scope.name)
        _activation_summary(local4)

    # Final linear layer: 192 -> NUM_CLASSES scores, no activation.
    with tf.variable_scope('softmax_linear') as scope:
        out_weights = _variable_with_weight_decay(
            'weights', [192, NUM_CLASSES], stddev=1 / 192.0, wd=0.0)
        out_biases = _variable_on_cpu(
            'biases', [NUM_CLASSES], tf.constant_initializer(0.0))
        softmax_linear = tf.add(
            tf.matmul(local4, out_weights), out_biases, name=scope.name)
        _activation_summary(softmax_linear)

    return softmax_linear
def multihead_attention(queries,
                        keys,
                        num_units=None,
                        num_heads=8,
                        dropout_rate=0,
                        is_training=True,
                        causality=False,
                        scope="multihead_attention",
                        reuse=None):
    """Apply multi-head scaled dot-product attention with a residual add.

    Args:
        queries: Rank-3 tensor (N, T_q, C_q).
        keys: Rank-3 tensor (N, T_k, C_k). Also used as the source of values.
        num_units: Size of the Q/K/V projections. Defaults to the last
            dimension of `queries`. Must be divisible by `num_heads`.
            NOTE(review): the residual add `outputs += queries` presumably
            requires num_units to equal the last dimension of `queries`.
        num_heads: Number of attention heads; must be >= 1. The previous
            default of 0 could never work because `tf.split` cannot split
            into zero pieces.
        dropout_rate: Dropout rate applied to the attention weights.
        is_training: Whether dropout is active.
        causality: If True, each query position only attends to key
            positions at or before it.
        scope: Variable scope name.
        reuse: Passed to `tf.variable_scope` to reuse the projection weights.

    Returns:
        Tensor (N, T_q, num_units): the attention output added to `queries`
        and passed through `normalize`.

    Raises:
        ValueError: If `num_heads` is smaller than 1, or `num_units` is not
            divisible by `num_heads`.
    """
    if num_heads < 1:
        raise ValueError(
            "num_heads must be a positive integer, got %r" % (num_heads,))

    with tf.variable_scope(scope, reuse=reuse):
        if num_units is None:
            num_units = queries.get_shape().as_list()[-1]
        if num_units is not None and num_units % num_heads != 0:
            raise ValueError(
                "num_units (%r) must be divisible by num_heads (%r)" %
                (num_units, num_heads))

        # Linear projections: (N, T, num_units).
        Q = tf.layers.dense(queries, num_units, activation=tf.nn.relu)
        K = tf.layers.dense(keys, num_units, activation=tf.nn.relu)
        V = tf.layers.dense(keys, num_units, activation=tf.nn.relu)

        # Split the feature axis into heads and stack the heads on the batch
        # axis: (num_heads * N, T, num_units / num_heads).
        Q_ = tf.concat(tf.split(Q, num_heads, axis=2), axis=0)
        K_ = tf.concat(tf.split(K, num_heads, axis=2), axis=0)
        V_ = tf.concat(tf.split(V, num_heads, axis=2), axis=0)

        # Scaled dot-product scores: (num_heads * N, T_q, T_k).
        outputs = tf.matmul(Q_, tf.transpose(K_, [0, 2, 1]))
        outputs = outputs / (K_.get_shape().as_list()[-1]**0.5)

        # Key masking: key positions whose features sum to zero are treated
        # as padding and receive a very large negative score, so the softmax
        # gives them (almost) no weight.
        key_masks = tf.sign(tf.abs(tf.reduce_sum(keys, axis=-1)))
        key_masks = tf.tile(key_masks, [num_heads, 1])
        key_masks = tf.tile(tf.expand_dims(key_masks, 1),
                            [1, tf.shape(queries)[1], 1])

        paddings = tf.ones_like(outputs) * (-2**32 + 1)
        outputs = tf.where(tf.equal(key_masks, 0), paddings, outputs)

        # Causal masking: hide key positions that lie after the query.
        if causality:
            diag_vals = tf.ones_like(outputs[0, :, :])
            # Lower-triangular matrix of ones (keep all sub-diagonals, no
            # super-diagonals). Replaces the non-existent
            # tf.contrib.linalg.LinearOperatorTril.
            tril = tf.matrix_band_part(diag_vals, -1, 0)
            masks = tf.tile(tf.expand_dims(tril, 0),
                            [tf.shape(outputs)[0], 1, 1])

            paddings = tf.ones_like(masks) * (-2**32 + 1)
            outputs = tf.where(tf.equal(masks, 0), paddings, outputs)

        # Attention weights.
        outputs = tf.nn.softmax(outputs)

        # Query masking: zero the attention rows of query positions whose
        # features sum to zero.
        query_masks = tf.sign(tf.abs(tf.reduce_sum(queries, axis=-1)))
        query_masks = tf.tile(query_masks, [num_heads, 1])
        query_masks = tf.tile(tf.expand_dims(query_masks, -1),
                              [1, 1, tf.shape(keys)[1]])
        outputs *= query_masks

        outputs = tf.layers.dropout(outputs,
                                    rate=dropout_rate,
                                    training=tf.convert_to_tensor(is_training))

        # Weighted sum of the values: (num_heads * N, T_q, num_units / heads).
        outputs = tf.matmul(outputs, V_)
        # Undo the head stacking: (N, T_q, num_units).
        outputs = tf.concat(tf.split(outputs, num_heads, axis=0), axis=2)

        # Residual connection followed by normalisation.
        outputs += queries
        outputs = normalize(outputs)
    return outputs