def inference(images, hidden1_units, hidden2_units):
    """Build a two-hidden-layer fully connected network and return its logits.

    Args:
        images: Input tensor; it is multiplied by an
            ``[IMAGE_PIXELS, hidden1_units]`` weight matrix.
        hidden1_units: Number of units in the first hidden layer.
        hidden2_units: Number of units in the second hidden layer.

    Returns:
        The output of the final linear layer (no softmax is applied here);
        its last dimension is ``NUM_CLASSES``.
    """
    # First hidden layer.
    with tf.compat.v1.name_scope('hidden1'):
        # `name` must be given to tf.Variable (not to the initial-value op)
        # so that the variable itself is called 'hidden1/weights'.
        weights = tf.Variable(
            tf.random.truncated_normal(
                [IMAGE_PIXELS, hidden1_units],
                stddev=1.0 / math.sqrt(float(IMAGE_PIXELS))),
            name='weights')
        biases = tf.Variable(tf.zeros([hidden1_units]), name='biases')
        hidden1 = tf.nn.relu(tf.matmul(images, weights) + biases)

    # Second hidden layer.
    with tf.compat.v1.name_scope('hidden2'):
        weights = tf.Variable(
            tf.random.truncated_normal(
                [hidden1_units, hidden2_units],
                stddev=1.0 / math.sqrt(float(hidden1_units))),
            name='weights')
        biases = tf.Variable(tf.zeros([hidden2_units]), name='biases')
        hidden2 = tf.nn.relu(tf.matmul(hidden1, weights) + biases)

    # Linear output layer; the softmax is left to the loss/evaluation code.
    with tf.compat.v1.name_scope('softmax_linear'):
        weights = tf.Variable(
            tf.random.truncated_normal(
                [hidden2_units, NUM_CLASSES],
                stddev=1.0 / math.sqrt(float(hidden2_units))),
            name='weights')
        biases = tf.Variable(tf.zeros([NUM_CLASSES]), name='biases')
        logits = tf.matmul(hidden2, weights) + biases
    return logits
def encoder(x):
    """Encode ``x`` with two stacked sigmoid layers.

    Reads the module-level ``weights`` and ``biases`` dictionaries (keys
    ``encoder_h1``/``encoder_b1`` and ``encoder_h2``/``encoder_b2``).

    Args:
        x: Input tensor fed to the first layer.

    Returns:
        The activation of the second sigmoid layer.
    """
    # Layer 1: affine transform followed by a sigmoid.
    first_affine = tf.add(tf.matmul(x, weights['encoder_h1']),
                          biases['encoder_b1'])
    first_hidden = tf.nn.sigmoid(first_affine)

    # Layer 2: same pattern, applied to the first layer's activation.
    second_affine = tf.add(tf.matmul(first_hidden, weights['encoder_h2']),
                           biases['encoder_b2'])
    return tf.nn.sigmoid(second_affine)
def neural_network(x):
    """Apply three affine layers to ``x`` and return the output layer.

    No activation function is applied between the layers. Parameters are
    read from the module-level ``weights`` and ``biases`` dictionaries
    (keys ``h1``/``b1``, ``h2``/``b2`` and ``out``).

    Args:
        x: Input tensor fed to the first layer.

    Returns:
        The result of the final affine layer.
    """
    # First hidden fully connected layer.
    hidden_1 = tf.matmul(x, weights['h1'])
    hidden_1 = tf.add(hidden_1, biases['b1'])

    # Second hidden fully connected layer.
    hidden_2 = tf.matmul(hidden_1, weights['h2'])
    hidden_2 = tf.add(hidden_2, biases['b2'])

    # Output layer.
    return tf.matmul(hidden_2, weights['out']) + biases['out']
def multilayer_perceptron(x, weights, biases):
    """Two ReLU hidden layers followed by a linear output layer.

    Args:
        x: Input tensor fed to the first layer.
        weights: Mapping with the keys ``'h1'``, ``'h2'`` and ``'out'``.
        biases: Mapping with the keys ``'b1'``, ``'b2'`` and ``'out'``.

    Returns:
        The linear output of the last layer (no activation applied).
    """
    # Hidden layer 1: affine + ReLU.
    hidden_1 = tf.nn.relu(tf.add(tf.matmul(x, weights['h1']), biases['b1']))

    # Hidden layer 2: affine + ReLU.
    hidden_2 = tf.nn.relu(
        tf.add(tf.matmul(hidden_1, weights['h2']), biases['b2']))

    # Output layer: affine only.
    projected = tf.matmul(hidden_2, weights['out'])
    return projected + biases['out']
def scaled_dotproduct_attention(queries,
                                keys,
                                num_unit=None,
                                num_heads=0,
                                dropout_rate=0,
                                is_tranining=True,
                                causality=False,
                                scope="scaled_att",
                                reuse=None):
    """Single-head scaled dot-product attention with padding masks.

    Args:
        queries: Query tensor. It is used as rank 3 below
            (``tf.shape(queries)[1]``, batched matmul) -- presumably
            ``[batch, T_q, C_q]``; confirm against the caller.
        keys: Key tensor, also projected to produce the values.
            Presumably ``[batch, T_k, C_k]``; confirm against the caller.
        num_unit: Width of the Q/K/V projections. Defaults to the last
            static dimension of ``queries``.
        num_heads: Not used by this function.
        dropout_rate: Dropout rate applied to the attention weights.
        is_tranining: Whether dropout is active. The (misspelt) name is
            kept so that existing keyword callers keep working.
        causality: If true, positions above the diagonal are masked so a
            query cannot attend to later keys.
        scope: Variable scope name.
        reuse: Passed to ``tf.variable_scope``.

    Returns:
        The attention output after ``normalize``.
    """
    with tf.variable_scope(scope, reuse=reuse):
        if num_unit is None:
            # as_list is a method: it has to be called before indexing.
            num_unit = queries.get_shape().as_list()[-1]

        # Linear projections (values are projected from the keys).
        Q = tf.layers.dense(queries, num_unit, activation=tf.nn.relu)
        K = tf.layers.dense(keys, num_unit, activation=tf.nn.relu)
        V = tf.layers.dense(keys, num_unit, activation=tf.nn.relu)

        # Scaled dot-product scores.
        outputs = tf.matmul(Q, tf.transpose(K, [0, 2, 1]))
        outputs = outputs / (K.get_shape().as_list()[-1] ** 0.5)

        # Key mask: keys whose features sum to zero are treated as padding
        # and get a very negative score, so softmax gives them ~0 weight.
        key_masks = tf.sign(tf.abs(tf.reduce_sum(keys, axis=-1)))
        key_masks = tf.tile(tf.expand_dims(key_masks, 1),
                            [1, tf.shape(queries)[1], 1])
        paddings = tf.ones_like(outputs) * (-2 ** 32 + 1)
        outputs = tf.where(tf.equal(key_masks, 0), paddings, outputs)

        # Causal mask: hide future positions from the model.
        if causality:
            diag_vals = tf.ones_like(outputs[0, :, :])
            tril = tf.contrib.linalg.LinearOperatorTril(diag_vals).to_dense()
            masks = tf.tile(tf.expand_dims(tril, 0),
                            [tf.shape(outputs)[0], 1, 1])
            paddings = tf.ones_like(masks) * (-2 ** 32 + 1)
            outputs = tf.where(tf.equal(masks, 0), paddings, outputs)

        outputs = tf.nn.softmax(outputs)

        # Query mask: zero the attention rows of padded queries.
        query_masks = tf.sign(tf.abs(tf.reduce_sum(queries, axis=-1)))
        query_masks = tf.tile(tf.expand_dims(query_masks, -1),
                              [1, 1, tf.shape(keys)[1]])
        outputs *= query_masks

        outputs = tf.layers.dropout(
            outputs,
            rate=dropout_rate,
            training=tf.convert_to_tensor(is_tranining))

        # Weighted sum of the values. No residual connection is added here.
        outputs = tf.matmul(outputs, V)
        outputs = normalize(outputs)
    return outputs
def forward(self, examples, labels):
    """Build the forward graph and return true and sampled logits.

    Creates the embedding table, the output weights/bias and the global
    step, draws negative samples with a fixed unigram sampler, and computes
    the logits of the true labels and of the sampled ids.

    Args:
        examples: Integer ids used to look up rows of the embedding table.
        labels: Integer ids of the true classes; reshaped to
            ``[opts.batch_size, 1]`` for the candidate sampler.

    Returns:
        A ``(true_logits, sampled_logits)`` tuple.
    """
    opts = self._options

    # Embedding table: [vocab_size, emb_dim], uniform in +/- 0.5 / emb_dim.
    init_width = 0.5 / opts.emb_dim
    initial_embeddings = tf.random_uniform(
        [opts.vocab_size, opts.emb_dim], -init_width, init_width)
    emb = tf.Variable(initial_embeddings, name="emb")
    self._emb = emb

    # Output-layer weights: [vocab_size, emb_dim], zero initialised.
    sm_w_t = tf.Variable(
        tf.zeros([opts.vocab_size, opts.emb_dim]), name="sm_w_t")

    # Output-layer bias: [vocab_size], zero initialised.
    sm_b = tf.Variable(tf.zeros([opts.vocab_size]), name="sm_b")

    # Global step counter (scalar).
    self.global_step = tf.Variable(0, name="global_step")

    # True labels as an int64 column, the layout the sampler expects.
    labels_matrix = tf.reshape(
        tf.cast(labels, dtype=tf.int64), [opts.batch_size, 1])

    # Negative sampling from the unigram distribution raised to 0.75.
    sampler_output = tf.nn.fixed_unigram_candidate_sampler(
        true_classes=labels_matrix,
        num_true=1,
        num_sampled=opts.num_samples,
        unique=True,
        range_max=opts.vocab_size,
        distortion=0.75,
        unigrams=opts.vocab_counts.tolist())
    sampled_ids, _, _ = sampler_output

    # Embeddings of the input examples.
    example_emb = tf.nn.embedding_lookup(emb, examples)

    # Weights and biases of the true labels.
    true_w = tf.nn.embedding_lookup(sm_w_t, labels)
    true_b = tf.nn.embedding_lookup(sm_b, labels)

    # Weights and biases of the sampled (negative) ids.
    sampled_w = tf.nn.embedding_lookup(sm_w_t, sampled_ids)
    sampled_b = tf.nn.embedding_lookup(sm_b, sampled_ids)

    # True logits: row-wise dot product of example and label weights, plus
    # the label bias.
    true_dot = tf.reduce_sum(tf.multiply(example_emb, true_w), 1)
    true_logits = true_dot + true_b

    # Sampled logits: every example against every sampled id; the bias is
    # flattened to [num_samples] so that it broadcasts across rows.
    sampled_b_vec = tf.reshape(sampled_b, [opts.num_samples])
    sampled_dot = tf.matmul(example_emb, sampled_w, transpose_b=True)
    sampled_logits = sampled_dot + sampled_b_vec

    return true_logits, sampled_logits
def __init__(self, is_training, config):
    """Build the unrolled multi-layer LSTM language-model graph.

    Args:
        is_training: When true, dropout is applied (if
            ``config.keep_prob < 1``) and the gradient-clipped SGD training
            op is created; otherwise construction stops after the cost.
        config: Object providing ``batch_size``, ``num_steps``,
            ``hidden_size``, ``vocab_size``, ``keep_prob``, ``num_layers``
            and ``max_grad_norm``.
    """
    batch_size = config.batch_size
    num_steps = config.num_steps
    self.batch_size = batch_size
    self.num_steps = num_steps
    size = config.hidden_size  # width of the hidden layer
    vocab_size = config.vocab_size  # vocabulary size

    use_dropout = is_training and config.keep_prob < 1

    # Input placeholders: word ids and their targets.
    self._input_data = tf.placeholder(tf.int32, [batch_size, num_steps])
    self._targets = tf.placeholder(tf.int32, [batch_size, num_steps])

    # One LSTM cell, optionally wrapped with output dropout, repeated
    # num_layers times.
    lstm_cell = rnn_cell.BasicLSTMCell(size, forget_bias=0.0)
    if use_dropout:
        lstm_cell = rnn_cell.DropoutWrapper(
            lstm_cell, output_keep_prob=config.keep_prob)
    cell = rnn_cell.MultiRNNCell([lstm_cell] * config.num_layers)

    self._initial_state = cell.zero_state(batch_size, tf.float32)

    # The embedding variable and its lookup are pinned to the CPU.
    with tf.device("/cpu:0"):
        embedding = tf.get_variable("embedding", [vocab_size, size])
        inputs = tf.nn.embedding_lookup(embedding, self._input_data)

    if use_dropout:
        inputs = tf.nn.dropout(inputs, config.keep_prob)

    # Unroll the network by hand, sharing variables after the first step.
    step_outputs = []
    step_states = []
    state = self._initial_state
    with tf.variable_scope("RNN"):
        for time_step in range(num_steps):
            if time_step != 0:
                tf.get_variable_scope().reuse_variables()
            cell_output, state = cell(inputs[:, time_step, :], state)
            step_outputs.append(cell_output)
            step_states.append(state)

    # Flatten all time steps into rows of width `size` and project them
    # onto the vocabulary.
    output = tf.reshape(tf.concat(step_outputs, 1), [-1, size])
    softmax_w = tf.get_variable("softmax_w", [size, vocab_size])
    softmax_b = tf.get_variable("softmax_b", [vocab_size])
    logits = tf.matmul(output, softmax_w) + softmax_b

    flat_targets = tf.reshape(self._targets, [-1])
    uniform_weights = tf.ones([batch_size * num_steps])
    loss = tf.contrib.legacy_seq2seq.sequence_loss_by_example(
        [logits], [flat_targets], [uniform_weights], vocab_size)

    cost = tf.reduce_sum(loss) / batch_size
    self._cost = cost
    self._final_state = step_states[-1]

    if not is_training:
        return

    # Training op: SGD on gradients clipped by their global norm.
    self._lr = tf.Variable(0.0, trainable=False)
    tvars = tf.trainable_variables()
    clipped_grads, _ = tf.clip_by_global_norm(
        tf.gradients(cost, tvars), config.max_grad_norm)
    # NOTE(review): `self.lr` is presumably a property returning `self._lr`
    # defined elsewhere on the class -- confirm.
    optimizer = tf.train.GradientDescentOptimizer(self.lr)
    self._train_op = optimizer.apply_gradients(zip(clipped_grads, tvars))
def RNN(x, weights, biases):
    """Run a single-layer LSTM over ``x`` and project its last output.

    Uses the module-level ``timesteps`` and ``num_hidden`` settings.

    Args:
        x: Input tensor that is unstacked along axis 1 into ``timesteps``
            pieces -- presumably ``[batch_size, timesteps, n_inputs]``;
            confirm against the caller.
        weights: Mapping with the key ``'out'``.
        biases: Mapping with the key ``'out'``.

    Returns:
        ``outputs[-1] @ weights['out'] + biases['out']``.
    """
    # static_rnn expects a list with one tensor per time step.
    step_inputs = tf.unstack(x, timesteps, 1)

    # A basic LSTM cell.
    cell = rnn.BasicLSTMCell(num_hidden, forget_bias=1.0)

    # Per-step outputs and the final state (the state is not used).
    step_outputs, _ = rnn.static_rnn(cell, step_inputs, dtype=tf.float32)

    # Linear activation on the output of the last time step.
    last_output = step_outputs[-1]
    return tf.matmul(last_output, weights['out']) + biases['out']
def build_eval_graph(self):
    """Build the evaluation graph (analogy prediction and nearest words).

    Each analogy asks for the fourth word d given a, b and c, e.g.
    a=italy, b=rome, c=france should give d=paris. The graph is fed three
    vectors of word ids (one per analogy in the batch) and returns the ids
    of the 4 words closest to ``c + (b - a)``. A second sub-graph returns
    the closest words to arbitrary word ids.
    """
    # L2-normalised embeddings (normalised along axis 1).
    nemb = tf.nn.l2_normalize(self._emb, 1)

    # Placeholders for the word ids a, b and c of each analogy.
    self._analogy_a = tf.placeholder(dtype=tf.int32)
    self._analogy_b = tf.placeholder(dtype=tf.int32)
    self._analogy_c = tf.placeholder(dtype=tf.int32)

    a_emb = tf.gather(nemb, self._analogy_a)
    b_emb = tf.gather(nemb, self._analogy_b)
    c_emb = tf.gather(nemb, self._analogy_c)

    # Expected position of d, scored against every word by dot product.
    target = c_emb + (b_emb - a_emb)
    dist = tf.matmul(target, nemb, transpose_b=True)

    # Keep the ids of the 4 best-scoring words per analogy.
    _, self._analogy_pred_idx = tf.nn.top_k(dist, 4)

    # Nearest neighbours of arbitrary word ids (at most 1000 of them).
    self._nearby_word = tf.placeholder(dtype=tf.int32)  # word id
    nearby_emb = tf.gather(nemb, self._nearby_word)
    nearby_dist = tf.matmul(nearby_emb, nemb, transpose_b=True)
    neighbour_count = min(1000, self._options.vocab_size)
    self._nearby_val, self._nearby_idx = tf.nn.top_k(
        nearby_dist, neighbour_count)
def inference(images, hidden1_units, hidden2_units):
    """Build the MNIST model up to the point where it yields logits.

    Args:
        images: Images placeholder (the network input).
        hidden1_units: Size of the first hidden layer.
        hidden2_units: Size of the second hidden layer.

    Returns:
        softmax_linear: Output tensor with the computed logits.
    """
    # Hidden layer 1.
    with tf.name_scope('hidden1'):
        # truncated_normal draws from a normal distribution with the given
        # mean/stddev and re-draws values that fall more than two standard
        # deviations from the mean.
        initial_w1 = tf.truncated_normal(
            [IMAGE_PIXELS, hidden1_units],
            stddev=1.0 / math.sqrt(float(IMAGE_PIXELS)))
        w1 = tf.Variable(initial_w1, name='weights')
        b1 = tf.Variable(tf.zeros([hidden1_units]), name='biases')
        hidden1 = tf.nn.relu(tf.matmul(images, w1) + b1)

    # Hidden layer 2.
    with tf.name_scope('hidden2'):
        initial_w2 = tf.truncated_normal(
            [hidden1_units, hidden2_units],
            stddev=1.0 / math.sqrt(float(hidden1_units)))
        w2 = tf.Variable(initial_w2, name='weights')
        b2 = tf.Variable(tf.zeros([hidden2_units]), name='biases')
        hidden2 = tf.nn.relu(tf.matmul(hidden1, w2) + b2)

    # Linear output layer.
    with tf.name_scope('softmax_linear'):
        initial_w3 = tf.truncated_normal(
            [hidden2_units, NUM_CLASSES],
            stddev=1.0 / math.sqrt(float(hidden2_units)))
        w3 = tf.Variable(initial_w3, name='weights')
        b3 = tf.Variable(tf.zeros([NUM_CLASSES]), name='biases')
        logits = tf.matmul(hidden2, w3) + b3

    return logits
def conv_network(x, weights, biases, dropout):
    """Two conv/max-pool stages, one ReLU dense layer with dropout, logits.

    Args:
        x: Input batch; reshaped to ``[-1, 28, 28, 1]`` before the first
            convolution.
        weights: Mapping with the keys ``'wc1'``, ``'wc2'``, ``'wd1'`` and
            ``'out'``.
        biases: Mapping with the keys ``'bc1'``, ``'bc2'``, ``'bd1'`` and
            ``'out'``.
        dropout: Second argument passed to ``tf.nn.dropout``.

    Returns:
        The output of the final affine layer.
    """
    # Turn the flat input into a 4-D tensor [batch, height, width, depth].
    images = tf.reshape(x, shape=[-1, 28, 28, 1])

    # Convolution + max pooling, stage 1.
    stage1 = conv2d(images, weights['wc1'], biases['bc1'])
    stage1 = maxpool2d(stage1, k=2)

    # Convolution + max pooling, stage 2.
    stage2 = conv2d(stage1, weights['wc2'], biases['bc2'])
    stage2 = maxpool2d(stage2, k=2)

    # Flatten to the input width expected by the dense layer.
    dense_width = weights['wd1'].get_shape().as_list()[0]
    flattened = tf.reshape(stage2, [-1, dense_width])

    # Fully connected layer with ReLU.
    dense = tf.add(tf.matmul(flattened, weights['wd1']), biases['bd1'])
    dense = tf.nn.relu(dense)

    # Dropout.
    dense = tf.nn.dropout(dense, dropout)

    # Output layer.
    return tf.add(tf.matmul(dense, weights['out']), biases['out'])
def BiRNN(x, weights, biases):
    """Run a one-layer bidirectional LSTM and project its last output.

    Uses the module-level ``timesteps`` and ``num_hidden`` settings.

    Args:
        x: Input tensor that is unstacked along axis 1 into ``timesteps``
            pieces -- presumably ``[batch_size, timesteps, n_inputs]``;
            confirm against the caller.
        weights: Mapping with the key ``'out'``.
        biases: Mapping with the key ``'out'``.

    Returns:
        ``outputs[-1] @ weights['out'] + biases['out']``.
    """
    # The RNN helper expects a list with one tensor per time step.
    x = tf.unstack(x, timesteps, 1)

    # Forward and backward direction cells.
    lstm_fw_cell = rnn.BasicLSTMCell(num_hidden, forget_bias=1.0)
    lstm_bw_cell = rnn.BasicLSTMCell(num_hidden, forget_bias=1.0)

    # stack_bidirectional_rnn takes one *list* of cells per direction (one
    # entry per stacked layer); a bare cell is rejected with a ValueError
    # (assuming `rnn` is tensorflow.contrib.rnn).
    result = rnn.stack_bidirectional_rnn(
        [lstm_fw_cell], [lstm_bw_cell], x, dtype=tf.float32)

    # The call returns (outputs, states_fw, states_bw). A bare outputs list
    # is still accepted, without building the graph a second time.
    outputs = result[0] if isinstance(result, tuple) else result

    # Linear activation on the output of the last time step.
    return tf.matmul(outputs[-1], weights['out']) + biases['out']
def dynamicRNN(x, seqlen, weights, biases):
    """Run an LSTM over variable-length sequences and project the output
    taken at each sequence's last valid step.

    Uses the module-level ``seq_max_len`` and ``n_hidden`` settings.

    Args:
        x: Input tensor that is unstacked along axis 1 into ``seq_max_len``
            pieces.
        seqlen: Per-example sequence lengths, passed to ``static_rnn`` as
            ``sequence_length``.
        weights: Mapping with the key ``'out'``.
        biases: Mapping with the key ``'out'``.

    Returns:
        The gathered outputs times ``weights['out']`` plus ``biases['out']``.
    """
    step_inputs = tf.unstack(x, seq_max_len, 1)

    # LSTM cell; the final state returned by static_rnn is not used.
    cell = tf.contrib.rnn.BasicLSTMCell(n_hidden)
    step_outputs, _ = tf.contrib.rnn.static_rnn(
        cell, step_inputs, dtype=tf.float32, sequence_length=seqlen)

    # static_rnn returns one tensor per time step. Stack them and swap the
    # first two axes so that the batch axis comes first.
    stacked = tf.transpose(tf.stack(step_outputs), [1, 0, 2])

    # With dynamic lengths the wanted output is the one at step seqlen - 1
    # of each example. After flattening to rows of width n_hidden, example
    # i starts at row i * seq_max_len.
    batch_size = tf.shape(stacked)[0]
    row_offsets = tf.range(0, batch_size) * seq_max_len
    index = row_offsets + (seqlen - 1)
    flat_outputs = tf.reshape(stacked, [-1, n_hidden])
    last_outputs = tf.gather(flat_outputs, index)

    return tf.matmul(last_outputs, weights['out']) + biases['out']
# Evaluate the sum and the product of the previously defined `a` and `b`.
with tf.Session() as sess:
    constant_sum = sess.run(a + b)
    print("Add constant: %i" % constant_sum)
    constant_product = sess.run(a * b)
    print("multy constant: %i" % constant_product)

# The same operations on placeholders, whose values are fed at run time.
a = tf.placeholder(tf.int16)
b = tf.placeholder(tf.int16)

add = tf.add(a, b)
mul = tf.multiply(a, b)

with tf.Session() as sess:
    fed_sum = sess.run(add, feed_dict={a: 2, b: 3})
    print("add with variables: %i" % fed_sum)
    fed_product = sess.run(mul, feed_dict={a: 3, b: 4})
    print("multi with variables: %i" % fed_product)

# Matrix multiplication: a 1x2 matrix times a 2x1 matrix.
matrix1 = tf.constant([[3., 3.]])
matrix2 = tf.constant([[2.], [2.]])
product = tf.matmul(matrix1, matrix2)

with tf.Session() as sess:
    res = sess.run(product)
    print(res)
# First convolutional layer: convolution + bias, ReLU, then max pooling.
h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1) + b_conv1)
h_pool1 = max_pool_2x2(h_conv1)

# Second convolutional layer.
W_conv2 = weight_variable([5, 5, 32, 64])
b_conv2 = bias_variable([64])
h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)
h_pool2 = max_pool_2x2(h_conv2)

# Fully connected layer on the flattened feature maps.
w_fc1 = weight_variable([7 * 7 * 64, 1024])
b_fc1 = bias_variable([1024])
h_pool2_flat = tf.reshape(h_pool2, [-1, 7 * 7 * 64])
h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, w_fc1) + b_fc1)

# Dropout; the keep probability is fed at run time.
keep_prob = tf.placeholder("float")
h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)

# Output layer.
w_fc2 = weight_variable([1024, 10])
b_fc2 = bias_variable([10])

# Class probabilities.
y_conv = tf.nn.softmax(tf.matmul(h_fc1_drop, w_fc2) + b_fc2)

# Cross-entropy loss. The probabilities are clipped away from zero before
# the log: a probability that underflows to 0 would otherwise produce
# 0 * log(0) = NaN and poison every subsequent update.
cross_entropy = -tf.reduce_sum(
    y_ * tf.log(tf.clip_by_value(y_conv, 1e-10, 1.0)))

# Optimiser.
train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)

# Element-wise check of whether the predicted class equals the label class.
correct_prediction = tf.equal(tf.argmax(y_conv, 1), tf.argmax(y_, 1))
def matpow(M, n):
    """Left-multiply ``M`` by itself once for every unit of ``n`` that is >= 1.

    For ``n < 1`` the input is returned unchanged. Each further step wraps
    the running result as ``tf.matmul(M, result)``, so for an integer
    ``n >= 1`` the result contains ``n`` multiplications, i.e. ``n + 1``
    factors of ``M``.

    Args:
        M: The matrix (tensor) to multiply.
        n: Number of multiplications to apply.

    Returns:
        ``M`` itself when ``n < 1``, otherwise the chained product.
    """
    result = M
    remaining = n
    # Same stopping rule as the recursive definition: stop once remaining < 1.
    while not remaining < 1:
        result = tf.matmul(M, result)
        remaining = remaining - 1
    return result
biases=nce_biases, labels=Y, inputs=X_embed, num_sampled=num_sampled, num_classes=vocab_size)) # 定义优化器,优化器的作用是求导,反向传播。 optimizer = tf.train.GradientDescentOptimizer(learning_rate) train_op = optimizer.minimize(loss_op) # 验证/评价 # 计算输入数据embedding 和每个embedding向量的cosine相似度 X_embed_norm = X_embed / tf.sqrt(tf.reduce_sum(tf.square(X_embed))) embedding_norm = embedding / tf.sqrt( tf.reduce_sum(tf.square(embedding), 1, keep_dims=True)) cosine_sim_op = tf.matmul(X_embed_norm, embedding_norm, transpose_b=True) init = tf.global_variables_initializer() with tf.Session() as sess: sess.run(init) x_test = np.array([word2id[w] for w in eval_words]) average_loss = 0 for step in range(1, num_steps + 1): batch_x, batch_y = next_batch(batch_size, num_skips, skip_window) # 训练 _, loss = sess.run([train_op, loss_op], feed_dict={
import numpy as np
import tensorflow as tf
import tensorflow.contrib.eager as tfe

# Switch TensorFlow to eager execution.
print("set eager mode..")
tfe.enable_eager_execution()

# Define constant tensors.
print("define constant tensors..")
a = tf.constant(2)
b = tf.constant(3)

# With eager execution the operations run immediately, without a Session.
print("Running oprations without Session")
c = a + b
print("a+b= %i" % c)
d = a * b
print("a*b=%i" % d)

# Tensors and NumPy arrays can be mixed in the same operation.
print("mixing op with Tensors and Numpy arrays")
a = tf.constant([[2., 1.], [1., 0.]], dtype=np.float32)
b = np.array([[3., 0.], [5., 1.]], dtype=np.float32)

c = a + b
print("a+b=%s" % c)

d = tf.matmul(a, b)
print("a*b=%s" % d)

# Element-wise iteration over the tensor.
print("Iterate through Tensor 'a'")
for i in range(a.shape[0]):
    for j in range(a.shape[1]):
        print(a[i][j])
def word2vec_basic(log_dir):
    """Download text8, train a skip-gram word2vec model and export it.

    The steps run in order: fetch and verify the corpus, build the
    vocabulary, train with an NCE loss while writing TensorBoard summaries,
    save a checkpoint plus projector metadata, and finally try to plot a
    t-SNE projection of the first 500 embeddings.

    Args:
        log_dir: Directory for summaries, ``metadata.tsv`` and
            ``model.ckpt``; created if it does not exist.

    Raises:
        Exception: If the downloaded archive fails the hash or size check.
    """
    # Create the TensorBoard output directory.
    if not os.path.exists(log_dir):
        os.makedirs(log_dir)

    # Step 1: download the data.
    url = 'http://mattmahoney.net/dc/'

    def maybe_download(filename, expected_bytes, sha256=None):
        """Download ``filename`` to the temp dir if absent, then verify it."""
        local_filename = os.path.join(gettempdir(), filename)
        if not os.path.exists(local_filename):
            local_filename, _ = urllib.request.urlretrieve(
                url + filename, local_filename)
        statinfo = os.stat(local_filename)
        if sha256 and _hash_file(local_filename) != sha256:
            raise Exception('Failed to verify ' + local_filename +
                            ' due to hash '
                            'mismatch. Can you get to it with a browser?')
        if statinfo.st_size == expected_bytes:
            print("found and verified", filename)
        else:
            print(statinfo.st_size)
            raise Exception('Failed to verify ' + local_filename +
                            '. Can you get to it with a browser?')
        return local_filename

    filename = maybe_download(
        'text8.zip',
        31344016,
        sha256='a6640522afe85d1963ad56c05b0ede0a0c000dddc9671758a6cc09b7a38e5232')

    def read_data(filename):
        """Return the first file of the zip archive as a list of words."""
        with zipfile.ZipFile(filename) as f:
            data = tf.compat.as_str(f.read(f.namelist()[0])).split()
        return data

    vocabulary = read_data(filename)
    print('data_size', len(vocabulary))

    # Step 2: build the dictionary and replace rare words with UNK.
    vocabulary_size = 50000

    def build_dataset(words, n_words):
        """Map the ``n_words - 1`` most common words to ids; id 0 is UNK."""
        count = [['UNK', -1]]
        count.extend(collections.Counter(words).most_common(n_words - 1))
        dictionary = {word: index for index, (word, _) in enumerate(count)}
        data = []
        unk_count = 0
        for word in words:
            index = dictionary.get(word, 0)
            if index == 0:  # dictionary['UNK']
                unk_count += 1
            data.append(index)
        count[0][1] = unk_count
        reversed_dictionary = dict(
            zip(dictionary.values(), dictionary.keys()))
        return data, count, dictionary, reversed_dictionary

    # data: the corpus as a list of word ids
    # count: [word, occurrences] pairs, UNK first
    # dictionary: word -> id
    # reversed_dictionary: id -> word
    data, count, dictionary, reversed_dictionary = build_dataset(
        vocabulary, vocabulary_size)
    del vocabulary  # the word list is no longer needed
    print('Most common words (+UNK)', count[:5])
    print('Sample data', data[:10],
          [reversed_dictionary[i] for i in data[:10]])

    # Step 3: batch generator for the skip-gram model.
    def generate_batch(batch_size, num_skips, skip_window):
        """Return ``(batch, labels)`` arrays of centre and context word ids.

        Advances the module-level ``data_index`` cursor through ``data``.
        """
        global data_index
        assert batch_size % num_skips == 0
        assert num_skips <= 2 * skip_window
        batch = np.ndarray(shape=(batch_size), dtype=np.int32)
        labels = np.ndarray(shape=(batch_size, 1), dtype=np.int32)
        # Window: skip_window words on each side plus the centre word.
        span = 2 * skip_window + 1
        buffer = collections.deque(maxlen=span)
        if data_index + span > len(data):
            data_index = 0
        buffer.extend(data[data_index:data_index + span])
        data_index += span
        for i in range(batch_size // num_skips):
            context_words = [w for w in range(span) if w != skip_window]
            words_to_use = random.sample(context_words, num_skips)
            for j, context_word in enumerate(words_to_use):
                batch[i * num_skips + j] = buffer[skip_window]
                labels[i * num_skips + j, 0] = buffer[context_word]
            if data_index == len(data):
                buffer.extend(data[0:span])
                data_index = span
            else:
                buffer.append(data[data_index])
                data_index += 1
        # Backtrack a little bit to avoid skipping words at the end of a
        # batch.
        data_index = (data_index - span) % len(data)
        return batch, labels

    batch, labels = generate_batch(batch_size=8, num_skips=2, skip_window=1)
    for i in range(8):
        print(batch[i], reversed_dictionary[batch[i]], '->', labels[i, 0],
              reversed_dictionary[labels[i, 0]])

    # Step 4: build the model.
    batch_size = 128
    embedding_size = 128  # dimension of the embedding vectors
    skip_window = 1  # how many words to consider left and right
    num_skips = 2  # how many times an input is reused to generate a label
    num_sampled = 64  # number of negative examples to sample

    # A random validation set drawn from the `valid_window` lowest word ids
    # (the most frequent words). These values are only used to display
    # nearest neighbours; they do not affect training.
    valid_size = 16  # number of words whose neighbours are shown
    valid_window = 100
    valid_examples = np.random.choice(valid_window, valid_size, replace=False)

    graph = tf.Graph()

    with graph.as_default():
        # Input data.
        with tf.name_scope('input'):
            train_inputs = tf.placeholder(tf.int32, shape=[batch_size])
            train_labels = tf.placeholder(tf.int32, shape=[batch_size, 1])
            valid_dataset = tf.constant(valid_examples, dtype=tf.int32)

        # Ops and variables pinned to the CPU.
        with tf.device('/cpu:0'):
            with tf.name_scope('embeddings'):
                embeddings = tf.Variable(
                    tf.random_uniform([vocabulary_size, embedding_size],
                                      -1.0, 1.0))
                embed = tf.nn.embedding_lookup(embeddings, train_inputs)

            # Variables of the NCE loss.
            with tf.name_scope('weights'):
                nce_weights = tf.Variable(
                    tf.truncated_normal(
                        [vocabulary_size, embedding_size],
                        stddev=1.0 / math.sqrt(embedding_size)))
            with tf.name_scope('biases'):
                nce_biases = tf.Variable(tf.zeros([vocabulary_size]))

        # Average NCE loss of the batch; negative samples are drawn anew
        # every time the loss is evaluated.
        with tf.name_scope('loss'):
            loss = tf.reduce_mean(
                tf.nn.nce_loss(weights=nce_weights,
                               biases=nce_biases,
                               labels=train_labels,
                               inputs=embed,
                               num_sampled=num_sampled,
                               num_classes=vocabulary_size))

        # Loss summary.
        tf.summary.scalar('loss', loss)

        # SGD optimiser with a learning rate of 1.0.
        with tf.name_scope('opytimizer'):
            optimizer = tf.train.GradientDescentOptimizer(1.0).minimize(loss)

        # Cosine similarity between the validation words and all embeddings.
        norm = tf.sqrt(
            tf.reduce_sum(tf.square(embeddings), 1, keep_dims=True))
        normalized_embeddings = embeddings / norm
        valid_embeddings = tf.nn.embedding_lookup(normalized_embeddings,
                                                  valid_dataset)
        similarity = tf.matmul(valid_embeddings,
                               normalized_embeddings,
                               transpose_b=True)

        # Merge all summaries.
        merged = tf.summary.merge_all()

        init = tf.global_variables_initializer()
        saver = tf.train.Saver()

    # Step 5: train.
    num_steps = 1000001

    with tf.compat.v1.Session(graph=graph) as session:
        # Summary writer.
        writer = tf.summary.FileWriter(log_dir, session.graph)

        init.run()
        print('inited..')

        average_loss = 0
        for step in range(num_steps):
            batch_inputs, batch_labels = generate_batch(
                batch_size, num_skips, skip_window)
            feed_dict = {
                train_inputs: batch_inputs,
                train_labels: batch_labels
            }

            # Run metadata, stored only for the last step.
            run_metadata = tf.RunMetadata()

            _, summary, loss_val = session.run([optimizer, merged, loss],
                                               feed_dict=feed_dict,
                                               run_metadata=run_metadata)
            average_loss += loss_val

            writer.add_summary(summary, step)
            if step == (num_steps - 1):
                writer.add_run_metadata(run_metadata, 'step%d' % step)

            if step % 2000 == 0:
                if step > 0:
                    average_loss /= 2000
                # The average loss is an estimate over the last 2000 batches.
                print('Average loss at step ', step, ': ', average_loss)
                average_loss = 0

            if step % 10000 == 0:
                sim = similarity.eval()
                for i in range(valid_size):
                    valid_word = reversed_dictionary[valid_examples[i]]
                    top_k = 8
                    # Index 0 of the sorted order is skipped.
                    nearest = (-sim[i, :]).argsort()[1:top_k + 1]
                    log_str = 'Nearest to %s:' % valid_word
                    print(
                        log_str, ', '.join([
                            reversed_dictionary[nearest[k]]
                            for k in range(top_k)
                        ]))
        final_embeddings = normalized_embeddings.eval()

        # Write the label of every embedding row.
        with open(os.path.join(log_dir, 'metadata.tsv'), 'w') as f:
            for i in range(vocabulary_size):
                f.write(reversed_dictionary[i] + '\n')

        # Save a checkpoint.
        saver.save(session, os.path.join(log_dir, 'model.ckpt'))

        # Configure the TensorBoard embedding projector.
        config = projector.ProjectorConfig()
        embedding_conf = config.embeddings.add()
        embedding_conf.tensor_name = embeddings.name
        embedding_conf.metadata_path = os.path.join(log_dir, 'metadata.tsv')
        projector.visualize_embeddings(writer, config)

    writer.close()

    # Step 6: visualize the embeddings.
    def plot_with_labels(low_dim_embs, labels, filename):
        """Scatter-plot the 2-D embeddings with their labels and save it."""
        assert low_dim_embs.shape[0] >= len(
            labels), 'More labels than embeddings'
        plt.figure(figsize=(18, 18))  # in inches
        for i, label in enumerate(labels):
            x, y = low_dim_embs[i, :]
            plt.scatter(x, y)
            plt.annotate(label,
                         xy=(x, y),
                         xytext=(5, 2),
                         textcoords='offset points',
                         ha='right',
                         va='bottom')

        plt.savefig(filename)

    try:
        # pylint: disable=g-import-not-at-top
        from sklearn.manifold import TSNE
        import matplotlib.pyplot as plt

        tsne = TSNE(perplexity=30,
                    n_components=2,
                    init='pca',
                    n_iter=5000,
                    method='exact')
        plot_only = 500
        low_dim_embs = tsne.fit_transform(final_embeddings[:plot_only, :])
        # `range` (not the Python-2-only `xrange`) so that a missing name
        # cannot escape the ImportError handler below.
        labels = [reversed_dictionary[i] for i in range(plot_only)]
        plot_with_labels(low_dim_embs, labels,
                         os.path.join(gettempdir(), 'tsne.png'))

    except ImportError as ex:
        print(
            'Please install sklearn, matplotlib, and scipy to show embeddings.'
        )
        print(ex)
# Hyper-parameters.
learning_rate = 0.01
train_epochs = 25
batch_size = 100
display_step = 1

# Placeholders for the inputs (784 features) and the labels (10 classes).
x = tf.placeholder(tf.float32, [None, 784])
y = tf.placeholder(tf.float32, [None, 10])

# Model weights and bias, zero initialised.
W = tf.Variable(tf.zeros([784, 10]))
b = tf.Variable(tf.zeros([10]))

pred = tf.nn.softmax(tf.matmul(x, W) + b)

# Cross-entropy averaged over the batch. The probabilities are clipped away
# from zero before the log so that an underflowed probability cannot turn
# the cost into NaN.
cost = tf.reduce_mean(
    -tf.reduce_sum(y * tf.log(tf.clip_by_value(pred, 1e-10, 1.0)),
                   reduction_indices=1))

# Gradient descent.
optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost)

# Initialiser for all variables.
init = tf.global_variables_initializer()

with tf.Session() as sess:
    # Run the initialiser.
    sess.run(init)

    for epoch in range(train_epochs):
        avg_cost = 0.
def logistic_regerssion(inputs):
    """Return the affine map ``inputs @ W + b``.

    ``W`` and ``b`` are read from module scope; no softmax or sigmoid is
    applied here.

    Args:
        inputs: Tensor that is matrix-multiplied by ``W``.

    Returns:
        The result of the affine transform.
    """
    projected = tf.matmul(inputs, W)
    return projected + b
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data

print("开始下载数据集..")
mnist = input_data.read_data_sets('MNIST_data', one_hot=True)
print("下载完毕..")

# An interactive session installs itself as the default session, so ops can
# be run with `.run()` / `.eval()` while the graph is still being built.
sess = tf.InteractiveSession()

# Placeholders for the inputs (784 features) and one-hot labels (10 classes).
x = tf.placeholder("float", shape=[None, 784])
y_ = tf.placeholder("float", shape=[None, 10])

# Model parameters, zero initialised.
W = tf.Variable(tf.zeros([784, 10]))
b = tf.Variable(tf.zeros([10]))

# Softmax gives the probability of each class.
y = tf.nn.softmax(tf.matmul(x, W) + b)

# Cross-entropy loss. The probabilities are clipped away from zero before
# the log so that an underflowed probability cannot turn the loss into NaN.
cross_entropy = -tf.reduce_sum(y_ * tf.log(tf.clip_by_value(y, 1e-10, 1.0)))

# One gradient-descent step (learning rate 0.01) on the cross-entropy.
train_step = tf.train.GradientDescentOptimizer(0.01).minimize(cross_entropy)

# Initialise every variable once, after the whole graph has been built.
init = tf.global_variables_initializer()
sess.run(init)

for i in range(1000):
    batch = mnist.train.next_batch(50)  # load one mini-batch
    train_step.run(feed_dict={x: batch[0], y_: batch[1]})
print("训练结束..")

# tf.argmax returns the index of the largest value of a tensor along one
# axis. The label vectors consist of 0s and 1s, so the index of the 1 is the
# class label: tf.argmax(y, 1) is the label the model predicts for each
# input, tf.argmax(y_, 1) is the correct label, and tf.equal can be used to
# check whether the prediction matches the true label (equal indices mean a
# match).
def inference(images):
    """Build the convolutional model and return its un-normalised logits.

    Layers, in order: conv1 -> pool1 -> norm1 -> conv2 -> norm2 -> pool2 ->
    local3 -> local4 -> softmax_linear.

    Args:
        images: Input batch for the first convolution (whose kernel expects
            3 input channels).

    Returns:
        The output of the ``softmax_linear`` layer, with ``NUM_CLASSES``
        columns.
    """
    # Convolutional layer 1.
    with tf.variable_scope('conv1') as scope:
        filters = _variable_with_weight_decay('weights',
                                              shape=[5, 5, 3, 64],
                                              stddev=1e-4,
                                              wd=0.0)
        convolved = tf.nn.conv2d(images, filters, [1, 1, 1, 1],
                                 padding='SAME')
        offsets = _variable_on_cpu('biases', [64],
                                   tf.constant_initializer(0.0))
        pre_activation = tf.nn.bias_add(convolved, offsets)
        conv1 = tf.nn.relu(pre_activation, name=scope.name)
        _activation_summary(conv1)

    # Pooling layer 1.
    pool1 = tf.nn.max_pool(conv1,
                           ksize=[1, 3, 3, 1],
                           strides=[1, 2, 2, 1],
                           padding='SAME',
                           name="pool1")

    # Local response normalisation 1.
    norm1 = tf.nn.lrn(pool1,
                      4,
                      bias=1.0,
                      alpha=0.001 / 9.0,
                      beta=0.75,
                      name='norm1')

    # Convolutional layer 2.
    with tf.variable_scope('conv2') as scope:
        filters = _variable_with_weight_decay('weights',
                                              shape=[5, 5, 64, 64],
                                              stddev=1e-4,
                                              wd=0.0)
        convolved = tf.nn.conv2d(norm1, filters, [1, 1, 1, 1],
                                 padding='SAME')
        offsets = _variable_on_cpu('biases', [64],
                                   tf.constant_initializer(0.1))
        pre_activation = tf.nn.bias_add(convolved, offsets)
        conv2 = tf.nn.relu(pre_activation, name=scope.name)
        _activation_summary(conv2)

    # Local response normalisation 2 (applied before pooling this time).
    norm2 = tf.nn.lrn(conv2,
                      4,
                      bias=1.0,
                      alpha=0.001 / 9.0,
                      beta=0.75,
                      name='norm2')

    # Pooling layer 2.
    pool2 = tf.nn.max_pool(norm2,
                           ksize=[1, 3, 3, 1],
                           strides=[1, 2, 2, 1],
                           padding='SAME',
                           name='pool2')

    # Fully connected ReLU layer on the flattened feature maps.
    with tf.variable_scope('local3') as scope:
        # Number of features per example: product of all non-batch dims.
        feature_dims = pool2.get_shape()[1:].as_list()
        flat_dim = 1
        for extent in feature_dims:
            flat_dim *= extent
        flattened = tf.reshape(pool2, [FLAGS.batch_size, flat_dim])

        dense_w = _variable_with_weight_decay('weights',
                                              shape=[flat_dim, 384],
                                              stddev=0.04,
                                              wd=0.004)
        dense_b = _variable_on_cpu('biases', [384],
                                   tf.constant_initializer(0.1))
        local3 = tf.nn.relu(tf.matmul(flattened, dense_w) + dense_b,
                            name=scope.name)
        _activation_summary(local3)

    # Second fully connected ReLU layer.
    with tf.variable_scope('local4') as scope:
        dense_w = _variable_with_weight_decay('weights',
                                              shape=[384, 192],
                                              stddev=0.04,
                                              wd=0.004)
        dense_b = _variable_on_cpu('biases', [192],
                                   tf.constant_initializer(0.1))
        local4 = tf.nn.relu(tf.matmul(local3, dense_w) + dense_b,
                            name=scope.name)
        _activation_summary(local4)

    # Linear layer producing the logits; no softmax is applied here.
    with tf.variable_scope('softmax_linear') as scope:
        out_w = _variable_with_weight_decay('weights', [192, NUM_CLASSES],
                                            stddev=1 / 192.0,
                                            wd=0.0)
        out_b = _variable_on_cpu('biases', [NUM_CLASSES],
                                 tf.constant_initializer(0.0))
        softmax_linear = tf.add(tf.matmul(local4, out_w),
                                out_b,
                                name=scope.name)
        _activation_summary(softmax_linear)

    return softmax_linear
def multihead_attention(queries,
                        keys,
                        num_units=None,
                        num_heads=0,
                        dropout_rate=0,
                        is_training=True,
                        causality=False,
                        scope="multihead_attention",
                        reuse=None):
    """Multi-head scaled dot-product attention with a residual connection.

    Args:
        queries: Query tensor, used as rank 3 below -- presumably
            ``[batch, T_q, C_q]``; confirm against the caller.
        keys: Key tensor, also projected to produce the values --
            presumably ``[batch, T_k, C_k]``; confirm against the caller.
        num_units: Width of the Q/K/V projections. Defaults to the last
            static dimension of ``queries``.
        num_heads: Number of pieces the projections are split into along
            the last axis.
        dropout_rate: Dropout rate applied to the attention weights.
        is_training: Whether dropout is active.
        causality: If true, positions above the diagonal are masked so a
            query cannot attend to later keys.
        scope: Variable scope name.
        reuse: Passed to ``tf.variable_scope``.

    Returns:
        ``normalize(attention_output + queries)``.
    """
    with tf.variable_scope(scope, reuse=reuse):
        if num_units is None:
            num_units = queries.get_shape().as_list()[-1]

        # Linear projections (values are projected from the keys).
        q_proj = tf.layers.dense(queries, num_units, activation=tf.nn.relu)
        k_proj = tf.layers.dense(keys, num_units, activation=tf.nn.relu)
        v_proj = tf.layers.dense(keys, num_units, activation=tf.nn.relu)

        # Split the feature axis into heads and stack them on the batch axis.
        q_heads = tf.concat(tf.split(q_proj, num_heads, axis=2), axis=0)
        k_heads = tf.concat(tf.split(k_proj, num_heads, axis=2), axis=0)
        v_heads = tf.concat(tf.split(v_proj, num_heads, axis=2), axis=0)

        # Scaled dot-product scores.
        scores = tf.matmul(q_heads, tf.transpose(k_heads, [0, 2, 1]))
        scale = k_heads.get_shape().as_list()[-1] ** 0.5
        scores = scores / scale

        # Key mask: keys whose features sum to zero get a very negative
        # score, so softmax gives them ~0 weight.
        key_masks = tf.sign(tf.abs(tf.reduce_sum(keys, axis=-1)))
        key_masks = tf.tile(key_masks, [num_heads, 1])
        key_masks = tf.tile(tf.expand_dims(key_masks, 1),
                            [1, tf.shape(queries)[1], 1])
        very_negative = tf.ones_like(scores) * (-2 ** 32 + 1)
        scores = tf.where(tf.equal(key_masks, 0), very_negative, scores)

        # Causal mask: hide future positions.
        if causality:
            diag_vals = tf.ones_like(scores[0, :, :])
            tril = tf.contrib.linalg.LinearOperatorTril(diag_vals).to_dense()
            masks = tf.tile(tf.expand_dims(tril, 0),
                            [tf.shape(scores)[0], 1, 1])
            very_negative = tf.ones_like(masks) * (-2 ** 32 + 1)
            scores = tf.where(tf.equal(masks, 0), very_negative, scores)

        attention = tf.nn.softmax(scores)

        # Query mask: zero the attention rows of padded queries.
        query_masks = tf.sign(tf.abs(tf.reduce_sum(queries, axis=-1)))
        query_masks = tf.tile(query_masks, [num_heads, 1])
        query_masks = tf.tile(tf.expand_dims(query_masks, -1),
                              [1, 1, tf.shape(keys)[1]])
        attention = attention * query_masks

        attention = tf.layers.dropout(
            attention,
            rate=dropout_rate,
            training=tf.convert_to_tensor(is_training))

        # Weighted sum of the values.
        context = tf.matmul(attention, v_heads)

        # Undo the head split: back from the batch axis to the feature axis.
        context = tf.concat(tf.split(context, num_heads, axis=0), axis=2)

        # Residual connection followed by normalisation.
        context = context + queries
        outputs = normalize(context)
    return outputs