def train_batch():
    source_path = 'data/sample2.txt'
    source_text = load_data(source_path)
    print("source_text:", source_text)

    initial_words = get_tokens(source_text)
    vocab = Vocab(initial_tokens=initial_words)
    vocab.load_pretrained_embeddings(config.embedding_path_air)

    sentence_ids = text_to_ids(source_text, vocab.token2id)
    sentence_ids = pad_sentence_batch(sentence_ids, vocab.token2id['<blank>'])  # constant pad id
    print("sentence_ids:", sentence_ids)

    sentence_place = tf.placeholder(tf.int32, [None, None])
    embed_sentences = embed(vocab, sentence_place)
    # embed_sentences = tf.nn.l2_normalize(embed_sentences, axis=1)

    # First 3 sentences are the labelled "host" (seed) sentences, the rest are "guest" sentences.
    host = embed_sentences[:3]
    guest = embed_sentences[3:]
    similarity = tf.matmul(guest, tf.transpose(host))
    similarity = tf.identity(similarity, name='similarity')
    probabilities = tf.nn.softmax(similarity)

    labels = [0, 1, 0, 0, 0, 1, 1]
    train_labels_on_seed = get_dummy(labels[:3], labels[3:])
    y = neural_net_label_input(3)

    the_arg_max = get_argmax(similarity)
    pre = get_predict_label(labels[:3], the_arg_max)
    acc = get_accuracy(pre, labels[3:])

    with tf.Session() as sess:
        # Initializing the variables
        sess.run(tf.global_variables_initializer())
        sess.run(tf.tables_initializer())

        train_acc = sess.run(acc, feed_dict={
            sentence_place: sentence_ids,
            y: train_labels_on_seed
        })
        prob = sess.run(probabilities, feed_dict={
            sentence_place: sentence_ids
        })
        print('Training Accuracy: {:.6f}'.format(train_acc))
        print("prob: ", prob, end='\n\n')
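# The helpers used above (get_dummy, get_argmax, get_predict_label, get_accuracy) are
# defined elsewhere and not shown here. The minimal NumPy sketch below illustrates the
# label-propagation idea they appear to implement -- classify each "guest" sentence with
# the label of its most similar "host" (seed) sentence. Function name, shapes, and the
# toy data are assumptions for illustration, not the actual helper implementations.
import numpy as np

def propagate_seed_labels(similarity, host_labels):
    """For each guest row, take the label of the best-matching host column."""
    nearest_host = np.argmax(similarity, axis=1)   # index of most similar host per guest
    return [host_labels[i] for i in nearest_host]  # predicted guest labels

# Toy usage: 4 guest sentences scored against 3 host sentences.
sim = np.array([[0.9, 0.1, 0.2],
                [0.2, 0.8, 0.1],
                [0.3, 0.2, 0.7],
                [0.6, 0.5, 0.4]])
print(propagate_seed_labels(sim, host_labels=[0, 1, 0]))   # -> [0, 1, 0, 0]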
def similarity_matrix():
    source_path = 'data/sample.txt'
    source_text = load_data(source_path)
    print("source_text:", source_text)

    vocab = Vocab()
    vocab.load_vocab_from_embedding(config.embedding_path_air)
    vocab.load_pretrained_embeddings(config.embedding_path_air)

    sentence_ids = text_to_ids(source_text, vocab.token2id)
    sentence_ids = pad_sentence_batch(sentence_ids, vocab.token2id['<blank>'])  # constant pad id
    print("sentence_ids:", sentence_ids)

    sentence_place = tf.placeholder(tf.int32, [None, None])
    embed_sentences = embed(vocab, sentence_place)
    embed_sentences = tf.nn.l2_normalize(embed_sentences, axis=1)
    # enc_embed_input = tf.contrib.layers.embed_sequence(
    #     sentence_place, source_vocab_size, vocab.embed_dim)
    # What is tf.contrib.layers.embed_sequence() for?
    # https://stackoverflow.com/questions/40784656/tf-contrib-layers-embed-sequence-is-for-what

    # First 2 sentences are the "host" sentences, the rest are "guest" sentences.
    host = embed_sentences[:2]
    guest = embed_sentences[2:]
    # labels = [[1, 0], [1, 0], [0, 1], [0, 1]]
    # difference = host[1] - guest[2]
    similarity = tf.matmul(guest, tf.transpose(host))
    similarity = tf.identity(similarity, name='similarity')
    probabilities = tf.nn.softmax(similarity)

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())

        vector = sess.run(embed_sentences, feed_dict={sentence_place: sentence_ids})
        # print("list of vectors: ", vector)
        print("type of vector:", type(vector[0]))
        print("vector shape:", vector[0].shape)

        sim = sess.run(similarity, feed_dict={sentence_place: sentence_ids})
        print("sim: ", sim)

        prob = sess.run(probabilities, feed_dict={sentence_place: sentence_ids})
        print("prob: ", prob)
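# Because each row of embed_sentences is L2-normalized before the matmul, every entry of
# `similarity` is the cosine similarity between one guest and one host sentence vector.
# A small self-contained NumPy check of that equivalence (toy vectors, not the real
# embeddings): normalizing and then taking a dot product equals the cosine formula.
import numpy as np

a = np.array([1.0, 2.0, 3.0])
b = np.array([4.0, 5.0, 6.0])

cosine = np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b))
a_hat = a / np.linalg.norm(a)
b_hat = b / np.linalg.norm(b)
print(np.isclose(cosine, np.dot(a_hat, b_hat)))  # True: normalize-then-dot == cosine similarity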
def train_batch(epochs):
    source_path = 'data/sample.txt'
    source_text = load_data(source_path)
    print("source_text:", source_text)

    vocab = Vocab()
    vocab.load_vocab_from_embedding(config.embedding_path)
    vocab.load_pretrained_embeddings(config.embedding_path)

    sentence_ids = text_to_ids(source_text, vocab.token2id)
    sentence_ids = pad_sentence_batch(sentence_ids, vocab.token2id['<blank>'])  # constant pad id
    print("sentence_ids:", sentence_ids)

    sentence_place = tf.placeholder(tf.int32, [None, None])
    embed_sentences = embed(vocab, sentence_place)
    embed_sentences = tf.nn.l2_normalize(embed_sentences, axis=1)

    # First 2 sentences are the "host" sentences, the rest are "guest" sentences.
    host = embed_sentences[:2]
    guest = embed_sentences[2:]
    similarity = tf.matmul(guest, tf.transpose(host))
    similarity = tf.identity(similarity, name='similarity')
    probabilities = tf.nn.softmax(similarity)

    labels = [[1, 0], [1, 0], [0, 1], [0, 1]]
    y = neural_net_label_input(2)
    cost = get_probabilities_cost(similarity, y)
    optimizer = get_optimizer_single(cost)
    accuracy = get_accuracy(similarity, y)

    all_params = tf.trainable_variables()  # list of trainable Variables

    print('Checking the Training on a Single Batch...')
    with tf.Session() as sess:
        # Initializing the variables
        sess.run(tf.global_variables_initializer())

        param_num = sum([np.prod(sess.run(tf.shape(v))) for v in all_params])
        print('There are {} parameters in the model'.format(param_num))

        # Training cycle
        for epoch in range(epochs):
            sess.run(optimizer, feed_dict={
                sentence_place: sentence_ids,
                y: labels
            })
            loss = sess.run(cost, feed_dict={
                sentence_place: sentence_ids,
                y: labels
            })
            train_acc = sess.run(accuracy, feed_dict={
                sentence_place: sentence_ids,
                y: labels
            })
            prob = sess.run(probabilities, feed_dict={sentence_place: sentence_ids})

            if epoch % 100 == 0:
                print('Epoch {:>2}: '.format(epoch + 1), end='')
                print('Loss: {:>10.4f} Training Accuracy: {:.6f}'.format(loss, train_acc))
                print("prob: ", prob, end='\n\n')
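# neural_net_label_input, get_probabilities_cost, get_optimizer_single and get_accuracy
# are defined elsewhere in this project. The TF 1.x-style sketch below shows what such
# helpers commonly look like (a label placeholder, softmax cross-entropy over the
# similarity logits, Adam, argmax accuracy). These are assumptions for illustration,
# not the project's actual implementations; the learning rate is a made-up default.
import tensorflow as tf  # TF 1.x (or tf.compat.v1)

def neural_net_label_input_sketch(n_classes):
    # One-hot label placeholder with a free batch dimension.
    return tf.placeholder(tf.float32, [None, n_classes], name='y')

def get_probabilities_cost_sketch(logits, labels):
    # Mean softmax cross-entropy between similarity logits and one-hot labels.
    return tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits_v2(labels=labels, logits=logits))

def get_optimizer_single_sketch(cost, learning_rate=0.001):
    # Single Adam step op that minimizes the cost over all trainable variables.
    return tf.train.AdamOptimizer(learning_rate).minimize(cost)

def get_accuracy_sketch(logits, labels):
    # Fraction of rows whose argmax prediction matches the one-hot label.
    correct = tf.equal(tf.argmax(logits, 1), tf.argmax(labels, 1))
    return tf.reduce_mean(tf.cast(correct, tf.float32), name='accuracy')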