Example #1
def train_batch():
    # Snippet from a larger project: assumes `import tensorflow as tf` and
    # project helpers (load_data, get_tokens, Vocab, text_to_ids,
    # pad_sentence_batch, embed, neural_net_label_input, get_dummy,
    # get_argmax, get_predict_label, get_accuracy) are in scope.

    source_path = 'data/sample2.txt'
    source_text = load_data(source_path)
    print("source_text:", source_text)

    initial_words = get_tokens(source_text)
    vocab = Vocab(initial_tokens=initial_words)
    vocab.load_pretrained_embeddings(config.embedding_path_air)

    sentence_ids = text_to_ids(source_text, vocab.token2id)
    sentence_ids = pad_sentence_batch(sentence_ids, vocab.token2id['<blank>'])
    # constants
    print("sentence_ids:", sentence_ids)

    sentence_place = tf.placeholder(tf.int32, [None, None])
    embed_sentences = embed(vocab, sentence_place)
    # embed_sentences = tf.nn.l2_normalize(embed_sentences, axis=1)

    host = embed_sentences[:3]   # the first 3 sentences are the labeled seeds
    guest = embed_sentences[3:]  # the remaining 4 are scored against the seeds

    similarity = tf.matmul(guest, tf.transpose(host))  # (num_guest, num_host) scores
    similarity = tf.identity(similarity, name='similarity')
    probabilities = tf.nn.softmax(similarity)  # per-guest distribution over hosts

    labels = [0, 1, 0, 0, 0, 1, 1]  # first 3 entries label the hosts, the rest the guests
    train_labels_on_seed = get_dummy(labels[:3], labels[3:])
    y = neural_net_label_input(3)

    the_arg_max = get_argmax(similarity)              # index of the most similar host per guest
    pre = get_predict_label(labels[:3], the_arg_max)  # guest inherits that host's label
    acc = get_accuracy(pre, labels[3:])

    with tf.Session() as sess:
        # Initializing the variables
        sess.run(tf.global_variables_initializer())
        sess.run(tf.tables_initializer())

        train_acc = sess.run(acc, feed_dict={
            sentence_place: sentence_ids,
            y: train_labels_on_seed
        })
        prob = sess.run(probabilities, feed_dict={
            sentence_place: sentence_ids
        })

        print('Training Accuracy: {:.6f}'.format(train_acc))
        print("prob: ", prob, end='\n\n')
Example #2
def similarity_matrix():

    source_path = 'data/sample.txt'
    source_text = load_data(source_path)
    print("source_text:", source_text)

    vocab = Vocab()
    vocab.load_vocab_from_embedding(config.embedding_path_air)
    vocab.load_pretrained_embeddings(config.embedding_path_air)

    sentence_ids = text_to_ids(source_text, vocab.token2id)
    sentence_ids = pad_sentence_batch(sentence_ids, vocab.token2id['<blank>'])
    # constants
    print("sentence_ids:", sentence_ids)

    sentence_place = tf.placeholder(tf.int32, [None, None])
    embed_sentences = embed(vocab, sentence_place)
    embed_sentences = tf.nn.l2_normalize(embed_sentences, axis=1)

    # enc_embed_input = tf.contrib.layers.embed_sequence(
    #     sentence_place, source_vocab_size, vocab.embed_dim)
    # tf.contrib.layers.embed_sequence() maps a batch of integer id sequences
    # to their embedded vectors, creating an embedding matrix of shape
    # [vocab_size, embed_dim] under the hood; see
    # https://stackoverflow.com/questions/40784656/tf-contrib-layers-embed-sequence-is-for-what

    host = embed_sentences[:2]
    guest = embed_sentences[2:]
    # labels = [[1, 0], [1, 0], [0, 1], [0, 1]]

    # difference = host[1]-guest[2]

    similarity = tf.matmul(guest, tf.transpose(host))
    similarity = tf.identity(similarity, name='similarity')
    probabilities = tf.nn.softmax(similarity)

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())

        vector = sess.run(embed_sentences,
                          feed_dict={sentence_place: sentence_ids})
        # print("list of vectors: ", vector)
        print("type of vector:", type(vector[0]))
        print("vector length:", vector[0].shape)

        sim = sess.run(similarity, feed_dict={sentence_place: sentence_ids})
        print("sim: ", sim)

        prob = sess.run(probabilities,
                        feed_dict={sentence_place: sentence_ids})
        print("prob: ", prob)
Example #4
def train_batch(epochs):
    # Assumes `import tensorflow as tf`, `import numpy as np`, a `config`
    # module with embedding_path, the project helpers used above, plus
    # get_probabilities_cost and get_optimizer_single.

    source_path = 'data/sample.txt'
    source_text = load_data(source_path)
    print("source_text:", source_text)

    vocab = Vocab()
    vocab.load_vocab_from_embedding(config.embedding_path)
    vocab.load_pretrained_embeddings(config.embedding_path)

    sentence_ids = text_to_ids(source_text, vocab.token2id)
    sentence_ids = pad_sentence_batch(sentence_ids, vocab.token2id['<blank>'])
    # constants
    print("sentence_ids:", sentence_ids)

    sentence_place = tf.placeholder(tf.int32, [None, None])
    embed_sentences = embed(vocab, sentence_place)
    embed_sentences = tf.nn.l2_normalize(embed_sentences, axis=1)

    host = embed_sentences[:2]
    guest = embed_sentences[2:]

    similarity = tf.matmul(guest, tf.transpose(host))
    similarity = tf.identity(similarity, name='similarity')
    probabilities = tf.nn.softmax(similarity)

    labels = [[1, 0], [1, 0], [0, 1], [0, 1]]

    y = neural_net_label_input(2)

    cost = get_probabilities_cost(similarity, y)
    optimizer = get_optimizer_single(cost)

    accuracy = get_accuracy(similarity, y)
    all_params = tf.trainable_variables()  # list of tf.Variable objects

    print('Checking the Training on a Single Batch...')
    with tf.Session() as sess:
        # Initializing the variables
        sess.run(tf.global_variables_initializer())

        # Count trainable parameters from the variables' static shapes.
        param_num = sum(np.prod(v.get_shape().as_list()) for v in all_params)

        print('There are {} parameters in the model'.format(param_num))

        # Training cycle
        for epoch in range(epochs):
            sess.run(optimizer, feed_dict={
                sentence_place: sentence_ids,
                y: labels
            })

            if epoch % 100 == 0:
                # Only evaluate the metrics on epochs that get logged.
                loss, train_acc = sess.run([cost, accuracy], feed_dict={
                    sentence_place: sentence_ids,
                    y: labels
                })
                prob = sess.run(probabilities,
                                feed_dict={sentence_place: sentence_ids})
                print('Epoch {:>2}:  '.format(epoch + 1), end='')
                print('Loss: {:>10.4f} Training Accuracy: {:.6f}'.format(
                    loss, train_acc))
                print("prob: ", prob, end='\n\n')