dep_embedding_matrix = get_embedding_matrix(word_index, dep_embedding_index,
                                            FLAGS.word_embedding_size)
print("finished loading dep embeddings")
fast_embedding_path = "fast-text/wiki.simple.vec"
fast_embedding_index = load_embedding(fast_embedding_path)
fast_embedding_matrix = get_embedding_matrix(word_index, fast_embedding_index,
                                             FLAGS.word_embedding_size)
print("finish loading fast embedding")
embedding_path = "glove.6B/glove.6B.{}d.txt".format(FLAGS.word_embedding_size)
embedding_index = load_embedding(embedding_path)
embedding_matrix = get_embedding_matrix(word_index, embedding_index,
                                        FLAGS.word_embedding_size)
print("finish loading linear embedding")
vocab_size = len(word_index)

# convert words to indices including padding and cutting
train_x = tokens_to_indices(word_index, train_tokens, MAXLEN)
valid_x = tokens_to_indices(word_index, valid_tokens, MAXLEN)
test_x = tokens_to_indices(word_index, test_tokens, MAXLEN)

# materialize the zips: in Python 3, zip() returns a one-shot iterator
train_data = list(zip(train_x, train_sq_len, train_ch, train_y))
valid_data = list(zip(valid_x, valid_sq_len, valid_ch, valid_y))
test_data = list(zip(test_x, test_sq_len, test_ch, test_y))
print("Embedding loaded")
# Create a model
graph = tf.Graph()
with graph.as_default():
    with tf.Session() as sess:
        mem_net = MemNet(vocab_size=vocab_size,
                         statment_size=MAXLEN,
                         word_embedding_size=FLAGS.word_embedding_size,
                         num_hops=FLAGS.num_hops,
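
For context, the snippet above relies on load_embedding and get_embedding_matrix, which are not defined here. The following is a minimal sketch of what they plausibly do, assuming GloVe/fastText-style text vector files and a word-to-row-index vocabulary; treat the signatures and the padding convention (row 0 left all-zero) as assumptions rather than the original implementation.

import numpy as np

def load_embedding(path):
    # Parse a GloVe/fastText-style text file into {word: vector}.
    index = {}
    with open(path, encoding="utf-8") as f:
        for line in f:
            parts = line.rstrip().split(" ")
            if len(parts) == 2:  # fastText .vec header line: "count dim"
                continue
            index[parts[0]] = np.asarray(parts[1:], dtype=np.float32)
    return index

def get_embedding_matrix(word_index, embedding_index, dim):
    # One row per vocabulary index; words without a pretrained
    # vector (and the padding row 0) stay all-zero.
    matrix = np.zeros((len(word_index) + 1, dim), dtype=np.float32)
    for word, i in word_index.items():
        vector = embedding_index.get(word)
        if vector is not None:
            matrix[i] = vector
    return matrix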
Example #2
train_tokens = texts_to_tokens(train_statement)
valid_tokens = texts_to_tokens(valid_statement)
test_tokens = texts_to_tokens(test_statement)

train_sq_len = get_sequence_length(train_tokens)
valid_sq_len = get_sequence_length(valid_tokens)
test_sq_len = get_sequence_length(test_tokens)

# create vocabulary from the data itself
wordlist = itertools.chain.from_iterable(train_tokens)
word_index, _ = build_vocab(wordlist)
vocab_size = len(word_index)

# convert words to indices including padding and cutting
train_x = tokens_to_indices(word_index, train_tokens, MAXLEN)
valid_x = tokens_to_indices(word_index, valid_tokens, MAXLEN)
test_x = tokens_to_indices(word_index, test_tokens, MAXLEN)

# convert topics to indices, padding/cutting to at most max_topic entries
max_topic = 5
train_topic = tokens_to_indices(topic_index, train_topic, max_topic)
valid_topic = tokens_to_indices(topic_index, valid_topic, max_topic)
test_topic = tokens_to_indices(topic_index, test_topic, max_topic)

# get per-statement topic counts (sequence lengths)
train_tp_sq = np.array([np.count_nonzero(t) for t in train_topic])
valid_tp_sq = np.array([np.count_nonzero(t) for t in valid_topic])
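
tokens_to_indices does the "padding and cutting" the comments refer to, but its definition is not part of these examples. A minimal assumed implementation, with index 0 reserved for padding and unknown tokens also mapped to 0:

import numpy as np

def tokens_to_indices(word_index, token_lists, maxlen, unk_id=0):
    # Fixed-size matrix: sentences longer than maxlen are cut,
    # shorter ones are right-padded with zeros.
    out = np.zeros((len(token_lists), maxlen), dtype=np.int32)
    for row, tokens in enumerate(token_lists):
        for col, token in enumerate(tokens[:maxlen]):
            out[row, col] = word_index.get(token, unk_id)
    return out

Mapping unknowns to the padding id keeps the sketch simple; a real vocabulary would more likely reserve a separate UNK index.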
Example #3
# text
train_tokens = texts_to_tokens(train_statement)
valid_tokens = texts_to_tokens(valid_statement)
test_tokens = texts_to_tokens(test_statement)
# text sequence lengths
train_sq_len = get_sequence_length(train_tokens)
valid_sq_len = get_sequence_length(valid_tokens)
test_sq_len = get_sequence_length(test_tokens)

# create word vocabulary from the data itself
wordlist = itertools.chain.from_iterable(train_tokens)
word_index, _ = build_vocab(wordlist)
vocab_size = len(word_index)

# convert words to indices including padding and cutting
train_x = tokens_to_indices(word_index, train_tokens, MAXLEN)
valid_x = tokens_to_indices(word_index, valid_tokens, MAXLEN)
test_x = tokens_to_indices(word_index, test_tokens, MAXLEN)

# convert topics to indices, padding/cutting to at most max_topic entries
max_topic = 5
train_topic = tokens_to_indices(topic_index, train_topic, max_topic)
valid_topic = tokens_to_indices(topic_index, valid_topic, max_topic)
test_topic = tokens_to_indices(topic_index, test_topic, max_topic)

# get per-statement topic counts (sequence lengths)
train_tp_sq = np.array([np.count_nonzero(t) for t in train_topic])
valid_tp_sq = np.array([np.count_nonzero(t) for t in valid_topic])
test_tp_sq = np.array([np.count_nonzero(t) for t in test_topic])

# speaker
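
The remaining helpers, build_vocab and get_sequence_length, are also undefined in these snippets. A plausible minimal version, assuming a frequency-sorted vocabulary with index 0 reserved for padding and lengths measured as raw token counts:

import numpy as np
from collections import Counter

def build_vocab(words):
    # Frequency-sorted vocabulary; real words start at index 1,
    # leaving 0 for padding. Returns (word -> index, counts).
    counts = Counter(words)
    word_index = {w: i + 1 for i, (w, _) in enumerate(counts.most_common())}
    return word_index, counts

def get_sequence_length(token_lists):
    # Number of tokens per statement, before any padding/cutting.
    return np.array([len(tokens) for tokens in token_lists])

Note that the examples call get_sequence_length before padding to MAXLEN, so in practice these lengths may need clipping at MAXLEN to stay consistent with the rows of train_x.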