Code example #1
import random

import tensorflow as tf
from gensim.models import word2vec

import helper_fun  # project-local preprocessing helpers
import RNN_model   # project-local module that defines the RNN graph

# Build the RNN graph with the hyperparameters stored on the module.
myRnnModel = RNN_model.build_graph(
    num_batch=RNN_model.parameters.num_batch,
    max_sequence_len=RNN_model.parameters.max_sequence_len,
    hidden_units=RNN_model.parameters.hidden_units,
    num_classes=RNN_model.parameters.num_classes)

# Training inputs, one-hot labels, and per-example sequence lengths.
train_x = []
train_y = []
seq_len = []
print("Beginning Training")

# Positive examples: one-hot label [0, 1].
with open('data/pos.txt', "r") as f:
    for line in f:
        cur = helper_fun.paragraph_to_sentencelist(line, remove_stopwords=True)
        train_x += cur
        train_y.append([0, 1])
        seq_len.append(len(cur[0]))

# Negative examples: one-hot label [1, 0].
with open('data/neg.txt', "r") as f:
    for line in f:
        cur = helper_fun.paragraph_to_sentencelist(line, remove_stopwords=True)
        train_x += cur
        train_y.append([1, 0])
        seq_len.append(len(cur[0]))

sess = tf.Session()
init = tf.global_variables_initializer()  # replaces the deprecated tf.initialize_all_variables()
# The initializer is not run here: the saver restores all variables
# from the checkpoint instead.
#sess.run(init)
myRnnModel['saver'].restore(sess, "trainedmodels/rnn.model")
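
helper_fun is a project-local module whose source is not shown in these examples. Below is a minimal sketch of what paragraph_to_sentencelist might look like, assuming it wraps NLTK's Punkt tokenizer and stopword list; the signature is an assumption chosen to reconcile both call sites (example #1 omits the tokenizer argument, example #2 passes one), not the project's actual code.

# Hypothetical sketch of helper_fun.paragraph_to_sentencelist; the real
# project-local implementation is not shown in these examples.
import nltk
from nltk.corpus import stopwords

def paragraph_to_sentencelist(paragraph, tokenizer=None, remove_stopwords=False):
    """Split a paragraph into a list of word lists, one per sentence."""
    if tokenizer is None:
        # Fall back to the Punkt sentence tokenizer when none is supplied.
        tokenizer = nltk.data.load('tokenizers/punkt/english.pickle')
    sentences = []
    for raw in tokenizer.tokenize(paragraph.strip()):
        # Lower-case and tokenize each sentence into words.
        words = nltk.word_tokenize(raw.lower())
        if remove_stopwords:
            stops = set(stopwords.words("english"))
            words = [w for w in words if w not in stops]
        if words:
            sentences.append(words)
    return sentences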
Code example #2
print("Beginning Training")

# Load the Punkt sentence tokenizer (an unsupervised sentence-boundary
# detector, not a punctuation list).
tokenizer = nltk.data.load('tokenizers/punkt/english.pickle')

data = []

files = glob.glob("./data/*.txt")

# Split each paragraph of each corpus file into tokenized word lists,
# one list per sentence.
for file in files:
    with open(file, "r") as f:
        for line in f:
            data += helper_fun.paragraph_to_sentencelist(line,
                                                         tokenizer,
                                                         remove_stopwords=True)
'''
with open('data/neg.txt', "r") as f:
    for line in f:
        data += helper_fun.paragraph_to_sentencelist(line, tokenizer,
                                                     remove_stopwords=True)
'''

#print (len(data))

# createmodelandtrain() and finalize_model() are defined elsewhere in this
# script (see the sketch after this example).
model = createmodelandtrain(data)
model = finalize_model(model)
model.save("trainedmodels/word2vecTrained.mod")

#print(model.vocab)
#model=word2vec.Word2Vec.load("trainedmodels/word2vecTrained.mod")
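
createmodelandtrain() and finalize_model() are not shown above. Here is a minimal sketch, assuming they wrap gensim's pre-4.0 word2vec API (which the surrounding code suggests, e.g. the commented model.vocab); the hyperparameter values are illustrative, not taken from the project. It also shows the reload step hinted at by the last commented line.

# Hypothetical sketches of the two helpers used above; the real definitions
# live elsewhere in the script. Parameter values are illustrative only.
from gensim.models import word2vec

def createmodelandtrain(sentences):
    # Train a word2vec model on the tokenized sentences
    # (pre-4.0 gensim API; in gensim >= 4.0, 'size' becomes 'vector_size').
    return word2vec.Word2Vec(sentences, size=300, window=5,
                             min_count=5, workers=4)

def finalize_model(model):
    # Precompute L2-normalized vectors and discard training state;
    # the model cannot be trained further after this call.
    model.init_sims(replace=True)
    return model

# Reloading and querying the saved model, as the commented line hints:
model = word2vec.Word2Vec.load("trainedmodels/word2vecTrained.mod")
# Pre-4.0 gensim; in gensim >= 4.0 this becomes model.wv.most_similar(...).
print(model.most_similar("good", topn=5))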