import random

import tensorflow as tf
from gensim.models import word2vec

import helper_fun
import RNN_model

# Build the RNN graph from the module-level hyperparameters.
myRnnModel = RNN_model.build_graph(
    num_batch=RNN_model.parameters.num_batch,
    max_sequence_len=RNN_model.parameters.max_sequence_len,
    hidden_units=RNN_model.parameters.hidden_units,
    num_classes=RNN_model.parameters.num_classes)

train_x = []
train_y = []
seq_len = []

print("Beginning Training")

# Positive examples are labeled [0, 1], negative examples [1, 0].
# Note: one label is appended per input line, so this assumes
# paragraph_to_sentencelist returns exactly one sentence per line;
# otherwise train_x and train_y fall out of alignment.
with open('data/pos.txt', "r") as f:
    for line in f:
        cur = helper_fun.paragraph_to_sentencelist(line, remove_stopwords=True)
        train_x += cur
        train_y.append([0, 1])
        seq_len.append(len(cur[0]))

with open('data/neg.txt', "r") as f:
    for line in f:
        cur = helper_fun.paragraph_to_sentencelist(line, remove_stopwords=True)
        train_x += cur
        train_y.append([1, 0])
        seq_len.append(len(cur[0]))

sess = tf.Session()
# tf.initialize_all_variables() is deprecated; use the current initializer.
init = tf.global_variables_initializer()
#sess.run(init)

# Restore previously trained weights instead of initializing from scratch.
myRnnModel['saver'].restore(sess, "trainedmodels/rnn.model")
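# --- Hypothetical continuation: one way the restored graph could be trained.
# This is a minimal sketch, not code from the project: the dictionary keys
# 'input', 'labels', 'seq_len', 'train_op', and 'loss', and the num_epochs
# value, are assumptions about what RNN_model.build_graph returns; the
# sentences in train_x would also first need to be mapped to word indices
# or embedding vectors before they can be fed to the graph.
num_epochs = 10  # hypothetical hyperparameter
for epoch in range(num_epochs):
    _, loss_val = sess.run(
        [myRnnModel['train_op'], myRnnModel['loss']],
        feed_dict={myRnnModel['input']: train_x,
                   myRnnModel['labels']: train_y,
                   myRnnModel['seq_len']: seq_len})
    print("epoch %d: training loss %.4f" % (epoch, loss_val))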
print("Beginning Training") #load punkt tokenizer. punkt = punctuations ('.', ',', '?', ...) tokenizer = nltk.data.load('tokenizers/punkt/english.pickle') data = [] #files=(glob.glob("./data/*.txt")) files = [] for file in files: with open(file, "r") as f: for line in f: data += helper_fun.paragraph_to_sentencelist(line, tokenizer, remove_stopwords=True) ''' with open('data/neg.txt', "r") as f: for line in f: data+=helper_fun.paragraph_to_sentencelist(line, tokenizer, remove_stopwords=True) ''' #print (len(data)) model = createmodelandtrain(data) model = finalize_model(model) model.save("trainedmodels/word2vecTrained.mod") #print(model.vocab) #model=word2vec.Word2Vec.load("trainedmodels/word2vecTrained.mod")