def generate(q1, q2, answer, model_google, options):
    """Build padded embedding matrices for a GRE-style fill-in-the-blank question.

    For each option index ``i`` in *options*, three texts are embedded:

    * the full sentence ``q1 + answer[i] + q2`` (question with the blank filled),
    * the bare question ``q1 + q2`` (identical for every option, repeated so the
      three output lists stay index-aligned),
    * the option text ``answer[i]`` alone.

    Each text is cleaned with ``Word2Vec.cleanText``, embedded word-by-word with
    ``Word2Vec.buildWordVector`` (300-dim vectors from *model_google*), and then
    centred inside a fixed-size zero matrix: 49x300 for sentences/questions,
    4x300 for options.

    Parameters
    ----------
    q1, q2 : str
        Question text before and after the blank.
    answer : sequence of str
        Candidate answer texts, indexed by the values in *options*.
    model_google : word-vector model
        Pre-trained model accepted by ``Word2Vec.buildWordVector``.
    options : iterable of int
        Indices into *answer* selecting which candidates to embed.

    Returns
    -------
    (dataset, q_set, a_set) : tuple of three lists of np.ndarray
        Padded matrices, one per option, in *options* order.

    NOTE(review): a text longer than the matrix height (49 or 4 tokens) would
    make the centring offset negative and misplace the rows — assumed not to
    occur in this corpus; confirm upstream truncation.
    """
    n_dim = 300  # embedding width expected by buildWordVector

    def _embed(texts, height):
        """Clean, embed, and centre-pad *texts* into (height, 300) matrices."""
        cleaned = Word2Vec.cleanText(texts)
        vectors = [Word2Vec.buildWordVector(model_google, z, n_dim) for z in cleaned]
        padded = []
        for vec in vectors:
            frame = np.zeros((height, n_dim))
            m = len(vec)
            if m:  # an empty text keeps an all-zero frame
                start = int((height - m) / 2)
                frame[start:start + m] = vec
            padded.append(frame)
        return padded

    # Full sentences: the question with each candidate spliced into the blank.
    # (The original duplicated this clean/embed/pad pipeline three times and
    # iterated with `for a in a:`, shadowing the list being iterated.)
    dataset = _embed([q1 + answer[i] + q2 for i in options], 49)
    # Bare question, once per option, so dataset/q_set/a_set stay aligned.
    q_set = _embed([q1 + q2 for _ in options], 49)
    # The candidate answers on their own.
    a_set = _embed([answer[i] for i in options], 4)
    return dataset, q_set, a_set
def generate(q1, q2, answer, model_google, options):
    """Embed each filled-in question sentence as a centre-padded 49x300 matrix.

    For every index in *options*, the sentence ``q1 + answer[i] + q2`` is
    built, cleaned via ``Word2Vec.cleanText``, embedded word-by-word with
    ``Word2Vec.buildWordVector`` (300-dim vectors from *model_google*), and
    centred inside a 49x300 zero matrix.

    Returns a list of ``np.ndarray`` matrices, one per option, in *options*
    order.
    """
    n_dim = 300
    # Splice each candidate answer into the blank between q1 and q2.
    filled = [q1 + answer[idx] + q2 for idx in options]
    cleaned = Word2Vec.cleanText(filled)
    embedded = [Word2Vec.buildWordVector(model_google, words, n_dim) for words in cleaned]

    dataset = []
    for vec in embedded:
        frame = np.zeros((49, n_dim))
        length = len(vec)
        # Centre the embedded words vertically inside the fixed-height frame.
        offset = int((49 - length) / 2)
        frame[offset:offset + length] = vec
        dataset.append(np.array(frame))
    return dataset
import Word2Vec
import gensim
import numpy as np
import pymysql.cursors

# ===========================================
# load data
# Pull every sentence (column 1) out of the GRES table and clean it.
connection = pymysql.connect(user='******', password='******', database='GRE')
cursor = connection.cursor()
commit = "select * from GRES"
cursor.execute(commit)
Sentences = [each[1] for each in cursor.fetchall()]
Sentences = Word2Vec.cleanText(Sentences)

# ===========================================
# Load model
# FIX: Word2Vec.load_word2vec_format was deprecated in gensim 1.0 and removed
# in gensim 4.x; KeyedVectors.load_word2vec_format is the supported loader for
# the pre-trained Google News binary (and is what the sibling script already
# uses).
model_google = gensim.models.KeyedVectors.load_word2vec_format(
    '../model/GoogleNews-vectors-negative300.bin', binary=True)
# Word2Vec.Train_Wrod2VEc(Sentences, model_google)

# ===========================================
# Generalize words
n_dim = 300  # dimensionality of the Google News vectors
train_vectors = [
    Word2Vec.buildWordVector(model_google, z, n_dim) for z in Sentences
]
Word2Vec.storeVecs(train_vectors, '../vectors/google_vecs.txt')
import Word2Vec
# FIX: gensim and pymysql are used below but were never imported, so the
# script raised NameError at run time.
import gensim
import pymysql.cursors

# ===========================================
# load data
# Fetch all rows of GRES2 once: column 1 holds the question text and column 2
# the answer text. (The original executed the identical query twice just to
# read the two columns.)
connection = pymysql.connect(user='******', password='******', database='GRE')
cursor = connection.cursor()
commit = "select * from GRES2"
cursor.execute(commit)
rows = cursor.fetchall()

# ===========================================
# Load model
model_google = gensim.models.KeyedVectors.load_word2vec_format(
    '../GoogleModel/GoogleNews-vectors-negative300.bin', binary=True)
# Word2Vec.Train_Wrod2VEc(Sentences, model_google)

# ===========================================
# Generalize words
n_dim = 300  # dimensionality of the Google News vectors

# Question texts (column 1) -> embedded and stored.
Sentences = Word2Vec.cleanText([each[1] for each in rows])
train_vectors = [Word2Vec.buildWordVector(model_google, z, n_dim) for z in Sentences]
Word2Vec.storeVecs(train_vectors, '../data for input1/q_vecs.pkl')

# Answer texts (column 2) -> embedded and stored alongside the questions.
Sentences = Word2Vec.cleanText([each[2] for each in rows])
train_vectors = [Word2Vec.buildWordVector(model_google, z, n_dim) for z in Sentences]
Word2Vec.storeVecs(train_vectors, '../data for input1/a_vecs.pkl')
import Word2Vec
import gensim
import numpy as np
import pymysql.cursors

# ===========================================
# load data
# Read every sentence (column 1) from the GRES table and clean it.
connection = pymysql.connect(user='******', password='******', database='GRE')
cursor = connection.cursor()
commit = "select * from GRES"
cursor.execute(commit)
Sentences = Word2Vec.cleanText([row[1] for row in cursor.fetchall()])

# ===========================================
# Train model
# Continue training the locally stored word2vec model on the cleaned corpus.
model_w2v = gensim.models.Word2Vec.load('../model/model_w2v')
Word2Vec.Train_Wrod2VEc(Sentences, model_w2v)

# ===========================================
# Generalize words
# Embed each cleaned sentence with buildWordVector (300-dim) and persist.
n_dim = 300
train_vectors = [
    Word2Vec.buildWordVector(model_w2v, sentence, n_dim)
    for sentence in Sentences
]
Word2Vec.storeVecs(train_vectors, '../model/w2v_vecs.txt')