import numpy as np

import Word2Vec


def generate(q1, q2, answer, model_google, options):
    # Build one complete sentence per option: question prefix + candidate answer + question suffix.
    sentences = []
    for i in options:
        sentences.append(q1 + answer[i] + q2)
    sentences = Word2Vec.cleanText(sentences)
    n_dim = 300
    vectors = [Word2Vec.buildWordVector(model_google, z, n_dim) for z in sentences]

    # Zero-pad each completed sentence to a fixed 49 x 300 matrix, centring the word vectors.
    dataset = []
    for vec in vectors:
        sentence = np.zeros((49, 300))
        m = len(vec)
        start = int((49 - m) / 2)
        sentence[start:start + m] = vec
        dataset.append(np.array(sentence))

    # Encode the bare question (prefix + suffix, without any candidate answer), padded the same way.
    question = []
    for i in options:
        question.append(q1 + q2)
    question = Word2Vec.cleanText(question)
    q = [Word2Vec.buildWordVector(model_google, z, n_dim) for z in question]
    q_set = []
    for vec in q:
        sentence = np.zeros((49, 300))
        m = len(vec)
        start = int((49 - m) / 2)
        sentence[start:start + m] = vec
        q_set.append(np.array(sentence))

    # Encode each candidate answer on its own, padded to a fixed 4 x 300 matrix.
    option = []
    for i in options:
        option.append(answer[i])
    option = Word2Vec.cleanText(option)
    answers = [Word2Vec.buildWordVector(model_google, z, n_dim) for z in option]
    a_set = []
    for vec in answers:
        sentence = np.zeros((4, 300))
        m = len(vec)
        if m != 0:
            start = int((4 - m) / 2)
            sentence[start:start + m] = vec
        a_set.append(np.array(sentence))

    return dataset, q_set, a_set
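# ===========================================
# Usage sketch (illustrative, not part of the original source): how generate()
# might be called for one fill-in-the-blank question. The question fragments,
# answer dictionary and option keys are made-up placeholders, and the
# GoogleNews model path is assumed to match the training script.
if __name__ == '__main__':
    import gensim

    model_google = gensim.models.Word2Vec.load_word2vec_format(
        '../model/GoogleNews-vectors-negative300.bin', binary=True)

    q1 = "The committee found the proposal "              # text before the blank (placeholder)
    q2 = " and rejected it without further discussion."   # text after the blank (placeholder)
    answer = {'A': 'untenable', 'B': 'compelling', 'C': 'derivative'}
    options = ['A', 'B', 'C']

    dataset, q_set, a_set = generate(q1, q2, answer, model_google, options)
    # dataset[i]: 49 x 300 matrix for option i's completed sentence
    # q_set[i]:   49 x 300 matrix for the bare question
    # a_set[i]:   4 x 300 matrix for the candidate answer alone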
import numpy as np

import Word2Vec


def generate(q1, q2, answer, model_google, options):
    # Build one complete sentence per option: question prefix + candidate answer + question suffix.
    sentences = []
    for i in options:
        sentences.append(q1 + answer[i] + q2)
    sentences = Word2Vec.cleanText(sentences)
    n_dim = 300
    vectors = [Word2Vec.buildWordVector(model_google, z, n_dim) for z in sentences]

    # Zero-pad each completed sentence to a fixed 49 x 300 matrix, centring the word vectors.
    dataset = []
    for vec in vectors:
        sentence = np.zeros((49, 300))
        m = len(vec)
        start = int((49 - m) / 2)
        sentence[start:start + m] = vec
        dataset.append(np.array(sentence))

    return dataset
import gensim
import numpy as np
import pymysql.cursors

import Word2Vec

# ===========================================
# Load data
connection = pymysql.connect(user='******', password='******', database='GRE')
cursor = connection.cursor()
commit = "select * from GRES"
cursor.execute(commit)
Sentences = [each[1] for each in cursor.fetchall()]
Sentences = Word2Vec.cleanText(Sentences)

# ===========================================
# Load model
model_google = gensim.models.Word2Vec.load_word2vec_format(
    '../model/GoogleNews-vectors-negative300.bin', binary=True)
# Word2Vec.Train_Wrod2VEc(Sentences, model_google)

# ===========================================
# Build word vectors for every sentence and store them
n_dim = 300
train_vectors = [Word2Vec.buildWordVector(model_google, z, n_dim) for z in Sentences]
Word2Vec.storeVecs(train_vectors, '../vectors/google_vecs.txt')
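# ===========================================
# Sanity-check sketch (illustrative, not part of the original source): print
# the shapes of the first few sentence vectors. Assuming buildWordVector
# returns one 300-dimensional vector per token, each entry should be roughly
# (number_of_tokens, 300).
for sent, vec in zip(Sentences[:3], train_vectors[:3]):
    print(len(sent), 'tokens ->', np.shape(vec))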
import gensim
import pymysql.cursors

import Word2Vec
import Doc2Vec

# ===========================================
# Load dictionary
connection = pymysql.connect(user='******', password='******', database='GRE')
cursor = connection.cursor()
commit = "select * from GRES"
cursor.execute(commit)
Sentences = [each[1] for each in cursor.fetchall()]
Dictionary1 = Word2Vec.cleanText(Sentences)
Dictionary2 = Doc2Vec.Preprocessing(Sentences)

# ===========================================
# Instantiate our DM and DBOW models
size = 400
model_dm = gensim.models.Doc2Vec(min_count=0, window=10, size=size,
                                 sample=1e-3, negative=5, workers=3)
model_dbow = gensim.models.Doc2Vec(min_count=0, window=10, size=size,
                                   sample=1e-3, negative=5, dm=0, workers=3)
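# ===========================================
# Training sketch (illustrative, not part of the original source): one way the
# two paragraph-vector models above might be fitted, assuming
# Doc2Vec.Preprocessing returns gensim TaggedDocument/LabeledSentence objects
# and the same pre-1.0 gensim API implied by the size= keyword above.
model_dm.build_vocab(Dictionary2)
model_dbow.build_vocab(Dictionary2)
for epoch in range(10):  # the epoch count is an arbitrary placeholder
    model_dm.train(Dictionary2)
    model_dbow.train(Dictionary2)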