import numpy as np

import Word2Vec


def generate(q1, q2, answer, model_google, options):
    # Build one full sentence per option by splicing the candidate answer
    # into the gap between the two halves of the question.
    sentences = []
    for i in options:
        sentences.append(q1 + answer[i] + q2)
    sentences = Word2Vec.cleanText(sentences)
    n_dim = 300
    vectors = [
        Word2Vec.buildWordVector(model_google, z, n_dim) for z in sentences
    ]
    # Center-pad each sentence matrix into a fixed 49 x 300 frame
    # (49 presumably being the longest cleaned sentence in the corpus).
    dataset = []
    for vec in vectors:
        sentence = np.zeros((49, 300))
        m = len(vec)
        start = (49 - m) // 2
        sentence[start:start + m] = vec
        dataset.append(sentence)

    # Question channel: the same (q1 + q2) string is repeated once per
    # option so its length matches dataset.
    question = []
    for i in options:
        question.append(q1 + q2)
    question = Word2Vec.cleanText(question)
    q = [Word2Vec.buildWordVector(model_google, z, n_dim) for z in question]
    q_set = []
    for vec in q:
        sentence = np.zeros((49, 300))
        m = len(vec)
        start = (49 - m) // 2
        sentence[start:start + m] = vec
        q_set.append(sentence)

    # Answer channel: each option on its own, center-padded into a fixed
    # 4 x 300 frame (options are assumed to be at most four tokens).
    option = []
    for i in options:
        option.append(answer[i])
    option = Word2Vec.cleanText(option)
    vectors = [Word2Vec.buildWordVector(model_google, z, n_dim) for z in option]
    a_set = []
    for vec in vectors:  # the original `for a in a` shadowed its own list
        sentence = np.zeros((4, 300))
        m = len(vec)
        if m != 0:  # an option may clean down to an empty token list
            start = (4 - m) // 2
            sentence[start:start + m] = vec
        a_set.append(sentence)
    return dataset, q_set, a_set
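The three padding loops in this example share the same center-padding logic. A minimal sketch of a helper that could replace them, assuming the fixed frame sizes above (pad_center is hypothetical, not part of the original code):

import numpy as np


def pad_center(vec, max_len, n_dim=300):
    # Hypothetical helper: center a variable-length stack of word vectors
    # inside a fixed max_len x n_dim zero matrix.
    sentence = np.zeros((max_len, n_dim))
    m = len(vec)
    if m != 0:
        start = (max_len - m) // 2
        sentence[start:start + m] = vec
    return sentence

With it, each of the three loops reduces to a single comprehension, e.g. dataset = [pad_center(v, 49) for v in vectors].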
Example #2
import numpy as np

import Word2Vec


def generate(q1, q2, answer, model_google, options):
    # Shorter variant of the function above: only the combined-sentence
    # channel is built and returned.
    sentences = []
    for i in options:
        sentences.append(q1 + answer[i] + q2)
    sentences = Word2Vec.cleanText(sentences)
    n_dim = 300
    vectors = [Word2Vec.buildWordVector(model_google, z, n_dim) for z in sentences]
    dataset = []
    for vec in vectors:
        sentence = np.zeros((49, 300))
        m = len(vec)
        start = (49 - m) // 2
        sentence[start:start + m] = vec
        dataset.append(sentence)
    return dataset
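Word2Vec here is a local helper module (imported in the next example), not gensim's class, and buildWordVector is never shown. Since its result is padded row by row above, it plausibly stacks one embedding per in-vocabulary token; a sketch under that assumption (the body is inferred, not confirmed by the source):

import numpy as np


def buildWordVector(model, tokens, n_dim):
    # Assumed behavior: one n_dim-dimensional row per token found in the
    # embedding model; out-of-vocabulary tokens are skipped.
    rows = [model[w] for w in tokens if w in model]
    return np.array(rows) if rows else np.zeros((0, n_dim))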
Example #3
import Word2Vec
import gensim
import numpy as np
import pymysql.cursors

# ===========================================
# Load data
connection = pymysql.connect(user='******', password='******', database='GRE')
cursor = connection.cursor()
query = "select * from GRES"
cursor.execute(query)
Sentences = [each[1] for each in cursor.fetchall()]
Sentences = Word2Vec.cleanText(Sentences)

# ===========================================
# Load model
# load_word2vec_format moved from Word2Vec to KeyedVectors in gensim >= 1.0
model_google = gensim.models.KeyedVectors.load_word2vec_format(
    '../model/GoogleNews-vectors-negative300.bin', binary=True)
# Word2Vec.Train_Wrod2VEc(Sentences, model_google)

# ===========================================
# Generate word vectors
n_dim = 300
train_vectors = [
    Word2Vec.buildWordVector(model_google, z, n_dim) for z in Sentences
]
Word2Vec.storeVecs(train_vectors, '../vectors/google_vecs.txt')
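storeVecs is another unshown helper from the local Word2Vec module. A minimal sketch, assuming simple pickle-based persistence (the implementation is a guess; only the call signature comes from the source):

import pickle


def storeVecs(vecs, fname):
    # Assumed implementation: serialize the list of sentence matrices.
    with open(fname, 'wb') as f:
        pickle.dump(vecs, f)


def loadVecs(fname):
    # Hypothetical counterpart for reading the vectors back.
    with open(fname, 'rb') as f:
        return pickle.load(f)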
Example #4
import gensim
import pymysql.cursors
import Word2Vec
import Doc2Vec

# ===========================================
# Load dictionary
connection = pymysql.connect(user='******', password='******', database='GRE')
cursor = connection.cursor()
query = "select * from GRES"
cursor.execute(query)
Sentences = [each[1] for each in cursor.fetchall()]
Dictionary1 = Word2Vec.cleanText(Sentences)
Dictionary2 = Doc2Vec.Preprocessing(Sentences)

# ===========================================
# Instantiate our DM and DBOW models
# (with gensim >= 4.0 the `size` argument below is named `vector_size`;
# this snippet targets the pre-4.0 API)
size = 400
model_dm = gensim.models.Doc2Vec(min_count=0,
                                 window=10,
                                 size=size,
                                 sample=1e-3,
                                 negative=5,
                                 workers=3)
model_dbow = gensim.models.Doc2Vec(min_count=0,
                                   window=10,
                                   size=size,
                                   sample=1e-3,
                                   negative=5,
                                   dm=0,  # dm=0 selects the DBOW variant
                                   workers=3)
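The example stops after instantiation; neither model has a vocabulary or any training at this point. A sketch of the usual next steps with gensim's pre-4.0 Doc2Vec API, assuming Doc2Vec.Preprocessing returns one token list per sentence (that return type is an assumption):

from gensim.models.doc2vec import TaggedDocument

# Assumption: Dictionary2 is a list of token lists; Doc2Vec training
# expects TaggedDocument objects, here tagged by sentence index.
tagged = [TaggedDocument(words=words, tags=[i])
          for i, words in enumerate(Dictionary2)]

for model in (model_dm, model_dbow):
    model.build_vocab(tagged)
    model.train(tagged, total_examples=model.corpus_count, epochs=10)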