def generate(q1, q2, answer, model_google, options):
    """Build padded word-vector matrices for question/answer combinations.

    Parameters
    ----------
    q1, q2 : str
        Question text before and after the blank.
    answer : sequence of str
        Candidate answer texts, indexed by the values in ``options``.
    model_google : gensim word-vector model
        Passed through to ``Word2Vec.buildWordVector``.
    options : iterable of int
        Indices into ``answer`` selecting which candidates to encode.

    Returns
    -------
    (dataset, q_set, a_set) : tuple of three lists of numpy arrays
        ``dataset``: (49, 300) matrices for each q1+answer+q2 sentence;
        ``q_set``: (49, 300) matrices for the bare question q1+q2;
        ``a_set``: (4, 300) matrices for each answer on its own.
    """
    n_dim = 300

    def _center_pad(vec, length):
        # Center the (m, n_dim) word-vector sequence inside a zero matrix
        # of shape (length, n_dim).  An empty sequence yields all zeros;
        # the original code guarded m == 0 only for the answer matrices —
        # applying the guard uniformly is behavior-preserving and safer.
        padded = np.zeros((length, n_dim))
        m = len(vec)
        if m:
            start = (length - m) // 2
            padded[start:start + m] = vec
        return padded

    def _encode(texts, length):
        # Clean, vectorize, then center-pad each text to a fixed size.
        cleaned = Word2Vec.cleanText(texts)
        vectors = [Word2Vec.buildWordVector(model_google, z, n_dim)
                   for z in cleaned]
        return [_center_pad(v, length) for v in vectors]

    # Full sentence with the candidate answer substituted into the blank.
    dataset = _encode([q1 + answer[i] + q2 for i in options], 49)
    # Question context alone (identical for every option, but emitted
    # per-option so the three outputs stay index-aligned).
    q_set = _encode([q1 + q2 for _ in options], 49)
    # Candidate answer alone, padded to a 4-row matrix.
    a_set = _encode([answer[i] for i in options], 4)
    return dataset, q_set, a_set
def generate(q1, q2, answer, model_google, options):
    """Encode each q1+answer[i]+q2 sentence as a centered (49, 300) matrix.

    NOTE(review): this redefines the ``generate`` defined earlier in the
    file; only this later definition is visible to importers.
    """
    n_dim = 300
    texts = [q1 + answer[idx] + q2 for idx in options]
    cleaned = Word2Vec.cleanText(texts)
    word_vecs = [Word2Vec.buildWordVector(model_google, tokens, n_dim)
                 for tokens in cleaned]

    dataset = []
    for vec in word_vecs:
        # Center the word vectors vertically inside a 49-row zero matrix.
        padded = np.zeros((49, 300))
        count = len(vec)
        offset = int((49 - count) / 2)
        padded[offset:offset + count] = vec
        dataset.append(np.array(padded))
    return dataset
# Esempio n. 3
# 0
import Word2Vec
import gensim
import numpy as np
import pymysql.cursors

# ===========================================
# load data
# Fetch every row of the GRES table; column 1 holds the sentence text
# (presumably — verify against the GRES schema).
connection = pymysql.connect(user='******', password='******', database='GRE')
cursor = connection.cursor()
commit = "select * from GRES"
cursor.execute(commit)
Sentences = [each[1] for each in cursor.fetchall()]
Sentences = Word2Vec.cleanText(Sentences)

# ===========================================
# Load model
# FIX: gensim removed `Word2Vec.load_word2vec_format`; the supported entry
# point — already used elsewhere in this file — is KeyedVectors.
model_google = gensim.models.KeyedVectors.load_word2vec_format(
    '../model/GoogleNews-vectors-negative300.bin', binary=True)
# Word2Vec.Train_Wrod2VEc(Sentences, model_google)

# ===========================================
# Generalize words
# Build one 300-dim vector per cleaned sentence (see
# Word2Vec.buildWordVector) and persist them for later runs.
n_dim = 300
train_vectors = [
    Word2Vec.buildWordVector(model_google, z, n_dim) for z in Sentences
]
Word2Vec.storeVecs(train_vectors, '../vectors/google_vecs.txt')
import Word2Vec

# ===========================================
# load data
# Fetch every row of GRES2; column 1 presumably holds the question text —
# verify against the GRES2 schema.
connection = pymysql.connect(user='******', password='******', database='GRE')
cursor = connection.cursor()
commit = "select * from GRES2"
cursor.execute(commit)
Sentences = [each[1] for each in cursor.fetchall()]
Sentences = Word2Vec.cleanText(Sentences)

# ===========================================
# Load model
# Pretrained Google News vectors via the modern KeyedVectors loader.
model_google = gensim.models.KeyedVectors.load_word2vec_format('../GoogleModel/GoogleNews-vectors-negative300.bin', binary=True)
# Word2Vec.Train_Wrod2VEc(Sentences, model_google)

# ===========================================
# Generalize words
# Encode each cleaned text into a 300-dim vector and persist to disk.
n_dim = 300
train_vectors = [Word2Vec.buildWordVector(model_google, z, n_dim) for z in Sentences]
Word2Vec.storeVecs(train_vectors, '../data for input1/q_vecs.pkl')

# Re-run the same query, this time extracting column 2 (presumably the
# answer text — confirm against the schema).
commit = "select * from GRES2"
cursor.execute(commit)
Sentences = [each[2] for each in cursor.fetchall()]
Sentences = Word2Vec.cleanText(Sentences)

# Generalize words
# Vectorize with the same model and store alongside the question vectors.
train_vectors = [Word2Vec.buildWordVector(model_google, z, n_dim) for z in Sentences]
Word2Vec.storeVecs(train_vectors, '../data for input1/a_vecs.pkl')
# Esempio n. 5
# 0
import Word2Vec
import gensim
import numpy as np
import pymysql.cursors

# ===========================================
# load data
# Fetch every row of GRES; column 1 presumably holds the sentence text —
# verify against the GRES schema.
connection = pymysql.connect(user='******', password='******', database='GRE')
cursor = connection.cursor()
commit = "select * from GRES"
cursor.execute(commit)
Sentences = [each[1] for each in cursor.fetchall()]
Sentences = Word2Vec.cleanText(Sentences)

# ===========================================
# Train model
# Load a previously saved gensim model and feed it the cleaned sentences.
# NOTE(review): `Train_Wrod2VEc` is the (misspelled) project API name —
# do not "fix" the spelling here without renaming it in the Word2Vec
# module as well.
model_w2v = gensim.models.Word2Vec.load('../model/model_w2v')
Word2Vec.Train_Wrod2VEc(Sentences, model_w2v)

# ===========================================
# Generalize words
# Encode each cleaned sentence as a 300-dim vector and persist the result.
n_dim = 300
train_vectors = [
    Word2Vec.buildWordVector(model_w2v, z, n_dim) for z in Sentences
]
Word2Vec.storeVecs(train_vectors, '../model/w2v_vecs.txt')