import os
#import nltk
import re
from collections import Counter
import pickle

import numpy as np

import data_utils
import glove_utils

IMDB_PATH = 'aclImdb'
MAX_VOCAB_SIZE = 50000
GLOVE_PATH = 'glove.840B.300d.txt'

if not os.path.exists('aux_files'):
    os.mkdir('aux_files')

# Build the IMDB dataset, keeping the MAX_VOCAB_SIZE most frequent words.
imdb_dataset = data_utils.IMDBDataset(path=IMDB_PATH, max_vocab_size=MAX_VOCAB_SIZE)

# Save the dataset as a pickle file.
with open('aux_files/dataset_%d.pkl' % MAX_VOCAB_SIZE, 'wb') as f:
    pickle.dump(imdb_dataset, f)

# Create the GloVe embeddings matrix (used by the classification model).
glove_model = glove_utils.loadGloveModel(GLOVE_PATH)
glove_embeddings, _ = glove_utils.create_embeddings_matrix(
    glove_model, imdb_dataset.dict, imdb_dataset.full_dict)
# Save the GloVe embeddings matrix.
np.save('aux_files/embeddings_glove_%d.npy' % MAX_VOCAB_SIZE, glove_embeddings)

# Load the counter-fitted vectors (used by our attack).
glove2 = glove_utils.loadGloveModel('counter-fitted-vectors.txt')
# Create the counter-fitted embeddings matrix for our vocabulary.
counter_embeddings, missed = glove_utils.create_embeddings_matrix(
    glove2, imdb_dataset.dict, imdb_dataset.full_dict)

# Save the embeddings for the words we found, and the indices of words we missed.
np.save('aux_files/embeddings_counter_%d.npy' % MAX_VOCAB_SIZE, counter_embeddings)
np.save('aux_files/missed_embeddings_counter_%d.npy' % MAX_VOCAB_SIZE, missed)

print('All done')
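# --- Optional sanity check (illustrative sketch, not part of the original) ---
# Reloads the artifacts saved above and verifies that the two embedding
# matrices line up column-for-column. Shapes assume the [300, n_words] layout
# that create_embeddings_matrix produces (per the Yelp script below).
import pickle
import numpy as np

with open('aux_files/dataset_%d.pkl' % MAX_VOCAB_SIZE, 'rb') as f:
    reloaded_dataset = pickle.load(f)
glove_mat = np.load('aux_files/embeddings_glove_%d.npy' % MAX_VOCAB_SIZE)
counter_mat = np.load('aux_files/embeddings_counter_%d.npy' % MAX_VOCAB_SIZE)
# One column per vocabulary word, identical ordering in both matrices.
assert glove_mat.shape == counter_mat.shape
print('Sanity check passed, embedding shape:', glove_mat.shape)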
import os
import pickle

import numpy as np

import data_utils_yelp
import glove_utils

YELP_PATH = 'yelp'  # root of the Yelp review data; adjust to your local copy
MAX_VOCAB_SIZE = 50000
GLOVE_PATH = '/content/drive/My Drive/Master_Final_Project/Genetic_attack/Code/nlp_adversarial_example_master_pytorch/glove.840B.300d.txt'
COUNTER_PATH = 'counter-fitted-vectors.txt'

if not os.path.exists('aux_files'):
    os.mkdir('aux_files')

yelp_dataset = data_utils_yelp.YELPDataset(path=YELP_PATH, max_vocab_size=MAX_VOCAB_SIZE)

# Save the dataset as a pickle file.
with open('aux_files/dataset_%d.pkl' % MAX_VOCAB_SIZE, 'wb') as f:
    pickle.dump(yelp_dataset, f)

# Create the GloVe embeddings matrix (used by the classification model):
# a [300, n_words] matrix whose column order matches the vocabulary indices.
glove_model = glove_utils.loadGloveModel(GLOVE_PATH)
glove_embeddings, _ = glove_utils.create_embeddings_matrix(
    glove_model, yelp_dataset.dict, yelp_dataset.full_dict)
# Save the GloVe embeddings matrix.
np.save('aux_files/embeddings_glove_%d.npy' % MAX_VOCAB_SIZE, glove_embeddings)

# Load the counter-fitted vectors (used by our attack).
glove2 = glove_utils.loadGloveModel(COUNTER_PATH)
# Create the counter-fitted embeddings matrix for our vocabulary.
counter_embeddings, missed = glove_utils.create_embeddings_matrix(
    glove2, yelp_dataset.dict, yelp_dataset.full_dict)

# Save the embeddings for the words we found, and the indices of words we missed.
np.save('aux_files/embeddings_counter_%d.npy' % MAX_VOCAB_SIZE, counter_embeddings)
np.save('aux_files/missed_embeddings_counter_%d.npy' % MAX_VOCAB_SIZE, missed)
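# --- Illustrative sketch (not from the original): inspecting missed words ---
# `missed` holds vocabulary entries that had no counter-fitted vector. This
# assumes `missed` is an iterable of word indices and that the dataset exposes
# an index-to-word mapping `yelp_dataset.inv_dict` (hypothetical here; it
# mirrors the inv_word_index built by hand in the NLI script below).
for idx in list(missed)[:10]:
    print(idx, yelp_dataset.inv_dict.get(idx, '<unknown>'))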
import pickle

import numpy as np

import glove_utils

with open('./nli_tokenizer.pkl', 'rb') as fh:
    tokenizer = pickle.load(fh)

nli_words_index = tokenizer.word_index
inv_word_index = {i: w for (w, i) in nli_words_index.items()}
MAX_VOCAB_SIZE = len(nli_words_index)

# Load the counter-fitted vectors (used by our attack).
glove2 = glove_utils.loadGloveModel('counter-fitted-vectors.txt')
# Create the counter-fitted embeddings matrix for our vocabulary.
counter_embeddings, missed = glove_utils.create_embeddings_matrix(
    glove2, nli_words_index, None)

# Save the embeddings for the words we found, and the indices of words we missed.
np.save('aux_files/nli_embeddings_counter_%d.npy' % MAX_VOCAB_SIZE, counter_embeddings)
np.save('aux_files/nli_missed_embeddings_counter_%d.npy' % MAX_VOCAB_SIZE, missed)
print('Done preparing the embedding matrix.')

print('Computing the distance matrix... this may take a while')
# Pairwise squared Euclidean distances between embedding columns, computed in
# one shot via the identity ||x - y||^2 = ||x||^2 + ||y||^2 - 2 x.y.
c_ = -2 * np.dot(counter_embeddings.T, counter_embeddings)
a = np.sum(np.square(counter_embeddings), axis=0).reshape((1, -1))
b = a.T
dist = a + b + c_
np.save('aux_files/nli_dist_counter_%d.npy' % MAX_VOCAB_SIZE, dist)
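# --- Illustrative sketch (not from the original): using the distance matrix ---
# The attack draws replacement candidates from a word's nearest neighbors in
# the counter-fitted space. With `dist` holding pairwise squared Euclidean
# distances and `inv_word_index` from above, the k closest words to word id
# `w` can be read off with argsort; the word id 10 in the example is arbitrary.
def nearest_neighbors(w, k=8):
    # argsort puts the word itself (distance 0) first, so skip position 0.
    order = np.argsort(dist[w, :])
    return [(int(i), inv_word_index.get(int(i), '<oov>'), float(dist[w, i]))
            for i in order[1:k + 1]]

print(nearest_neighbors(10))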