Example #1
import multiprocessing

from gensim.models import FastText
from gensim.models.word2vec import LineSentence, PathLineSentences


class Word2Vector(object):
    def __init__(self,
                 src_file,
                 dst_file,
                 size=300,
                 window=5,
                 min_count=10,
                 hs=0,
                 sg=0,
                 learning_rate=0.025):
        self.src_file = src_file
        self.model_file = dst_file
        self.size = size
        self.window = window
        self.min_count = min_count
        self.hs = hs  # 1: hierarchical softmax, 0: no hierarchical softmax
        self.sg = sg  # 1: skip-gram,  0: CBOW
        self.alpha = learning_rate
        self.workers = multiprocessing.cpu_count()

    def train(self, sentences):
        self.model = FastText(sentences,
                              size=self.size,
                              window=self.window,
                              min_count=self.min_count,
                              hs=self.hs,
                              sg=self.sg,
                              alpha=self.alpha,
                              workers=self.workers)
        self.model.save(self.model_file)
        # Also export the plain word vectors in word2vec binary format (subword info is not kept).
        self.model.wv.save_word2vec_format(self.model_file + '.bin', binary=True)

    def train_model(self):
        sentences = LineSentence(self.src_file)
        self.train(sentences)

    def online_train(self, sentences):  # online (incremental) training of an already loaded model
        self.model.build_vocab(sentences, update=True)
        self.model.train(sentences,
                         total_examples=self.model.corpus_count,
                         epochs=self.model.iter)

    def online_train_model(self, file_name, isdir=True):  # online training from a directory or a single file
        if isdir:
            sentences = PathLineSentences(file_name)
        else:
            sentences = LineSentence(file_name)
        self.online_train(sentences)

    def train_dir_model(self):
        sentences = PathLineSentences(self.src_file)
        self.train(sentences)

    def load_model(self, model_name):
        self.model = FastText.load(model_name)

    def show_similarity(self, word1, word2):
        return self.model.wv.similarity(word1, word2)

    def show_word_vector(self, word):
        return self.model.wv[word]
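
# --- Usage sketch (not part of the original example) ---
# A minimal illustration of the class above. The corpus/model paths and the
# query words are hypothetical; LineSentence expects one whitespace-tokenized
# sentence per line.
if __name__ == '__main__':
    w2v = Word2Vector(src_file='corpus.txt', dst_file='fasttext.model',
                      size=300, window=5, min_count=10, sg=1)
    w2v.train_model()                          # train on corpus.txt and save the model
    print(w2v.show_similarity('king', 'queen'))
    print(w2v.show_word_vector('king')[:5])    # first 5 dimensions of the vector

    # Continue training later on new data (one sentence per line in new_corpus.txt).
    w2v.load_model('fasttext.model')
    w2v.online_train_model('new_corpus.txt', isdir=False)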
Example #2
'''
total_examples = new_wv.corpus_count
new_wv.build_vocab([list(pubmed_wv.vocab.keys())], update=True)
new_wv.intersect_word2vec_format(preTrainedPath, binary=True, lockf=1.0)
'''
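# The triple-quoted block above (disabled in the original) would warm-start the
# model: add the pre-trained PubMed vocabulary to new_wv (update=True) and load
# the matching pre-trained vectors with intersect_word2vec_format (lockf=1.0
# keeps those vectors trainable).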

### Train for 2 epochs
new_wv.train(sentences, total_examples=new_wv.corpus_count, epochs=2)
print('Time to train the model 2 epochs: {} mins'.format(round((time() - t) / 60, 2)))
print('----------------------------')
print(new_wv.most_similar(positive=['treatment']))
print(new_wv.most_similar(positive=['female']))
print(new_wv.most_similar(positive=['history']))
print(new_wv.most_similar(positive=['disease']))
print(new_wv.most_similar(positive=['brain']))
new_wv.save_word2vec_format('mimic-pubmed_2.bin', binary=True)
print('----------------------------')


# Train for 8 more epochs (10 epochs in total)
new_wv.train(sentences, total_examples=new_wv.corpus_count, epochs=8)
print('Time to train the model 10 epochs: {} mins'.format(round((time() - t) / 60, 2)))
print('----------------------------')
print(new_wv.most_similar(positive=['treatment']))
print(new_wv.most_similar(positive=['female']))
print(new_wv.most_similar(positive=['history']))
print(new_wv.most_similar(positive=['disease']))
print(new_wv.most_similar(positive=['brain']))
new_wv.save_word2vec_format('mimic-pubmed_10.bin', binary=True)
print('----------------------------')
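
# --- Loading the exported vectors (not part of the original example) ---
# A minimal sketch of how the saved .bin file could be consumed later with
# gensim's KeyedVectors; the file name matches the save above, everything
# else is an assumption.
from gensim.models import KeyedVectors

vectors = KeyedVectors.load_word2vec_format('mimic-pubmed_10.bin', binary=True)
print(vectors.most_similar(positive=['disease'], topn=5))
print(vectors.similarity('treatment', 'disease'))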