Exemple #1
0
def init_word_embs_model(model_path,
                         model_type,
                         force_reload=False,
                         top_k=None):
    """Return the word-embedding model for ``model_type``, loading it if needed.

    Models are cached in the module-level ``WORD_EMBS_MODELS`` mapping, keyed
    by ``model_type``. On a cache hit (and ``force_reload`` false) the cached
    model's ``top_k`` attribute is overwritten with the given ``top_k`` and
    the cached instance is returned without reading ``model_path``.

    Args:
        model_path: Path passed to the model's ``read`` method.
        model_type: One of ``'word2vec'``, ``'glove'`` or ``'fasttext'``.
        force_reload: When true, ignore any cached model and load again.
        top_k: Value forwarded to the model constructor, or assigned to the
            cached model's ``top_k`` attribute.

    Returns:
        The loaded (or cached) model instance.

    Raises:
        ValueError: If ``model_type`` is not a supported type and no model is
            cached under that key.
    """
    global WORD_EMBS_MODELS

    if model_type in WORD_EMBS_MODELS and not force_reload:
        # NOTE(review): the cache is keyed by type only, so a different
        # model_path for an already-cached type is ignored unless
        # force_reload is set -- confirm this is intended.
        WORD_EMBS_MODELS[model_type].top_k = top_k
        return WORD_EMBS_MODELS[model_type]

    if model_type == 'word2vec':
        model = nmw.Word2vec(top_k=top_k)
    elif model_type == 'glove':
        model = nmw.GloVe(top_k=top_k)
    elif model_type == 'fasttext':
        model = nmw.Fasttext(top_k=top_k)
    else:
        # The supported types are listed here because the message previously
        # referenced an undefined name, which turned this ValueError into a
        # NameError.
        supported_types = ['word2vec', 'glove', 'fasttext']
        raise ValueError(
            'Model type value is unexpected. Expected values include {}'.
            format(supported_types))

    model.read(model_path)
    WORD_EMBS_MODELS[model_type] = model
    return model
Exemple #2
0
def init_fasttext_model(model_path, force_reload=False):
    """Return the shared Fasttext model, reading it from ``model_path`` if needed.

    The model is kept in the module-level ``FASTTEXT_MODEL`` global. It is
    (re)loaded when that global is falsy or when ``force_reload`` is true;
    otherwise the existing value is returned and ``model_path`` is not read.
    """
    global FASTTEXT_MODEL

    # Only pay the loading cost when nothing is cached or a reload is forced.
    if not FASTTEXT_MODEL or force_reload:
        loaded = nmw.Fasttext()
        loaded.read(model_path)
        FASTTEXT_MODEL = loaded

    return FASTTEXT_MODEL
Exemple #3
0
    def test_bogus_fasttext_loading(self):
        """Reading the malformed fasttext fixture must raise with a known message.

        The fixture lives at ``$TEST_DIR/res/text/bogus_fasttext.vec``.
        """
        bogus_path = os.path.join(
            os.environ.get("TEST_DIR"), 'res', 'text', 'bogus_fasttext.vec')
        expected_message = (
            'cannot copy sequence with size 11 to array axis with dimension 10')

        # Incorrectly formatted files are no longer supported since the
        # switch to the gensim package, so loading is expected to fail.
        with self.assertRaises(Exception) as raised:
            model = nmw.Fasttext()
            model.read(bogus_path)

        self.assertIn(expected_message, str(raised.exception))
Exemple #4
0
def init_fasttext_model(model_path, force_reload=False, top_k=None):
    """Return the Fasttext model for ``model_path``, loading it at most once.

    Models are stored in the module-level ``FASTTEXT_MODEL`` mapping, keyed by
    ``model_path``. On a cache hit (and ``force_reload`` false) the cached
    model's ``top_k`` attribute is set to ``top_k``; otherwise a new model is
    built with ``top_k``, read from ``model_path`` and stored in the mapping.
    """
    global FASTTEXT_MODEL

    needs_load = model_path not in FASTTEXT_MODEL or force_reload
    if needs_load:
        loaded = nmw.Fasttext(top_k=top_k)
        loaded.read(model_path)
        FASTTEXT_MODEL[model_path] = loaded
    else:
        # Reuse the cached instance but honour the caller's top_k.
        FASTTEXT_MODEL[model_path].top_k = top_k

    return FASTTEXT_MODEL[model_path]
Exemple #5
0
    def test_bogus_fasttext_loading(self):
        """Load the bogus fasttext fixture and check vectors, vocab and count.

        The fixture lives at ``$TEST_DIR/res/text/bogus_fasttext.vec``.
        """
        fixture_path = os.path.join(
            os.environ.get("TEST_DIR"), 'res', 'text', 'bogus_fasttext.vec')

        model = nmw.Fasttext()
        model.read(fixture_path)

        # Every word in the fixture is expected to map to this same vector.
        expected_vector = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]
        for token in model.w2v:
            actual_vector = list(model.w2v[token])
            self.assertSequenceEqual(actual_vector, expected_vector)

        expected_vocab = ["test1", "test2", "test_3", "test 4", "test -> 5"]
        self.assertSequenceEqual(expected_vocab, model.get_vocab())

        self.assertEqual(len(model.normalized_vectors), 5)