def word_vec(self, word, use_norm=False):
        """Return the vector representation of `word` as a 1D numpy array.

        The lookup is delegated to the wrapped :class:`FastTextKeyedVectors`
        instance held in ``self.wv``.

        Parameters
        ----------
        word : str
            The word whose vector should be looked up.
        use_norm : bool
            When True, the L2-normalized vector is returned instead of the raw one.

        Returns
        -------
        :class:`numpy.ndarray`
            1D vector representing `word`.

        Raises
        ------
        KeyError
            If the word is out of vocabulary and none of its ngrams are present.

        Example
        -------
        >>> from gensim.models import FastText
        >>> sentences = [["cat", "say", "meow"], ["dog", "say", "woof"]]
        >>>
        >>> model = FastText(sentences, min_count=1)
        >>> meow_vector = model.word_vec('meow')  # get vector for word

        """
        keyed_vectors = self.wv
        return FastTextKeyedVectors.word_vec(keyed_vectors, word, use_norm=use_norm)
Example No. 2
0
 def test_persistence(self):
     """A trained model and its KeyedVectors must round-trip through save()/load()."""
     tmpf = get_tmpfile('gensim_fasttext.tst')
     model = FT_gensim(sentences, min_count=1)
     model.save(tmpf)
     reloaded = FT_gensim.load(tmpf)
     self.models_equal(model, reloaded)

     # the KeyedVectors component must also persist correctly on its own
     keyed = model.wv
     keyed.save(tmpf)
     restored = FastTextKeyedVectors.load(tmpf)
     self.assertTrue(np.allclose(keyed.syn0_ngrams, restored.syn0_ngrams))
     self.assertEqual(len(keyed.vocab), len(restored.vocab))
Example No. 3
0
 def test_persistence(self):
     """Saving and reloading must preserve the model and its KeyedVectors."""
     model = FT_gensim(sentences, min_count=1)
     model.save(testfile())
     self.models_equal(model, FT_gensim.load(testfile()))

     # the KeyedVectors component must round-trip through save/load as well
     keyed = model.wv
     keyed.save(testfile())
     restored = FastTextKeyedVectors.load(testfile())
     self.assertTrue(np.allclose(keyed.syn0_ngrams, restored.syn0_ngrams))
     self.assertEqual(len(keyed.vocab), len(restored.vocab))
     self.assertEqual(len(keyed.ngrams), len(restored.ngrams))
Example No. 4
0
    def test_norm_vectors_not_saved(self):
        """After init_sims(), the derived normalized matrices must not survive save/load."""
        model = FT_gensim(sentences, min_count=1)
        model.init_sims()
        model.save(testfile())

        reloaded = FT_gensim.load(testfile())
        self.assertIsNone(reloaded.wv.syn0norm)
        self.assertIsNone(reloaded.wv.syn0_ngrams_norm)

        # the same must hold when only the KeyedVectors are persisted
        keyed = model.wv
        keyed.save(testfile())
        reloaded_kv = FastTextKeyedVectors.load(testfile())
        self.assertIsNone(reloaded_kv.syn0norm)
        self.assertIsNone(reloaded_kv.syn0_ngrams_norm)
    def test_norm_vectors_not_saved(self):
        """Round-tripping a model (or its KeyedVectors) must drop the normalized arrays."""
        model = FT_gensim(sentences, min_count=1)
        model.init_sims()
        model.save(testfile())
        loaded = FT_gensim.load(testfile())
        for attr in ('syn0norm', 'syn0_ngrams_norm'):
            self.assertTrue(getattr(loaded.wv, attr) is None)

        # the KeyedVectors alone must behave the same way
        vectors = model.wv
        vectors.save(testfile())
        loaded_vectors = FastTextKeyedVectors.load(testfile())
        for attr in ('syn0norm', 'syn0_ngrams_norm'):
            self.assertTrue(getattr(loaded_vectors, attr) is None)
Example No. 6
0
    def test_norm_vectors_not_saved(self):
        """Normalized vector matrices are derived data and must not be persisted."""
        out_path = get_tmpfile('gensim_fasttext.tst')
        model = FT_gensim(sentences, min_count=1)
        model.init_sims()

        model.save(out_path)
        restored_model = FT_gensim.load(out_path)
        self.assertIsNone(restored_model.wv.vectors_norm)
        self.assertIsNone(restored_model.wv.vectors_ngrams_norm)

        # saving just the KeyedVectors must discard the normalized arrays too
        keyed = model.wv
        keyed.save(out_path)
        restored_kv = FastTextKeyedVectors.load(out_path)
        self.assertIsNone(restored_kv.vectors_norm)
        self.assertIsNone(restored_kv.vectors_ngrams_norm)
Example No. 7
0
    def test_persistence_fromfile(self):
        """Models trained from a corpus file must round-trip through save()/load()."""
        with temporary_file(get_tmpfile('gensim_fasttext1.tst')) as corpus_file:
            utils.save_as_line_sentence(sentences, corpus_file)

            out_path = get_tmpfile('gensim_fasttext.tst')
            model = FT_gensim(corpus_file=corpus_file, min_count=1)
            model.save(out_path)
            self.models_equal(model, FT_gensim.load(out_path))

            # the KeyedVectors held by the model must survive save/load on their own
            keyed = model.wv
            keyed.save(out_path)
            restored = FastTextKeyedVectors.load(out_path)
            self.assertTrue(np.allclose(keyed.syn0_ngrams, restored.syn0_ngrams))
            self.assertEqual(len(keyed.vocab), len(restored.vocab))
Example No. 8
0
 def word_vec(self, word, use_norm=False):
     """Look up the vector for `word` via the wrapped FastTextKeyedVectors instance."""
     keyed_vectors = self.wv
     return FastTextKeyedVectors.word_vec(keyed_vectors, word, use_norm=use_norm)
Example No. 9
0
 def initialize_word_vectors(self):
     """Create the FastTextKeyedVectors container and copy the ngram length bounds onto it."""
     keyed = FastTextKeyedVectors()
     keyed.min_n = self.min_n
     keyed.max_n = self.max_n
     self.wv = keyed
 def initialize_word_vectors(self):
     """Set up an empty FastTextKeyedVectors for the model's vocab/ngram vectors.

     The model's ngram length bounds (`min_n`, `max_n`) are mirrored onto the
     new keyed-vectors instance.
     """
     self.wv = FastTextKeyedVectors()
     self.wv.min_n, self.wv.max_n = self.min_n, self.max_n
Example No. 11
0
from sklearn.feature_extraction.text import TfidfVectorizer
from gensim.models.wrappers.fasttext import FastTextKeyedVectors
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
import pickle as pkl
from dostoevsky.tokenization import RegexTokenizer
from dostoevsky.models import FastTextSocialNetworkModel
import datetime

# Tokenizer and pretrained social-network sentiment model from the `dostoevsky` package.
tokenizer = RegexTokenizer()
model = FastTextSocialNetworkModel(tokenizer=tokenizer)

# morph analyzer for text lemmatization
# NOTE(review): `pymorphy2` is not imported in this snippet — confirm the import exists elsewhere.
morph = pymorphy2.MorphAnalyzer()
# Pretrained fastText word vectors, loaded from disk, used as features below.
fasttext = FastTextKeyedVectors.load('187/model.model')
# Pickled logistic-regression classifiers: positive/negative sentiment, in a plain
# variant and a "dost" (dostoevsky-feature) variant.
pos_log_reg = pkl.load(open('pickles/pos_log_reg.pkl', 'rb'))
neg_log_reg = pkl.load(open('pickles/neg_log_reg.pkl', 'rb'))
pos_log_reg_dost = pkl.load(open('pickles/pos_log_reg_dost.pkl', 'rb'))
neg_log_reg_dost = pkl.load(open('pickles/neg_log_reg_dost.pkl', 'rb'))

# Historical data plus the labelled training set, joined on ('index', 'message').
# NOTE(review): `pd` (pandas) is not imported in this snippet — confirm the import exists elsewhere.
old_data = pd.read_pickle('data/new_data.pkl')
old_data['index'] = old_data.index
training_data = pd.read_csv('data/training_data_with_razmetka_final.csv')

data_new = training_data.merge(old_data, on=['index', 'message'])
# Strip the time-of-day component, keeping only the calendar date.
cut_date = lambda x: datetime.date(x.year, x.month, x.day)
data_new['local_datetime'] = pd.to_datetime(
    data_new.local_datetime).apply(cut_date)

Example No. 12
0
 def word_vec(self, word, use_norm=False):
     """Fetch the vector for `word`, optionally L2-normalized, from this model's `wv`."""
     return FastTextKeyedVectors.word_vec(
         self.wv, word, use_norm=use_norm,
     )