Example #1
    def train(self, sentences, total_examples=None, total_words=None,
              epochs=None, start_alpha=None, end_alpha=None,
              word_count=0, queue_factor=2, report_delay=1.0):
        self.neg_labels = []
        if self.negative > 0:
            # precompute negative labels optimization for pure-python training
            self.neg_labels = zeros(self.negative + 1)
            self.neg_labels[0] = 1.

        Word2Vec.train(self, sentences, total_examples=self.corpus_count, epochs=self.iter,
            start_alpha=self.alpha, end_alpha=self.min_alpha)
        self.get_vocab_word_vecs()
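The `if self.negative > 0` block precomputes the label vector used by negative sampling in the pure-python training path: index 0 stands for the true context word (label 1.0) and the remaining `self.negative` slots stand for sampled noise words (label 0.0). A standalone sketch of that computation (the value 5 for `negative` is just an illustrative assumption):

>>> from numpy import zeros
>>>
>>> negative = 5                      # number of noise words sampled per positive word
>>> neg_labels = zeros(negative + 1)  # one slot for the true word plus `negative` noise slots
>>> neg_labels[0] = 1.                # only the true word gets label 1
>>> print(neg_labels)                 # -> [1. 0. 0. 0. 0. 0.]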
Example #2
    def train(self,
              sentences,
              total_examples=None,
              total_words=None,
              epochs=None,
              start_alpha=None,
              end_alpha=None,
              word_count=0,
              queue_factor=2,
              report_delay=1.0):
        """Update the model's neural weights from a sequence of sentences (can be a once-only generator stream).
        For FastText, each sentence must be a list of unicode strings. (Subclasses may accept other examples.)

        To support linear learning-rate decay from (initial) alpha to min_alpha, and accurate
        progress-percentage logging, either total_examples (count of sentences) or total_words (count of
        raw words in sentences) **MUST** be provided (if the corpus is the same as was provided to
        :meth:`~gensim.models.fasttext.FastText.build_vocab()`, the count of examples in that corpus
        will be available in the model's :attr:`corpus_count` property).

        To avoid common mistakes around the model's ability to do multiple training passes itself, an
        explicit `epochs` argument **MUST** be provided. In the common and recommended case,
        where :meth:`~gensim.models.fasttext.FastText.train()` is only called once,
        the model's cached `iter` value should be supplied as the `epochs` value.

        Parameters
        ----------
        sentences : iterable of iterables
            The `sentences` iterable can be simply a list of lists of tokens, but for larger corpora,
            consider an iterable that streams the sentences directly from disk/network.
            See :class:`~gensim.models.word2vec.BrownCorpus`, :class:`~gensim.models.word2vec.Text8Corpus`
            or :class:`~gensim.models.word2vec.LineSentence` in :mod:`~gensim.models.word2vec` module for such examples.
        total_examples : int
            Count of sentences.
        total_words : int
            Count of raw words in sentences.
        epochs : int
            Number of iterations (epochs) over the corpus.
        start_alpha : float
            Initial learning rate.
        end_alpha : float
            Final learning rate. Drops linearly from `start_alpha`.
        word_count : int
            Count of words already trained. Set this to 0 for the usual
            case of training on all words in sentences.
        queue_factor : int
            Multiplier for size of queue (number of workers * queue_factor).
        report_delay : float
            Seconds to wait before reporting progress.

        Examples
        --------
        >>> from gensim.models import FastText
        >>> sentences = [["cat", "say", "meow"], ["dog", "say", "woof"]]
        >>>
        >>> model = FastText(min_count=1)
        >>> model.build_vocab(sentences)
        >>> model.train(sentences, total_examples=model.corpus_count, epochs=model.iter)

        """
        self.neg_labels = []
        if self.negative > 0:
            # precompute negative labels optimization for pure-python training
            self.neg_labels = zeros(self.negative + 1)
            self.neg_labels[0] = 1.

        # Delegate the actual training to the Word2Vec superclass (imported at module
        # level), using the model's cached corpus_count, iter, alpha and min_alpha
        # rather than the caller-supplied arguments.
        Word2Vec.train(self,
                       sentences,
                       total_examples=self.corpus_count,
                       epochs=self.iter,
                       start_alpha=self.alpha,
                       end_alpha=self.min_alpha)
        # Refresh the vocabulary word vectors now that training has updated the weights.
        self.get_vocab_word_vecs()
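For completeness, here is the call sequence recommended by the docstring, annotated with what this override actually does with the arguments (the toy corpus is borrowed from the docstring's own example; the behavior note follows from the method body above):

>>> from gensim.models import FastText
>>>
>>> sentences = [["cat", "say", "meow"], ["dog", "say", "woof"]]
>>> model = FastText(min_count=1)
>>> model.build_vocab(sentences)  # populates model.corpus_count and model.iter
>>>
>>> # Passing total_examples/epochs explicitly follows the docstring, but this override
>>> # replaces them internally with model.corpus_count, model.iter, model.alpha and
>>> # model.min_alpha before delegating to Word2Vec.train.
>>> model.train(sentences, total_examples=model.corpus_count, epochs=model.iter)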