def _do_train_job(self, sentences, alpha, inits): """Train a single batch of sentences. Return 2-tuple `(effective word count after ignoring unknown words and sentence length trimming, total word count)`. Parameters ---------- sentences : iterable of iterables The `sentences` iterable can be simply a list of lists of tokens, but for larger corpora, consider an iterable that streams the sentences directly from disk/network. See :class:`~gensim.models.word2vec.BrownCorpus`, :class:`~gensim.models.word2vec.Text8Corpus` or :class:`~gensim.models.word2vec.LineSentence` in :mod:`~gensim.models.word2vec` module for such examples. alpha : float The current learning rate. inits : (:class:`numpy.ndarray`, :class:`numpy.ndarray`) Each worker's private work memory. Returns ------- (int, int) Tuple of (effective word count after ignoring unknown words and sentence length trimming, total word count) """ work, neu1 = inits tally = 0 if self.sg: tally += train_batch_sg(self, sentences, alpha, work, neu1) else: tally += train_batch_cbow(self, sentences, alpha, work, neu1) return tally, self._raw_word_count(sentences)
def _do_train_job(self, sentences, alpha, inits): """Train a single batch of sentences. Return 2-tuple `(effective word count after ignoring unknown words and sentence length trimming, total word count)`. Parameters ---------- sentences : iterable of list of str Can be simply a list of lists of tokens, but for larger corpora, consider an iterable that streams the sentences directly from disk/network. See :class:`~gensim.models.word2vec.BrownCorpus`, :class:`~gensim.models.word2vec.Text8Corpus` or :class:`~gensim.models.word2vec.LineSentence` in :mod:`~gensim.models.word2vec` module for such examples. alpha : float The current learning rate. inits : tuple of (:class:`numpy.ndarray`, :class:`numpy.ndarray`) Each worker's private work memory. Returns ------- (int, int) Tuple of (effective word count after ignoring unknown words and sentence length trimming, total word count) """ work, neu1 = inits tally = 0 if self.sg: tally += train_batch_sg(self, sentences, alpha, work, neu1) else: tally += train_batch_cbow(self, sentences, alpha, work, neu1) return tally, self._raw_word_count(sentences)