def train_document_dbow(model, doc_words, doctag_indexes, alpha, work=None,
                        train_words=False, learn_doctags=True, learn_words=True, learn_hidden=True,
                        word_vectors=None, word_locks=None, doctag_vectors=None, doctag_locks=None):
    """
    Update distributed bag of words model ("PV-DBOW") by training on a single document.

    Called internally from `Doc2Vec.train()` and `Doc2Vec.infer_vector()`.

    The document is provided as `doc_words`, a list of word tokens which are looked up
    in the model's vocab dictionary, and `doctag_indexes`, which provide indexes into
    the doctag_vectors array.

    If `train_words` is True, simultaneously train word-to-word (not just doc-to-word)
    examples, exactly as per Word2Vec skip-gram training. (Without this option, word
    vectors are neither consulted nor updated during DBOW doc-vector training.)

    Any of `learn_doctags`, `learn_words`, and `learn_hidden` may be set False to
    prevent learning-updates to those respective model weights, as if using the
    (partially-)frozen model to infer other compatible vectors.

    This is the non-optimized, pure-Python version. If you have cython installed,
    gensim will use the optimized version from doc2vec_inner instead.

    """
    # default to the model's own doctag arrays when none are supplied
    if doctag_vectors is None:
        doctag_vectors = model.docvecs.doctag_syn0
    if doctag_locks is None:
        doctag_locks = model.docvecs.doctag_syn0_lockf

    # optionally train word vectors as well, exactly as in Word2Vec skip-gram
    if train_words and learn_words:
        train_sentence_sg(model, doc_words, alpha, work)

    # train each doctag against every word in the document, treating the
    # doctag vector as the skip-gram "context" that predicts each word
    for doctag_index in doctag_indexes:
        for word in doc_words:
            train_sg_pair(model, word, doctag_index, alpha, learn_vectors=learn_doctags,
                          learn_hidden=learn_hidden, context_vectors=doctag_vectors,
                          context_locks=doctag_locks)

    return len(doc_words)
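
# The helper below is an illustrative sketch, not part of the original module:
# it shows one way to call `train_document_dbow()` directly, first for an
# ordinary PV-DBOW update and then in the frozen, inference-style mode the
# docstring describes. The toy corpus, tags, dimensions, and alpha values are
# arbitrary assumptions for demonstration, and the API calls assume the
# gensim version this module belongs to (`model.docvecs.doctag_syn0` etc.).
def _example_train_document_dbow():
    import numpy as np
    from gensim.models.doc2vec import Doc2Vec, TaggedDocument, train_document_dbow

    # Plain integer tags double as row indexes into doctag_syn0, so the
    # doctag_indexes for document 0 are simply [0].
    corpus = [
        TaggedDocument(words=['human', 'interface', 'computer'], tags=[0]),
        TaggedDocument(words=['graph', 'trees', 'minors'], tags=[1]),
    ]
    model = Doc2Vec(corpus, dm=0, size=50, min_count=1)

    # Ordinary PV-DBOW update: nudge document 0's vector toward its own words.
    train_document_dbow(model, corpus[0].words, [0], alpha=0.025)

    # Inference-style update: freeze word and hidden weights and train only a
    # fresh doctag row, roughly what `Doc2Vec.infer_vector()` does internally
    # (infer_vector additionally decays alpha over several passes).
    dim = model.docvecs.doctag_syn0.shape[1]
    doctag_vectors = ((np.random.rand(1, dim) - 0.5) / dim).astype(np.float32)
    doctag_locks = np.ones(1, dtype=np.float32)
    train_document_dbow(model, ['graph', 'trees', 'minors'], [0], alpha=0.1,
                        learn_words=False, learn_hidden=False,
                        doctag_vectors=doctag_vectors, doctag_locks=doctag_locks)
    return doctag_vectors[0]  # the inferred vector for the new text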