def add_new_labels(sentences, model):
    """
    Add new labels (for new docs) to the doc2vec model's `self.vocab`.

    Each label is expected to have the form ``<prefix>_<number>``. Its number
    is shifted by the highest trailing number found among the keys of
    ``model.vocab`` that start with ``"SENT"``, and the shifted label is then
    registered in the vocabulary:

    * if the shifted label already exists, its ``count`` is increased by the
      number of words in the sentence;
    * otherwise a new ``Vocab`` entry is created (``count`` set to the number
      of words, ``index`` set to its position at the end of the vocabulary,
      ``code = [0]``, ``sample_probability = 1.``) and the label is appended
      to ``model.index2word``.

    ``model.vocab`` and ``model.index2word`` are modified in place.

    Args:
        sentences: iterable of objects exposing ``words`` (anything with a
            length) and ``labels`` (iterable of ``"<prefix>_<number>"``
            strings).
        model: object exposing a ``vocab`` mapping and an ``index2word`` list.

    Returns:
        The number of labels that were newly created in the vocabulary
        (labels that already existed are not counted).

    Raises:
        ValueError: if a ``SENT...`` vocabulary key or an incoming label does
            not carry an integer where one is expected.
        IndexError: if an incoming label contains no underscore.

    from: <https://gist.github.com/zseder/4201551d7f8608f0b82b>
    """
    vocab = model.vocab

    # Highest number already used by a "SENT..." key. Materialised as a list
    # so that a vocabulary without any such key yields an offset of 0 instead
    # of max() raising ValueError on an empty sequence.
    existing_numbers = [
        int(key.split('_')[-1]) for key in vocab if key.startswith("SENT")
    ]
    model_sentence_n = max(existing_numbers) if existing_numbers else 0
    # NOTE(review): the offset is the highest existing number, not highest + 1,
    # so an incoming label numbered 0 is merged into the highest existing
    # label. Confirm that callers number new labels starting from 1.

    n_sentences = 0
    for sentence in sentences:
        sentence_length = len(sentence.words)
        for label in sentence.labels:
            label_e = label.split("_")
            label_n = int(label_e[1]) + model_sentence_n
            label = "{0}_{1}".format(label_e[0], label_n)

            if label in vocab:
                vocab[label].count += sentence_length
                continue

            # Brand-new label: create its entry and keep index2word aligned
            # with the vocabulary (the entry's index is the last position).
            vocab[label] = Vocab(count=sentence_length)
            vocab[label].index = len(vocab) - 1
            vocab[label].code = [0]
            vocab[label].sample_probability = 1.
            model.index2word.append(label)
            n_sentences += 1

    return n_sentences