import json

from gensim.models.doc2vec import Doc2Vec


def train_doc2vec(args):  # function name assumed; the section begins mid-script
    paragraphs_per_article = args.paragraphs_per_article
    istex = args.istex_file  # istex/wiki/max_nb arg names assumed, mirroring ucbl_file
    ucbl = args.ucbl_file
    wiki = args.wiki_file
    max_nb_wiki_paragraphs = args.max_nb_wiki_paragraphs
    output = args.output_file
    v_size = args.vec_size
    window_size = args.window
    min_count = args.min_count
    workers = args.workers
    n_iter = args.iter
    corpus = LoadFileJson()  # corpus loader defined elsewhere in this project

    if paragraphs_per_article < 1 and max_nb_wiki_paragraphs > 0:
        print('Set "paragraphs_per_article" to a value greater than 0 '
              'if you want to load Wikipedia articles.')
    else:
        data = corpus.LoadDocumentsIstexAndUCBL(istex, ucbl, wiki,
                                                max_nb_wiki_paragraphs,
                                                paragraphs_per_article)
        # gensim 3.x parameter names (vector_size/epochs supersede the
        # older size/iter keywords).
        model = Doc2Vec(data,
                        min_count=min_count,
                        vector_size=v_size,
                        workers=workers,
                        epochs=n_iter,
                        window=window_size)

        print "Vocabulary size after training: ", len(model.vocab.keys())
        print "count of documents", corpus.count + corpus.wiki_count

        # Persist the tag-to-document index next to the trained model.
        with open(output + "keysIndex", "w") as f:
            json.dump(corpus.index, f)
        model.save(output)
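

# Sketch (assumption, not part of the original script): reloading the two
# artifacts written above; "keysIndex" holds the corpus index mapping
# document tags back to their source entries.
def _load_trained(output: str):
    model = Doc2Vec.load(output)
    with open(output + "keysIndex") as f:
        index = json.load(f)
    return model, index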


def _save_for_inference(model: Doc2Vec, path_name: str) -> None:
    # Drop training-only state but keep what infer_vector needs
    # (gensim 3.x API; removed in gensim 4.0).
    model.delete_temporary_training_data(keep_doctags_vectors=True,
                                         keep_inference=True)
    model.save(path_name)
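

# Sketch (assumption): reloading a model saved by _save_for_inference and
# embedding unseen text; infer_vector expects a pre-tokenised list of words.
def _infer_example(path_name: str) -> None:
    model = Doc2Vec.load(path_name)
    vector = model.infer_vector(['istex', 'document', 'embedding', 'example'])
    print("Inferred vector dimensionality:", len(vector))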


def save_word_model(model: Doc2Vec, no_classes: str = '', filename: str = ''):
    # Default filename encodes the class count, e.g. Doc2Vec_10classes.
    if not filename:
        filename = 'Doc2Vec_' + str(no_classes) + 'classes'

    model.save('model/' + filename + '.bin')
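

# Sketch (assumption): how the two save helpers fit together; the class count
# and paths are illustrative only.
def _example_save(trained_model: Doc2Vec) -> None:
    # With the defaults this writes model/Doc2Vec_10classes.bin.
    save_word_model(trained_model, no_classes='10')
    # Keep an inference-ready copy with training-only state stripped.
    _save_for_inference(trained_model, 'model/Doc2Vec_10classes_inference.bin')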