def test_embedding_trainer_doc2vec_local(self, mock_getcwd):
    """Train a Doc2Vec embedding on sample postings and check local persistence.

    Exercises four steps against a temporary directory: saving the trained
    model under the trainer's model name, saving it again under an explicit
    file name, loading it back, and saving it through a different store.

    Args:
        mock_getcwd: Mock whose return value is pointed at the temporary
            directory. NOTE(review): presumably injected by a patch of
            ``os.getcwd`` declared outside this block -- confirm.
    """
    document_schema_fields = [
        'description',
        'experienceRequirements',
        'qualifications',
        'skills',
    ]
    with tempfile.TemporaryDirectory() as td:
        # The directory listings below go through os.getcwd(), so the mock
        # is aimed at the temporary directory before anything is written.
        mock_getcwd.return_value = td

        job_postings_generator = JobPostingCollectionSample(num_records=30)
        corpus_generator = Doc2VecGensimCorpusCreator(
            job_postings_generator,
            document_schema_fields=document_schema_fields,
        )
        d2v = Doc2VecModel(
            storage=FSStore(td),
            size=10,
            min_count=3,
            iter=4,
            window=6,
            workers=3,
        )

        trainer = EmbeddingTrainer(corpus_generator, d2v)
        trainer.train(lookup=True)
        trainer.save_model()

        # After the default save, the model and trainer agree on the name
        # and the directory holds exactly one entry with that name.
        assert d2v.model_name == trainer.model_name
        assert set(os.listdir(os.getcwd())) == {trainer.model_name}
        self.assertDictEqual(trainer.lookup_dict, d2v.lookup_dict)

        # Save as different name
        d2v.save('other_name.model')
        assert set(os.listdir(os.getcwd())) == {
            trainer.model_name,
            'other_name.model',
        }

        # Load
        d2v_loaded = Doc2VecModel.load(FSStore(td), trainer.model_name)
        loaded_model_type = d2v_loaded.metadata["embedding_model"]["model_type"]
        trained_model_type = trainer.metadata["embedding_model"]["model_type"]
        assert loaded_model_type == trained_model_type

        # Change the store directory
        new_path = os.path.join(td, 'other_directory')
        trainer.save_model(FSStore(new_path))
        assert set(os.listdir(new_path)) == {trainer.model_name}