# Script entry point: builds the TF-IDF search index from the parsed JSON
# documents, saves it, and prints the top three hits for a few sample queries.
import sys

from configuration.tfidf.config import TfidfConfig
from model.util.file_parser import parse_dir_json
from search_engine.tfidf.search_engine import SearchEngine
from search_engine.tfidf.logger.logger_config import init_logger

if __name__ == '__main__':
    init_logger()

    # The first command-line argument is handed to TfidfConfig as-is.
    settings = TfidfConfig(sys.argv[1], 'tfidf_search_engine')
    config = settings.get_current_config()

    docs = parse_dir_json(config['data_path'])

    engine = SearchEngine()
    engine.load_model(config['model_path'], config['dict_path'])
    engine.dummy_index(docs)
    engine.save_index(config['index_path'], config['url_path'])

    # Smoke-test queries; only the first three results of each are printed.
    sample_queries = (
        "israel bank money",
        "biggest wars in europe history",
        "bitcoin and blockchain are future",
        "gay marriages in europe",
        "USA trump foreign policy",
    )
    for query in sample_queries:
        hits = engine.dummy_search(query)
        print(hits[:3])
# Script entry point: loads a saved LDA model and its dictionary, preprocesses
# the parsed JSON documents, and computes the model's c_v coherence score.
#
# `logging` and `sys` are imported here because the script body uses
# `logging.getLogger` and `sys.argv`; without them it fails with NameError.
import logging
import sys

from gensim.corpora import Dictionary
from gensim.models import CoherenceModel, LdaMulticore

from configuration.lda.config import LdaConfig
from model.lda.logger.logger_config import init_logger
from model.lda.preprocess import Preprocessor
from model.util.file_parser import parse_dir_json

if __name__ == '__main__':
    init_logger()
    log = logging.getLogger('lda_model')

    # The first command-line argument is handed to LdaConfig as-is.
    config = LdaConfig(sys.argv[1], 'lda_model').get_current_config()

    # parse_dir_json yields pairs; only the second element of each pair
    # (the document) is needed here, the first is discarded.
    _, docs = zip(*parse_dir_json(config['data_path']))
    preprocessed_docs = Preprocessor(
        max_workers=config['max_workers']).process_docs(docs)

    log.info("Loading model from %s", config['model_path'])
    lda_model = LdaMulticore.load(config['model_path'])

    log.info("Loading dictionary from %s", config['dict_path'])
    dictionary = Dictionary.load(config['dict_path'])

    coherence_model_lda = CoherenceModel(
        model=lda_model,
        texts=preprocessed_docs,
        dictionary=dictionary,
        coherence='c_v',
    )
    # NOTE(review): the score is not reported anywhere in the visible code;
    # confirm it is logged or otherwise used further down.
    coherence_lda = coherence_model_lda.get_coherence()