コード例 #1
0
import sys

from configuration.tfidf.config import TfidfConfig
from model.util.file_parser import parse_dir_json
from search_engine.tfidf.search_engine import SearchEngine
from search_engine.tfidf.logger.logger_config import init_logger

if __name__ == '__main__':
    # Logging is set up before any other step runs.
    init_logger()

    # The first command-line argument is passed to TfidfConfig together with
    # the 'tfidf_search_engine' name; the returned mapping supplies every
    # path used below.
    settings = TfidfConfig(
        sys.argv[1], 'tfidf_search_engine').get_current_config()

    documents = parse_dir_json(settings['data_path'])

    # Load the model and dictionary, index the parsed documents, then
    # persist the index.
    engine = SearchEngine()
    engine.load_model(settings['model_path'], settings['dict_path'])
    engine.dummy_index(documents)
    engine.save_index(settings['index_path'], settings['url_path'])

    # Sample queries, run in this order; only the first three entries of
    # each search result are printed.
    sample_queries = (
        "israel bank money",
        "biggest wars in europe history",
        "bitcoin and blockchain are future",
        "gay marriages in europe",
        "USA trump foreign policy",
    )
    for query in sample_queries:
        print(engine.dummy_search(query)[:3])
コード例 #2
0
from gensim.corpora import Dictionary
from gensim.models import CoherenceModel, LdaMulticore

from configuration.lda.config import LdaConfig
from model.lda.logger.logger_config import init_logger
from model.lda.preprocess import Preprocessor
from model.util.file_parser import parse_dir_json

if __name__ == '__main__':
    # `logging` and `sys` are used below but are not among the imports listed
    # for this script, so they are imported here to avoid a NameError.
    import logging
    import sys

    init_logger()
    log = logging.getLogger('lda_model')

    # The first command-line argument is passed to LdaConfig together with
    # the 'lda_model' name; the returned mapping supplies the settings below.
    config = LdaConfig(sys.argv[1], 'lda_model').get_current_config()

    # parse_dir_json yields pairs; only the second element of each pair is
    # kept as the document collection (the first is discarded).
    _, docs = zip(*parse_dir_json(config['data_path']))

    preprocessed_docs = Preprocessor(
        max_workers=config['max_workers']).process_docs(docs)

    log.info("Loading model from %s", config['model_path'])
    lda_model = LdaMulticore.load(config['model_path'])
    log.info("Loading dictionary from %s", config['dict_path'])
    dictionary = Dictionary.load(config['dict_path'])

    # Build a coherence evaluator for the loaded model over the preprocessed
    # documents, using the 'c_v' coherence measure.
    coherence_model_lda = CoherenceModel(model=lda_model,
                                         texts=preprocessed_docs,
                                         dictionary=dictionary,
                                         coherence='c_v')

    coherence_lda = coherence_model_lda.get_coherence()