# Read-side setup: load the lexicon, open the inverted index and the storage
# that are constructed from input_path, and build a searcher over the index.
logging.info("Initializing lexicon.")
lexicon = DictLexicon(input_path)
lexicon.load()

logging.info("Opening index.")
index = InvertedIndex(input_path)
index.open()

logging.info("Initializing searcher.")
searcher = Searcher(index, "sentence_id")

logging.info("Initializing storage.")
storage = LdbStorage(input_path)
storage.open_db()

# The context resources are optional: they are only set up when a context
# input was supplied.
if context_input is not None:
    logging.info("Initializing context lexicon.")
    c_lexicon = DictLexicon(context_input)
    c_lexicon.load()

    logging.info("Opening context index.")
    c_index = InvertedIndex(context_input)
    c_index.open()

    logging.info("Initializing context searcher.")
    # NOTE(review): nesting was reconstructed from a whitespace-collapsed
    # original. c_searcher is bound here only for "rus"; handling for any
    # other language is not visible in this fragment -- confirm.
    if arguments.language == "rus":
        c_searcher = Searcher(c_index, "ruwac_document_id")
logging.info("Output: %s" % output_path) if os.path.exists(output_path): shutil.rmtree(output_path) os.makedirs(output_path) logging.info("Initializing lexicon.") lexicon = DictLexicon(output_path) lexicon.load() logging.info("Initializing storage.") storage = LdbStorage(output_path) storage.init_db() storage.open_db() logging.info("Initializing index.") index = InvertedIndex(output_path, field_properties=[ ("document_id", numpy.int32), ]) index.init_index() index.open() logging.info("Initializing ruwac stream and its parser.") if arguments.language == "spa" or arguments.language == "eng":
# Sentence-indexing setup: recreate the output directory, initialise the
# lexicon, storage and inverted index inside it, then build the sentence
# stream, parser and indexer.
input_path = arguments.input
output_path = arguments.output

# Pass values as logging arguments so formatting is deferred to the logging
# framework instead of being done eagerly with the % operator.
logging.info("Input: %s", input_path)
logging.info("Output: %s", output_path)

# Start from an empty output directory; anything already there is deleted.
if os.path.exists(output_path):
    shutil.rmtree(output_path)
os.makedirs(output_path)

logging.info("Initializing lexicon.")
lexicon = DictLexicon(output_path)

logging.info("Initializing storage.")
storage = LdbStorage(output_path)
storage.init_db()
storage.open_db()

logging.info("Initializing index.")
index = InvertedIndex(
    output_path,
    field_properties=[
        ("sentence_id", numpy.int32),
    ],
)
index.init_index()
index.open()

logging.info("Initializing sentence stream and its parser.")
sentence_stream = LFSentenceStream(input_path, language=arguments.language)
sentence_parser = LFSentenceParser()
sentence_indexer = LFSentenceIndexer(lexicon)
# Read-side setup: log the query and output locations, load the lexicon, open
# the inverted index and the storage that are constructed from input_path,
# and build a searcher over the index.
#
# Values are passed as logging arguments rather than pre-formatted with the
# % operator: eager "%s" % value raises TypeError when the value is a tuple
# whose length is not 1, while logging formats its argument safely.
logging.info("Query: %s", query_paths)
logging.info("Output: %s", output_path)

logging.info("Initializing lexicon.")
lexicon = DictLexicon(input_path)
lexicon.load()

logging.info("Opening index.")
index = InvertedIndex(input_path)
index.open()

logging.info("Initializing searcher.")
searcher = Searcher(index, "sentence_id")

logging.info("Initializing storage.")
storage = LdbStorage(input_path)
storage.open_db()

# The context resources are optional: they are only set up when a context
# input was supplied.
if context_input is not None:
    logging.info("Initializing context lexicon.")
    c_lexicon = DictLexicon(context_input)
    c_lexicon.load()

    logging.info("Opening context index.")
    c_index = InvertedIndex(context_input)
    c_index.open()

    logging.info("Initializing context searcher.")
    # NOTE(review): nesting was reconstructed from a whitespace-collapsed
    # original. c_searcher is bound here only for "rus"; handling for any
    # other language is not visible in this fragment -- confirm.
    if arguments.language == "rus":
        c_searcher = Searcher(c_index, "ruwac_document_id")