# Tail of a flattened LF sentence indexing script. The call that owns the
# `field_properties=[...]` argument begins before this line.
# Steps visible on the line below, in order:
#   1. finish that call, declaring a single field, "sentence_id", typed
#      numpy.int32;
#   2. call index.init_index() and then index.open();
#   3. build LFSentenceStream(input_path, language=arguments.language),
#      LFSentenceParser() and LFSentenceIndexer(lexicon);
#   4. build IndexingPipeline(lexicon, index, storage);
#   5. log the input path and its size in MB (os.path.getsize() bytes
#      divided by 1024**2);
#   6. run indexing_pipeline.index_stream(stream, parser, indexer);
#   7. call index.close(), storage.close_db() and lexicon.dump(), logging a
#      message before each, then log completion.
# NOTE(review): no try/finally is visible here, so index.close() and
# storage.close_db() look like they are skipped if index_stream() raises -
# confirm against the full file.
# NOTE(review): the statements share one physical line with no separators;
# presumably the original line breaks were lost - verify before running.
field_properties=[ ("sentence_id", numpy.int32), ]) index.init_index() index.open() logging.info("Initializing sentence stream and its parser.") sentence_stream = LFSentenceStream(input_path, language=arguments.language) sentence_parser = LFSentenceParser() sentence_indexer = LFSentenceIndexer(lexicon) logging.info("Initializing indexing pipeline.") indexing_pipeline = IndexingPipeline(lexicon, index, storage) logging.info("Start indexing file: %s" % input_path) input_mb_size = float(os.path.getsize(input_path)) / (1024**2) logging.info("Input size: %.2fMB" % input_mb_size) indexing_pipeline.index_stream(sentence_stream, sentence_parser, sentence_indexer) logging.info("Closing index.") index.close() logging.info("Closing storage.") storage.close_db() logging.info("Dumping lexicon.") lexicon.dump() logging.info("No way, it's done!")
# Tail of a flattened Ruwac indexing script. The conditional that this
# `else:` belongs to begins before this line, as does the assignment of
# `sentence_stream`, which is only read here.
# Steps visible on the line below, in order:
#   1. the last visible branch assigns sentence_parser = RuwacParser(); the
#      `else:` branch raises Exception("Unsupported language: ...") formatted
#      with arguments.language;
#   2. build RuwacIndexer(lexicon) - presumably dedented out of the `else:`
#      branch, since it would be unreachable after the raise; TODO confirm;
#   3. build IndexingPipeline(lexicon, index, storage);
#   4. log the input path and its size in MB (os.path.getsize() bytes
#      divided by 1024 ** 2);
#   5. run indexing_pipeline.index_stream(stream, parser, indexer);
#   6. call index.close(), storage.close_db() and lexicon.dump(), logging a
#      message before each, then log completion.
# NOTE(review): no try/finally is visible here, so index.close() and
# storage.close_db() look like they are skipped if index_stream() raises -
# confirm against the full file.
# NOTE(review): the statements share one physical line with no separators;
# presumably the original line breaks were lost - verify before running.
sentence_parser = RuwacParser() else: raise Exception("Unsupported language: %s" % arguments.language) sentence_indexer = RuwacIndexer(lexicon) logging.info("Initializing indexing pipeline.") indexing_pipeline = IndexingPipeline(lexicon, index, storage) logging.info("Start indexing file: %s" % input_path) input_mb_size = float(os.path.getsize(input_path)) / (1024 ** 2) logging.info("Input size: %.2fMB" % input_mb_size) indexing_pipeline.index_stream(sentence_stream, sentence_parser, sentence_indexer) logging.info("Closing index.") index.close() logging.info("Closing storage.") storage.close_db() logging.info("Dumping lexicon.") lexicon.dump() logging.info("No way, it's done!")