# Load the term lexicon stored alongside the input corpus.
logging.info("Initializing lexicon.")
lexicon = DictLexicon(input_path)
lexicon.load()


# Open the existing on-disk inverted index for the input corpus.
logging.info("Opening index.")
index = InvertedIndex(input_path)
index.open()


# Searcher resolves hits at sentence granularity via the "sentence_id" field.
logging.info("Initializing searcher.")
searcher = Searcher(index, "sentence_id")

# Key-value document storage for the same corpus; opened for use only
# (no init_db call, so the database is assumed to already exist on disk).
logging.info("Initializing storage.")
storage = LdbStorage(input_path)
storage.open_db()

# When a secondary ("context") corpus directory is supplied, bring up a
# parallel lexicon / index / searcher over it.
if context_input is not None:

    logging.info("Initializing context lexicon.")
    c_lexicon = DictLexicon(context_input)
    c_lexicon.load()

    logging.info("Opening context index.")
    c_index = InvertedIndex(context_input)
    c_index.open()

    logging.info("Initializing context searcher.")
    # NOTE(review): c_searcher is only bound for Russian runs; any later use of
    # c_searcher under another language would raise NameError — confirm intended.
    if arguments.language == "rus":
        c_searcher = Searcher(c_index, "ruwac_document_id")
# Report the destination directory for this run.
logging.info("Output: %s" % output_path)


# Start from a clean slate: remove any previous output, then recreate the dir.
if os.path.exists(output_path):
    shutil.rmtree(output_path)
os.makedirs(output_path)



# Build the lexicon object for the freshly created output directory.
# FIX(review): the original also called lexicon.load() here, but output_path
# was just wiped and recreated (rmtree/makedirs above), so there is no lexicon
# file on disk to read; the parallel fresh-output setup elsewhere in this file
# constructs DictLexicon(output_path) without calling load().
logging.info("Initializing lexicon.")
lexicon = DictLexicon(output_path)


# Create and open a fresh document store inside the new output directory
# (init_db creates the database, open_db makes it usable).
logging.info("Initializing storage.")
storage = LdbStorage(output_path)
storage.init_db()
storage.open_db()


# Fresh inverted index over the output directory; postings carry a
# 32-bit integer "document_id" field.
logging.info("Initializing index.")
index = InvertedIndex(output_path, field_properties=[
    ("document_id", numpy.int32),
])
index.init_index()
index.open()


logging.info("Initializing ruwac stream and its parser.")

# For Spanish/English runs, take input/output locations from the CLI arguments.
# (Membership test replaces the original chained `or` comparison — same result.)
# NOTE(review): for other languages, input_path/output_path must already be
# bound earlier in the script, or the lines below raise NameError — confirm.
if arguments.language in ("spa", "eng"):
    input_path = arguments.input
    output_path = arguments.output

# Lazy %-style logging args: formatting is skipped when INFO is disabled.
logging.info("Input: %s", input_path)
logging.info("Output: %s", output_path)

# Start from a clean output directory: drop any previous contents, recreate it.
if os.path.exists(output_path):
    shutil.rmtree(output_path)
os.makedirs(output_path)

# Lexicon for the new output directory. NOTE(review): load() is intentionally
# not called — the directory was just recreated, so the lexicon presumably
# starts empty and is populated during indexing; confirm against the indexer.
logging.info("Initializing lexicon.")
lexicon = DictLexicon(output_path)

# Create and open a fresh document store in the output directory.
logging.info("Initializing storage.")
storage = LdbStorage(output_path)
storage.init_db()
storage.open_db()

# Fresh inverted index keyed by a 32-bit integer "sentence_id" field.
logging.info("Initializing index.")
index = InvertedIndex(output_path,
                      field_properties=[
                          ("sentence_id", numpy.int32),
                      ])
index.init_index()
index.open()

# Wire up the sentence pipeline: stream raw sentences from input_path in the
# requested language, parse them, and index them against the lexicon.
logging.info("Initializing sentence stream and its parser.")
sentence_stream = LFSentenceStream(input_path, language=arguments.language)
sentence_parser = LFSentenceParser()
sentence_indexer = LFSentenceIndexer(lexicon)
# Example #4  (scraped-snippet separator; stray "0" artifact folded into this comment)
# Log the query file(s) and destination directory for this search run.
logging.info("Query: %s" % query_paths)
logging.info("Output: %s" % output_path)

# Load the term lexicon stored alongside the input corpus.
logging.info("Initializing lexicon.")
lexicon = DictLexicon(input_path)
lexicon.load()

# Open the existing on-disk inverted index for the input corpus.
logging.info("Opening index.")
index = InvertedIndex(input_path)
index.open()

# Searcher resolves hits at sentence granularity via the "sentence_id" field.
logging.info("Initializing searcher.")
searcher = Searcher(index, "sentence_id")

# Key-value document storage for the same corpus; opened for use only
# (no init_db call, so the database is assumed to already exist on disk).
logging.info("Initializing storage.")
storage = LdbStorage(input_path)
storage.open_db()

# When a secondary ("context") corpus directory is supplied, bring up a
# parallel lexicon / index / searcher over it.
if context_input is not None:

    logging.info("Initializing context lexicon.")
    c_lexicon = DictLexicon(context_input)
    c_lexicon.load()

    logging.info("Opening context index.")
    c_index = InvertedIndex(context_input)
    c_index.open()

    logging.info("Initializing context searcher.")
    # NOTE(review): c_searcher is only bound for Russian runs; any later use of
    # c_searcher under another language would raise NameError — confirm intended.
    if arguments.language == "rus":
        c_searcher = Searcher(c_index, "ruwac_document_id")