"sentence") else: input_path = arguments.input output_path = arguments.output logging.info("Input: %s" % input_path) logging.info("Output: %s" % output_path) if os.path.exists(output_path): shutil.rmtree(output_path) os.makedirs(output_path) logging.info("Initializing lexicon.") lexicon = DictLexicon(output_path) logging.info("Initializing storage.") storage = LdbStorage(output_path) storage.init_db() storage.open_db() logging.info("Initializing index.") index = InvertedIndex(output_path, field_properties=[ ("sentence_id", numpy.int32), ]) index.init_index() index.open() logging.info("Initializing sentence stream and its parser.")
"document" ) else: input_path = arguments.input output_path = arguments.output query_paths = arguments.query context_input = arguments.context_input logging.info("Input: %s" % input_path) logging.info("Context: %s" % context_input) logging.info("Query: %s" % query_paths) logging.info("Output: %s" % output_path) logging.info("Initializing lexicon.") lexicon = DictLexicon(input_path) lexicon.load() logging.info("Opening index.") index = InvertedIndex(input_path) index.open() logging.info("Initializing searcher.") searcher = Searcher(index, "sentence_id") logging.info("Initializing storage.") storage = LdbStorage(input_path) storage.open_db()
# Document-index building setup: resolve paths, recreate the output
# directory, and open the lexicon / document storage / inverted index
# rooted there (mangled formatting reconstructed).
input_path = arguments.input
output_path = arguments.output
logging.info("Input: %s" % input_path)
logging.info("Output: %s" % output_path)
# Start from a clean output directory: wipe any previous run's artifacts.
if os.path.exists(output_path):
    shutil.rmtree(output_path)
os.makedirs(output_path)
logging.info("Initializing lexicon.")
lexicon = DictLexicon(output_path)
# NOTE(review): load() is called right after output_path was wiped and
# recreated, so there is nothing on disk to load yet -- presumably
# DictLexicon.load() tolerates a missing file (the sibling index-building
# chunks in this file skip this call entirely); confirm intended.
lexicon.load()
logging.info("Initializing storage.")
storage = LdbStorage(output_path)
storage.init_db()
storage.open_db()
logging.info("Initializing index.")
# Postings carry one int32 payload field per hit: the containing
# document's id.
index = InvertedIndex(output_path, field_properties=[
    ("document_id", numpy.int32),
])
index.init_index()
index.open()
# --- Fragment: term-frequency dump script (mangled formatting
# --- reconstructed; the opening `if <test-mode>:` header is cut off before
# --- this chunk -- a sibling chunk in this file spells it out as
# --- `if arguments.test == 1:`).
    # Test mode reads from the pipeline's fixture tree instead of the real
    # input directory.
    input_path = os.path.join(
        arguments.input, "test_out", arguments.test_size, "sentence"
    )
else:
    input_path = arguments.input
logging.info("Input: %s" % input_path)
logging.info("Initializing lexicon.")
lexicon = DictLexicon(input_path)
lexicon.load()
# Collect per-term corpus frequencies: term_dict maps
# term -> (term_id, frequency), so index [1] is the frequency.
# (iteritems() => this file targets Python 2.)
counter = collections.Counter()
for term, term_id_and_freq in lexicon.term_dict.iteritems():
    counter[term] = term_id_and_freq[1]
# Emit a CSV of terms ranked by descending frequency, 1-based rank first.
i = 0
sys.stdout.write("i,term,freq\n")
for term, freq in counter.most_common():
    i += 1
    sys.stdout.write("%d,%s,%d\n" % (i, term, freq))
logging.info("[DONE]")
# Sentence-index building setup: resolve paths, recreate the output
# directory, and open the lexicon / storage / inverted index rooted there
# (mangled formatting reconstructed).
input_path = arguments.input
output_path = arguments.output
logging.info("Input: %s" % input_path)
logging.info("Output: %s" % output_path)
# Start from a clean output directory: wipe any previous run's artifacts.
if os.path.exists(output_path):
    shutil.rmtree(output_path)
os.makedirs(output_path)
logging.info("Initializing lexicon.")
# Fresh lexicon to be populated by this indexing run; no .load() call.
lexicon = DictLexicon(output_path)
logging.info("Initializing storage.")
storage = LdbStorage(output_path)
storage.init_db()
storage.open_db()
logging.info("Initializing index.")
# Postings carry one int32 payload field per hit: the containing
# sentence's id.
index = InvertedIndex(output_path, field_properties=[
    ("sentence_id", numpy.int32),
])
index.init_index()
index.open()
# --- Fragment: search-script setup with an optional context store (mangled
# --- formatting reconstructed; the opening `if <test-mode>:` header and any
# --- test-branch assignments preceding context_input are cut off before
# --- this chunk).
    # Test mode: context documents live under the pipeline's test output tree.
    # NOTE(review): the join is rooted at arguments.output, not
    # arguments.input -- confirm this is intended.
    context_input = os.path.join(arguments.output, "test_out",
                                 arguments.test_size, arguments.language,
                                 "document")
else:
    # Normal run: all paths come straight from the CLI arguments.
    input_path = arguments.input
    output_path = arguments.output
    query_paths = arguments.query
    context_input = arguments.context_input
logging.info("Input: %s" % input_path)
logging.info("Context: %s" % context_input)
logging.info("Query: %s" % query_paths)
logging.info("Output: %s" % output_path)
logging.info("Initializing lexicon.")
# Read-only run: load the existing lexicon from the index directory.
lexicon = DictLexicon(input_path)
lexicon.load()
logging.info("Opening index.")
index = InvertedIndex(input_path)
index.open()
logging.info("Initializing searcher.")
# Searcher resolves postings via their "sentence_id" payload field.
searcher = Searcher(index, "sentence_id")
logging.info("Initializing storage.")
storage = LdbStorage(input_path)
storage.open_db()
# Optional context store: only set up when a context path was supplied
# (the body of this `if` continues past this chunk).
if context_input is not None:
choices=("tiny", "medium", "large"), default="tiny") arg_parser.add_argument("-i", "--input", type=str) arguments = arg_parser.parse_args() if arguments.test == 1: input_path = os.path.join(arguments.input, "test_out", arguments.test_size, "sentence") else: input_path = arguments.input logging.info("Input: %s" % input_path) logging.info("Initializing lexicon.") lexicon = DictLexicon(input_path) lexicon.load() counter = collections.Counter() for term, term_id_and_freq in lexicon.term_dict.iteritems(): counter[term] = term_id_and_freq[1] i = 0 sys.stdout.write("i,term,freq\n") for term, freq in counter.most_common(): i += 1 sys.stdout.write("%d,%s,%d\n" % (i, term, freq)) logging.info("[DONE]")