def _parse_bool(text):
    """Convert a command-line token such as "True" or "False" to a bool.

    argparse passes option values as strings, and every non-empty string
    (including "False") is truthy, so the flag needs an explicit converter.

    Args:
        text: The raw string taken from the command line.

    Returns:
        True or False according to the (case-insensitive) token.

    Raises:
        argparse.ArgumentTypeError: If the token is not a recognised boolean.
    """
    token = text.strip().lower()
    if token in ('true', 't', 'yes', 'y', '1'):
        return True
    if token in ('false', 'f', 'no', 'n', '0'):
        return False
    raise argparse.ArgumentTypeError(f'expected a boolean value, got {text!r}')


# Command-line interface of the script.
parser = argparse.ArgumentParser(
    description='Run all queries on the inverted index.')
parser.add_argument(
    '--new',
    type=_parse_bool,  # without this, "--new False" would be the truthy string 'False'
    default=True,
    help='If True then build a new index from scratch. If False then attempt to'
    ' reuse existing index')
parser.add_argument(
    '--sim',
    default='BM25',
    # Must stay in sync with the keys of sim_name_to_class; rejecting unknown
    # names here gives a usage error instead of a KeyError traceback later.
    choices=('TF', 'TFIDF', 'BM25'),
    help='The type of similarity to use. Should be "TF" or "TFIDF" or "BM25"')
args = parser.parse_args()

# Process the documents under gov/documents. The stored index is requested
# exactly when --new is false.
documents_dir = os.path.join('gov', 'documents')
reuse_stored_index = not args.new
index = InvertedIndex(Preprocessor())
index.index_directory(documents_dir, use_stored_index=reuse_stored_index)

# Lookup table from the --sim command-line value to the similarity class.
sim_name_to_class = dict(
    TF=TF_Similarity,
    TFIDF=TFIDF_Similarity,
    BM25=BM25_Similarity,
)

# The class itself (not an instance) is handed to the index; a --sim value
# that is not a key of the table raises KeyError on this lookup.
sim = sim_name_to_class[args.sim]
index.set_similarity(sim)
print(f'Setting similarity to {sim.__name__}')

# Emit an empty line followed by the readiness message.
print('', 'Index ready.', sep='\n')

# Relative path of the topics file: gov/topics/gov.topics.
topics_dir = os.path.join('gov', 'topics')
topics_file = os.path.join(topics_dir, 'gov.topics')
Ejemplo n.º 2
0
import argparse
import os

from inverted_index import InvertedIndex
from preprocessor import Preprocessor
from similarity_measures import TF_Similarity, TFIDF_Similarity, BM25_Similarity

def _parse_bool(text):
    """Convert a command-line token such as "True" or "False" to a bool.

    argparse passes option values as strings, and every non-empty string
    (including "False") is truthy, so the flag needs an explicit converter.

    Args:
        text: The raw string taken from the command line.

    Returns:
        True or False according to the (case-insensitive) token.

    Raises:
        argparse.ArgumentTypeError: If the token is not a recognised boolean.
    """
    token = text.strip().lower()
    if token in ('true', 't', 'yes', 'y', '1'):
        return True
    if token in ('false', 'f', 'no', 'n', '0'):
        return False
    raise argparse.ArgumentTypeError(f'expected a boolean value, got {text!r}')


# Command-line interface of the script.
parser = argparse.ArgumentParser(description='Run all queries on the inverted index.')
# type=_parse_bool: without it, "--new False" would be the truthy string 'False'.
parser.add_argument('--new', type=_parse_bool, default=True,
                    help='If True then build a new index from scratch. If False then attempt to'
                         ' reuse existing index')
# choices must stay in sync with the keys of sim_name_to_class; rejecting unknown
# names here gives a usage error instead of a KeyError traceback later.
parser.add_argument('--sim', default='BM25', choices=('TF', 'TFIDF', 'BM25'),
                    help='The type of similarity to use. Should be "TF" or "TFIDF" or "BM25"')
args = parser.parse_args()

# Process the documents under gov/documents.
# NOTE(review): use_stored_index is hard-coded to True, so the --new option
# parsed earlier has no effect on this call. Confirm this is intended.
documents_dir = os.path.join('gov', 'documents')
index = InvertedIndex(Preprocessor())
index.index_directory(documents_dir, use_stored_index=True)

# Lookup table from the --sim command-line value to the similarity class.
sim_name_to_class = dict(
    TF=TF_Similarity,
    TFIDF=TFIDF_Similarity,
    BM25=BM25_Similarity,
)

# The class itself (not an instance) is handed to the index; a --sim value
# that is not a key of the table raises KeyError on this lookup.
sim = sim_name_to_class[args.sim]
index.set_similarity(sim)
print(f'Setting similarity to {sim.__name__}')

# Emit an empty line followed by the readiness message.
print('', 'Index ready.', sep='\n')


# Relative paths of the topics file (gov/topics/gov.topics) and of the runs
# file (runs/retrieved.runs).
topics_dir = os.path.join('gov', 'topics')
topics_file = os.path.join(topics_dir, 'gov.topics')
runs_file = os.path.join(*('runs', 'retrieved.runs'))