class TestMySearchEngine(unittest.TestCase):
    """Exercise ``SearchEngine.search`` on a database seeded from ``idealdict``."""

    def setUp(self):
        """Create an engine named "database" and load ``idealdict`` into it."""
        self.x = SearchEngine("database")
        self.x.database.update(idealdict)

    def test_searchengine_type(self):
        """Searching for a word returns a dict."""
        self.assertIsInstance(self.x.search("round"), dict)

    def test_MyError_type_number(self):
        """Passing an int as the query raises ValueError."""
        self.assertRaises(ValueError, self.x.search, 15)

    def test_empty_string(self):
        """An empty query returns an empty dict."""
        found = self.x.search('')
        self.assertIsInstance(found, dict)
        self.assertEqual(found, {})

    def test_search_by_token(self):
        """Searching for a stored token returns that token's ``idealdict`` entry."""
        token = 'The'
        found = self.x.search(token)
        self.assertIsInstance(found, dict)
        self.assertEqual(found, idealdict[token])

    def tearDown(self):
        """Drop the engine and delete ``database.*`` entries from the working directory."""
        del self.x
        # Presumably these are files the engine's backing store created
        # during setUp -- confirm against SearchEngine.
        leftovers = [name for name in os.listdir('.') if name.startswith('database.')]
        for name in leftovers:
            os.remove(name)
Ejemplo n.º 2
0
# Copy each recognised command-line option into the variable holding its path.
# NOTE(review): assumes the four *_file names were initialised (to None) before
# this loop; that code is not visible here.
for x, y in opts:
    if x == '-d':
        dictionary_file = y
    elif x == '-p':
        postings_file = y
    elif x == '-q':
        query_file = y
    elif x == '-o':
        results_file = y
    else:
        raise AssertionError('unhandled option')

# All four paths are required; print the usage line and exit with status 2 if
# any is still None. Identity comparison is the correct test for None.
if any(path is None for path in (dictionary_file, postings_file, query_file, results_file)):
    print(f'usage: {sys.argv[0]} -d dictionary-file -p postings-file -q file-of-queries -o output-file-of-results')
    sys.exit(2)

# The document collection is read from a fixed path; the other inputs come
# from the command-line options parsed above.
document_file = 'document.txt'
dictionary = load_dictionary(dictionary_file)
documents = load_documents(document_file)
search_engine = SearchEngine(dictionary, documents, postings_file)
query, relevant_doc_ids = read_query(query_file)

# Mode 'w' truncates the file and starts writing at offset 0, so no explicit
# seek is needed before writing.
with open(results_file, 'w') as f:
    try:
        result = search_engine.search(query, relevant_doc_ids)
        # Formatting stays inside the try so a ParseError raised while
        # iterating the result is still reported in the output file.
        output = ' '.join(str(i) for i in result) + '\n'
    except ParseError as e:
        output = f'parse error encountered: {e}'
    f.write(output)

Ejemplo n.º 3
0
from searchengine import SearchEngine
from tokenstore import TokenStore


# Todo: reformat command line interface
def command_line(arg=None):
    """Prompt until the user enters an acceptable query, then return it.

    The typed text is lower-cased. When it contains " --" and the caller did
    not supply a truthy ``arg``, the text before the first " --" is the query
    and everything after it is the flag. Every character other than ASCII
    letters, digits and spaces is then removed from the query. If the cleaned
    query is longer than 32 characters a message is printed and the prompt is
    repeated.

    Args:
        arg: Flag supplied by the caller. When truthy it is returned as-is
            and " --" in the input is not treated as a flag separator.

    Returns:
        A ``(words, flag)`` tuple: the cleaned query split on whitespace, and
        the flag (``arg``, or the text parsed from the input, or ``None``).
    """
    while True:
        raw_query = input("Please type in what do you want to search: ").lower()

        # Work on a per-prompt copy so a flag parsed from a rejected query
        # does not carry over to the next attempt.
        flag = arg
        if " --" in raw_query and not flag:
            # partition splits on the first separator only, so extra " --"
            # occurrences no longer break the two-name unpacking.
            raw_query, _, flag = raw_query.partition(" --")

        query = re.sub("[^A-Za-z0-9 ]+", "", raw_query)

        if len(query) > 32:
            print("the query is too long, please shorten the search word")
        else:
            return query.split(), flag


if __name__ == '__main__':
    # Imported locally so this entry point does not rely on the `exit` helper
    # that the `site` module injects for interactive sessions.
    import sys

    store = TokenStore()
    search_engine = SearchEngine(store)

    # Leave with status 0 on Ctrl-C instead of showing a KeyboardInterrupt
    # traceback.
    signal.signal(signal.SIGINT, lambda _signum, _frame: sys.exit(0))

    # Prompt-and-search loop; it only ends through the SIGINT handler above.
    while True:
        search_queries, arg = command_line()
        if arg == "all":
            # The "all" flag passes False as the second positional argument.
            search_engine.search(search_queries, False)
        else:
            search_engine.search(search_queries)