コード例 #1
0
 def build(cls):
     """Run a full crawl and write the index file.

     A new ``WebCrawler`` is created and stored on ``cls.crawler``. Its scrape
     methods are then run for the index pages, the country pages and the
     continent pages, in that order, before ``create_index_file`` is called.
     A message is printed before the work starts and after it finishes.
     """
     command_print("Build started")
     crawler = WebCrawler()
     cls.crawler = crawler
     crawler.scrape_index_pages()      # index pages
     crawler.scrape_country_pages()    # every country page
     crawler.scrape_continent_pages()  # every continent page
     # Build the index file from what the crawler collected in memory.
     crawler.create_index_file()
     command_print("Build completed")
コード例 #2
0
 def print_or_find(cls, search_query, is_find_command):
     """Look up ``search_query`` in the loaded inverted index and print the matches.

     :param search_query: text passed on to ``get_documents``
     :param is_find_command: flag passed on to ``get_documents`` unchanged
     :return: None; each matching document and its count is printed on its own
         line, or a notice is printed when there is no match or no loaded index
     """
     index = cls.inverted_index
     if not index:
         command_print("Inverted Index is not loaded.")
         return
     matches = index.get_documents(search_query, is_find_command)
     if not matches:
         command_print("No result found against query '{}'".format(search_query))
     for url, hits in matches.items():
         command_print(url, hits)
コード例 #3
0
    def get_documents(self, search_query, is_find_command):
        """
        Return the documents that contain every word of the search query.

        :param search_query: query words separated by whitespace; leading, trailing
            and repeated whitespace is ignored
        :param is_find_command: when true, up to MAX_FIND_QUERY_WORDS words are
            allowed and the result is ordered by descending count; otherwise only
            a single word is allowed
        :return: dict mapping each matching document to the sum of its counts over
            all query words; empty when nothing matches or when the query has too
            many words (a message is printed in that case)
        """
        # split() without an argument drops empty tokens, so stray spaces do not
        # count towards the word limit.
        query_words = search_query.split()
        max_query_words = MAX_FIND_QUERY_WORDS if is_find_command else 1
        if len(query_words) > max_query_words:
            command_print("Search query can't have more than {} words".format(max_query_words))
            return {}

        # None means "no word processed yet". An empty set is a real result (some
        # word matched nothing) and must not be reseeded by later words.
        common_documents = None
        total_counts = Counter()
        for query_word in query_words:
            word_documents = self.inverted_index_dict.get(query_word, {})
            if common_documents is None:
                common_documents = set(word_documents)
            else:
                common_documents.intersection_update(word_documents)
            total_counts += Counter(word_documents)

        if not common_documents:
            return {}

        # Counter addition discards non-positive totals, hence the membership test.
        result_documents = {
            document: total_counts[document]
            for document in common_documents
            if document in total_counts
        }
        if is_find_command:
            ranked = sorted(result_documents.items(), key=operator.itemgetter(1), reverse=True)
            result_documents = dict(ranked)
        return result_documents
コード例 #4
0
 def __new__(cls):
     """Create the instance only when the inverted index file exists.

     :return: a new instance when ``INVERTED_INDEX_FILE_NAME`` is an existing file;
         otherwise None, after printing a hint to run the build command first
     """
     if os.path.isfile(INVERTED_INDEX_FILE_NAME):
         return super(InvertedIndex, cls).__new__(cls)
     command_print(INVERTED_INDEX_FILE_NAME, "File doesn't exist. Please Use build command first")
     return None
コード例 #5
0
 def load(cls):
     """Construct an ``InvertedIndex`` and keep it on ``cls.inverted_index``.

     The confirmation message is printed only when the constructed value is
     truthy; a falsy result is still stored as-is.
     """
     index = InvertedIndex()
     cls.inverted_index = index
     if index:
         command_print("Inverted index loaded")
コード例 #6
0
        else:
            command_print("Inverted Index is not loaded.")


if __name__ == '__main__':
    # Interactive prompt: list the commands once, then read and dispatch one
    # command per line until the user quits or the input stream ends.
    main_obj = Main()
    print("Following commands are valid available commands:")
    print("1) build")
    print("2) load")
    print("3) find")
    print("4) print")
    print("5) quit")
    while True:
        try:
            command = input("> ")
        except EOFError:
            # stdin was closed (Ctrl-D or end of piped input): stop cleanly
            # instead of ending the session with a traceback.
            break
        if command.strip().lower() == "quit":
            break
        elif command.startswith("build"):
            main_obj.build()
        elif command.startswith("load"):
            main_obj.load()
        # find and print share one handler; the flag tells it which of the
        # two the user typed. Everything after the command word is the query.
        elif command.startswith("find "):
            main_obj.print_or_find(command[len("find "):], True)
        elif command.startswith("print "):
            main_obj.print_or_find(command[len("print "):], False)
        else:
            command_print("Invalid Command:", command)