Example #1
def handle_files(file_list_ref, documents_dictionary_ref):
    # Parse (and, when stem_mode is set, stem) the text of every entry in file_list_ref.
    # Entries are read positionally: [0] is the document id, [1] the raw text, [2] the file name.
    # documents_dictionary_ref is not referenced in the visible lines; presumably it is one of
    # the arguments missing from the truncated calls below - TODO confirm.
    # NOTE(review): this excerpt is incomplete. Both calls to __update_and_merge_dictionaries
    # lack their remaining arguments and closing parenthesis, and no `else:` separates the
    # stemming loop from the parse-only loop. Restore the missing lines before using this code.
    terms_dictionary = {}  # not referenced again in the visible lines; presumably the merge target - TODO confirm

    if stem_mode:  # not a parameter or local here, so it must come from an enclosing/module scope
        # Stemming path: parse each document's text, then stem the parsed result.
        for value in file_list_ref:
            doc_id = value[0]
            file_name = value[2]
            after_stemming = Stemmer.stemWithCache(Parser.start(value[1]))
            # Update this document's parameters (the rest of the argument list is missing here).
            __update_and_merge_dictionaries(doc_id, file_name,
            # Presumably also merges each per-document dictionary into one dictionary for the whole part - TODO confirm.
        # Parse-only path: same loop without the stemming step (the `else:` that should introduce it is missing).
        for value in file_list_ref:
            doc_id = value[0]
            file_name = value[2]
            after_parse = Parser.start(value[1])
            # Update this document's parameters (the rest of the argument list is missing here).
            __update_and_merge_dictionaries(doc_id, file_name,
            # Presumably also merges each per-document dictionary into one dictionary for the whole part - TODO confirm.

    # A call that creates a new temporary posting file for each part presumably followed here; it is absent from this excerpt.
Example #2
 def __parse_stem(self, text):
     """Parse *text*, additionally stemming the result when ``self.stem_mode`` is truthy.

     The text is always run through ``Parser.start``. Only when stemming is
     enabled is that parsed result passed on to ``Stemmer.stemWithCache``.
     Returns whichever of the two results applies.
     """
     parsed = Parser.start(text)
     if not self.stem_mode:
         # Stemming disabled: the parser output is the final result.
         return parsed
     # Stemming enabled: stem the parser output before returning it.
     return Stemmer.stemWithCache(parsed)
Example #3
    def find(self, query, stem_mode):
        # Look up `query` on English Wikipedia and build a dictionary mapping related terms
        # (taken from the titles of the page's links) to integer weights; the query itself is
        # added as a key just before returning. `stem_mode` selects whether text is stemmed
        # after parsing. Returns the resulting dictionary.
        # NOTE(review): this excerpt is incomplete - several `else:` branches, call arguments
        # and closing parentheses are missing (each spot is marked below). Restore the missing
        # lines from the original file before relying on this code.

        # Sum 0 + 1 + 2 + 3 + 4 = 10; used below as the scale factor for the term weights.
        total_value = 0
        for x in range(0, 5):  # We want to use this value for the weights
            total_value += x

        sum_of_df = 0
        wiki_wiki = wikipediaapi.Wikipedia(
            'en')  # Language edition of Wikipedia to search
        page_py = wiki_wiki.page(query)  # Page object for the query title

        query_dictionary = {}  # The dictionary returned to the caller
        if page_py.exists():
            line = page_py.summary  # Start from the page summary text
            # NOTE(review): the argument to len() is missing in this excerpt (presumably `line`).
            if len(
            ) < 300:  # Summary too short (no specific term returned): fall back to the page sections
                # print_sections is defined elsewhere; presumably returns the sections' text - TODO confirm.
                line = print_sections(page_py.sections)
            # NOTE(review): the `else:` between the stemmed and unstemmed assignments is missing,
            # so as written the second group always overwrites the first.
            if stem_mode:
                stop_set = {'disambigu'}  # Popular words we want to avoid (stemmed form)
                query_after = Stemmer.stemWithCache(Parser.start(query))
                terms_dictionary = Stemmer.stemWithCache(Parser.start(line))
                stop_set = {'Disambiguation'}  # Popular words we want to avoid (unstemmed form)
                query_after = Parser.start(query)
                terms_dictionary = Parser.start(line)

            concept = {}  # Candidate expansion terms -> accumulated value
            links = page_py.links  # Links found on the Wikipedia page, keyed by title
            for title in sorted(links.keys()):
                # NOTE(review): the stemWithCache( call is unclosed and the `else:` is missing here.
                if stem_mode:
                    term = Stemmer.stemWithCache(
                    term = Parser.start(links[title].title)

                for t, value in term.items(
                ):  # For each term parsed from the link title, check whether it qualifies
                    # Keep a term only if the link has ns == 0 (presumably the main article
                    # namespace - confirm against the Wikipedia-API docs), the term also occurs
                    # in the page text, is not part of the query, and is not a stop word.
                    if links[title].ns == 0 and t in terms_dictionary and \
                            t not in query_after and t not in stop_set:
                        if t not in concept:
                            concept[t] = value
                                t] += value  # Add the value (the df) to the existing entry; the `else:` and start of this statement are missing

            # Keep only the most common results.
            # NOTE(review): the argument to dict( and its closing parenthesis are missing here.
            query_dictionary = dict(
            for term, value in query_dictionary.items():
                sum_of_df += value

            # Rescale each value to an integer weight relative to the total.
            for term, value in query_dictionary.items():
                positive_value = int(total_value * value / sum_of_df) + 1
                # For non-negative values the `+ 1` above already makes this at least 1.
                if positive_value == 0:
                    positive_value = 1
                query_dictionary[term] = positive_value
            # NOTE(review): `is not 0` compares identity, not equality; `!= 0` is the reliable form.
            if len(query_after) is not 0:
                query = list(query_after.keys())[0]  # Use the first parsed query term as the key added below
            # NOTE(review): as written this prints whenever the page exists; it presumably
            # belonged to an `else:` branch that is missing from this excerpt.
            print("Invalid query")

        # NOTE(review): number_of_results is not defined anywhere in the visible code.
        query_dictionary[query] = number_of_results
        return query_dictionary