Python Stemmer.stemWithCache Examples

Programming Language: Python

Namespace/Package Name: Model.Stemmer

Class/Type: Stemmer

Method/Function: stemWithCache

Examples at hotexamples.com: 3

Python Stemmer.stemWithCache - 3 examples found. These are the top-rated real-world Python examples of Model.Stemmer.Stemmer.stemWithCache, extracted from open source projects. You can rate examples to help us improve their quality.

Frequently Used Methods

Show Hide

stemWithCache(3)

reset(2)

stemming(2)

Stemmer(1)

_stem(1)

clean_cache(1)

get_dictionary(1)

get_dictionary_value(1)

get_dictionary_without_stemming(1)

save_stemmed_vocabulary(1)

Example #1

Show file

File: Control.py Project: nevoit/NT-Search-Engine

def handle_files(file_list_ref, documents_dictionary_ref):
    """Parse (and, in stem mode, stem) a batch of documents and index them.

    Every entry of ``file_list_ref`` is read by position: ``entry[0]`` is
    used as the document id, ``entry[1]`` is the text handed to
    ``Parser.start`` and ``entry[2]`` is used as the file name.

    The per-document term dictionary is passed, together with the batch-wide
    term dictionary and ``documents_dictionary_ref``, to
    ``__update_and_merge_dictionaries``. Once all entries are processed the
    batch-wide dictionary is handed to ``Indexer.create_temp_posting_file``.

    The module-level ``stem_mode`` flag decides whether parsed terms are also
    run through ``Stemmer.stemWithCache``.
    """
    # Decide once, up front, how a document's text becomes a term dictionary.
    if stem_mode:
        def extract_terms(text):
            # Parse first, then stem the parsed result.
            return Stemmer.stemWithCache(Parser.start(text))
    else:
        def extract_terms(text):
            # Parsing only; no stemming.
            return Parser.start(text)

    batch_terms = {}
    for entry in file_list_ref:
        doc_id = entry[0]
        file_name = entry[2]
        doc_terms = extract_terms(entry[1])
        # Presumably records the document's parameters and folds its terms
        # into the batch dictionary -- verify against the helper itself.
        __update_and_merge_dictionaries(doc_id, file_name,
                                        batch_terms,
                                        documents_dictionary_ref,
                                        doc_terms)

    # One temporary posting file is created for the whole batch.
    Indexer.create_temp_posting_file(batch_terms)

Example #2

Show file

File: Searcher.py Project: nevoit/NT-Search-Engine

 def __parse_stem(self, text):
     """Return the term dictionary for ``text``.

     The text is always parsed with ``Parser.start``; when
     ``self.stem_mode`` is truthy the parsed result is additionally passed
     through ``Stemmer.stemWithCache``.
     """
     if not self.stem_mode:
         # Stemming disabled: the parser output is the final result.
         return Parser.start(text)
     # Stemming enabled: stem whatever the parser produced.
     return Stemmer.stemWithCache(Parser.start(text))

Example #3

Show file

File: Wikipedia.py Project: nevoit/NT-Search-Engine

    def find(self, query, stem_mode):
        """Build a weighted term dictionary for ``query`` from its Wikipedia page.

        The English Wikipedia page named ``query`` is looked up. If it
        exists, terms from the titles of its outgoing links are kept as
        candidates when they also occur in the page text (the summary, or
        the output of ``print_sections`` when the summary is shorter than
        300 characters), are not part of the query itself and are not in the
        stop set. The ``number_of_results`` most common candidates are kept
        and each is given the weight ``int(10 * count / total_count) + 1``.

        Finally the query itself is added with the value
        ``number_of_results``. When parsing the query produced at least one
        term, the first such term is used as the key instead of the raw
        query string.

        If the page does not exist, ``"Invalid query"`` is printed and the
        result contains only the raw query.

        :param query: text to look up on Wikipedia.
        :param stem_mode: when truthy, every parsed dictionary is also passed
            through ``Stemmer.stemWithCache``.
        :return: dict mapping terms to integer weights.
        """
        # 0 + 1 + 2 + 3 + 4 == 10: the scale used when turning counts into weights.
        total_value = sum(range(5))

        # NOTE(review): recent Wikipedia-API releases appear to require a
        # user agent argument here -- confirm against the installed version.
        wiki_wiki = wikipediaapi.Wikipedia('en')  # language to search in
        page_py = wiki_wiki.page(query)

        query_dictionary = {}  # the dictionary returned to the caller
        if page_py.exists():
            line = page_py.summary
            if len(line) < 300:
                # Summary too short to be useful: fall back to the sections.
                line = print_sections(page_py.sections)

            if stem_mode:
                stop_set = {'disambigu'}  # stemmed form of the word to avoid
                query_after = Stemmer.stemWithCache(Parser.start(query))
                terms_dictionary = Stemmer.stemWithCache(Parser.start(line))
            else:
                stop_set = {'Disambiguation'}  # popular word to avoid
                query_after = Parser.start(query)
                terms_dictionary = Parser.start(line)

            concept = {}
            links = page_py.links
            for title in sorted(links):
                link = links[title]
                # Only links in namespace 0 can contribute, so skip the rest
                # before paying for parsing/stemming their titles.
                if link.ns != 0:
                    continue

                term = Parser.start(link.title)
                if stem_mode:
                    term = Stemmer.stemWithCache(term)

                for t, value in term.items():
                    if (t in terms_dictionary
                            and t not in query_after
                            and t not in stop_set):
                        # Accumulate the value across all link titles.
                        concept[t] = concept.get(t, 0) + value

            # Keep only the most common candidates.
            query_dictionary = dict(
                Counter(concept).most_common(number_of_results))
            sum_of_df = sum(query_dictionary.values())

            # Only values of existing keys are replaced, so mutating the
            # dict while iterating over it is safe here.
            for term, value in query_dictionary.items():
                positive_value = int(total_value * value / sum_of_df) + 1
                # Cannot trigger for positive counts (the "+ 1" already
                # guarantees >= 1); kept as a safety net for odd inputs.
                if positive_value == 0:
                    positive_value = 1
                query_dictionary[term] = positive_value

            # Was ``len(query_after) is not 0``: identity comparison with an
            # int literal is a SyntaxWarning and implementation-dependent.
            if query_after:
                query = next(iter(query_after))
        else:
            print("Invalid query")

        query_dictionary[query] = number_of_results
        return query_dictionary