Esempi in Python per Cleaner.get_clean

Linguaggio di programmazione: Python

Spazio dei nomi/nome del pacchetto: Cleaner

Classe/tipologia: Cleaner

Metodo/funzione: get_clean

Esempi su hotexamples.com: 1

Cleaner.get_clean in Python: 1 esempio trovato. Questo è il miglior esempio reale in Python per Cleaner.Cleaner.get_clean, estratto da progetti open source. Puoi valutarlo per aiutarci a migliorare la qualità dei nostri esempi.

Metodi utilizzati di frequente

Mostra Nascondi

Cleaner(30)

clean_bmi(6)

Clean_Birthday(5)

Clean_Age(4)

clean_text(4)

clean_gender(3)

clean(3)

preprocess_text(2)

n_gram(2)

text_header_remover(2)

clean_file(2)

clean_empid(2)

__init__(2)

stop(1)

run(1)

replace(1)

remove_punct(1)

remove_non_marked(1)

remove_nan(1)

remove_multiple_method_comments(1)

preprocess_danmu(1)

case_fold(1)

get_df(1)

get_data_category_count(1)

get_clean(1)

getDF(1)

cleanSubtitles(1)

extractDate(1)

edit_bulk_comments(1)

delete_tags(1)

clean_df(1)

getContent(1)

Esempio n. 1

Mostra file

class InvertedIndexManager(object):
    """Build and query an inverted index over plain-text documents.

    Each document is read line by line, tokenized with
    ``nltk.word_tokenize``, passed through ``Cleaner.get_clean`` and stored
    in an ``InvertedIndexModel`` as one posting per distinct term, carrying
    the document id and the term's frequency in that document.  Document
    ids are assigned sequentially, starting at 1.
    """

    def __init__(self):
        """Create an empty index together with the cleaner used on tokens."""
        self.invert_index_model = InvertedIndexModel()
        self.cleaner = Cleaner()
        # Id given to the most recently indexed document; 0 means none yet.
        self.last_doc_id = 0

    def __read_doc(self, doc_name=""):
        """Return the content of the file *doc_name* as a list of lines.

        Line terminators are stripped.  Errors raised by ``open`` (for
        example a missing file) propagate to the caller.
        """
        with open(doc_name, 'r') as doc:
            return doc.read().splitlines()

    def __get_tokens(self, line=""):
        """Split *line* into raw tokens with ``nltk.word_tokenize``."""
        return nltk.word_tokenize(line)

    def __get_clean_tokens(self, line=""):
        """Tokenize *line* and run the tokens through the cleaner.

        Documents and queries both go through this helper so that query
        terms are looked up in the same form in which they were indexed.
        """
        token_list = self.__get_tokens(line=line)
        return self.cleaner.get_clean(p_token_list=token_list)

    def __get_token_frequency(self, lines=None):
        """Count the occurrences of every cleaned token found in *lines*.

        Args:
            lines: iterable of text lines; ``None`` is treated as empty.

        Returns:
            A dict mapping each cleaned token to its number of occurrences.
        """
        term_frequency = dict()

        for line in lines or ():
            for token in self.__get_clean_tokens(line=line):
                term_frequency[token] = term_frequency.get(token, 0) + 1
        return term_frequency

    def process_doc(self, doc_name=""):
        """Index the file *doc_name* under the next free document id.

        One posting per distinct term is added to the model, then the
        model is asked to refresh its idf values.
        """
        # Read and tokenize first: if the file cannot be processed, no
        # document id is consumed.
        lines = self.__read_doc(doc_name=doc_name)
        term_frequency = self.__get_token_frequency(lines=lines)

        self.last_doc_id = self.last_doc_id + 1

        for term, frequency in term_frequency.items():
            self.invert_index_model.add_posting(term=term,
                                                doc_id=self.last_doc_id,
                                                tf_score=frequency)
        self.invert_index_model.update_idf()

    def process_query(self, query=""):
        """Score the indexed documents against *query*.

        The query is tokenized and cleaned exactly like document text.
        Terms that are absent from the index are ignored; a term repeated
        in the query contributes once per occurrence.

        Returns:
            A dict mapping document id to the sum, over the matching query
            terms, of ``tf * idf``.  Documents matching no term are absent.
        """
        term_list = self.__get_clean_tokens(line=query)
        inverted_index = self.invert_index_model.get_inverted_index()
        docs = dict()

        for term in term_list:
            if term not in inverted_index:
                continue

            index_node = inverted_index[term]
            idf_score = float(index_node.get_idf())
            posting_list = index_node.get_posting_list()

            for posting in posting_list.get_list():
                # NOTE(review): ``tf_socre`` (sic) is the attribute name read
                # from the posting objects, which are defined outside this
                # file -- confirm the spelling there before renaming it.
                tf_score = float(posting.tf_socre)
                docs[posting.doc_id] = docs.get(posting.doc_id, 0) + tf_score * idf_score
        return docs

    def print_index(self):
        """Print the model's string representation of the index."""
        print(self.invert_index_model.get_string())