def __get_doc_scores(self, posting_list, query: Query):
    """Score every candidate document in *posting_list* against *query*.

    Builds the tf-idf vector of the query, then scores each document as
    the dot product of the query vector with the document's term weights
    (tw-idf, piv+ or bm25+, depending on ``self.weighting_model``),
    normalised by the product of both vector norms (cosine-style).

    Returns a dict mapping doc_id -> score.
    """
    click.secho("[Search Engine] Computing search scores ...", fg="bright_blue")

    # tf-idf weights of the query terms, plus the query-vector norm.
    query_tf_idf = {}
    norm_query_vector = 0
    query_vocabulary = query.get_vocabulary()
    for token in query_vocabulary:
        tf_idf = query.get_tf(token) * self.collection.get_idf(token)
        query_tf_idf[token] = tf_idf
        norm_query_vector += tf_idf**2
    norm_query_vector = sqrt(norm_query_vector)

    # The weighting model is fixed for the whole call: select the
    # document-weight function ONCE instead of re-testing the model name
    # for every (document, token) pair inside the nested loops.
    if self.weighting_model == "tw-idf":
        def doc_weight(token, doc_id):
            return self.collection.get_tw_idf(
                target_term=token, target_doc_id=doc_id, b=0.003)
    elif self.weighting_model == "tf-idf":
        # NOTE(review): the "tf-idf" model maps to pivoted normalization
        # (piv+), not plain tf-idf — confirm this mapping is intentional.
        def doc_weight(token, doc_id):
            return self.collection.get_piv_plus(
                target_term=token, target_doc_id=doc_id, b=0.2)
    else:
        def doc_weight(token, doc_id):
            return self.collection.get_bm25_plus(
                target_term=token, target_doc_id=doc_id, b=0.75, k1=1.2)

    doc_scores = {}
    for doc_id in posting_list:
        score = 0
        for token in query_vocabulary:
            score += query_tf_idf[token] * doc_weight(token, doc_id)
        # Normalise by both norms (raises ZeroDivisionError for an empty
        # query or a zero document norm, same as the original behavior).
        score /= self.collection.documents_norms[doc_id] * norm_query_vector
        doc_scores[doc_id] = score
    return doc_scores
def __get_posting_list(self, query: Query):
    """Return the OR-merged posting list over all tokens of *query*.

    Each token's posting list is fetched from the collection; successive
    lists are combined with ``merge_or_postings_list`` (set union of
    document ids).
    """
    merged = []
    for token in query.get_vocabulary():
        token_postings = self.collection.get_posting_list(token)
        if merged:
            merged = merge_or_postings_list(merged, token_postings)
        else:
            # First non-empty list seeds the accumulator directly.
            merged = token_postings
    return merged
def get_list_of_documents(self, query: Query):
    """Return documents where the words of the query appear.

    Iterates over the query vocabulary and accumulates, for each word,
    the ids of the documents containing it; the lists are merged with a
    set union and kept sorted.  Progress is reported on stdout.
    """
    target_documents_list = []
    # Get words of query
    vocabulary = query.get_vocabulary()
    for word in vocabulary:
        if not target_documents_list:
            target_documents_list = self.collection.get_documents_containing_term(word)
            print(
                f"[Search Engine] the word {word} is present in {len(target_documents_list)} items"
            )
        else:
            documents_list = self.collection.get_documents_containing_term(word)
            print(
                f"[Search Engine] the word {word} is present in {len(documents_list)} items"
            )
            print("Merge ...")
            # Merge the two lists and order the final list.
            # (sorted() accepts the set directly — the intermediate
            # list() copy was unnecessary.)
            target_documents_list = sorted(set(target_documents_list) | set(documents_list))
    return target_documents_list
def compute_scores(self, list_of_docs, query: Query):
    """Score each document, depending of the tokens it contains.

    Each document's score is the dot product of the query's tf-idf
    vector with the document's tf-idf vector (log-normalised tf).

    Returns a dict mapping doc_id -> score.
    """
    print("Search Engine is computing search scores ...")
    query_tf_idf = {}
    idf_by_word = {}  # idf is per-word, not per-document: compute it once
    vocab_query = query.get_vocabulary()
    # Get the tf-idf for words in the query.
    for word in vocab_query:
        idf = self.collection.compute_idf(word)
        idf_by_word[word] = idf
        query_tf_idf[word] = query.get_term_frequency(word) * idf
    # Score the documents which contain the words.
    doc_scores = {}
    for doc_id in list_of_docs:
        score = 0
        for word in vocab_query:
            normalized_tf = self.collection.log_normalization(
                term=word, id_document=doc_id
            )
            # When the word is absent, normalized_tf is 0 and the whole
            # product vanishes — no special-case branch needed.  Reusing
            # the cached idf avoids recomputing it per (doc, word) pair.
            score += query_tf_idf[word] * normalized_tf * idf_by_word[word]
        doc_scores[doc_id] = score
    return doc_scores