def query_weight(query_string, dicts, idx_file, N):
    '''
    Calculate the weight of every term of a query.

    For each term the posting list is registered in InvertedIndex (an
    empty PostingList when the term is not in dicts) and a row with the
    keys "tf", "df", "idf" and "w" is stored in the query table.

    query_string -- iterable of query terms (presumably an already
                    tokenized query; TODO confirm against the caller)
    dicts        -- mapping from term to the value handed to
                    seek_inverted_index_file to locate its posting list
    idx_file     -- inverted index file, passed through unchanged to
                    seek_inverted_index_file
    N            -- numerator of the idf ratio (presumably the number of
                    documents in the collection; TODO confirm)

    Returns the tuple (query_table, docs_set, InvertedIndex.InvertedIndex())
    where docs_set holds the int doc ids found in the posting lists of
    all known query terms.
    '''
    # query's parameter table
    query_table = {}
    # docs set for merge document
    docs_set = set()

    for term in query_string:
        if term in dicts:
            InvertedIndex.add_word_postlist(
                term, seek_inverted_index_file(idx_file, dicts[term], term))
            posting_list = InvertedIndex.get_posting_list(term)

            # add doc# to set
            for doc in posting_list.docitemmap:
                docs_set.add(int(doc))

            tf = 1
            df = posting_list.df
            if df:
                # float() forces true division: under Python 2 an int / int
                # ratio is truncated, which distorts idf and makes
                # math.log fail as soon as df > N.
                idf = math.log(float(N) / df, 10)
                weight = (1 + math.log(tf)) * idf
            else:
                # A term without any document gets the same zero weight as
                # an unknown term instead of raising ZeroDivisionError.
                idf = 0
                weight = 0
        else:
            InvertedIndex.add_word_postlist(term, PostingList())
            tf = 1
            df = 0
            idf = 0
            weight = 0

        query_table[term] = {"tf": tf, "df": df, "idf": idf, "w": weight}

    return (query_table, docs_set, InvertedIndex.InvertedIndex())
def query_weight(query_string, dicts, idx_file):
    '''
    Calculate the weight of every term of a query.

    For each term a row with the keys "tf", "df", "idf" and "w" is stored
    in the query table and the term's posting list (an empty dict when
    the term is not in dicts) is stored in the term index.

    query_string -- iterable of query terms (presumably an already
                    tokenized query; TODO confirm against the caller)
    dicts        -- mapping from term to the value handed to
                    seek_inverted_index_file to locate its posting list
    idx_file     -- inverted index file, passed through unchanged to
                    seek_inverted_index_file

    NOTE: N, the numerator of the idf ratio, is not a parameter; it is
    read from the enclosing (module) scope and must be defined there.

    Returns the tuple (query_table, docs_set, term_index) where docs_set
    holds the int doc ids found in the posting lists of all known query
    terms.
    '''
    # term's index
    term_index = {}
    # query's parameter table
    query_table = {}
    # docs set for merge document
    docs_set = set()

    for term in query_string:
        if term in dicts:
            posting_list = seek_inverted_index_file(
                idx_file, dicts[term], term)
            term_index[term] = posting_list
            # Debug output of the original; the parenthesised form prints
            # the same text under Python 2 and also parses under Python 3.
            print(term_index[term])

            # add doc# to set
            for doc in posting_list.docitemmap:
                docs_set.add(int(doc))

            tf = 1
            df = posting_list.df
            if df:
                # float() forces true division: under Python 2 an int / int
                # ratio is truncated, which distorts idf and makes
                # math.log fail as soon as df > N.
                idf = math.log(float(N) / df, 10)
                weight = (1 + math.log(tf)) * idf
            else:
                # A term without any document gets the same zero weight as
                # an unknown term instead of raising ZeroDivisionError.
                idf = 0
                weight = 0
        else:
            term_index[term] = {}
            tf = 1
            df = 0
            idf = 0
            weight = 0

        query_table[term] = {"tf": tf, "df": df, "idf": idf, "w": weight}

    return (query_table, docs_set, term_index)
def query_weight(query_string, dicts, idx_file):
    '''
    Calculate the weight of every term of a query.

    Every term gets an entry in the query table (keys "tf", "df", "idf"
    and "w") and an entry in the term index: the posting list returned by
    seek_inverted_index_file for a term found in dicts, an empty dict
    otherwise.

    query_string -- iterable of query terms (presumably an already
                    tokenized query; TODO confirm against the caller)
    dicts        -- mapping from term to the value handed to
                    seek_inverted_index_file to locate its posting list
    idx_file     -- inverted index file, passed through unchanged to
                    seek_inverted_index_file

    NOTE: N, the numerator of the idf ratio, is not a parameter; it is
    read from the enclosing (module) scope and must be defined there.

    Returns the tuple (query_table, docs_set, term_index) where docs_set
    holds the int doc ids found in the posting lists of all known query
    terms.
    '''
    # term's index
    term_index = {}
    # query's parameter table
    query_table = {}
    # docs set for merge document
    docs_set = set()

    for term in query_string:
        # Default row, used as is for a term missing from the dictionary.
        row = {"tf": 1, "df": 0, "idf": 0, "w": 0}
        query_table[term] = row

        if term not in dicts:
            term_index[term] = {}
            continue

        term_index[term] = seek_inverted_index_file(
            idx_file, dicts[term], term)
        # Debug output of the original; the parenthesised form prints the
        # same text under Python 2 and also parses under Python 3.
        print(term_index[term])

        # add doc# to set
        for doc in term_index[term].docitemmap:
            docs_set.add(int(doc))

        row["df"] = term_index[term].df
        if row["df"]:
            # float() forces true division: under Python 2 an int / int
            # ratio is truncated, which distorts idf and makes math.log
            # fail as soon as df > N. A df of 0 keeps the zero defaults
            # instead of raising ZeroDivisionError.
            row["idf"] = math.log(float(N) / row["df"], 10)
            row["w"] = (1 + math.log(row["tf"])) * row["idf"]

    return (query_table, docs_set, term_index)