Exemple #1
0
def query_weight(query_string, dicts, idx_file, N):
    """Build the per-term weight table for a query.

    Each query term gets an entry ``{"tf", "df", "idf", "w"}`` where
    ``idf = log10(N / df)`` and ``w = (1 + ln(tf)) * idf``.  ``tf`` is
    fixed at 1, so a term repeated in the query is not counted twice.

    Side effect: every term's posting list is registered through
    ``InvertedIndex.add_word_postlist`` (an empty ``PostingList()`` for
    terms missing from ``dicts``).

    Args:
        query_string: Iterable of query terms.
        dicts: Container tested with ``term in dicts``; ``dicts[term]`` is
            forwarded to ``seek_inverted_index_file`` (presumably the
            term's location in the index file -- confirm with the caller).
        idx_file: Passed through unchanged to ``seek_inverted_index_file``.
        N: Numerator of the idf ratio (presumably the number of documents
            in the collection -- confirm with the caller).

    Returns:
        Tuple ``(query_table, docs_set, index)``: the weight table, the set
        of ``int`` ids collected by iterating each posting list's
        ``docitemmap``, and whatever ``InvertedIndex.InvertedIndex()``
        returns.
    """
    query_table = {}
    docs_set = set()

    for term in query_string:
        tf = 1
        if term in dicts:
            InvertedIndex.add_word_postlist(
                term, seek_inverted_index_file(idx_file, dicts[term], term))
            postings = InvertedIndex.get_posting_list(term)
            docs_set.update(int(doc) for doc in postings.docitemmap)
            df = postings.df
        else:
            InvertedIndex.add_word_postlist(term, PostingList())
            df = 0

        if df > 0:
            # float() forces true division: on Python 2, int / int would
            # truncate the ratio before the logarithm is taken.
            idf = math.log10(float(N) / df)
            weight = (1 + math.log(tf)) * idf
        else:
            # No document contains the term (unknown term, or an indexed
            # term with an empty posting list): it contributes nothing.
            idf = 0
            weight = 0

        query_table[term] = {"tf": tf, "df": df, "idf": idf, "w": weight}

    return (query_table, docs_set, InvertedIndex.InvertedIndex())
Exemple #2
0
def query_weight(query_string, dicts, idx_file, total_docs=None):
    """Build the per-term weight table for a query.

    Each query term gets an entry ``{"tf", "df", "idf", "w"}`` where
    ``idf = log10(n / df)`` and ``w = (1 + ln(tf)) * idf``.  ``tf`` is
    fixed at 1, so a term repeated in the query is not counted twice.

    Args:
        query_string: Iterable of query terms.
        dicts: Container tested with ``term in dicts``; ``dicts[term]`` is
            forwarded to ``seek_inverted_index_file`` (presumably the
            term's location in the index file -- confirm with the caller).
        idx_file: Passed through unchanged to ``seek_inverted_index_file``.
        total_docs: Numerator ``n`` of the idf ratio.  When omitted, the
            module-level name ``N`` is used, as the function always did
            (presumably the collection size -- confirm where ``N`` is set).

    Returns:
        Tuple ``(query_table, docs_set, term_index)``: the weight table,
        the set of ``int`` ids collected by iterating each posting list's
        ``docitemmap``, and a dict mapping each term to the object returned
        by ``seek_inverted_index_file`` (``{}`` for terms not in ``dicts``).
    """
    term_index = {}
    query_table = {}
    docs_set = set()

    for term in query_string:
        tf = 1
        if term in dicts:
            postings = seek_inverted_index_file(idx_file, dicts[term], term)
            term_index[term] = postings
            # Diagnostic output kept from the original; the parenthesised
            # form prints the same thing on Python 2 and Python 3.
            print(postings)
            docs_set.update(int(doc) for doc in postings.docitemmap)
            df = postings.df
        else:
            term_index[term] = {}
            df = 0

        if df > 0:
            # The global N is only looked up when it is actually needed, so
            # queries made solely of unknown terms never touch it.
            n_docs = N if total_docs is None else total_docs
            # float() forces true division: on Python 2, int / int would
            # truncate the ratio before the logarithm is taken.
            idf = math.log10(float(n_docs) / df)
            weight = (1 + math.log(tf)) * idf
        else:
            # No document contains the term (unknown term, or an indexed
            # term with an empty posting list): it contributes nothing.
            idf = 0
            weight = 0

        query_table[term] = {"tf": tf, "df": df, "idf": idf, "w": weight}

    return (query_table, docs_set, term_index)
Exemple #3
0
def query_weight(query_string, dicts, idx_file, total_docs=None):
    """Build the per-term weight table for a query.

    Each query term gets an entry ``{"tf", "df", "idf", "w"}`` where
    ``idf = log10(n / df)`` and ``w = (1 + ln(tf)) * idf``.  ``tf`` is
    fixed at 1, so a term repeated in the query is not counted twice.

    Args:
        query_string: Iterable of query terms.
        dicts: Container tested with ``term in dicts``; ``dicts[term]`` is
            forwarded to ``seek_inverted_index_file`` (presumably the
            term's location in the index file -- confirm with the caller).
        idx_file: Passed through unchanged to ``seek_inverted_index_file``.
        total_docs: Numerator ``n`` of the idf ratio.  When omitted, the
            module-level name ``N`` is used, as the function always did
            (presumably the collection size -- confirm where ``N`` is set).

    Returns:
        Tuple ``(query_table, docs_set, term_index)``: the weight table,
        the set of ``int`` ids collected by iterating each posting list's
        ``docitemmap``, and a dict mapping each term to the object returned
        by ``seek_inverted_index_file`` (``{}`` for terms not in ``dicts``).
    """
    term_index = {}
    query_table = {}
    docs_set = set()

    for term in query_string:
        tf = 1
        if term in dicts:
            postings = seek_inverted_index_file(
                idx_file, dicts[term], term)
            term_index[term] = postings
            # Diagnostic output kept from the original; the parenthesised
            # form prints the same thing on Python 2 and Python 3.
            print(postings)
            docs_set.update(int(doc) for doc in postings.docitemmap)
            df = postings.df
        else:
            term_index[term] = {}
            df = 0

        if df > 0:
            # The global N is only looked up when it is actually needed, so
            # queries made solely of unknown terms never touch it.
            n_docs = N if total_docs is None else total_docs
            # float() forces true division: on Python 2, int / int would
            # truncate the ratio before the logarithm is taken.
            idf = math.log10(float(n_docs) / df)
            weight = (1 + math.log(tf)) * idf
        else:
            # No document contains the term (unknown term, or an indexed
            # term with an empty posting list): it contributes nothing.
            idf = 0
            weight = 0

        query_table[term] = {"tf": tf, "df": df, "idf": idf, "w": weight}

    return (query_table, docs_set, term_index)