import math

# wl, c_tools, config, rank_tools and get_df_word are project-level modules /
# helpers assumed to be imported elsewhere in this module.


def get_tf_idf_documents_ntc(documents_ids, query_words, centroid=[]):
    """
    Calculate tf-idf weights for documents using the ntc scheme
    (raw term frequency * idf).
    :param centroid: optional centroid vector used as a fallback weight for
        query terms the document does not contain
    :param documents_ids: union of the documents that contain any word of the given query
    :param query_words: the words in the given query
    :return: a list of [doc_id, {term: tf-idf}] pairs
    """
    document_term_tf_idf = []
    length_inverted_index = len(wl.inverted_index)
    for doc_id in documents_ids:
        doc_id = int(doc_id)
        doc_dic = {}
        for word in query_words:
            if word in wl.documents_terms_frequency[doc_id]:
                tf = wl.documents_terms_frequency[doc_id].get(word)
                df = get_df_word(word)
                idf = math.log(length_inverted_index / df, 10)
                doc_dic[word] = tf * idf
            elif len(centroid) != 0:
                doc_dic[word] = centroid[c_tools.get_inverted_index_keys().index(word)]
        document_term_tf_idf.append([doc_id, doc_dic])
    return document_term_tf_idf
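
# A minimal, self-contained sketch of the ntc-style weight computed above:
# raw term frequency times log10(N / df). The numbers below are hypothetical
# and only illustrate the arithmetic; the real function reads tf from
# wl.documents_terms_frequency, df from get_df_word and N from the inverted index.
def _example_ntc_weight():
    tf = 3                              # hypothetical raw term frequency in one document
    df = 50                             # hypothetical document frequency of the term
    n = 1000                            # hypothetical size of the inverted index
    return tf * math.log(n / df, 10)    # 3 * log10(20) ~= 3.903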

def calculate_cosine_similarity_cluster_query(query_dict, centroid):
    """
    Calculate the cosine similarity between a query and a cluster centroid.
    :param query_dict: dictionary of {term: weight} for the query
    :param centroid: dictionary with a dense 'centroid' vector and its precomputed 'length'
    :return: cosine similarity, or 0 if either vector has zero length
    """
    numerator = 0
    query_vector_length = rank_tools.calculate_vector_length(list(query_dict.values()))
    for k, v in query_dict.items():
        index = c_tools.get_inverted_index_keys().index(k)
        numerator += v * centroid['centroid'][index]
    denominator = centroid['length'] * query_vector_length
    if denominator == 0:
        return 0
    return numerator / denominator
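
# A self-contained sketch of the cosine similarity computed above, using a
# hypothetical three-term vocabulary in place of c_tools.get_inverted_index_keys().
# The centroid dict mirrors the expected shape (a dense 'centroid' vector plus a
# precomputed 'length'), and query_length recomputes what
# rank_tools.calculate_vector_length is presumably expected to return (the Euclidean norm).
def _example_cluster_query_cosine():
    vocabulary = ['apple', 'banana', 'cherry']                  # hypothetical dictionary
    query_dict = {'apple': 1.0, 'cherry': 2.0}                  # hypothetical query weights
    centroid = {'centroid': [0.5, 0.1, 0.25], 'length': 0.57}   # hypothetical cluster
    numerator = sum(v * centroid['centroid'][vocabulary.index(k)]
                    for k, v in query_dict.items())
    query_length = math.sqrt(sum(v ** 2 for v in query_dict.values()))
    denominator = centroid['length'] * query_length
    return numerator / denominator if denominator != 0 else 0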

def prepare_frequency_vector(term_freq_dictionary, type_cal='None'):
    """
    Build a dense term-frequency vector of dictionary size from a {term: frequency} dictionary.
    :param term_freq_dictionary: dictionary of {term: raw frequency}
    :param type_cal: 'None' keeps raw frequencies, 'Log' applies 1 + log10(frequency)
    :return: a vector whose length is equal to the dictionary size
    """
    vector = [0] * config.dictionary_size
    for k, v in term_freq_dictionary.items():
        index = c_tools.get_inverted_index_keys().index(k)
        if type_cal == 'None':
            vector[index] = v
        elif type_cal == 'Log':
            if v == 0:
                vector[index] = 0
            else:
                vector[index] = 1 + math.log(v, 10)
    return vector
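
# A self-contained sketch of the two weighting modes above ('None' keeps raw
# counts, 'Log' applies 1 + log10(tf)), using a hypothetical four-term
# dictionary in place of config.dictionary_size / c_tools.get_inverted_index_keys().
def _example_frequency_vector():
    vocabulary = ['apple', 'banana', 'cherry', 'date']   # hypothetical dictionary
    term_freqs = {'banana': 4, 'date': 1}                # hypothetical document counts
    raw = [0] * len(vocabulary)
    logged = [0] * len(vocabulary)
    for term, freq in term_freqs.items():
        index = vocabulary.index(term)
        raw[index] = freq                                            # type_cal == 'None'
        logged[index] = 1 + math.log(freq, 10) if freq > 0 else 0    # type_cal == 'Log'
    return raw, logged    # ([0, 4, 0, 1], [0, 1.602..., 0, 1.0])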

def prepare_one_document_vector(words):
    """
    Create a vector in which each element is either 0 (the document does not
    contain the term at that index of the dictionary) or the log-scaled
    frequency 1 + log10(tf) (the document contains that term).
    :param words: dictionary of {term: raw frequency} for the terms the document contains
    :return: a vector whose length is equal to the dictionary size
    """
    vector = [0] * config.dictionary_size
    # vector = np.zeros(config.dictionary_size)
    for key in words:
        index = c_tools.get_inverted_index_keys().index(key)
        tf = 1 + math.log(words[key], 10)
        vector[index] = tf
    return vector
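
# The 1 + log10(tf) transform used above dampens repeated terms: a term that
# occurs 100 times is weighted 3x a term that occurs once, not 100x. The counts
# below are hypothetical and only illustrate the scale.
def _example_log_tf_dampening():
    return {tf: 1 + math.log(tf, 10) for tf in (1, 10, 100)}   # {1: 1.0, 10: 2.0, 100: 3.0}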

def get_tf_idf_documents_ltn(documents_ids, query_words, centroid=[]):
    """
    Calculate tf-idf weights for documents using the ltn scheme
    ((1 + log10(tf)) * idf).
    :param centroid: optional centroid vector used as a fallback weight for
        query terms the document does not contain
    :param documents_ids: union of the documents that contain any word of the given query
    :param query_words: the words in the given query
    :return: a list of [doc_id, {term: tf-idf}] pairs
    """
    document_term_tf_idf = []
    length_inverted_index = len(wl.inverted_index)
    for doc_id in documents_ids:
        doc_id = int(doc_id)
        doc_dic = {}
        for word in query_words:
            if word in wl.documents_terms_frequency[doc_id]:
                tf = 1 + math.log(wl.documents_terms_frequency[doc_id].get(word), 10)
                df = get_df_word(word)
                idf = math.log(length_inverted_index / df, 10)
                doc_dic[word] = tf * idf
            elif len(centroid) != 0:
                doc_dic[word] = centroid[c_tools.get_inverted_index_keys().index(word)]
        document_term_tf_idf.append([doc_id, doc_dic])
    return document_term_tf_idf
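
# A sketch contrasting the ltn weight above with the ntc weight from
# get_tf_idf_documents_ntc on the same hypothetical numbers: ltn applies the
# 1 + log10(tf) dampening before multiplying by the shared idf term.
def _example_ltn_vs_ntc_weight():
    tf, df, n = 3, 50, 1000                 # hypothetical tf, df and index size
    idf = math.log(n / df, 10)              # log10(20) ~= 1.301
    ltn = (1 + math.log(tf, 10)) * idf      # ~= 1.477 * 1.301 ~= 1.922
    ntc = tf * idf                          # ~= 3.903
    return ltn, ntc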