コード例 #1
0
    def create_with_ponderation_tf_idf(self, index, compute_norm=True):
        """Build a reverse index weighted by tf-idf.

        Each (term, document) weight is
        ``(1 + custom_log(tf)) * log10(N / df(term))`` where ``N`` is the
        number of documents and ``df`` comes from ``create_idf_counter``.

        Args:
            index: iterable of ``(document_id, tf_counter)`` pairs, where
                ``tf_counter`` maps each term to its raw frequency.
            compute_norm: when True, also accumulate per-document 'linear'
                and 'quadratic' norms of the weights in
                ``other_infos['norms']``.

        Returns:
            The populated Reverse_index.
        """
        N = len(index)
        reverse_index = Reverse_index(self.index_type)
        reverse_index.idf = self.create_idf_counter(index)
        reverse_index.other_infos['norms'] = defaultdict(
            lambda: defaultdict(float))
        id_full_list = []

        for (document_id, tf_counter) in index:
            # Only the *set* of ids is kept below, so one append per
            # non-empty document suffices (previously this appended once
            # per term; the guard keeps documents with an empty counter
            # out of the id set, exactly as before).
            if tf_counter:
                id_full_list.append(document_id)
            for term in tf_counter:
                tf_idf_ponderation = (
                    1 + self.custom_log(tf_counter[term])) * log10(
                        float(N) / reverse_index.idf[term])
                reverse_index.add_entry(term, document_id, tf_idf_ponderation)

                if compute_norm:
                    reverse_index.other_infos['norms'][document_id][
                        'linear'] += tf_idf_ponderation
                    reverse_index.other_infos['norms'][document_id][
                        'quadratic'] += tf_idf_ponderation * tf_idf_ponderation

        reverse_index.set_id_set(set(id_full_list))

        return reverse_index
コード例 #2
0
    def create_with_ponderation_tf_idf(self, index, compute_norm=True):
        """Build a tf-idf weighted reverse index.

        Each (term, document) weight is (1 + custom_log(tf)) multiplied by
        log10(N / df(term)), with N the document count and df taken from
        ``create_idf_counter``. When ``compute_norm`` is True, per-document
        'linear' and 'quadratic' norms of the weights are accumulated in
        ``other_infos['norms']``.
        """
        doc_count = len(index)
        rindex = Reverse_index(self.index_type)
        rindex.idf = self.create_idf_counter(index)
        rindex.other_infos['norms'] = defaultdict(lambda: defaultdict(float))
        seen_ids = []

        for doc_id, term_freqs in index:
            for term, freq in term_freqs.items():
                weight = (1 + self.custom_log(freq)) * log10(
                    float(doc_count) / rindex.idf[term])
                rindex.add_entry(term, doc_id, weight)

                seen_ids.append(doc_id)
                if compute_norm:
                    doc_norms = rindex.other_infos['norms'][doc_id]
                    doc_norms['linear'] += weight
                    doc_norms['quadratic'] += weight * weight

        rindex.set_id_set(set(seen_ids))

        return rindex
コード例 #3
0
    def create_with_ponderation_normal_frequency(self, index):
        """Build a reverse index with frequency normalized per document.

        Each weight is ``tf(term, doc) / max_tf(doc)``: the raw term
        frequency divided by the highest term frequency in that document.
        Per-document 'linear' and 'quadratic' norms of the *normalized*
        weights are accumulated in ``other_infos['norms']``.

        Args:
            index: iterable of ``(document_id, tf_counter)`` pairs.

        Returns:
            The populated Reverse_index.
        """
        reverse_index = Reverse_index(self.index_type)
        reverse_index.idf = self.create_idf_counter(index)
        reverse_index.other_infos['norms'] = defaultdict(
            lambda: defaultdict(float))
        id_full_list = []
        max_frequency_in_document = defaultdict(int)

        # First, create the unnormalized reverse index and record each
        # document's maximum term frequency.
        for (document_id, tf_counter) in index:
            for term in tf_counter:
                tf_ponderation = tf_counter[term]
                reverse_index.add_entry(term, document_id, tf_ponderation)
                max_frequency_in_document[document_id] = max(
                    max_frequency_in_document[document_id], tf_ponderation)

                id_full_list.append(document_id)

        # Then, normalize each weight by the maximum frequency occurrence
        # in its document.
        for word in reverse_index.get_all_words():
            entry = reverse_index.get_entry(word)
            for document_id in entry:
                normalized = entry[document_id] / float(
                    max_frequency_in_document[document_id])
                entry[document_id] = normalized
                # BUG FIX: the norms previously accumulated the stale
                # `tf_ponderation` left over from the first loop (its last
                # assigned value) instead of the normalized weight.
                reverse_index.other_infos['norms'][document_id][
                    'linear'] += normalized
                reverse_index.other_infos['norms'][document_id][
                    'quadratic'] += normalized * normalized

        reverse_index.set_id_set(set(id_full_list))

        return reverse_index
コード例 #4
0
    def create_with_ponderation_normal_frequency(self, index):
        """Build a reverse index with frequency normalized per document.

        Each weight is ``tf(term, doc) / max_tf(doc)``: the raw term
        frequency divided by the highest term frequency in that document.
        Per-document 'linear' and 'quadratic' norms of the *normalized*
        weights are accumulated in ``other_infos['norms']``.

        Args:
            index: iterable of ``(document_id, tf_counter)`` pairs.

        Returns:
            The populated Reverse_index.
        """
        reverse_index = Reverse_index(self.index_type)
        reverse_index.idf = self.create_idf_counter(index)
        reverse_index.other_infos['norms'] = defaultdict(lambda: defaultdict(float))
        id_full_list = []
        max_frequency_in_document = defaultdict(int)

        # First, create the unnormalized reverse index and record each
        # document's maximum term frequency.
        for (document_id, tf_counter) in index:
            for term in tf_counter:
                tf_ponderation = tf_counter[term]
                reverse_index.add_entry(term, document_id, tf_ponderation)
                max_frequency_in_document[document_id] = max(max_frequency_in_document[document_id], tf_ponderation)

                id_full_list.append(document_id)

        # Then, normalize each weight by the maximum frequency occurrence in its document.
        for word in reverse_index.get_all_words():
            entry = reverse_index.get_entry(word)
            for document_id in entry:
                normalized = entry[document_id] / float(max_frequency_in_document[document_id])
                entry[document_id] = normalized
                # BUG FIX: the norms previously accumulated the stale `tf_ponderation`
                # left over from the first loop (its last assigned value) instead of
                # the normalized weight.
                reverse_index.other_infos['norms'][document_id]['linear'] += normalized
                reverse_index.other_infos['norms'][document_id]['quadratic'] += normalized * normalized

        reverse_index.set_id_set(set(id_full_list))

        return reverse_index