Python gen_inverted_idx Examples

Programming Language: Python

Namespace/Package Name: inverted_idx

Method/Function: gen_inverted_idx

Examples at hotexamples.com: 5

Python gen_inverted_idx - 5 examples found. These are the top-rated real-world Python examples of inverted_idx.gen_inverted_idx, extracted from open source projects. You can rate the examples to help us improve their quality.

Example #1

Show file

import tokenizer
import inverted_idx


# Smooth scaling: idf(term) = 1 + log(N / (1 + number of postings for term))
def calculate(inverted_idx, documents):
    """Compute a smoothed inverse document frequency for every indexed term.

    Each term is scored as ``1 + log(N / (1 + p))`` where ``N`` is
    ``len(documents)`` and ``p`` is ``len(inverted_idx[term].postings)``.

    Args:
        inverted_idx: Mapping-like object that iterates over terms and whose
            values expose a sized ``postings`` attribute. Note that this
            parameter shadows the ``inverted_idx`` module inside the function.
        documents: Sized collection of documents; only its length is used.

    Returns:
        dict mapping each term to its float IDF score.

    Raises:
        ValueError: If ``documents`` is empty while ``inverted_idx`` is not,
            because ``math.log(0)`` is undefined.
    """
    # Imported locally: the surrounding module does not visibly import math,
    # so relying on a module-level name would raise NameError here.
    import math

    doc_no = len(documents)
    return {
        term: 1 + math.log(doc_no / (1 + len(inverted_idx[term].postings)))
        for term in inverted_idx
    }


if __name__ == "__main__":
    # Small demo corpus: string labels mapped to whitespace-separated text.
    docs = {
        "0": "abc alo ola 456 zzz ola",
        "1": "alo ola 321 123",
        "2": "hello 123 456 123",
        "3": "hello alo ola abc 123 456 zzz",
        "4": "123 123 123 123 123 123 123 123 123 123 123 123 123 123 123 123 123 123 123 123 123 123 123 123 123",
    }

    # Build the index for the corpus, then print the per-term IDF mapping.
    inv_idx = inverted_idx.gen_inverted_idx(docs)
    print(calculate(inv_idx, docs))

Example #2

Show file

File: vector_model.py Project: axblueblader/model-evaluation

 def __init__(self, docs, class_map):
     """Index ``docs`` and trigger the vector conversion.

     Stores the two values returned by
     ``inverted_idx.gen_inverted_idx(docs, class_map)`` as
     ``self.inverted_idx`` and ``self.terms_in_doc``, keeps a reference to
     ``docs``, starts ``self.idx_map`` as an empty list and finally calls
     ``self._convert_to_vectors()``.
     """
     index, terms_in_doc = inverted_idx.gen_inverted_idx(docs, class_map)
     self.inverted_idx = index
     self.terms_in_doc = terms_in_doc
     self.docs = docs
     self.idx_map = list()
     self._convert_to_vectors()

Example #3

Show file

 def __init__(self, docs):
     """Index ``docs`` and trigger the vector conversion.

     Stores the result of ``inverted_idx.gen_inverted_idx(docs)`` as
     ``self.inverted_idx``, keeps a reference to ``docs`` and then calls
     ``self._convert_to_vectors()``.
     """
     index = inverted_idx.gen_inverted_idx(docs)
     self.inverted_idx = index
     self.docs = docs
     self._convert_to_vectors()

Example #4

Show file

 def __init__(self, docs):
     """Store the result of ``inverted_idx.gen_inverted_idx(docs)``.

     The value is kept on the instance as ``self.inverted_idx``.
     """
     index = inverted_idx.gen_inverted_idx(docs)
     self.inverted_idx = index

Example #5

Show file

File: idf.py Project: axblueblader/model-evaluation

# Smooth scaling: idf(term) = 1 + log(N / (1 + number of postings for term))


def calculate(inverted_idx, documents):
    """Compute a smoothed inverse document frequency for every indexed term.

    Each term is scored as ``1 + log(N / (1 + p))`` where ``N`` is
    ``len(documents)`` and ``p`` is ``len(inverted_idx[term].postings)``.

    Args:
        inverted_idx: Mapping-like object that iterates over terms and whose
            values expose a sized ``postings`` attribute. Note that this
            parameter shadows the ``inverted_idx`` module inside the function.
        documents: Sized collection of documents; only its length is used.

    Returns:
        dict mapping each term to its float IDF score.

    Raises:
        ValueError: If ``documents`` is empty while ``inverted_idx`` is not,
            because ``math.log(0)`` is undefined.
    """
    # Imported locally: no module-level import of math is visible for this
    # snippet, so relying on one would risk a NameError.
    import math

    doc_no = len(documents)
    return {
        term: 1 + math.log(doc_no / (1 + len(inverted_idx[term].postings)))
        for term in inverted_idx
    }


if __name__ == "__main__":
    # Small demo corpus: string labels mapped to whitespace-separated text.
    docs = {
        "0": "abc alo ola 456 zzz ola",
        "1": "alo ola 321 123",
        "2": "hello 123 456 123",
        "3": "hello alo ola abc 123 456 zzz",
        "4": "123 123 123 123 123 123 123 123 123 123 123 123 123 123 123 123 123 123 123 123 123 123 123 123 123",
    }

    # gen_inverted_idx yields a pair here; only the first item is used below.
    inv_idx, terms_in_doc = inverted_idx.gen_inverted_idx(docs)
    print(calculate(inv_idx, docs))