import json
import pickle

# SortedDict is assumed here to come from the sortedcontainers package.
from sortedcontainers import SortedDict


def load_nfull_collection(dir, vocab_json='vocab.json', postings_json='postings.json'):
    # Load a vocabulary and postings list previously serialized as JSON.
    with open(dir + '/' + vocab_json, 'r') as vocab_file:
        vocabulary = json.load(vocab_file)
    with open(dir + '/' + postings_json, 'r') as postings_file:
        postings = json.load(postings_file)
    return WordCollectionInfo(vocabulary, postings)
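
# A minimal usage sketch, assuming an index was previously serialized into a directory
# (the name 'index_dir' below is illustrative) and that WordCollectionInfo keeps its
# constructor arguments as attributes:
#
#   collection = load_nfull_collection('index_dir')
#   print(len(collection.vocabulary), "terms in the loaded vocabulary")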

def get_biword_index(documents) -> WordCollectionInfo:
    # Biword index: every pair of consecutive words is indexed as a single term.
    vocabulary = dict()
    postings = list()
    all_words_counter = 0
    unique_word_id = 0
    for word_pair, doc_id in get_words_pair(documents):
        unique_word_id = add_word(word_pair, unique_word_id, doc_id, vocabulary, postings)
        all_words_counter += 1
    return WordCollectionInfo(SortedDict(vocabulary), postings, documents, all_words_counter)
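
# get_words_pair is not shown in this listing; a minimal sketch of what it is assumed to do:
# walk each document and yield every pair of consecutive tokens together with the docID.
# The lowercase whitespace tokenization and the space-joined pair format are illustrative
# assumptions, not taken from the original code.
def get_words_pair(documents):
    for doc_id, text in enumerate(documents):
        tokens = text.lower().split()
        for first, second in zip(tokens, tokens[1:]):
            yield first + " " + second, doc_id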

def load_collection(dir, vocab_json='vocabSER.txt', postings_json='postingsSER.txt',
                    documents_json='documentsSER.txt'):
    # Load a pickled vocabulary, postings list and document collection.
    with open(dir + '/' + vocab_json, 'rb') as vocab_file:
        vocabulary = pickle.load(vocab_file)
    with open(dir + '/' + postings_json, 'rb') as postings_file:
        postings = pickle.load(postings_file)
    with open(dir + '/' + documents_json, 'rb') as documents_file:
        documents = pickle.load(documents_file)
    return WordCollectionInfo(vocabulary, postings, documents)
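
# The pickled files read above must have been produced by a matching save step, which is not
# part of this listing; a hypothetical counterpart, assuming WordCollectionInfo exposes its
# constructor arguments as vocabulary, postings and documents attributes (the name
# save_collection is illustrative):
def save_collection(collection, dir, vocab_json='vocabSER.txt', postings_json='postingsSER.txt',
                    documents_json='documentsSER.txt'):
    with open(dir + '/' + vocab_json, 'wb') as vocab_file:
        pickle.dump(collection.vocabulary, vocab_file)
    with open(dir + '/' + postings_json, 'wb') as postings_file:
        pickle.dump(collection.postings, postings_file)
    with open(dir + '/' + documents_json, 'wb') as documents_file:
        pickle.dump(collection.documents, documents_file)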

def get_biword_index(documents) -> WordCollectionInfo:
    # Alternative biword index built around Term objects instead of parallel lists.
    words_dict = {}
    all_words_counter = 0
    for word, docID in get_words_pair(documents):
        all_words_counter += 1
        if word not in words_dict:
            # postings is a list of docIDs
            words_dict[word] = Term([docID])
        elif words_dict[word].postings[-1] != docID:
            words_dict[word].postings.append(docID)
        words_dict[word].freq += 1
    return WordCollectionInfo(SortedDict(words_dict), documents, all_words_counter)
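
# The Term class is also not shown; a minimal sketch consistent with how it is used here and
# in the Term-based get_coordinate_index below: it holds a postings structure (a docID list
# for the biword index, a per-document dict for the coordinate index) plus a collection
# frequency counter.
class Term:
    def __init__(self, postings=None):
        # Either a list of docIDs or a dict keyed by docID, depending on the index type.
        self.postings = postings if postings is not None else {}
        self.freq = 0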

def get_incidence_matrix(documents) -> WordCollectionInfo:
    vocabulary = dict()
    postings = list()
    all_words_counter = 0
    unique_word_id = 0
    doc_count = len(documents)
    for word, doc_id in get_words(documents):
        unique_word_id = add_word(word, unique_word_id, doc_id, vocabulary, postings, doc_count)
        all_words_counter += 1
    return WordCollectionInfo(SortedDict(vocabulary), postings, documents, all_words_counter)
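
# Neither get_words nor add_word appears in this listing; minimal sketches of what they are
# assumed to do, based on how they are called above. The tokenization is an illustrative
# lowercase whitespace split, and documents is assumed to be a list of raw text strings
# whose list index serves as the docID.
def get_words(documents):
    for doc_id, text in enumerate(documents):
        for word in text.lower().split():
            yield word, doc_id


# When doc_count is given (the incidence-matrix case) the posting entry is assumed to be a
# 0/1 incidence vector over documents; otherwise it is an ordered list of docIDs, mirroring
# the vocabulary/postings structure built explicitly in the second get_coordinate_index below.
def add_word(word, unique_word_id, doc_id, vocabulary, postings, doc_count=None):
    if word not in vocabulary:
        vocabulary[word] = {"id": unique_word_id, "frequency": 0}
        postings.append([0] * doc_count if doc_count is not None else [])
        unique_word_id += 1
    vocabulary[word]["frequency"] += 1
    word_id = vocabulary[word]["id"]
    if doc_count is not None:
        postings[word_id][doc_id] = 1
    elif not postings[word_id] or postings[word_id][-1] != doc_id:
        postings[word_id].append(doc_id)
    return unique_word_id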

def get_coordinate_index(documents) -> WordCollectionInfo:
    # Positional (coordinate) index built around Term objects.
    # Term.postings is a dict: key - docID, value - per-document frequency and list of positions.
    words_dict = {}
    all_words_counter = 0
    for word, pos, docID in get_positioned_words(documents):
        all_words_counter += 1
        if word not in words_dict:
            words_dict[word] = Term()
        if docID not in words_dict[word].postings:
            words_dict[word].postings[docID] = {"frequency": 1, "positions": [pos]}
        else:
            words_dict[word].postings[docID]["frequency"] += 1
            words_dict[word].postings[docID]["positions"].append(pos)
        words_dict[word].freq += 1
    return WordCollectionInfo(SortedDict(words_dict), documents, all_words_counter)
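
# get_positioned_words is assumed to yield (word, position, docID) triples, where position is
# the token's offset inside its document; a minimal sketch under the same tokenization
# assumptions as get_words above:
def get_positioned_words(documents):
    for doc_id, text in enumerate(documents):
        for pos, word in enumerate(text.lower().split()):
            yield word, pos, doc_id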

def get_coordinate_index(documents) -> WordCollectionInfo:
    vocabulary = dict()
    postings = list()
    all_words_counter = 0
    unique_word_id = 0
    for word, pos, doc_id in get_positioned_words(documents):
        if word not in vocabulary:
            word_id = unique_word_id
            vocabulary[word] = {"id": word_id, "frequency": 0}
            postings.append(dict())
            unique_word_id += 1
        vocabulary[word]["frequency"] += 1
        word_id = vocabulary[word]["id"]
        if doc_id not in postings[word_id]:
            postings[word_id][doc_id] = [pos]
        else:
            postings[word_id][doc_id].append(pos)
        all_words_counter += 1
    return WordCollectionInfo(SortedDict(vocabulary), postings, documents, all_words_counter)
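
# A small end-to-end sketch of the vocabulary/postings variant above, assuming
# WordCollectionInfo simply stores its constructor arguments as attributes and that documents
# are plain strings as in the generator sketches; the sample texts are illustrative:
if __name__ == "__main__":
    docs = ["to be or not to be", "to sleep perchance to dream"]
    index = get_coordinate_index(docs)
    word_id = index.vocabulary["to"]["id"]
    # Expected positions of "to" per document: {0: [0, 4], 1: [0, 3]}
    print(index.postings[word_id])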