Python InvertedIndex 예제들

프로그래밍 언어: Python

네임스페이스/패키지 이름: textanalysis.irdatastructs

클래스/타입: InvertedIndex

hotexamples.com에서의 예제들: 4

Python InvertedIndex - 4개의 예제가 발견되었습니다. 이것들은 오픈소스 프로젝트에서 추출된 Python의 textanalysis.irdatastructs.InvertedIndex에 대한 실세계 최고 등급의 예제들입니다. 예제들을 평가하여 예제의 품질 향상에 도움을 줄 수 있습니다.

자주 사용되는 메소드들

보기 숨기기

InvertedIndex(2)

add(1)

load(1)

save(1)

예제 #1

파일 보기

"""Rank the terms of an inverted index by how many distinct postings they have.

Loads the index for ``entity_type``, prints every (term, count) pair in
ascending order of count, writes the terms whose count exceeds
``STOPWORD_THRESHOLD`` to ``<entity_type>_stopwords.txt`` and plots the
reciprocal of the counts.
"""
__author__ = 'matias'

from textanalysis.irdatastructs import InvertedIndex
from matplotlib import pyplot as plt

entity_type = "disease"

# A term is written to the stop-word file when its posting list holds more
# than this many distinct entries.
STOPWORD_THRESHOLD = 80

index = InvertedIndex(entity_type)
index.load()

# (term, number of distinct postings) pairs, sorted so the rarest come first.
ranking = [(term, len(set(index.index[term]))) for term in index.index]
ranking.sort(key=lambda tup: tup[1])

with open("%s_stopwords.txt" % (entity_type, ), 'w') as outfile:
    for count, e in enumerate(ranking, 1):
        # A single pre-formatted argument makes print() emit the same text
        # under Python 2 and Python 3.
        print("%s %s" % (count, e))
        if e[1] > STOPWORD_THRESHOLD:
            outfile.write("%s\n" % (e[0], ))

print(len(ranking))

# Plot 1 / (distinct posting count) for every ranked term except the last
# one, i.e. the term with the highest count is left out of the curve.
# NOTE(review): a term with an empty posting list would raise
# ZeroDivisionError here -- confirm the index never stores one.
plt.plot([1.0 / tup[1] for tup in ranking[:-1]])
plt.show()

예제 #2

파일 보기

파일: index_stats.py 프로젝트: carriercomm/medical-text

"""Print per-term posting statistics for an inverted index.

The script loads the index named by ``entity_type``, counts the distinct
postings of every term, lists the terms from least to most frequent, saves
the ones above ``STOPWORD_THRESHOLD`` to ``<entity_type>_stopwords.txt`` and
plots the reciprocal of each count.
"""
__author__ = 'matias'

from textanalysis.irdatastructs import InvertedIndex
from matplotlib import pyplot as plt

entity_type = "disease"

# Terms with more distinct postings than this are treated as stop words.
STOPWORD_THRESHOLD = 80

index = InvertedIndex(entity_type)
index.load()

# Build (term, distinct posting count) pairs, then order them by count.
ranking = [(term, len(set(index.index[term]))) for term in index.index]
ranking.sort(key=lambda tup: tup[1])

with open("%s_stopwords.txt" % (entity_type,), 'w') as outfile:
    for count, e in enumerate(ranking, 1):
        # print() with one formatted string behaves identically on
        # Python 2 and Python 3 (the bare print statement is 2-only).
        print("%s %s" % (count, e))
        if e[1] > STOPWORD_THRESHOLD:
            outfile.write("%s\n" % (e[0],))

print(len(ranking))

# Plot the reciprocal of the distinct posting count; the slice drops the
# final (highest-count) entry of the ranking from the plot.
# NOTE(review): a zero count would raise ZeroDivisionError -- verify that
# the index cannot contain a term with no postings.
plt.plot([1.0/tup[1] for tup in ranking[:-1]])
plt.show()

예제 #3

파일 보기

파일: build_index.py 프로젝트: carriercomm/medical-text

"""Build and save the disease and symptom inverted indexes.

Iterates over the case report library, extracts the disease and symptom
entities from each case text, adds them to the matching index under the
case's 1-based sequence number, and saves both indexes once the loop ends.
"""
__author__ = 'matias'

from textanalysis.entityextractor import DiseaseExtractor, SymptomExtractor
from textanalysis.texts import CaseReportLibrary
from textanalysis.irdatastructs import InvertedIndex

d_index = InvertedIndex("disease")
s_index = InvertedIndex("symptom")

cases = CaseReportLibrary()
d_extractor = DiseaseExtractor()
s_extractor = SymptomExtractor()

# Upper bound on the number of case reports that get indexed.
max_count = 50000

count = 0
for count, case in enumerate(cases, 1):
    text = case.get_text()
    # Passing through a set removes repeated entities within one case.
    symptoms = list(set(s_extractor.extract(text)))
    diseases = list(set(d_extractor.extract(text)))
    s_index.add(symptoms, count)
    d_index.add(diseases, count)
    # The limit check comes before the progress output, so the final
    # indexed case is not echoed.
    if count >= max_count:
        break
    # Single-argument print() yields the same output on Python 2 and 3.
    print("%s / %s" % (count, max_count))
    print(symptoms + diseases)

s_index.save()
d_index.save()

예제 #4

파일 보기

"""Index the entities found in case reports.

For every case report (up to ``max_count`` of them) the script extracts
symptom and disease entities from the report text, records them in the
"symptom" and "disease" inverted indexes keyed by the running case number,
and finally saves both indexes.
"""
__author__ = 'matias'

from textanalysis.entityextractor import DiseaseExtractor, SymptomExtractor
from textanalysis.texts import CaseReportLibrary
from textanalysis.irdatastructs import InvertedIndex

d_index = InvertedIndex("disease")
s_index = InvertedIndex("symptom")

cases = CaseReportLibrary()
d_extractor = DiseaseExtractor()
s_extractor = SymptomExtractor()

# Indexing stops after this many case reports.
max_count = 50000

count = 0
for count, case in enumerate(cases, 1):
    text = case.get_text()
    # Each entity is added at most once per case: duplicates are dropped
    # by the round trip through set().
    symptoms = list(set(s_extractor.extract(text)))
    diseases = list(set(d_extractor.extract(text)))
    s_index.add(symptoms, count)
    d_index.add(diseases, count)
    # Break before printing, so no progress line appears for the last case.
    if count >= max_count:
        break
    # Formatting into one string keeps the output identical on Python 2
    # and Python 3, unlike the 2-only print statement.
    print("%s / %s" % (count, max_count))
    print(symptoms + diseases)

s_index.save()
d_index.save()