Code Example #1
File: IR.py  Project: armanfatahi/ContextSensitiveIR
 from math import log    # needed for the idf weight computed below

 def retrive_documents(self, query_id):
     k1 = 1.2
     k3 = 8.00
     avg_dl = 122
     b = 1                # tuning parameter; swept from 0.25 to 2.00 in steps of 0.25
     q = Query(query_id)
     #q.set_concepts(self.QueryConceptExtraction(q.text))
     self._expand_query(q)
     print "Retrieving Documents for: ", q.text
     Collection._load()
     Collection._load_go()
     Collection._load_tags()
     Collection._load_indexes()      #Loads documents into _documents with PMID and Index
     score = dict()
     N = Collection._count
     Nt = dict()
     for term in q.text:
         Nt[term] = Collection._get_frequency(term)
     query_text = ' '.join(q.text)    # query as a single string, for query-term frequency lookups
     for doc in Collection._documents:
         summation = 0
         dl = doc.length * 1.00
         for t in q.text:
             tfn = doc.get_frequency(t)
             qtf = Document._term_frequency(query_text, t)    # frequency of t in the query itself
             K = k1*((1-b)+b*(dl/avg_dl))
             w = log((N-Nt[t]+0.5)/(Nt[t]+0.5),2)
             if w < 0:
                 # a negative idf weight would push the score below zero; breaking
                 # keeps the accumulated score non-negative (it also skips the remaining terms)
                 break
             p1 = (((k1+1)*tfn)/(K+tfn))
             p2 = ((k3+1)*qtf/(k3+qtf))
             p3 = w
             summation += p1*p2*p3
         score[doc.PMID] = summation
     return score    # assumed intent: hand back the PMID -> score map for ranking
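
For orientation, the loop above implements an Okapi BM25-style score with a query-term saturation component. A sketch in the snippet's own variable names (tfn = term frequency in the document, qtf = term frequency in the query, N = Collection._count, Nt = Collection._get_frequency(t)):

    \[
    \mathrm{score}(D, q) = \sum_{t \in q}
        \frac{(k_1 + 1)\,\mathit{tfn}}{K + \mathit{tfn}} \cdot
        \frac{(k_3 + 1)\,\mathit{qtf}}{k_3 + \mathit{qtf}} \cdot
        \log_2 \frac{N - N_t + 0.5}{N_t + 0.5},
    \qquad
    K = k_1 \Bigl( (1 - b) + b \, \frac{\mathit{dl}}{\mathit{avg\_dl}} \Bigr)
    \]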
Code Example #2
File: IR.py  Project: armanfatahi/ContextSensitiveIR
 from math import log    # needed for the idf weight computed below

 def _expand_query(self, q):
     #--STEP 1----------Extract TOP DOCUMENTS ----------------------------
     tp = TextProcessor()
     param = Parameter()
     k1      = 1.2
     k3      = 8.00
     avg_dl  = 122
     b       = 1                 # tuning parameter; swept from 0.25 to 2.00 in steps of 0.25
     Collection._load_indexes()      # Loads indexes into _documents
     N = len(Collection._documents)
     score = dict()
     for D in Collection._documents:
         summation = 0
         dl = D.length * 1.00
         for t in q.text:
             Nt = Collection._get_frequency(t)
             tfn = D.get_frequency(t)
             qtf = q.get_frequency(t)
             K = k1*((1-b)+b*(dl/avg_dl))
             w = log((N-Nt+0.5)/(Nt+0.5),2)
             if w < 0:
                 # a negative idf weight would push the score below zero; breaking
                 # keeps the accumulated score non-negative (it also skips the remaining terms)
                 break
             p1 = (((k1+1)*tfn)/(K+tfn))
             p2 = ((k3+1)*qtf/(k3+qtf))
             p3 = w
             summation += p1*p2*p3
             
         score[D.PMID] = summation
     M = param.GetDocNumberForLocalContext()
     # keep only documents that matched the query with a positive score
     new_score = dict()
     for item in score.iterkeys():
         if score[item] > 0:
             new_score[item] = score[item]
     # top M documents by score, best first
     TopDocs = sorted(new_score, key=new_score.get, reverse=True)[:M]
     Display._plot(new_score, q)
     # NOTE: as written this tokenizes an empty string; the texts of the documents
     # in TopDocs are presumably meant to be concatenated into TopDocsTexts first
     TopDocsTexts = ''
     TopDocsTexts = tp.Tokenize(TopDocsTexts)
     TopDocsTexts = TextProcessor._remove_stop_words(TopDocsTexts)
     #---STEP 2---------Calculate the weight of each candidate expansion term (Bo1)----------------------------
     K = TopDocsTexts
     K_text = ' '.join(K)             # top-document term pool as a single string
     Beta = 0.4
     weight = dict()
     MaxTFQ = 0.001
     for term in TopDocsTexts:
         tfq = q.get_frequency(term)
         if tfq > MaxTFQ:
             MaxTFQ = tfq
     # Bo1 information content of each term; note that in the standard Bo1 model
     # Lambda is the term's collection frequency over the number of documents,
     # whereas here both Lambda and Freq_t_k come from the top-document pool
     info = dict()
     MaxInfo = 0
     for term in TopDocsTexts:
         Lambda = Document._term_frequency(K_text, term)
         Freq_t_k = Document._term_frequency(K_text, term)
         info[term] = -log(1.00/(1.00+Lambda), 2) - Freq_t_k * log(Lambda/(1.00+Lambda), 2)
         if info[term] > MaxInfo:
             MaxInfo = info[term]
     for term in TopDocsTexts:
         tfqN = (q.get_frequency(term) + 0.00) / MaxTFQ    # query tf, normalized to [0, 1]
         if MaxInfo > 0:
             weight[term] = tfqN + Beta * (info[term] / MaxInfo)
         else:
             weight[term] = 0
     QPrime = []
     for term in weight.iterkeys():
         if weight[term] > 0.25:       # expansion threshold on the normalized weight
             QPrime.append(term)
     return QPrime
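
For reference, the term weighting that STEP 2 computes matches the Bo1 (Bose-Einstein) divergence-from-randomness expansion model, written here in the snippet's own names (lambda and F(t, K) are both taken from the top-document pool K; tfq is the term's frequency in the original query):

    \[
    \mathrm{InfoBO1}(t) = -\log_2 \frac{1}{1 + \lambda} - F(t, K)\,\log_2 \frac{\lambda}{1 + \lambda},
    \qquad
    w(t) = \frac{\mathit{tfq}(t)}{\mathit{MaxTFQ}} + \beta \, \frac{\mathrm{InfoBO1}(t)}{\mathrm{MaxInfo}}
    \]

Terms with w(t) > 0.25 are kept and returned as the expanded query QPrime (beta = 0.4).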