Example #1
0
 def retrive_documents(self,query_id):
     '''
     Build the query for ``query_id``, expand it, and return.

     A ``Query`` is created from ``query_id`` and handed to
     ``self._expand_query``. The bare ``return`` right after that call ends
     the method, so it currently always returns ``None`` and none of the
     document-scoring code below it is executed.

     NOTE(review): the unreachable part looks like an Okapi BM25-style
     ranking (k1, k3, b, average document length, log-weighted term
     statistics). Confirm whether the early ``return`` is a leftover
     debugging short-circuit. Even without it, the visible code fills
     ``score`` but never returns it.
     '''
     # Scoring constants; only read by the unreachable code further down.
     k1 = 1.2
     k3 = 8.00
     avg_dl = 122  # hard-coded; presumably the average document length -- TODO confirm
     b = 1 # author's tuning note: from 0.25 to 2.00 in steps of 0.25
     q = Query(query_id)
     #q.set_concepts(self.QueryConceptExtraction(q.text))
     self._expand_query(q)
     # Early exit: every statement after this line is dead code.
     return
     print "Retrieving Documents for: ", q.text
     Collection._load()
     Collection._load_go()
     Collection._load_tags()
     Collection._load_indexes()      #Loads documents into _documents with PMID and Index
     score = dict()  # maps doc.PMID -> accumulated score
     N = Collection._count
     Nt = dict()  # maps query term -> Collection._get_frequency(term)
     # q.text is iterated term by term here and joined with spaces below,
     # so it is treated as a sequence of term strings.
     for term in q.text:
         Nt[term] = Collection._get_frequency(term)
     counter = 0  # incremented once per document; never read in the visible code
     for doc in Collection._documents:
         summation = 0;
         dl = doc.length * 1.00  # force float so dl/avg_dl is not integer division
         for t in q.text:
             tfn = doc.get_frequency(t)
             # QQ and K do not depend on t; they are recomputed on every pass.
             QQ = ' '.join(q.text)
             qtf = Document._term_frequency(QQ, t)
             # Length-normalisation factor built from k1, b and dl/avg_dl.
             K = k1*((1-b)+b*(dl/avg_dl))
             # Base-2 log weight from N and Nt[t]; `log` must come from
             # outside this method (presumably math.log -- verify).
             w = log((N-Nt[t]+0.5)/(Nt[t]+0.5),2)
             if w<0:
                 # A negative weight would pull the document score below zero.
                 # `break` keeps the sum accumulated so far, but it also skips
                 # every remaining query term for this document, not only t.
                 break
             p1 = (((k1+1)*tfn)/(K+tfn))  # document term-frequency component
             p2 = ((k3+1)*qtf/(k3+qtf))   # query term-frequency component
             p3 = w
             summation += p1*p2*p3
         score[doc.PMID] = summation
         counter += 1
Example #2
0
 def Indexing(self):
     '''
     Rebuild the index terms of every document in the collection.

     Steps, in order:
         - run "DELETE from collection_index" through DB._execute
         - load the collection and its tags
         - for each document, collect the abstract terms (plus the tag
           terms when GlobalVariables.global_context_activated is set)
         - eliminate stopwords, stem what is left, and store the result
           with doc.set_index
     '''
     DB._execute("DELETE from collection_index")
     print("Indexing is started...")
     processor = TextProcessor()
     Collection._load()
     Collection._load_tags()  # loading document with PMID, tags and abstracts
     for document in Collection._documents:
         # Abstract terms first, then tag terms when global context is on.
         terms = list(document.abstract)
         if GlobalVariables.global_context_activated:
             terms.extend(document.tag)
         # Stopword removal runs before stemming.
         without_stopwords = processor.EliminateStopWords(terms)
         document.set_index(processor.Stem(without_stopwords))
     print("Indexing is Done!")