Esempio n. 1
0
    def __init__(self, questions):
        """Build the index from a collection of questions.

        Args:
            questions: Iterable of mappings. For each one, the 'title' entry
                is used as the document text and the 'qid' entry as the
                document ID; any other keys are not read here.
        """
        self.documents = {}
        self.index = {}

        # Class-level attribute on Posting: the most recently constructed
        # index replaces any earlier one (presumably so postings can refer
        # back to their owning index -- confirm against Posting).
        Posting.index = self

        for question in questions:
            doc = Document(text=question['title'], docID=question['qid'])
            # Key by the document's name. getName is a method and must be
            # called; using the bare attribute would key the dict by a bound
            # method object instead of the name.
            self.documents[doc.getName()] = doc

            postings = doc.getPostingsList()
            for term in postings:
                self.addTerm(term, postings[term])

        # Runs once, after every document's terms have been added.
        self.setIDFForAll()