def __init__(self, questions):
    """Build the index from a collection of question records.

    Each record in ``questions`` is read through two keys: 'title', which
    is passed to ``Document`` as the text, and 'qid', which is passed as
    the document ID.  (An older comment here listed the keys as 'docID',
    'text' and 'cluster'; those are not what this method reads.)

    For every record a ``Document`` is created and stored in
    ``self.documents`` under the value returned by ``doc.getName()``.
    Each (term, value) entry of ``doc.getPostingsList()`` is then handed
    to ``self.addTerm``.  After all records are loaded,
    ``self.setIDFForAll()`` is called once.
    """
    self.documents = {}
    self.index = {}
    # Assigned on Posting itself, not on an instance, so this reference is
    # shared by everything that reads Posting.index; constructing a second
    # index object overwrites it.
    Posting.index = self

    for question in questions:
        doc = Document(text=question['title'], docID=question['qid'])
        # Key by the name itself.  The previous code stored the uncalled
        # bound method ``doc.getName`` as the key, so lookups by name
        # could never match.
        self.documents[doc.getName()] = doc

        postings = doc.getPostingsList()
        for term in postings:
            self.addTerm(term, postings[term])

    # Runs only after every document has been added -- presumably because
    # the IDF values depend on the complete collection (confirm against
    # setIDFForAll).
    self.setIDFForAll()