def factory(cls, topic, id, words, bible):
    """
    Build a Doc from a sequence of words and register the words in the bible.

    Key arguments:
    cls -- not referenced by the body; the document is created via Doc().
    topic -- value stored on the document as `topic`.
    id -- value stored on the document as `id`.
    words -- iterable of words; repeated words are counted.
    bible -- object whose `words` mapping is updated in place.

    Returns the new document. Its `words` attribute maps each word to its
    number of occurrences and its `cluster` attribute is None.
    Side effect: Doc.count is incremented by one.
    """
    doc = Doc()
    doc.topic = topic
    doc.id = id
    doc.words = {}

    for word in words:
        # Register the word in the bible.
        if word not in bible.words:
            # First time the bible sees this word: its index is the number
            # of words the bible held before this one was added.
            entry = dotdict()
            entry.index = len(bible.words)
            entry.docs = [doc]
            bible.words[word] = entry
        else:
            # Known word: record this document once, even if the word
            # occurs several times in it.
            owners = bible.words[word].docs
            if doc not in owners:
                owners.append(doc)

        # Count the occurrence in this document.
        doc.words[word] = doc.words.get(word, 0) + 1

    # One more document has been processed.
    Doc.count += 1

    # The document is not assigned to any cluster yet.
    doc.cluster = None
    return doc
def __init__(self, k):
    """
    Set up the requested number of empty clusters.

    Key arguments:
    k -- final number of clusters.
    """
    self.k = k
    self.clusters = []
    for _ in range(k):
        # Every cluster starts out with an empty list of documents.
        cluster = dotdict()
        cluster.docs = []
        self.clusters.append(cluster)