Esempio n. 1
0
    def factory(cls, topic, id, words, bible):
        """
        Build a new Doc from a sequence of words and register it in the bible.

        Key arguments:
        cls -- not used here; the document is always created with Doc().
        topic -- stored as-is on the document's ``topic`` attribute.
        id -- stored as-is on the document's ``id`` attribute.
        words -- iterable of words; every occurrence is counted.
        bible -- object exposing a ``words`` mapping, updated in place.

        Returns the new document. Its ``words`` attribute maps each word to
        the number of times it occurred, and its ``cluster`` attribute is
        None. Doc.count is incremented by one as a side effect.
        """
        doc = Doc()
        doc.topic, doc.id, doc.words = topic, id, {}

        for token in words:
            # Keep the shared vocabulary in sync with this document.
            if token not in bible.words:
                # First time this word is seen: create its entry. The index
                # is the size of the vocabulary right after insertion, minus
                # one.
                bible.words[token] = dotdict()
                entry = bible.words[token]
                entry.index = len(bible.words) - 1
                entry.docs = [doc]
            elif doc not in bible.words[token].docs:
                # Known word, but this document is not listed under it yet.
                bible.words[token].docs.append(doc)

            # Per-document occurrence count.
            doc.words[token] = doc.words.get(token, 0) + 1

        # One more document has been processed.
        Doc.count += 1

        # The document starts out unassigned to any cluster.
        doc.cluster = None

        return doc
Esempio n. 2
0
 def factory(cls, topic, id, words, bible):
     """
     Build a new Doc from a sequence of words and register it in the bible.

     Key arguments:
     cls -- not used here; the document is always created with Doc().
     topic -- stored as-is on the document's ``topic`` attribute.
     id -- stored as-is on the document's ``id`` attribute.
     words -- iterable of words; every occurrence is counted.
     bible -- object exposing a ``words`` mapping, updated in place.

     Returns the new document. Its ``words`` attribute maps each word to
     the number of times it occurred, and its ``cluster`` attribute is
     None. Doc.count is incremented by one as a side effect.
     """
     doc = Doc()
     doc.topic, doc.id, doc.words = topic, id, {}

     for token in words:
         # Keep the shared vocabulary in sync with this document.
         if token not in bible.words:
             # First time this word is seen: create its entry. The index
             # is the size of the vocabulary right after insertion, minus
             # one.
             bible.words[token] = dotdict()
             entry = bible.words[token]
             entry.index = len(bible.words) - 1
             entry.docs = [doc]
         elif doc not in bible.words[token].docs:
             # Known word, but this document is not listed under it yet.
             bible.words[token].docs.append(doc)

         # Per-document occurrence count.
         doc.words[token] = doc.words.get(token, 0) + 1

     # One more document has been processed.
     Doc.count += 1

     # The document starts out unassigned to any cluster.
     doc.cluster = None

     return doc
Esempio n. 3
0
 def __init__(self, k):
     """
     Set up ``k`` empty clusters.

     Key arguments:
     k -- final number of clusters.

     After construction ``self.clusters`` holds ``k`` dotdict records,
     each with an empty ``docs`` list.
     """
     self.k = k

     self.clusters = []
     for _ in range(k):
         # Build the record fully before adding it to the list.
         cluster = dotdict()
         cluster.docs = []
         self.clusters.append(cluster)
Esempio n. 4
0
    def __init__(self, k):
        """
        Set up ``k`` empty clusters.

        Key arguments:
        k -- final number of clusters.

        After construction ``self.clusters`` holds ``k`` dotdict records,
        each with an empty ``docs`` list.
        """
        self.k = k

        self.clusters = []
        for _ in range(k):
            # Build the record fully before adding it to the list.
            cluster = dotdict()
            cluster.docs = []
            self.clusters.append(cluster)