Esempio n. 1
0
    def do_read(self, filename):
        """Load ``filename`` into a new JokerData and register it.

        The new data set is parsed, passed through ``self.stem_words``,
        appended to ``self.data``, used to refresh ``self.total_docs``,
        handed to ``self.calculate_idfs`` and finally persisted through
        ``self.db.persist_docs``.
        """
        loaded = JokerData(filename)
        loaded.parse_docs()

        self.stem_words(loaded)
        self.data.append(loaded)

        # Recount from scratch over every data set held so far (including
        # the one just appended) instead of incrementing the old total.
        self.total_docs = sum(1 for entry in self.data for _ in entry.docs)

        self.calculate_idfs(loaded)
        self.db.persist_docs(loaded)
Esempio n. 2
0
 def restore_state(self):
     """Rebuild a JokerData object from the rows of the documents table.

     Returns None (after printing an error message) when
     ``self.params_set`` equals False. Otherwise returns a
     ``JokerData("no.fn")`` whose docs were set to a dict mapping each
     row's id to a TXTData built from that row's text and words.
     """
     # Explicit comparison kept on purpose: only a value equal to False
     # takes this branch.
     if self.params_set == False:
         print("Error connection parameters not set")
         return None

     query = "SELECT id, text, words FROM documents"
     self.cursor.execute(query)
     records = self.cursor.fetchall()

     docs_by_id = {}
     # "no.fn" is presumably a placeholder file name, since the content
     # comes from the database -- TODO confirm.
     restored = JokerData("no.fn")
     for record in records:
         doc_id = record[0]
         body = record[1]
         # The words column is stored as one space-separated string.
         tokens = record[2].split(' ')

         entry = TXTData("no.fn")
         entry.set_text(body)
         entry.set_words(tokens)
         entry.unique_word_frequency()
         docs_by_id[doc_id] = entry

     restored.set_docs(docs_by_id)
     return restored