def do_read(self, filename):
    """Load one file, add it to the collection, and persist its documents.

    The file is wrapped in a JokerData, parsed, passed through
    ``self.stem_words`` and appended to ``self.data``.  ``self.total_docs``
    is then recomputed over every entry in ``self.data`` before the new
    entry is handed to ``self.calculate_idfs`` and ``self.db.persist_docs``.

    Args:
        filename: Value forwarded unchanged to the JokerData constructor.
    """
    loaded = JokerData(filename)
    loaded.parse_docs()
    self.stem_words(loaded)
    self.data.append(loaded)

    # Recount from scratch across all loaded files (not just the new one),
    # so total_docs is up to date before calculate_idfs runs.
    self.total_docs = sum(1 for entry in self.data for _ in entry.docs)

    self.calculate_idfs(loaded)
    self.db.persist_docs(loaded)
def restore_state(self):
    """Rebuild a JokerData from the rows of the ``documents`` table.

    Every row (id, text, words) becomes a TXTData: the text is set as-is,
    the words column is split on single spaces, and
    ``unique_word_frequency()`` is invoked on the result.  The TXTData
    objects are collected in a dict keyed by row id and attached to the
    returned JokerData through ``set_docs``.

    Returns:
        The populated JokerData, or None (after printing an error message)
        when ``self.params_set`` equals False.
    """
    # Deliberately an equality test: only a value equal to False takes the
    # error path; other falsy values such as None fall through.
    if self.params_set == False:
        # Parenthesised form prints the same text under the Python 2 print
        # statement and also parses as a Python 3 call.
        print("Error connection parameters not set")
        return None

    query = "SELECT id, text, words FROM documents"
    self.cursor.execute(query)
    fetched = self.cursor.fetchall()

    restored = JokerData("no.fn")
    docs_by_id = {}
    for doc_id, body, joined_words in fetched:
        entry = TXTData("no.fn")
        entry.set_text(body)
        # NOTE(review): an empty words column yields [''] here — confirm
        # that is what the writer side (persist_docs) expects.
        entry.set_words(joined_words.split(' '))
        entry.unique_word_frequency()
        docs_by_id[doc_id] = entry

    restored.set_docs(docs_by_id)
    return restored