def add_new_revision(self, new_rev):
    """
    Append a new revision to the stored history and make it the current one.

    The id of the new revision must be strictly greater than the id of the
    most recently stored revision. Every element of
    ``new_rev.revision_text`` is replaced in place by the result of
    ``filter_wiki.filter_wiki`` and then ``new_rev.tokenize()`` is called,
    so ``self.cr`` ends up referring to the revision after tokenization.

    Side effects on ``self``:
      * ``self.revisions`` gets ``new_rev`` appended; once it holds more
        than 100 entries the oldest one is dropped.
      * ``self.cr`` is set to ``new_rev``.
      * ``self.cr_number`` is incremented by one.

    Raises:
        Exception: if ``new_rev.revision_id`` is less than or equal to the
            id of the last stored revision. Diagnostic lines are printed
            to stdout before raising, and nothing on ``self`` or
            ``new_rev`` is modified.
    """
    # Size of the sliding window of revisions kept in memory.
    max_stored = 100

    if self.revisions and new_rev.revision_id <= self.revisions[-1].revision_id:
        last_id = self.revisions[-1].revision_id
        # Single-argument parenthesized print emits the same text under
        # both Python 2 (print statement) and Python 3 (print function).
        print("Error: revisions should be in ascending order")
        print("Error on revsion with id = %s" % new_rev.revision_id)
        print("self.revisions[-1].revision_id is %s" % last_id)
        print(" boolean expr %s" % (new_rev.revision_id <= last_id))
        raise Exception('revisions should be in ascending order')

    # Strip wiki markup from every raw text chunk in place, then tokenize.
    for idx, raw_chunk in enumerate(new_rev.revision_text):
        new_rev.revision_text[idx] = filter_wiki.filter_wiki(raw_chunk)
    new_rev.tokenize()

    self.revisions.append(new_rev)
    # Exactly one revision is added per call, so dropping the oldest entry
    # is enough to keep the window at max_stored.
    if len(self.revisions) > max_stored:
        del self.revisions[0]

    self.cr = new_rev
    self.cr_number += 1
def rawtext2tokens(text):
    """
    Convert raw wiki text into tokens.

    ``text`` is a string. It is first passed through
    ``filter_wiki.filter_wiki`` to remove wiki markup, and the filtered
    result is handed to ``line2tokens``, whose return value is returned
    unchanged (presumably a token sequence -- confirm against
    ``line2tokens``).
    """
    markup_free = filter_wiki.filter_wiki(text)
    tokens = line2tokens(markup_free)
    return tokens