def onReview(self, **params):
    """Present a list of messages for (re)training.

    The request is handled in three stages:

    1. Unless the request is a 'Refresh', act on every
       ``classify:<old_class>:<id>`` parameter: train the message as
       spam or ham, discard it, or count it as deferred.
    2. Decide which messages to list: the same day again if anything was
       deferred, the prior (else next) day after a submission, the day
       selected with the 'Next day'/'Previous day' buttons, the results
       of a 'find' search, or whatever _buildReviewKeys(0) selects.
    3. Group the messages by their classification header and write the
       review table, or a notice when there is nothing to show.

    ``params`` holds the submitted form fields.  The ones read here are
    ``go``, ``classify:...``, ``next``, ``prior``, ``find``,
    ``max_results``, ``ignore_case``, ``id``, ``subject``, ``body``,
    ``headers`` and ``sort``.

    Nothing is returned; all output is emitted through ``self.write``.
    """
    # Train/discard submitted messages.
    self._writePreamble("Review")
    id = ''
    numTrained = 0
    numDeferred = 0
    if params.get('go') != _('Refresh'):
        for key, value in params.items():
            if key.startswith('classify:'):
                old_class, id = key.split(':')[1:3]
                if value == _('spam'):
                    targetCorpus = self.state.spamCorpus
                    stats_as_ham = False
                elif value == _('ham'):
                    targetCorpus = self.state.hamCorpus
                    stats_as_ham = True
                elif value == _('discard'):
                    targetCorpus = None
                    try:
                        self.state.unknownCorpus.removeMessage(
                            self.state.unknownCorpus[id])
                    except KeyError:
                        pass  # Must be a reload.
                else:
                    # Any other value means "defer".
                    targetCorpus = None
                    numDeferred += 1
                if targetCorpus:
                    # Find whichever corpus currently holds the message.
                    sourceCorpus = None
                    if self.state.unknownCorpus.get(id) is not None:
                        sourceCorpus = self.state.unknownCorpus
                    elif self.state.hamCorpus.get(id) is not None:
                        sourceCorpus = self.state.hamCorpus
                    elif self.state.spamCorpus.get(id) is not None:
                        sourceCorpus = self.state.spamCorpus
                    if sourceCorpus is not None:
                        try:
                            # fromCache is a fix for sf #851785.
                            # See the comments in Corpus.py
                            targetCorpus.takeMessage(id, sourceCorpus,
                                                     fromCache=True)
                            if numTrained == 0:
                                self.write(_("<p><b>Training... "))
                                self.flush()
                            numTrained += 1
                            self.stats.RecordTraining(
                                stats_as_ham, old_class=old_class)
                        except KeyError:
                            pass  # Must be a reload.

    # Report on any training, and save the database if there was any.
    if numTrained > 0:
        if numTrained == 1:
            response = "Trained on one message. "
        else:
            response = "Trained on %d messages. " % (numTrained,)
        self._doSave()
        self.write(response)
        self.write("<br> ")

    title = ""
    keys = []
    sourceCorpus = self.state.unknownCorpus
    if numDeferred > 0:
        # If any messages were deferred, show the same page again.
        start = self._keyToTimestamp(id)
    elif id:
        # Else after submitting a whole page, display the prior page or
        # the next one.  Derive the day of the submitted page from the ID
        # of the last processed message.
        start = self._keyToTimestamp(id)
        unused, unused, prior, unused, next = self._buildReviewKeys(start)
        if prior:
            start = prior
        else:
            start = next
    elif params.get('go') == _('Next day'):
        # Else if they've hit Previous or Next, display that page.
        start = self._keyToTimestamp(params['next'])
    elif params.get('go') == _('Previous day'):
        start = self._keyToTimestamp(params['prior'])
    elif params.get('find') is not None:
        # Else if search criteria have been specified, show the messages
        # that match those criteria (an 'id' search looks the message up
        # directly by key).
        prior = next = 0
        keys = set()    # so we don't end up with duplicates
        push = keys.add
        try:
            max_results = int(params['max_results'])
        except (KeyError, ValueError):
            # Missing or non-numeric limit: fall back to a single result.
            max_results = 1
        key = params['find']
        ic = 'ignore_case' in params
        error = False
        if key == "":
            error = True
            page = _("<p>You must enter a search string.</p>")
        else:
            if len(keys) < max_results and 'id' in params:
                if self.state.unknownCorpus.get(key):
                    push((key, self.state.unknownCorpus))
                elif self.state.hamCorpus.get(key):
                    push((key, self.state.hamCorpus))
                elif self.state.spamCorpus.get(key):
                    push((key, self.state.spamCorpus))
            # The requested search fields do not change while searching,
            # so work them out once.
            in_subject = 'subject' in params
            in_body = 'body' in params
            in_headers = 'headers' in params
            if in_subject or in_body or in_headers:
                # This is an expensive operation, so let the user know
                # that something is happening.
                self.write(_('<p>Searching...</p>'))
                for corp in [self.state.unknownCorpus,
                             self.state.hamCorpus,
                             self.state.spamCorpus]:
                    for k in corp.keys():
                        if len(keys) >= max_results:
                            break
                        msg = corp[k]
                        msg.load()
                        if in_subject:
                            subj = str(msg['Subject'])
                            if self._contains(subj, key, ic):
                                push((k, corp))
                        if in_body:
                            # For [ 906581 ] Assertion failed in search
                            # subject.  Can the headers be a non-string?
                            msg_body = msg.as_string()
                            msg_body = msg_body[msg_body.index('\r\n\r\n'):]
                            if self._contains(msg_body, key, ic):
                                push((k, corp))
                        if in_headers:
                            for nm, val in msg.items():
                                # For [ 906581 ] Assertion failed in
                                # search subject.  Can the headers be
                                # a non-string?
                                nm = str(nm)
                                val = str(val)
                                if self._contains(nm, key, ic) or \
                                   self._contains(val, key, ic):
                                    push((k, corp))
        if len(keys):
            if len(keys) == 1:
                title = _("Found message")
            else:
                title = _("Found messages")
            keys = list(keys)
        else:
            # Keep the "must enter a search string" page when that was the
            # actual problem; otherwise report that nothing matched.
            if not error:
                page = _("<p>Could not find any matching messages. "
                         "Maybe they expired?</p>")
            title = _("Did not find message")
            box = self._buildBox(title, 'status.gif', page)
            self.write(box)
            self.write(self._buildBox(_('Find message'),
                                      'query.gif',
                                      self.html.findMessage))
            self._writePostamble()
            return
    else:
        # Else show the most recent day's page, as decided by
        # _buildReviewKeys.
        start = 0

    # Build the lists of messages: spams, hams and unsure.
    if len(keys) == 0:
        keys, date, prior, this, next = self._buildReviewKeys(start)
    keyedMessageInfo = {options["Headers", "header_unsure_string"]: [],
                        options["Headers", "header_ham_string"]: [],
                        options["Headers", "header_spam_string"]: [],
                        }
    invalid_keys = []
    for entry in keys:
        # Search results are (key, corpus) pairs; day listings are bare
        # keys that live in the unknown corpus.
        if isinstance(entry, tuple):
            key, sourceCorpus = entry
        else:
            key = entry
            sourceCorpus = self.state.unknownCorpus
        # Parse the message, get the judgement header and build a message
        # info object for each message.
        message = sourceCorpus[key]
        try:
            message.load()
        except IOError:
            # Someone has taken this file away from us.  It was
            # probably a virus protection program, so that's ok.
            # Don't list it in the review, though.  Remember the entry
            # exactly as it appears in `keys` so it can be removed below.
            invalid_keys.append(entry)
            continue
        judgement = message[options["Headers",
                                    "classification_header_name"]]
        if judgement is None:
            judgement = options["Headers", "header_unsure_string"]
        else:
            judgement = judgement.split(';')[0].strip()
        messageInfo = self._makeMessageInfo(message)
        keyedMessageInfo[judgement].append((key, messageInfo))
    for entry in invalid_keys:
        keys.remove(entry)

    # Present the list of messages in their groups in reverse order of
    # appearance, by default, or according to the specified sort order.
    if keys:
        page = self.html.reviewtable.clone()
        if prior:
            page.prior.value = prior
            del page.priorButton.disabled
        if next:
            page.next.value = next
            del page.nextButton.disabled
        # NOTE(review): templateRow is not referenced again in this
        # method -- confirm nothing relies on it, then drop it.
        templateRow = page.reviewRow.clone()

        # The decision about whether to reverse the sort
        # order has to go here, because _sortMessages gets called
        # thrice, and so the ham list would end up sorted backwards.
        sort_order = params.get('sort')
        if self.previous_sort == sort_order:
            reverse = True
            self.previous_sort = None
        else:
            reverse = False
            self.previous_sort = sort_order

        page.table = ""  # To make way for the real rows.
        for header, label in (
                (options["Headers", "header_unsure_string"], 'Unsure'),
                (options["Headers", "header_ham_string"], 'Ham'),
                (options["Headers", "header_spam_string"], 'Spam')):
            messages = keyedMessageInfo[header]
            if messages:
                sh = self.html.reviewSubHeader.clone()
                # Setup the header row
                sh.optionalHeaders = ''
                h = self.html.headerHeader.clone()
                for disp_header in options["html_ui", "display_headers"]:
                    h.headerLink.href = 'review?sort=%sHeader' % \
                                        (disp_header.lower(),)
                    h.headerName = disp_header.title()
                    sh.optionalHeaders += h
                if not options["html_ui", "display_score"]:
                    del sh.score_header
                if not options["html_ui", "display_received_time"]:
                    del sh.received_header
                subHeader = str(sh)
                subHeader = subHeader.replace('TYPE', label)
                page.table += self.html.blankRow
                page.table += subHeader
                self._appendMessages(page.table, messages, label,
                                     sort_order, reverse)

        page.table += self.html.trainRow
        if title == "":
            title = _("Untrained messages received on %s") % date
        box = self._buildBox(title, None, page)  # No icon, to save space.
    else:
        page = _("<p>There are no untrained messages to display. "
                 "Return <a href='home'>Home</a>, or "
                 "<a href='review'>check again</a>.</p>")
        title = _("No untrained messages")
        box = self._buildBox(title, 'status.gif', page)

    self.write(box)
    self._writePostamble(help_topic="review")
def onReview(self, **params):
    """Present a list of messages for (re)training.

    The request is handled in three stages:

    1. Unless the request is a 'Refresh', act on every
       ``classify:<old_class>:<id>`` parameter: train the message as
       spam or ham, discard it, or count it as deferred.
    2. Decide which messages to list: the same day again if anything was
       deferred, the prior (else next) day after a submission, the day
       selected with the 'Next day'/'Previous day' buttons, the results
       of a 'find' search, or whatever _buildReviewKeys(0) selects.
    3. Group the messages by their classification header and write the
       review table, or a notice when there is nothing to show.

    ``params`` holds the submitted form fields.  The ones read here are
    ``go``, ``classify:...``, ``next``, ``prior``, ``find``,
    ``max_results``, ``ignore_case``, ``id``, ``subject``, ``body``,
    ``headers`` and ``sort``.

    Nothing is returned; all output is emitted through ``self.write``.
    """
    # Train/discard submitted messages.
    self._writePreamble("Review")
    id = ''
    numTrained = 0
    numDeferred = 0
    if params.get('go') != _('Refresh'):
        for key, value in params.items():
            if key.startswith('classify:'):
                old_class, id = key.split(':')[1:3]
                if value == _('spam'):
                    targetCorpus = self.state.spamCorpus
                    stats_as_ham = False
                elif value == _('ham'):
                    targetCorpus = self.state.hamCorpus
                    stats_as_ham = True
                elif value == _('discard'):
                    targetCorpus = None
                    try:
                        self.state.unknownCorpus.removeMessage(
                            self.state.unknownCorpus[id])
                    except KeyError:
                        pass  # Must be a reload.
                else:
                    # Any other value means "defer".
                    targetCorpus = None
                    numDeferred += 1
                if targetCorpus:
                    # Find whichever corpus currently holds the message.
                    sourceCorpus = None
                    if self.state.unknownCorpus.get(id) is not None:
                        sourceCorpus = self.state.unknownCorpus
                    elif self.state.hamCorpus.get(id) is not None:
                        sourceCorpus = self.state.hamCorpus
                    elif self.state.spamCorpus.get(id) is not None:
                        sourceCorpus = self.state.spamCorpus
                    if sourceCorpus is not None:
                        try:
                            targetCorpus.takeMessage(id, sourceCorpus,
                                                     fromCache=True)
                            if numTrained == 0:
                                self.write(_("<p><b>Training... "))
                                self.flush()
                            numTrained += 1
                            self.stats.RecordTraining(
                                stats_as_ham, old_class=old_class)
                        except KeyError:
                            pass  # Must be a reload.

    # Report on any training, and save the database if there was any.
    if numTrained > 0:
        if numTrained == 1:
            response = "Trained on one message. "
        else:
            response = "Trained on %d messages. " % (numTrained,)
        self._doSave()
        self.write(response)
        self.write("<br> ")

    title = ""
    keys = []
    sourceCorpus = self.state.unknownCorpus
    if numDeferred > 0:
        # Something was deferred: show the same page again.
        start = self._keyToTimestamp(id)
    elif id:
        # A page was submitted: move to the prior page, or the next one
        # if there is no prior page.  The day is derived from the ID of
        # the last processed message.
        start = self._keyToTimestamp(id)
        unused, unused, prior, unused, next = self._buildReviewKeys(start)
        if prior:
            start = prior
        else:
            start = next
    elif params.get('go') == _('Next day'):
        start = self._keyToTimestamp(params['next'])
    elif params.get('go') == _('Previous day'):
        start = self._keyToTimestamp(params['prior'])
    elif params.get('find') is not None:
        # Search request: collect (key, corpus) pairs for every match.
        prior = next = 0
        keys = set()    # so we don't end up with duplicates
        push = keys.add
        try:
            max_results = int(params['max_results'])
        except (KeyError, ValueError):
            # Missing or non-numeric limit: fall back to a single result.
            max_results = 1
        key = params['find']
        # `in` replaces dict.has_key(), which Python 3 removed.
        ic = 'ignore_case' in params
        error = False
        if key == "":
            error = True
            page = _("<p>You must enter a search string.</p>")
        else:
            if len(keys) < max_results and 'id' in params:
                if self.state.unknownCorpus.get(key):
                    push((key, self.state.unknownCorpus))
                elif self.state.hamCorpus.get(key):
                    push((key, self.state.hamCorpus))
                elif self.state.spamCorpus.get(key):
                    push((key, self.state.spamCorpus))
            # The requested search fields do not change while searching,
            # so work them out once.
            in_subject = 'subject' in params
            in_body = 'body' in params
            in_headers = 'headers' in params
            if in_subject or in_body or in_headers:
                # Every message in every corpus is loaded below, so tell
                # the user that something is happening first.
                self.write(_('<p>Searching...</p>'))
                for corp in [self.state.unknownCorpus,
                             self.state.hamCorpus,
                             self.state.spamCorpus]:
                    for k in corp.keys():
                        if len(keys) >= max_results:
                            break
                        msg = corp[k]
                        msg.load()
                        if in_subject:
                            subj = str(msg['Subject'])
                            if self._contains(subj, key, ic):
                                push((k, corp))
                        if in_body:
                            # Search only the text from the first blank
                            # (CRLF CRLF) line onwards.
                            msg_body = msg.as_string()
                            msg_body = msg_body[msg_body.index('\r\n\r\n'):]
                            if self._contains(msg_body, key, ic):
                                push((k, corp))
                        if in_headers:
                            for nm, val in msg.items():
                                # Coerce to str before matching.
                                nm = str(nm)
                                val = str(val)
                                if self._contains(nm, key, ic) or \
                                   self._contains(val, key, ic):
                                    push((k, corp))
        if len(keys):
            if len(keys) == 1:
                title = _("Found message")
            else:
                title = _("Found messages")
            keys = list(keys)
        else:
            # Keep the "must enter a search string" page when that was the
            # actual problem; otherwise report that nothing matched.
            if not error:
                page = _("<p>Could not find any matching messages. "
                         "Maybe they expired?</p>")
            title = _("Did not find message")
            box = self._buildBox(title, 'status.gif', page)
            self.write(box)
            self.write(self._buildBox(_('Find message'),
                                      'query.gif',
                                      self.html.findMessage))
            self._writePostamble()
            return
    else:
        # No specific request: let _buildReviewKeys pick the page.
        start = 0

    # Build the lists of messages: spams, hams and unsure.
    if len(keys) == 0:
        keys, date, prior, this, next = self._buildReviewKeys(start)
    keyedMessageInfo = {options["Headers", "header_unsure_string"]: [],
                        options["Headers", "header_ham_string"]: [],
                        options["Headers", "header_spam_string"]: [],
                        }
    invalid_keys = []
    for entry in keys:
        # Search results are (key, corpus) pairs; day listings are bare
        # keys that live in the unknown corpus.  The builtin `tuple`
        # replaces types.TupleType, which Python 3 removed.
        if isinstance(entry, tuple):
            key, sourceCorpus = entry
        else:
            key = entry
            sourceCorpus = self.state.unknownCorpus
        message = sourceCorpus[key]
        try:
            message.load()
        except IOError:
            # The message could not be read, so leave it out of the
            # review.  Remember the entry exactly as it appears in
            # `keys` so it can be removed below.
            invalid_keys.append(entry)
            continue
        judgement = message[options["Headers",
                                    "classification_header_name"]]
        if judgement is None:
            judgement = options["Headers", "header_unsure_string"]
        else:
            # Only the part before the first ';' names the class.
            judgement = judgement.split(';')[0].strip()
        messageInfo = self._makeMessageInfo(message)
        keyedMessageInfo[judgement].append((key, messageInfo))
    for entry in invalid_keys:
        keys.remove(entry)

    # Present the messages grouped as Unsure, Ham and Spam.
    if keys:
        page = self.html.reviewtable.clone()
        if prior:
            page.prior.value = prior
            del page.priorButton.disabled
        if next:
            page.next.value = next
            del page.nextButton.disabled
        # NOTE(review): templateRow is not referenced again in this
        # method -- confirm nothing relies on it, then drop it.
        templateRow = page.reviewRow.clone()

        # Requesting the same sort twice in a row flips the direction.
        # This is decided once here so all three groups agree.
        sort_order = params.get('sort')
        if self.previous_sort == sort_order:
            reverse = True
            self.previous_sort = None
        else:
            reverse = False
            self.previous_sort = sort_order

        page.table = ""  # To make way for the real rows.
        for header, label in (
                (options["Headers", "header_unsure_string"], 'Unsure'),
                (options["Headers", "header_ham_string"], 'Ham'),
                (options["Headers", "header_spam_string"], 'Spam')):
            messages = keyedMessageInfo[header]
            if messages:
                sh = self.html.reviewSubHeader.clone()
                # Set up the header row.
                sh.optionalHeaders = ''
                h = self.html.headerHeader.clone()
                for disp_header in options["html_ui", "display_headers"]:
                    h.headerLink.href = 'review?sort=%sHeader' % \
                                        (disp_header.lower(),)
                    h.headerName = disp_header.title()
                    sh.optionalHeaders += h
                if not options["html_ui", "display_score"]:
                    del sh.score_header
                if not options["html_ui", "display_received_time"]:
                    del sh.received_header
                subHeader = str(sh)
                subHeader = subHeader.replace('TYPE', label)
                page.table += self.html.blankRow
                page.table += subHeader
                self._appendMessages(page.table, messages, label,
                                     sort_order, reverse)

        page.table += self.html.trainRow
        if title == "":
            title = _("Untrained messages received on %s") % date
        box = self._buildBox(title, None, page)  # No icon, to save space.
    else:
        page = _("<p>There are no untrained messages to display. "
                 "Return <a href='home'>Home</a>, or "
                 "<a href='review'>check again</a>.</p>")
        title = _("No untrained messages")
        box = self._buildBox(title, 'status.gif', page)

    self.write(box)
    self._writePostamble(help_topic="review")
def onReview(self, **params):
    """Present a list of messages for (re)training.

    The request is handled in three stages:

    1. Unless the request is a 'Refresh', act on every
       ``classify:<old_class>:<id>`` parameter: train the message as
       spam or ham, discard it, or count it as deferred.
    2. Decide which messages to list: the same day again if anything was
       deferred, the prior (else next) day after a submission, the day
       selected with the 'Next day'/'Previous day' buttons, the results
       of a 'find' search, or whatever _buildReviewKeys(0) selects.
    3. Group the messages by their classification header and write the
       review table, or a notice when there is nothing to show.

    ``params`` holds the submitted form fields.  The ones read here are
    ``go``, ``classify:...``, ``next``, ``prior``, ``find``,
    ``max_results``, ``ignore_case``, ``id``, ``subject``, ``body``,
    ``headers`` and ``sort``.

    Nothing is returned; all output is emitted through ``self.write``.
    """
    # Train/discard submitted messages.
    self._writePreamble("Review")
    id = ''
    numTrained = 0
    numDeferred = 0
    if params.get('go') != _('Refresh'):
        for key, value in params.items():
            if key.startswith('classify:'):
                old_class, id = key.split(':')[1:3]
                if value == _('spam'):
                    targetCorpus = self.state.spamCorpus
                    stats_as_ham = False
                elif value == _('ham'):
                    targetCorpus = self.state.hamCorpus
                    stats_as_ham = True
                elif value == _('discard'):
                    targetCorpus = None
                    try:
                        self.state.unknownCorpus.removeMessage(
                            self.state.unknownCorpus[id])
                    except KeyError:
                        pass  # Must be a reload.
                else:
                    # Any other value means "defer".
                    targetCorpus = None
                    numDeferred += 1
                if targetCorpus:
                    # Find whichever corpus currently holds the message.
                    sourceCorpus = None
                    if self.state.unknownCorpus.get(id) is not None:
                        sourceCorpus = self.state.unknownCorpus
                    elif self.state.hamCorpus.get(id) is not None:
                        sourceCorpus = self.state.hamCorpus
                    elif self.state.spamCorpus.get(id) is not None:
                        sourceCorpus = self.state.spamCorpus
                    if sourceCorpus is not None:
                        try:
                            # fromCache is a fix for sf #851785.
                            # See the comments in Corpus.py
                            targetCorpus.takeMessage(id, sourceCorpus,
                                                     fromCache=True)
                            if numTrained == 0:
                                self.write(_("<p><b>Training... "))
                                self.flush()
                            numTrained += 1
                            self.stats.RecordTraining(
                                stats_as_ham, old_class=old_class)
                        except KeyError:
                            pass  # Must be a reload.

    # Report on any training, and save the database if there was any.
    if numTrained > 0:
        if numTrained == 1:
            response = "Trained on one message. "
        else:
            response = "Trained on %d messages. " % (numTrained,)
        self._doSave()
        self.write(response)
        self.write("<br> ")

    title = ""
    keys = []
    sourceCorpus = self.state.unknownCorpus
    if numDeferred > 0:
        # If any messages were deferred, show the same page again.
        start = self._keyToTimestamp(id)
    elif id:
        # Else after submitting a whole page, display the prior page or
        # the next one.  Derive the day of the submitted page from the ID
        # of the last processed message.
        start = self._keyToTimestamp(id)
        unused, unused, prior, unused, next = self._buildReviewKeys(start)
        if prior:
            start = prior
        else:
            start = next
    elif params.get('go') == _('Next day'):
        # Else if they've hit Previous or Next, display that page.
        start = self._keyToTimestamp(params['next'])
    elif params.get('go') == _('Previous day'):
        start = self._keyToTimestamp(params['prior'])
    elif params.get('find') is not None:
        # Else if search criteria have been specified, show the messages
        # that match those criteria (an 'id' search looks the message up
        # directly by key).
        prior = next = 0
        keys = set()    # so we don't end up with duplicates
        push = keys.add
        try:
            max_results = int(params['max_results'])
        except (KeyError, ValueError):
            # Missing or non-numeric limit: fall back to a single result.
            max_results = 1
        key = params['find']
        # `in` replaces dict.has_key(), which Python 3 removed.
        ic = 'ignore_case' in params
        error = False
        if key == "":
            error = True
            page = _("<p>You must enter a search string.</p>")
        else:
            if len(keys) < max_results and 'id' in params:
                if self.state.unknownCorpus.get(key):
                    push((key, self.state.unknownCorpus))
                elif self.state.hamCorpus.get(key):
                    push((key, self.state.hamCorpus))
                elif self.state.spamCorpus.get(key):
                    push((key, self.state.spamCorpus))
            # The requested search fields do not change while searching,
            # so work them out once.
            in_subject = 'subject' in params
            in_body = 'body' in params
            in_headers = 'headers' in params
            if in_subject or in_body or in_headers:
                # This is an expensive operation, so let the user know
                # that something is happening.
                self.write(_('<p>Searching...</p>'))
                for corp in [self.state.unknownCorpus,
                             self.state.hamCorpus,
                             self.state.spamCorpus]:
                    for k in corp.keys():
                        if len(keys) >= max_results:
                            break
                        msg = corp[k]
                        msg.load()
                        if in_subject:
                            subj = str(msg['Subject'])
                            if self._contains(subj, key, ic):
                                push((k, corp))
                        if in_body:
                            # For [ 906581 ] Assertion failed in search
                            # subject.  Can the headers be a non-string?
                            msg_body = msg.as_string()
                            msg_body = msg_body[msg_body.index('\r\n\r\n'):]
                            if self._contains(msg_body, key, ic):
                                push((k, corp))
                        if in_headers:
                            for nm, val in msg.items():
                                # For [ 906581 ] Assertion failed in
                                # search subject.  Can the headers be
                                # a non-string?
                                nm = str(nm)
                                val = str(val)
                                if self._contains(nm, key, ic) or \
                                   self._contains(val, key, ic):
                                    push((k, corp))
        if len(keys):
            if len(keys) == 1:
                title = _("Found message")
            else:
                title = _("Found messages")
            keys = list(keys)
        else:
            # Keep the "must enter a search string" page when that was the
            # actual problem; otherwise report that nothing matched.
            if not error:
                page = _("<p>Could not find any matching messages. "
                         "Maybe they expired?</p>")
            title = _("Did not find message")
            box = self._buildBox(title, 'status.gif', page)
            self.write(box)
            self.write(self._buildBox(_('Find message'),
                                      'query.gif',
                                      self.html.findMessage))
            self._writePostamble()
            return
    else:
        # Else show the most recent day's page, as decided by
        # _buildReviewKeys.
        start = 0

    # Build the lists of messages: spams, hams and unsure.
    if len(keys) == 0:
        keys, date, prior, this, next = self._buildReviewKeys(start)
    keyedMessageInfo = {options["Headers", "header_unsure_string"]: [],
                        options["Headers", "header_ham_string"]: [],
                        options["Headers", "header_spam_string"]: [],
                        }
    invalid_keys = []
    for entry in keys:
        # Search results are (key, corpus) pairs; day listings are bare
        # keys that live in the unknown corpus.  The builtin `tuple`
        # replaces types.TupleType, which Python 3 removed.
        if isinstance(entry, tuple):
            key, sourceCorpus = entry
        else:
            key = entry
            sourceCorpus = self.state.unknownCorpus
        # Parse the message, get the judgement header and build a message
        # info object for each message.
        message = sourceCorpus[key]
        try:
            message.load()
        except IOError:
            # Someone has taken this file away from us.  It was
            # probably a virus protection program, so that's ok.
            # Don't list it in the review, though.  Remember the entry
            # exactly as it appears in `keys` so it can be removed below.
            invalid_keys.append(entry)
            continue
        judgement = message[options["Headers",
                                    "classification_header_name"]]
        if judgement is None:
            judgement = options["Headers", "header_unsure_string"]
        else:
            judgement = judgement.split(';')[0].strip()
        messageInfo = self._makeMessageInfo(message)
        keyedMessageInfo[judgement].append((key, messageInfo))
    for entry in invalid_keys:
        keys.remove(entry)

    # Present the list of messages in their groups in reverse order of
    # appearance, by default, or according to the specified sort order.
    if keys:
        page = self.html.reviewtable.clone()
        if prior:
            page.prior.value = prior
            del page.priorButton.disabled
        if next:
            page.next.value = next
            del page.nextButton.disabled
        # NOTE(review): templateRow is not referenced again in this
        # method -- confirm nothing relies on it, then drop it.
        templateRow = page.reviewRow.clone()

        # The decision about whether to reverse the sort
        # order has to go here, because _sortMessages gets called
        # thrice, and so the ham list would end up sorted backwards.
        sort_order = params.get('sort')
        if self.previous_sort == sort_order:
            reverse = True
            self.previous_sort = None
        else:
            reverse = False
            self.previous_sort = sort_order

        page.table = ""  # To make way for the real rows.
        for header, label in (
                (options["Headers", "header_unsure_string"], 'Unsure'),
                (options["Headers", "header_ham_string"], 'Ham'),
                (options["Headers", "header_spam_string"], 'Spam')):
            messages = keyedMessageInfo[header]
            if messages:
                sh = self.html.reviewSubHeader.clone()
                # Setup the header row
                sh.optionalHeaders = ''
                h = self.html.headerHeader.clone()
                for disp_header in options["html_ui", "display_headers"]:
                    h.headerLink.href = 'review?sort=%sHeader' % \
                                        (disp_header.lower(),)
                    h.headerName = disp_header.title()
                    sh.optionalHeaders += h
                if not options["html_ui", "display_score"]:
                    del sh.score_header
                if not options["html_ui", "display_received_time"]:
                    del sh.received_header
                subHeader = str(sh)
                subHeader = subHeader.replace('TYPE', label)
                page.table += self.html.blankRow
                page.table += subHeader
                self._appendMessages(page.table, messages, label,
                                     sort_order, reverse)

        page.table += self.html.trainRow
        if title == "":
            title = _("Untrained messages received on %s") % date
        box = self._buildBox(title, None, page)  # No icon, to save space.
    else:
        page = _("<p>There are no untrained messages to display. "
                 "Return <a href='home'>Home</a>, or "
                 "<a href='review'>check again</a>.</p>")
        title = _("No untrained messages")
        box = self._buildBox(title, 'status.gif', page)

    self.write(box)
    self._writePostamble(help_topic="review")