def is_boring(old, new):
    oldu = canonicalize(old.decode('utf8'))
    newu = canonicalize(new.decode('utf8'))
    if oldu.splitlines()[1:] == newu.splitlines()[1:]:
        return True
    for charset in CHARSET_LIST:
        try:
            if oldu.encode(charset) == new:
                logger.debug('Boring!')
                return True
        except UnicodeEncodeError:
            pass
    return False
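This version relies on `canonicalize`, `CHARSET_LIST`, and `logger` being defined elsewhere; the comparison itself only discards the first line. A quick illustration (the sample text is made up) of what that check does and does not ignore:

# The first version drops only the first (date) line, so any other
# byte-for-byte difference -- including extra whitespace -- still reads
# as an interesting change. Sample strings are illustrative only.
old_text = "Date: Mon\nHello  world\n"
new_text = "Date: Tue\nHello world\n"

print(old_text.splitlines()[1:] == new_text.splitlines()[1:])  # False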
def is_boring(old, new):
    oldu = canonicalize(old.decode('utf8'))
    newu = canonicalize(new.decode('utf8'))

    def extra_canonical(s):
        """Ignore changes in whitespace or the date line"""
        nondate_portion = s.split('\n', 1)[1]
        return nondate_portion.split()

    if extra_canonical(oldu) == extra_canonical(newu):
        return True

    for charset in CHARSET_LIST:
        try:
            if oldu.encode(charset) == new:
                logger.debug('Boring!')
                return True
        except UnicodeEncodeError:
            pass
    return False
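To see what the new helper buys, here is `extra_canonical` run on its own against two made-up snapshots; the date line is dropped and runs of whitespace are normalized away:

# A standalone copy of extra_canonical with illustrative input: the first
# (date) line is discarded and the rest reduced to whitespace-separated
# tokens, so whitespace-only changes compare equal.
def extra_canonical(s):
    nondate_portion = s.split('\n', 1)[1]
    return nondate_portion.split()

old_text = "Date: Mon\nHello  world\n"
new_text = "Date: Tue\nHello\tworld"

print(extra_canonical(old_text))                               # ['Hello', 'world']
print(extra_canonical(old_text) == extra_canonical(new_text))  # True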
def is_boring(old, new):
    oldu = canonicalize(old.decode('utf8'))
    newu = canonicalize(new.decode('utf8'))

    def extra_canonical(s):
        """Ignore changes in whitespace or the date line"""
        # This is fragile: depending on the text looking a particular way!
        nondate_portion = s.split('\n', 1)[1]
        return nondate_portion.split()

    if extra_canonical(oldu) == extra_canonical(newu):
        return True

    # This seems kind of fragile. Are we 100% sure that differences between
    # these encodings are unimportant? Also, how does this relate to non-latin
    # text?
    for charset in CHARSET_LIST:
        try:
            if oldu.encode(charset) == new:
                logger.debug('Boring!')
                return True
        except UnicodeEncodeError:
            pass
    return False
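The excerpt never shows `canonicalize`, `CHARSET_LIST`, or `logger`; the stubs below are only plausible stand-ins, not the real definitions, so the final version can be exercised end to end:

# Hypothetical stand-ins for names defined elsewhere in the original program.
import logging

logger = logging.getLogger(__name__)

# Guess at the encodings being checked; the real list is whatever the
# original module defines.
CHARSET_LIST = ['utf-8', 'latin-1', 'windows-1252']

def canonicalize(text):
    # Placeholder: the real canonicalize presumably does more normalization.
    return text.replace('\r\n', '\n')

old_bytes = "Date: Mon\nHello  world\n".encode('utf8')
new_bytes = "Date: Tue\nHello world\n".encode('utf8')
print(is_boring(old_bytes, new_bytes))  # True: only the date and whitespace changed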