def cleanHTML(text, skipchars=(), extra_careful=True):
    """Replace special characters in ``text`` with named HTML entities.

    Every character that has an entry in ``entitydefs`` (for example
    ``<``, ``"`` or a-umlaut) is replaced by its ``&name;`` form.

    Parameters:
        text: the string to convert.  Empty/None input is returned as is.
        skipchars: a single string or a sequence of strings that must be
            left untouched.
        extra_careful: when true, the text is returned unchanged if it
            already contains something that looks like an entity
            (``&name;`` or ``&#123;``), so it is not encoded twice.

    Returns:
        The converted string.
    """
    if not text:
        # nothing to convert
        return text

    # If extra_careful we don't attempt to do anything to the string if it
    # might have been converted already.
    if extra_careful and re.search(r'&\w+;|&#[0-9]+;', text):
        return text

    if isinstance(skipchars, str):
        skipchars = [skipchars]

    # Map each replaceable value back to its entity name.
    entitydefs_inverted = {}
    for name, value in entitydefs.items():
        entitydefs_inverted[value] = name

    # Escape the values so none of them is read as regex syntax, and try the
    # longest alternatives first so a multi-character value is not shadowed
    # by a shorter one such as '&'.
    candidates = sorted(entitydefs_inverted, key=len, reverse=True)
    badchars_regex = re.compile('|'.join([re.escape(c) for c in candidates]))

    # Collect what needs replacing *before* any entity is inserted.
    found = set()
    for match in badchars_regex.findall(text):
        if match not in skipchars:
            found.add(match)

    # Ampersands go first, otherwise the '&' of the entities inserted below
    # would itself be escaped.
    if '&' not in skipchars:
        text = text.replace('&', '&amp;')
    # '\x80' has no entry in entitydefs; it is mapped to the euro entity.
    if '\x80' not in skipchars:
        text = text.replace('\x80', '&euro;')

    for key in found:
        if key == '&':
            # already handled above
            continue
        text = text.replace(key, '&%s;' % entitydefs_inverted[key])
    return text
def cleanHTML(text, skipchars=(), extra_careful=True):
    """Replace special characters in ``text`` with named HTML entities.

    Every character that has an entry in ``entitydefs`` (for example
    ``<``, ``"`` or a-umlaut) is replaced by its ``&name;`` form.

    Parameters:
        text: the string to convert.  Empty/None input is returned as is.
        skipchars: a single string or a sequence of strings that must be
            left untouched.
        extra_careful: when true, the text is returned unchanged if it
            already contains something that looks like an entity
            (``&name;`` or ``&#123;``), so it is not encoded twice.

    Returns:
        The converted string.
    """
    if not text:
        # nothing to convert
        return text

    # If extra_careful we don't attempt to do anything to the string if it
    # might have been converted already.
    if extra_careful and re.search(r'&\w+;|&#[0-9]+;', text):
        return text

    if isinstance(skipchars, str):
        skipchars = [skipchars]

    # Map each replaceable value back to its entity name.
    entitydefs_inverted = {}
    for name, value in entitydefs.items():
        entitydefs_inverted[value] = name

    # Escape the values so none of them is read as regex syntax, and try the
    # longest alternatives first so a multi-character value is not shadowed
    # by a shorter one such as '&'.
    candidates = sorted(entitydefs_inverted, key=len, reverse=True)
    badchars_regex = re.compile('|'.join([re.escape(c) for c in candidates]))

    # Collect what needs replacing *before* any entity is inserted.
    found = set()
    for match in badchars_regex.findall(text):
        if match not in skipchars:
            found.add(match)

    # Ampersands go first, otherwise the '&' of the entities inserted below
    # would itself be escaped.
    if '&' not in skipchars:
        text = text.replace('&', '&amp;')
    # '\x80' has no entry in entitydefs; it is mapped to the euro entity.
    if '\x80' not in skipchars:
        text = text.replace('\x80', '&euro;')

    for key in found:
        if key == '&':
            # already handled above
            continue
        text = text.replace(key, '&%s;' % entitydefs_inverted[key])
    return text
if math.floor(size) == size: return "%d %s" % (int(size), final_unit) else: return "%3.1f %s" % (size, final_unit) if unit != "Yotta": size /= 1024.0 return "%3.1f %s" % (size, final_unit) entitydefs_inverted = {} for k, v in entitydefs.items(): entitydefs_inverted[v] = k _badchars_regex = re.compile("|".join(entitydefs.values())) _been_fixed_regex = re.compile("&\w+;|&#[0-9]+;") def html_entity_fixer(text, skipchars=[], extra_careful=1): # if extra_careful we don't attempt to do anything to # the string if it might have been converted already. if extra_careful and _been_fixed_regex.findall(text): return text if type(skipchars) == type("s"): skipchars = [skipchars] keyholder = [] for char in _badchars_regex.findall(text): if char not in skipchars:
joined = '%s<%s %s>' % (part1, tag, attribute) joined += '%s%s</%s>%s</p>' % (dashes, _p_splitted[0], tag, _p_splitted[1]) else: joined = '%s<%s %s>%s%s</%s>' % (part1, tag, attribute, dashes, part2, tag) return joined return text def niceboolean(value): falseness = ('', 'no', 'off', 'false', 'none', '0', 'f', 'n') return str(value).lower().strip() not in falseness _badchars_regex = re.compile('|'.join(entitydefs.values())) _been_fixed_regex = re.compile('&\w+;|&#[0-9]+;') def html_entity_fixer(text, skipchars=[], extra_careful=1): """ return a text properly html fixed """ if not text: # then don't even begin to try to do anything return text # if extra_careful we don't attempt to do anything to # the string if it might have been converted already. if extra_careful and _been_fixed_regex.findall(text): return text if isinstance(skipchars, basestring):
part.__init__(self, 'tr', style=style, attributes=attributes) self.addPart('th', content=self.text) self.addPart('td', content=self.field) # need some functions for HTML # ought to be somewhere else in Python? # cgi.escape only seems to do <, >, and & from htmlentitydefs import entitydefs import re entitydefs_inverted = {} for k, v in entitydefs.items(): entitydefs_inverted[v] = k needencoding = re.compile('|'.join(entitydefs.values())) alreadyencoded = re.compile('&\w+;|&#[0-9]+;') # encodes any special characters to their HTML equivalents def encode(text, skip=None, once_only=1): # if extra_careful, check to see if this text has already been converted if not (once_only and alreadyencoded.findall(text)): if not isinstance(skip, list): skip = [skip] # do ampersands on their own or we might end up converting our conversions if '&' not in skip: text = text.replace('&', '&') skip.append('&')
self.addPiece(self.text) self.addPiece(self.field) else: part.__init__(self, 'tr', style=style, attributes=attributes) self.addPart('th', content=self.text) self.addPart('td', content=self.field) from htmlentitydefs import entitydefs import re entitydefs_inverted = {} for k,v in entitydefs.items(): entitydefs_inverted[v] = k needencoding = re.compile('|'.join(entitydefs.values())) alreadyencoded = re.compile('&\w+;|&#[0-9]+;') #need some functions for HTML #ought to be somewhere else in Python? #cgi.escape only seems to do <, >, and & #encodes any special characters to their HTML equivalents def encode(text, skip=None, once_only=1): # if extra_careful, check to see if this text has already been converted if not (once_only and alreadyencoded.findall(text)): if not isinstance(skip, list): skip = [skip] #do ampersands on their own or we might end up converting our conversions if '&' not in skip: