def innerHTML(self):
    """Return the children of this object rendered as one unicode string.

    Every entry of ``self.children`` is passed to ``smart_unicode`` with
    ``self.charset[0]`` as the ``encoding`` argument, and the converted
    pieces are concatenated in iteration order.  With no children the
    result is ``u''``.
    """
    # Join once instead of growing the string with ``+=`` on every
    # iteration, which re-copies the accumulated text each time.
    return u''.join(
        smart_unicode(c, encoding=self.charset[0]) for c in self.children
    )
def innerHTML(self):
    """Concatenate the unicode form of each child and return the result.

    Each element of ``self.children`` is converted by ``smart_unicode``
    using ``self.charset[0]`` as its ``encoding`` argument; the converted
    pieces are joined in order.  Returns ``u''`` when there are no
    children.
    """
    # A single join avoids the repeated re-copying that ``result += ...``
    # inside a loop incurs.
    pieces = [
        smart_unicode(child, encoding=self.charset[0])
        for child in self.children
    ]
    return u''.join(pieces)
def url_unquote(string):
    """Undo ``%XX`` quoting in *string* and return it via ``smart_unicode``.

    Every ``%`` followed by two characters is replaced by the byte those
    two characters spell in hexadecimal.  Unquoting is best effort: if the
    substitution fails (for example because the two characters are not
    hex digits) *string* is left exactly as it was passed in.  The value
    is then handed to ``smart_unicode`` with ``'utf8'`` as the encoding.
    """
    def decoder(match):
        # Relies on the Python 2 "hex" codec for byte strings.
        return match.group(1).decode("hex")

    ## we can use this to handle arbitrary levels of quoting
    try:
        string = re.sub(r"%(..)", decoder, string)
    except Exception:
        # Deliberately best effort: keep the input untouched on failure.
        # ``Exception`` (rather than a bare ``except``) lets
        # KeyboardInterrupt and SystemExit propagate.
        pass

    ## references seem to _always_ be encoded using utf8 - even if the
    ## page is encoded using a different charset??? This whole quoting
    ## thing is very confusing.
    return smart_unicode(string, 'utf8')
def __getitem__(self, item):
    """Return the attribute stored under *item* as unicode.

    *item* is lower-cased before it is used to index ``self.attributes``;
    a missing key raises whatever that container raises.  The stored
    value is converted by ``smart_unicode`` with ``self.charset[0]`` as
    the second argument.
    """
    raw_value = self.attributes[item.lower()]
    encoding = self.charset[0]
    # 'ignore' is presumably the error-handling mode for undecodable
    # input -- confirm against smart_unicode's signature.
    return smart_unicode(raw_value, encoding, 'ignore')
def __unicode__(self):
    """Return the unicode form of this object.

    The text comes from ``self.__str__()`` and is converted by
    ``smart_unicode`` using ``self.charset[0]`` as the ``encoding``
    argument.
    """
    text = self.__str__()
    encoding = self.charset[0]
    return smart_unicode(text, encoding=encoding)
def __unicode__(self):
    """Convert the ``__str__`` output of this object to unicode.

    ``smart_unicode`` performs the conversion, with the first entry of
    ``self.charset`` passed as its ``encoding`` argument.
    """
    rendered = self.__str__()
    return smart_unicode(
        rendered,
        encoding=self.charset[0],
    )
def __getitem__(self, item):
    """Fetch an attribute by its lower-cased name and return it as unicode.

    The lookup key is ``item.lower()`` into ``self.attributes``; the value
    found is passed to ``smart_unicode`` together with ``self.charset[0]``
    and the literal ``'ignore'``.
    """
    key = item.lower()
    stored = self.attributes[key]
    # NOTE(review): 'ignore' looks like an errors= mode for the
    # conversion; verify against smart_unicode.
    return smart_unicode(stored, self.charset[0], 'ignore')