def Parse_images_url(content):
    """Return the ``src`` values of every ``<img>`` tag found in *content*.

    *content* is first passed through ``encoding()`` and
    ``gbtools.stringQ2B()`` (both defined outside this block) and the result
    is scanned case-insensitively for ``<img ... src=...>`` tags.

    Returns:
        A list with one captured ``src`` string per matched tag (empty when
        nothing matches), or ``None`` if any step raises; the error is
        logged at INFO level in that case.
    """
    try:
        content = gbtools.stringQ2B(encoding(content))
        # The optional opening quote is ' or " only; a "|" inside a character
        # class is a literal pipe, not alternation.  ">" is excluded from the
        # captured URL so that an unquoted src (e.g. <img src=a.png>) cannot
        # swallow the tag's closing bracket and the markup that follows it.
        img = re.compile(
            r"""<img\s.*?\s?src\s*=\s*['"]?([^\s'">]+).*?>""", re.I)
        return img.findall(content)
    except Exception as data:
        # Best-effort helper: callers receive None instead of an exception.
        logging.info('the error of Parse_images is %s', data)
        return None
def decoding(s):
    """Return *s* as unicode text when one of the known codecs can decode it.

    Unicode input is returned untouched.  Any other input is first passed
    through ``gbtools.stringQ2B`` and then decoded with the first of utf-8,
    gb2312 and GB18030 that succeeds.  If none of them succeeds, the
    ``stringQ2B``-converted value is returned undecoded.
    """
    if isinstance(s, unicode):
        return s

    converted = gbtools.stringQ2B(s)
    for codec in ('utf-8', 'gb2312', 'GB18030'):
        try:
            return converted.decode(codec)
        except UnicodeDecodeError:
            # This codec rejected the input; fall through to the next one.
            continue
    return converted