def handle_wb_url(self, url):
    """Download ``url``, extract the embedded weibo HTML and queue the parsed posts.

    The page is fetched with ``self.downloader.download``. The HTML fragment
    is pulled out of the response body with one of two regular expressions,
    chosen by whether ``'mbloglist'`` occurs in ``url``. Escaped ``\\r``,
    ``\\n``, ``\\t`` sequences and any remaining backslashes are removed, and
    the result is handed to ``Parser.parse_wb`` together with
    ``self.args['uid']``. A non-empty parse result is pushed onto
    ``self.wb_q`` with ``put_nowait``.

    Args:
        url: Address of the page to download.

    Returns:
        None. The only output is the item put on ``self.wb_q``.

    Any exception raised along the way (download, parsing, queueing) is
    caught and logged as a warning rather than propagated.
    """
    try:
        res = self.downloader.download(url)
        if not res or not res.content:
            # Unicode format string + lazy args: avoids the implicit ASCII
            # decode that byte-string concatenation triggers on Python 2.
            logger.debug(u'该url:%s无微博内容', url)
            return

        if 'mbloglist' in url:
            # Payload is carried in a "data" field; a missing field is
            # treated as empty content and reported below.
            match = re.search(r'"data":"(.*?)"}', res.content, re.S)
            data = match.group(1) if match else ''
            html = (data.replace('\\r', '')
                        .replace('\\n', '')
                        .replace('\\t', '')
                        .replace('\\', '')
                        .strip())
        else:
            # Otherwise look for the "html" field that follows the
            # homeFeed script reference.
            match = re.search(
                r'js/pl/content/homeFeed/index.js.*?html":"(.*?)"}',
                res.content, re.S)
            if not match:
                return
            # Replacement order is kept as in the original: it can matter
            # for overlapping escape sequences.
            html = (match.group(1).replace('\\t', '')
                                  .replace('\\n', '')
                                  .replace('\\r', '')
                                  .replace('\\', ''))

        if not html:
            logger.debug(u'该url:%s无微博信息', url)
            return

        wbs = Parser.parse_wb(html, self.args['uid'])
        if not wbs:
            return
        self.wb_q.put_nowait(wbs)
    except Exception as e:
        # Deliberately broad: a failure on one URL is logged and swallowed.
        # Letting logging format the exception avoids str(e) raising on
        # non-ASCII messages under Python 2.
        logger.warning(u'error in handle_wb_url:%s,url为:%s', e, url)