def __parse_account_info(self, data, req_url):
    """Extract an account's profile fields from a page and save them.

    Each field is pulled out of ``data`` with a regular expression via
    ``tools.get_info``; the collected values are handed to
    ``data_pipeline.save_account`` as a single dict.

    Args:
        data: Text to search. Presumably the HTML of the account's history
            page -- confirm against the caller.
        req_url: Request URL; its ``__biz`` query parameter identifies the
            account.

    Returns:
        None. The result is passed to ``data_pipeline.save_account``.
    """
    biz = tools.get_param(req_url, "__biz")

    # NOTE(review): the original already guarded ``verify`` against a falsy
    # lookup result, so every lookup below falls back to "" the same way
    # instead of calling ``.strip()`` / ``+`` on a possibly-missing value.
    regex = 'id="nickname">(.*?)</strong>'
    account = (tools.get_info(data, regex, fetch_one=True) or "").strip()

    regex = 'profile_avatar">.*?<img src="(.*?)"'
    head_url = tools.get_info(data, regex, fetch_one=True)

    regex = 'class="profile_desc">(.*?)</p>'
    summary = (tools.get_info(data, regex, fetch_one=True) or "").strip()

    # Verification info. When the history page is opened directly from a
    # followed account there is no verification info, so this may be empty.
    regex = '<i class="icon_verify success">.*?</i>(.*?)</span>'
    verify = (tools.get_info(data, regex, fetch_one=True) or "").strip()

    # QR code. "||" must be escaped in the pattern; a raw string keeps the
    # backslashes without relying on an invalid "\|" string escape.
    regex = r'var username = "" \|\| "(.*?)";'
    username = tools.get_info(data, regex, fetch_one=True) or ""
    qr_code = "http://open.weixin.qq.com/qr/code?username=" + username

    account_data = {
        "__biz": biz,
        "account": account,
        "head_url": head_url,
        "summary": summary,
        "qr_code": qr_code,
        "verify": verify,
        "spider_time": tools.get_current_date(),
    }

    # The dict literal above is never empty, so it is always saved.
    data_pipeline.save_account(account_data)
def __parse_account_info(self, data, req_url):
    '''Extract an account's profile fields from a page and save them.

    Each field is pulled out of ``data`` with a regular expression via
    ``tools.get_info``; the collected values are handed to
    ``data_pipeline.save_account`` as a single dict.

    @param data: text to search; presumably the HTML of the account's
        history page -- confirm against the caller
    @param req_url: request URL; its ``__biz`` query parameter identifies
        the account
    @result: None (the result is passed to ``data_pipeline.save_account``)
    '''
    biz = tools.get_param(req_url, '__biz')

    # NOTE(review): the original already guarded ``verify`` against a falsy
    # lookup result, so every lookup below falls back to '' the same way
    # instead of calling ``.strip()`` / ``+`` on a possibly-missing value.
    regex = 'id="nickname">(.*?)</strong>'
    account = (tools.get_info(data, regex, fetch_one=True) or '').strip()

    regex = 'profile_avatar">.*?<img src="(.*?)"'
    head_url = tools.get_info(data, regex, fetch_one=True)

    regex = 'class="profile_desc">(.*?)</p>'
    summary = (tools.get_info(data, regex, fetch_one=True) or '').strip()

    # Verification info. When the history page is opened directly from a
    # followed account there is no verification info, so this may be empty.
    regex = '<i class="icon_verify success">.*?</i>(.*?)</span>'
    verify = (tools.get_info(data, regex, fetch_one=True) or '').strip()

    # QR code. "||" must be escaped in the pattern; a raw string keeps the
    # backslashes without relying on an invalid "\|" string escape.
    regex = r'var username = "" \|\| "(.*?)";'
    username = tools.get_info(data, regex, fetch_one=True) or ''
    qr_code = 'http://open.weixin.qq.com/qr/code?username=' + username

    account_data = {
        '__biz': biz,
        'account': account,
        'head_url': head_url,
        'summary': summary,
        'qr_code': qr_code,
        'verify': verify,
        'spider_time': tools.get_current_date()
    }

    # The dict literal above is never empty, so it is always saved.
    data_pipeline.save_account(account_data)