def yan(browser, task_id, tag):
    """Click ``tag`` and complete the verification dialog if one appears.

    After the click, the page is probed for the service-password input
    (element id ``vec_servpasswd``). If it is not there, nothing else is
    done. Otherwise the service password stored on the task record is typed
    in, an SMS random code is requested, the code is read with
    ``get_smspwd`` and typed in (requesting a second code once if the first
    wait returns an empty value), ``get_cap(browser)`` is called and the
    form is submitted. If the page then shows an error message, the matching
    status code is stored, the dialog is dismissed and this function calls
    itself again with the same arguments.

    Args:
        browser: Selenium WebDriver driving the page.
        task_id: Key of the task record read with ``get_info`` and written
            with ``data_to_redis``.
        tag: Clickable page element that may trigger the verification dialog.

    NOTE(review): the retry is unbounded recursion; as long as the page keeps
    reporting an error a new SMS code is requested on every round.
    """
    tag.click()
    time.sleep(3)  # let the dialog render before probing for it

    try:
        password_box = browser.find_element_by_id('vec_servpasswd')
    except Exception:
        # Lookup failed: treat it as "no verification dialog". Catching
        # Exception (instead of a bare except) keeps KeyboardInterrupt and
        # SystemExit propagating.
        return
    if not password_box:
        return

    data = get_info(task_id)
    password_box.send_keys(data['serverpwd'])  # enter the service password
    send_sms_button = browser.find_element_by_id('stc-send-sms')

    def request_code(alert_delay, message):
        # Ask the site for an SMS random code, confirm the alert it pops up
        # and publish status 2003 on the task record.
        send_sms_button.click()
        time.sleep(alert_delay)
        browser.switch_to.alert.accept()  # press OK on the page alert
        log.crawler.info(message)
        data['status_code'] = 2003
        data_to_redis(task_id, data)

    request_code(0.8, '获取随机码')
    sms_box = browser.find_element_by_id('vec_smspasswd')
    smspwd = get_smspwd(task_id)
    if smspwd:
        sms_box.send_keys(smspwd)  # enter the SMS verification code
    else:
        # No code arrived: wait, then request a random code a second time.
        time.sleep(20)
        request_code(2, '第二次获取随机码')
        smspwd = get_smspwd(task_id)
        sms_box.send_keys(smspwd)  # enter the SMS verification code

    get_cap(browser)
    WebDriverWait(browser, 3).until(
        EC.presence_of_element_located((By.ID, "vecbtn")))
    browser.find_element_by_id('vecbtn').click()
    time.sleep(1)

    # The code has been used; clear it on the task record.
    data['smspwd'] = ''
    data_to_redis(task_id, data)

    errors = browser.find_elements_by_xpath('//*[@id="detailerrmsg"]')
    time.sleep(1)
    if not errors:
        return

    if errors[0].text == '随机密码错误!':
        log.crawler.info('随机密码错误!')
        data['status_code'] = 2006
    else:
        log.crawler.info('服务密码错误')
        data['status_code'] = 1003
    data_to_redis(task_id, data)

    # NOTE(review): the original layout was flattened; dismiss-and-retry is
    # assumed to apply to both error kinds - confirm.
    close_button = browser.find_elements_by_xpath(
        '//*[@id="undefined"]/span')[0]
    close_button.click()
    yan(browser, task_id, tag)
def index():
    """Run a crawl for the submitted phone number and return the parsed data.

    Reads ``phone_num``, ``serverpwd`` and ``task_id`` from the request form,
    creates the per-task HTML directory, stores the initial task record
    (status code 1), runs ``start_spider``, builds the result with
    ``last_data``, sets status code 3001 on the task record and saves the
    result with ``save_data_in_mongo`` under ``"chinamobile"``.

    Returns:
        The value of ``jsonify(all_data)``.
    """
    phone_num = request.form.get('phone_num')
    serverpwd = request.form.get('serverpwd')
    task_id = request.form.get('task_id')

    # Raw string: the backslashes are path separators, not escape sequences.
    # exist_ok=True lets a task id be submitted again without the request
    # failing on an already existing directory.
    os.makedirs(
        r'D:\work_space\ChinaMobile\html_source\{}'.format(str(task_id)),
        exist_ok=True,
    )

    data = {
        'task_id': task_id,
        'phone_num': phone_num,
        'serverpwd': serverpwd,
        'smspwd': '',
        'status_code': 1,
    }
    data_to_redis(task_id, data)

    start_spider(phone_num, task_id)
    log.crawler.info('页面获取完成,开始解析页面')
    all_data = last_data(task_id, phone_num)

    # Re-read the record: the crawl updates it while it runs.
    data = get_info(task_id)
    data['status_code'] = 3001
    data_to_redis(task_id, data)

    d = {
        'taskid': task_id,
        'result': all_data,
    }
    save_data_in_mongo(d, "chinamobile")
    return jsonify(all_data)
def get_sms():
    """Store the SMS code from the request form on the task record.

    Reads ``smspwd`` and ``task_id`` from the request form, writes the code
    and status code 2004 into the record returned by ``get_info`` and saves
    it with ``data_to_redis``.

    Returns:
        A fixed confirmation string.
    """
    form = request.form
    sms_code = form.get('smspwd')
    task_id = form.get('task_id')

    record = get_info(task_id)
    record['smspwd'] = sms_code
    record['status_code'] = 2004
    data_to_redis(task_id, record)

    return '短信验证码保存成功'
def get_plan_amt(browser, task_id):
    """Save the HTML of every page of the detail lists for six month tabs.

    For list tabs 1, 2 and 4 of ``switch-data`` and for the elements
    ``month1`` .. ``month6``, the tab is opened through ``yan`` and the page
    source is saved below ``filedir`` as
    ``<task_id>\\<section title><month id>_<page>.html``. When the pager
    element ``notes1`` reports more than one page, each further page is
    opened via the ``next`` element and saved the same way.

    Args:
        browser: Selenium WebDriver positioned on the detail page.
        task_id: Key of the task record; also the sub-directory name.
    """
    section_titles = ['套餐及固定费', '通话详单', '上网详单', '短信详单']

    def snapshot(section, month_id, page_no):
        # Publish status 3000 on the task record, then save the current page.
        record = get_info(task_id)
        record['status_code'] = 3000
        data_to_redis(task_id, record)
        time.sleep(1)
        html = browser.page_source
        relative = (str(task_id) + '\\' + section + month_id
                    + '_' + str(page_no) + '.html')
        save_source(os.path.join(filedir, relative), html)

    for tab_no in (1, 2, 4):
        tab = browser.find_elements_by_xpath(
            '//*[@id="switch-data"]/li[{}]/p'.format(tab_no))[0]
        yan(browser, task_id, tab)
        section = section_titles[tab_no - 1]

        for month_no in range(1, 7):
            month_id = 'month' + str(month_no)
            month_tab = browser.find_elements_by_xpath(
                '//*[@id="{}"]'.format(month_id))[0]
            yan(browser, task_id, month_tab)
            snapshot(section, month_id, 1)

            pager = browser.find_elements_by_xpath('//*[@id="notes1"]')
            if not pager:
                continue

            # Pager text looks like '第1/1页'; the number after '/' is the
            # page count.
            total_pages = int(pager[0].text.split('/')[1].split('页')[0])
            for page_no in range(2, total_pages + 1):
                time.sleep(1)
                next_button = browser.find_element_by_class_name('next')
                yan(browser, task_id, next_button)
                snapshot(section, month_id, page_no)
def get_smspwd(task_id, timeout=50, poll_interval=0.5):
    """Wait for the SMS code of a task and return it.

    The task record is re-read with ``get_info`` until its ``smspwd`` entry
    is non-empty or the wait has timed out.

    Args:
        task_id: Key of the task record to poll.
        timeout: Whole seconds that may elapse before giving up; the wait
            ends once the truncated elapsed time exceeds this value.
        poll_interval: Seconds to sleep between two reads, so the loop does
            not spin at full speed against the record store.

    Returns:
        The SMS code, or ``''`` when none arrived in time.
    """
    started = time.time()
    while True:
        data = get_info(task_id)
        smspwd = data['smspwd']
        if smspwd:
            return smspwd
        if int(time.time() - started) > timeout:
            return ''
        time.sleep(poll_interval)
def start_spider(phone_num, task_id):
    """Drive one complete crawl session for ``phone_num``.

    Obtains a browser from ``get_browser``, logs in, passes the detail-page
    verification, sets status code 3000 on the task record and then runs the
    individual collectors in a fixed order.

    Args:
        phone_num: Phone number to log in with.
        task_id: Key of the task record in the status store.

    NOTE(review): the browser is not closed in this function - confirm that
    the caller or ``get_browser`` owns its lifetime.
    """
    browser = get_browser()
    login_user_cap(browser, phone_num, task_id)  # log in
    get_detail_data(browser, task_id)  # detail-list verification

    record = get_info(task_id)
    record['status_code'] = 3000
    data_to_redis(task_id, record)

    get_plan_amt(browser, task_id)  # detailed bills

    # Personal info, data traffic, payment records, bills - in this order.
    collectors = (get_user_info, get_user_gprs, get_user_count, get_user_bill)
    for collect in collectors:
        collect(browser, phone_num, task_id)
def get_detail_data(browser, task_id):
    """Open the detail-list page and pass its verification step.

    Waits, clicks the fourth entry of the second ``stcnavmenu`` list, waits
    for ``month1`` to be present and hands that element to ``yan``.
    Afterwards status code 2000 is stored on the task record.

    Args:
        browser: Logged-in Selenium WebDriver.
        task_id: Key of the task record in the status store.
    """
    time.sleep(7)

    menu_link = browser.find_elements_by_xpath(
        '//*[@id="stcnavmenu"]/ul[2]/li/ul/li[4]/a')[0]
    menu_link.click()

    waiter = WebDriverWait(browser, 3)
    waiter.until(EC.presence_of_element_located((By.ID, "month1")))
    first_month = browser.find_elements_by_xpath('//*[@id="month1"]')[0]

    log.crawler.info('详情页验证码')
    yan(browser, task_id, first_month)

    record = get_info(task_id)
    record['status_code'] = 2000
    data_to_redis(task_id, record)
    log.crawler.info('鉴权成功')
def login_user_cap(browser, phone_num, task_id):
    """Log in on login.10086.cn with a phone number and an SMS random code.

    Opens the login page, switches to SMS login, enters the phone number and
    requests a random code. Status code 2003 is stored while waiting for the
    code, which is read with ``get_smspwd`` and submitted. Afterwards the
    page source is saved as ``<task_id>\\home.html`` below ``filedir``.

    Status codes written to the task record:
        1002 - the page flagged the phone number as invalid.
        2003 - waiting for the SMS random code.
        2006 - the page flagged the SMS random code as wrong.
        1004 - login submitted without an SMS-code error.

    Args:
        browser: Selenium WebDriver to drive.
        phone_num: Phone number typed into the login form.
        task_id: Key of the task record in the status store.

    Raises:
        RuntimeError: If the page rejects the phone number. The browser has
            been quit at that point, so continuing could only fail later
            with an unrelated driver error.
    """
    url = 'https://login.10086.cn/login.html'
    browser.get(url=url)
    browser.maximize_window()

    WebDriverWait(browser, 3).until(
        EC.presence_of_element_located((By.ID, "sms_login_1")))
    browser.find_element_by_id('sms_login_1').click()

    WebDriverWait(browser, 3).until(
        EC.presence_of_element_located((By.ID, "sms_name")))
    browser.find_element_by_id('sms_name').send_keys(phone_num)
    browser.find_element_by_id('getSMSPwd1').click()

    smsphone_err = browser.find_element_by_id('smsphone_err')
    data = get_info(task_id)
    if smsphone_err.is_displayed():
        log.crawler.info('手机号有误')
        data['status_code'] = 1002
        data_to_redis(task_id, data)
        browser.quit()
        # Stop here: every further browser call would hit a closed session.
        raise RuntimeError('手机号有误')

    WebDriverWait(browser, 3).until(
        EC.presence_of_element_located((By.ID, "sms_pwd_l")))
    sms_pwd_l = browser.find_element_by_id('sms_pwd_l')
    data['status_code'] = 2003
    data_to_redis(task_id, data)
    smspwd = get_smspwd(task_id)
    sms_pwd_l.send_keys(smspwd)  # enter the SMS verification code

    submit_bt = browser.find_element_by_id('submit_bt')
    # Locate the error element before submitting, as the original does.
    smspwd_err = browser.find_element_by_id('smspwd_err')
    submit_bt.click()
    time.sleep(0.2)

    code_rejected = smspwd_err.is_displayed()
    if code_rejected:
        log.crawler.info('请输入正确短信随机码')
        data['status_code'] = 2006
        data_to_redis(task_id, data)
    time.sleep(2)

    if not code_rejected:
        # Only report success when no SMS-code error was shown; otherwise
        # the 2006 status stored above would be overwritten.
        log.crawler.info('登陆成功')
        data['status_code'] = 1004
    data['smspwd'] = ''  # the code has been used; clear it
    data_to_redis(task_id, data)

    time.sleep(1)
    source = browser.page_source
    file = str(task_id) + '\\' + 'home.html'
    file_name = os.path.join(filedir, file)
    save_source(file_name, source)
def get_smspwd(task_id, timeout=None, poll_interval=0.5):
    """Wait for the SMS code of a task and return it.

    The task record is re-read with ``get_info`` until its ``smspwd`` entry
    is non-empty.

    Args:
        task_id: Key of the task record to poll.
        timeout: Optional number of seconds after which to give up. The
            default ``None`` waits indefinitely.
        poll_interval: Seconds to sleep between two reads, so the loop does
            not spin at full speed against the record store.

    Returns:
        The SMS code, or ``''`` if ``timeout`` was given and expired first.
    """
    started = time.time()
    while True:
        data = get_info(task_id)
        smspwd = data['smspwd']
        if smspwd:
            return smspwd
        if timeout is not None and time.time() - started > timeout:
            return ''
        time.sleep(poll_interval)