def parse(self, response):
    """Scrape the account summary from the live browser page, then crawl bills.

    Clicks the "show amount" toggles of the account-balance, Yu'e Bao and
    Huabei panels (a toggle that is not on the page is skipped), builds an
    ``AlipayUserItem`` from the browser's rendered page source and yields it.
    After the item is consumed, the browser is pointed at the advanced
    transaction-record page and ``AlipayBillParser`` is run with the spider's
    cookies.

    Args:
        response: Scrapy response passed to the callback. It is not read
            here; the page content comes from ``self._browser``.

    Yields:
        AlipayUserItem: the scraped amounts plus ``create_time`` (local epoch
        seconds multiplied by 1000, stored as a string).
    """
    logger.info("当前页面: " + self._browser.current_url)

    # (XPath of the reveal toggle, pause after a successful click?)
    reveal_toggles = (
        ('//*[@id="showAccountAmount"]/a[1]', True),
        ('//*[@id="showYuebaoAmount"]/a[1]', True),
        ('//*[@id="showHuabeiAmount"]/a[1]', False),
    )
    for toggle_xpath, pause_after in reveal_toggles:
        try:
            self._browser.find_element_by_xpath(toggle_xpath).click()
            if pause_after:
                time.sleep(random.uniform(0.3, 0.9))
        except NoSuchElementException:
            # This panel is absent from the page; nothing to reveal.
            pass

    # Give the page time to render the revealed amounts before snapshotting.
    time.sleep(2)
    page_sel = scrapy.Selector(text=self._browser.page_source)

    # Item field -> XPath string() query, in the order the fields are filled.
    field_queries = (
        ('user_rest_huabei',
         'string(//*[@id="available-amount-container"]/span)'),
        ('user_total_huabei',
         'string(//*[@id="credit-amount-container"]/span)'),
        ('user_yeb_earn',
         'string(//a[@id="J-income-num"])'),
        ('user_yeb_rest',
         'string(//*[@id="J-assets-mfund-amount"]/span)'),
    )
    user_item = AlipayUserItem()
    user_item['user'] = self.username
    for field_name, query in field_queries:
        user_item[field_name] = page_sel.xpath(query).extract_first()
    epoch_seconds = int(time.mktime(datetime.datetime.now().timetuple()))
    user_item['create_time'] = str(epoch_seconds * 1000)
    yield user_item

    # Dump the current cookies.
    print(self.cookie)

    # Go straight to the transaction-record page by URL instead of clicking
    # the navigation tab.
    self._browser.get('https://consumeprod.alipay.com/record/advanced.htm')

    # Crawl the bills, reusing the spider's cookies.
    bill_parser = AlipayBillParser()
    bill_parser.crawler(cookie_dict=self.cookie)
def parse_personal(self, response):
    """Read balances from the hidden-amount JSON endpoints, then set up the bill query.

    For each of the ``account``, ``mfund`` and ``huabei`` endpoints a
    ``<name>WithDynamicFont.json`` URL is requested with the spider's cookies
    and ``ctoken``. HTML tags are stripped from every value of the response's
    ``"result"`` mapping and the cleaned values are used to fill an
    ``AlipayUserItem``. The browser is then driven to the bill page, the
    three-month range and the transaction category are selected, the query is
    submitted, and a request for the resulting URL is scheduled with
    ``self.parse`` as callback.

    Args:
        response: Scrapy response passed to the callback; not read here.

    Yields:
        AlipayUserItem: the balances read from the JSON endpoints.
        scrapy.Request: request for the filtered bill page (only when the
        filter setup succeeds).
    """
    logger.info("当前页面: " + self._browser.current_url)

    ctoken = self.cookie['ctoken']
    # Compiled once: strips any HTML tag from the returned amount markup.
    tag_pattern = re.compile(r'<[^>]+>', re.S)
    account_data = []

    # One session for all three requests, closed deterministically; the
    # previous code opened a fresh, never-closed session per endpoint.
    with requests.session() as session_get:
        requests.utils.add_dict_to_cookiejar(session_get.cookies, self.cookie)
        for name in ("account", "mfund", "huabei"):
            url_model = (
                "https://my.alipay.com/portal/" + name + "WithDynamicFont.json"
                "?className=" + name + "WithDynamicFontClass"
                "&encrypt=true"
                "&_input_charset=utf-8"
                "&ctoken=" + ctoken +
                "&_output_charset=utf-8"
            )
            html_res = session_get.get(url=url_model)
            account_data.append(
                [tag_pattern.sub('', res)
                 for res in html_res.json()["result"].values()]
            )
    logger.debug(account_data)

    # account_data rows follow the endpoint order: account, mfund, huabei.
    user_item = AlipayUserItem()
    user_item['user'] = "******"
    user_item['user_account'] = account_data[0][0]
    user_item['user_yeb_rest'] = account_data[1][0]
    user_item['user_rest_huabei'] = account_data[2][0]
    user_item['user_total_huabei'] = account_data[2][-1]
    user_item['create_time'] = str(
        int(time.mktime(datetime.datetime.now().timetuple())) * 1000)
    yield user_item

    # Randomised pause before navigating.
    time.sleep(random.uniform(1, 2))

    # Open the bill page through the global navigation link.
    self._browser.find_element_by_xpath(
        '//ul[@class="global-nav"]/li[@class="global-nav-item "]/a').click()

    try:
        # Open the date-range dropdown and pick the three-month preset.
        # (A custom begin/end date variant was prototyped here and disabled.)
        self._browser.find_element_by_xpath(
            '//div[@id="J-datetime-select"]/a[1]').click()
        self._browser.find_element_by_xpath(
            '//ul[@class="ui-select-content"]/li[@data-value="threeMonths"]'
        ).click()

        time.sleep(random.uniform(0.5, 0.9))

        # Open the transaction-category dropdown and choose the option.
        self._browser.find_element_by_xpath(
            '//div[@id="J-category-select"]/a[1]').click()
        self._bill_option_control()

        time.sleep(random.uniform(1, 2))

        # Submit the query form.
        self._browser.find_element_by_id("J-set-query-form").click()
        logger.info("跳转到自定义时间页面....")
        logger.info(self._browser.current_url)

        yield scrapy.Request(url=self._browser.current_url,
                             callback=self.parse,
                             cookies=self.cookie)
    except Exception as err:
        # Best-effort boundary: report through the module logger (instead of
        # print) and shut the browser window, as before.
        logger.error(err)
        logger.debug(self._browser.current_url)
        self._browser.close()
def parse_personal(self, response):
    """Reveal the hidden amounts in the browser, scrape them, then set up the bill query.

    Clicks the "show amount" toggles for the account balance and Yu'e Bao,
    waits for the page to update, and builds an ``AlipayUserItem`` from the
    browser's rendered page source. The browser is then driven to the bill
    page, the three-month range and the transaction category are selected,
    the query is submitted, and a request for the resulting URL is scheduled
    with ``self.parse`` as callback.

    An earlier approach that read the amounts from the
    ``<name>WithDynamicFont.json`` endpoints via ``requests`` was disabled in
    favour of clicking the toggles.

    Args:
        response: Scrapy response passed to the callback; not read here.

    Yields:
        AlipayUserItem: the scraped amounts (``user_total_huabei`` is not
        filled by this variant).
        scrapy.Request: request for the filtered bill page (only when the
        filter setup succeeds).
    """
    logger.info("当前页面: " + self._browser.current_url)

    # Click the reveal toggles for the account balance and Yu'e Bao.
    show_account_button = self._browser.find_element_by_xpath(
        '//*[@id="showAccountAmount"]/a[1]')
    show_account_button.click()
    time.sleep(random.uniform(0.3, 0.9))
    show_yuerbao_button = self._browser.find_element_by_xpath(
        '//*[@id="showYuebaoAmount"]/a[1]')
    show_yuerbao_button.click()

    # implicitly_wait() only configures the element-lookup timeout for later
    # find_element calls; it does not pause execution.
    self._browser.implicitly_wait(5)
    # Actually wait so the revealed amounts are present in page_source.
    time.sleep(2)

    page_sel = scrapy.Selector(text=self._browser.page_source)

    user_item = AlipayUserItem()
    user_item['user'] = "******"
    user_item['user_rest_huabei'] = page_sel.xpath(
        'string(//*[@id="J-assets-pcredit"]/div/div/div[2]/div/p[2]/span/strong)'
    ).extract()[0]
    user_item['user_yeb_earn'] = page_sel.xpath(
        'string(//a[@id="J-income-num"])').extract()[0]
    user_item['user_yeb_rest'] = page_sel.xpath(
        'string(//*[@id="J-assets-mfund-amount"]/strong)').extract()[0]
    user_item['create_time'] = str(
        int(time.mktime(datetime.datetime.now().timetuple())) * 1000)
    yield user_item

    # Randomised pause before navigating.
    time.sleep(random.uniform(1, 2))

    # Open the bill page through the global navigation link.
    self._browser.find_element_by_xpath(
        '//ul[@class="global-nav"]/li[@class="global-nav-item "]/a').click()

    try:
        # Open the date-range dropdown and pick the three-month preset.
        # (A custom begin/end date variant was prototyped here and disabled.)
        self._browser.find_element_by_xpath(
            '//div[@id="J-datetime-select"]/a[1]').click()
        self._browser.find_element_by_xpath(
            '//ul[@class="ui-select-content"]/li[@data-value="threeMonths"]'
        ).click()

        time.sleep(random.uniform(0.5, 0.9))

        # Open the transaction-category dropdown and choose the option.
        self._browser.find_element_by_xpath(
            '//div[@id="J-category-select"]/a[1]').click()
        self._bill_option_control()

        time.sleep(random.uniform(1, 2))

        # Submit the query form.
        self._browser.find_element_by_id("J-set-query-form").click()
        logger.info("跳转到自定义时间页面....")
        logger.info(self._browser.current_url)

        yield scrapy.Request(url=self._browser.current_url,
                             callback=self.parse,
                             cookies=self.cookie)
    except Exception as err:
        # Best-effort boundary: report through the module logger (instead of
        # print) and shut the browser window, as before.
        logger.error(err)
        logger.debug(self._browser.current_url)
        self._browser.close()
def parse_personal(self, response):
    """Reveal the amounts with simulated mouse clicks, scrape them, then set up the bill query.

    The three "show amount" toggles are clicked at fixed screen coordinates
    through ``mouse_mov_and_click``; an ``AlipayUserItem`` is built from the
    browser's rendered page source and yielded. The transaction-record tab
    is then clicked the same way. If the browser lands on a ``checkSecurity``
    URL, the method waits up to 10 seconds for the URL to leave it; when it
    does not, a request for the current URL is scheduled as-is. Otherwise the
    three-month range and the transaction category are selected, the query
    is submitted, and a request for the resulting URL is scheduled with
    ``self.parse`` as callback.

    Args:
        response: Scrapy response passed to the callback; not read here.

    Yields:
        AlipayUserItem: the scraped amounts.
        scrapy.Request: request for the bill page (not produced when the
        filter setup raises).
    """
    # Imported locally so this method does not rely on the module's import
    # block already exposing TimeoutException.
    from selenium.common.exceptions import TimeoutException

    logger.info("当前页面: " + self._browser.current_url)

    # Click the account-balance, Yu'e Bao and Huabei reveal toggles by screen
    # position (an element-based click variant was used previously).
    mouse_mov_and_click(x=580, y=380)
    time.sleep(random.uniform(1, 2))
    mouse_mov_and_click(x=1120, y=380)
    time.sleep(random.uniform(1, 2))
    mouse_mov_and_click(x=570, y=510)

    # Give the page time to render the revealed amounts before snapshotting.
    time.sleep(2)
    page_sel = scrapy.Selector(text=self._browser.page_source)

    user_item = AlipayUserItem()
    user_item['user'] = self.username
    user_item['user_rest_huabei'] = page_sel.xpath(
        'string(//*[@id="available-amount-container"]/span)').extract_first()
    user_item['user_total_huabei'] = page_sel.xpath(
        'string(//*[@id="credit-amount-container"]/span)').extract_first()
    user_item['user_yeb_earn'] = page_sel.xpath(
        'string(//a[@id="J-income-num"])').extract_first()
    user_item['user_yeb_rest'] = page_sel.xpath(
        'string(//*[@id="J-assets-mfund-amount"]/span)').extract_first()
    user_item['create_time'] = str(
        int(time.mktime(datetime.datetime.now().timetuple())) * 1000)
    yield user_item

    # Randomised pause before navigating.
    time.sleep(random.uniform(2, 3))

    # Click the transaction-record tab by screen position.
    mouse_mov_and_click(x=1180, y=160)

    if "checkSecurity" in self._browser.current_url:
        logger.info("当前页面: " + self._browser.current_url)
        # Wait (10 s, polling every 2 s) for the browser to LEAVE the
        # security-check page. The previous until_not(... not in ...) call
        # returned False immediately while still on that page, so the
        # filter setup below could never run.
        try:
            is_change_page = WebDriverWait(self._browser, 10, 2).until(
                lambda drv: "checkSecurity" not in drv.current_url)
        except TimeoutException:
            is_change_page = False
        logger.debug("变量is_change_page的值为: {}".format(is_change_page))

        if not is_change_page:
            # Still on the security check: hand the current URL on as-is.
            yield scrapy.Request(url=self._browser.current_url,
                                 callback=self.parse,
                                 cookies=self.cookie)
            return
        # The page reached after the security check uses this dropdown id.
        range_dropdown_xpath = '//*[@id="J-select-range"]/a[1]'
    else:
        range_dropdown_xpath = '//*[@id="J-datetime-select"]/a[1]'

    try:
        # Open the date-range dropdown and pick the three-month preset.
        # (A custom begin/end date variant was prototyped here and disabled.)
        self._browser.find_element_by_xpath(range_dropdown_xpath).click()
        self._browser.find_element_by_xpath(
            '//ul[@class="ui-select-content"]/li[@data-value="threeMonths"]'
        ).click()

        time.sleep(random.uniform(0.5, 0.9))

        # Open the transaction-category dropdown and choose the option.
        self._browser.find_element_by_xpath(
            '//div[@id="J-category-select"]/a[1]').click()
        self._bill_option_control()

        time.sleep(random.uniform(1, 2))

        # Submit the query form.
        self._browser.find_element_by_id("J-set-query-form").click()
        logger.info("跳转到自定义时间页面....")
        logger.info(self._browser.current_url)

        yield scrapy.Request(url=self._browser.current_url,
                             callback=self.parse,
                             cookies=self.cookie)
    except Exception as err:
        # Best-effort boundary: log and leave the browser open.
        logger.error(err)
        logger.debug(self._browser.current_url)
def parse_personal(self, response):
    """Scrape the account summary from the response, then set up the bill query.

    Builds an ``AlipayUserItem`` from XPath queries over ``response`` and
    yields it. The browser is then driven to the bill page, the three-month
    range and the transaction category are selected, the query is submitted,
    and a request for the resulting URL is scheduled with ``self.parse`` as
    callback. Selenium errors are not caught here.

    Args:
        response: Scrapy response holding the personal page markup.

    Yields:
        AlipayUserItem: the scraped amounts plus ``create_time`` (local epoch
        seconds multiplied by 1000, stored as a string).
        scrapy.Request: request for the filtered bill page.
    """
    logger.info("当前页面: " + self._browser.current_url)

    page_sel = scrapy.Selector(response)

    # Item field -> XPath string() query, in the order the fields are filled.
    field_queries = (
        ('user_rest_huabei',
         'string(//div[@class="amount-des"]/p[1]/span[@class="highlight"]/strong)'),
        ('user_total_huabei',
         'string(//div[@class="amount-des"]/p[2]/strong)'),
        ('user_yeb_rest',
         'string(//p[@class="i-assets-mFund-amount"]/strong)'),
        ('user_yeb_earn',
         'string(//a[@id="J-income-num"])'),
    )
    user_item = AlipayUserItem()
    user_item['user'] = self.username
    for field_name, query in field_queries:
        user_item[field_name] = page_sel.xpath(query).extract()[0]
    epoch_seconds = int(time.mktime(datetime.datetime.now().timetuple()))
    user_item['create_time'] = str(epoch_seconds * 1000)
    yield user_item

    # Randomised pause before navigating.
    time.sleep(random.uniform(0.2, 0.9))

    # Open the bill page through the global navigation link.
    nav_link = self._browser.find_element_by_xpath(
        '//ul[@class="global-nav"]/li[@class="global-nav-item "]/a')
    nav_link.click()

    # Open the date-range dropdown and pick the three-month preset.
    # (A custom begin/end date variant was prototyped here and disabled.)
    range_dropdown = self._browser.find_element_by_xpath(
        '//div[@id="J-datetime-select"]/a[1]')
    range_dropdown.click()
    three_months_option = self._browser.find_element_by_xpath(
        '//ul[@class="ui-select-content"]/li[@data-value="threeMonths"]')
    three_months_option.click()

    time.sleep(random.uniform(0.5, 0.9))

    # Open the transaction-category dropdown and choose the option.
    category_dropdown = self._browser.find_element_by_xpath(
        '//div[@id="J-category-select"]/a[1]')
    category_dropdown.click()
    self._bill_option_control()

    time.sleep(random.uniform(1, 2))

    # Submit the query form.
    query_button = self._browser.find_element_by_id("J-set-query-form")
    query_button.click()
    logger.info("跳转到自定义时间页面....")
    logger.info(self._browser.current_url)

    yield scrapy.Request(url=self._browser.current_url,
                         callback=self.parse,
                         cookies=self.cookie)