コード例 #1
0
    def parse(self, response):
        """Scrape the account overview from the browser, then start the bill crawl.

        The hidden amounts are revealed by clicking their "show amount" links,
        the rendered page is read from the Selenium browser (``response`` itself
        is not used) and one ``AlipayUserItem`` is yielded. When the generator
        is resumed, the browser is pointed at the advanced record page and
        ``self.cookie`` is handed to ``AlipayBillParser.crawler``.
        """
        logger.info("当前页面: " + self._browser.current_url)

        # "Show amount" links for the account balance, Yu'e Bao and Huabei,
        # each paired with whether a short random pause follows the click.
        reveal_links = (
            ('//*[@id="showAccountAmount"]/a[1]', True),
            ('//*[@id="showYuebaoAmount"]/a[1]', True),
            ('//*[@id="showHuabeiAmount"]/a[1]', False),
        )
        for link_xpath, pause_after in reveal_links:
            try:
                self._browser.find_element_by_xpath(link_xpath).click()
            except NoSuchElementException:
                # The link is not on this page; carry on with the next one.
                continue
            if pause_after:
                time.sleep(random.uniform(0.3, 0.9))

        # Fixed pause before the page source is snapshotted.
        time.sleep(2)

        overview = scrapy.Selector(text=self._browser.page_source)

        # Item field -> XPath of the element holding its text.
        field_xpaths = (
            ('user_rest_huabei',
             'string(//*[@id="available-amount-container"]/span)'),
            ('user_total_huabei',
             'string(//*[@id="credit-amount-container"]/span)'),
            ('user_yeb_earn',
             'string(//a[@id="J-income-num"])'),
            ('user_yeb_rest',
             'string(//*[@id="J-assets-mfund-amount"]/span)'),
        )
        record = AlipayUserItem()
        record['user'] = self.username
        for field, field_xpath in field_xpaths:
            record[field] = overview.xpath(field_xpath).extract_first()

        # Creation time: whole seconds since the epoch, multiplied by 1000.
        now_struct = datetime.datetime.now().timetuple()
        record['create_time'] = str(int(time.mktime(now_struct)) * 1000)
        yield record

        # Dump the cookies that are about to be handed to the bill parser.
        print(self.cookie)

        # Go straight to the advanced record page instead of clicking the
        # transaction-record tab.
        self._browser.get('https://consumeprod.alipay.com/record/advanced.htm')

        # Crawl the bills with the current cookies.
        bill_parser = AlipayBillParser()
        bill_parser.crawler(cookie_dict=self.cookie)
コード例 #2
0
    def parse_personal(self, response):
        """Fetch the hidden account amounts from the portal JSON API, then set up the bill query.

        For each of the account types ``account``, ``mfund`` and ``huabei`` the
        matching ``<type>WithDynamicFont.json`` endpoint is requested with the
        cookies in ``self.cookie``; markup is stripped from every value of the
        reply's ``"result"`` object. One ``AlipayUserItem`` is yielded from
        those values (``response`` itself is not used).

        Afterwards the browser is driven to the bill page, the three-month
        range and a trade category are selected and the search form is
        submitted. On success a ``scrapy.Request`` for the resulting URL is
        yielded with ``self.parse`` as callback.

        Any exception raised while configuring the bill page is logged and the
        browser window is closed; exceptions raised before that point
        (including a missing ``ctoken`` cookie) propagate to the caller.
        """
        logger.info("当前页面: " + self._browser.current_url)

        # One endpoint per hidden amount:
        #   account balance -> accountWithDynamicFont.json
        #   Yu'e Bao        -> mfundWithDynamicFont.json
        #   Huabei          -> huabeiWithDynamicFont.json
        ctoken = self.cookie['ctoken']
        url_template = (
            "https://my.alipay.com/portal/{name}WithDynamicFont.json"
            "?className={name}WithDynamicFontClass"
            "&encrypt=true"
            "&_input_charset=utf-8"
            "&ctoken={ctoken}"
            "&_output_charset=utf-8"
        )
        # Compiled once; strips HTML tags from the returned values.
        tag_pattern = re.compile(r'<[^>]+>', re.S)

        account_data = []
        # A single session, closed deterministically, serves all three
        # requests instead of leaking one unclosed session per account type.
        # NOTE(review): no request timeout is set; a stalled connection blocks
        # the spider.
        with requests.Session() as session:
            requests.utils.add_dict_to_cookiejar(session.cookies, self.cookie)
            for name in ("account", "mfund", "huabei"):
                reply = session.get(
                    url=url_template.format(name=name, ctoken=ctoken))
                account_data.append(
                    [tag_pattern.sub('', value)
                     for value in reply.json()["result"].values()])
        logger.debug(account_data)

        # The indices below rely on the order of the values in each "result"
        # object -- presumably first = current amount, last = total; verify
        # against the API replies.
        user_item = AlipayUserItem()
        user_item['user'] = "******"
        user_item['user_account'] = account_data[0][0]
        user_item['user_yeb_rest'] = account_data[1][0]
        user_item['user_rest_huabei'] = account_data[2][0]
        user_item['user_total_huabei'] = account_data[2][-1]
        user_item['create_time'] = str(
            int(time.mktime(datetime.datetime.now().timetuple())) * 1000)
        yield user_item

        # Random pause before navigating.
        time.sleep(random.uniform(1, 2))

        # Open the bill page through the global navigation bar.
        self._browser.find_element_by_xpath(
            '//ul[@class="global-nav"]/li[@class="global-nav-item "]/a').click()

        try:
            # Open the date-range drop-down and pick the three-month option.
            # (A custom begin/end date range was supported by disabled code
            # that has been removed here.)
            self._browser.find_element_by_xpath(
                '//div[@id="J-datetime-select"]/a[1]').click()
            self._browser.find_element_by_xpath(
                '//ul[@class="ui-select-content"]/li[@data-value="threeMonths"]').click()

            time.sleep(random.uniform(0.5, 0.9))

            # Open the trade-category drop-down and let the helper choose.
            self._browser.find_element_by_xpath(
                '//div[@id="J-category-select"]/a[1]').click()
            self._bill_option_control()

            time.sleep(random.uniform(1, 2))

            # Submit the search form.
            self._browser.find_element_by_id("J-set-query-form").click()
            logger.info("跳转到自定义时间页面....")
            logger.info(self._browser.current_url)

            # Continue with the bill page in ``parse``.
            yield scrapy.Request(url=self._browser.current_url,
                                 callback=self.parse,
                                 cookies=self.cookie)
        except Exception as err:
            # Report through the logger like the rest of the method, then
            # give up on this browser window.
            logger.error(err)
            logger.error(self._browser.current_url)
            self._browser.close()
コード例 #3
0
    def parse_personal(self, response):
        """Read the account amounts from the rendered personal page, then set up the bill query.

        The amounts are taken from the Selenium browser's page source after
        the "show amount" links for the account balance and Yu'e Bao have been
        clicked; ``response`` itself is not used. One ``AlipayUserItem`` is
        yielded (``user_total_huabei`` is not populated by this method).

        Afterwards the browser is driven to the bill page, the three-month
        range and a trade category are selected and the search form is
        submitted. On success a ``scrapy.Request`` for the resulting URL is
        yielded with ``self.parse`` as callback.

        Any exception raised while configuring the bill page is logged and the
        browser window is closed; exceptions raised before that point
        propagate to the caller.
        """
        logger.info("当前页面: " + self._browser.current_url)

        # NOTE: a disabled alternative that fetched the amounts from the
        # portal's "<type>WithDynamicFont.json" endpoints has been removed;
        # the amounts are scraped from the rendered page instead.

        # implicitly_wait() only configures how long element lookups keep
        # polling; it does not pause execution. Set it before the first
        # lookup so every find_element call below benefits from it.
        self._browser.implicitly_wait(5)

        # Reveal the account balance and the Yu'e Bao amount.
        self._browser.find_element_by_xpath(
            '//*[@id="showAccountAmount"]/a[1]').click()
        time.sleep(random.uniform(0.3, 0.9))
        self._browser.find_element_by_xpath(
            '//*[@id="showYuebaoAmount"]/a[1]').click()

        # Explicit pause so the revealed amounts are rendered before the page
        # source is snapshotted (the implicit wait above cannot provide this).
        time.sleep(2)

        page_sel = scrapy.Selector(text=self._browser.page_source)

        user_item = AlipayUserItem()
        user_item['user'] = "******"
        user_item['user_rest_huabei'] = page_sel.xpath(
            'string(//*[@id="J-assets-pcredit"]/div/div/div[2]/div/p[2]/span/strong)'
        ).extract()[0]
        user_item['user_yeb_earn'] = page_sel.xpath(
            'string(//a[@id="J-income-num"])').extract()[0]
        user_item['user_yeb_rest'] = page_sel.xpath(
            'string(//*[@id="J-assets-mfund-amount"]/strong)').extract()[0]
        user_item['create_time'] = str(
            int(time.mktime(datetime.datetime.now().timetuple())) * 1000)
        yield user_item

        # Random pause before navigating.
        time.sleep(random.uniform(1, 2))

        # Open the bill page through the global navigation bar.
        self._browser.find_element_by_xpath(
            '//ul[@class="global-nav"]/li[@class="global-nav-item "]/a').click()

        try:
            # Open the date-range drop-down and pick the three-month option.
            # (A custom begin/end date range was supported by disabled code
            # that has been removed here.)
            self._browser.find_element_by_xpath(
                '//div[@id="J-datetime-select"]/a[1]').click()
            self._browser.find_element_by_xpath(
                '//ul[@class="ui-select-content"]/li[@data-value="threeMonths"]').click()

            time.sleep(random.uniform(0.5, 0.9))

            # Open the trade-category drop-down and let the helper choose.
            self._browser.find_element_by_xpath(
                '//div[@id="J-category-select"]/a[1]').click()
            self._bill_option_control()

            time.sleep(random.uniform(1, 2))

            # Submit the search form.
            self._browser.find_element_by_id("J-set-query-form").click()
            logger.info("跳转到自定义时间页面....")
            logger.info(self._browser.current_url)

            # Continue with the bill page in ``parse``.
            yield scrapy.Request(url=self._browser.current_url,
                                 callback=self.parse,
                                 cookies=self.cookie)
        except Exception as err:
            # Report through the logger like the rest of the method, then
            # give up on this browser window.
            logger.error(err)
            logger.error(self._browser.current_url)
            self._browser.close()
コード例 #4
0
    def parse_personal(self, response):
        """Read the account amounts from the rendered personal page, then set up the bill query.

        The hidden amounts are revealed with simulated mouse clicks at fixed
        screen coordinates, the rendered page is read from the Selenium
        browser (``response`` itself is not used) and one ``AlipayUserItem``
        is yielded.

        The transaction-record tab is then opened. If that lands on a
        ``checkSecurity`` URL, the method waits up to 10 seconds for the
        browser to leave it. If it does not, a ``scrapy.Request`` for the
        current URL is yielded as a fallback. Otherwise the bill query form is
        configured and submitted and a ``scrapy.Request`` for the resulting
        URL is yielded. Both requests use ``self.parse`` as callback.

        Exceptions raised while configuring the bill query are logged and
        swallowed; exceptions raised before that point propagate.
        """
        logger.info("当前页面: " + self._browser.current_url)

        # Reveal the account balance, Yu'e Bao and Huabei amounts with
        # simulated mouse clicks (per the original note these go through
        # DDx64.dll rather than Selenium element clicks).
        mouse_mov_and_click(x=580, y=380)
        time.sleep(random.uniform(1, 2))
        mouse_mov_and_click(x=1120, y=380)
        time.sleep(random.uniform(1, 2))
        mouse_mov_and_click(x=570, y=510)

        # Fixed pause before the page source is snapshotted.
        time.sleep(2)

        page_sel = scrapy.Selector(text=self._browser.page_source)

        user_item = AlipayUserItem()
        user_item['user'] = self.username
        user_item['user_rest_huabei'] = page_sel.xpath(
            'string(//*[@id="available-amount-container"]/span)').extract_first()
        user_item['user_total_huabei'] = page_sel.xpath(
            'string(//*[@id="credit-amount-container"]/span)').extract_first()
        user_item['user_yeb_earn'] = page_sel.xpath(
            'string(//a[@id="J-income-num"])').extract_first()
        user_item['user_yeb_rest'] = page_sel.xpath(
            'string(//*[@id="J-assets-mfund-amount"]/span)').extract_first()
        user_item['create_time'] = str(
            int(time.mktime(datetime.datetime.now().timetuple())) * 1000)
        yield user_item

        # Random pause before navigating.
        time.sleep(random.uniform(2, 3))

        # Open the transaction-record tab with a simulated mouse click.
        mouse_mov_and_click(x=1180, y=160)

        if "checkSecurity" in self._browser.current_url:
            # Imported locally so the block does not depend on the file's
            # import section already providing this name.
            from selenium.common.exceptions import TimeoutException

            logger.info("当前页面: " + self._browser.current_url)
            # Wait (10 s, polling every 2 s) for the browser to LEAVE the
            # security-check page. The previous until_not() on the same
            # predicate returned False immediately, so the branch that
            # configures the bill query could never run.
            try:
                is_change_page = WebDriverWait(self._browser, 10, 2).until(
                    lambda drv: "checkSecurity" not in drv.current_url)
            except TimeoutException:
                is_change_page = False
            logger.debug("变量is_change_page的值为: {}".format(is_change_page))

            if not is_change_page:
                # Still on the security check: hand the current URL to
                # ``parse`` unchanged, as before.
                yield scrapy.Request(url=self._browser.current_url,
                                     callback=self.parse,
                                     cookies=self.cookie)
                return
            # The page reached after the security check exposes the range
            # selector under a different element id.
            range_select_xpath = '//*[@id="J-select-range"]/a[1]'
        else:
            range_select_xpath = '//*[@id="J-datetime-select"]/a[1]'

        try:
            self._submit_bill_query(range_select_xpath)

            # Continue with the bill page in ``parse``.
            yield scrapy.Request(url=self._browser.current_url,
                                 callback=self.parse,
                                 cookies=self.cookie)
        except Exception as err:
            logger.error(err)
            logger.debug(self._browser.current_url)

    def _submit_bill_query(self, range_select_xpath):
        """Pick the three-month range and a trade category, then submit the bill search form.

        ``range_select_xpath`` locates the link that opens the date-range
        drop-down on the current page. Lookup or click failures propagate to
        the caller.
        """
        # Open the date-range drop-down and pick the three-month option.
        # (A custom begin/end date range was supported by disabled code that
        # has been removed here.)
        self._browser.find_element_by_xpath(range_select_xpath).click()
        self._browser.find_element_by_xpath(
            '//ul[@class="ui-select-content"]/li[@data-value="threeMonths"]').click()

        time.sleep(random.uniform(0.5, 0.9))

        # Open the trade-category drop-down and let the helper choose.
        self._browser.find_element_by_xpath(
            '//div[@id="J-category-select"]/a[1]').click()
        self._bill_option_control()

        time.sleep(random.uniform(1, 2))

        # Submit the search form.
        self._browser.find_element_by_id("J-set-query-form").click()
        logger.info("跳转到自定义时间页面....")
        logger.info(self._browser.current_url)
コード例 #5
0
    def parse_personal(self, response):
        """Extract the account figures from ``response``, then set up the bill query.

        One ``AlipayUserItem`` is built from ``response`` and yielded. When
        the generator is resumed, the browser is driven to the bill page, the
        three-month range and a trade category are selected and the search
        form is submitted. A ``scrapy.Request`` for the resulting URL is then
        yielded with ``self.parse`` as callback. Nothing is caught here:
        lookup or click failures propagate to the caller.
        """
        logger.info("当前页面: " + self._browser.current_url)

        selector = scrapy.Selector(response)

        def text_of(expression):
            # First (and only) result of a string(...) XPath expression.
            return selector.xpath(expression).extract()[0]

        def click_xpath(expression):
            # Locate an element in the live browser and click it.
            self._browser.find_element_by_xpath(expression).click()

        # Item field -> XPath of the element holding its text.
        amount_xpaths = (
            ('user_rest_huabei',
             'string(//div[@class="amount-des"]/p[1]/span[@class="highlight"]/strong)'),
            ('user_total_huabei',
             'string(//div[@class="amount-des"]/p[2]/strong)'),
            ('user_yeb_rest',
             'string(//p[@class="i-assets-mFund-amount"]/strong)'),
            ('user_yeb_earn',
             'string(//a[@id="J-income-num"])'),
        )
        account = AlipayUserItem()
        account['user'] = self.username
        for field, expression in amount_xpaths:
            account[field] = text_of(expression)

        # Creation time: whole seconds since the epoch, multiplied by 1000.
        now_struct = datetime.datetime.now().timetuple()
        account['create_time'] = str(int(time.mktime(now_struct)) * 1000)
        yield account

        # Random pause before navigating.
        time.sleep(random.uniform(0.2, 0.9))

        # Open the bill page through the global navigation bar.
        click_xpath('//ul[@class="global-nav"]/li[@class="global-nav-item "]/a')

        # Open the date-range drop-down and pick the three-month option.
        click_xpath('//div[@id="J-datetime-select"]/a[1]')
        click_xpath('//ul[@class="ui-select-content"]/li[@data-value="threeMonths"]')

        time.sleep(random.uniform(0.5, 0.9))

        # Open the trade-category drop-down and let the helper choose.
        click_xpath('//div[@id="J-category-select"]/a[1]')
        self._bill_option_control()

        time.sleep(random.uniform(1, 2))

        # Submit the search form.
        search_button = self._browser.find_element_by_id("J-set-query-form")
        search_button.click()
        logger.info("跳转到自定义时间页面....")
        logger.info(self._browser.current_url)

        # Continue with the bill page in ``parse``.
        follow_up = scrapy.Request(url=self._browser.current_url,
                                   callback=self.parse,
                                   cookies=self.cookie)
        yield follow_up