コード例 #1
0
ファイル: auto_tb.py プロジェクト: yfeng2018/python-1
    async def _crawl_tb_one_shop_intro_page_info(self, first_shop_title_ele,
                                                 shop_title) -> dict:
        """
        Enter a shop from the search result list and collect its intro-page info.

        :param first_shop_title_ele: UI element of the shop title; it is clicked
            to enter the shop
        :param shop_title: shop name, stored under 'shop_name' in the result
        :return: dict with the keys 'shop_name', 'phone_list', 'address'
            and 'manager_name'
        """
        get_current_func_info_by_traceback(self=self, logger=self.lg)
        # Click the shop title element to enter the shop. No explicit sleep
        # afterwards: per the original author's note the next element shows
        # up quickly and atx waits for it.
        first_shop_title_ele.click()

        # Click the follower count to open the shop-impression page.
        # Raw string: '\d' inside a plain literal is an invalid escape sequence.
        self.d(descriptionMatches=r'粉丝数\d+.*?',
               className="android.view.View").click()
        await async_sleep(6.5)

        # The manager name could not be located on the page, so it is read
        # through the file-based helper instead.
        manager_name = await self._read_now_manager_name_file()
        phone_list = await self._get_shop_phone_num_list()
        address = await self._get_shop_address()

        return {
            'shop_name': shop_title,
            'phone_list': phone_list,
            'address': address,
            'manager_name': manager_name,
        }
コード例 #2
0
    async def db_script0(
        self,
        select_sql_str: str,
        update_sql_str: str,
        func_get_params,
    ):
        """
        Select rows, then run one update statement per selected row.

        :param select_sql_str: SQL handed to ``self.sql_cli._select_table``
        :param update_sql_str: SQL handed to ``self.sql_cli._update_table``
            once for every selected row
        :param func_get_params: callable invoked as ``func_get_params(k=item)``;
            its return value is passed as ``params`` of the update
        :return: None
        """
        get_current_func_info_by_traceback(self=self)
        db_res = self.sql_cli._select_table(sql_str=select_sql_str)
        if not db_res:
            # Covers None as well as any empty result container.
            print('目标db_res为空list! 跳过此次!')
            return None

        for item in db_res:
            self.sql_cli._update_table(
                sql_str=update_sql_str,
                params=func_get_params(k=item),
            )

        return None
コード例 #3
0
    async def get_db_res(self) -> list:
        """
        Purge expired coupons, then fetch the target goods_id list.

        The rows come from ``self.sql_cli._select_table`` called with
        ``self.sql_tr0``.

        :return: non-empty list of the selected rows
        :raises AssertionError: when no rows were obtained, either because the
            query returned nothing or because an error occurred on the way
        """
        get_current_func_info_by_traceback(self=self, logger=self.lg)
        db_res = []
        try:
            self.lg.info('清除过期优惠券ing ...')
            # Purge expired coupons before selecting.
            self.sql_cli._delete_table(
                sql_str=
                'delete from dbo.coupon_info where GETDATE()-end_time >= 3',
                params=None,
            )
            self.lg.info('休眠15s ...')
            await async_sleep(15)
            self.lg.info('获取新待检测的goods数据ing...')
            db_res = list(self.sql_cli._select_table(sql_str=self.sql_tr0, ))
        except Exception:
            self.lg.error('遇到错误:', exc_info=True)
            # Replace the SQL client with a fresh instance after a failure.
            self.sql_cli = SqlServerMyPageInfoSaveItemPipeline()

        # Explicit raise instead of `assert`, so the check is not stripped
        # when Python runs with -O; the exception type stays AssertionError.
        if not db_res:
            raise AssertionError('db_res is empty')
        self.lg.info('db_res_len: {}'.format(len(db_res)))

        return db_res
コード例 #4
0
    async def _fck_run(self):
        """
        Run the task-creation loop forever.

        Each round prints the current time, awaits
        ``self.execute_all_create_dcs_tasks()`` and then sleeps for
        ``self.sleep_time`` seconds. The loop never exits on its own.
        """
        func_info = get_current_func_info_by_traceback(self=self)
        print(func_info)
        # TODO: old data is intentionally not cleared, so that after an abort
        #   the script can resume from where it stopped (data is kept).
        # await self.clear_over_redis_old_tasks()

        while True:
            now_time_msg = '\nnow_time: {}'.format(get_shanghai_time())
            print(now_time_msg)
            await self.execute_all_create_dcs_tasks()
            sleep_msg = 'sleep {} s ...'.format(self.sleep_time)
            print(sleep_msg)
            await async_sleep(self.sleep_time)
コード例 #5
0
ファイル: auto_tb.py プロジェクト: yfeng2018/python-1
    async def _get_now_page_shop_title_list(self) -> list:
        """
        Collect the shop title texts of the current page.

        :return: non-empty list with the 'text' value of every matched
            shop-title element ('' when an element has no 'text' entry)
        :raises AssertionError: when no shop title was found
        """
        get_current_func_info_by_traceback(self=self, logger=self.lg)
        # NOTE: elements are addressed through their index in this list rather
        #   than through `instance`: per the original author, with `instance`
        #   the element for instance=0 eventually is not on the current page
        #   any more, cannot be clicked, and the following steps go wrong.
        title_eles = self.d(
            resourceId="com.taobao.taobao:id/shopTitle",
            className="android.widget.TextView",
            clickable=False,
        )
        now_page_shop_title_list = [
            ele.info.get('text', '') for ele in title_eles
        ]
        self.lg.info('now_page_shop_title_list: {}'.format(
            now_page_shop_title_list))

        # Explicit raise instead of `assert`, so the check is not stripped
        # when Python runs with -O; the exception type stays AssertionError.
        if not now_page_shop_title_list:
            raise AssertionError('now_page_shop_title_list不为空list!')

        return now_page_shop_title_list
コード例 #6
0
    async def get_db_res(self) -> list:
        """
        Purge expired coupons, then fetch the target goods_id list.

        The rows come from ``self.sql_cli._select_table`` called with
        ``self.sql_tr0``.

        :return: non-empty list of the selected rows
        :raises AssertionError: when no rows were obtained, either because the
            query returned nothing or because an error occurred on the way
        """
        get_current_func_info_by_traceback(self=self, logger=self.lg)
        db_res = []
        try:
            # Purge expired coupons before selecting.
            self.sql_cli._delete_table(
                sql_str=
                'delete from dbo.coupon_info where GETDATE()-end_time >= 3',
                params=None,
            )
            await async_sleep(15)
            db_res = list(self.sql_cli._select_table(sql_str=self.sql_tr0, ))
        except Exception:
            # Errors are logged only; the emptiness check below reports them.
            self.lg.error('遇到错误:', exc_info=True)

        # Explicit raise instead of `assert`, so the check is not stripped
        # when Python runs with -O; the exception type stays AssertionError.
        if not db_res:
            raise AssertionError('db_res is empty')
        self.lg.info('db_res_len: {}'.format(len(db_res)))

        return db_res
コード例 #7
0
ファイル: auto_tb.py プロジェクト: yfeng2018/python-1
    async def _search_shop_info_by_one_keyword(self, keyword) -> list:
        """
        Search for one keyword and collect shop info from the result list.

        Flow: type the keyword into the search box, run the search, switch to
        the shop tab and sort by sales, then repeatedly pick one shop title
        from the current page, enter the shop, collect its intro-page info,
        send it on, go back to the result list and swipe up.

        :param keyword: search keyword typed into the search box
        :return: list of the shop info dicts collected for this keyword;
            an empty list when the search shows the "no results" tip
        """
        get_current_func_info_by_traceback(self=self, logger=self.lg)
        self.lg.info(
            '--->>> 即将开始采集 keyword: {} 对应的shop info...'.format(keyword))
        # Whether the search is already a shop search.
        # NOTE(review): this is a local that is always False when checked
        #   below, so the tab/sort clicks run on every call — confirm intent.
        is_shop_search = False

        # Type the keyword
        self.d(resourceId="com.taobao.taobao:id/searchEdit").send_keys(keyword)
        # Run the search
        self.d(resourceId="com.taobao.taobao:id/searchbtn").click()
        await async_sleep(2)

        if not is_shop_search:
            # Click the shop tab (per the original note this is only needed
            # once; later searches are shop searches already)
            self.d(resourceId="com.taobao.taobao:id/tab_text",
                   text=u"店铺").click()
            await async_sleep(2)
            # Click "sort by sales"
            self.d(resourceId="com.taobao.taobao:id/show_text",
                   text=u"销量优先",
                   description=u"销量优先",
                   className="android.widget.TextView").click()
            is_shop_search = True

        await async_sleep(2)
        if self.d(
                resourceId="com.taobao.taobao:id/tipTitle",
                text=u"没有搜索结果",
                className="android.widget.TextView")\
                .exists():
            # The keyword has no search results
            self.lg.info('### 该关键字: {} 无搜索结果!'.format(keyword))
            return []

        # TODO Approach 1 could not locate one specific element inside the
        #   element list and operate on it afterwards, so it was dropped.
        # Approach 2: swipe one shop at a time and collect it
        self.first_swipe_height, self.second_swipe_height = await self._get_first_swipe_height_and_second_swipe_height(
        )
        self.lg.info(
            'self.first_swipe_height: {}, self.second_swipe_height: {}'.format(
                self.first_swipe_height, self.second_swipe_height))

        # Swipe up first to hide the "all / tmall / shop / taobao experience" tabs
        await u2_up_swipe_some_height(d=self.d,
                                      swipe_height=self.first_swipe_height)

        # Titles already handled during this call
        shop_name_list = []
        # Starts at 1 and the loop condition is strict, so at most
        # max_shop_crawl_count - 1 shops are counted per keyword.
        shop_crawl_count = 1
        res = []
        while shop_crawl_count < self.max_shop_crawl_count:
            if self.d(resourceId="com.taobao.taobao:id/loading_text",
                      text=u"没有更多内容了",
                      className="android.widget.TextView").exists():
                # The "no more content" footer is visible: stop this keyword
                self.lg.info('该关键字下滑,无更多内容!! 跳过该关键字采集!')
                break

            try:
                now_page_shop_title_list = await self._get_now_page_shop_title_list(
                )
            except (AssertionError, UiObjectNotFoundError):
                self.lg.error('遇到错误:', exc_info=True)
                # Go back from the shop home page to the search page
                await u2_page_back(d=self.d, back_num=1)
                await u2_up_swipe_some_height(
                    d=self.d, swipe_height=self.second_swipe_height / 2)
                # Wait until the returned list is displayed
                await async_sleep(1.)
                continue

            try:
                # clickable=False tells whether a button on the current page
                # was already clicked; not clicked yet means False
                # (original author's note).
                # The first round takes title index 0, later rounds index 1.
                first_shop_title_ele = self.d(
                    resourceId="com.taobao.taobao:id/shopTitle",
                    className="android.widget.TextView",
                    # instance=0,
                    text=now_page_shop_title_list[1]
                    if shop_crawl_count != 1 else now_page_shop_title_list[0],
                    # make sure every shop gets traversed
                    clickable=False,
                )
            except IndexError:
                self.lg.error('遇到错误:', exc_info=True)
                # The title list is too short to pick first_shop_title_ele
                await u2_up_swipe_some_height(
                    d=self.d, swipe_height=self.second_swipe_height / 2)
                # Wait until the returned list is displayed
                await async_sleep(1.)
                continue

            try:
                shop_title = first_shop_title_ele.info.get('text', '')
            except (UiObjectNotFoundError, ):
                # NOTE(review): no swipe happens before this retry — confirm
                #   the next iteration can make progress.
                continue

            if await self._shop_title_has_been_traversed(
                    shop_title=shop_title,
                    shop_name_list=shop_name_list,
            ):
                # NOTE(review): no swipe is visible here either; presumably
                #   the helper moves the page itself — verify.
                continue

            self.lg.info(
                '正在采集店名: {}, shop_crawl_count: {}, keyword: {} ...'.format(
                    shop_title, shop_crawl_count, keyword))
            shop_name_list.append(shop_title)
            try:
                shop_intro_page_info = await self._crawl_tb_one_shop_intro_page_info(
                    first_shop_title_ele=first_shop_title_ele,
                    shop_title=shop_title,
                )
                send_res = await self._send_2_tb_shop_info_handle(
                    one_dict=shop_intro_page_info)
                res.append(shop_intro_page_info)
                if send_res == 'success'\
                        and shop_title not in self.company_name_list_filter:
                    # Remember the shops that were sent successfully
                    self.company_name_list_filter.add(shop_title)

            except (UiObjectNotFoundError, AssertionError):
                self.lg.error('遇到错误:', exc_info=True)
                await self._back_2_search_page(
                    first_shop_title_ele=first_shop_title_ele)

                if shop_title not in self.company_name_list_filter:
                    # Also remember shops that failed: whatever the outcome,
                    # every traversed shop is stored in the filter.
                    self.company_name_list_filter.add(shop_title)

                # The failed shop does not increase shop_crawl_count
                continue

            # Return to the search result list page.
            # Going back a fixed two pages left the pages in a mixed-up state:
            # await u2_page_back(d=self.d, back_num=2)
            # so the return is done through the element-based helper instead.
            await self._back_2_search_page(
                first_shop_title_ele=first_shop_title_ele)

            await u2_up_swipe_some_height(
                d=self.d, swipe_height=self.second_swipe_height)
            # await async_sleep(2)  # wait until the returned list is displayed
            shop_crawl_count += 1

        self.lg.info('--->>> 采集 keyword: {} 对应的shop info 完毕!'.format(keyword))

        return res