def __execute_sycm_product_actions(self, task):
        """Run every SYCM product action against a context built from ``task``.

        Reads ``cateId``, ``cateName``, ``account`` and ``dateStr`` from
        ``task.raw_data``, loads the stored cookies and legality token for
        the account, prepares the brand-list and hot-rank HTTP requests and
        then executes each action from ``get_sycm_product_actions()``.

        :param task: task object exposing a ``raw_data`` mapping.
        :raises CookieNotFoundException: when no cookies are stored for the
            account.
        """
        payload = task.raw_data
        category_id = payload['cateId']
        category_name = payload['cateName']
        account = payload['account']
        collect_date = payload['dateStr']

        ctx = Context()
        ctx.attach(Context.KEY_CURRENT_TASK, task)
        ctx.attach(Context.KEY_IS_UPDATE_COOKIES, False)
        headers = SpiderHttp.get_sycm_headers('https://sycm.taobao.com/mc/mq/product_insight')
        ctx.attach(Context.KEY_HEADERS, headers)

        cookie_jar = self._cookie_service.load(account)
        if not cookie_jar:
            raise CookieNotFoundException('cookie not found')
        ctx.attach(Context.KEY_COOKIES, cookie_jar)
        ctx.attach(Context.KEY_IS_UPDATE_COOKIES, False)
        ctx.attach(Context.KEY_CURRENT_TASK_ACCOUNT, account)

        # Seed record shared by the whole action chain; the date to collect
        # comes from the task payload.
        seed_good = Good()
        seed_good.set_category_id(category_id)
        seed_good.set_category_name(category_name)
        seed_good.set_flag(str(int(GoodDataType.initial)))
        seed_good.set_date(collect_date)
        ctx.attach(Context.KEY_GOOD_DICT, seed_good)

        brands_url = SpiderUrls.get_sycm_product_get_brands_url(category_id)
        token = self._cookie_service.load(account=account, type_=PickleFileType.legality_token)
        # The hot-rank query covers a single day: start and end are the same.
        hot_rank_url = SpiderUrls.get_sycm_product_prod_hot_rank(
            start_data=collect_date,
            end_date=collect_date,
            cate_id=category_id,
            token=token,
        )
        prepared_requests = (
            (Context.KEY_SYCM_PRODUCT_GET_BRANDS_HTTP_REQUEST,
             HttpRequest(url=brands_url, method=HttpMethod.GET)),
            (Context.KEY_SYCM_PRODUCT_PROD_HOT_RANK_HTTP_REQUEST,
             HttpRequest(url=hot_rank_url, method=HttpMethod.GET)),
        )
        for context_key, http_request in prepared_requests:
            ctx.attach(context_key, http_request)

        for step in self.get_sycm_product_actions():
            step.execute(context=ctx)
    def test(self):
        """Run the presearch and sale-list actions for a fixed sample product.

        Builds a context from the stored cookies and proxy of
        ``self.account`` and a hard-coded brand/model. If the sale-list
        action raises ``CookieNeedUpdateException`` the cookie jar is
        written back through the cookie service and the exception is
        swallowed.
        """
        cookie_jar = self.__cookie_service.load(account=self.account)

        ctx = Context()
        ctx.attach(Context.KEY_IS_UPDATE_COOKIES, True)
        ctx.attach(Context.KEY_HEADERS, SpiderHttp.get_taobao_headers('https://s.m.taobao.com/h5'))
        ctx.attach(Context.KEY_COOKIES, cookie_jar)
        ctx.attach(Context.KEY_CURRENT_TASK_ACCOUNT, self.account)
        ctx.attach(Context.KEY_CURRENT_PROXY,
                   self.__proxy_service.get_s_proxy(self.account['username']))

        sample_good = Good()
        sample_good.set_brand_name('Flyco/飞科')
        sample_good.set_model_name('FR5218')
        ctx.attach(Context.KEY_GOOD_DICT, sample_good)

        # The presearch step is deliberately outside the try block: only the
        # sale-list step has its cookie-update signal handled here.
        TaobaoPresearchAction().execute(context=ctx)
        try:
            TaobaoSaleListPageAction().execute(context=ctx)
        except CookieNeedUpdateException:
            self.__cookie_service.dump(cookie_jar, self.account)
 def execute_direct_good_actions(self, date_str):
     """Collect ``sycm_list`` tasks for ``date_str`` and persist each good.

     ``TaskDirectCollectAction`` is expected to place the collected tasks
     under ``Context.KEY_CURRENT_TASKS``; for every task the ``goodResult``
     payload is wrapped in ``Good`` and handed to a new
     ``GoodDirectPersistAction`` through the same context.

     :param date_str: value attached as ``Context.KEY_DIRECT_COLLECT_DATE``.
     """
     ctx = Context()
     ctx.attach(Context.KEY_DIRECT_COLLECT_DATE, date_str)
     ctx.attach(Context.KEY_CURRENT_TASK_TYPE, TaobaoTaskType.sycm_list)
     TaskDirectCollectAction().execute(context=ctx)

     for pending in ctx.get(Context.KEY_CURRENT_TASKS, []):
         ctx.attach(Context.KEY_GOOD_DICT, Good(pending.raw_data['goodResult']))
         GoodDirectPersistAction().execute(context=ctx)
Exemple #4
0
    def test_init_unhandle_tasks(self):
        """Backfill the ``date`` column of unhandled tasks from their payload.

        For each of the task types ``sycm_init``, ``sycm_list`` and
        ``taobao_list`` (in that order) the tasks are collected with
        ``TaskCollectAction`` and the matching ``CmmSysStreamUnhandleTask``
        row is updated with the date found in the task's ``raw_data``:

        * ``sycm_init`` tasks carry the date directly under ``dateStr``;
        * the two list types carry it inside the ``goodResult`` record.

        Each row is updated in its own write session, as before.
        """

        def date_from_payload(raw_data):
            # sycm_init tasks store the date at the top level of raw_data.
            return raw_data['dateStr']

        def date_from_good(raw_data):
            # List tasks store the date on the embedded good record.
            return Good(raw_data['goodResult']).get_date()

        def backfill_dates(task_type, extract_date):
            # Collect all tasks of one type and write their date column.
            context = Context()
            context.attach(Context.KEY_CURRENT_TASK_TYPE, task_type)
            TaskCollectAction().execute(context=context)
            for task in context.get(Context.KEY_CURRENT_TASKS, []):
                id_ = task.id
                date = extract_date(task.raw_data)
                with write_session_scope() as session:
                    stream_unhandle_task_dao = get_stream_unhandle_task_dao(
                        session=session)
                    stream_unhandle_task_dao.update(
                        mod_dict={'date': date},
                        _filter=[CmmSysStreamUnhandleTask.id == id_])

        backfill_dates(TaobaoTaskType.sycm_init, date_from_payload)
        backfill_dates(TaobaoTaskType.sycm_list, date_from_good)
        backfill_dates(TaobaoTaskType.taobao_list, date_from_good)
    def __execute_taobao_integrate_list_actions(self, task, account, proxy):
        """Run the Taobao integrate-list action chain for one task.

        The good record is taken from ``task.raw_data['goodResult']`` and the
        stored cookies of ``account`` are attached to the context. Actions
        from ``get_taobao_integrate_list_actions()`` run in order until one
        returns a falsy result.

        :param task: task object exposing a ``raw_data`` mapping.
        :param account: account whose cookies are loaded and, on a cookie
            update signal, written back.
        :param proxy: value attached as ``Context.KEY_CURRENT_PROXY``.
        :raises CookieExpiredException: when no cookies are stored for the
            account.
        :raises CookieNeedUpdateException: re-raised from an action after the
            cookie jar has been dumped.
        """
        good = Good(task.raw_data['goodResult'])

        context = Context()
        context.attach(Context.KEY_CURRENT_TASK, task)
        context.attach(Context.KEY_CURRENT_TASK_ACCOUNT, account)
        context.attach(Context.KEY_CURRENT_PROXY, proxy)

        requests_cookie_jar = self._cookie_service.load(account)
        if not requests_cookie_jar:
            raise CookieExpiredException('integrate list need first login')

        context.attach(Context.KEY_IS_UPDATE_COOKIES, True)
        referer = 'https://s.m.taobao.com/h5'
        context.attach(Context.KEY_HEADERS, SpiderHttp.get_taobao_headers(referer))
        context.attach(Context.KEY_COOKIES, requests_cookie_jar)
        context.attach(Context.KEY_CURRENT_SLEEP_SECS, 2)
        context.attach(Context.KEY_GOOD_DICT, good)

        for action in self.get_taobao_integrate_list_actions():
            try:
                result = action.execute(context=context)
            except CookieNeedUpdateException:
                # Save the jar first, then let the caller see the original
                # exception with its traceback untouched.
                self._cookie_service.dump(requests_cookie_jar, account)
                raise
            if not result:
                # A falsy result stops the remaining actions.
                break
    def __execut_taobao_detail_actions(self, task, proxy=None):
        """Find a listing that matches the task's good and run detail actions.

        ``task.raw_data`` must provide ``goodResult``, ``integrateInfos`` and
        ``saleInfos``. The two candidate lists are walked in lockstep, one
        sale candidate and one integrate candidate per loop iteration, for at
        most ``min(len(integrate_infos), len(sale_infos))`` iterations.

        A candidate "matches" when the model name occurs in its title
        (case-insensitive) and ``Category.check_cate_id`` accepts its
        category. On a match the candidate's price is stored on the good, the
        flag is set to ``GoodDataType.success`` and the actions from
        ``get_taobao_detail_actions()`` run. Early non-matching candidates are
        instead fetched through ``get_taobao_http_detail_actions()``; if that
        chain completes without raising, the search also ends successfully.

        If the loop finishes without success the good is flagged
        ``GoodDataType.not_found`` and the detail actions run once more.

        :param task: task object exposing a ``raw_data`` mapping.
        :param proxy: value attached as ``Context.KEY_CURRENT_PROXY``.
        """
        raw_data = task.raw_data
        good_result = Good(raw_data['goodResult'])
        model_name = good_result.get_model_name()
        cate_id = good_result.get_category_id()
        integrate_infos = raw_data['integrateInfos']
        sale_infos = raw_data['saleInfos']
        # i is the cursor into sale_infos, j the cursor into integrate_infos.
        # NOTE(review): j starts at 1, so integrate_infos[0] is never
        # examined -- confirm this is intentional.
        i = 0
        j = 1
        length = min(len(integrate_infos), len(sale_infos))
        is_success = False

        # Base context. It may be rebound to a fresh Context inside the loop
        # (HTTP-detail branches); later uses of `context` see the latest one.
        context = Context()
        context.attach(Context.KEY_GOOD_DICT, good_result)
        context.attach(Context.KEY_CURRENT_TASK, task)
        context.attach(Context.KEY_CURRENT_PROXY, proxy)

        # x is only an iteration counter; the cursors i and j do the indexing.
        for x in range(0, length):
            # Set when an InterruptException occurs so the cursors are not
            # advanced and the same candidates are tried on the next pass.
            is_need_retry = False
            # ---- sale candidate at cursor i ----
            if i < len(sale_infos):
                sale_info = sale_infos[i]
                sale_item_id = sale_info['itemId']
                sale_title = sale_info['title']
                sale_cate_id = sale_info['category']
                sale_price = sale_info['price']
                # str(sale_title).upper()
                # if str(sale_title).upper().find(str(model_name).upper()) != -1 and str(cate_id) == str(sale_cate_id):
                # Match: model name is a case-insensitive substring of the
                # title and the category check passes.
                if str(sale_title).upper().find(str(model_name).upper()) != -1 and Category.check_cate_id(cate_id,
                                                                                                          sale_cate_id):
                    actions = self.get_taobao_detail_actions()
                    is_success = True
                    # skuId '-1' marks a price taken from the sale list.
                    price_info = [{
                        'skuId': '-1',
                        'price': yuan_2_cent(sale_price)
                    }]
                    good_result.set_price_info(price_info=price_info)
                    good_result.set_flag(str(int(GoodDataType.success)))
                    for action in actions:
                        action.execute(context=context)
                    break
                # No title/category match: only the first five sale
                # candidates are fetched over HTTP.
                elif i < 5:
                    actions = self.get_taobao_http_detail_actions()
                    # Current time in milliseconds, passed to the detail URL.
                    timestamps = int(datetime.now().timestamp() * 1000)
                    # sign = get_sign('414804c1e894540b7f18f703c74346cf', str(timestamps), '12574478',
                    #                 '{"itemNumId":"%s"' % (sale_item_id))
                    sale_detail_url = SpiderUrls.get_taobao_detail_url(timestamps, '', sale_item_id)
                    # Fresh context with an empty cookie jar for this request;
                    # this rebinds `context` for the rest of the method.
                    context = Context()
                    context.attach(Context.KEY_GOOD_DICT, good_result)
                    context.attach(Context.KEY_CURRENT_TASK, task)
                    context.attach(Context.KEY_COOKIES, RequestsCookieJar())
                    context.attach(Context.KEY_IS_UPDATE_COOKIES, True)
                    context.attach(Context.KEY_CURRENT_PROXY, proxy)

                    detail_m_url = SpiderUrls.get_detail_m_url(sale_info['userType'], sale_item_id)
                    detail_m_http_request = HttpRequest(detail_m_url, method=HttpMethod.GET)
                    context.attach(Context.KEY_DETAIL_M_HTTP_REQUEST, detail_m_http_request)

                    sale_http_request = HttpRequest(url=sale_detail_url, method=HttpMethod.GET)
                    context.attach(Context.KEY_TAOBAO_DETAIL_HTTP_REQUEST, sale_http_request)
                    # The mobile detail page URL is passed to the header builder.
                    context.attach(Context.KEY_HEADERS, SpiderHttp.get_taobao_headers(detail_m_url))
                    try:
                        for action in actions:
                            action.execute(context=context)
                        # All HTTP detail actions completed without raising.
                        is_success = True
                        break
                    except RetryException as e:
                        # Logged and skipped: the cursors still advance, so
                        # this candidate is not tried again.
                        logger.error(e)
                        time.sleep(5)
                    except InterruptException as e:
                        # Back off and keep the cursors in place. The
                        # integrate branch below still runs in this iteration.
                        logger.exception(e)
                        time.sleep(10)
                        is_need_retry = True
                        # raise e
                # if is_success:
                #     break
            # ---- integrate candidate at cursor j ----
            if j < len(integrate_infos):
                integrate_info = integrate_infos[j]
                integrate_item_id = integrate_info['itemId']
                integrate_title = integrate_info['title']
                integrate_cate_id = integrate_info['category']
                integrate_price = integrate_info['price']
                # if str(integrate_title).upper().find(
                #         str(model_name).upper()) != -1 and str(cate_id) == str(integrate_cate_id):
                # Same match rule as for sale candidates.
                if str(integrate_title).upper().find(
                        str(model_name).upper()) != -1 and Category.check_cate_id(cate_id, integrate_cate_id):
                    actions = self.get_taobao_detail_actions()
                    is_success = True
                    # skuId '-2' marks a price taken from the integrate list.
                    price_info = [{
                        'skuId': '-2',
                        'price': yuan_2_cent(integrate_price)
                    }]
                    good_result.set_price_info(price_info=price_info)
                    good_result.set_flag(str(int(GoodDataType.success)))
                    for action in actions:
                        action.execute(context=context)
                    break
                # j starts at 1, so `j < 6` also allows five HTTP lookups.
                elif j < 6:
                    actions = self.get_taobao_http_detail_actions()
                    timestamps = int(datetime.now().timestamp() * 1000)
                    integrate_detail_url = SpiderUrls.get_taobao_detail_url(timestamps, '', integrate_item_id)
                    # Fresh context with an empty cookie jar, as in the sale
                    # branch; rebinds `context`.
                    context = Context()
                    # referer = 'https://s.m.taobao.com/h5'
                    # context.attach(Context.KEY_HEADERS, SpiderHttp.get_taobao_headers(referer))
                    context.attach(Context.KEY_GOOD_DICT, good_result)
                    context.attach(Context.KEY_CURRENT_TASK, task)
                    context.attach(Context.KEY_COOKIES, RequestsCookieJar())
                    context.attach(Context.KEY_IS_UPDATE_COOKIES, True)
                    context.attach(Context.KEY_CURRENT_PROXY, proxy)

                    detail_m_url = SpiderUrls.get_detail_m_url(integrate_info['userType'], integrate_item_id)
                    detail_m_http_request = HttpRequest(detail_m_url, method=HttpMethod.GET)
                    context.attach(Context.KEY_DETAIL_M_HTTP_REQUEST, detail_m_http_request)

                    integrate_http_request = HttpRequest(url=integrate_detail_url, method=HttpMethod.GET)
                    context.attach(Context.KEY_TAOBAO_DETAIL_HTTP_REQUEST, integrate_http_request)

                    context.attach(Context.KEY_HEADERS, SpiderHttp.get_taobao_headers(detail_m_url))
                    try:
                        for action in actions:
                            action.execute(context=context)
                        is_success = True
                        break
                    except RetryException as e:
                        # Logged (with traceback here, unlike the sale branch)
                        # and skipped; the cursors still advance.
                        logger.exception(e)
                        time.sleep(5)
                    except InterruptException as e:
                        logger.exception(e)
                        time.sleep(10)
                        is_need_retry = True
                        # raise e
            # Advance both cursors together unless an interrupt asked for the
            # same candidates to be retried. A retry still uses up one of the
            # `length` loop iterations.
            if not is_need_retry:
                i += 1
                j += 1
        # Nothing matched and no HTTP lookup succeeded: record the good as
        # not found. `context` here is whichever Context was bound last.
        if not is_success:
            actions = self.get_taobao_detail_actions()
            good_result.set_flag(str(int(GoodDataType.not_found)))
            for action in actions:
                action.execute(context=context)
Exemple #7
0
    def do_execute(self, context):
        """Persist a SYCM hot-rank result and fan it out into follow-up tasks.

        Within one write session this method:

        1. stores the hot-rank rows together with the current good as an
           opt-data record of type ``TaobaoPageType.sycm_list``;
        2. creates one unhandled ``sycm_list`` task per hot-rank row, each
           with its own copy of the good filled in with the row's brand,
           model and sell count;
        3. deletes the current task from the unhandled table and inserts it
           into the handled table.

        :param context: action context providing the account, the hot-rank
            result, the current task and the good.
        :return: always ``True``.
        """
        account = context.get(Context.KEY_CURRENT_TASK_ACCOUNT)
        rank_rows = context.get(
            Context.KEY_SYCM_PRODUCT_PROD_HOT_RANK_RESULT)['data']
        task = context.get(Context.KEY_CURRENT_TASK)

        with write_session_scope() as session:
            good = context.get(Context.KEY_GOOD_DICT)
            opt_data_dao = get_stream_opt_data_dao(session=session)
            unhandle_dao = get_stream_unhandle_task_dao(session=session)
            handle_dao = get_stream_handle_task_dao(session=session)

            stored = opt_data_dao.insert(
                raw_data={
                    'hotRankResult': rank_rows,
                    'goodResult': good
                },
                type=int(TaobaoPageType.sycm_list))

            def follow_up_task(row):
                # Each follow-up task gets an independent copy of the good so
                # that per-row fields do not leak between tasks.
                row_good = Good(deepcopy(good))
                row_good.set_brand_id(row['brandId'])
                row_good.set_brand_name(row['brandName'])
                row_good.set_model_id(row['modelId'])
                row_good.set_model_name(row['modelName'])
                row_good.set_sell_count(row['payItmCnt'])
                return {
                    'raw_data': {
                        'hotRankInfo': row,
                        'goodResult': row_good,
                        'account': account
                    },
                    'type': int(TaobaoTaskType.sycm_list),
                    'origin_id': stored.id,
                    'date': good['date']
                }

            unhandle_dao.bulk_insert([follow_up_task(row) for row in rank_rows])

            # Move the current task from the unhandled to the handled table.
            unhandle_dao.delete(
                _filter=[CmmSysStreamUnhandleTask.id == task.id])
            handle_dao.insert(
                type=task.type,
                raw_data=task.raw_data,
                origin_id=task.origin_id,
                date=good['date'])
        return True
Exemple #8
0
 def test_dict(self):
     """Print a plain dict and the ``Good`` built from it after ``set_flag``.

     Printing both lets the reader see whether setting the flag on the
     ``Good`` also changes the dict it was created from.
     """
     source = {'model': 'ccc'}
     wrapped = Good(source)
     wrapped.set_flag('1111')
     for shown in (source, wrapped):
         print(shown)