Exemple #1
0
    def test_init_unhandle_tasks(self):
        """Rewrite the ``date`` field of collected tasks for three task types.

        For ``sycm_init``, ``sycm_list`` and ``taobao_list`` in turn, the tasks
        are gathered by running ``TaskCollectAction`` on a fresh ``Context``.
        Each task is then updated (matched by id) with a date taken from its
        own ``raw_data``.
        """

        def date_from_good(payload):
            # The list-type tasks read their date from the embedded good record.
            return Good(payload['goodResult']).get_date()

        def backfill(task_type, date_of):
            # Collect the tasks of one type and update them one at a time.
            ctx = Context()
            ctx.attach(Context.KEY_CURRENT_TASK_TYPE, task_type)
            TaskCollectAction().execute(context=ctx)
            for pending in ctx.get(Context.KEY_CURRENT_TASKS, []):
                task_id = pending.id
                new_date = date_of(pending.raw_data)
                # A separate write session is opened for every task.
                with write_session_scope() as session:
                    get_stream_unhandle_task_dao(session=session).update(
                        mod_dict={'date': new_date},
                        _filter=[CmmSysStreamUnhandleTask.id == task_id])

        # sycm_init tasks carry the date directly under 'dateStr'.
        backfill(TaobaoTaskType.sycm_init, lambda payload: payload['dateStr'])
        backfill(TaobaoTaskType.sycm_list, date_from_good)
        backfill(TaobaoTaskType.taobao_list, date_from_good)
    def do_execute(self, context):
        """Store the good from the context and hand the current task over.

        Inside a single write session this inserts a good-data row built from
        the good dict, deletes the current task (matched by id) through the
        unhandled-task DAO, inserts a copy of it through the handled-task DAO
        stamped with the good's date, and finally inserts the good into the
        ``stream_good_data`` Mongo collection.

        Args:
            context: Action context supplying ``KEY_CURRENT_TASK`` and
                ``KEY_GOOD_DICT``.

        Returns:
            Always ``True``.
        """
        current_task = context.get(Context.KEY_CURRENT_TASK)
        with write_session_scope() as session:
            good = context.get(Context.KEY_GOOD_DICT)
            unhandled_dao = get_stream_unhandle_task_dao(session=session)
            handled_dao = get_stream_handle_task_dao(session=session)
            good_data_dao = get_stream_good_data_dao(session=session)

            good_data_dao.insert(
                raw_data=good,
                flag=int(good['flag']),
                category_name=good['categoryName'],
                brand_name=good['brandName'],
                category_id=good['categoryId'],
                model_name=good['modelName'],
                date=good['date'],
            )

            # The delete's return value is not needed here.
            unhandled_dao.delete(
                _filter=[CmmSysStreamUnhandleTask.id == current_task.id])
            handled_dao.insert(
                type=current_task.type,
                raw_data=current_task.raw_data,
                origin_id=current_task.origin_id,
                date=good['date'],
            )

            # The Mongo write happens while the SQL session is still open.
            with mongo_collection_scope(
                    collection_name='stream_good_data') as collection:
                collection.insert(good)
        return True
    def do_execute(self, context):
        """Record the Taobao list results and queue a follow-up task.

        Inside a single write session this:

        1. inserts an opt-data row holding the raw integrate/sale results and
           the good, typed as ``TaobaoPageType.taobao_list``;
        2. inserts a new unhandled task typed ``TaobaoTaskType.taobao_list``
           whose ``raw_data`` holds the outputs of ``build_integrate_infos``
           and ``build_sale_infos`` plus the good, linked to the opt-data row
           through ``origin_id``;
        3. deletes the current task (matched by id) through the unhandled-task
           DAO and inserts a copy of it through the handled-task DAO.

        Args:
            context: Action context supplying the integrate result, the sale
                result, the good dict and the current task.

        Returns:
            Always ``True``.
        """
        integrate_result = context.get(Context.KEY_TAOBAO_INTERGRATE_RESULT)
        sale_result = context.get(Context.KEY_TAOBAO_SALE_RESULT)

        with write_session_scope() as session:
            good = context.get(Context.KEY_GOOD_DICT)
            opt_data_dao = get_stream_opt_data_dao(session=session)
            unhandled_dao = get_stream_unhandle_task_dao(session=session)
            handled_dao = get_stream_handle_task_dao(session=session)

            opt_row = opt_data_dao.insert(
                raw_data={
                    'integrateResult': integrate_result,
                    'saleResult': sale_result,
                    'goodResult': good,
                },
                type=int(TaobaoPageType.taobao_list),
            )

            unhandled_dao.insert(
                raw_data={
                    'integrateInfos':
                        self.build_integrate_infos(integrate_result),
                    'saleInfos': self.build_sale_infos(sale_result),
                    'goodResult': good,
                },
                type=int(TaobaoTaskType.taobao_list),
                origin_id=opt_row.id,
                date=good['date'],
            )

            current_task = context.get(Context.KEY_CURRENT_TASK)
            # The delete's return value is not needed here.
            unhandled_dao.delete(
                _filter=[CmmSysStreamUnhandleTask.id == current_task.id])
            handled_dao.insert(
                type=current_task.type,
                raw_data=current_task.raw_data,
                origin_id=current_task.origin_id,
                date=good['date'],
            )
        return True
Exemple #4
0
    def do_execute(self, context):
        """Store the hot-rank result and queue one task per ranked item.

        The ``'data'`` entry of the hot-rank result is saved, together with
        the good, as an opt-data row typed ``TaobaoPageType.sycm_list``. For
        each ranked item a deep copy of the good is wrapped in ``Good`` and
        given the item's brand, model and sell count. The resulting tasks are
        bulk-inserted as unhandled ``TaobaoTaskType.sycm_list`` tasks. The
        current task is then deleted (matched by id) through the
        unhandled-task DAO and a copy is inserted through the handled-task DAO.

        Args:
            context: Action context supplying the account, the hot-rank
                result, the current task and the good dict.

        Returns:
            Always ``True``.
        """
        account = context.get(Context.KEY_CURRENT_TASK_ACCOUNT)
        hot_rank_rows = context.get(
            Context.KEY_SYCM_PRODUCT_PROD_HOT_RANK_RESULT)['data']
        current_task = context.get(Context.KEY_CURRENT_TASK)

        with write_session_scope() as session:
            good = context.get(Context.KEY_GOOD_DICT)
            opt_data_dao = get_stream_opt_data_dao(session=session)
            unhandled_dao = get_stream_unhandle_task_dao(session=session)
            handled_dao = get_stream_handle_task_dao(session=session)

            opt_row = opt_data_dao.insert(
                raw_data={
                    'hotRankResult': hot_rank_rows,
                    'goodResult': good,
                },
                type=int(TaobaoPageType.sycm_list),
            )

            def follow_up_task(rank_item):
                # Work on a private copy so the shared good stays untouched.
                item_good = Good(deepcopy(good))
                item_good.set_brand_id(rank_item['brandId'])
                item_good.set_brand_name(rank_item['brandName'])
                item_good.set_model_id(rank_item['modelId'])
                item_good.set_model_name(rank_item['modelName'])
                item_good.set_sell_count(rank_item['payItmCnt'])
                return {
                    'raw_data': {
                        'hotRankInfo': rank_item,
                        'goodResult': item_good,
                        'account': account,
                    },
                    'type': int(TaobaoTaskType.sycm_list),
                    'origin_id': opt_row.id,
                    'date': good['date'],
                }

            unhandled_dao.bulk_insert(
                [follow_up_task(rank_item) for rank_item in hot_rank_rows])

            # The delete's return value is not needed here.
            unhandled_dao.delete(
                _filter=[CmmSysStreamUnhandleTask.id == current_task.id])
            handled_dao.insert(
                type=current_task.type,
                raw_data=current_task.raw_data,
                origin_id=current_task.origin_id,
                date=good['date'],
            )
        return True
Exemple #5
0
 def execute(self):
     """Force a fresh login for accounts listed by the risk DAO.

     Reads at most ``self.account_num`` risk rows. For each row the account
     is looked up in ``global_config.s_accounts_dict`` by the row's
     ``raw_data``, logged in again through its origin static proxy with
     ``force=True`` and ``risk=True``, and the row is then deleted. The
     session is committed after every row.
     """
     with write_session_scope() as session:
         risk_dao = get_stream_risk_dao(session=session)
         flagged_rows = risk_dao.base_query.limit(self.account_num).all()
         if not flagged_rows:
             return
         for row in flagged_rows:
             account = global_config.s_accounts_dict[row.raw_data]
             origin_proxy = self._proxy_service.get_origin_static_proxy(
                 account['username'])
             self._login(account=account, force=True, risk=True,
                         proxy=origin_proxy)
             risk_dao.delete(_filter=[CmmSysStreamRisk.id == row.id])
             # Commit per row rather than once at the end of the batch.
             session.commit()
Exemple #6
0
    def test_db(self):
        """Insert a sample commodity through the DAO, then delete it by id."""
        commodity = CmmSysCommodity()
        commodity.content = {
            'category': u'冰箱',
            'brand': u'海尔',
            'model': 'abcd-abcd',
            'price': '12.30',
            'sales': '123',
            'date': '2018-10-10'
        }

        with write_session_scope() as session:
            dao = get_commodity_dao(session=session)
            stored = dao.insert_entity(commodity)
            # Delete using the id of the entity returned by the insert.
            dao.delete(id=stored.id)
Exemple #7
0
    def do_execute(self, context):
        """Load up to 1000 unhandled tasks of the current type into the context.

        Only the ``id``, ``type``, ``raw_data`` and ``origin_id`` columns are
        selected. The result is attached under ``KEY_CURRENT_TASKS`` only when
        it is non-empty; otherwise the context is left unchanged.

        Args:
            context: Action context supplying ``KEY_CURRENT_TASK_TYPE``.

        Returns:
            Always ``True``.
        """
        wanted_type = int(context.get(Context.KEY_CURRENT_TASK_TYPE))

        with write_session_scope() as session:
            unhandled_dao = get_stream_unhandle_task_dao(session=session)

            base = unhandled_dao.base_query
            selected = base.with_entities(entities=[
                CmmSysStreamUnhandleTask.id,
                CmmSysStreamUnhandleTask.type,
                CmmSysStreamUnhandleTask.raw_data,
                CmmSysStreamUnhandleTask.origin_id,
            ])
            of_type = selected.filter(
                filters_=[CmmSysStreamUnhandleTask.type == wanted_type])
            pending = of_type.limit(1000).all()

            if pending:
                context.attach(Context.KEY_CURRENT_TASKS, pending)
        return True
    def do_execute(self, context):
        """Queue one ``sycm_init`` task per entry returned by ``Category.get_tasks``.

        The target date is read from ``KEY_SYCM_SPECIFIC_DATE`` in the context
        and defaults to yesterday formatted as ``YYYY-MM-DD``. Nothing is
        written when ``Category.get_tasks`` yields no entries.

        Args:
            context: Action context that may carry a specific date.

        Returns:
            Always ``True``.
        """
        default_date = yesterday().strftime("%Y-%m-%d")
        target_date = context.get(Context.KEY_SYCM_SPECIFIC_DATE, default_date)

        rows = [
            {
                'type': int(TaobaoTaskType.sycm_init),
                'raw_data': payload,
                'date': target_date,
            }
            for payload in Category.get_tasks(target_date,
                                              Category.sys_category)
        ]
        if not rows:
            return True

        with write_session_scope() as session:
            get_stream_unhandle_task_dao(session=session).bulk_insert(rows)
        return True
    def do_execute(self, context):
        """Store the context's good as a ``GoodDataType.direct`` record.

        The good dict is mutated in place: its ``'flag'`` entry is overwritten
        with ``int(GoodDataType.direct)``. It is then inserted as a good-data
        row and into the ``stream_good_data`` Mongo collection.

        Args:
            context: Action context supplying ``KEY_GOOD_DICT``.

        Returns:
            Always ``True``.
        """
        with write_session_scope() as session:
            good = context.get(Context.KEY_GOOD_DICT)
            good_data_dao = get_stream_good_data_dao(session=session)

            # Set the flag on the dict itself so both stores see the same value.
            good['flag'] = int(GoodDataType.direct)
            good_data_dao.insert(
                raw_data=good,
                flag=int(good['flag']),
                category_name=good['categoryName'],
                brand_name=good['brandName'],
                category_id=good['categoryId'],
                model_name=good['modelName'],
                date=good['date'],
            )

            with mongo_collection_scope(
                    collection_name='stream_good_data') as collection:
                collection.insert(good)
        return True
    def do_execute(self, context):
        """Attach the unhandled tasks of the current type and date to the context.

        Queries the ``id``, ``type``, ``raw_data`` and ``origin_id`` columns
        for tasks matching both the context's task type and its
        direct-collect date. The query result is attached under
        ``KEY_CURRENT_TASKS`` even when it is empty.

        Args:
            context: Action context supplying ``KEY_CURRENT_TASK_TYPE`` and
                ``KEY_DIRECT_COLLECT_DATE``.

        Returns:
            Always ``True``.
        """
        wanted_type = int(context.get(Context.KEY_CURRENT_TASK_TYPE))
        wanted_date = context.get(Context.KEY_DIRECT_COLLECT_DATE)

        with write_session_scope() as session:
            unhandled_dao = get_stream_unhandle_task_dao(session=session)

            columns = [
                CmmSysStreamUnhandleTask.id,
                CmmSysStreamUnhandleTask.type,
                CmmSysStreamUnhandleTask.raw_data,
                CmmSysStreamUnhandleTask.origin_id,
            ]
            criteria = [
                CmmSysStreamUnhandleTask.type == wanted_type,
                CmmSysStreamUnhandleTask.date == wanted_date,
            ]
            matches = unhandled_dao.query(entities=columns, _filter=criteria)

            context.attach(Context.KEY_CURRENT_TASKS, matches)
        return True
Exemple #11
0
    def execute(self, num):
        """Dispatch integrate-list tasks across accounts and react to results.

        Tasks come from ``execute_taobao_integrate_list_actions``. Accounts
        are taken from ``global_config.s_accounts`` in round-robin order using
        ``self._counter``. Accounts whose username appears in the risk DAO are
        skipped. Each completed future yields ``(account, flag, force)``:

        * ``flag`` truthy and ``force`` truthy: the account is passed to
          ``self._risk``.
        * ``flag`` truthy, ``force`` falsy: the account's failure counter is
          incremented. Above 2 its cookies are removed and it is passed to
          ``self._risk``; otherwise its cookies are refreshed.
          In both ``flag``-truthy cases the account's other counter is reset.
        * ``flag`` falsy: the failure counter is reset and the other counter
          is incremented. Once that reaches 2 the cookies are refreshed and
          the counter is reset.

        Any exception raised while handling one future is logged and
        processing continues with the next one.

        Args:
            num: Upper bound used when deciding how many tasks to submit.
        """

        def mark_risky(acct):
            # Hand the account to self._risk inside its own write session.
            with write_session_scope() as session:
                risk_dao = get_stream_risk_dao(session=session)
                self._risk(stream_risk_dao=risk_dao, account=acct)

        def refresh_cookies(acct):
            # Reload the stored origin cookies, run the bootstrap against the
            # fixed search URL, and persist both cookie sets it returns.
            url = 'https://s.m.taobao.com/h5?q=Flyco%2BFR5218&search=%E6%8F%90%E4%BA%A4&tab=all'
            origin_proxy = self._proxy_service.get_origin_static_proxy(
                acct['username'])
            stored = self._cookie_service.load(
                account=acct, type_=PickleFileType.origin_cookie)
            time.sleep(5)
            fresh, fresh_origin = spider_qt5_bootstrap(
                url=url,
                account=acct,
                risk=False,
                proxy=origin_proxy,
                cookies=stored)
            self._cookie_service.dump(cookies=fresh, account=acct)
            self._cookie_service.dump(
                cookies=fresh_origin,
                account=acct,
                type_=PickleFileType.origin_cookie)

        tasks = self.execute_taobao_integrate_list_actions()

        with read_session_scope() as session:
            risk_rows = get_stream_risk_dao(session=session).base_query.all()
            risk_usernames = {row.raw_data for row in risk_rows}

        accounts = global_config.s_accounts
        pending = {}
        # NOTE(review): `seen` counts every task, including ones skipped for a
        # risky account, and submission requires seen < num, so fewer than
        # `num` tasks are submitted. Confirm this is intended.
        for seen, task in enumerate(tasks, start=1):
            account = accounts[self._counter % len(accounts)]
            proxy = self._proxy_service.get_static_proxy(account['username'])
            self._counter += 1
            if account['username'] in risk_usernames or seen >= num:
                continue
            future = self._executor.submit(
                self._execute_taobao_integrate_list_actions, task, account,
                proxy)
            pending[future] = task

        for future in as_completed(pending):
            try:
                account, flag, force = future.result()
                username = account['username']

                if not flag:
                    self._fail_account_counter[username] = 0
                    self._account_counter[username] += 1
                    if self._account_counter[username] >= 2:
                        refresh_cookies(account)
                        self._account_counter[username] = 0
                    continue

                if force:
                    mark_risky(account)
                else:
                    self._fail_account_counter[username] += 1
                    if self._fail_account_counter[username] > 2:
                        self._cookie_service.remove(account=account)
                        mark_risky(account)
                    else:
                        refresh_cookies(account)
                self._account_counter[username] = 0
            except Exception as e:
                logger.error(e)