def crawling_search(self):
    """Fetch the search result list and send the content of every entry.

    The raw list comes from ``self.get_search_result()`` and is parsed by
    ``self.analyze_search_result()``.  If either step raises ``RequestError``
    or ``AnalyzeError`` the error is printed and nothing is sent.

    For each parsed entry, ``self.auto_news_main_content()`` is called with
    the entry's ``'url'`` and ``self.req_params['keyword']``.  The returned
    detail is stored in ``self.result['result']['item']`` and ``self.result``
    is passed to ``send``.  Entries whose fetch or send fails are reported on
    stdout and skipped.  The loop stops after 100 successful sends.
    """
    try:
        search_result_text = self.get_search_result()
        search_result_list = self.analyze_search_result(search_result_text)
    except (RequestError, AnalyzeError) as exc:
        # Print the caught instance (not the exception class) so the actual
        # failure reason is visible.
        print('list', exc)
        return

    sent = 0
    for detail in search_result_list:
        try:
            result_detail = self.auto_news_main_content(
                detail['url'], keyword=self.req_params['keyword'])
        except RequestError as exc:
            print('cnt', exc)
            continue

        self.result['result']['item'] = result_detail
        try:
            send(self.result)
        except Exception as exc:
            # Best-effort delivery: report the failure and move on.
            print('send errors', exc)
            continue

        # Short pause between successful sends.
        time.sleep(0.1)
        sent += 1
        if sent >= 100:
            break
Exemple #2
0
 def crawling_news(self):
     """Fetch the latest-news list and send the content of every entry.

     The raw list comes from ``self.get_news_last_list()`` and is parsed by
     ``self.analyze_news_last_list()``.  If either step raises
     ``RequestError`` or ``AnalyzeError`` the error is printed and nothing
     is sent.

     For each parsed entry, ``self.auto_news_main_content()`` is called with
     the entry's ``'url'``.  The returned detail is stored in
     ``self.result['result']['item']`` and ``self.result`` is passed to
     ``send``.  Entries whose fetch or send fails are reported on stdout and
     skipped.  The loop stops after 20 successful sends.
     """
     try:
         news_last_list_text = self.get_news_last_list()
         news_last_list = self.analyze_news_last_list(news_last_list_text)
     except (RequestError, AnalyzeError) as exc:
         # Print the caught instance (not the exception class) so the actual
         # failure reason is visible.
         print('list', exc)
         return

     sent = 0
     for detail in news_last_list:
         try:
             result_detail = self.auto_news_main_content(detail['url'])
         except RequestError as exc:
             print('cnt', exc)
             continue

         self.result['result']['item'] = result_detail
         try:
             send(self.result)
         except Exception as exc:
             # Best-effort delivery: report the failure and move on.
             print('send errors', exc)
             continue

         # Short pause between successful sends.
         time.sleep(0.1)
         sent += 1
         if sent >= 20:
             break
Exemple #3
0
def execute_by_message(message):
    """Run the method named by ``message`` and send its result back.

    The method and its parameters are resolved with
    ``method_and_params_by_message``.  The ``'proxy'`` parameter is replaced
    by the value of ``proxy_by_message(message)`` for the duration of the
    call, and ``'params'`` is removed from ``message`` before the message is
    passed to the method as ``task_msg``.

    Any ``Exception`` raised by the method is logged and converted into an
    error result with code ``1899999``.  Afterwards the original proxy value
    is handed to ``release_proxy`` and restored in ``params``, and a
    ``'method_result'`` message is sent.
    """
    method, params = method_and_params_by_message(message)
    raw_proxy = params.get('proxy')
    params['proxy'] = proxy_by_message(message)
    message.pop('params', None)

    # Explicit parameters win over the default ``task_msg`` entry.
    call_kwargs = {'task_msg': message, **params}
    try:
        result = method(**call_kwargs)
    except Exception as e:
        s = traceback.format_exc()
        log(f'execute task error: {message}\n{e}\n{s}')
        result = {
            'code': 1899999,
            'data': {
                'error': str(e),
                'detail': s,
            },
            'msg': '爬虫未知错误',
        }

    release_proxy(raw_proxy)
    params['proxy'] = raw_proxy
    send({
        'msg_type': 'method_result',
        'result': result,
        'task_msg': message,
    })
 def crawling_news(self):
     """Fetch four pages of the latest-news list and send each entry's content.

     Pages 1 through 4 are requested with ``self.get_news_last_list(page)``
     and parsed by ``self.analyze_news_last_list()``; the parsed entries are
     collected into one list.  If any page raises ``RequestError`` or
     ``AnalyzeError`` the error is printed and nothing is sent, including
     entries from pages that were already fetched.

     For each collected entry, ``self.get_news_result_cnt()`` is called with
     the entry's ``'url'``.  The returned detail is stored in
     ``self.result['result']['item']`` and ``self.result`` is passed to
     ``send``.  Entries whose fetch or send fails are reported on stdout and
     skipped.  The loop stops after 100 successful sends.
     """
     news_last_list_all = []
     try:
         for current_page in range(1, 5):
             news_last_list_text = self.get_news_last_list(current_page)
             news_last_list = self.analyze_news_last_list(news_last_list_text)
             news_last_list_all.extend(news_last_list)
     except (RequestError, AnalyzeError) as exc:
         # Print the caught instance (not the exception class) so the actual
         # failure reason is visible.
         print('list', exc)
         return

     sent = 0
     for detail in news_last_list_all:
         try:
             result_detail = self.get_news_result_cnt(detail['url'])
         except RequestError as exc:
             print('cnt', exc)
             continue

         self.result['result']['item'] = result_detail
         try:
             send(self.result)
         except Exception as exc:
             # Best-effort delivery: report the failure and move on.
             print('send errors', exc)
             continue

         # Short pause between successful sends.
         time.sleep(0.1)
         sent += 1
         if sent >= 100:
             break
Exemple #5
0
def pick(**kw):
    """Send five ``'scraped_data'`` messages with ``a`` running from 0 to 4.

    Each message carries the ``task_msg`` keyword argument (``None`` when it
    was not supplied) so the receiver can relate it to the originating task.
    """
    task_msg = kw.get('task_msg')
    for index in range(5):
        send({
            'msg_type': 'scraped_data',
            'result': {'a': index},
            'task_msg': task_msg,
        })
Exemple #6
0
def async_request(channel, method, callback, **params):
    """Send a ``'request'`` message, then subscribe ``callback`` to ``channel``.

    The message carries ``channel``, ``method`` and the extra keyword
    arguments (as ``params``).  The subscription is registered after the
    request has been sent.
    """
    request = {
        'msg_type': 'request',
        'channel': channel,
        'method': method,
        'params': params,
    }
    send(request)
    subscribe(channel, callback)
Exemple #7
0
def deal_message(msg):
    """Log and execute one message, reporting any failure via ``send``.

    Any ``Exception`` raised while logging or while running
    ``execute_by_message`` is logged with its traceback and reported as a
    ``'deal_msg_error'`` message; it is not re-raised.
    """
    try:
        log(f'dealing msg {msg}')
        execute_by_message(msg)
    except Exception as err:
        trace_text = traceback.format_exc()
        log(f'deal msg error: {msg}\n{err}\n{trace_text}')
        send({
            'msg_type': 'deal_msg_error',
            'error_detail': trace_text,
            'task_msg': msg,
        })
Exemple #8
0
def execute_by_message(message):
    """Run the method named by ``message`` and send its result back.

    The method and its parameters are resolved with
    ``method_and_params_by_message``.  The ``'proxy'`` parameter is replaced
    by the value of ``proxy_by_message(message)`` for the duration of the
    call, and the message itself is passed to the method as ``task_msg``.

    On success a ``'method_result'`` message is sent.  Exceptions raised by
    the method propagate to the caller.
    """
    msg = message
    method, params = method_and_params_by_message(msg)
    raw_proxy = params.get('proxy')
    params['proxy'] = proxy_by_message(msg)

    # Explicit parameters win over the default ``task_msg`` entry.
    call_kwargs = {'task_msg': msg, **params}
    try:
        result = method(**call_kwargs)
    finally:
        # Release the proxy and restore the original value even when the
        # method raises; otherwise a failing task would never release it.
        release_proxy(raw_proxy)
        params['proxy'] = raw_proxy

    send({
        'msg_type': 'method_result',
        'result': result,
        'task_msg': msg,
    })
Exemple #9
0
def watch(task_manager=None):
    """Announce the worker, then poll ``send.url`` for messages forever.

    A ``'worker_start'`` message with the current time is sent first.  Each
    loop iteration GETs ``send.url`` and decodes the JSON body.  A body whose
    ``'code'`` equals 204 is ignored; any other body is handled by
    ``deal_message``, either inline or, when ``task_manager`` is given, via
    ``task_manager.submit``.  Any ``Exception`` in an iteration is logged
    with its traceback, followed by a five-second pause.  This function
    never returns.
    """
    log('worker start watch')
    send({
        'msg_type': 'worker_start',
        'time': time.time(),
    })

    while True:
        try:
            msg = requests.get(send.url).json()
            if msg.get('code') == 204:
                continue
            if task_manager is None:
                deal_message(msg)
            else:
                task_manager.submit(deal_message, msg)
        except Exception as err:
            trace_text = traceback.format_exc()
            log(f'Unexpected error: {err}\n{trace_text}')
            time.sleep(5)
Exemple #10
0
def __watch(thread_pool_executor=None):
    """Announce the worker, then poll channel ``test_01`` for messages forever.

    A ``'worker_start'`` message with the current time is sent first.  Each
    loop iteration GETs ``send.url`` with ``?channel=test_01`` appended and
    decodes the JSON body.  A body whose ``'code'`` equals 204 is ignored;
    any other body is handled by ``deal_message``, either inline or, when
    ``thread_pool_executor`` is given, via its ``submit`` method.  Any
    ``Exception`` in an iteration is logged with its traceback, followed by
    a five-second pause.  This function never returns.
    """
    log('Worker start watch channel test_01')
    send({
        'msg_type': 'worker_start',
        'time': time.time(),
    })

    while True:
        try:
            msg = requests.get(send.url + '?channel=test_01').json()
            if msg.get('code') == 204:
                continue
            if thread_pool_executor is None:
                deal_message(msg)
            else:
                thread_pool_executor.submit(deal_message, msg)
        except Exception as err:
            trace_text = traceback.format_exc()
            log(f'Unexpected error: {err}\n{trace_text}')
            time.sleep(5)
Exemple #11
0
def send_msg(msg):
    """Wait two seconds, then pass ``msg`` to ``send``."""
    delay_seconds = 2
    time.sleep(delay_seconds)
    send(msg)