예제 #1
0
def inspect(batch_num):
    """Run gather passes for a batch until the global 'STATUS' flag turns falsy.

    Each pass calls ``GatherCenter.gather(batch_num)`` and is bracketed by
    start/end log lines; a final line is logged once the loop is left.

    Args:
        batch_num: Batch identifier handed to the gatherer and written to the log.
    """
    gatherer = GatherCenter()
    batch_label = str(batch_num)
    while True:
        # The flag is re-read before every pass so another component can stop the loop.
        if not gl.get_value('STATUS'):
            break
        logger.info("inspect task start!  batch_num:%s", batch_label)
        gatherer.gather(batch_num)
        logger.info("inspect task end!  batch_num:%s", batch_label)
    logger.info("batchNum inspect task end!  batch_num:%s", batch_label)
예제 #2
0
def inspect_tracking(task_id, status):
    """Run tracking checks for a task until 'TRACKING_STATUS' turns falsy, then close it.

    Each pass calls ``MonitorTrackingService.monitor(task_id, status)``.  After
    the loop the task is closed through ``TrackingTaskDao.close_task``.

    Args:
        task_id: Identifier of the tracking task.
        status: Status value forwarded unchanged to ``monitor``.
    """
    monitor_service = MonitorTrackingService()
    task_label = str(task_id)
    while True:
        # The flag is re-read before every pass; presumably ``monitor`` clears it
        # when the task is finished -- confirm against the service implementation.
        if not gl.get_value('TRACKING_STATUS'):
            break
        logger.info("tracking task start!  task_id:%s", task_label)
        monitor_service.monitor(task_id, status)
        logger.info("tracking task end!  task_id:%s", task_label)
    TrackingTaskDao().close_task(task_id)
    logger.info("tracking task end!  task_id:%s", task_label)
예제 #3
0
 def heartbeat():
     """Send a best-effort heartbeat for this agent to the IMS REST API.

     Posts a form-encoded body with the host's resolved IP, the global
     'STATUS' value and the agent name (environment variable ``agent_name``)
     to ``ims_rest_base + "open/api/v1/agent/heartbeat"``.

     Any failure (missing environment variable, DNS error, HTTP error,
     timeout) is logged and swallowed; nothing is raised to the caller and
     nothing is returned.
     """
     # Upper bound for the HTTP call so a stalled server cannot block the caller.
     timeout_seconds = 10
     try:
         agent_name = os.environ['agent_name']
         hostname = socket.gethostname()
         ip = socket.gethostbyname(hostname)
         url = ims_rest_base + "open/api/v1/agent/heartbeat"
         status = gl.get_value('STATUS')
         data_json = {"ip": ip, "status": status, "job": agent_name}
         data = bytes(parse.urlencode(data_json), encoding="utf8")
         new_url = request.Request(url, data)
         # The response body is not needed; the context manager only makes
         # sure the underlying connection is closed instead of leaked.
         with request.urlopen(new_url, timeout=timeout_seconds):
             pass
     except Exception as e:
         logger.info(e)
         logger.info("heartbeat fail")
예제 #4
0
 def monitor(task_id, status):
     """Check every tracking number of a task via 51tracking and store the outcome.

     Logs in to trackingmore.com with a Chrome driver, then for each detail
     returned by ``TrackingDetailDao.get_by_task(task_id, status)`` opens the
     number's page, takes a snapshot, reads ``track_status`` from the JSON
     endpoint and persists the result through the DAO.  When the run ends
     (or there is nothing to check) the task is reported through
     ``ImsApi.done_tracking`` and the global 'STATUS' / 'TRACKING_STATUS'
     flags are set to False.

     Args:
         task_id: Identifier of the tracking task.
         status: Detail status used to select the rows to check.
     """
     ims_api = ImsApi()
     tracking_dao = TrackingDetailDao()
     status_dict = {'0': '查询中', '1': '查询不到', '2': '运输途中', '3': '到达待取', '4': '成功签收', '5': '运输过久',
                    '6': '投递失败', '7': '可能异常'}
     # Codes listed here are stored with result "true"; any other code is "false".
     normal_status_dict = {'0': '查询中', '1': '查询不到', '2': '运输途中', '3': '到达待取', '4': '成功签收',
                           '5': '运输过久'}

     def _mark_failed(detail):
         # Persist a "suspected anomaly, verify manually" outcome for one detail.
         detail.result = "false"
         detail.des = "检测疑似异常,建议手动验证!"
         detail.end_time = datetime.datetime.now()
         detail.url = ""
         detail.snapshot = ""
         tracking_dao.update(detail)

     tracking_details = tracking_dao.get_by_task(task_id, status)
     if tracking_details:
         # Bound up front so the except/finally clauses below stay valid even
         # when the driver cannot be created or the login fails before the loop.
         driver = None
         tracking_detail = None
         try:
             driver = WebDriver.get_chrome()
             driver.get("https://www.trackingmore.com/login-cn.html")
             # NOTE(review): credentials are hard-coded in source; they should
             # come from configuration or a secret store.
             driver.find_element_by_id("email").send_keys("*****@*****.**")
             driver.find_element_by_id("password").send_keys("0418YXYwlx")
             driver.find_element_by_id("login_test").click()
             time.sleep(5)
             for tracking_detail in tracking_details:
                 if not gl.get_value('TRACKING_STATUS'):
                     # The task was stopped externally: report it and bail out.
                     logger.info("快递单任务已停止,任务id:%s", task_id)
                     gl.set_value('STATUS', False)
                     gl.set_value('TRACKING_STATUS', False)
                     ims_api.done_tracking(task_id)
                     return
                 tracking_detail.start_time = datetime.datetime.now()
                 tracking_detail.status = "done"
                 logger.info("准备检查单号:%s ", tracking_detail.tracking_num)
                 try:
                     driver.get(
                         "https://my.51tracking.com/numbers.php?lang=cn&keywordType=trackNumber&p=1&searchnumber="
                         + tracking_detail.tracking_num)
                     driver.maximize_window()
                     time.sleep(3)
                     driver.find_element_by_id('trackItem_0').click()
                     time.sleep(1)
                     snapshot = SnapshotService.snapshot_tracking(driver, tracking_detail)
                     url = "https://my.51tracking.com/data/data-numbers.php?lang=cn&action=get_my_number" \
                           "&source=2&where=lang%3Dcn%26p%3D1%26keywordType%3DtrackNumber%26searchnumber%3D" \
                           + tracking_detail.tracking_num + "&page=1"
                     driver.get(url)
                     json_data = driver.find_element_by_tag_name("body").text
                     json_obj = json.loads(str(json_data))
                     # Named differently from the ``status`` parameter to avoid shadowing it.
                     # A code missing from status_dict raises KeyError and is
                     # handled as a failed check by the except clause below.
                     track_status = json_obj['data'][0]['track_status']
                     tracking_detail.des = status_dict[track_status]
                     tracking_detail.end_time = datetime.datetime.now()
                     tracking_detail.url = ""
                     tracking_detail.snapshot = snapshot
                     if track_status in normal_status_dict:
                         logger.info("单号巡检状态:%s", track_status)
                         tracking_detail.result = "true"
                     else:
                         tracking_detail.result = "false"
                     tracking_dao.update(tracking_detail)
                 except Exception as e:
                     logger.error(e)
                     _mark_failed(tracking_detail)
                     # Ten-minute pause after a failed check; presumably a
                     # back-off against rate limiting -- confirm.
                     time.sleep(600)
         except Exception as e:
             logger.error(e)
             # Only a detail that was actually reached can be marked as failed.
             if tracking_detail is not None:
                 _mark_failed(tracking_detail)
         finally:
             if driver is not None:
                 driver.quit()
     else:
         logger.info("单号任务没有需要检索的单号,任务id:%s,单号状态: %s", task_id, status)
         gl.set_value('STATUS', False)
         gl.set_value('TRACKING_STATUS', False)
     ims_api.done_tracking(task_id)
     gl.set_value('STATUS', False)
     gl.set_value('TRACKING_STATUS', False)
    def monitor(task_id, status):
        """Check every tracking number of a task on trackingmore.com and store the outcome.

        For each detail returned by ``TrackingDetailDao.get_by_task(task_id,
        status)`` the function waits (configured frequency plus a random
        10-15 s), loads the number's page in a fresh PhantomJS driver, takes
        a snapshot, classifies the page and persists the result through the
        DAO.  When the run ends (or there is nothing to check) the task is
        reported through ``ImsApi.done_tracking`` and the global 'STATUS' /
        'TRACKING_STATUS' flags are set to False.

        Args:
            task_id: Identifier of the tracking task.
            status: Detail status used to select the rows to check.
        """
        ims_api = ImsApi()
        tracking_dao = TrackingDetailDao()
        strategy_service = StrategyService()
        strategy = strategy_service.get_strategy()

        def _record(detail, result, des, url, snapshot):
            # Fill in the outcome fields of one detail (does not persist it).
            detail.result = result
            detail.des = des
            detail.end_time = datetime.datetime.now()
            detail.url = url
            detail.snapshot = snapshot

        tracking_details = tracking_dao.get_by_task(task_id, status)
        if tracking_details:
            for tracking_detail in tracking_details:
                if not gl.get_value('TRACKING_STATUS'):
                    # The task was stopped externally: report it and bail out.
                    logger.info("快递单任务已停止,任务id:%s", task_id)
                    gl.set_value('STATUS', False)
                    gl.set_value('TRACKING_STATUS', False)
                    ims_api.done_tracking(task_id)
                    return
                if strategy.frequency == 0 or strategy.frequency is None:
                    logger.info("未设置爬取频率限制,继续执行任务..")
                else:
                    logger.info("爬取频率限制为:%s 秒", strategy.frequency)
                    time.sleep(strategy.frequency)
                # Extra random delay between numbers, on top of the configured frequency.
                random_seconds = random.randint(10, 15)
                logger.info("快递单检测随机等待 %s 秒...", str(random_seconds))
                time.sleep(random_seconds)
                tracking_detail.start_time = datetime.datetime.now()
                tracking_detail.status = "done"
                logger.info("准备检查单号:%s ", tracking_detail.tracking_num)
                url = "https://www.trackingmore.com/cn/" + tracking_detail.tracking_num
                logger.info("url:%s ", url)
                driver = WebDriver.get_phantomjs()
                try:
                    driver.get(url)
                except Exception as e:
                    logger.error(e)
                    # NOTE(review): a page-load failure is stored with result
                    # "true"; presumably so timeouts are not reported as
                    # logistics failures -- confirm this is intended.
                    _record(tracking_detail, "true", "检测超时,建议手动验证:" + url, url, "")
                    tracking_dao.update(tracking_detail)
                    logger.info("单号巡检发生异常,跳过")
                    driver.quit()
                    continue

                try:
                    source = driver.page_source
                    soup = BeautifulSoup(source, 'html.parser')
                    snapshot = SnapshotService.snapshot_tracking(
                        driver, tracking_detail)
                    a_tags = soup.find_all("a", attrs={'class': 'ulliselect'})
                    has_tracking = False
                    if a_tags:
                        # The page offers several carrier links: follow the one
                        # whose text equals the detail's tracking_name.
                        for a_tag in a_tags:
                            if a_tag.get_text().strip() != tracking_detail.tracking_name:
                                continue
                            has_tracking = True
                            url = "http:" + a_tag.get("href")
                            driver.get(url)
                            snapshot = SnapshotService.snapshot_tracking(
                                driver, tracking_detail)
                            try:
                                source = driver.page_source
                                soup = BeautifulSoup(source, 'html.parser')
                                if soup.find_all(attrs={'class': 'line-gutter-backdrop'}):
                                    # Any element with this class is treated as
                                    # the request having been intercepted.
                                    _record(tracking_detail, "false",
                                            "爬虫请求疑似被拦截,建议手动验证!", url, snapshot)
                                elif soup.find_all("li", attrs={'class': 's-packStatst'}):
                                    _record(tracking_detail, "true", "物流正常", url, snapshot)
                                else:
                                    _record(tracking_detail, "false", "没有查询到物流信息", url, snapshot)
                            except Exception as e:
                                logger.error(e)
                                _record(tracking_detail, "false",
                                        "检测疑似异常,建议手动验证!", url, snapshot)
                            break
                        if not has_tracking:
                            _record(tracking_detail, "false",
                                    "提供的单号-快递公司关系疑似不匹配", url, snapshot)
                    elif soup.find_all("dd", attrs={'class': 'post_message'}):
                        _record(tracking_detail, "true", "巡检正常", url, snapshot)
                    else:
                        _record(tracking_detail, "false", "没有查询物流信息", url, snapshot)
                    tracking_dao.update(tracking_detail)
                except Exception as e:
                    logger.error(e)
                    _record(tracking_detail, "false", "检测疑似异常,建议手动验证!", url, "")
                    tracking_dao.update(tracking_detail)
                finally:
                    driver.quit()
        else:
            # Pairs with the ``if`` above (not with the ``for``): reached only
            # when the task has no tracking numbers to check.
            logger.info("单号任务没有需要检索的单号,任务id:%s,单号状态: %s", task_id, status)
            gl.set_value('STATUS', False)
            gl.set_value('TRACKING_STATUS', False)
        # Runs on every non-early-return path so the caller's polling loop on
        # 'TRACKING_STATUS' always terminates.
        ims_api.done_tracking(task_id)
        gl.set_value('STATUS', False)
        gl.set_value('TRACKING_STATUS', False)