def inspect(batch_num):
    """Repeatedly run the gather step for a batch while ``STATUS`` is set.

    A ``GatherCenter`` is created once and its ``gather(batch_num)`` is
    invoked in a loop for as long as ``gl.get_value('STATUS')`` is truthy.
    Each pass is bracketed by start/end log lines; one final log line is
    written once the loop stops.

    NOTE(review): the original indentation was lost; the final
    "batchNum inspect task end!" line is assumed to sit after the loop.

    :param batch_num: batch identifier passed through to ``gather``.
    """
    gatherer = GatherCenter()

    # The messages depend only on batch_num, so build them once.
    batch_label = str(batch_num)
    start_message = "inspect task start! batch_num:%s" % batch_label
    end_message = "inspect task end! batch_num:%s" % batch_label
    finished_message = "batchNum inspect task end! batch_num:%s" % batch_label

    while True:
        if not gl.get_value('STATUS'):
            break
        logger.info(start_message)
        gatherer.gather(batch_num)
        logger.info(end_message)

    logger.info(finished_message)
def inspect_tracking(task_id, status):
    """Run tracking monitoring for a task while ``TRACKING_STATUS`` is set.

    A ``MonitorTrackingService`` is created once and its
    ``monitor(task_id, status)`` is called in a loop for as long as
    ``gl.get_value('TRACKING_STATUS')`` is truthy. When the loop stops, the
    task is closed through ``TrackingTaskDao.close_task`` and a final log
    line is written.

    NOTE(review): the original indentation was lost; closing the task is
    assumed to happen once, after the loop.

    :param task_id: identifier of the tracking task.
    :param status: status filter forwarded unchanged to ``monitor``.
    """
    service = MonitorTrackingService()

    # Start/end messages depend only on task_id, so build them once.
    task_label = str(task_id)
    start_message = "tracking task start! task_id:%s" % task_label
    end_message = "tracking task end! task_id:%s" % task_label

    while True:
        if not gl.get_value('TRACKING_STATUS'):
            break
        logger.info(start_message)
        service.monitor(task_id, status)
        logger.info(end_message)

    TrackingTaskDao().close_task(task_id)
    logger.info(end_message)
def heartbeat():
    """Send one best-effort heartbeat for this agent.

    POSTs a form-encoded body with the host IP, the current value of the
    global ``STATUS`` flag and the agent name (read from the ``agent_name``
    environment variable) to ``ims_rest_base + "open/api/v1/agent/heartbeat"``.

    Any failure (missing environment variable, DNS lookup, HTTP error,
    timeout) is logged and swallowed so a failed heartbeat never propagates
    to the caller.
    """
    try:
        agent_name = os.environ['agent_name']
        hostname = socket.gethostname()
        ip = socket.gethostbyname(hostname)
        url = ims_rest_base + "open/api/v1/agent/heartbeat"
        status = gl.get_value('STATUS')
        payload = {"ip": ip, "status": status, "job": agent_name}
        body = parse.urlencode(payload).encode("utf8")
        heartbeat_request = request.Request(url, body)
        # Close the response explicitly so the connection is not leaked, and
        # bound the wait so an unresponsive server cannot block the heartbeat
        # forever (the default is no timeout at all).
        with request.urlopen(heartbeat_request, timeout=30):
            pass
    except Exception as e:
        logger.info(e)
        logger.info("heartbeat fail")
def monitor(task_id, status):
    """Inspect the tracking numbers of a task through a logged-in Chrome session.

    Loads the details returned by ``TrackingDetailDao.get_by_task`` and, for
    each one, opens the 51tracking search page, takes a snapshot, reads the
    JSON status endpoint and stores the outcome on the detail
    (``result``/``des``/``end_time``/``url``/``snapshot``) via
    ``tracking_dao.update``.

    The loop stops early, and ``ims_api.done_tracking`` is called, as soon as
    the global ``TRACKING_STATUS`` flag is no longer set. If there are no
    details to inspect, the task is likewise reported as done. On exit, both
    ``STATUS`` and ``TRACKING_STATUS`` are cleared.

    NOTE(review): the original indentation was lost. Two placements are
    assumptions to confirm: the 600 s pause is treated as a back-off inside
    the per-number error handler, and the final flag reset is treated as
    function-level (running after the loop as well as after the empty case).

    :param task_id: identifier of the tracking task.
    :param status: detail status used to select which details to inspect.
    """
    ims_api = ImsApi()
    tracking_dao = TrackingDetailDao()
    # track_status code -> description stored in ``des``.
    status_dict = {'0': '查询中', '1': '查询不到', '2': '运输途中', '3': '到达待取',
                   '4': '成功签收', '5': '运输过久', '6': '投递失败', '7': '可能异常'}
    # Codes recorded as a passing inspection; '6' and '7' are recorded as failed.
    normal_statuses = {'0', '1', '2', '3', '4', '5'}

    def _clear_run_flags():
        # Clear both global run flags.
        gl.set_value('STATUS', False)
        gl.set_value('TRACKING_STATUS', False)

    def _mark_suspect(detail):
        # Record a detail as failed/suspect and persist it.
        detail.result = "false"
        detail.des = "检测疑似异常,建议手动验证!"
        detail.end_time = datetime.datetime.now()
        detail.url = ""
        detail.snapshot = ""
        tracking_dao.update(detail)

    tracking_details = tracking_dao.get_by_task(task_id, status)
    if tracking_details:
        # Pre-bind both names so the handlers below never hit a NameError when
        # the failure happens before the driver exists or before the loop starts.
        driver = None
        tracking_detail = None
        try:
            driver = WebDriver.get_chrome()
            driver.get("https://www.trackingmore.com/login-cn.html")
            # NOTE(security): credentials are hard-coded in source; they should
            # be moved to configuration or the environment.
            driver.find_element_by_id("email").send_keys("*****@*****.**")
            driver.find_element_by_id("password").send_keys("0418YXYwlx")
            driver.find_element_by_id("login_test").click()
            time.sleep(5)

            for tracking_detail in tracking_details:
                if not gl.get_value('TRACKING_STATUS'):
                    logger.info("快递单任务已停止,任务id:%s", task_id)
                    _clear_run_flags()
                    ims_api.done_tracking(task_id)
                    return

                tracking_detail.start_time = datetime.datetime.now()
                tracking_detail.status = "done"
                logger.info("准备检查单号:%s ", tracking_detail.tracking_num)
                try:
                    driver.get(
                        "https://my.51tracking.com/numbers.php?lang=cn"
                        "&keywordType=trackNumber&p=1&searchnumber="
                        + tracking_detail.tracking_num)
                    driver.maximize_window()
                    time.sleep(3)
                    driver.find_element_by_id('trackItem_0').click()
                    time.sleep(1)
                    snapshot = SnapshotService.snapshot_tracking(
                        driver, tracking_detail)

                    url = "https://my.51tracking.com/data/data-numbers.php?lang=cn&action=get_my_number" \
                          "&source=2&where=lang%3Dcn%26p%3D1%26keywordType%3DtrackNumber%26searchnumber%3D" \
                          + tracking_detail.tracking_num + "&page=1"
                    driver.get(url)
                    json_data = driver.find_element_by_tag_name("body").text
                    json_obj = json.loads(str(json_data))
                    # Kept separate from the ``status`` parameter, which the
                    # original code shadowed here.
                    track_status = json_obj['data'][0]['track_status']

                    # An unknown code raises KeyError and is handled below as
                    # a suspect result.
                    tracking_detail.des = status_dict[track_status]
                    tracking_detail.end_time = datetime.datetime.now()
                    tracking_detail.url = ""
                    tracking_detail.snapshot = snapshot
                    if track_status in normal_statuses:
                        logger.info("单号巡检状态:%s", track_status)
                        tracking_detail.result = "true"
                    else:
                        tracking_detail.result = "false"
                    tracking_dao.update(tracking_detail)
                except Exception as e:
                    logger.error(e)
                    _mark_suspect(tracking_detail)
                    # NOTE(review): assumed to be a back-off after a failed
                    # lookup; confirm against the original layout.
                    time.sleep(600)
        except Exception as e:
            logger.error(e)
            if tracking_detail is not None:
                _mark_suspect(tracking_detail)
        finally:
            if driver is not None:
                driver.quit()
    else:
        logger.info("单号任务没有需要检索的单号,任务id:%s,单号状态: %s", task_id, status)
        _clear_run_flags()
        ims_api.done_tracking(task_id)

    _clear_run_flags()
def monitor(task_id, status):
    """Inspect the tracking numbers of a task by scraping trackingmore pages.

    For each detail returned by ``TrackingDetailDao.get_by_task`` the function
    waits (the configured ``strategy.frequency`` plus a random 10-15 s),
    opens ``https://www.trackingmore.com/cn/<tracking_num>`` in a PhantomJS
    driver, and classifies the page:

    * If the page lists carrier links (``a.ulliselect``), the link whose text
      equals ``tracking_detail.tracking_name`` is followed. The resulting
      page is recorded as intercepted (``line-gutter-backdrop`` elements
      present), normal (``li.s-packStatst`` present) or without logistics
      info. No matching link is recorded as a number/carrier mismatch.
    * Otherwise the page itself is checked for ``dd.post_message`` elements.

    The outcome is written to the detail and persisted with
    ``tracking_dao.update``. The loop stops early, and
    ``ims_api.done_tracking`` is called, once the global ``TRACKING_STATUS``
    flag is no longer set; an empty detail list is reported as done as well.
    On exit, both ``STATUS`` and ``TRACKING_STATUS`` are cleared.

    NOTE(review): the original indentation was lost. Assumed structure: the
    ``post_message`` check is the alternative to the carrier-list branch,
    and the final flag reset is function-level. A failed initial page load
    is recorded with result "true" exactly as before; confirm that is
    intended.

    :param task_id: identifier of the tracking task.
    :param status: detail status used to select which details to inspect.
    """
    ims_api = ImsApi()
    tracking_dao = TrackingDetailDao()
    strategy = StrategyService().get_strategy()

    def _clear_run_flags():
        # Clear both global run flags.
        gl.set_value('STATUS', False)
        gl.set_value('TRACKING_STATUS', False)

    def _record(detail, result, des, url, snapshot):
        # Store one inspection outcome on the detail (not persisted here).
        detail.result = result
        detail.des = des
        detail.end_time = datetime.datetime.now()
        detail.url = url
        detail.snapshot = snapshot

    tracking_details = tracking_dao.get_by_task(task_id, status)
    if tracking_details:
        for tracking_detail in tracking_details:
            if not gl.get_value('TRACKING_STATUS'):
                logger.info("快递单任务已停止,任务id:%s", task_id)
                _clear_run_flags()
                ims_api.done_tracking(task_id)
                return

            frequency = strategy.frequency
            if frequency is None or frequency == 0:
                logger.info("未设置爬取频率限制,继续执行任务..")
            else:
                logger.info("爬取频率限制为:%s 秒", frequency)
                time.sleep(frequency)
            random_seconds = random.randint(10, 15)
            logger.info("快递单检测随机等待 %s 秒...", random_seconds)
            time.sleep(random_seconds)

            tracking_detail.start_time = datetime.datetime.now()
            tracking_detail.status = "done"
            logger.info("准备检查单号:%s ", tracking_detail.tracking_num)
            url = "https://www.trackingmore.com/cn/" + tracking_detail.tracking_num
            logger.info("url:%s ", url)

            driver = WebDriver.get_phantomjs()
            try:
                driver.get(url)
            except Exception as e:
                logger.error(e)
                _record(tracking_detail, "true", "检测超时,建议手动验证:" + url,
                        url, "")
                tracking_dao.update(tracking_detail)
                logger.info("单号巡检发生异常,跳过")
                driver.quit()
                continue

            try:
                soup = BeautifulSoup(driver.page_source, 'html.parser')
                snapshot = SnapshotService.snapshot_tracking(
                    driver, tracking_detail)
                carrier_links = soup.find_all(
                    "a", attrs={'class': 'ulliselect'})

                if carrier_links:
                    # First carrier link whose label equals the expected name.
                    carrier_link = next(
                        (link for link in carrier_links
                         if link.get_text().strip()
                         == tracking_detail.tracking_name),
                        None)
                    if carrier_link is None:
                        _record(tracking_detail, "false",
                                "提供的单号-快递公司关系疑似不匹配", url, snapshot)
                    else:
                        url = "http:" + carrier_link.get("href")
                        driver.get(url)
                        snapshot = SnapshotService.snapshot_tracking(
                            driver, tracking_detail)
                        try:
                            # Parse the carrier page once; both checks below
                            # use the same tree.
                            carrier_soup = BeautifulSoup(
                                driver.page_source, 'html.parser')
                            if carrier_soup.find_all(
                                    attrs={'class': 'line-gutter-backdrop'}):
                                _record(tracking_detail, "false",
                                        "爬虫请求疑似被拦截,建议手动验证!",
                                        url, snapshot)
                            elif carrier_soup.find_all(
                                    "li", attrs={'class': 's-packStatst'}):
                                _record(tracking_detail, "true", "物流正常",
                                        url, snapshot)
                            else:
                                _record(tracking_detail, "false",
                                        "没有查询到物流信息", url, snapshot)
                        except Exception as e:
                            # Logged like every other handler in this function
                            # (previously printed to stdout).
                            logger.error(e)
                            _record(tracking_detail, "false",
                                    "检测疑似异常,建议手动验证!", url, snapshot)
                elif soup.find_all("dd", attrs={'class': 'post_message'}):
                    _record(tracking_detail, "true", "巡检正常", url, snapshot)
                else:
                    _record(tracking_detail, "false", "没有查询物流信息",
                            url, snapshot)

                tracking_dao.update(tracking_detail)
            except Exception as e:
                logger.error(e)
                _record(tracking_detail, "false",
                        "检测疑似异常,建议手动验证!", url, "")
                tracking_dao.update(tracking_detail)
            finally:
                driver.quit()
    else:
        logger.info("单号任务没有需要检索的单号,任务id:%s,单号状态: %s", task_id, status)
        _clear_run_flags()
        ims_api.done_tracking(task_id)

    _clear_run_flags()