Exemple #1
0
 def get_pending_task(batch_num):
     """Claim the next pending task of a batch for this agent.

     The agent role is read from the ``job`` environment variable: a ``bc``
     agent only considers tasks whose type is ``bc``; any other agent considers
     every task whose type is not ``bc``.  The first matching task is marked
     ``processing`` before its target row is looked up.

     :param batch_num: batch number whose pending tasks are considered.
     :return: ``(target, task)`` where ``task`` is the claimed ``TaskItem`` and
         ``target`` is the matching ``Weburl`` row for ``weburl`` tasks or the
         ``Website`` row for every other task type.  ``(None, None)`` is
         returned when nothing is pending (``STATUS`` is then set to ``False``)
         or when the ``Weburl`` referenced by the task cannot be found (the
         task is then marked ``done``).
     :raises KeyError: if ``agent_name`` or ``job`` is not set in the environment.
     """
     agent_name = os.environ['agent_name']
     job = os.environ['job']

     # Only the type criterion differs between the two agent roles.
     if job == "bc":
         type_criterion = TaskItem.type == 'bc'
     else:
         type_criterion = TaskItem.type != 'bc'
     task_pools = session.query(TaskItem).filter(TaskItem.batch_num == batch_num).filter(
         TaskItem.status == 'pending', type_criterion)

     # Count once: each count() call is a separate query, and reusing the value
     # keeps the logged number identical to the one that was tested.
     pending_count = task_pools.count()
     if pending_count == 0:
         logger.info("本Agent没有待巡检任务,Agent切换为waiting状态: %s", agent_name)
         # No task in pending state
         gl.set_value('STATUS', False)
         return None, None
     logger.info("%s 准备执行可以处理的任务,倒数第:%s 个...", agent_name, str(pending_count))

     task_pool = task_pools.first()
     # NOTE(review): no commit is issued here; presumably the session is
     # committed elsewhere or runs in autocommit mode - confirm.
     session.query(TaskItem).filter(TaskItem.id == task_pool.id).update({"status": "processing"})

     if task_pool.type != "weburl":
         # one() raises when the website row is missing or duplicated.
         website = session.query(Website).filter(Website.id == task_pool.website_id).one()
         return website, task_pool

     logger.info("task_pool.website_id:%s", task_pool.website_id)
     weburl = session.query(Weburl).filter(Weburl.url == task_pool.url).filter(
         Weburl.website_id == task_pool.website_id).first()
     if weburl is not None:
         return weburl, task_pool

     # The URL behind this task no longer exists: close the task and report
     # that there is nothing to process.
     logger.info("task_pool.website_id:%s", task_pool.website_id)
     logger.info("task_pool.id:%s", task_pool.id)
     session.query(TaskItem).filter(TaskItem.id == task_pool.id).update({"status": "done"})
     return None, None
Exemple #2
0
def stop():
    """Handle a stop order sent to this agent.

    A ``gather`` agent ignores the order.  Any other agent clears the
    ``STATUS`` flag and sends a heartbeat through ``ims_api``.

    :return: ``'SUCCESS'`` in both cases.  The gather branch previously fell
        through and returned ``None``, which is not a valid response for a
        web handler and disagreed with ``stop_tracking``.
    :raises KeyError: if ``job`` is not set in the environment.
    """
    job = os.environ['job']
    if job == "gather":
        logger.info("My Job is gather,ignore the order!")
    else:
        gl.set_value('STATUS', False)
        ims_api.heartbeat()
    return 'SUCCESS'
Exemple #3
0
def stop_tracking():
    """Handle a stop order for the tracking job.

    Only a ``tracking`` agent reacts: it clears both the ``STATUS`` and the
    ``TRACKING_STATUS`` flags and sends a heartbeat.  Any other agent just
    logs that the order is ignored.

    :return: always ``'SUCCESS'``.
    :raises KeyError: if ``job`` is not set in the environment.
    """
    job = os.environ['job']
    if job == "tracking":
        gl.set_value('STATUS', False)
        gl.set_value('TRACKING_STATUS', False)
        ims_api.heartbeat()
    else:
        # This branch runs when the job is NOT tracking, so log the actual job
        # instead of the former hard-coded (and wrong) "tracking".
        logger.info("My Job is %s,ignore the order!", job)
    return 'SUCCESS'
Exemple #4
0
def tracking_execute():
    """Start a tracking inspection in a background thread.

    Only a ``tracking`` agent acts on the request.  It stops selenium and
    chrome, raises the ``STATUS`` and ``TRACKING_STATUS`` flags, sends a
    heartbeat, then reads ``taskId`` and ``status`` from the request form and
    runs ``inspect_tracking(task_id, status)`` on a daemon thread.

    :return: ``'OK'`` when the worker was started or when tracking is not this
        agent's job; ``'ERROR'`` when the worker could not be started (the
        function previously returned ``None`` in that case).
    :raises KeyError: if ``job`` is not set in the environment.
    """
    job = os.environ['job']
    if job != "tracking":
        logger.info("Tracking is not my job!")
        return 'OK'

    # Shut down selenium and chrome before the new run (the original comment
    # called this "restart selenium"; nothing is started again here).
    stop_selenium()
    stop_chrome()
    gl.set_value('STATUS', True)
    gl.set_value('TRACKING_STATUS', True)
    ims_api.heartbeat()
    try:
        task_id = request.form['taskId']
        status = request.form['status']
        logger.info("tracking begin task_id: %s,status: %s", task_id, status)
        t = threading.Thread(target=inspect_tracking, args=(task_id, status))
        # The daemon attribute replaces the deprecated setDaemon().
        t.daemon = True
        t.start()
        return 'OK'
    except Exception as e:
        logger.error(e)
        # No worker is running, so do not leave the agent flagged as busy.
        gl.set_value('STATUS', False)
        gl.set_value('TRACKING_STATUS', False)
        return 'ERROR'
Exemple #5
0
def gather_urls():
    """Gather URLs for one website, or for all websites, on a gather agent.

    The ``websiteId`` form field selects the website; the literal value
    ``'all'`` is translated to ``None`` before it is passed to
    ``WeburlService.gather_urls_by_website``.  ``STATUS`` is raised for the
    duration of the run and is always lowered again afterwards, including when
    the run fails.

    :return: ``'SUCCESS'`` when the run finished or when gathering is not this
        agent's job; ``'ERROR'`` when a ``KeyError`` occurred (for example a
        missing ``websiteId`` field).
    :raises KeyError: if ``job`` is not set in the environment.  Any
        non-``KeyError`` exception raised during the run propagates to the
        caller after ``STATUS`` has been reset.
    """
    logger.info("receive gather_urls req !")
    job = os.environ['job']
    if job != "gather":
        logger.info("Gather is not my job!")
        return 'SUCCESS'

    try:
        gl.set_value('STATUS', True)
        weburl_service = WeburlService()
        website_id = request.form['websiteId']
        if website_id == 'all':
            website_id = None
        weburl_service.gather_urls_by_website(website_id)
        return 'SUCCESS'
    except KeyError as e:
        logger.error(e)
        return 'ERROR'
    finally:
        # Previously STATUS stayed True whenever the run raised.
        gl.set_value('STATUS', False)
Exemple #6
0
def execute():
    """Start a spider inspection batch in a background thread.

    Only ``bc`` and ``other`` agents act on the request.  They restart
    selenium, raise the ``STATUS`` flag, send a heartbeat, then read
    ``batchNum`` from the request form and run ``inspect(batch_num)`` on a
    daemon thread.

    :return: ``'OK'`` when the worker was started or when spidering is not this
        agent's job; ``'ERROR'`` when the worker could not be started (the
        function previously returned ``None`` in that case).
    :raises KeyError: if ``job`` is not set in the environment.
    """
    logger.info("receive execute req !")
    job = os.environ['job']
    if job not in ("bc", "other"):
        logger.info("spider is not my job!")
        return 'OK'

    # Restart selenium
    logger.info("restart  selenium...")
    stop_selenium()
    start_selenium()
    logger.info("update status...")
    gl.set_value('STATUS', True)
    logger.info("heartbeat...")
    ims_api.heartbeat()
    try:
        batch_num = request.form['batchNum']
        logger.info("spider begin batchNum: %s", batch_num)
        t = threading.Thread(target=inspect, args=(batch_num,))
        # The daemon attribute replaces the deprecated setDaemon().
        t.daemon = True
        t.start()
        return 'OK'
    except Exception as e:
        logger.error(e)
        # No worker is running, so do not leave the agent flagged as busy.
        gl.set_value('STATUS', False)
        return 'ERROR'
 def monitor(task_id, status):
     """Check every tracking number of a task through a logged-in chrome session.

     The tracking details are loaded with ``tracking_dao.get_by_task``.  For
     each one the number is searched on 51tracking, a snapshot is taken, the
     JSON endpoint is read for ``track_status`` and the detail row is updated
     with the outcome.  The loop stops early when the ``TRACKING_STATUS`` flag
     is cleared.  On every exit path the task is reported through
     ``ims_api.done_tracking`` and both status flags are cleared.

     :param task_id: id of the tracking task to process.
     :param status: detail status passed through to ``get_by_task``.
     :return: ``None``.
     """
     ims_api = ImsApi()
     tracking_dao = TrackingDetailDao()
     # track_status code -> label stored in the detail's ``des`` field.
     status_dict = {'0': '查询中', '1': '查询不到', '2': '运输途中', '3': '到达待取', '4': '成功签收', '5': '运输过久',
                    '6': '投递失败', '7': '可能异常'}
     # Codes regarded as a normal result; codes 6 and 7 are reported as "false".
     normal_status_dict = {'0': '查询中', '1': '查询不到', '2': '运输途中', '3': '到达待取', '4': '成功签收', '5': '运输过久'}
     tracking_details = tracking_dao.get_by_task(task_id, status)
     if len(tracking_details) > 0:
         # Pre-bind both names so the except/finally clauses below cannot hit an
         # unbound local when the browser start-up or the login fails.
         driver = None
         tracking_detail = None
         try:
             driver = WebDriver.get_chrome()
             driver.get("https://www.trackingmore.com/login-cn.html")
             # NOTE(review): credentials are hard-coded in source; they should be
             # moved to configuration / environment and rotated.
             driver.find_element_by_id("email").send_keys("*****@*****.**")
             driver.find_element_by_id("password").send_keys("0418YXYwlx")
             driver.find_element_by_id("login_test").click()
             time.sleep(5)
             for tracking_detail in tracking_details:
                 if not gl.get_value('TRACKING_STATUS'):
                     # The task was stopped from outside: report and bail out.
                     logger.info("快递单任务已停止,任务id:%s", task_id)
                     gl.set_value('STATUS', False)
                     gl.set_value('TRACKING_STATUS', False)
                     ims_api.done_tracking(task_id)
                     return
                 tracking_detail.start_time = datetime.datetime.now()
                 tracking_detail.status = "done"
                 logger.info("准备检查单号:%s ", tracking_detail.tracking_num)
                 try:
                     driver.get(
                         "https://my.51tracking.com/numbers.php?lang=cn&keywordType=trackNumber&p=1&searchnumber="
                         + tracking_detail.tracking_num)
                     driver.maximize_window()
                     time.sleep(3)
                     driver.find_element_by_id('trackItem_0').click()
                     time.sleep(1)
                     snapshot = SnapshotService.snapshot_tracking(driver, tracking_detail)
                     url = "https://my.51tracking.com/data/data-numbers.php?lang=cn&action=get_my_number" \
                           "&source=2&where=lang%3Dcn%26p%3D1%26keywordType%3DtrackNumber%26searchnumber%3D" \
                           + tracking_detail.tracking_num + "&page=1"
                     driver.get(url)
                     json_data = driver.find_element_by_tag_name("body").text
                     json_obj = json.loads(str(json_data))
                     # Named track_status so the ``status`` parameter is not shadowed.
                     track_status = json_obj['data'][0]['track_status']
                     # An unknown code raises KeyError and is handled below.
                     tracking_detail.des = status_dict[track_status]
                     tracking_detail.end_time = datetime.datetime.now()
                     tracking_detail.url = ""
                     tracking_detail.snapshot = snapshot
                     if track_status in normal_status_dict:
                         logger.info("单号巡检状态:%s", track_status)
                         tracking_detail.result = "true"
                     else:
                         tracking_detail.result = "false"
                     tracking_dao.update(tracking_detail)
                 except Exception as e:
                     logger.error(e)
                     tracking_detail.result = "false"
                     tracking_detail.des = "检测疑似异常,建议手动验证!"
                     tracking_detail.end_time = datetime.datetime.now()
                     tracking_detail.url = ""
                     tracking_detail.snapshot = ""
                     tracking_dao.update(tracking_detail)
                     # Back off for ten minutes before the next number.
                     time.sleep(600)
         except Exception as e:
             logger.error(e)
             # Only a detail that was actually being processed can be marked as
             # failed; a failure before the loop has no detail to update.
             if tracking_detail is not None:
                 tracking_detail.result = "false"
                 tracking_detail.des = "检测疑似异常,建议手动验证!"
                 tracking_detail.end_time = datetime.datetime.now()
                 tracking_detail.url = ""
                 tracking_detail.snapshot = ""
                 tracking_dao.update(tracking_detail)
         finally:
             if driver is not None:
                 driver.quit()
     else:
         logger.info("单号任务没有需要检索的单号,任务id:%s,单号状态: %s", task_id, status)
         gl.set_value('STATUS', False)
         gl.set_value('TRACKING_STATUS', False)
     ims_api.done_tracking(task_id)
     gl.set_value('STATUS', False)
     gl.set_value('TRACKING_STATUS', False)
    def monitor(task_id, status):
        """Check every tracking number of a task on trackingmore with phantomjs.

        For each tracking detail the function waits (configured crawl frequency
        plus a random 10-15 seconds), opens the tracking page in a fresh
        phantomjs driver, classifies the page content and stores the outcome on
        the detail row.  The loop stops early when the ``TRACKING_STATUS`` flag
        is cleared.  Whether or not there was anything to check, the task is
        finally reported through ``ims_api.done_tracking`` and both status
        flags are cleared.

        The "nothing to check" branch used to be attached to the ``for`` loop
        instead of the ``if``: the message was logged after every completed
        loop, and an empty task was never reported as done.

        :param task_id: id of the tracking task to process.
        :param status: detail status passed through to ``get_by_task``.
        :return: ``None``.
        """

        def _record(detail, result, des, url, snapshot):
            # Stamp the outcome of one check onto the detail row.
            detail.result = result
            detail.des = des
            detail.end_time = datetime.datetime.now()
            detail.url = url
            detail.snapshot = snapshot

        ims_api = ImsApi()
        tracking_dao = TrackingDetailDao()
        strategy_service = StrategyService()
        strategy = strategy_service.get_strategy()
        tracking_details = tracking_dao.get_by_task(task_id, status)
        if len(tracking_details) > 0:
            for tracking_detail in tracking_details:
                if not gl.get_value('TRACKING_STATUS'):
                    # The task was stopped from outside: report and bail out.
                    logger.info("快递单任务已停止,任务id:%s", task_id)
                    gl.set_value('STATUS', False)
                    gl.set_value('TRACKING_STATUS', False)
                    ims_api.done_tracking(task_id)
                    return
                if strategy.frequency == 0 or strategy.frequency is None:
                    logger.info("未设置爬取频率限制,继续执行任务..")
                else:
                    logger.info("爬取频率限制为:%s 秒", strategy.frequency)
                    time.sleep(strategy.frequency)
                # Extra random pause between two look-ups.
                random_seconds = random.randint(10, 15)
                logger.info("快递单检测随机等待 %s 秒...", str(random_seconds))
                time.sleep(random_seconds)
                tracking_detail.start_time = datetime.datetime.now()
                tracking_detail.status = "done"
                logger.info("准备检查单号:%s ", tracking_detail.tracking_num)
                url = "https://www.trackingmore.com/cn/" + tracking_detail.tracking_num
                logger.info("url:%s ", url)
                driver = WebDriver.get_phantomjs()
                try:
                    driver.get(url)
                except Exception as e:
                    logger.error(e)
                    # NOTE(review): a failed page load is stored with result
                    # "true" - kept as-is, confirm this is intended.
                    _record(tracking_detail, "true", "检测超时,建议手动验证:" + url, url, "")
                    tracking_dao.update(tracking_detail)
                    logger.info("单号巡检发生异常,跳过")
                    driver.quit()
                    continue

                try:
                    source = driver.page_source
                    soup = BeautifulSoup(source, 'html.parser')
                    snapshot = SnapshotService.snapshot_tracking(driver, tracking_detail)
                    a_tags = soup.find_all("a", attrs={'class': 'ulliselect'})
                    if len(a_tags) > 0:
                        # The page lists several links: follow the one whose
                        # text equals the detail's tracking_name.
                        has_tracking = False
                        for a_tag in a_tags:
                            if a_tag.get_text().strip() != tracking_detail.tracking_name:
                                continue
                            has_tracking = True
                            url = "http:" + a_tag.get("href")
                            driver.get(url)
                            snapshot = SnapshotService.snapshot_tracking(driver, tracking_detail)
                            try:
                                source = driver.page_source
                                soup = BeautifulSoup(source, 'html.parser')
                                items = soup.find_all(attrs={'class': 'line-gutter-backdrop'})
                                if len(items) != 0:
                                    # Any such element is reported as a blocked
                                    # crawler request.
                                    _record(tracking_detail, "false", "爬虫请求疑似被拦截,建议手动验证!", url, snapshot)
                                elif len(soup.find_all("li", attrs={'class': 's-packStatst'})) > 0:
                                    _record(tracking_detail, "true", "物流正常", url, snapshot)
                                else:
                                    _record(tracking_detail, "false", "没有查询到物流信息", url, snapshot)
                            except Exception as e:
                                logger.error(e)
                                _record(tracking_detail, "false", "检测疑似异常,建议手动验证!", url, snapshot)
                            break
                        if not has_tracking:
                            _record(tracking_detail, "false", "提供的单号-快递公司关系疑似不匹配", url, snapshot)
                    else:
                        # No link list: look for tracking messages directly.
                        messages = soup.find_all("dd", attrs={'class': 'post_message'})
                        if len(messages) > 0:
                            _record(tracking_detail, "true", "巡检正常", url, snapshot)
                        else:
                            _record(tracking_detail, "false", "没有查询物流信息", url, snapshot)
                    tracking_dao.update(tracking_detail)
                except Exception as e:
                    logger.error(e)
                    _record(tracking_detail, "false", "检测疑似异常,建议手动验证!", url, "")
                    tracking_dao.update(tracking_detail)
                finally:
                    driver.quit()
        else:
            logger.info("单号任务没有需要检索的单号,任务id:%s,单号状态: %s", task_id, status)
            gl.set_value('STATUS', False)
            gl.set_value('TRACKING_STATUS', False)
        ims_api.done_tracking(task_id)
        gl.set_value('STATUS', False)
        gl.set_value('TRACKING_STATUS', False)
Exemple #9
0
from flask import Flask
from flask import request

import config.global_val as gl
from config.mylog import logger
from dao.tracking_task_dao import TrackingTaskDao
from manager.gather_center import GatherCenter
from manager.ims_api import ImsApi
from service.monitor_bc_service import MonitorBcService
from service.monitor_tracking_service import MonitorTrackingService
from service.weburl_service import WeburlService

# Flask application object that the route decorators in this module attach to.
app = Flask(__name__)
# Must run before the set_value() calls below (presumably creates the shared
# value store - confirm in config.global_val).
gl._init()

# Define the cross-module global variables; both flags start out as False.
gl.set_value('STATUS', False)
gl.set_value('TRACKING_STATUS', False)
# Module-level ImsApi client instance.
ims_api = ImsApi()


@app.route('/verify_cookie', methods=['POST'])
def verify_cookie():
    """Handle ``POST /verify_cookie``.

    A ``MonitorBcService`` is still constructed, but the real check
    (``check_cookie()``) is commented out, so the endpoint always answers
    ``"SUCCESS"``.
    """
    bc_service = MonitorBcService()
    # Disabled for now: return bc_service.check_cookie()
    return "SUCCESS"


'''
爬取数据
'''