Beispiel #1
0
def sync_spiders():
    """
    Sync spider instances from the scrapyd agent into the database.

    Iterates every project, fetches its spider list from the agent, and
    persists the instances.
    :return: None
    """
    for project in Project.query.all():
        spider_instance_list = agent.get_spider_list(project)
        # Pass project.id so the instances are associated with their project,
        # consistent with the update_spider_instances signature used at the
        # other call sites in this codebase.
        SpiderInstance.update_spider_instances(project.id, spider_instance_list)
Beispiel #2
0
def sync_spiders():
    """
    Refresh the stored spider instances for every known project.

    For each project, query the scrapyd agent for its spiders and persist
    them, then emit a debug log line.
    :return: None
    """
    projects = Project.query.all()
    for proj in projects:
        spiders = agent.get_spider_list(proj)
        SpiderInstance.update_spider_instances(proj.id, spiders)
    app.logger.debug('[sync_spiders]')
Beispiel #3
0
def sync_spiders():
    """
    Pull the spider list of each project from the agent and store it.
    :return: None
    """
    for current_project in Project.query.all():
        # Fetch the spiders known to scrapyd for this project.
        instances = agent.get_spider_list(current_project)
        SpiderInstance.update_spider_instances(current_project.id, instances)
    app.logger.debug('[sync_spiders]')
Beispiel #4
0
 def get_spider_list(self, project_name):
     """
     Fetch the spider names of *project_name* from scrapyd and wrap each
     name in a SpiderInstance.

     :param project_name: name of the scrapyd project to list
     :return: list of SpiderInstance objects; empty when the request fails
              or scrapyd does not report an 'ok' status
     """
     url = self._scrapyd_url() + "/listspiders.json?project=%s" % project_name
     data = request("get", url, return_type="json")
     instances = []
     if data and data['status'] == 'ok':
         for name in data['spiders']:
             inst = SpiderInstance()
             inst.spider_name = name
             instances.append(inst)
     return instances
Beispiel #5
0
def sync_spiders():
    """
    Periodic task (runs every 10s): mirror the spiders registered on
    scrapyd into the database.
    :return: None
    """
    all_projects = Project.query.all()
    for proj in all_projects:
        # Ask the agent for the spider list of this project by its name.
        spider_list = agent.get_spider_list(proj)
        SpiderInstance.update_spider_instances(proj.id, spider_list)
    app.logger.debug('[同步scrapyd上的蜘蛛到系统数据库]')
Beispiel #6
0
def sync_spiders():
    """
    Periodic job (every 10s) that mirrors the spiders on scrapyd into the
    local database.
    :return: None
    """
    # Commit first so the subsequent query sees a fresh view of the
    # projects table.
    db.session.commit()
    for proj in db.session.query(Project).all():
        # Fetch this project's spider list from the scrapyd agent.
        found_spiders = agent.get_spider_list(proj)
        SpiderInstance.update_spider_instances(proj.id, found_spiders)
    app.logger.debug('[同步scrapyd上的蜘蛛到系统数据库]')
    def get_spider_list(self, project):
        """
        Fetch the spider list for the given project from the master and
        slave scrapyd servers.

        :param project: project object (reads project_name, is_msd, id)
        :return: list of SpiderInstance objects — built from the slave
                 spider names when ``project.is_msd == '0'`` (single-machine
                 crawler), otherwise the master list (each instance tagged
                 with project_id and, when available, a slave spider name)
        """
        spider_instance_list_slave = []
        spider_instance_list_master = []
        # Query the MASTER scrapyd servers: take the spider instances from
        # the first server that hosts this project name.
        for spider_service_instance in self.spider_service_instances_master:
            if project.project_name in spider_service_instance.list_projects():
                spider_instance_list_master = \
                    spider_service_instance.get_spider_list(project.project_name)
                break
        # Query the SLAVE scrapyd servers the same way.
        for spider_service_instance in self.spider_service_instances_slave:
            # The project is deployed on this scrapyd server.
            if project.project_name in spider_service_instance.list_projects():
                # Pull all slave spider names for this project from it.
                spider_instance_list_slave = \
                    spider_service_instance.get_slave_spider_list(project.project_name)
                break

        if project.is_msd == '0':  # single-machine (non-distributed) crawler
            spider_instance_single_list = []
            for spider_name in spider_instance_list_slave:  # iterate slave spider names
                spider_instance_single = SpiderInstance()  # build a spider instance
                spider_instance_single.spider_name = spider_name  # set the spider name
                spider_instance_single.project_id = project.id  # link it to its project
                spider_instance_single_list.append(
                    spider_instance_single)  # collect the instance
            return spider_instance_single_list

        else:
            # Slave servers do host spiders for this project: pair master
            # instances with slave names (zip truncates to the shorter list).
            if spider_instance_list_slave:
                for spider_instance, slave_spider_name in zip(
                        spider_instance_list_master,
                        spider_instance_list_slave):
                    # Tag each master instance with the project id.
                    spider_instance.project_id = project.id
                    spider_instance.spider_name_slave = slave_spider_name
                return spider_instance_list_master
            # No slave spiders for this project: return master instances only.
            else:
                for spider_instance in spider_instance_list_master:
                    # Tag each master instance with the project id.
                    spider_instance.project_id = project.id
                return spider_instance_list_master
Beispiel #8
0
 def get_spider_list(self, project_name):
     """
     Fetch all spider names under *project_name* from scrapyd.

     Best-effort: returns an empty list when scrapyd is unreachable or the
     listing call fails.
     :param project_name: name of the scrapyd project
     :return: list of SpiderInstance objects, one per spider name
     """
     try:
         # List every spider name registered under the given project.
         data = self.scrapyd_api.list_spiders(project_name)
     except Exception:
         # Narrowed from a bare `except:` so SystemExit/KeyboardInterrupt
         # still propagate; any scrapyd/API failure yields an empty list,
         # preserving the original best-effort contract.
         return []
     result = []
     if data:
         for spider_name in data:
             spider_instance = SpiderInstance()
             spider_instance.spider_name = spider_name
             result.append(spider_instance)
     return result
Beispiel #9
0
def spider_dashboard(project_id):
    """
    Render the dashboard page listing the spiders of one project.
    :param project_id: id of the project whose spiders are shown
    """
    instances = SpiderInstance.list_spiders(project_id)
    return render_template("spider_dashboard.html",
                           spider_instance_list=instances)
def spider_deploy(project_id):
    """
    Refresh the spider instances for one project and render the deploy page.
    :param project_id: id of the project to refresh
    """
    project = Project.find_project_by_id(project_id)
    spider_instance_list = agent.get_spider_list(project)
    # Pass project.id to match the update_spider_instances signature used
    # at the sync_spiders call sites elsewhere in this codebase.
    SpiderInstance.update_spider_instances(project.id, spider_instance_list)
    return render_template("spider_deploy.html")