Beispiel #1
0
 def get_spider_list(self, project_name):
     """
     List the spiders reported for a project by the ``listspiders.json``
     endpoint under ``self._scrapyd_url()``.

     :param project_name: project name interpolated into the query string
     :return: list of ``SpiderInstance`` objects with ``spider_name`` set;
              empty when the response is falsy or its status is not ``'ok'``
     """
     endpoint = self._scrapyd_url() + "/listspiders.json?project=%s" % project_name
     payload = request("get", endpoint, return_type="json")

     # Nothing usable came back: report "no spiders" rather than failing.
     if not payload or payload['status'] != 'ok':
         return []

     spiders = []
     for name in payload['spiders']:
         entry = SpiderInstance()
         entry.spider_name = name
         spiders.append(entry)
     return spiders
    def get_spider_list(self, project):
        """
        Return the spiders that belong to ``project``.

        The first master service and the first slave service whose
        ``list_projects()`` contains ``project.project_name`` are queried.

        * ``project.is_msd == '0'`` (single-machine project): a new
          ``SpiderInstance`` is built for every name returned by the slave
          service's ``get_slave_spider_list``.
        * otherwise: the master service's spider instances are returned.
          Every one of them gets ``project_id``; ``spider_name_slave`` is
          assigned by position for as many slave names as are available.

        :param project: object exposing ``project_name``, ``is_msd`` and ``id``
        :return: list of spider instances; empty when no matching service
                 hosts the project
        """
        project_name = project.project_name

        # Spider instances from the first master service hosting the project.
        master_spiders = []
        for service in self.spider_service_instances_master:
            if project_name in service.list_projects():
                master_spiders = service.get_spider_list(project_name)
                break

        # Slave spider names from the first slave service hosting the project.
        slave_spider_names = []
        for service in self.spider_service_instances_slave:
            if project_name in service.list_projects():
                slave_spider_names = service.get_slave_spider_list(project_name)
                break

        if project.is_msd == '0':
            # Single-machine project: one spider instance per slave name.
            single_spiders = []
            for spider_name in slave_spider_names:
                spider = SpiderInstance()
                spider.spider_name = spider_name
                spider.project_id = project.id
                single_spiders.append(spider)
            return single_spiders

        # Tag every master spider before pairing. zip() stops at the shorter
        # sequence, so tagging inside the pairing loop would leave trailing
        # master spiders without a project_id when there are fewer slave names.
        for spider in master_spiders:
            spider.project_id = project.id

        if slave_spider_names:
            for spider, slave_name in zip(master_spiders, slave_spider_names):
                spider.spider_name_slave = slave_name
        return master_spiders
Beispiel #3
0
 def get_spider_list(self, project_name):
     """
     Return the spiders of a project as ``SpiderInstance`` objects.

     This is a best-effort lookup: if querying ``self.scrapyd_api`` or
     building the result raises an ordinary exception, the error is logged
     and an empty list is returned. ``KeyboardInterrupt`` and ``SystemExit``
     are not swallowed.

     :param project_name: name passed to ``self.scrapyd_api.list_spiders``
     :return: list of ``SpiderInstance`` objects with ``spider_name`` set;
              empty when nothing is listed or the lookup fails
     """
     import logging  # local import: the file's import block is not visible here

     try:
         spider_names = self.scrapyd_api.list_spiders(project_name)
         spiders = []
         # A falsy response (None, empty list) simply yields no spiders.
         for spider_name in spider_names or ():
             spider_instance = SpiderInstance()
             spider_instance.spider_name = spider_name
             spiders.append(spider_instance)
         return spiders
     except Exception:
         # Narrower than a bare ``except:`` so interpreter-exit signals
         # propagate, and logged so the failure is no longer invisible.
         logging.getLogger(__name__).exception(
             "Failed to list spiders for project %r", project_name)
         return []