def project_version(request, client_id, project_name):
    """
    get project deploy version
    :param request: request object
    :param client_id: client id
    :param project_name: project name
    :return: deploy version of project
    """
    if request.method == 'GET':
        # get client and project model
        client = CrawlNode.objects.get(id=client_id)
        project = CrawlProject.objects.get(name=project_name)
        engine = get_engine(client)
        # if deploy info exists in db, return it
        deploy = CrawlDeploy.objects.filter(node_id=client.id, project_id=project.id).first()
        # if deploy info does not exist in db, create it from the latest version on the node
        if not deploy:
            try:
                versions = engine.list_versions(project_name)
            except ConnectionError:
                return JsonResponse({'message': 'Connect Error'}, status=500)
            if len(versions) > 0:
                version = versions[-1]
                deployed_at = timezone.datetime.fromtimestamp(
                    int(version), tz=pytz.timezone(TIME_ZONE))
            else:
                deployed_at = None
            deploy, created = CrawlDeploy.objects.update_or_create(
                node_id=client.id, project_id=project.id, deployed_at=deployed_at)
        # return deploy json info
        return JsonResponse(model_to_dict(deploy))
def project_deploy(request, project_name):
    """
    deploy a spider project to the selected nodes
    :param request: request object
    :param project_name: project name
    :return: json of deploy result
    """
    if request.method == 'POST':
        path = os.path.abspath(join(os.getcwd(), PROJECTS_FOLDER))
        project_path = join(path, project_name)
        # look up the packaged egg file
        egg = find_egg(project_path)
        if not egg:
            r = Result.success("no packaged egg file found")
            return JsonResponse(r)
        # read the egg content once so every node gets the full file
        with open(join(project_path, egg), 'rb') as egg_file:
            egg_file_content = egg_file.read()
        data = json.loads(request.body.decode('utf-8'))
        node_ids = data["node_ids"]
        nodes = CrawlNode.objects.filter(id__in=node_ids)
        project = CrawlProject.objects.get(name=project_name)
        for node in nodes:
            engine = get_engine(node)
            engine.add_version(project_name, int(time.time()), egg_file_content)
            deployed_at = timezone.now()
            # soft-delete the previous deploy record
            CrawlDeploy.objects.filter(
                node_id=node.id, project_id=project.id).update(is_deleted=1)
            deploy, created = CrawlDeploy.objects.update_or_create(
                node_id=node.id, project_id=project.id,
                deployed_at=deployed_at, description=project.description)
        r = Result.success("")
        return JsonResponse(r)
def del_project(request, client_id, project):
    """
    delete a spider project from one client
    :param request: request object
    :param client_id: client id
    :param project: project name
    :return: json of delete result
    """
    if request.method == 'GET':
        client = CrawlNode.objects.get(id=client_id)
        try:
            scrapyd = get_engine(client)
            result = scrapyd.delete_project(project=project)
            return JsonResponse(result)
        except ConnectionError:
            return JsonResponse({'message': 'Connect Error'})
def project_list(request, client_id):
    """
    project deployed list on one client
    :param request: request object
    :param client_id: client id
    :return: json
    """
    if request.method == 'GET':
        client = CrawlNode.objects.get(id=client_id)
        scrapyd = get_engine(client)
        try:
            projects = scrapyd.list_projects()
            return JsonResponse(projects, safe=False)
        except ConnectionError:
            return JsonResponse({'message': 'Connect Error'}, status=500)
def project_list(request, node_id):
    """
    get the spider projects deployed on one node
    :param request: request object
    :param node_id: node id
    :return: json
    """
    if request.method == 'GET':
        client = CrawlNode.objects.get(id=node_id)
        engine = get_engine(client)
        try:
            projects = engine.list_projects()
            return JsonResponse(Result.success(data=projects))
        except ConnectionError:
            return JsonResponse(Result.fail())
def task_deploy(request, project_name):
    try:
        log_common.info('entering deploy handler')
        work_path = os.getcwd()
        if request.method == 'GET':
            log_common.info('starting deploy logic')
            path = os.path.abspath(join(os.getcwd(), PROJECTS_FOLDER))
            project_path = join(path, project_name)
            # look up the packaged egg file
            egg = find_egg(project_path)
            if not egg:
                raise Exception('no packaged egg file found')
            # read the egg content once so every node gets the full file
            with open(join(project_path, egg), 'rb') as egg_file:
                egg_file_content = egg_file.read()
            project = CrawlProject.objects.get(name=project_name, is_deleted=0)
            task = CrawlTask.objects.get(id=project.task_id)
            task.is_deploy = 1
            task.save()
            for node_id in json.loads(task.node_ids):
                node = CrawlNode.objects.get(id=node_id)
                engine = get_engine(node)
                log_common.info('{}: deploying {}'.format(node.node_ip, project_name))
                engine.add_version(project_name, int(time.time()), egg_file_content)
                log_common.info('{}: deployed {} successfully'.format(node.node_ip, project_name))
                # update deploy info
                deployed_at = timezone.now()
                # soft-delete the previous deploy record
                CrawlDeploy.objects.filter(
                    node_id=node.id, project_id=project.id).update(is_deleted=1)
                deploy, created = CrawlDeploy.objects.update_or_create(
                    node_id=node.id, project_id=project.id,
                    deployed_at=deployed_at, description=project.description)
            r = Result.success("")
            return JsonResponse(r)
    except Exception as e:
        import traceback
        log_common.error("task_deploy => {}".format(e))
        log_common.error("task_deploy => {}".format(traceback.format_exc()))
        r = Result.fail(e)
        return JsonResponse(r)
    finally:
        os.chdir(work_path)
def spider_start(request, client_id, project_name, spider_name):
    """
    start a spider on one client
    :param request: request object
    :param client_id: client id
    :param project_name: project name
    :param spider_name: spider name
    :return: json
    """
    if request.method == 'GET':
        client = CrawlNode.objects.get(id=client_id)
        scrapyd = get_engine(client)
        try:
            job = scrapyd.schedule(project_name, spider_name)
            return JsonResponse({'job': job})
        except ConnectionError:
            return JsonResponse({'message': 'Connect Error'}, status=500)
def del_version(request, client_id, project, version):
    """
    delete a specific version of a spider project
    :param request: request object
    :param client_id: client id
    :param project: project name
    :param version: version to delete
    :return: json of delete result
    """
    if request.method == 'GET':
        node = CrawlNode.objects.get(id=client_id)
        try:
            engine = get_engine(node)
            result = engine.delete_version(project=project, version=version)
            return JsonResponse(result)
        except ConnectionError:
            return JsonResponse({'message': 'Connect Error'})
def job_cancel(request, client_id, project_name, job_id):
    """
    cancel a running spider job
    :param request: request object
    :param client_id: client id
    :param project_name: project name
    :param job_id: job id
    :return: json of cancel
    """
    if request.method == 'GET':
        node = CrawlNode.objects.get(id=client_id)
        try:
            engine = get_engine(node)
            result = engine.cancel(project_name, job_id)
            return JsonResponse(result)
        except ConnectionError:
            return JsonResponse({'message': 'Connect Error'})
def spider_list(request, client_id, project_name):
    """
    get spider list from one client
    :param request: request object
    :param client_id: client id
    :param project_name: project name
    :return: json
    """
    if request.method == 'GET':
        client = CrawlNode.objects.get(id=client_id)
        engine = get_engine(client)
        try:
            spiders = engine.list_spiders(project_name)
            spiders = [{
                'name': spider,
                'id': index + 1
            } for index, spider in enumerate(spiders)]
            return JsonResponse(spiders, safe=False)
        except ConnectionError:
            return JsonResponse({'message': 'Connect Error'}, status=500)
def job_list(request, client_id, project_name):
    """
    get the job list of a project on one node
    :param request: request object
    :param client_id: client id
    :param project_name: project name
    :return: list of jobs
    """
    if request.method == 'GET':
        client = CrawlNode.objects.get(id=client_id)
        engine = get_engine(client)
        try:
            result = engine.list_jobs(project_name)
            jobs = []
            statuses = ['pending', 'running', 'finished']
            for status in statuses:
                for job in result.get(status):
                    job['status'] = status
                    jobs.append(job)
            return JsonResponse(jobs, safe=False)
        except ConnectionError:
            return JsonResponse({'message': 'Connect Error'}, status=500)
def node_spider_list(request):
    """
    list the spider scripts deployed on each node of a project
    :param request: request object
    :return: json
    """
    if request.method == 'GET':
        task_id = request.GET.get("project_id")
        project_id = CrawlTask.objects.get(id=task_id).project_id
        deploys = CrawlDeploy.objects.filter(project_id=project_id)
        node_spiders = []
        for deploy in deploys:
            node_spider = []
            node = CrawlNode.objects.get(id=deploy.node_id)
            engine = get_engine(node)
            try:
                # spiders = engine.list_spiders(deploy.project_name)
                # write into the spider script table; whether to write it in real time
                # in the background should be decided based on performance
                # new_data = {
                #     "name": "",
                #     "desc": "",
                #     "trigger": "",
                #     "hosts": "",
                #     "args": "",
                #     "type": 1,
                #     "project_id": project_id
                # }
                # CrawlScript.objects.create()
                scripts = CrawlScript.objects.filter(project_id=project_id)
                # spiders = [{'name': spider, 'id': index + 1} for index, spider in enumerate(spiders)]
                node_spider.append({"node": node, "scripts": scripts})
                node_spiders.append(node_spider)
            except ConnectionError:
                return JsonResponse(Result.fail("crawl node {} is unavailable".format(node.node_name)))
        r = Result.success(node_spiders)
        return JsonResponse(r)