def project_mapping(request):
    # Map the projects deployed on the scrapyd node into the record tables.
    result = {'status': 1, 'msg': None, 'data': None}
    if request.method == 'POST':
        user = User.objects.get(username=request.session['username'])
        node = Node.objects.get(nid=request.POST.get('nid'))
        projects = request.POST.get('projects').split(',')
        scrapyd = scrapyd_obj(uri(node.ip, node.port))
        if scrapyd:
            # Remove records for projects that are no longer deployed on the node.
            for project in set(projects) - set(scrapyd.list_projects()):
                if project:
                    # Delete the project record
                    project_obj = Project.objects.get(name=project, node=node, user=user)
                    project_obj.delete()
                    # Delete its job records
                    Job.objects.filter(project=project, node=node.nid, user=user).delete()
            # Map the deployed projects into the record tables.
            for project in scrapyd.list_projects():
                Project.objects.get_or_create(name=project, node=node, user=user)
                for spider in scrapyd.list_spiders(project):
                    job, _ = Job.objects.get_or_create(name=spider, project=project, user=user, node=node.nid)
                    job.status = 1
                    job.save()
            result['msg'] = 'Mapping executed successfully!'
    return Response(result)

def job_view(request):
    # List the jobs of a spider on a node.
    result = {}
    if request.method == 'POST':
        data = request.POST.dict()
        nid, project, spider = data['data'].split(',')
        node = Node.objects.get(nid=nid)
        scrapyd = scrapyd_obj(uri(node.ip, node.port))
        if scrapyd:
            jobs = scrapyd.list_jobs(project)
            spiders_list = []
            for status, job in jobs.items():
                if status == 'node_name':
                    continue
                for j in job:
                    if j['spider'] == spider:
                        # Trim timestamps to 'YYYY-MM-DD HH:MM:SS'.
                        j['start_time'] = j['start_time'][:19] if 'start_time' in j else ''
                        j['end_time'] = j['end_time'][:19] if 'end_time' in j else ''
                        spiders_list.append(j)
            result['spiders'] = spiders_list
            result['project'] = project
            result['node'] = node.ip
            result['port'] = node.port
            result['nid'] = node.nid
    return Response(result)

def get_status(self, pk):
    # Return 1 if the scrapyd node responds, 0 otherwise.
    try:
        node = Node.objects.get(nid=pk)
        resp = requests.get(uri(node.ip, node.port), timeout=0.1)
    except Exception:
        return 0
    return 1 if resp.ok else 0

def log_view(request):
    # Return the tail of a job's log file from the scrapyd node.
    result = {'status': 1, 'msg': None, 'data': None}
    data = request.GET.dict()
    node = Node.objects.get(nid=data['nid'])
    url = uri(node.ip, node.port) + '/logs/%s/%s/%s.log' % (data['project'], data['spider'], data['job'])
    # Fetch at most the last 10 KB of the log, then keep the last 100 lines.
    head = requests.head(url)
    length = int(head.headers['Content-Length'])
    length = length if length < 10240 else 10240
    response = requests.get(url, headers={'Range': 'bytes=-%s' % length})
    txt = response.content.decode().split('\n')[-100:]
    result['data'] = '\n'.join(txt)
    return Response(result)

def get(self, request):
    # List the current user's spiders, paginated.
    per = 10
    result = {'status': 1, 'msg': None, 'data': None}
    page = int(request.GET.get('page', 1))
    user = User.objects.get(username=request.session['username'])
    spiders_list = []
    projects_list = []
    nodes_cache = {}
    for job in Job.objects.filter(user=user):
        # Look up each node (and its scrapyd client) only once.
        if job.node in nodes_cache:
            node, scrapyd = nodes_cache[job.node]
        else:
            node = Node.objects.get(pk=job.node)
            scrapyd = scrapyd_obj(uri(node.ip, node.port))
            nodes_cache[job.node] = (node, scrapyd)
        if job.status == 1:
            spiders_list.append({
                'node': job.node,
                'project': job.project,
                'spider': job.name,
                'jid': job.jid,
                'ip': node.ip,
                'alive': job.alive,
            })
            project_dict = {'ip': node.ip, 'project': job.project}
            if project_dict not in projects_list:
                projects_list.append(project_dict)
    result['data'] = {}
    result['data']['pages'] = get_pages(len(spiders_list), per)
    page = result['data']['pages'] if page >= result['data']['pages'] else page
    result['data']['spiders'] = spiders_list[(page - 1) * per:page * per]
    result['data']['projects'] = projects_list
    return Response(result)

def job_mapping(request):
    # Change the status of a project's spiders.
    result = {'status': 1, 'msg': None, 'data': None}
    if request.method == 'POST':
        user = User.objects.get(username=request.session['username'])
        node = Node.objects.get(nid=request.POST.get('nid'))
        project = Project.objects.get(pk=request.POST.get('id'))
        status = request.POST.get('status')
        scrapyd = scrapyd_obj(uri(node.ip, node.port))
        if scrapyd:
            spiders_list = scrapyd.list_spiders(project.name)
            for spider in spiders_list:
                job, _ = Job.objects.get_or_create(name=spider, project=project.name, user=user, node=project.node.nid)
                job.status = status
                job.save()
            project.status = status
            project.save()
            result['msg'] = 'Update succeeded!'
    return Response(result)

def post(self, request):
    # Delete a deployment record (and the project deployed on the node).
    result = {'status': 1, 'msg': None, 'data': None}
    project = Project.objects.get(pk=request.POST.get('id'))
    node = Node.objects.get(nid=request.POST.get('nid'))
    scrapyd = scrapyd_obj(uri(node.ip, node.port))
    if scrapyd:
        if project.name in scrapyd.list_projects():
            # Remove the project deployed on the scrapyd node.
            scrapyd.delete_project(project.name)
        # Remove the uploaded package file.
        if project.file:
            delete_file(project.file.path)
        else:
            delete_file(settings.MEDIA_ROOT + '/deploy/%s.zip' % project.name)
        # Remove the record.
        project.delete()
        result['msg'] = 'Project %s has been deleted!' % project.name
    return Response(result)

def job_start(request):
    # Start a spider on the node.
    result = {}
    if request.method == 'POST':
        data = request.POST.dict()
        nid, project, spider = data['data'].split(',')
        node = Node.objects.get(nid=nid)
        scrapyd = scrapyd_obj(uri(node.ip, node.port))
        if scrapyd:
            result['jid'] = scrapyd.schedule(project, spider)
            # Update the job record's state.
            job = Job.objects.get(user=User.objects.get(username=request.session['username']),
                                  project=project, name=spider)
            job.jid = result['jid']
            job.alive = 1
            job.save()
            result['node'] = nid
            result['project'] = project
            result['spider'] = spider
    return Response(result)

def job_stop(request):
    # Stop a running spider.
    def _is_alive(scrapyd, project, jid):
        # Return True if the job is still in scrapyd's running list.
        for job in scrapyd.list_jobs(project)['running']:
            if job['id'] == jid:
                return True
        return False

    result = {'status': 1, 'msg': None, 'data': None}
    data = {}
    if request.method == 'POST':
        data = request.POST.dict()
        nid, project, jid, spider = data['data'].split(',')
        node = Node.objects.get(nid=nid)
        scrapyd = scrapyd_obj(uri(node.ip, node.port))
        if scrapyd:
            scrapyd.cancel(project, jid)
            # Force-stop if the job is still running after the first cancel.
            try:
                if _is_alive(scrapyd, project, jid):
                    scrapyd.cancel(project, jid, 'KILL')
            except Exception:
                result['status'] = 0
                result['msg'] = 'Stop failed, please force-stop the job'
            # Update the job record's state.
            job = Job.objects.get(user=User.objects.get(username=request.session['username']),
                                  project=project, name=spider)
            job.jid = None
            job.alive = 0
            job.save()
            data['node'] = nid
            data['project'] = project
            data['spider'] = spider
            result['data'] = data
    return Response(result)

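
# ---------------------------------------------------------------------------
# The views above rely on helpers (uri, scrapyd_obj, get_pages, delete_file)
# that are not shown in this section. The sketch below is an assumption of
# what they might look like, inferred only from how they are called:
# scrapyd_obj is assumed to wrap ScrapydAPI from the python-scrapyd-api
# package and to return None when the node cannot be reached.
import math
import os

import requests
from scrapyd_api import ScrapydAPI


def uri(ip, port):
    # Build the base URL of a scrapyd node, e.g. 'http://127.0.0.1:6800'.
    return 'http://%s:%s' % (ip, port)


def scrapyd_obj(url):
    # Return a ScrapydAPI client for the node, or None if it is unreachable.
    try:
        requests.get(url, timeout=1)
    except requests.RequestException:
        return None
    return ScrapydAPI(url)


def get_pages(total, per):
    # Number of pages needed to show `total` items at `per` items per page.
    return max(1, math.ceil(total / per))


def delete_file(path):
    # Remove a deployed package file if it still exists on disk.
    if os.path.exists(path):
        os.remove(path)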