def StopScrapydJob(request, scrapyd_url='http://127.0.0.1:6800'):
    """Cancel the running scrapyd job selected by the request's 'source' param.

    :param request: Django request; reads GET parameter 'source'
        ('WebCrawl' or 'Facebook') to choose which stored job id to cancel.
    :param scrapyd_url: address of the scrapyd daemon (default matches the
        previously hard-coded local daemon, so existing callers are unaffected).
    :return: JsonResponse wrapping None — callers only need the 200 status.
    """
    # Job ids are kept in module-level globals by whatever view started the jobs.
    global SpiderWebCrawlerJOBID
    global SpiderFacebookJOBID
    scrapyd = ScrapydAPI(scrapyd_url)
    source = request.GET.get('source', None)
    # Single dispatch table instead of two duplicated if-branches.
    job_ids = {
        'WebCrawl': SpiderWebCrawlerJOBID,
        'Facebook': SpiderFacebookJOBID,
    }
    if source in job_ids:
        job_id = job_ids[source]
        print("Stopping scrapyd job : " + str(job_id))
        scrapyd.cancel(SCRAPYD_PROJECT_NAME, job_id)
    return JsonResponse(None, safe=False)
def cancel_job(project, spider, url=DEFAULT_URL):
    """Cancel a job on a scrapyd daemon.

    @param project: scrapy project name
    @param spider: value passed as the second argument to ScrapydAPI.cancel
        (NOTE(review): scrapyd's cancel endpoint expects a *job id*, not a
        spider name — confirm what callers actually pass here)
    @param url: the url which the target scrapyd daemon listens on
    @return: the response from scrapyd's cancel call
    """
    scrapyd = ScrapydAPI(url)
    return scrapyd.cancel(project, spider)
def job_cancel(request, client_id, project_name, job_id):
    """
    cancel a job
    :param request: request object (only GET is supported)
    :param client_id: client id
    :param project_name: project name
    :param job_id: job id
    :return: json of the cancel result, or a json error message
    """
    # Guard clause: the original fell through and returned None for non-GET
    # requests, which Django rejects with a server error.
    if request.method != 'GET':
        return JsonResponse({'message': 'Method Not Allowed'}, status=405)
    client = Client.objects.get(id=client_id)
    try:
        # Build the daemon URL from the stored client record and ask it
        # to cancel the given job.
        scrapyd = ScrapydAPI(scrapyd_url(client.ip, client.port))
        result = scrapyd.cancel(project_name, job_id)
        return JsonResponse(result)
    except ConnectionError:
        # Daemon unreachable — report it instead of raising a 500.
        return JsonResponse({'message': 'Connect Error'})
class Scrapyd_Control(object):
    """Interactive console wrapper around ScrapydAPI for a single project.

    Prompts (in Chinese) for the daemon address and project name once at
    construction time, then exposes one method per scrapyd operation.
    """

    def __init__(self):
        # Ask for the scrapyd daemon address and project name once and
        # reuse them for every subsequent command.
        scrapyd_url = input('请输入scrapyd地址: ')
        project = input('请输入项目名称: ')
        self.project = project
        self.scrapyd = ScrapydAPI(scrapyd_url)

    # Start a spider run; returns project/spider/jobid for later reference.
    def schedule(self):
        spider = input('请输入爬虫名称: ')
        return {
            'project': self.project,
            'spider': spider,
            'jobid': self.scrapyd.schedule(self.project, spider)
        }

    # Aliases so users may type start/run as well as schedule.
    start, run = schedule, schedule

    # Cancel a running spider job by job id.
    def cancel(self):
        jobid = input('请粘贴要取消的爬虫jobid: ')
        return self.scrapyd.cancel(self.project, jobid)

    # List all projects known to the daemon.
    def listprojects(self):
        return self.scrapyd.list_projects()

    # List spiders in the current project.
    def listspiders(self):
        return self.scrapyd.list_spiders(self.project)

    # List all jobs of the current project.
    def listjobs(self):
        return self.scrapyd.list_jobs(self.project)

    # Show the status of one job by job id.
    def jobstatus(self):
        jobid = input('请粘贴要查看的jobid: ')
        return self.scrapyd.job_status(self.project, jobid)

    # List deployed versions of the current project.
    def listversions(self):
        return self.scrapyd.list_versions(self.project)

    # Delete one version, asking for an explicit 'yes' confirmation first.
    def delversion(self):
        version_name = input('请粘贴要删除的版本: ')
        yes = input('是否确认删除该版本{},请输yes否则回车跳过删除\n'.format(version_name))
        if yes == 'yes':
            return self.scrapyd.delete_version(self.project, version_name)

    # Delete the whole project, asking for an explicit 'yes' confirmation first.
    def delproject(self):
        yes = input('是否确认删除该项目{},请输yes否则回车跳过删除\n'.format(self.project))
        if yes == 'yes':
            return self.scrapyd.delete_project(self.project)

    # Print the command list. Bug fix: the delete-project command is
    # 'delproject' (the method above), not 'deleproject' as previously shown.
    def help(self):
        print("""
        启动爬虫 schedule|start|run
        取消爬虫 cancel
        查看项目 listprojects
        查看爬虫 listspiders
        列出所有jobs listjobs
        查看job状态 jobstatus
        查看版本 listversions
        删除版本 delversion
        删除项目 delproject
        列出所有命令 help
        """)