def delete_version(project, client, version):
    """Delete one deployed *version* of *project* from a scrapyd node.

    :param project: object exposing a ``name`` attribute (the scrapyd project)
    :param client: object exposing ``ip`` and ``port`` of the scrapyd server
    :param version: version string to delete
    :return: ``True`` when scrapyd reports success, ``False`` on a falsy
        result or on a connection/URL error.
    """
    url = 'http://{ip}:{port}'.format(ip=client.ip, port=client.port)
    try:
        scrapyd = ScrapydAPI(url)
        result = scrapyd.delete_version(project.name, version)
        # bool() replaces the redundant ``True if result else False``.
        return bool(result)
    except (ConnectionError, InvalidURL):
        # Best-effort: an unreachable or misconfigured node is reported
        # as a plain failure rather than propagating the exception.
        return False
def remove_depody_spider(request, client_id, project, version_name):
    """Django view: delete *version_name* of *project* on the scrapyd node
    identified by *client_id*.

    Only POST is handled; any other method yields ``None`` (same implicit
    contract as before).  On a connection failure a 500 JSON response is
    returned instead of raising.
    """
    if request.method != 'POST':
        return None

    node = Node.objects.get(id=client_id)
    api = ScrapydAPI(scrapyd_url(node.node_ip, node.node_port))
    try:
        result = api.delete_version(project, version_name)
    except ConnectionError:
        return JsonResponse({'message': 'Connect Error'}, status=500)
    # NOTE(review): ``delete_version`` appears to return a non-dict value;
    # JsonResponse without ``safe=False`` requires a dict — confirm upstream.
    return JsonResponse(result)
def delete_version(project, version, url=DEFAULT_URL):
    """Remove *version* of *project* from the scrapyd server at *url*."""
    return ScrapydAPI(url).delete_version(project, version)
class Schedular:
    """Facade over a scrapyd server that mirrors every scrapyd operation
    into the PriceWatchWeb schedule API (versions and jobs).

    NOTE(review): the (misspelled) class name is kept because external
    callers may reference it.  All public methods are best-effort: errors
    are logged and a neutral value (``None``/``False``) is returned.
    """

    def __init__(self):
        # ``_scrapyd`` stays None when configuration is missing or client
        # construction fails; every method guards against that state.
        self._scrapyd = None
        try:
            self._scrapyd = ScrapydAPI('http://{}:{}'.format(
                config['Scrapyd']['host'], config['Scrapyd']['port']))
        except KeyError as e:
            logger.error("{}: No such key exists - {}".format(
                class_fullname(e), str(e)))
        except Exception as e:
            logger.error("{}: Failed to create a scrapyd object - {}".format(
                class_fullname(e), str(e)))

    def addversion(self, project, version,
                   egg_filename='pricewatch_bot-0.0.1-py3.7.egg'):
        """ Scrapyd API: addversion - https://scrapyd.readthedocs.io/en/stable/api.html#addversion-json

        Uploads the packaged egg as a new *version* of *project* and, on
        success, records the version via the PriceWatchWeb API.

        :return: number of spiders in the uploaded version, or None on error.
        """
        if not self._scrapyd:
            logger.error(
                "No scrapyd object find. Unable to add a new version.")
            return None
        num_of_spiders = None
        try:
            with open(os.path.join(settings.APP_DIST_DIRPATH, egg_filename),
                      'rb') as egg:
                num_of_spiders = self._scrapyd.add_version(
                    project, version, egg)
        except FileNotFoundError as e:
            logger.error("{}: {}".format(class_fullname(e), str(e)))
        except ScrapydResponseError as e:
            logger.error("{}: Response error - {}".format(
                class_fullname(e), str(e)))
        except Exception as e:
            logger.error("{}: Failed to add a version - {}".format(
                class_fullname(e), str(e)))
        else:
            logger.info(
                "version '{}' for project '{}' added/updated - {} spider(s)".
                format(project, version, num_of_spiders))
            # call API to create a version
            response = requests.post(
                'http://{}:{}/api/schedule/version/'.format(
                    config['PriceWatchWeb']['host'],
                    config['PriceWatchWeb']['port']),
                json={
                    'project': project,
                    'version': version,
                    'status': settings.SCHEDULES_VERSION_STATUS_ADDED,
                    'added_at': str(datetime.now()),
                    'deleted_at': None,
                })
            if not response.ok:
                logger.error(
                    "{} HTTP Error: Failed to add a version - {} - {}".format(
                        response.status_code, response.reason, response.text))
        finally:
            # Deliberate best-effort pattern: always return the result slot.
            return num_of_spiders

    def schedule(self, project, spider, **kwargs):
        """Schedule a crawl job and record it via the PriceWatchWeb API.

        A UUID is generated locally, passed to scrapyd as ``jobid`` and to
        the spider as ``job_id``; scrapyd must echo the same id back.

        :return: the scrapyd job id, or None on error.
        """
        if not self._scrapyd:
            logger.error("No scrapyd object find. Unable to schedule a job.")
            return None
        # BUGFIX: ``jobid`` must be bound before the try block — the
        # ``finally: return jobid`` below raised NameError whenever
        # scheduling failed (every sibling method pre-initializes its result).
        jobid = None
        _jobid = str(uuid.uuid4())
        kwargs['jobid'] = _jobid  # a scrapyd parameter
        kwargs['job_id'] = _jobid  # passing to a spider
        try:
            _s = None  # scrapy settings in dict. eg {'DOWNLOAD_DELAY': 2}
            jobid = self._scrapyd.schedule(project, spider,
                                           settings=_s, **kwargs)
        except ScrapydResponseError as e:
            logger.error("{}: Response error - {}".format(
                class_fullname(e), str(e)))
        except Exception as e:
            logger.error("{}: Failed to schedule a job - {}".format(
                class_fullname(e), str(e)))
        else:
            if jobid != _jobid:
                # BUGFIX: the original referenced ``e`` here, which is not
                # bound in the else branch (NameError on mismatch).
                logger.error(
                    "Invalid jobid [enteredid vs returnedid] [{} vs {}]"
                    .format(_jobid, jobid))
            else:
                logger.info(
                    "new scheduled job '{}' for project '{}', spider '{}' has been set"
                    .format(jobid, project, spider))
                # call API to create a job
                response = requests.post(
                    'http://{}:{}/api/schedule/job/'.format(
                        config['PriceWatchWeb']['host'],
                        config['PriceWatchWeb']['port']),
                    json={
                        'job_id': jobid,
                        'project': project,
                        'spider': spider,
                        # pop '_version' first so it is excluded from
                        # 'other_params' (dict literals evaluate in order)
                        'version': kwargs.pop('_version', None),
                        'settings': _s,
                        'other_params': kwargs,
                        'status': settings.SCHEDULES_JOB_STATUS_PENDING,
                    })
                if not response.ok:
                    logger.error(
                        "{} HTTP Error: Failed to add a new job - {} - {}".
                        format(response.status_code, response.reason,
                               response.text))
        finally:
            return jobid

    def listjobs(self, project):
        """List scrapyd jobs for *project* and persist them via the API.

        :return: the raw jobs dict from scrapyd, or None on error.
        """
        if not self._scrapyd:
            logger.error("No scrapyd object find. Unable to list jobs.")
            return None
        jobs = None
        try:
            jobs = self._scrapyd.list_jobs(project)
        except ScrapydResponseError as e:
            logger.error("{}: Response error - {}".format(
                class_fullname(e), str(e)))
        except Exception as e:
            logger.error("{}: Failed to list jobs - {}".format(
                class_fullname(e), str(e)))
        else:
            logger.info("list of jobs for project '{}' - {}".format(
                project, str(jobs)))
            self._store_jobs(project, jobs)
        finally:
            return jobs

    def _store_jobs(self, project, jobs):
        """ parse jobs and store information into db """
        # Only act when scrapyd returned both buckets we consume.
        if all(_j in jobs for _j in ['running', 'finished']):
            for x in jobs['running']:
                # call API to update a running job
                response = requests.put(
                    'http://{}:{}/api/schedule/job/{}/'.format(
                        config['PriceWatchWeb']['host'],
                        config['PriceWatchWeb']['port'], x['id']),
                    json={
                        'job_id': x['id'],
                        'project': project,
                        'spider': x['spider'],
                        'start_time': x['start_time'],
                        'status': settings.SCHEDULES_JOB_STATUS_RUNNING,
                    })
                if not response.ok:
                    logger.error(
                        "{} HTTP Error: Failed to update a running job - {} - {}"
                        .format(response.status_code, response.reason,
                                response.text))
            for x in jobs['finished']:
                # call API to update a finished job
                response = requests.put(
                    'http://{}:{}/api/schedule/job/{}/'.format(
                        config['PriceWatchWeb']['host'],
                        config['PriceWatchWeb']['port'], x['id']),
                    json={
                        'job_id': x['id'],
                        'project': project,
                        'spider': x['spider'],
                        'start_time': x['start_time'],
                        'end_time': x['end_time'],
                        'status': settings.SCHEDULES_JOB_STATUS_FINISHED,
                    })
                if not response.ok:
                    logger.error(
                        "{} HTTP Error: Failed to update a finished job - {} - {}"
                        .format(response.status_code, response.reason,
                                response.text))

    def delversion(self, project, version):
        """ delversion

        Delete one version of *project* from scrapyd and mark it deleted
        via the PriceWatchWeb API.

        :return: True on success, False otherwise.
        """
        if not self._scrapyd:
            logger.error("No scrapyd object find. Unable to delete version.")
            return False
        deleted = False
        try:
            deleted = self._scrapyd.delete_version(project, version)
        except ScrapydResponseError as e:
            logger.error("{}: Response error - {}".format(
                class_fullname(e), str(e)))
        except Exception as e:
            logger.error("{}: Failed to delete version - {}".format(
                class_fullname(e), str(e)))
        else:
            logger.info(
                "successfully deleted project '{}' version '{}'".format(
                    project, version))
            # update deleted version
            response = requests.put(
                'http://{}:{}/api/schedule/version/'.format(
                    config['PriceWatchWeb']['host'],
                    config['PriceWatchWeb']['port']),
                json={
                    'project': project,
                    'version': version,
                    'status': settings.SCHEDULES_VERSION_STATUS_DELETED,
                    'deleted_at': str(datetime.now()),
                })
            if not response.ok:
                logger.error(
                    "{} HTTP Error: Failed to update a deleted version - {} - {}"
                    .format(response.status_code, response.reason,
                            response.text))
        finally:
            return deleted

    def delproject(self, project):
        """ delproject

        Delete the whole *project* from scrapyd and mark its versions
        deleted via the PriceWatchWeb API.

        :return: True on success, False otherwise.
        """
        if not self._scrapyd:
            # BUGFIX: message said "Unable to delete version." (copy-paste).
            logger.error("No scrapyd object find. Unable to delete project.")
            return False
        deleted = False
        try:
            deleted = self._scrapyd.delete_project(project)
        except ScrapydResponseError as e:
            logger.error("{}: Response error - {}".format(
                class_fullname(e), str(e)))
        except Exception as e:
            logger.error("{}: Failed to delete project - {}".format(
                class_fullname(e), str(e)))
        else:
            logger.info("successfully deleted project '{}'".format(project))
            # update deleted project
            response = requests.put(
                'http://{}:{}/api/schedule/version/'.format(
                    config['PriceWatchWeb']['host'],
                    config['PriceWatchWeb']['port']),
                json={
                    'project': project,
                    'status': settings.SCHEDULES_VERSION_STATUS_DELETED,
                    'deleted_at': str(datetime.now()),
                })
            if not response.ok:
                logger.error(
                    "{} HTTP Error: Failed to update deleted project - {} - {}"
                    .format(response.status_code, response.reason,
                            response.text))
        finally:
            return deleted

    def close(self):
        """Close the underlying scrapyd HTTP client session, if any."""
        # BUGFIX: guard against a failed __init__ — every other method
        # checks ``self._scrapyd`` before use; close() did not.
        if self._scrapyd:
            self._scrapyd.client.close()
class Scrapyd_Control(object):
    """Interactive console front-end for a scrapyd server.

    Prompts (in Chinese) for the server URL and project name on creation,
    then exposes one method per scrapyd operation.  Prompt strings are
    kept verbatim; only comments are translated.
    """

    def __init__(self):
        scrapyd_url = input('请输入scrapyd地址: ')
        project = input('请输入项目名称: ')
        self.project = project
        self.scrapyd = ScrapydAPI(scrapyd_url)

    # Start a spider
    def schedule(self):
        spider = input('请输入爬虫名称: ')
        return {
            'project': self.project,
            'spider': spider,
            'jobid': self.scrapyd.schedule(self.project, spider)
        }

    # Command aliases: start/run behave exactly like schedule
    start, run = schedule, schedule

    # Cancel a job
    def cancel(self):
        jobid = input('请粘贴要取消的爬虫jobid: ')
        return self.scrapyd.cancel(self.project, jobid)

    # List projects
    def listprojects(self):
        return self.scrapyd.list_projects()

    # List spiders
    def listspiders(self):
        return self.scrapyd.list_spiders(self.project)

    # List all jobs
    def listjobs(self):
        return self.scrapyd.list_jobs(self.project)

    # Query job status
    def jobstatus(self):
        jobid = input('请粘贴要查看的jobid: ')
        return self.scrapyd.job_status(self.project, jobid)

    # List versions
    def listversions(self):
        return self.scrapyd.list_versions(self.project)

    # Delete a version (asks for confirmation; returns None when skipped)
    def delversion(self):
        version_name = input('请粘贴要删除的版本: ')
        yes = input('是否确认删除该版本{},请输yes否则回车跳过删除\n'.format(version_name))
        if yes == 'yes':
            return self.scrapyd.delete_version(self.project, version_name)

    # Delete the project (asks for confirmation; returns None when skipped)
    def delproject(self):
        yes = input('是否确认删除该项目{},请输yes否则回车跳过删除\n'.format(self.project))
        if yes == 'yes':
            return self.scrapyd.delete_project(self.project)

    # Print all available commands
    def help(self):
        # BUGFIX: the menu advertised "deleproject", a command that does
        # not exist — the actual method is "delproject".
        print("""
        启动爬虫 schedule|start|run
        取消爬虫 cancel
        查看项目 listprojects
        查看爬虫 listspiders
        列出所有jobs listjobs
        查看job状态 jobstatus
        查看版本 listversions
        删除版本 delversion
        删除项目 delproject
        列出所有命令 help
        """)