# NOTE: the import paths for the project-local modules (models, utils,
# settings) below are assumptions; adjust them to the actual package layout.
import os
import time
import logging

from os.path import join

from django.forms.models import model_to_dict
from django.http import JsonResponse
from django.utils import timezone
from requests.exceptions import ConnectionError, ReadTimeout

from .models import Client, Project, Spider
from .settings import WORKPLACE, PROJECTS_DIR
from .utils import get_scrapyd, get_spider_status_index, find_egg

logger = logging.getLogger(__name__)


def project_withdraw(request, project_name):
    """
    withdraw project from client
    :param request: request object
    :param project_name: project name
    :return: json
    """
    if request.method == 'GET':
        project = Project.objects.get(name=project_name)
        # remove project from client
        try:
            scrapyd = get_scrapyd(project.client)
            result = scrapyd.delete_project(project=project_name)
            if not result:
                return JsonResponse({'result': result})
        except (ConnectionError, ReadTimeout):
            return JsonResponse({'message': 'Connect Error'}, status=500)
        # alter project status; withdraw the spider count from the client
        # before detaching it
        project.deployed_at = None
        project.deployed_version = None
        project.client.remove_spider(project.spider_amount)
        project.client = None
        project.spider_amount = None
        project.save()
        # invalidate spiders
        Spider.objects.filter(project=project).update(available=False)
        return JsonResponse({'result': result})

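# `get_scrapyd` is defined elsewhere in the project. A minimal sketch of the
# helper, assuming the python-scrapyd-api package and a Client model that
# stores `ip` and `port` fields (both assumptions, not confirmed by this file):
#
#     from scrapyd_api import ScrapydAPI
#
#     def get_scrapyd(client):
#         # build an API wrapper pointed at the client's Scrapyd daemon
#         return ScrapydAPI('http://{ip}:{port}'.format(ip=client.ip, port=client.port))
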
def client_remove(request, client_id):
    """
    remove a client
    :param request: request object
    :param client_id: client id
    :return: json
    """
    if request.method == 'GET':
        client = Client.objects.get(id=client_id)
        projects = Project.objects.filter(client=client)
        try:
            scrapyd = get_scrapyd(client)
            # withdraw every project deployed on this client
            for each in projects:
                result = scrapyd.delete_project(project=each.name)
                if not result:
                    return JsonResponse({'result': 0})
                Spider.objects.filter(project=each).update(available=False)
        except (ConnectionError, ReadTimeout):
            return JsonResponse({'message': 'Connect Error'}, status=500)
        projects.update(client=None, deployed_at=None, deployed_version=None,
                        spider_amount=None)
        # delete client
        client.delete()
        return JsonResponse({'result': 1})

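# `Client.add_spider` and `Client.remove_spider` maintain a running spider
# count on the client record. A hedged sketch of what those model methods
# might look like, assuming an integer `spider_amount` field on Client (the
# field name is inferred from Project; the implementation is an assumption):
#
#     def add_spider(self, amount):
#         # bump the counter when a project is deployed to this client
#         self.spider_amount = (self.spider_amount or 0) + (amount or 0)
#         self.save()
#
#     def remove_spider(self, amount):
#         # drop the counter when a project is withdrawn from this client
#         self.spider_amount = (self.spider_amount or 0) - (amount or 0)
#         self.save()
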
def execute(spider):
    """
    execute deployed spider
    :param spider: spider object
    :return:
    """
    client_name = spider.project.client.name
    project_name = spider.project.name
    scrapyd = get_scrapyd(spider.project.client)
    # check current status
    if spider.current_job_id:
        status = scrapyd.job_status(project_name, spider.current_job_id)
        status = get_spider_status_index(status)
        # running or pending
        if abs(status) == 1:
            logger.info('job: client %s, project %s, spider %s is already running',
                        client_name, project_name, spider.name)
            return
    job = scrapyd.schedule(project_name, spider.name)
    spider.current_job_id = job
    spider.latest_run = timezone.now()
    spider.save()
    logger.info('execute job of client %s, project %s, spider %s',
                client_name, project_name, spider.name)

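# `get_spider_status_index` maps a Scrapyd status string to an integer. The
# `abs(status) == 1` check above implies pending and running sit at -1 and 1;
# a sketch consistent with that convention (the exact mapping is an assumption):
#
#     def get_spider_status_index(status):
#         # pending -> -1, running -> 1, finished -> 2, anything else -> 0
#         return {'pending': -1, 'running': 1, 'finished': 2}.get(status, 0)
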
def job_cancel(request, spider_id):
    """
    cancel a job
    :param request: request object
    :param spider_id: spider id
    :return: json of cancel
    """
    if request.method == 'GET':
        spider = Spider.objects.get(id=spider_id)
        try:
            scrapyd = get_scrapyd(spider.project.client)
            result = scrapyd.cancel(spider.project.name, spider.current_job_id)
            # depending on the scrapyd client, result may be a plain status
            # string rather than a dict, so allow non-dict serialization
            return JsonResponse(result, safe=False)
        except (ConnectionError, ReadTimeout):
            return JsonResponse({'message': 'Connect Error'}, status=500)

def spider_start(request, spider_id):
    """
    start a spider
    :param request: request object
    :param spider_id: spider id
    :return: json
    """
    if request.method == 'GET':
        spider = Spider.objects.get(id=spider_id)
        try:
            scrapyd = get_scrapyd(spider.project.client)
            job = scrapyd.schedule(spider.project.name, spider.name)
        except (ConnectionError, ReadTimeout):
            return JsonResponse({'message': 'Connect Error'}, status=500)
        spider.current_job_id = job
        spider.latest_run = timezone.now()
        spider.save()
        return JsonResponse({'job': job})

def spider_status(request, spider_id):
    """
    get current job status of a spider
    :param request: request object
    :param spider_id: spider id
    :return: json of status
    """
    if request.method == 'GET':
        spider = Spider.objects.get(id=spider_id)
        scrapyd = get_scrapyd(spider.project.client)
        try:
            job_id = spider.current_job_id or ''
            result = scrapyd.job_status(spider.project.name, job_id)
            return JsonResponse({'status': get_spider_status_index(result)})
        except (ConnectionError, ReadTimeout):
            return JsonResponse({'message': 'Connect Error'}, status=500)

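# The views above are routed elsewhere; a hypothetical urls.py fragment
# showing how the spider/job endpoints might be wired (paths, names, and the
# `views` module reference are assumptions, not the project's actual config):
#
#     from django.urls import path
#     from . import views
#
#     urlpatterns = [
#         path('spider/<int:spider_id>/start', views.spider_start),
#         path('spider/<int:spider_id>/status', views.spider_status),
#         path('spider/<int:spider_id>/cancel', views.job_cancel),
#         path('spider/<int:spider_id>/jobs', views.job_list),
#     ]
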
def project_deploy(request, client_id, project_name):
    """
    deploy project operation
    :param request: request object
    :param client_id: client id
    :param project_name: project name
    :return: json of deploy result
    """
    if request.method == 'GET':
        # get project folder
        path = os.path.abspath(join(WORKPLACE, PROJECTS_DIR))
        project_path = join(path, project_name)
        # find egg file
        egg = find_egg(project_path)
        if not egg:
            return JsonResponse({'message': 'egg not found'}, status=500)
        # read egg data, closing the file handle afterwards
        with open(join(project_path, egg), 'rb') as egg_file:
            egg_data = egg_file.read()
        # get client and project model
        client = Client.objects.get(id=client_id)
        project = Project.objects.get(name=project_name)
        # execute deploy operation
        scrapyd = get_scrapyd(client)
        spider_amount = scrapyd.add_version(project_name, int(time.time()), egg_data)
        # update client info: withdraw the old count from the previously
        # deployed client, then refresh in case it is the same client row
        if project.client:
            project.client.remove_spider(project.spider_amount)
            client.refresh_from_db()
        client.add_spider(spider_amount)
        # update project info
        project.client = client
        project.deployed_version = project.uploaded_version
        project.deployed_at = timezone.now()
        project.spider_amount = spider_amount
        project.save()
        # update spider info: drop spiders that no longer exist, upsert the rest
        spiders = scrapyd.list_spiders(project_name)
        Spider.objects.filter(project=project).exclude(name__in=spiders).delete()
        for each in spiders:
            Spider.objects.update_or_create(name=each, defaults={
                'project': project,
                'available': True
            })
        return JsonResponse(model_to_dict(project))

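# `find_egg` comes from the project's utils; the view only relies on it
# returning an egg filename inside the given directory, or a falsy value.
# A minimal sketch under that assumption:
#
#     def find_egg(path):
#         # scan the project directory for a built .egg package
#         for name in os.listdir(path):
#             if name.endswith('.egg'):
#                 return name
#         return None
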
def job_list(request, spider_id):
    """
    get job list of spider from one client
    :param request: request object
    :param spider_id: spider id
    :return: list of jobs
    """
    if request.method == 'GET':
        spider = Spider.objects.get(id=spider_id)
        scrapyd = get_scrapyd(spider.project.client)
        try:
            result = scrapyd.list_jobs(spider.project.name)
            jobs = []
            statuses = ['pending', 'running', 'finished']
            for status in statuses:
                # a status bucket may be missing from the response
                for job in result.get(status, []):
                    if job['spider'] == spider.name:
                        job['status'] = status
                        jobs.append(job)
            # safe=False is required to serialize a top-level list
            return JsonResponse(jobs, safe=False)
        except (ConnectionError, ReadTimeout):
            return JsonResponse({'message': 'Connect Error'}, status=500)

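# A hedged usage sketch exercising the endpoints with Django's test client,
# assuming the hypothetical routes shown above and an existing spider with
# id 1 (both assumptions):
#
#     from django.test import Client as TestClient
#
#     http = TestClient()
#     # start the spider, then poll its status and list its jobs
#     job = http.get('/spider/1/start').json()['job']
#     status = http.get('/spider/1/status').json()['status']
#     jobs = http.get('/spider/1/jobs').json()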