def render_html(request):
    """
    Fetch the page behind an encoded url and return its processed html.

    The ``url`` query parameter is expected to be base64 encoded; the decoded
    text is then percent-decoded before being requested.

    :param request: request object
    :return: HttpResponse with the output of process_html on success,
        JsonResponse with status 400 when ``url`` is missing or cannot be
        decoded, JsonResponse with status 500 when fetching or processing
        fails; None for any method other than GET
    """
    if request.method == 'GET':
        encoded_url = request.GET.get('url')
        if not encoded_url:
            return JsonResponse({'message': 'url is required'}, status=400)
        try:
            url = unquote(base64.b64decode(encoded_url).decode('utf-8'))
        except ValueError:
            # binascii.Error and UnicodeDecodeError are both ValueError subclasses
            return JsonResponse({'message': 'url is not valid'}, status=400)
        # NOTE(review): the url is supplied by the caller and fetched from the
        # server side; consider restricting the allowed hosts/schemes.
        try:
            response = requests.get(url, timeout=5)
            # decode the body with the encoding detected from its content
            response.encoding = response.apparent_encoding
            html = process_html(response.text)
            return HttpResponse(html)
        except Exception as e:
            # exception args may hold arbitrary objects, so stringify them to
            # keep the error payload JSON serializable
            return JsonResponse({'message': [str(arg) for arg in e.args]}, status=500)
def task_create(request):
    """
    Create a task from the JSON body of a POST request.

    :param request: request object whose body is a JSON object; the keys
        clients, project, name, spider, trigger and configuration are read
    :return: JsonResponse with result '1' and the created task as a dict;
        None for any method other than POST
    """
    if request.method != 'POST':
        return None

    payload = json.loads(request.body)
    # clients and configuration are stored as JSON text, the rest as given
    fields = {
        'clients': json.dumps(payload.get('clients'), ensure_ascii=False),
        'project': payload.get('project'),
        'name': payload.get('name'),
        'spider': payload.get('spider'),
        'trigger': payload.get('trigger'),
        'configuration': json.dumps(payload.get('configuration'), ensure_ascii=False),
        'modified': 1,
    }
    created = Task.objects.create(**fields)
    return JsonResponse({'result': '1', 'data': model_to_dict(created)})
def job_list(request, client_id, project_name):
    """
    get job list of project from one client

    :param request: request object
    :param client_id: client id
    :param project_name: project name
    :return: JsonResponse with a flat list of jobs, each tagged with a
        'status' of pending, running or finished; None for any method other
        than GET
    """
    if request.method == 'GET':
        client = Client.objects.get(id=client_id)
        scrapyd = get_scrapyd(client)
        result = scrapyd.list_jobs(project_name)
        jobs = []
        for status in ('pending', 'running', 'finished'):
            # tolerate a status key that is missing from the result
            for job in result.get(status) or []:
                job['status'] = status
                jobs.append(job)
        # the payload is a list; Django's JsonResponse only accepts non-dict
        # data when safe=False (assumes JsonResponse is django.http's)
        return JsonResponse(jobs, safe=False)
def project_remove(request, project_name):
    """
    remove project from disk and db

    :param request: request object
    :param project_name: project name
    :return: JsonResponse with the result of the db delete, or JsonResponse
        with status 400 when project_name does not resolve to a path inside
        the projects folder; None for any method other than POST
    """
    if request.method == 'POST':
        # project_name comes from the request and ends up in rmtree, so make
        # sure it resolves to something strictly inside the projects folder
        # ('..', absolute paths and empty names are rejected)
        root = os.path.abspath(join(os.getcwd(), PROJECTS_FOLDER))
        project_path = os.path.abspath(join(root, project_name))
        if not project_path.startswith(join(root, '')):
            return JsonResponse({'message': 'invalid project name'}, status=400)
        # delete deployments
        project = Project.objects.get(name=project_name)
        Deploy.objects.filter(project=project).delete()
        # delete project
        result = Project.objects.filter(name=project_name).delete()
        # delete project file tree
        if exists(project_path):
            rmtree(project_path)
        return JsonResponse({'result': result})
def task_status(request, task_id):
    """
    Report, for every client of a task, its scheduled job and executions.

    :param request: request object
    :param task_id: task id
    :return: JsonResponse whose 'data' is a list with one entry per client
        (client dict, next run time of the job, its executions); None for any
        method other than GET
    """
    if request.method != 'GET':
        return None

    task = Task.objects.get(id=task_id)
    statuses = []
    for client in clients_of_task(task):
        # the scheduler job is looked up by the id derived from client + task
        scheduled = DjangoJob.objects.get(name=get_job_id(client, task))
        history = DjangoJobExecution.objects.filter(job=scheduled)
        history_json = serialize('json', history)
        statuses.append({
            'client': model_to_dict(client),
            'next': scheduled.next_run_time,
            'executions': json.loads(history_json)
        })
    return JsonResponse({'data': statuses})
def _save_project(project_name, **fields):
    """
    Create the project row if it is missing, otherwise apply fields to it.

    :param project_name: project name used as the lookup key
    :param fields: model attributes to set on the new or existing row
    :return: the Project instance
    """
    if not Project.objects.filter(name=project_name).exists():
        Project(name=project_name, **fields).save()
        # hand back the freshly stored row
        return Project.objects.get(name=project_name)
    model = Project.objects.get(name=project_name)
    # an existing row is only written when there is something to change
    if fields:
        for attr, value in fields.items():
            setattr(model, attr, value)
        model.save()
    return model


def project_build(request, project_name):
    """
    get build info or execute build operation

    GET records the current build state (egg and its modification time, if an
    egg is found) in the db; POST runs the build and stores the description
    from the JSON body together with the new egg.

    :param request: request object
    :param project_name: project name
    :return: JsonResponse with the project as a dict, or JsonResponse with
        status 500 when no egg is found after a POST build; None for any
        other method
    """
    # get project folder
    path = os.path.abspath(join(os.getcwd(), PROJECTS_FOLDER))
    project_path = join(path, project_name)

    # get build version
    if request.method == 'GET':
        egg = find_egg(project_path)
        if egg:
            # if built, save or update project with the egg's mtime
            built_at = timezone.datetime.fromtimestamp(
                os.path.getmtime(join(project_path, egg)),
                tz=pytz.timezone(TIME_ZONE))
            model = _save_project(project_name, built_at=built_at, egg=egg)
        else:
            # if not built, just make sure the project name is in db
            model = _save_project(project_name)
        # transfer model to dict then dumps it to json
        return JsonResponse(model_to_dict(model))

    # build operation manually by clicking button
    elif request.method == 'POST':
        data = json.loads(request.body)
        description = data['description']
        build_project(project_name)
        egg = find_egg(project_path)
        if not egg:
            return JsonResponse({'message': 'egg not found'}, status=500)
        # create the project, or update egg, description and built_at info
        model = _save_project(project_name, description=description,
                              built_at=timezone.now(), egg=egg)
        # transfer model to dict then dumps it to json
        return JsonResponse(model_to_dict(model))
def del_project(request, client_id, project):
    """
    Delete a project through the scrapyd handle obtained for one client.

    :param request: request object
    :param client_id: client id
    :param project: project name
    :return: JsonResponse wrapping the result of delete_project; None for any
        method other than GET
    """
    if request.method != 'GET':
        return None

    target = Client.objects.get(id=client_id)
    outcome = get_scrapyd(target).delete_project(project=project)
    # NOTE(review): if delete_project returns a non-dict value, Django's
    # JsonResponse would need safe=False here -- confirm the return type
    return JsonResponse(outcome)
def project_parse(request, project_name):
    """
    parse project

    Runs ``gerapy parse`` for one spider with the request options taken from
    the JSON body and returns what the command printed.

    :param request: request object
    :param project_name: project name
    :return: JsonResponse with status True and the JSON-decoded stdout when
        stderr is empty, otherwise status False and the stderr text; None for
        any method other than POST
    """
    if request.method == 'POST':
        project_path = join(PROJECTS_FOLDER, project_name)
        data = json.loads(request.body)
        logger.debug('post data %s', data)
        spider_name = data.get('spider')
        # structured values are passed as JSON text; no shell quoting is
        # needed because the command is executed without a shell
        args = {
            'start': data.get('start', False),
            'method': data.get('method', 'GET'),
            'url': data.get('url'),
            'callback': data.get('callback'),
            'cookies': json.dumps(data.get('cookies', {}), ensure_ascii=False),
            'headers': json.dumps(data.get('headers', {}), ensure_ascii=False),
            'meta': json.dumps(data.get('meta', {}), ensure_ascii=False),
            'dont_filter': data.get('dont_filter', False),
            'priority': data.get('priority', 0),
        }
        # set request body
        if args['method'].lower() != 'get':
            args['body'] = json.dumps(data.get('body', ''), ensure_ascii=False)
        # build an argv list so request data can never be interpreted by a shell
        cmd = ['gerapy', 'parse']
        for arg, value in args.items():
            cmd.extend(['--{arg}'.format(arg=arg), str(value)])
        cmd.extend([project_path, str(spider_name)])
        logger.debug('parse cmd %s', cmd)
        p = Popen(cmd, shell=False, stdin=PIPE, stdout=PIPE, stderr=PIPE, close_fds=True)
        # communicate drains both pipes together, avoiding the deadlock of
        # reading stdout to the end while stderr fills up
        raw_stdout, raw_stderr = p.communicate()
        stdout, stderr = bytes2str(raw_stdout), bytes2str(raw_stderr)
        logger.debug('stdout %s, stderr %s', stdout, stderr)
        if not stderr:
            return JsonResponse({'status': True, 'result': json.loads(stdout)})
        else:
            return JsonResponse({'status': False, 'message': stderr})