def project_file_delete(request):
    """
    Remove a single project file from disk.

    The POST body is JSON carrying ``path`` (a directory) and ``label``
    (a file name); the two are joined to build the file to delete.

    :param request: request object
    :return: json ``{'result': ...}`` for POST requests. ``os.remove`` has no
        return value, so ``result`` is always ``None``. Any other HTTP method
        falls through and returns ``None``.
    """
    if request.method != 'POST':
        return None
    payload = json.loads(request.body)
    target = join(payload['path'], payload['label'])
    outcome = os.remove(target)
    return JsonResponse({'result': outcome})
def spider_list(request, client_id, project_name):
    """
    List the spiders of one project on one client.

    :param request: request Object
    :param client_id: client id
    :param project_name: project name
    :return: json list of ``{'name': ..., 'id': ...}`` entries whose ids
        count up from 1, or a 500 response with ``Connect Error`` when a
        ``ConnectionError`` is raised while building the list
    """
    if request.method != 'GET':
        return None
    client = Client.objects.get(id=client_id)
    scrapyd = get_scrapyd(client)
    try:
        names = scrapyd.list_spiders(project_name)
        payload = [
            {'name': name, 'id': position}
            for position, name in enumerate(names, start=1)
        ]
        return JsonResponse(payload)
    except ConnectionError:
        return JsonResponse({'message': 'Connect Error'}, status=500)
def project_file_create(request):
    """
    Create an empty project file.

    The POST body is JSON carrying ``path`` (a directory) and ``name``
    (the new file name).

    :param request: request object
    :return: json ``{'result': '1'}`` for POST requests, ``None`` otherwise
    """
    if request.method != 'POST':
        return None
    payload = json.loads(request.body)
    target = join(payload['path'], payload['name'])
    # Opening in write mode creates the file (or truncates an existing one);
    # nothing is written to it.
    with open(target, 'w'):
        pass
    return JsonResponse({'result': '1'})
def project_list(request, client_id):
    """
    List the projects deployed on one client.

    :param request: request object
    :param client_id: client id
    :return: json built from ``scrapyd.list_projects()`` for GET requests,
        ``None`` otherwise
    """
    if request.method != 'GET':
        return None
    scrapyd = get_scrapyd(Client.objects.get(id=client_id))
    return JsonResponse(scrapyd.list_projects())
def project_tree(request, project_name):
    """
    Return the file tree of a project.

    The project is looked up under ``PROJECTS_FOLDER`` relative to the
    current working directory.

    :param request: request object
    :param project_name: project name
    :return: json of the tree produced by ``get_tree`` for GET requests,
        ``None`` otherwise
    """
    if request.method != 'GET':
        return None
    projects_root = os.path.abspath(join(os.getcwd(), PROJECTS_FOLDER))
    project_dir = join(projects_root, project_name)
    return JsonResponse(get_tree(project_dir))
def project_file_rename(request):
    """
    Rename a project file inside its directory.

    The POST body is JSON carrying ``path`` (the directory), ``pre`` (the
    current file name) and ``new`` (the new file name).

    :param request: request object
    :return: json ``{'result': '1'}`` for POST requests, ``None`` otherwise
    """
    if request.method != 'POST':
        return None
    payload = json.loads(request.body)
    source = join(payload['path'], payload['pre'])
    destination = join(payload['path'], payload['new'])
    os.rename(source, destination)
    return JsonResponse({'result': '1'})
def client_status(request, client_id):
    """
    Probe whether a client can be reached.

    :param request: request object
    :param client_id: client id
    :return: json ``{'result': '1'}`` once the probe request returns; errors
        raised by the lookup or by the HTTP request propagate to the caller.
        Non-GET requests return ``None``.
    """
    if request.method != 'GET':
        return None
    client = Client.objects.get(id=client_id)
    endpoint = scrapyd_url(client.ip, client.port)
    # The response is discarded: only completing the request within the
    # 3 second timeout matters here.
    requests.get(endpoint, timeout=3)
    return JsonResponse({'result': '1'})
def client_remove(request, client_id):
    """
    Delete a client together with its deploy records.

    :param request: request object
    :param client_id: client id
    :return: json ``{'result': '1'}`` for POST requests, ``None`` otherwise
    """
    if request.method != 'POST':
        return None
    # ``get`` (rather than ``filter``) is used for the lookup, so an unknown
    # id raises before anything is deleted.
    target = Client.objects.get(id=client_id)
    # Remove the deploy rows that reference the client first, then the
    # client row itself.
    Deploy.objects.filter(client=target).delete()
    Client.objects.filter(id=client_id).delete()
    return JsonResponse({'result': '1'})
def monitor_create(request):
    """
    Create a monitor from the ``form`` object in the POST body.

    The form's ``configuration`` value is serialized to a JSON string before
    the remaining form fields are passed to ``Monitor.objects.create``.

    :param request: request object
    :return: json of the created monitor for POST requests, ``None``
        otherwise
    """
    if request.method != 'POST':
        return None
    form = json.loads(request.body)['form']
    form['configuration'] = json.dumps(form['configuration'])
    created = Monitor.objects.create(**form)
    return JsonResponse(model_to_dict(created))
def client_update(request, client_id):
    """
    Update a client with the fields given in the POST body.

    The decoded JSON body is passed as keyword arguments to the queryset
    ``update`` call, so its keys are used as field names.

    :param request: request object
    :param client_id: client id
    :return: json of the client as re-read after the update for POST
        requests, ``None`` otherwise
    """
    if request.method != 'POST':
        return None
    matching = Client.objects.filter(id=client_id)
    fields = json.loads(request.body)
    matching.update(**fields)
    refreshed = Client.objects.get(id=client_id)
    return JsonResponse(model_to_dict(refreshed))
def project_parse(request, project_name):
    """
    Parse a project in-process and return the collected results.

    The POST body is JSON. ``spider`` names the spider; a truthy ``start``
    requests the spider's start requests, otherwise ``url`` and ``callback``
    are passed on to ``parser.get_follow_results``.

    :param request: request object
    :param project_name: project name
    :return: json ``{'status': '2', 'message': ...}`` when the result is not
        flagged ``finished``; otherwise ``{'status': '1', 'result': ...}``
        holding either the start requests or the full follow result with its
        response html passed through ``process_html``. Non-POST requests
        return ``None``.
    """
    if request.method == 'POST':
        print(project_name)
        project_path = join(PROJECTS_FOLDER, project_name)
        print('Project Path', project_path)
        data = json.loads(request.body)
        spider_name = data.get('spider')
        start = data.get('start')
        url = data.get('url')
        callback = data.get('callback')
        if start:
            result = get_start_requests(project_path, spider_name)
        else:
            result = parser.get_follow_results(url, project_path, spider_name, callback)
        if not result.get('finished'):
            print('FATAL!!!!!')
            output = get_output_error(project_name, spider_name)
            return JsonResponse({'status': '2', 'message': output})
        if start:
            return JsonResponse({
                'status': '1',
                'result': {
                    'requests': result['requests']
                }
            })
        # dirname(url) is handed to process_html together with the html;
        # presumably it serves as the base for relative links - confirm
        # against process_html.
        result['response']['html'] = process_html(
            result['response']['html'], dirname(url))
        return JsonResponse({'status': '1', 'result': result})
def project_parse(request, project_name):
    """
    Parse a project by running the ``gerapy parse`` command.

    The POST body is JSON with ``spider`` plus the optional ``start``,
    ``method``, ``url`` and ``callback`` keys; each truthy option is passed
    to the command as ``--<name> <value>``.

    The command is started from an argument list without a shell, so values
    taken from the request are passed through verbatim and cannot inject
    additional shell commands.

    :param request: request object
    :param project_name: project name
    :return: json ``{'status': '1', 'result': ...}`` with the JSON-decoded
        stdout when nothing was written to stderr, otherwise
        ``{'status': '0', 'message': <stderr text>}``. Non-POST requests
        return ``None``.
    """
    if request.method == 'POST':
        print(project_name)
        project_path = join(PROJECTS_FOLDER, project_name)
        print('Project Path', project_path)
        data = json.loads(request.body)
        spider_name = data.get('spider')
        # construct args cmd
        args = {
            'start': data.get('start', 0),
            'method': data.get('method', 'GET'),
            'url': data.get('url'),
            'callback': data.get('callback')
        }
        cmd = ['gerapy', 'parse']
        for arg, value in args.items():
            # falsy options (missing url, start == 0, ...) are left out
            if value:
                cmd.extend(['--{arg}'.format(arg=arg), str(value)])
        cmd.extend([str(project_path), str(spider_name)])
        p = Popen(cmd, shell=False, stdin=PIPE, stdout=PIPE, stderr=PIPE, close_fds=True)
        # communicate() drains both pipes concurrently; reading stdout to the
        # end before touching stderr can block forever once the child fills
        # the stderr pipe.
        raw_stdout, raw_stderr = p.communicate()
        stdout, stderr = bytes2str(raw_stdout), bytes2str(raw_stderr)
        if not stderr:
            return JsonResponse({'status': '1', 'result': json.loads(stdout)})
        else:
            return JsonResponse({'status': '0', 'message': stderr})
def task_info(request, task_id):
    """
    Return one task with its JSON-encoded fields decoded.

    ``clients`` and ``configuration`` are run through ``json.loads`` before
    the task is returned.

    :param request: request object
    :param task_id: task id
    :return: json ``{'data': ...}`` for GET requests, ``None`` otherwise
    """
    if request.method != 'GET':
        return None
    info = model_to_dict(Task.objects.get(id=task_id))
    for field in ('clients', 'configuration'):
        info[field] = json.loads(info.get(field))
    return JsonResponse({'data': info})
def job_list(request, client_id, project_name):
    """
    List the jobs of one project on one client.

    The jobs reported under ``pending``, ``running`` and ``finished`` are
    flattened, in that order, into a single list; each job gets a ``status``
    entry naming the group it came from.

    :param request: request object
    :param client_id: client id
    :param project_name: project name
    :return: json list of jobs, or a 500 response with ``Connect Error``
        when a ``ConnectionError`` is raised. Non-GET requests return
        ``None``.
    """
    if request.method != 'GET':
        return None
    scrapyd = get_scrapyd(Client.objects.get(id=client_id))
    try:
        grouped = scrapyd.list_jobs(project_name)
        flattened = []
        for state in ('pending', 'running', 'finished'):
            for entry in grouped.get(state):
                entry['status'] = state
                flattened.append(entry)
        return JsonResponse(flattened)
    except ConnectionError:
        return JsonResponse({'message': 'Connect Error'}, status=500)
def project_configure(request, project_name):
    """
    Read (GET) or replace (POST) the configuration of a project.

    The configuration is kept as a JSON string on the project: GET decodes
    it (an empty value becomes ``None``), POST stores the JSON-encoded
    ``configuration`` entry of the request body.

    :param request: request object
    :param project_name: project name
    :return: json of the project; on GET its ``configuration`` is decoded,
        on POST the project is returned as re-read after the update. Other
        methods return ``None``.
    """
    # get configuration
    if request.method == 'GET':
        record = model_to_dict(Project.objects.get(name=project_name))
        stored = record['configuration']
        record['configuration'] = json.loads(stored) if stored else None
        return JsonResponse(record)
    # update configuration
    if request.method == 'POST':
        matching = Project.objects.filter(name=project_name)
        body = json.loads(request.body)
        serialized = json.dumps(body.get('configuration'))
        matching.update(configuration=serialized)
        refreshed = Project.objects.get(name=project_name)
        return JsonResponse(model_to_dict(refreshed))
    return None
def project_file_update(request):
    """
    Overwrite a project file with new content.

    The POST body is JSON carrying ``path`` (a directory), ``label`` (the
    file name) and ``code`` (the new file content).

    :param request: request object
    :return: json ``{'result': '1'}`` for POST requests, ``None`` otherwise
    """
    if request.method == 'POST':
        data = json.loads(request.body)
        path = join(data['path'], data['label'])
        code = data['code']
        # Write as UTF-8 explicitly: the platform default encoding may be
        # unable to represent non-ASCII source text, and a file read back as
        # UTF-8 has to be written as UTF-8.
        with open(path, 'w', encoding='utf-8') as f:
            f.write(code)
        return JsonResponse({'result': '1'})
def task_info(request, task_id):
    """
    Return one task with its JSON-encoded fields decoded.

    ``clients`` and ``configuration`` are written as JSON text when a task
    is created or updated, so they are decoded here with ``json.loads``;
    encoding them again would hand the caller doubly-encoded strings.

    :param request: request object
    :param task_id: task id
    :return: json ``{"data": ...}`` for GET requests, ``None`` otherwise
    """
    if request.method == "GET":
        task = Task.objects.get(id=task_id)
        data = model_to_dict(task)
        data["clients"] = json.loads(data.get("clients"))
        data["configuration"] = json.loads(data.get("configuration"))
        return JsonResponse({"data": data})
def project_file_read(request):
    """
    Return the content of a project file.

    The POST body is JSON carrying ``path`` (a directory) and ``label``
    (the file name). Paths rejected by ``is_in_curdir`` are not read.

    :param request: request object
    :return: the file content decoded as UTF-8, or json ``{'result': '0'}``
        when ``is_in_curdir`` rejects the path. Non-POST requests return
        ``None``.
    """
    if request.method != 'POST':
        return None
    payload = json.loads(request.body)
    target = join(payload['path'], payload['label'])
    if not is_in_curdir(target):
        return JsonResponse({'result': '0'})
    # read raw bytes and decode explicitly instead of relying on text mode
    with open(target, 'rb') as handle:
        raw = handle.read()
    return HttpResponse(raw.decode('utf-8'))
def spider_start(request, client_id, project_name, spider_name):
    """
    Schedule a run of a spider on one client.

    :param request: request object
    :param client_id: client id
    :param project_name: project name
    :param spider_name: spider name
    :return: json ``{'job': ...}`` holding the value returned by
        ``scrapyd.schedule`` for GET requests, ``None`` otherwise
    """
    if request.method != 'GET':
        return None
    scrapyd = get_scrapyd(Client.objects.get(id=client_id))
    scheduled = scrapyd.schedule(project_name, spider_name)
    return JsonResponse({'job': scheduled})
def project_index(request):
    """
    List the projects found in the projects folder.

    Every directory directly under ``PROJECTS_FOLDER`` (resolved against the
    current working directory) whose name is not in ``IGNORES`` counts as a
    project.

    :param request: request object
    :return: json list of ``{'name': ...}`` entries for GET requests,
        ``None`` otherwise
    """
    if request.method != 'GET':
        return None
    root = os.path.abspath(join(os.getcwd(), PROJECTS_FOLDER))
    found = [
        {'name': entry}
        for entry in os.listdir(root)
        if os.path.isdir(join(root, entry)) and entry not in IGNORES
    ]
    return JsonResponse(found)
def job_cancel(request, client_id, project_name, job_id):
    """
    Cancel a job on one client.

    :param request: request object
    :param client_id: client id
    :param project_name: project name
    :param job_id: job id
    :return: json of the value returned by ``scrapyd.cancel`` for GET
        requests, ``None`` otherwise
    """
    if request.method != 'GET':
        return None
    scrapyd = get_scrapyd(Client.objects.get(id=client_id))
    outcome = scrapyd.cancel(project_name, job_id)
    return JsonResponse(outcome)
def project_clone(request):
    """
    Clone a project from a git repository into the projects folder.

    The POST body is JSON carrying ``address``, which must be a string
    starting with ``http``; ``.git`` is appended when missing. The clone
    target is ``PROJECTS_FOLDER`` joined with the stem of the address.

    ``git`` is started from an argument list without a shell, so the address
    is passed through verbatim and cannot inject additional shell commands.

    :param request: request object
    :return: json ``{'status': True}`` when ``git clone`` exits with code 0,
        ``{'status': False}`` when it fails or the address is rejected.
        Non-POST requests return ``None``.
    """
    if request.method == 'POST':
        data = json.loads(request.body)
        address = data.get('address')
        # A missing or non-string address is rejected like any other
        # unsupported address instead of raising.
        if not isinstance(address, str) or not address.startswith('http'):
            return JsonResponse({'status': False})
        address = address + '.git' if not address.endswith('.git') else address
        cmd = ['git', 'clone', address, join(PROJECTS_FOLDER, Path(address).stem)]
        logger.debug('clone cmd %s', cmd)
        p = Popen(cmd, shell=False, stdin=PIPE, stdout=PIPE, stderr=PIPE)
        # communicate() drains both pipes concurrently and waits for exit,
        # avoiding a deadlock when the child fills the stderr pipe.
        raw_stdout, raw_stderr = p.communicate()
        stdout, stderr = bytes2str(raw_stdout), bytes2str(raw_stderr)
        logger.debug('clone run result %s', stdout)
        # git reports normal progress ("Cloning into ...") on stderr, so
        # success is judged by the exit code rather than by empty stderr.
        succeeded = p.returncode == 0
        if stderr:
            if succeeded:
                logger.debug(stderr)
            else:
                logger.error(stderr)
        return JsonResponse({'status': succeeded})
def project_create(request):
    """
    Create a configurable project and its folder on disk.

    The decoded POST body, with ``configurable`` forced to 1, is passed as
    keyword arguments to ``Project.objects.update_or_create``. A folder named
    after the project is then created under ``PROJECTS_FOLDER`` (resolved
    against the current working directory).

    :param request: request object
    :return: json of the project for POST requests, ``None`` otherwise
    """
    if request.method == 'POST':
        data = json.loads(request.body)
        data['configurable'] = 1
        project, _ = Project.objects.update_or_create(**data)
        # generate a single project folder
        path = join(os.path.abspath(join(os.getcwd(), PROJECTS_FOLDER)), data['name'])
        # update_or_create may return an already existing project whose
        # folder is present; tolerate that instead of raising.
        os.makedirs(path, exist_ok=True)
        return JsonResponse(model_to_dict(project))
def monitor_db_list(request):
    """
    List the databases available at a monitor's connection url.

    The POST body is JSON carrying ``url`` and ``type``; only the type
    ``MongoDB`` is handled.

    :param request: request object
    :return: json of the database names for POST requests of type
        ``MongoDB``; ``None`` for any other type or HTTP method
    """
    if request.method == 'POST':
        data = json.loads(request.body)
        url = data['url']
        db_type = data['type']
        if db_type == 'MongoDB':
            client = pymongo.MongoClient(url)
            try:
                # NOTE(review): PyMongo deprecated database_names() in favour
                # of list_database_names() (added in 3.6) and removed it in
                # 4.0 - switch once the minimum PyMongo version is confirmed.
                dbs = client.database_names()
            finally:
                # A client is created per request; close it so its
                # connections are released even when the listing fails.
                client.close()
            return JsonResponse(dbs)
def task_create(request):
    """
    Create a task from the POST body.

    ``clients`` and ``configuration`` are stored JSON-encoded; ``project``,
    ``name``, ``spider`` and ``trigger`` are stored as given.

    :param request: request object
    :return: json ``{'result': '1', 'data': ...}`` holding the created task
        for POST requests, ``None`` otherwise
    """
    if request.method != 'POST':
        return None
    payload = json.loads(request.body)
    fields = {
        'clients': json.dumps(payload.get('clients')),
        'project': payload.get('project'),
        'name': payload.get('name'),
        'spider': payload.get('spider'),
        'trigger': payload.get('trigger'),
        'configuration': json.dumps(payload.get('configuration')),
    }
    created = Task.objects.create(**fields)
    return JsonResponse({'result': '1', 'data': model_to_dict(created)})
def task_update(request, task_id):
    """
    Update a task with the fields given in the POST body.

    The body is decoded as UTF-8 JSON. ``clients`` and ``configuration``
    are re-encoded as JSON strings and ``modified`` is set to 1 before the
    fields are passed to the queryset ``update`` call.

    :param request: request object
    :param task_id: task id
    :return: json of the task as re-read after the update for POST requests,
        ``None`` otherwise
    """
    if request.method != "POST":
        return None
    matching = Task.objects.filter(id=task_id)
    fields = json.loads(request.body.decode("utf-8"))
    for key in ("clients", "configuration"):
        fields[key] = json.dumps(fields.get(key))
    fields["modified"] = 1
    matching.update(**fields)
    refreshed = Task.objects.get(id=task_id)
    return JsonResponse(model_to_dict(refreshed))
def project_remove(request, project_name):
    """
    Remove a project from disk and from the database.

    The project folder under ``PROJECTS_FOLDER`` is deleted first, then the
    matching ``Project`` rows.

    :param request: request object
    :param project_name: project name
    :return: json ``{'result': ...}`` holding the value returned by the
        queryset ``delete`` call for POST requests, ``None`` otherwise
    """
    if request.method != 'POST':
        return None
    projects_root = join(os.path.abspath(os.getcwd()), PROJECTS_FOLDER)
    # delete project file tree
    rmtree(join(projects_root, project_name))
    # delete project
    deleted = Project.objects.filter(name=project_name).delete()
    return JsonResponse({'result': deleted})
def task_update(request, task_id):
    """
    Update a task with the fields given in the POST body.

    ``clients`` and ``configuration`` are re-encoded as JSON strings and
    ``modified`` is set to 1 before the fields are passed to the queryset
    ``update`` call.

    :param request: request object
    :param task_id: task id
    :return: json of the task as re-read after the update for POST requests,
        ``None`` otherwise
    """
    if request.method != 'POST':
        return None
    matching = Task.objects.filter(id=task_id)
    fields = json.loads(request.body)
    fields.update(
        clients=json.dumps(fields.get('clients')),
        configuration=json.dumps(fields.get('configuration')),
        modified=1,
    )
    matching.update(**fields)
    return JsonResponse(model_to_dict(Task.objects.get(id=task_id)))
def project_deploy(request, client_id, project_name):
    """
    Deploy a project's egg file to one client.

    The egg is located with ``find_egg`` inside the project folder and
    uploaded with the current unix time as its version. On success the
    previous deploy records for this client/project pair are replaced by a
    fresh one.

    :param request: request object
    :param client_id: client id
    :param project_name: project name
    :return: json of the deploy record on success; a 500 response with
        ``egg not found`` when no egg exists, or with the traceback when the
        upload or bookkeeping raises. Non-POST requests return ``None``.
    """
    if request.method == 'POST':
        # get project folder
        path = os.path.abspath(join(os.getcwd(), PROJECTS_FOLDER))
        project_path = join(path, project_name)
        # find egg file
        egg = find_egg(project_path)
        if not egg:
            return JsonResponse({'message': 'egg not found'}, status=500)
        # Read the egg up front inside a context manager so the file handle
        # is always closed, including when the deploy fails.
        with open(join(project_path, egg), 'rb') as egg_file:
            egg_data = egg_file.read()
        # get client and project model
        client = Client.objects.get(id=client_id)
        project = Project.objects.get(name=project_name)
        # execute deploy operation
        scrapyd = get_scrapyd(client)
        try:
            scrapyd.add_version(project_name, int(time.time()), egg_data)
            # update deploy info
            deployed_at = timezone.now()
            Deploy.objects.filter(client=client, project=project).delete()
            deploy, _ = Deploy.objects.update_or_create(
                client=client, project=project, deployed_at=deployed_at,
                description=project.description)
            return JsonResponse(model_to_dict(deploy))
        except Exception:
            return JsonResponse({'message': get_traceback()}, status=500)
def monitor_collection_list(request):
    """
    List the collections of one database at a monitor's connection url.

    The POST body is JSON carrying ``url``, ``db`` (the database name) and
    ``type``; only the type ``MongoDB`` is handled.

    :param request: request object
    :return: json of the collection names for POST requests of type
        ``MongoDB``; ``None`` for any other type or HTTP method
    """
    if request.method == 'POST':
        data = json.loads(request.body)
        url = data['url']
        db_name = data['db']
        db_type = data['type']
        if db_type == 'MongoDB':
            client = pymongo.MongoClient(url)
            try:
                # NOTE(review): PyMongo deprecated collection_names() in
                # favour of list_collection_names() and removed it in 4.0 -
                # switch once the minimum PyMongo version is confirmed.
                collections = client[db_name].collection_names()
            finally:
                # A client is created per request; close it so its
                # connections are released even when the listing fails.
                client.close()
            return JsonResponse(collections)