コード例 #1
0
def render_html(request):
    """
    Fetch the page behind an encoded url and return its processed html.

    :param request: request object; the GET parameter ``url`` carries the
        target address, percent-quoted and then base64-encoded
    :return: HttpResponse with the output of ``process_html``; a
        JsonResponse with a ``message`` and status 400 when ``url`` is
        missing or cannot be decoded, or status 500 when fetching or
        processing fails. Nothing is returned for non-GET requests.
    """
    if request.method == 'GET':
        encoded_url = request.GET.get('url')
        # Decode inside its own guard: a missing parameter (None) raises
        # TypeError, bad base64 / bad utf-8 raise ValueError subclasses.
        try:
            url = unquote(base64.b64decode(encoded_url).decode('utf-8'))
        except (TypeError, ValueError):
            return JsonResponse(
                {'message': 'url parameter is missing or not valid base64'},
                status=400)
        print('Decoded', url)
        # NOTE(review): the url is caller-controlled and fetched server-side
        # (possible SSRF) - confirm this endpoint is only reachable by
        # trusted users.
        try:
            response = requests.get(url, timeout=5)
            response.encoding = response.apparent_encoding
            html = process_html(response.text)
            return HttpResponse(html)
        except Exception as e:
            # Exception args may hold objects the JSON encoder cannot
            # serialize (e.g. nested exceptions), so stringify each one.
            return JsonResponse({'message': [str(arg) for arg in e.args]},
                                status=500)
コード例 #2
0
ファイル: views.py プロジェクト: zwjwhxz/Gerapy
def task_create(request):
    """
    Create a task from the JSON body of a POST request.

    :param request: request object whose body is a JSON document with the
        keys clients, project, name, spider, trigger and configuration
    :return: JsonResponse holding result '1' and the created task as a
        dict; nothing is returned for non-POST requests
    """
    if request.method != 'POST':
        return None
    payload = json.loads(request.body)
    # clients and configuration are stored as JSON text on the task
    fields = {
        'clients': json.dumps(payload.get('clients'), ensure_ascii=False),
        'project': payload.get('project'),
        'name': payload.get('name'),
        'spider': payload.get('spider'),
        'trigger': payload.get('trigger'),
        'configuration': json.dumps(payload.get('configuration'),
                                    ensure_ascii=False),
        'modified': 1,
    }
    created = Task.objects.create(**fields)
    return JsonResponse({'result': '1', 'data': model_to_dict(created)})
コード例 #3
0
ファイル: views.py プロジェクト: zwjwhxz/Gerapy
def job_list(request, client_id, project_name):
    """
    get job list of project from one client
    :param request: request object
    :param client_id: client id
    :param project_name: project name
    :return: JSON array of jobs, each tagged with a 'status' key of
        'pending', 'running' or 'finished'; nothing for non-GET requests
    """
    if request.method == 'GET':
        client = Client.objects.get(id=client_id)
        scrapyd = get_scrapyd(client)
        result = scrapyd.list_jobs(project_name)
        jobs = []
        for status in ('pending', 'running', 'finished'):
            # a status key may be absent from the reply; treat it as empty
            for job in result.get(status) or []:
                job['status'] = status
                jobs.append(job)
        # jobs is a list: Django's JsonResponse rejects non-dict payloads
        # unless safe=False is passed
        return JsonResponse(jobs, safe=False)
コード例 #4
0
def project_remove(request, project_name):
    """
    Delete a project: its deployments, its db record and its folder.

    :param request: request object
    :param project_name: project name
    :return: JsonResponse wrapping the result of the project delete query;
        nothing is returned for non-POST requests
    """
    if request.method != 'POST':
        return None
    # deployments referencing the project go first
    target = Project.objects.get(name=project_name)
    Deploy.objects.filter(project=target).delete()
    # then the project record itself
    deleted = Project.objects.filter(name=project_name).delete()
    # finally the file tree under <cwd>/PROJECTS_FOLDER, if it is there
    projects_root = join(os.path.abspath(os.getcwd()), PROJECTS_FOLDER)
    project_dir = join(projects_root, project_name)
    if exists(project_dir):
        rmtree(project_dir)
    return JsonResponse({'result': deleted})
コード例 #5
0
ファイル: views.py プロジェクト: sevenyearslater/Gerapy
def task_status(request, task_id):
    """
    Collect scheduling status of a task for each of its clients.

    :param request: request object
    :param task_id: task id
    :return: JsonResponse whose 'data' is a list with one entry per client:
        the client as a dict, the job's next run time and its executions;
        nothing is returned for non-GET requests
    """
    if request.method != 'GET':
        return None
    task = Task.objects.get(id=task_id)
    statuses = []
    for client in clients_of_task(task):
        scheduler_job = DjangoJob.objects.get(name=get_job_id(client, task))
        # serialize the executions to JSON text, then parse it back below
        # so they embed as plain data in the response
        serialized = serialize(
            'json', DjangoJobExecution.objects.filter(job=scheduler_job))
        statuses.append({
            'client': model_to_dict(client),
            'next': scheduler_job.next_run_time,
            'executions': json.loads(serialized),
        })
    return JsonResponse({'data': statuses})
コード例 #6
0
ファイル: views.py プロジェクト: shichaoji/Gerapy
def project_build(request, project_name):
    """
    Report the build state of a project (GET) or build it on demand (POST).

    GET looks for an egg in the project folder and creates or updates the
    project record accordingly. POST reads 'description' from the JSON
    body, runs the build, and stores egg, description and build time.

    :param request: request object
    :param project_name: project name
    :return: JsonResponse with the project record as a dict, or a status
        500 JsonResponse when no egg exists after a POST build; nothing is
        returned for other request methods
    """
    projects_root = os.path.abspath(join(os.getcwd(), PROJECTS_FOLDER))
    project_path = join(projects_root, project_name)

    if request.method == 'GET':
        egg = find_egg(project_path)
        if not egg:
            # never built: only make sure a bare record exists
            if not Project.objects.filter(name=project_name):
                Project(name=project_name).save()
            model = Project.objects.get(name=project_name)
        else:
            # built: the egg file's mtime serves as the build time
            built_at = timezone.datetime.fromtimestamp(
                os.path.getmtime(join(project_path, egg)),
                tz=pytz.timezone(TIME_ZONE))
            if Project.objects.filter(name=project_name):
                model = Project.objects.get(name=project_name)
                model.built_at = built_at
                model.egg = egg
                model.save()
            else:
                Project(name=project_name, built_at=built_at, egg=egg).save()
                model = Project.objects.get(name=project_name)
        return JsonResponse(model_to_dict(model))

    if request.method == 'POST':
        payload = json.loads(request.body)
        description = payload['description']
        build_project(project_name)
        egg = find_egg(project_path)
        if not egg:
            return JsonResponse({'message': 'egg not found'}, status=500)
        built_at = timezone.now()
        if Project.objects.filter(name=project_name):
            # existing record: refresh egg, description and build time
            model = Project.objects.get(name=project_name)
            model.built_at = built_at
            model.egg = egg
            model.description = description
            model.save()
        else:
            # first build of an unknown project: create the record
            Project(name=project_name,
                    description=description,
                    built_at=built_at,
                    egg=egg).save()
            model = Project.objects.get(name=project_name)
        return JsonResponse(model_to_dict(model))
コード例 #7
0
def del_project(request, client_id, project):
    """
    Delete a project through the scrapyd handle of one client.

    :param request: request object
    :param client_id: client id
    :param project: project name passed to ``delete_project``
    :return: JsonResponse wrapping the delete result; nothing is returned
        for non-GET requests
    """
    if request.method != 'GET':
        return None
    scrapyd = get_scrapyd(Client.objects.get(id=client_id))
    return JsonResponse(scrapyd.delete_project(project=project))
コード例 #8
0
ファイル: views.py プロジェクト: sevenyearslater/Gerapy
def project_parse(request, project_name):
    """
    Run ``gerapy parse`` for one spider of a project and return its output.

    :param request: request object; the POST body is a JSON document with
        'spider' plus the optional keys start, method, url, callback,
        cookies, headers, meta, dont_filter, priority and body
    :param project_name: project name, joined onto PROJECTS_FOLDER
    :return: JsonResponse with status True and the parsed stdout as
        'result' when the command wrote nothing to stderr, otherwise
        status False and the stderr text as 'message'; nothing is returned
        for non-POST requests
    """
    if request.method == 'POST':
        project_path = join(PROJECTS_FOLDER, project_name)
        data = json.loads(request.body)
        logger.debug('post data %s', data)
        spider_name = data.get('spider')
        # Structured values are passed as JSON text. No shell quoting is
        # added because the command is executed without a shell.
        args = {
            'start': data.get('start', False),
            'method': data.get('method', 'GET'),
            'url': data.get('url'),
            'callback': data.get('callback'),
            'cookies': json.dumps(data.get('cookies', {}),
                                  ensure_ascii=False),
            'headers': json.dumps(data.get('headers', {}),
                                  ensure_ascii=False),
            'meta': json.dumps(data.get('meta', {}), ensure_ascii=False),
            'dont_filter': data.get('dont_filter', False),
            'priority': data.get('priority', 0),
        }
        # set request body
        if args.get('method').lower() != 'get':
            args['body'] = json.dumps(data.get('body', ''),
                                      ensure_ascii=False)

        # Build an argv list instead of a command string: the values come
        # from the request, and interpolating them into a shell command
        # would let a caller inject arbitrary commands.
        cmd = ['gerapy', 'parse']
        for arg, value in args.items():
            cmd.extend(['--{arg}'.format(arg=arg), str(value)])
        cmd.extend([str(project_path), str(spider_name)])
        logger.debug('parse cmd %s', cmd)
        p = Popen(cmd,
                  shell=False,
                  stdin=PIPE,
                  stdout=PIPE,
                  stderr=PIPE,
                  close_fds=True)
        # communicate() drains both pipes together and waits for the
        # child; reading stdout and stderr one after the other can block
        # forever once the unread pipe fills up.
        raw_stdout, raw_stderr = p.communicate()
        stdout, stderr = bytes2str(raw_stdout), bytes2str(raw_stderr)
        logger.debug('stdout %s, stderr %s', stdout, stderr)
        if not stderr:
            return JsonResponse({'status': True, 'result': json.loads(stdout)})
        else:
            return JsonResponse({'status': False, 'message': stderr})