Example #1
0
def project_configure(request, project_name):
    """
    Get or update the stored configuration of a project.

    GET: load the project by name and return it as JSON, with the
    ``configuration`` field decoded from its JSON text (``None`` if empty).

    POST: store the ``configuration`` value of the JSON request body on the
    project and then run ``gerapy generate <project_name>``.

    :param request: request object
    :param project_name: project name
    :return: json; for POST ``{'status': '1'}`` when the command wrote
        nothing to stderr, otherwise ``{'status': '0', 'message': stderr}``
    """
    # get configuration
    if request.method == 'GET':
        project = Project.objects.get(name=project_name)
        project = model_to_dict(project)
        project['configuration'] = json.loads(
            project['configuration']) if project['configuration'] else None
        return JsonResponse(project)
    # update configuration
    elif request.method == 'POST':
        project = Project.objects.filter(name=project_name)
        data = json.loads(request.body)
        configuration = json.dumps(data.get('configuration'))
        project.update(**{'configuration': configuration})
        # execute generate cmd
        # project_name comes from the URL, so it is passed as a separate
        # argv entry without a shell; it can no longer inject shell syntax.
        cmd = ['gerapy', 'generate', project_name]
        p = Popen(cmd, shell=False, stdin=PIPE, stdout=PIPE, stderr=PIPE)
        # communicate() drains both pipes concurrently (reading stdout to
        # EOF before touching stderr can deadlock) and waits for the child,
        # so returncode is populated below.
        out, err = p.communicate()
        stdout, stderr = bytes2str(out), bytes2str(err)
        print('RETURN CODE', p.returncode)

        print('stdout', stdout)
        print('stderr', stderr)
        if not stderr:
            return JsonResponse({'status': '1'})
        else:
            return JsonResponse({'status': '0', 'message': stderr})
Example #2
0
def project_clone(request):
    """
    Clone a project from a remote git repository into PROJECTS_FOLDER.

    The JSON request body must contain an ``address`` string starting with
    ``http``; ``.git`` is appended when missing. The target directory is
    named after the stem of the address.

    :param request: request object
    :return: json, ``{'status': True}`` when ``git clone`` exits with code 0,
        ``{'status': False}`` otherwise or when the address is rejected
    """
    if request.method == 'POST':
        data = json.loads(request.body)
        address = data.get('address')
        # reject a missing / non-string address instead of raising
        if not isinstance(address, str) or not address.startswith('http'):
            return JsonResponse({'status': False})
        if not address.endswith('.git'):
            address = address + '.git'
        cmd = [
            'git', 'clone', address,
            join(PROJECTS_FOLDER,
                 Path(address).stem)
        ]
        logger.debug('clone cmd %s', cmd)
        p = Popen(cmd, shell=False, stdin=PIPE, stdout=PIPE, stderr=PIPE)
        # communicate() reads both pipes without deadlocking and reaps the
        # child so that returncode is available.
        out, err = p.communicate()
        stdout, stderr = bytes2str(out), bytes2str(err)
        logger.debug('clone run result %s', stdout)
        # git reports normal progress ("Cloning into ...") on stderr, so a
        # non-empty stderr does not mean failure; the exit code does.
        if p.returncode != 0:
            logger.error(stderr)
            return JsonResponse({'status': False})
        if stderr:
            logger.debug('clone stderr %s', stderr)
        return JsonResponse({'status': True})
Example #3
0
def project_configure(request, project_name):
    """
    Get or update the stored configuration of a project.

    GET: load the project by name and return it as JSON, with the
    ``configuration`` field decoded from its JSON text (``None`` if empty).

    POST: store the ``configuration`` value of the JSON request body on the
    project, strip whitespace and special characters from the project name,
    and run ``gerapy generate <project_name>``.

    :param request: request object
    :param project_name: project name
    :return: json; for POST ``{'status': '1'}`` when the command wrote
        nothing to stderr, otherwise ``{'status': '0', 'message': stderr}``
    """
    # get configuration
    if request.method == 'GET':
        project = Project.objects.get(name=project_name)
        project = model_to_dict(project)
        project['configuration'] = json.loads(
            project['configuration']) if project['configuration'] else None
        return JsonResponse(project)

    # update configuration
    elif request.method == 'POST':
        project = Project.objects.filter(name=project_name)
        data = json.loads(request.body)
        configuration = json.dumps(data.get('configuration'),
                                   ensure_ascii=False)
        project.update(**{'configuration': configuration})
        # for safe protection
        # Raw string: the same character class as before, but without the
        # invalid string escapes (\s, \!, ...) that newer Python versions
        # warn about.
        project_name = re.sub(
            r'[\s\!\@\#\$\;\&\*\~\"\'\{\}\]\[\-\+\%\^]+', '', project_name)
        # execute generate cmd
        cmd = ['gerapy', 'generate', project_name]
        p = Popen(cmd, shell=False, stdin=PIPE, stdout=PIPE, stderr=PIPE)
        # communicate() drains both pipes concurrently (reading stdout to
        # EOF before stderr can deadlock) and waits for the child to exit.
        out, err = p.communicate()
        stderr = bytes2str(err)

        if not stderr:
            return JsonResponse({'status': '1'})
        else:
            return JsonResponse({'status': '0', 'message': stderr})
Example #4
0
def project_parse(request, project_name):
    """
    Run ``gerapy parse`` for one spider of a project and return its output.

    The JSON request body supplies ``spider`` and the optional request
    settings (``start``, ``method``, ``url``, ``callback``, ``cookies``,
    ``headers``, ``meta``, ``dont_filter``, ``priority``, ``body``). Each
    setting is forwarded as a ``--<name> <value>`` pair; ``body`` is only
    forwarded for non-GET methods.

    :param request: request object
    :param project_name: project name
    :return: json, ``{'status': True, 'result': <decoded stdout>}`` when the
        command wrote nothing to stderr and stdout is valid JSON, otherwise
        ``{'status': False, 'message': ...}``
    """
    if request.method == 'POST':
        project_path = join(PROJECTS_FOLDER, project_name)
        data = json.loads(request.body)
        logger.debug('post data %s', data)
        spider_name = data.get('spider')

        def quoted_json(value):
            # JSON text wrapped in single quotes, as the original code
            # passed it to the command line
            return "'" + json.dumps(value, ensure_ascii=False) + "'"

        args = {
            'start': data.get('start', False),
            'method': data.get('method', 'GET'),
            'url': data.get('url'),
            'callback': data.get('callback'),
            'cookies': quoted_json(data.get('cookies', {})),
            'headers': quoted_json(data.get('headers', {})),
            'meta': quoted_json(data.get('meta', {})),
            'dont_filter': data.get('dont_filter', False),
            'priority': data.get('priority', 0),
        }
        # set request body
        body = data.get('body', '')
        if args.get('method').lower() != 'get':
            args['body'] = quoted_json(body)

        args_array = []
        for arg, value in args.items():
            args_array.extend([f'--{arg}', f'{value}'])
        cmd = ['gerapy', 'parse'] + args_array + [project_path] + [spider_name]
        logger.debug('parse cmd %s', cmd)
        p = Popen(cmd,
                  shell=False,
                  stdin=PIPE,
                  stdout=PIPE,
                  stderr=PIPE,
                  close_fds=True)
        # communicate() drains stdout and stderr concurrently; reading
        # stdout to EOF first can deadlock once the child fills the stderr
        # pipe. It also waits for the child so no zombie process is left.
        out, err = p.communicate()
        stdout, stderr = bytes2str(out), bytes2str(err)
        logger.debug('stdout %s, stderr %s', stdout, stderr)
        if stderr:
            return JsonResponse({'status': False, 'message': stderr})
        try:
            result = json.loads(stdout)
        except ValueError:
            # empty or non-JSON output: report failure instead of raising
            return JsonResponse({'status': False, 'message': stdout})
        return JsonResponse({'status': True, 'result': result})
Example #5
0
def project_parse(request, project_name):
    """
    Run ``gerapy parse`` for one spider of a project and return its output.

    The JSON request body supplies ``spider`` and the optional settings
    ``start``, ``method``, ``url`` and ``callback``. Every setting with a
    truthy value is forwarded as a ``--<name> <value>`` pair.

    :param request: request object
    :param project_name: project name
    :return: json, ``{'status': '1', 'result': <decoded stdout>}`` when the
        command wrote nothing to stderr and stdout is valid JSON, otherwise
        ``{'status': '0', 'message': ...}``
    """
    if request.method == 'POST':
        project_path = join(PROJECTS_FOLDER, project_name)
        print('Project Path', project_path)
        data = json.loads(request.body)
        spider_name = data.get('spider')
        # construct args cmd
        args = {
            'start': data.get('start', 0),
            'method': data.get('method', 'GET'),
            'url': data.get('url'),
            'callback': data.get('callback')
        }
        # Build an argv list instead of a shell string: url, callback and
        # spider come from the request body, so interpolating them into a
        # shell command line allowed command injection.
        args_array = []
        for arg, value in args.items():
            if value:
                args_array.extend(['--' + arg, str(value)])
        cmd = ['gerapy', 'parse'] + args_array + [
            project_path, str(spider_name)
        ]
        print(cmd)
        p = Popen(cmd,
                  shell=False,
                  stdin=PIPE,
                  stdout=PIPE,
                  stderr=PIPE,
                  close_fds=True)
        # communicate() drains both pipes concurrently (avoids the deadlock
        # of reading stdout to EOF before stderr) and reaps the child.
        out, err = p.communicate()
        stdout, stderr = bytes2str(out), bytes2str(err)
        print('stdout', stdout)
        print('stderr', stderr)
        if stderr:
            return JsonResponse({'status': '0', 'message': stderr})
        try:
            result = json.loads(stdout)
        except ValueError:
            # empty or non-JSON output: report failure instead of raising
            return JsonResponse({'status': '0', 'message': stdout})
        return JsonResponse({'status': '1', 'result': result})