Example #1
def processor_edit(request, processor_id, **kwargs):
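    """Render the processor edit form, attaching any associated validation rules."""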
    model = handle_msg(kwargs)
    processor = Processor.objects.get(id=processor_id)
    processor_obj = ProcessorSerializer(processor, many=False).data
    try:
        validate_rules = ValidateRule.objects.filter(task_id=processor_id)
        validate_rules_ser = ValidateRuleSerializer(validate_rules,
                                                    many=True).data
        processor_obj['validate_rules'] = json.dumps(validate_rules_ser)
    except Exception:
        # Validation rules are optional; ignore lookup/serialization errors
        pass

    model['form'] = processor_obj
    return render(request, 'html/processor_edit.html', model)
Example #2
def task_edit(request, task_id, **kwargs):
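    """Render the task edit form with the task's validation rules, assigned
    servers, and the list of available spider servers."""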
    model = handle_msg(kwargs)
    task = Task.objects.get(id=task_id)
    tasks_obj = TaskSerializer(task, many=False).data
    try:
        validate_rules = ValidateRule.objects.filter(task_id=task_id)
        validate_rules_ser = ValidateRuleSerializer(validate_rules,
                                                    many=True).data
        tasks_obj['validate_rules'] = json.dumps(validate_rules_ser)
    except Exception:
        # Validation rules are optional; ignore lookup/serialization errors
        pass

    # Collect the ids of the servers this task is deployed on
    task_servers = [task_server.server_id
                    for task_server in TaskServer.objects.filter(task=task)]
    tasks_obj['task_servers'] = task_servers
    servers = SpiderServer.objects.all().order_by('-update_time')
    spider_servers = SpiderServerSerializer(servers, many=True).data
    model['form'] = tasks_obj
    model['spider_servers'] = spider_servers
    return render(request, 'html/task_edit.html', model)
Example #3
def task_create(request, **kwargs):
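    """Render the task creation form; if a crawler_id is supplied, prefill
    the form with that crawler's data."""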
    servers = SpiderServer.objects.all().order_by('-update_time')
    spider_servers = SpiderServerSerializer(servers, many=True).data
    model = handle_msg(kwargs)
    model['spider_servers'] = spider_servers
    # crawler_id may arrive via POST or GET; default to an empty string
    crawler_id = ''
    if request.method == 'POST':
        crawler_id = request.POST.get('crawler_id', '')
    elif request.method == 'GET':
        crawler_id = request.GET.get('crawler_id', '')
    if crawler_id:
        crawlers = Crawler.objects.get(pk=crawler_id)
        crawler_obj = CrawlerSerializer(crawlers, many=False).data
        if crawler_obj and crawler_obj['clean_method'] == 'common':
            processor = Processor.objects.get(id=crawler_obj['processor_id'])
            crawler_obj['deploy_target'] = processor.deploy_target
            validate_rules = ValidateRule.objects.filter(
                task_id=crawlers.processor_id)
            validate_rules_ser = ValidateRuleSerializer(validate_rules,
                                                        many=True).data
            crawler_obj['validate_rules'] = json.dumps(validate_rules_ser)
            model["crawler_data"] = crawler_obj

    return render(request, 'html/task_create.html', model)
Example #4
def task_list(request, **kwargs):
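    """Render the task list page with the available project codes."""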
    model = handle_msg(kwargs)
    project_ids = Project.objects.values_list('code', 'code', 'project_name')
    model['project_ids'] = project_ids
    return render(request, 'html/task_list.html', model)
Example #5
def crawler_create(request, **kwargs):
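    """Handle the crawler creation form: save the uploaded crawler and
    cleaning scripts under crawler_app_folder and create the Crawler record."""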
    model = handle_msg(kwargs)

    crawler_app_folder = "/etc/rhino/crawler_app"
    form = {}
    success = True
    if request.method == 'POST':
        crawler_id = request.POST['crawler_id']
        project = request.POST['project']
        project_id = request.POST['project_id']
        project_name = request.POST['project_name']
        crawler_name = request.POST['crawler_name']
        crawler_app = request.FILES.get('crawler_app', None)
        clean_app = request.FILES.get('clean_app', None)
        clean_parameters = request.POST['clean_parameters']
        clean_method = request.POST.get('clean_method', 'custom')
        processor_id = request.POST.get('processor_id', '')
        processor_name = request.POST.get('processor_name', '')
        nc = "off"
        try:
            nc = request.POST['need_clean']
        except Exception as e:
            need_clean = False
        if nc and nc == 'on':
            need_clean = True
        else:
            need_clean = False

        try:
            # Recreate the upload directory for this crawler
            path = crawler_app_folder + '/' + crawler_id
            if os.path.exists(path):
                shutil.rmtree(path)
            os.makedirs(path)

            if clean_method == 'common':
                # Reuse the cleaning configuration from the shared processor
                processor = Processor.objects.get(id=processor_id)
                field_mapping = processor.fields_mapping
                clean_app_file_name = processor.clean_app
                clean_parameters = processor.clean_parameters
            else:
                field_mapping = request.POST.get('field_mapping', '')
                field_mapping = field_mapping.replace('\n', '')
                if clean_app and clean_app.name:
                    # Save the uploaded cleaning script
                    with open(os.path.join(path, clean_app.name),
                              'wb+') as clean_app_dest:
                        for chunk in clean_app.chunks():
                            clean_app_dest.write(chunk)
                    clean_app_file_name = crawler_app_folder + '/' + crawler_id + '/' + clean_app.name
                else:
                    clean_app_file_name = ""

            # Save the uploaded crawler script
            with open(os.path.join(path, crawler_app.name),
                      'wb+') as crawler_app_dest:
                for chunk in crawler_app.chunks():
                    crawler_app_dest.write(chunk)

            crawler = Crawler.objects.filter(pk=crawler_id)
            if crawler.exists():
                success = False
                # Message: "A crawler with the same code already exists"
                add_message(model, "已经存在同样编码的爬虫", msg_type="error")
            else:
                crawler = Crawler(project_id=project_id,
                                  id=crawler_id,
                                  project_name=project_name,
                                  crawler_name=crawler_name,
                                  crawler_app=crawler_app_folder + '/' +
                                  crawler_id + '/' + crawler_app.name,
                                  clean_app=clean_app_file_name,
                                  clean_parameters=clean_parameters,
                                  need_clean=need_clean,
                                  fields_mapping=field_mapping,
                                  processor_id=processor_id,
                                  processor_name=processor_name,
                                  clean_method=clean_method)
                crawler.save()
        except Exception as e:
            add_message(model, str(e), msg_type="error")
            success = False

        if success:
            # Message: "Crawler created successfully"
            add_message(model, "新增爬虫成功")
            return crawler_list(request, msg="新增爬虫成功")
        else:
            form["crawler_id"] = crawler_id
            form["crawler_name"] = crawler_name
            form["project"] = project
            form["project_id"] = project_id
            form["project_name"] = project_name
            form["crawler_app"] = crawler_app
            form["clean_app"] = clean_app
            form["clean_parameters"] = clean_parameters
            if need_clean:
                form["need_clean"] = 'on'
            form["field_mapping"] = field_mapping
        model['form'] = form

    elif request.method == 'GET':
        project_id = request.GET.get('project', '')
        if project_id:
            project = Project.objects.get(code=project_id)
            form['project_id'] = project_id
            form['project_name'] = project.project_name
            form['project'] = project_id + ' ---- ' + project.project_name
        model['form'] = form

    return render(request, 'html/crawler_create.html', model)
Example #6
def crawler_edit_submit(request, **kwargs):
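    """Handle the crawler edit form: replace re-uploaded scripts, propagate
    the changes to the crawler's existing tasks, and save the record."""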
    model = handle_msg(kwargs)
    crawler_app_folder = "/etc/rhino/crawler_app"
    form = {}
    success = True
    if request.method == 'POST':
        crawler_id = request.POST.get('crawler_id')
        crawler = Crawler.objects.get(id=crawler_id)

        #project = request.POST.get('project')
        #project_id = request.POST.get('project_id')
        #project_name = request.POST.get('project_name')
        crawler_name = request.POST.get('crawler_name', None)
        crawler_app = request.FILES.get('crawler_app', None)
        clean_app = request.FILES.get('clean_app', None)
        clean_parameters = request.POST.get('clean_parameters', None)
        field_mapping = request.POST.get('field_mapping', '')
        field_mapping = field_mapping.replace('\n', '')
        nc = "off"
        try:
            nc = request.POST.get('need_clean', None)
        except Exception as e:
            need_clean = False
        if nc and nc == 'on':
            need_clean = True
        else:
            need_clean = False

        try:
            path = crawler_app_folder + '/' + crawler_id
            tasks = Task.objects.filter(crawler_id=crawler_id)
            if crawler_app:  # The user re-uploaded the crawler script
                # Replace the file and update the crawler record
                original_crawler_app = crawler.crawler_app
                crawler_app_file_name = crawler_app_folder + '/' + crawler_id + '/' + crawler_app.name
                crawler.crawler_app = crawler_app_file_name
                # Remove the old file
                if original_crawler_app and os.path.isfile(original_crawler_app):
                    os.remove(original_crawler_app)

                with open(os.path.join(path, crawler_app.name),
                          'wb+') as crawler_app_dest:
                    for chunk in crawler_app.chunks():
                        crawler_app_dest.write(chunk)

                # Propagate the new file to all existing tasks
                for task in tasks:
                    task.crawler_app = crawler_app_file_name
                    task.save()

            if clean_app:  # The user re-uploaded the cleaning script
                original_clean_app = crawler.clean_app
                # Remove the old file
                if original_clean_app and os.path.isfile(original_clean_app):
                    os.remove(original_clean_app)
                clean_app_file_name = crawler_app_folder + '/' + crawler_id + '/' + clean_app.name
                crawler.clean_app = clean_app_file_name

                # Replace the file and update the crawler record
                with open(os.path.join(path, clean_app.name),
                          'wb+') as clean_app_dest:
                    for chunk in clean_app.chunks():
                        clean_app_dest.write(chunk)

                # Propagate the new file to all existing tasks
                for task in tasks:
                    task.clean_app = clean_app_file_name
                    task.save()

            # Propagate the cleaning parameters to all existing tasks
            for task in tasks:
                task.clean_parameters = clean_parameters
                task.save()

            crawler.fields_mapping = field_mapping
            crawler.crawler_name = crawler_name
            crawler.clean_parameters = clean_parameters
            crawler.update_time = time.time()
            crawler.save()
        except Exception as e:
            add_message(model, str(e), msg_type="error")
            success = False

        if success:
            # Message: "Crawler updated; only newly created tasks pick up the change."
            add_message(model, "修改爬虫成功,新建任务才能生效。")
            return crawler_list(request, msg="修改爬虫成功,新建任务才能生效。")
        else:
            crawler_ser = CrawlerSerializer(crawler, many=False)
            return render(request, 'html/crawler_edit.html',
                          {'form': crawler_ser.data})
    else:
        return crawler_list(request)
Example #7
def list_server(request, **kwargs):
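    """Render the server list page."""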
    model = handle_msg(kwargs)
    return render(request, 'html/server_list.html', model)
Example #8
def create_server(request, **kwargs):
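    """Render the server creation page."""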
    model = handle_msg(kwargs)
    return render(request, 'html/server_create.html', model)
Example #9
def processor_create(request, **kwargs):
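    """Render the processor creation page."""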
    model = handle_msg(kwargs)
    return render(request, 'html/processor_create.html', model)
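All of the examples above are plain Django function-based views. A minimal sketch of how they might be mounted in a URLconf follows; the URL patterns, the `views` module name, and the implicit string path converters are illustrative assumptions, not taken from the source.

# Illustrative only: routes and module layout are assumptions.
from django.urls import path

from . import views

urlpatterns = [
    path('processor/<processor_id>/edit/', views.processor_edit, name='processor_edit'),
    path('processor/create/', views.processor_create, name='processor_create'),
    path('task/<task_id>/edit/', views.task_edit, name='task_edit'),
    path('task/create/', views.task_create, name='task_create'),
    path('task/list/', views.task_list, name='task_list'),
    path('crawler/create/', views.crawler_create, name='crawler_create'),
    path('crawler/edit/submit/', views.crawler_edit_submit, name='crawler_edit_submit'),
    path('server/list/', views.list_server, name='list_server'),
    path('server/create/', views.create_server, name='create_server'),
]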