Ejemplo n.º 1
0
def get_tag_avg_stat_rank(request):
    """Serve ``TagHandler.get_tag_avg_stat_rank`` for a type id and date range.

    Required query parameters: ``tid``, ``from``, ``to``, ``count``,
    ``stat_code`` and ``min_tag_count``. All of them except ``stat_code``
    must parse as integers; ``stat_code`` is passed on as a string.

    Returns:
        A ``JsonResponse`` wrapping ``response_json(...)``:

        * ``errmsg='缺少参数'`` when ``check_dict_required_param`` rejects
          the query string.
        * ``errmsg='参数格式错误'`` when a numeric parameter is not a valid
          integer.
        * ``errmsg='对应时间内的视频未被全部计算'`` when
          ``handler.judge_record`` reports that the range is not fully in
          the ``RecordStatus.Calculated`` state.
        * ``{'result': ...}`` with the handler's output otherwise.
    """
    params = request.GET
    if not check_dict_required_param(
            params,
        ['tid', 'from', 'to', 'count', 'stat_code', 'min_tag_count']):
        return JsonResponse(response_json(errmsg='缺少参数'))

    # Query-string values are untrusted text: a non-numeric value used to
    # raise ValueError and surface as an HTTP 500. Report it through the
    # same JSON error envelope as the other validation failures instead.
    try:
        type_id = int(params.get('tid'))
        time_from = int(params.get('from'))
        time_to = int(params.get('to'))
        count = int(params.get('count'))
        min_tag_count = int(params.get('min_tag_count'))
    except (TypeError, ValueError):
        return JsonResponse(response_json(errmsg='参数格式错误'))
    stat_code = str(params.get('stat_code'))

    time_from = date_to_timestamp(time_from)
    # The end bound is pushed 24 hours past the timestamp of `to`,
    # presumably so that the whole `to` day falls inside the range.
    time_to_end = date_to_timestamp(time_to) + 24 * 3600

    mongo_settings = settings.DATABASES['default']
    handler = TagHandler(mongo_settings['HOST'], mongo_settings['NAME'],
                         mongo_settings['USER'], mongo_settings['PASSWORD'])

    # Refuse to answer unless the records in the range pass the
    # "calculated" status check.
    if not handler.judge_record(type_id, time_from, time_to_end,
                                RecordStatus.Calculated.value):
        return JsonResponse(response_json(errmsg='对应时间内的视频未被全部计算'))

    data = {
        'result':
        handler.get_tag_avg_stat_rank(type_id, time_from, time_to_end, count,
                                      stat_code, min_tag_count)
    }
    return JsonResponse(response_json(data))
Ejemplo n.º 2
0
def start_spider(request, *args, **kwargs):
    """Queue a spider task for a type id and date range and record it.

    Only POST is accepted. Required form parameters: ``tid``, ``from`` and
    ``to``, all of which must parse as integers.

    Returns:
        A ``JsonResponse`` wrapping ``response_json(...)``:

        * ``errmsg='非POST请求'`` for any method other than POST.
        * ``errmsg='缺少参数'`` when ``check_dict_required_param`` rejects
          the form data.
        * ``errmsg='参数格式错误'`` when a parameter is not a valid integer.
        * ``errmsg='受接口限制,时间区间应在1个月以内'`` when
          ``to - from >= 300``.
        * ``errmsg='仍有爬虫任务执行中'`` when queuing raises
          ``AlreadyQueued``.
        * ``{'task_id': ..., 'task_state': ...}`` once the task is queued
          and a ``Tasks`` row has been created for it.
    """
    if request.method != 'POST':
        return JsonResponse(response_json(errmsg='非POST请求'))

    form = request.POST
    if not check_dict_required_param(form, ['tid', 'from', 'to']):
        return JsonResponse(response_json(errmsg='缺少参数'))

    # Form values are untrusted text: a non-numeric value used to raise
    # ValueError and surface as an HTTP 500. Answer with the JSON error
    # envelope used by the other validation failures instead.
    try:
        type_id = int(form.get('tid'))
        time_from = int(form.get('from'))
        time_to = int(form.get('to'))
    except (TypeError, ValueError):
        return JsonResponse(response_json(errmsg='参数格式错误'))

    # NOTE(review): the message says the range must stay within one month,
    # but assuming `from`/`to` are YYYYMMDD-style integers a difference of
    # 300 spans roughly three months, and a reversed range (to < from)
    # passes this check. Confirm the intended threshold.
    if time_to - time_from >= 300:
        return JsonResponse(response_json(errmsg='受接口限制,时间区间应在1个月以内'))

    try:
        task = task_spider.delay(type_id, time_from, time_to)
    except AlreadyQueued:
        return JsonResponse(response_json(errmsg='仍有爬虫任务执行中'))

    # Persist the task; time_end=-1 is the value stored for a task that
    # has only just been started.
    Tasks.objects.create(task_id=task.id,
                         task_type=TaskType.Scrapy.value,
                         tid=type_id,
                         time_from=time_from,
                         time_to=time_to,
                         time_start=int(time.time()),
                         time_end=-1)
    return JsonResponse(
        response_json({
            'task_id': task.id,
            'task_state': task.state
        }))
Ejemplo n.º 3
0
    def list_by_tag(self, request, *args, **kwargs):
        """List the videos attached to one tag for a type id and pubdate.

        Reads ``tid``, ``tag_pubdate`` and ``tag_name`` from the query
        string. The tag is looked up by name, then the ``VideoTag`` row for
        (tid, pubdate, tag) supplies the ``aids`` used to select videos.
        If the parameters fail ``check_dict_required_param``, or either
        lookup finds nothing, the result is an empty queryset.

        The response is paginated when ``paginate_queryset`` returns a
        page, and a plain serialized list otherwise.
        """
        query = request.GET
        matched = Videos.objects.none()

        required = ['tid', 'tag_pubdate', 'tag_name']
        if check_dict_required_param(query, required):
            tag = Tags.objects.filter(name=query.get('tag_name')).first()

            link = None
            if tag is not None:
                link = VideoTag.objects.filter(
                    tid=query.get('tid'),
                    pubdate=query.get('tag_pubdate'),
                    tag_id=tag._id,
                ).first()

            if link is not None:
                matched = Videos.objects.filter(aid__in=link.aids)

        page = self.paginate_queryset(matched)
        if page is None:
            # Pagination is not in effect: serialize the whole queryset.
            serializer = self.get_serializer(matched, many=True)
            return Response(serializer.data)

        serializer = self.get_serializer(page, many=True)
        return self.get_paginated_response(serializer.data)
Ejemplo n.º 4
0
def get_task_list(request, *args, **kwargs):
    """Return one page of task records combined with their Celery state.

    Required query parameters: ``pageIndex`` (page number, starting at 1)
    and ``pageSize`` (records per page); both must be positive integers.
    Records are ordered by ``time_start``, newest first.

    Returns:
        A ``JsonResponse`` wrapping ``response_json(...)``:

        * ``errmsg='缺少参数'`` when ``check_dict_required_param`` rejects
          the query string.
        * ``errmsg='参数格式错误'`` when a parameter is not a positive
          integer.
        * ``{'count': <total records>, 'results': [...]}`` otherwise.
          ``results`` is empty when ``pageIndex`` is past the last page.

    Example of one element of ``results``:
        {
            'task_id': 'bbb2ffb3-0351-4277-a248-b7cd4ccbc7d9',
            'task_type': 0,  # 0 = spider, 1 = handler
            'tid': 17,
            'time_from': 20160101,
            'time_to': 20160101,
            'time_start': 1585321895,
            'time_end': -1,
            'state': 'PROGRESS',
            'progress': [
                {
                    'cur': 100,
                    'total': 7000
                },
                {
                    'cur': 0,
                    'total': 0
                }
            ],
            'extra': {}
        }
    """
    if not check_dict_required_param(request.GET, ['pageIndex', 'pageSize']):
        return JsonResponse(response_json(errmsg='缺少参数'))

    # Non-numeric values used to raise ValueError, and pageSize == 0 a
    # ZeroDivisionError inside the paginator; both surfaced as HTTP 500.
    try:
        pageIndex = int(request.GET.get('pageIndex'))
        pageSize = int(request.GET.get('pageSize'))
    except (TypeError, ValueError):
        return JsonResponse(response_json(errmsg='参数格式错误'))
    if pageIndex < 1 or pageSize < 1:
        return JsonResponse(response_json(errmsg='参数格式错误'))

    paginator = Paginator(Tasks.objects.all().order_by('-time_start'),
                          pageSize)
    # Asking for a page past the end used to raise EmptyPage; answer with
    # an empty page (and the real total) instead.
    if pageIndex > paginator.num_pages:
        task_records = []
    else:
        task_records = paginator.page(pageIndex)

    result = []
    for task_record in task_records:
        celery_task = AsyncResult(task_record.task_id)
        # Read the state once so the reported value and the branch taken
        # below cannot disagree if the task advances between reads.
        state = celery_task.state
        progress = []
        extra = {}

        if state in ('PROGRESS', 'SUCCESS'):
            # Indexed below with the ScrapyField / HandlerField values, so
            # the task result is expected to be a mapping in these states.
            meta = celery_task.result
            if task_record.task_type == TaskType.Scrapy.value:
                progress.append({
                    'cur': meta[ScrapyField.VideoCur.value],
                    'total': meta[ScrapyField.VideoTotal.value],
                })
            elif task_record.task_type == TaskType.Handler.value:
                # Handler tasks report two progress counters: handling
                # first, then calculation.
                progress.append({
                    'cur': meta[HandlerField.HandleCur.value],
                    'total': meta[HandlerField.HandleTotal.value],
                })
                progress.append({
                    'cur': meta[HandlerField.CalCur.value],
                    'total': meta[HandlerField.CalTotal.value],
                })
                extra['status'] = meta[HandlerField.Status.value]

                if (state == 'SUCCESS'
                        and extra['status'] != HandlerErrcode.Success.value):
                    # Celery finished the task, but the handler reported a
                    # non-success status: expose it as a business error.
                    state = 'ERROR'

        result.append({
            'task_id': task_record.task_id,
            'task_type': task_record.task_type,
            'tid': task_record.tid,
            'time_from': task_record.time_from,
            'time_to': task_record.time_to,
            'time_start': task_record.time_start,
            'time_end': task_record.time_end,
            'state': state,
            'progress': progress,
            'extra': extra,
        })

    return JsonResponse(
        response_json({
            # The paginator has already counted the same queryset.
            'count': paginator.count,
            'results': result
        }))