Python batchの例、core.utils.common.batch Pythonの例

コード例 #1

0

ファイルを表示

    def get_export_data(self, task_filter_options=None, annotation_filter_options=None, serialization_options=None):
        """
        Yield the serialized export representation of the project's tasks, one task at a time.

        This is a generator: nothing runs until iteration starts. The
        ``transaction.atomic()`` block is entered on the first ``next()`` and is
        only left once the generator is exhausted or closed. Task ids are
        collected first; tasks are then loaded and serialized in batches of
        ``BATCH_SIZE``.

        Side effect: ``self.counters`` is reset to ``{'task_number': 0}`` when
        iteration starts, and ``task_number`` grows by the number of tasks kept
        in each batch, before that batch's tasks are yielded.

        task_filter_options: None or Dict, forwarded to ``_get_filtered_tasks``.
            When it is a dict with a truthy ``only_with_annotations`` entry,
            tasks without annotations are dropped from every batch.
        annotation_filter_options: forwarded unchanged to ``get_task_queryset``.
        serialization_options: None or Dict({
            drafts: optional
                None
                    or
                Dict({
                    only_id: true/false
                })
            predictions: optional
                None
                    or
                Dict({
                    only_id: true/false
                })
            annotations__completed_by: optional
                None
                    or
                Dict({
                    only_id: true/false
                })
        })
        """
        from .serializers import ExportDataSerializer

        logger.debug('Run get_task_queryset')

        BATCH_SIZE = 1000

        with transaction.atomic():
            # TODO: make counters from queryset
            # counters = Project.objects.with_counts().filter(id=self.project.id)[0].get_counters()
            self.counters = {'task_number': 0}
            all_tasks = self.project.tasks
            logger.debug('Tasks filtration')
            task_ids = (
                self._get_filtered_tasks(all_tasks, task_filter_options=task_filter_options)
                .distinct()
                .values_list('id', flat=True)
            )
            base_export_serializer_option = self._get_export_serializer_option(serialization_options)
            # The filter flag does not change between batches, so evaluate it once.
            only_with_annotations = isinstance(task_filter_options, dict) and task_filter_options.get(
                'only_with_annotations'
            )
            for batch_number, ids in enumerate(batch(task_ids, BATCH_SIZE), start=1):
                tasks = list(self.get_task_queryset(ids, annotation_filter_options))
                # Logs the upper bound of tasks requested so far, not the number actually kept.
                logger.debug(f'Batch: {batch_number * BATCH_SIZE}')
                if only_with_annotations:
                    tasks = [task for task in tasks if task.annotations.exists()]

                serializer = ExportDataSerializer(tasks, many=True, **base_export_serializer_option)
                # Count the batch before yielding so the counter already covers
                # every task the consumer is about to receive from this batch.
                self.counters['task_number'] += len(tasks)
                yield from serializer.data

コード例 #2

0

ファイルを表示

ファイル: api.py プロジェクト: stjordanis/label-studio

    def get(self, request, *args, **kwargs):
        """
        Build an export file for the project and return it as an attachment.

        Query parameters are validated with ``ExportParamSerializer``. The
        export type is read from ``exportType`` and falls back to
        ``export_type``. Unless ``download_all_tasks`` is set, only tasks that
        have annotations are exported. An optional ``ids[]`` list restricts the
        export to those task ids. Tasks are serialized in batches of 1000 ids.
        """
        project = self.get_object()
        query_serializer = ExportParamSerializer(data=request.GET)
        query_serializer.is_valid(raise_exception=True)
        params = query_serializer.validated_data

        export_type = params.get('exportType') or params['export_type']
        only_finished = not params['download_all_tasks']
        download_resources = params['download_resources']
        interpolate_key_frames = params['interpolate_key_frames']

        tasks_ids = request.GET.getlist('ids[]')

        logger.debug('Get tasks')
        query = Task.objects.filter(project=project)
        if tasks_ids:
            logger.debug(f'Select only subset of {len(tasks_ids)} tasks')
            query = query.filter(id__in=tasks_ids)
        if only_finished:
            # distinct() collapses the duplicate rows produced by the annotations join.
            query = query.filter(annotations__isnull=False).distinct()

        task_ids = query.values_list('id', flat=True)

        logger.debug('Serialize tasks for export')
        tasks = []
        for id_chunk in batch(task_ids, 1000):
            chunk_queryset = self.get_task_queryset(query.filter(id__in=id_chunk))
            chunk_serializer = ExportDataSerializer(
                chunk_queryset,
                many=True,
                expand=['drafts'],
                context={'interpolate_key_frames': interpolate_key_frames},
            )
            tasks.extend(chunk_serializer.data)
        logger.debug('Prepare export files')

        export_stream, content_type, filename = DataExport.generate_export_file(
            project, tasks, export_type, download_resources, request.GET
        )

        response = HttpResponse(File(export_stream), content_type=content_type)
        response['Content-Disposition'] = 'attachment; filename="%s"' % filename
        response['filename'] = filename
        return response

コード例 #3

0

ファイルを表示

    def get(self, request, *args, **kwargs):
        """
        Build an export file for the project and return it as an attachment.

        Query parameters are read straight from ``request.GET``:
        ``exportType`` (or ``export_type`` when the former is absent; both
        default to ``'JSON'``), ``download_all_tasks``, ``download_resources``
        (falling back to ``settings.CONVERTER_DOWNLOAD_RESOURCES`` when absent)
        and an optional ``ids[]`` list restricting the exported tasks.
        """
        project = self.get_object()
        if 'exportType' in request.GET:
            export_type = request.GET.get('exportType', 'JSON')
        else:
            export_type = request.GET.get('export_type', 'JSON')
        only_finished = not bool_from_request(request.GET, 'download_all_tasks', False)
        tasks_ids = request.GET.getlist('ids[]')
        download_resources = (
            bool_from_request(request.GET, 'download_resources', True)
            if 'download_resources' in request.GET
            else settings.CONVERTER_DOWNLOAD_RESOURCES
        )

        logger.debug('Get tasks')
        project_tasks = Task.objects.filter(project=project)
        if tasks_ids:
            logger.debug(f'Select only subset of {len(tasks_ids)} tasks')
            project_tasks = project_tasks.filter(id__in=tasks_ids)
        query = project_tasks.select_related('project').prefetch_related('annotations', 'predictions')
        if only_finished:
            # distinct() collapses the duplicate rows produced by the annotations join.
            query = query.filter(annotations__isnull=False).distinct()

        task_ids = query.values_list('id', flat=True)

        logger.debug('Serialize tasks for export')
        serialized_tasks = []
        for id_chunk in batch(task_ids, 1000):
            chunk_serializer = ExportDataSerializer(
                query.filter(id__in=id_chunk), many=True, expand=['drafts']
            )
            serialized_tasks.extend(chunk_serializer.data)
        logger.debug('Prepare export files')

        export_stream, content_type, filename = DataExport.generate_export_file(
            project, serialized_tasks, export_type, download_resources, request.GET
        )

        response = HttpResponse(File(export_stream), content_type=content_type)
        response['Content-Disposition'] = 'attachment; filename="%s"' % filename
        response['filename'] = filename
        return response

コード例 #4

0

ファイルを表示

ファイル: models.py プロジェクト: g-simmons/label-studio-gs

    def get_export_data(self):
        """
        Serialize the project's tasks for export inside a single transaction.

        The task set is narrowed to ``self.task_ids`` when that is truthy, and
        to tasks that have annotations when ``self.only_finished`` is truthy.
        Tasks are serialized in batches of 1000 ids.

        Returns a ``(result, counters)`` tuple: the list of serialized tasks
        and the value of ``get_counters()`` for the project.
        """
        from .serializers import ExportDataSerializer

        with transaction.atomic():
            project_with_counts = Project.objects.with_counts().filter(id=self.project.id)[0]
            counters = project_with_counts.get_counters()

            task_queryset = self.project.tasks.select_related('project').prefetch_related(
                'annotations', 'predictions'
            )
            if self.task_ids:
                task_queryset = task_queryset.filter(id__in=self.task_ids)
            if self.only_finished:
                # distinct() collapses the duplicate rows produced by the annotations join.
                task_queryset = task_queryset.filter(annotations__isnull=False).distinct()

            # Materialize the ids up front so batching works on a plain list.
            task_ids = list(task_queryset.values_list('id', flat=True))

            logger.debug('Serialize tasks for export')
            result = []
            for id_chunk in batch(task_ids, 1000):
                chunk_serializer = ExportDataSerializer(task_queryset.filter(id__in=id_chunk), many=True)
                result.extend(chunk_serializer.data)
        return result, counters

コード例 #5

0

ファイルを表示

    def get(self, request, *args, **kwargs):
        """
        Build an export file for the project and return it as an attachment.

        ``exportType`` is taken from the query string (``None`` when absent).
        Unless ``download_all_tasks`` is set, only tasks that have annotations
        are exported. Tasks are serialized in batches of 1000 ids.
        """
        project = self.get_object()
        export_type = request.GET.get('exportType')
        only_finished = not bool_from_request(request.GET, 'download_all_tasks', False)

        logger.debug('Get tasks')
        query = (
            Task.objects.filter(project=project)
            .select_related('project')
            .prefetch_related('annotations', 'predictions')
        )
        if only_finished:
            # distinct() collapses the duplicate rows produced by the annotations join.
            query = query.filter(annotations__isnull=False).distinct()

        task_ids = query.values_list('id', flat=True)

        logger.debug('Serialize tasks for export')
        tasks = []
        for id_chunk in batch(task_ids, 1000):
            chunk_serializer = ExportDataSerializer(query.filter(id__in=id_chunk), many=True)
            tasks.extend(chunk_serializer.data)
        logger.debug('Prepare export files')

        export_stream, content_type, filename = DataExport.generate_export_file(
            project, tasks, export_type, request.GET
        )

        response = HttpResponse(File(export_stream), content_type=content_type)
        response['Content-Disposition'] = 'attachment; filename="%s"' % filename
        response['filename'] = filename
        return response

コード例 #6

0

ファイルを表示

    def get_export_data(self, task_filter_options=None, annotation_filter_options=None, serialization_options=None):
        """
        Serialize the project's tasks for export and return ``(result, counters)``.

        Task ids are selected through ``_get_filtered_tasks``; tasks are then
        loaded in batches of ``BATCH_SIZE`` with their annotations prefetched
        through the queryset returned by ``_get_filtered_annotations_queryset``.
        When ``task_filter_options`` is a dict with a truthy
        ``only_with_annotations`` entry, tasks whose prefetched annotations are
        empty are dropped. ``counters['task_number']`` is the number of
        serialized tasks.

        serialization_options: None or Dict({
            drafts: optional
                None
                    or
                Dict({
                    only_id: true/false
                })
            predictions: optional
                None
                    or
                Dict({
                    only_id: true/false
                })
            annotations__completed_by: optional
                None
                    or
                Dict({
                    only_id: true/false
                })
        })
        """
        with transaction.atomic():
            # TODO: make counters from queryset
            # counters = Project.objects.with_counts().filter(id=self.project.id)[0].get_counters()
            counters = {'task_number': 0}
            result = []
            all_tasks = self.project.tasks.select_related('project').prefetch_related(
                'annotations', 'predictions', 'drafts'
            )
            logger.debug('Tasks filtration')
            filtered_tasks = self._get_filtered_tasks(all_tasks, task_filter_options=task_filter_options)
            task_ids = filtered_tasks.distinct().values_list('id', flat=True)
            base_export_serializer_option = self._get_export_serializer_option(serialization_options)
            BATCH_SIZE = 1000
            serializer_class = self.get_serializer_class()
            annotations_qs = self._get_filtered_annotations_queryset(
                annotation_filter_options=annotation_filter_options
            )
            drop_unannotated = isinstance(task_filter_options, dict) and task_filter_options.get(
                'only_with_annotations'
            )
            for batch_number, ids in enumerate(batch(task_ids, BATCH_SIZE), start=1):
                # Restrict each task's prefetched annotations to the filtered annotation queryset.
                annotations_prefetch = Prefetch("annotations", queryset=annotations_qs)
                batch_tasks = list(Task.objects.filter(id__in=ids).prefetch_related(annotations_prefetch))
                logger.debug(f'Batch: {batch_number * BATCH_SIZE}')
                if drop_unannotated:
                    batch_tasks = [task for task in batch_tasks if task.annotations.all()]

                serializer = serializer_class(batch_tasks, many=True, **base_export_serializer_option)
                result.extend(serializer.data)

        counters['task_number'] = len(result)
        return result, counters