def get_export_data(self, task_filter_options=None, annotation_filter_options=None, serialization_options=None):
    """Stream serialized tasks for export, one task dict at a time.

    serialization_options: None or Dict({
        drafts: optional None or Dict({ only_id: true/false })
        predictions: optional None or Dict({ only_id: true/false })
        annotations__completed_by: optional None or Dict({ only_id: true/false })
    })

    Side effect: (re)initializes ``self.counters['task_number']`` and
    increments it by the number of tasks yielded.
    """
    from .serializers import ExportDataSerializer

    logger.debug('Run get_task_queryset')

    with transaction.atomic():
        # TODO: make counters from queryset
        # counters = Project.objects.with_counts().filter(id=self.project.id)[0].get_counters()
        self.counters = {'task_number': 0}
        all_tasks = self.project.tasks

        logger.debug('Tasks filtration')
        # Only ids are materialized here; batching below keeps memory bounded.
        task_ids = (
            self._get_filtered_tasks(all_tasks, task_filter_options=task_filter_options)
            .distinct()
            .values_list('id', flat=True)
        )
        base_export_serializer_option = self._get_export_serializer_option(serialization_options)
        i = 0
        BATCH_SIZE = 1000
        for ids in batch(task_ids, BATCH_SIZE):
            i += 1
            tasks = list(self.get_task_queryset(ids, annotation_filter_options))
            logger.debug(f'Batch: {i*BATCH_SIZE}')
            if isinstance(task_filter_options, dict) and task_filter_options.get('only_with_annotations'):
                # NOTE(review): .exists() issues one query per task; if
                # get_task_queryset prefetches annotations, checking the
                # prefetched task.annotations.all() would avoid the N+1 —
                # confirm before changing.
                tasks = [task for task in tasks if task.annotations.exists()]
            serializer = ExportDataSerializer(tasks, many=True, **base_export_serializer_option)
            self.counters['task_number'] += len(tasks)
            yield from serializer.data
def get(self, request, *args, **kwargs):
    """Export the project's tasks as a downloadable file in the requested format.

    Export options are validated through ExportParamSerializer; an optional
    ``ids[]`` query parameter restricts the export to a subset of tasks.
    """
    project = self.get_object()

    params = ExportParamSerializer(data=request.GET)
    params.is_valid(raise_exception=True)
    validated = params.validated_data

    # 'exportType' (camelCase) is the legacy spelling and wins when present.
    export_type = validated.get('exportType') or validated['export_type']
    only_finished = not validated['download_all_tasks']
    download_resources = validated['download_resources']
    interpolate_key_frames = validated['interpolate_key_frames']

    requested_ids = request.GET.getlist('ids[]')

    logger.debug('Get tasks')
    query = Task.objects.filter(project=project)
    if requested_ids:
        logger.debug(f'Select only subset of {len(requested_ids)} tasks')
        query = query.filter(id__in=requested_ids)
    if only_finished:
        # The annotations join can duplicate task rows; distinct() dedupes.
        query = query.filter(annotations__isnull=False).distinct()

    task_ids = query.values_list('id', flat=True)

    logger.debug('Serialize tasks for export')
    tasks = []
    for id_chunk in batch(task_ids, 1000):
        chunk_qs = self.get_task_queryset(query.filter(id__in=id_chunk))
        serializer = ExportDataSerializer(
            chunk_qs,
            many=True,
            expand=['drafts'],
            context={'interpolate_key_frames': interpolate_key_frames},
        )
        tasks.extend(serializer.data)

    logger.debug('Prepare export files')
    export_stream, content_type, filename = DataExport.generate_export_file(
        project, tasks, export_type, download_resources, request.GET
    )

    response = HttpResponse(File(export_stream), content_type=content_type)
    response['Content-Disposition'] = 'attachment; filename="%s"' % filename
    response['filename'] = filename
    return response
def get(self, request, *args, **kwargs):
    """Export project tasks as a downloadable file (query-param driven API).

    Options read straight from request.GET:
      - exportType / export_type: converter format id (default 'JSON');
        the camelCase spelling wins when present
      - download_all_tasks: when false, export only tasks with annotations
      - download_resources: whether to download linked resources; falls back
        to settings.CONVERTER_DOWNLOAD_RESOURCES when absent
      - ids[]: optional subset of task ids to export
    """
    project = self.get_object()
    export_type = (
        request.GET.get('exportType', 'JSON')
        if 'exportType' in request.GET
        else request.GET.get('export_type', 'JSON')
    )
    only_finished = not bool_from_request(request.GET, 'download_all_tasks', False)
    tasks_ids = request.GET.getlist('ids[]')
    if 'download_resources' in request.GET:
        download_resources = bool_from_request(request.GET, 'download_resources', True)
    else:
        download_resources = settings.CONVERTER_DOWNLOAD_RESOURCES

    logger.debug('Get tasks')
    # Keep the queryset and the serialized output in separate names; the
    # original reused `tasks` for both, which is easy to misread.
    task_qs = Task.objects.filter(project=project)
    if tasks_ids:  # empty list means "export all tasks"
        logger.debug(f'Select only subset of {len(tasks_ids)} tasks')
        task_qs = task_qs.filter(id__in=tasks_ids)
    query = task_qs.select_related('project').prefetch_related('annotations', 'predictions')
    if only_finished:
        # The annotations join can duplicate task rows; distinct() dedupes.
        query = query.filter(annotations__isnull=False).distinct()

    task_ids = query.values_list('id', flat=True)

    logger.debug('Serialize tasks for export')
    tasks = []
    for _task_ids in batch(task_ids, 1000):
        tasks += ExportDataSerializer(query.filter(id__in=_task_ids), many=True, expand=['drafts']).data

    logger.debug('Prepare export files')
    export_stream, content_type, filename = DataExport.generate_export_file(
        project, tasks, export_type, download_resources, request.GET
    )

    response = HttpResponse(File(export_stream), content_type=content_type)
    response['Content-Disposition'] = 'attachment; filename="%s"' % filename
    response['filename'] = filename
    return response
def get_export_data(self):
    """Serialize the project's exportable tasks.

    Honors ``self.task_ids`` (optional subset of task ids) and
    ``self.only_finished`` (restrict to tasks that have annotations).

    Returns a tuple ``(serialized_tasks, counters)`` where counters come
    from the project's aggregated counts.
    """
    from .serializers import ExportDataSerializer

    with transaction.atomic():
        project_with_counts = Project.objects.with_counts().filter(id=self.project.id)[0]
        counters = project_with_counts.get_counters()

        queryset = self.project.tasks.select_related('project').prefetch_related('annotations', 'predictions')
        if self.task_ids:
            queryset = queryset.filter(id__in=self.task_ids)
        if self.only_finished:
            # The annotations join can duplicate task rows; distinct() dedupes.
            queryset = queryset.filter(annotations__isnull=False).distinct()

        id_list = list(queryset.values_list('id', flat=True))

        logger.debug('Serialize tasks for export')
        serialized = []
        for id_chunk in batch(id_list, 1000):
            chunk_data = ExportDataSerializer(queryset.filter(id__in=id_chunk), many=True).data
            serialized.extend(chunk_data)
        return serialized, counters
def get(self, request, *args, **kwargs):
    """Export all (or only annotated) project tasks as a file download."""
    project = self.get_object()
    export_type = request.GET.get('exportType')
    only_finished = not bool_from_request(request.GET, 'download_all_tasks', False)

    logger.debug('Get tasks')
    query = (
        Task.objects.filter(project=project)
        .select_related('project')
        .prefetch_related('annotations', 'predictions')
    )
    if only_finished:
        # The annotations join can duplicate task rows; distinct() dedupes.
        query = query.filter(annotations__isnull=False).distinct()
    task_ids = query.values_list('id', flat=True)

    logger.debug('Serialize tasks for export')
    tasks = []
    for id_chunk in batch(task_ids, 1000):
        tasks.extend(ExportDataSerializer(query.filter(id__in=id_chunk), many=True).data)

    logger.debug('Prepare export files')
    export_stream, content_type, filename = DataExport.generate_export_file(project, tasks, export_type, request.GET)

    response = HttpResponse(File(export_stream), content_type=content_type)
    response['Content-Disposition'] = 'attachment; filename="%s"' % filename
    response['filename'] = filename
    return response
def get_export_data(self, task_filter_options=None, annotation_filter_options=None, serialization_options=None):
    """Collect and serialize all tasks matching the given filters for export.

    serialization_options: None or Dict({
        drafts: optional None or Dict({ only_id: true/false })
        predictions: optional None or Dict({ only_id: true/false })
        annotations__completed_by: optional None or Dict({ only_id: true/false })
    })

    Returns a tuple ``(result, counters)``: the list of serialized task
    dicts and a counters dict holding the number of exported tasks.
    """
    with transaction.atomic():
        # TODO: make counters from queryset
        # counters = Project.objects.with_counts().filter(id=self.project.id)[0].get_counters()
        counters = {'task_number': 0}
        result = []
        all_tasks = self.project.tasks.select_related('project').prefetch_related(
            'annotations', 'predictions', 'drafts'
        )
        logger.debug('Tasks filtration')
        # Only ids are materialized here; tasks themselves are re-fetched
        # per batch below, which keeps memory bounded.
        task_ids = (
            self._get_filtered_tasks(all_tasks, task_filter_options=task_filter_options)
            .distinct()
            .values_list('id', flat=True)
        )
        base_export_serializer_option = self._get_export_serializer_option(serialization_options)
        i = 0
        BATCH_SIZE = 1000
        serializer_class = self.get_serializer_class()
        # Annotation filters are applied once to build a queryset that is
        # attached to each task via Prefetch inside the batch loop.
        annotations_qs = self._get_filtered_annotations_queryset(
            annotation_filter_options=annotation_filter_options
        )
        for ids in batch(task_ids, BATCH_SIZE):
            i += 1
            tasks = list(
                Task.objects.filter(id__in=ids).prefetch_related(
                    Prefetch(
                        "annotations",
                        queryset=annotations_qs,
                    )
                )
            )
            logger.debug(f'Batch: {i*BATCH_SIZE}')
            if isinstance(task_filter_options, dict) and task_filter_options.get('only_with_annotations'):
                # .all() reads the prefetched (already filtered) annotations,
                # so this check issues no extra query per task.
                tasks = [task for task in tasks if task.annotations.all()]
            serializer = serializer_class(tasks, many=True, **base_export_serializer_option)
            result += serializer.data
        counters['task_number'] = len(result)
        return result, counters