import json
import logging
import re

from django.conf import settings
from django.db.models import F, FloatField, Q
from django.db.models.functions import Cast
from django.db.models.fields.json import KeyTextTransform  # django.contrib.postgres.fields.jsonb on Django < 3.1

# Assumed to be in scope from project-internal modules: load_func, cast_bool_from_str,
# cast_value, operators, Operator, ConjunctionEnum (exact import paths are project-specific).

logger = logging.getLogger(__name__)


def apply_ordering(queryset, ordering, only_undefined_field=False):
    if ordering:
        preprocess_field_name = load_func(settings.PREPROCESS_FIELD_NAME)
        field_name, ascending = preprocess_field_name(
            ordering[0], only_undefined_field=only_undefined_field)

        if field_name.startswith('data__'):
            # annotate task with data field for float/int/bool ordering support
            json_field = field_name.replace('data__', '')
            queryset = queryset.annotate(
                ordering_field=KeyTextTransform(json_field, 'data'))
            f = F('ordering_field').asc(nulls_last=True) if ascending \
                else F('ordering_field').desc(nulls_last=True)
        else:
            f = F(field_name).asc(nulls_last=True) if ascending \
                else F(field_name).desc(nulls_last=True)

        queryset = queryset.order_by(f)
    else:
        queryset = queryset.order_by('id')

    return queryset
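
# settings.PREPROCESS_FIELD_NAME points at a pluggable hook resolved via load_func();
# the project ships its own implementation. Below is a minimal, illustrative sketch of
# the contract assumed by apply_ordering() and apply_filters() -- not the real hook:
def preprocess_field_name(raw_field_name, only_undefined_field=False):
    field_name = raw_field_name.replace('filter:', '').replace('tasks:', '')
    ascending = not field_name.startswith('-')  # leading '-' means descending order
    field_name = field_name.lstrip('-')
    if field_name.startswith('data.'):
        # task data lives in a JSON field; '__' enables ORM JSON lookups.
        # The real hook also remaps the field when only_undefined_field is set
        # (not shown in this sketch).
        field_name = field_name.replace('data.', 'data__', 1)
    return field_name, ascending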

def get_storage_classes(storage_type='import'):
    """Return all registered ***ImportStorage (or ***ExportStorage) classes.

    The classes are resolved through the API views rather than the models
    directly, to stay consistent with what the API exposes.

    :param storage_type: 'import' or 'export'
    """
    storage_list = load_func(settings.GET_STORAGE_LIST)
    storage_classes = []
    for storage_decl in storage_list():
        storage_api_class = storage_decl[f'{storage_type}_list_api']
        storage_classes.append(storage_api_class.serializer_class.Meta.model)
    return storage_classes
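
# Hypothetical usage sketch: the returned items are Django model classes, so the
# standard manager API applies. The export/count example below is illustrative only.
for storage_class in get_storage_classes(storage_type='export'):
    print(storage_class.__name__, storage_class.objects.count())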

def get_fields_for_evaluation(prepare_params, request):
    """Collect field names to annotate them

    :param prepare_params: structure with filters and ordering
    :param request: django request
    :return: list of field names
    """
    from tasks.models import Task
    from projects.models import Project

    result = []

    # collect fields from ordering
    if prepare_params.ordering:
        ordering_field_name = prepare_params.ordering[0].replace('tasks:', '').replace('-', '')
        result.append(ordering_field_name)

    # collect fields from filters
    if prepare_params.filters:
        for _filter in prepare_params.filters.items:
            filter_field_name = _filter.filter.replace('filter:tasks:', '')
            result.append(filter_field_name)

    # visible fields calculation
    fields = prepare_params.data.get('hiddenColumns', None)
    if fields:
        from label_studio.data_manager.functions import TASKS

        GET_ALL_COLUMNS = load_func(settings.DATA_MANAGER_GET_ALL_COLUMNS)
        # we need to have a request here to detect the user role
        all_columns = GET_ALL_COLUMNS(request, Project.objects.get(id=prepare_params.project))
        all_columns = {
            TASKS + ('data.' if c.get('parent', None) == 'data' else '') + c['id']
            for c in all_columns['columns']
        }
        hidden = set(fields['explore']) & set(fields['labeling'])
        shown = all_columns - hidden
        shown = {c[len(TASKS):] for c in shown} - {'data'}  # strip the 'tasks:' prefix and drop plain 'data'
        result = set(result) | shown

    # remove duplicates
    result = set(result)

    # we don't need to annotate regular model fields, so we skip them
    skipped_fields = [field.attname for field in Task._meta.fields]
    skipped_fields.append('id')
    result = [f for f in result if f not in skipped_fields]
    result = [f for f in result if not f.startswith('data.')]

    return result

def get_fields_for_evaluation(prepare_params, user):
    """Collect field names to annotate them

    :param prepare_params: structure with filters and ordering
    :param user: user
    :return: list of field names
    """
    from tasks.models import Task
    from projects.models import Project

    result = []
    result += get_fields_for_filter_ordering(prepare_params)

    # visible fields calculation
    fields = prepare_params.data.get('hiddenColumns', None)
    if fields:
        from label_studio.data_manager.functions import TASKS

        GET_ALL_COLUMNS = load_func(settings.DATA_MANAGER_GET_ALL_COLUMNS)
        all_columns = GET_ALL_COLUMNS(
            Project.objects.get(id=prepare_params.project), user)
        all_columns = {
            TASKS + ('data.' if c.get('parent', None) == 'data' else '') + c['id']
            for c in all_columns['columns']
        }
        hidden = set(fields['explore']) & set(fields['labeling'])
        shown = all_columns - hidden
        shown = {c[len(TASKS):] for c in shown} - {'data'}  # strip the 'tasks:' prefix and drop plain 'data'
        result = set(result) | shown

    # remove duplicates
    result = set(result)

    # we don't need to annotate regular model fields, so we skip them
    skipped_fields = [field.attname for field in Task._meta.fields]
    skipped_fields.append('id')
    result = [f for f in result if f not in skipped_fields]
    result = [f for f in result if not f.startswith('data.')]

    return result
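
# get_fields_for_filter_ordering() is not shown in this section; a minimal sketch,
# reconstructed from the inline collection logic of the previous revision above:
def get_fields_for_filter_ordering(prepare_params):
    result = []
    # collect fields from ordering
    if prepare_params.ordering:
        result.append(prepare_params.ordering[0].replace('tasks:', '').replace('-', ''))
    # collect fields from filters
    if prepare_params.filters:
        for _filter in prepare_params.filters.items:
            result.append(_filter.filter.replace('filter:tasks:', ''))
    return result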

import logging

from django.conf import settings
from rest_framework import generics
from rest_framework.views import APIView
from rest_framework.parsers import FormParser, JSONParser, MultiPartParser
from rest_framework.response import Response

from core.permissions import all_permissions
from label_studio.core.utils.common import load_func
from .localfiles.api import LocalFilesImportStorageListAPI, LocalFilesExportStorageListAPI

logger = logging.getLogger(__name__)

# TODO: replace hardcoded app lists with a search over included storage apps
get_storage_list = load_func(settings.GET_STORAGE_LIST)


def _get_common_storage_list():
    storage_list = get_storage_list()
    if settings.ENABLE_LOCAL_FILES_STORAGE:
        storage_list += [{
            'name': 'localfiles',
            'title': 'Local files',
            'import_list_api': LocalFilesImportStorageListAPI,
            'export_list_api': LocalFilesExportStorageListAPI,
        }]
    return storage_list
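
# These modules rely on load_func() for pluggability: a setting holds a dotted-path
# string, and load_func imports and returns the named object, so enterprise builds can
# swap implementations via configuration. A minimal re-implementation sketch for
# illustration (the project ships its own version in core.utils.common):
import importlib

def load_func_sketch(dotted_path):
    if dotted_path is None:
        return None
    if callable(dotted_path):  # already a callable; pass it through
        return dotted_path
    module_path, _, attr = dotted_path.rpartition('.')
    return getattr(importlib.import_module(module_path), attr)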

def apply_filters(queryset, filters, only_undefined_field=False):
    if not filters:
        return queryset

    # convert conjunction to ORM statement
    filter_expressions = []
    for _filter in filters.items:
        # we can also have annotation filters
        if not _filter.filter.startswith('filter:tasks:') or _filter.value is None:
            continue

        # django ORM lookup expression attached to column name
        preprocess_field_name = load_func(settings.PREPROCESS_FIELD_NAME)
        field_name, _ = preprocess_field_name(_filter.filter, only_undefined_field)

        # filter preprocessing, value type conversion, etc.
        preprocess_filter = load_func(settings.DATA_MANAGER_PREPROCESS_FILTER)
        _filter = preprocess_filter(_filter, field_name)

        # custom expressions for enterprise
        custom_filter_expressions = load_func(settings.DATA_MANAGER_CUSTOM_FILTER_EXPRESSIONS)
        filter_expression = custom_filter_expressions(_filter, field_name)
        if filter_expression:
            filter_expressions.append(filter_expression)
            continue

        # annotators
        if field_name == 'annotators' and _filter.operator == Operator.CONTAINS:
            filter_expressions.append(Q(annotations__completed_by=int(_filter.value)))
            continue
        elif field_name == 'annotators' and _filter.operator == Operator.NOT_CONTAINS:
            filter_expressions.append(~Q(annotations__completed_by=int(_filter.value)))
            continue
        elif field_name == 'annotators' and _filter.operator == Operator.EMPTY:
            value = cast_bool_from_str(_filter.value)
            filter_expressions.append(Q(annotations__completed_by__isnull=value))
            continue

        # annotations results & predictions results
        if field_name in ['annotations_results', 'predictions_results']:
            name = 'annotations__result' if field_name == 'annotations_results' else 'predictions__result'
            if _filter.operator in [Operator.EQUAL, Operator.NOT_EQUAL]:
                try:
                    value = json.loads(_filter.value)
                except ValueError:
                    return queryset.none()
                q = Q(**{name: value})
                filter_expressions.append(q if _filter.operator == Operator.EQUAL else ~q)
                continue
            elif _filter.operator == Operator.CONTAINS:
                filter_expressions.append(Q(**{name + '__icontains': _filter.value}))
                continue
            elif _filter.operator == Operator.NOT_CONTAINS:
                filter_expressions.append(~Q(**{name + '__icontains': _filter.value}))
                continue

        # annotation ids
        if field_name == 'annotations_ids':
            field_name = 'annotations__id'
            if 'contains' in _filter.operator:
                # convert string like "1 2,3" => [1, 2, 3]
                _filter.value = [
                    int(value) for value in re.split(',|;| ', _filter.value)
                    if value and value.isdigit()
                ]
                _filter.operator = 'in_list' if _filter.operator == 'contains' else 'not_in_list'
            elif 'equal' in _filter.operator:
                if not _filter.value.isdigit():
                    _filter.value = 0

        # predictions model versions
        if field_name == 'predictions_model_versions' and _filter.operator == Operator.CONTAINS:
            q = Q()
            for value in _filter.value:
                q |= Q(predictions__model_version__contains=value)
            filter_expressions.append(q)
            continue
        elif field_name == 'predictions_model_versions' and _filter.operator == Operator.NOT_CONTAINS:
            q = Q()
            for value in _filter.value:
                q &= ~Q(predictions__model_version__contains=value)
            filter_expressions.append(q)
            continue
        elif field_name == 'predictions_model_versions' and _filter.operator == Operator.EMPTY:
            value = cast_bool_from_str(_filter.value)
            filter_expressions.append(Q(predictions__model_version__isnull=value))
            continue

        # use another name because of a model name conflict
        if field_name == 'file_upload':
            field_name = 'file_upload_field'

        # annotate with a cast to number if needed
        if _filter.type == 'Number' and field_name.startswith('data__'):
            json_field = field_name.replace('data__', '')
            queryset = queryset.annotate(**{
                f'filter_{json_field.replace("$undefined$", "undefined")}':
                    Cast(KeyTextTransform(json_field, 'data'), output_field=FloatField())
            })
            clean_field_name = f'filter_{json_field.replace("$undefined$", "undefined")}'
        else:
            clean_field_name = field_name

        # special case: predictions, annotations, cancelled --- for them 0 is equal to is_empty=True
        if clean_field_name in ('total_predictions', 'total_annotations', 'cancelled_annotations') \
                and _filter.operator == 'empty':
            _filter.operator = 'equal' if cast_bool_from_str(_filter.value) else 'not_equal'
            _filter.value = 0

        # get type of annotated field
        value_type = 'str'
        if queryset.exists():
            value_type = type(queryset.values_list(field_name, flat=True)[0]).__name__

        if (value_type == 'list' or value_type == 'tuple') and 'equal' in _filter.operator:
            raise Exception('Not supported filter type')

        # special case: for strings, empty is "" or null=True
        if _filter.type in ('String', 'Unknown') and _filter.operator == 'empty':
            value = cast_bool_from_str(_filter.value)
            if value:  # empty = true
                q = Q(Q(**{field_name: None}) | Q(**{field_name + '__isnull': True}))
                if value_type == 'str':
                    q |= Q(**{field_name: ''})
                if value_type == 'list':
                    q = Q(**{field_name: [None]})
            else:  # empty = false
                q = Q(~Q(**{field_name: None}) & ~Q(**{field_name + '__isnull': True}))
                if value_type == 'str':
                    q &= ~Q(**{field_name: ''})
                if value_type == 'list':
                    q = ~Q(**{field_name: [None]})
            filter_expressions.append(q)
            continue

        # regex pattern check
        elif _filter.operator == 'regex':
            try:
                re.compile(pattern=str(_filter.value))
            except Exception as e:
                logger.info('Incorrect regex for filter: %s: %s', _filter.value, str(e))
                return queryset.none()

        # append operator
        field_name = f"{clean_field_name}{operators.get(_filter.operator, '')}"

        # in
        if _filter.operator == 'in':
            cast_value(_filter)
            filter_expressions.append(
                Q(**{
                    f'{field_name}__gte': _filter.value.min,
                    f'{field_name}__lte': _filter.value.max,
                }),
            )

        # not in
        elif _filter.operator == 'not_in':
            cast_value(_filter)
            filter_expressions.append(
                ~Q(**{
                    f'{field_name}__gte': _filter.value.min,
                    f'{field_name}__lte': _filter.value.max,
                }),
            )

        # in list
        elif _filter.operator == 'in_list':
            filter_expressions.append(Q(**{f'{field_name}__in': _filter.value}))

        # not in list
        elif _filter.operator == 'not_in_list':
            filter_expressions.append(~Q(**{f'{field_name}__in': _filter.value}))

        # empty
        elif _filter.operator == 'empty':
            if cast_bool_from_str(_filter.value):
                filter_expressions.append(Q(**{field_name: True}))
            else:
                filter_expressions.append(~Q(**{field_name: True}))

        # starting from not_
        elif _filter.operator.startswith('not_'):
            cast_value(_filter)
            filter_expressions.append(~Q(**{field_name: _filter.value}))

        # all others
        else:
            cast_value(_filter)
            filter_expressions.append(Q(**{field_name: _filter.value}))

    logger.debug(f'Apply filter: {filter_expressions}')
    if filters.conjunction == ConjunctionEnum.OR:
        result_filter = Q()
        for filter_expression in filter_expressions:
            result_filter.add(filter_expression, Q.OR)
        queryset = queryset.filter(result_filter)
    else:
        for filter_expression in filter_expressions:
            queryset = queryset.filter(filter_expression)
    return queryset
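
# Hypothetical end-to-end usage, assuming a prepare_params object parsed from a
# Data Manager request (the DTO classes are project-internal; field-name conventions
# match the 'filter:tasks:...' / 'tasks:...' prefixes handled above):
from tasks.models import Task

queryset = Task.objects.filter(project_id=prepare_params.project)
queryset = apply_filters(queryset, prepare_params.filters)    # e.g. filter:tasks:data.text contains "cat"
queryset = apply_ordering(queryset, prepare_params.ordering)  # e.g. ['-tasks:total_annotations']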