Example #1
from django.conf import settings
from django.db.models import F
# on Django < 3.1, KeyTextTransform lives in django.contrib.postgres.fields.jsonb
from django.db.models.fields.json import KeyTextTransform
from label_studio.core.utils.common import load_func


def apply_ordering(queryset, ordering, only_undefined_field=False):
    if ordering:
        preprocess_field_name = load_func(settings.PREPROCESS_FIELD_NAME)
        field_name, ascending = preprocess_field_name(
            ordering[0], only_undefined_field=only_undefined_field)

        if field_name.startswith('data__'):
            # annotate task with data field for float/int/bool ordering support
            json_field = field_name.replace('data__', '')
            queryset = queryset.annotate(
                ordering_field=KeyTextTransform(json_field, 'data'))
            f = (F('ordering_field').asc(nulls_last=True) if ascending
                 else F('ordering_field').desc(nulls_last=True))

        else:
            f = (F(field_name).asc(nulls_last=True) if ascending
                 else F(field_name).desc(nulls_last=True))

        queryset = queryset.order_by(f)
    else:
        queryset = queryset.order_by("id")

    return queryset
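A minimal usage sketch (hypothetical: the exact ordering-string format depends on the configured PREPROCESS_FIELD_NAME function, so '-tasks:data.value' is only illustrative):

from tasks.models import Task

# order a project's tasks by a JSON data field, descending
queryset = Task.objects.filter(project=project)  # `project` assumed to exist
queryset = apply_ordering(queryset, ['-tasks:data.value'])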
Example #2
from django.conf import settings
from label_studio.core.utils.common import load_func


def get_storage_classes(storage_type='import'):
    """Helper function returning all registered ***ImportStorage / ***ExportStorage model classes.

    Resolved through the API views rather than the models directly, to stay
    consistent with what the API exposes.

    :param storage_type: 'import' or 'export'
    """
    storage_list = load_func(settings.GET_STORAGE_LIST)
    storage_classes = []
    for storage_decl in storage_list():
        storage_api_class = storage_decl[f'{storage_type}_list_api']
        storage_classes.append(storage_api_class.serializer_class.Meta.model)
    return storage_classes
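A usage sketch (illustrative only; assumes storages were registered via GET_STORAGE_LIST as in the other examples):

# resolve the model classes behind the registered storages
import_models = get_storage_classes(storage_type='import')
export_models = get_storage_classes(storage_type='export')
print([model.__name__ for model in import_models])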
Example #3
from django.conf import settings
from label_studio.core.utils.common import load_func


def get_fields_for_evaluation(prepare_params, request):
    """Collect field names that need to be annotated on the task queryset.

    :param prepare_params: structure with filters and ordering
    :param request: django request
    :return: list of field names
    """
    from tasks.models import Task
    from projects.models import Project

    result = []
    # collect fields from ordering
    if prepare_params.ordering:
        ordering_field_name = prepare_params.ordering[0].replace("tasks:", "").replace("-", "")
        result.append(ordering_field_name)

    # collect fields from filters
    if prepare_params.filters:
        for _filter in prepare_params.filters.items:
            filter_field_name = _filter.filter.replace("filter:tasks:", "")
            result.append(filter_field_name)

    # visible fields calculation
    fields = prepare_params.data.get('hiddenColumns', None)
    if fields:
        from label_studio.data_manager.functions import TASKS
        GET_ALL_COLUMNS = load_func(settings.DATA_MANAGER_GET_ALL_COLUMNS)
        # we need to have a request here to detect user role
        all_columns = GET_ALL_COLUMNS(request, Project.objects.get(id=prepare_params.project))
        all_columns = {TASKS + ('data.' if c.get('parent', None) == 'data' else '') + c['id']
                       for c in all_columns['columns']}
        hidden = set(fields['explore']) & set(fields['labeling'])
        shown = all_columns - hidden
        shown = {c[len(TASKS):] for c in shown} - {'data'}  # remove tasks:
        result = set(result) | shown

    # remove duplicates
    result = set(result)

    # we don't need to annotate regular model fields, so we skip them
    skipped_fields = [field.attname for field in Task._meta.fields]
    skipped_fields.append("id")
    result = [f for f in result if f not in skipped_fields]
    result = [f for f in result if not f.startswith("data.")]

    return result
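The returned names are the extra fields a caller is expected to annotate onto the task queryset; a sketch of such a call site (annotate_field is a hypothetical downstream helper, not part of the source):

fields = get_fields_for_evaluation(prepare_params, request)
for field in fields:
    queryset = annotate_field(queryset, field)  # hypothetical helper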
Example #4
from django.conf import settings
from label_studio.core.utils.common import load_func


def get_fields_for_evaluation(prepare_params, user):
    """Collect field names that need to be annotated on the task queryset.

    :param prepare_params: structure with filters and ordering
    :param user: current user
    :return: list of field names
    """
    from tasks.models import Task
    from projects.models import Project

    result = []
    result += get_fields_for_filter_ordering(prepare_params)

    # visible fields calculation
    fields = prepare_params.data.get('hiddenColumns', None)
    if fields:
        from label_studio.data_manager.functions import TASKS
        GET_ALL_COLUMNS = load_func(settings.DATA_MANAGER_GET_ALL_COLUMNS)
        all_columns = GET_ALL_COLUMNS(
            Project.objects.get(id=prepare_params.project), user)
        all_columns = {
            TASKS + ('data.' if c.get('parent', None) == 'data' else '') + c['id']
            for c in all_columns['columns']
        }
        hidden = set(fields['explore']) & set(fields['labeling'])
        shown = all_columns - hidden
        shown = {c[len(TASKS):] for c in shown} - {'data'}  # remove tasks:
        result = set(result) | shown

    # remove duplicates
    result = set(result)

    # we don't need to annotate regular model fields, so we skip them
    skipped_fields = [field.attname for field in Task._meta.fields]
    skipped_fields.append("id")
    result = [f for f in result if f not in skipped_fields]
    result = [f for f in result if not f.startswith("data.")]

    return result
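This variant differs from Example #3 only in its signature: it takes the user directly instead of the whole request and passes it to GET_ALL_COLUMNS after the project. An illustrative call site:

# same field collection, user-based signature
fields = get_fields_for_evaluation(prepare_params, request.user)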
Example #5
import logging

from django.conf import settings
from rest_framework import generics
from rest_framework.views import APIView
from core.permissions import all_permissions
from rest_framework.parsers import FormParser, JSONParser, MultiPartParser
from rest_framework.response import Response

from label_studio.core.utils.common import load_func
from .localfiles.api import LocalFilesImportStorageListAPI, LocalFilesExportStorageListAPI

logger = logging.getLogger(__name__)
# TODO: replace hardcoded apps lists with search over included storage apps

get_storage_list = load_func(settings.GET_STORAGE_LIST)


def _get_common_storage_list():
    storage_list = get_storage_list()
    if settings.ENABLE_LOCAL_FILES_STORAGE:
        storage_list += [{
            'name': 'localfiles',
            'title': 'Local files',
            'import_list_api': LocalFilesImportStorageListAPI,
            'export_list_api': LocalFilesExportStorageListAPI
        }]

    return storage_list
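A sketch of consuming the combined list (assumes the declaration shape shown above, with name/title/import_list_api/export_list_api keys):

# enumerate the import list API for every storage, including
# 'localfiles' when ENABLE_LOCAL_FILES_STORAGE is set
for storage in _get_common_storage_list():
    print(storage['name'], storage['import_list_api'].__name__)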

Example #6
import json
import logging
import re

from django.conf import settings
from django.db.models import FloatField, Q
# on Django < 3.1, KeyTextTransform lives in django.contrib.postgres.fields.jsonb
from django.db.models.fields.json import KeyTextTransform
from django.db.models.functions import Cast

from core.utils.params import cast_bool_from_str  # path assumed; varies by version
from label_studio.core.utils.common import load_func
# Operator, ConjunctionEnum, the `operators` map and cast_value are defined in
# the data manager module this excerpt comes from

logger = logging.getLogger(__name__)


def apply_filters(queryset, filters, only_undefined_field=False):
    if not filters:
        return queryset

    # convert conjunction to orm statement
    filter_expressions = []

    for _filter in filters.items:

        # the filter list may also contain annotation-scoped filters;
        # skip anything that is not a task filter or carries no value
        if not _filter.filter.startswith("filter:tasks:") or _filter.value is None:
            continue

        # resolve the ORM field name for the column
        preprocess_field_name = load_func(settings.PREPROCESS_FIELD_NAME)
        field_name, _ = preprocess_field_name(_filter.filter, only_undefined_field)

        # filter preprocessing: value type conversion, etc.
        preprocess_filter = load_func(settings.DATA_MANAGER_PREPROCESS_FILTER)
        _filter = preprocess_filter(_filter, field_name)

        # custom expressions for enterprise
        custom_filter_expressions = load_func(settings.DATA_MANAGER_CUSTOM_FILTER_EXPRESSIONS)
        filter_expression = custom_filter_expressions(_filter, field_name)
        if filter_expression:
            filter_expressions.append(filter_expression)
            continue

        # annotators
        if field_name == 'annotators' and _filter.operator == Operator.CONTAINS:
            filter_expressions.append(
                Q(annotations__completed_by=int(_filter.value)))
            continue
        elif field_name == 'annotators' and _filter.operator == Operator.NOT_CONTAINS:
            filter_expressions.append(~Q(
                annotations__completed_by=int(_filter.value)))
            continue
        elif field_name == 'annotators' and _filter.operator == Operator.EMPTY:
            value = cast_bool_from_str(_filter.value)
            filter_expressions.append(
                Q(annotations__completed_by__isnull=value))
            continue

        # annotations results & predictions results
        if field_name in ['annotations_results', 'predictions_results']:
            name = 'annotations__result' if field_name == 'annotations_results' else 'predictions__result'
            if _filter.operator in [Operator.EQUAL, Operator.NOT_EQUAL]:
                try:
                    value = json.loads(_filter.value)
                except (ValueError, TypeError):  # malformed JSON in the filter value
                    return queryset.none()

                q = Q(**{name: value})
                filter_expressions.append(q if _filter.operator ==
                                          Operator.EQUAL else ~q)
                continue
            elif _filter.operator == Operator.CONTAINS:
                filter_expressions.append(
                    Q(**{name + '__icontains': _filter.value}))
                continue
            elif _filter.operator == Operator.NOT_CONTAINS:
                filter_expressions.append(~Q(
                    **{name + '__icontains': _filter.value}))
                continue

        # annotation ids
        if field_name == 'annotations_ids':
            field_name = 'annotations__id'
            if 'contains' in _filter.operator:
                # convert string like "1 2,3" => [1,2,3]
                _filter.value = [
                    int(value) for value in re.split(',|;| ', _filter.value)
                    if value and value.isdigit()
                ]
                _filter.operator = 'in_list' if _filter.operator == 'contains' else 'not_in_list'
            elif 'equal' in _filter.operator:
                if not _filter.value.isdigit():
                    _filter.value = 0

        # predictions model versions
        if field_name == 'predictions_model_versions' and _filter.operator == Operator.CONTAINS:
            q = Q()
            for value in _filter.value:
                q |= Q(predictions__model_version__contains=value)
            filter_expressions.append(q)
            continue
        elif field_name == 'predictions_model_versions' and _filter.operator == Operator.NOT_CONTAINS:
            q = Q()
            for value in _filter.value:
                q &= ~Q(predictions__model_version__contains=value)
            filter_expressions.append(q)
            continue
        elif field_name == 'predictions_model_versions' and _filter.operator == Operator.EMPTY:
            value = cast_bool_from_str(_filter.value)
            filter_expressions.append(
                Q(predictions__model_version__isnull=value))
            continue

        # use other name because of model names conflict
        if field_name == 'file_upload':
            field_name = 'file_upload_field'

        # annotate with cast to number if need
        if _filter.type == 'Number' and field_name.startswith('data__'):
            json_field = field_name.replace('data__', '')
            queryset = queryset.annotate(
                **{
                    f'filter_{json_field.replace("$undefined$", "undefined")}':
                    Cast(KeyTextTransform(json_field, 'data'),
                         output_field=FloatField())
                })
            clean_field_name = f'filter_{json_field.replace("$undefined$", "undefined")}'
        else:
            clean_field_name = field_name

        # special case: for total_predictions, total_annotations and
        # cancelled_annotations, a value of 0 counts as empty
        if clean_field_name in ('total_predictions', 'total_annotations', 'cancelled_annotations') and \
                _filter.operator == 'empty':
            _filter.operator = 'equal' if cast_bool_from_str(_filter.value) else 'not_equal'
            _filter.value = 0

        # get type of annotated field
        value_type = 'str'
        if queryset.exists():
            value_type = type(queryset.values_list(field_name, flat=True)[0]).__name__

        if value_type in ('list', 'tuple') and 'equal' in _filter.operator:
            raise Exception('Unsupported filter type')

        # special case: for strings, "empty" means '' or NULL
        if _filter.type in ('String', 'Unknown') and _filter.operator == 'empty':
            value = cast_bool_from_str(_filter.value)
            if value:  # empty = true
                q = Q(
                    Q(**{field_name: None})
                    | Q(**{field_name + '__isnull': True}))
                if value_type == 'str':
                    q |= Q(**{field_name: ''})
                if value_type == 'list':
                    q = Q(**{field_name: [None]})

            else:  # empty = false
                q = Q(~Q(**{field_name: None})
                      & ~Q(**{field_name + '__isnull': True}))
                if value_type == 'str':
                    q &= ~Q(**{field_name: ''})
                if value_type == 'list':
                    q = ~Q(**{field_name: [None]})

            filter_expressions.append(q)
            continue

        # regex pattern check
        elif _filter.operator == 'regex':
            try:
                re.compile(pattern=str(_filter.value))
            except Exception as e:
                logger.info('Incorrect regex for filter: %s: %s',
                            _filter.value, str(e))
                return queryset.none()

        # append operator
        field_name = f"{clean_field_name}{operators.get(_filter.operator, '')}"

        # in
        if _filter.operator == "in":
            cast_value(_filter)
            filter_expressions.append(
                Q(
                    **{
                        f"{field_name}__gte": _filter.value.min,
                        f"{field_name}__lte": _filter.value.max,
                    }), )

        # not in
        elif _filter.operator == "not_in":
            cast_value(_filter)
            filter_expressions.append(
                ~Q(
                    **{
                        f"{field_name}__gte": _filter.value.min,
                        f"{field_name}__lte": _filter.value.max,
                    }), )

        # in list
        elif _filter.operator == "in_list":
            filter_expressions.append(
                Q(**{f"{field_name}__in": _filter.value}), )

        # not in list
        elif _filter.operator == "not_in_list":
            filter_expressions.append(
                ~Q(**{f"{field_name}__in": _filter.value}), )

        # empty
        elif _filter.operator == 'empty':
            if cast_bool_from_str(_filter.value):
                filter_expressions.append(Q(**{field_name: True}))
            else:
                filter_expressions.append(~Q(**{field_name: True}))

        # starting from not_
        elif _filter.operator.startswith("not_"):
            cast_value(_filter)
            filter_expressions.append(~Q(**{field_name: _filter.value}))

        # all others
        else:
            cast_value(_filter)
            filter_expressions.append(Q(**{field_name: _filter.value}))

    logger.debug('Apply filter: %s', filter_expressions)
    if filters.conjunction == ConjunctionEnum.OR:
        result_filter = Q()
        for filter_expression in filter_expressions:
            result_filter.add(filter_expression, Q.OR)
        queryset = queryset.filter(result_filter)
    else:
        for filter_expression in filter_expressions:
            queryset = queryset.filter(filter_expression)
    return queryset
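Putting the helpers together (a sketch; prepare_params is assumed to carry the project id plus the filters and ordering structures used throughout these examples):

from tasks.models import Task

# hypothetical pipeline: filter first, then order what survives
queryset = Task.objects.filter(project=prepare_params.project)
queryset = apply_filters(queryset, prepare_params.filters)
queryset = apply_ordering(queryset, prepare_params.ordering)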