Esempio n. 1
0
    def validate_config(self, config_string):
        """Check that ``config_string`` is compatible with the recorded summary.

        ``self.validate_label_config(config_string)`` is always called first.
        If the instance has no ``summary`` attribute nothing else is checked.
        Otherwise the following checks run in order and the first failure
        raises:

        1. Data columns: when the summary has data columns (ignoring
           ``settings.DATA_UNDEFINED_NAME``), every object-tag name found in
           the config must be one of them.
        2. Annotations: every key of ``summary.created_annotations`` (strings
           of the form ``from_name|to_name|type``) must be among the control
           tag tuples found in the config.
        3. Labels: for every control tag in ``summary.created_labels`` that
           has labels, the tag itself and each of its labels must still be
           present in the config.

        The method returns early (skipping the remaining checks) when the
        config yields no object-tag names, and skips the labels check when the
        config yields no control tag tuples.

        Args:
            config_string: The labeling config to validate.

        Raises:
            ValidationError: If any of the three consistency checks fails.
        """
        self.validate_label_config(config_string)
        if not hasattr(self, 'summary'):
            return

        # validate data columns consistency
        fields_from_config = get_all_object_tag_names(config_string)
        if not fields_from_config:
            logger.debug('Data fields not found in labeling config')
            return
        fields_from_data = set(self.summary.common_data_columns)
        fields_from_data.discard(settings.DATA_UNDEFINED_NAME)
        if fields_from_data:
            missing_fields = fields_from_config.difference(fields_from_data)
            if missing_fields:
                # Sorted so the error text does not depend on set iteration order.
                raise ValidationError(
                    f'These fields are not present in the data: {",".join(sorted(missing_fields, key=str))}')

        # validate annotations consistency
        annotations_from_config = set(get_all_control_tag_tuples(config_string))
        if not annotations_from_config:
            logger.debug('Annotation schema is not found in config')
            return
        created_annotations = self.summary.created_annotations
        unknown_annotations = set(created_annotations).difference(annotations_from_config)
        if unknown_annotations:
            diff_lines = []
            for ann_tuple in sorted(unknown_annotations, key=str):
                from_name, to_name, tag_type = ann_tuple.split('|')
                diff_lines.append(
                    f'{created_annotations[ann_tuple]} '
                    f'with from_name={from_name}, to_name={to_name}, type={tag_type}')
            diff_str = '\n'.join(diff_lines)
            raise ValidationError(f'Created annotations are incompatible with provided labeling schema, '
                                  f'we found:\n{diff_str}')

        # validate labels consistency
        labels_from_config = get_all_labels(config_string)
        for control_tag_from_data, labels_from_data in self.summary.created_labels.items():
            if not labels_from_data:
                # No labels were created with this tag, so there is nothing to
                # protect. Skipping also avoids indexing the config mapping
                # with a tag it may not contain.
                continue
            # Check if labels created in annotations, and their control tag has been removed
            if control_tag_from_data not in labels_from_config:
                raise ValidationError(
                    f'There are {sum(labels_from_data.values(), 0)} annotation(s) created with tag '
                    f'"{control_tag_from_data}", you can\'t remove it')
            labels_from_config_by_tag = set(labels_from_config[control_tag_from_data])
            removed_labels = set(labels_from_data).difference(labels_from_config_by_tag)
            if removed_labels:
                diff_str = '\n'.join(
                    f'{label} ({labels_from_data[label]} annotations)'
                    for label in sorted(removed_labels, key=str)
                )
                raise ValidationError(f'These labels still exist in annotations:\n{diff_str}')
Esempio n. 2
0
    def validate_config(self, config_string, strict=False):
        """Check that ``config_string`` is compatible with the recorded summary.

        ``self.validate_label_config(config_string)`` is always called first.
        If the instance has no ``summary`` attribute nothing else is checked.
        Otherwise the steps below run in order; the method returns early when
        a step finds nothing to compare against.

        1. If ``self.num_tasks`` is 0, ``summary.reset()`` is called and the
           method returns.
        2. Data columns: when the summary has data columns (ignoring
           ``settings.DATA_UNDEFINED_NAME``), every object-tag name found in
           the config (with any ``[...]`` suffix stripped) must be one of them.
        3. If ``self.num_annotations`` is 0,
           ``summary.reset(tasks_data_based=False)`` is called and the method
           returns.
        4. Annotations: every key of ``summary.created_annotations`` (strings
           of the form ``from_name|to_name|type``) must be among the control
           tag tuples found in the config.
        5. Labels: for every control tag in ``summary.created_labels`` that
           has labels, the tag must be present in the config's labels or in
           its dynamic-label tags. Labels missing from the config raise only
           when ``strict is True`` and the tag is not a dynamic-label tag;
           otherwise a warning is logged.

        Args:
            config_string: The labeling config to validate.
            strict: When exactly ``True``, labels that exist in annotations
                but not in the config are an error for non-dynamic tags.

        Raises:
            LabelStudioValidationErrorSentryIgnored: If a consistency check
                fails.
        """
        self.validate_label_config(config_string)
        if not hasattr(self, 'summary'):
            return

        if self.num_tasks == 0:
            logger.debug(f'Project {self} has no tasks: nothing to validate here. Ensure project summary is empty')
            self.summary.reset()
            return

        # validate data columns consistency
        fields_from_config = get_all_object_tag_names(config_string)
        if not fields_from_config:
            logger.debug('Data fields not found in labeling config')
            return
        fields_from_config = {field.split('[')[0] for field in fields_from_config}  # Repeater tag support
        fields_from_data = set(self.summary.common_data_columns)
        fields_from_data.discard(settings.DATA_UNDEFINED_NAME)
        if fields_from_data:
            missing_fields = fields_from_config.difference(fields_from_data)
            if missing_fields:
                # Sorted so the error text does not depend on set iteration order.
                raise LabelStudioValidationErrorSentryIgnored(
                    f'These fields are not present in the data: {",".join(sorted(missing_fields, key=str))}'
                )

        if self.num_annotations == 0:
            logger.debug(
                f'Project {self} has no annotations: nothing to validate here. '
                f'Ensure annotations-related project summary is empty'
            )
            self.summary.reset(tasks_data_based=False)
            return

        # validate annotations consistency
        annotations_from_config = set(get_all_control_tag_tuples(config_string))
        if not annotations_from_config:
            logger.debug('Annotation schema is not found in config')
            return
        created_annotations = self.summary.created_annotations
        unknown_annotations = set(created_annotations).difference(annotations_from_config)
        if unknown_annotations:
            diff_lines = []
            for ann_tuple in sorted(unknown_annotations, key=str):
                from_name, to_name, tag_type = ann_tuple.split('|')
                diff_lines.append(
                    f'{created_annotations[ann_tuple]} '
                    f'with from_name={from_name}, to_name={to_name}, type={tag_type}'
                )
            diff_str = '\n'.join(diff_lines)
            raise LabelStudioValidationErrorSentryIgnored(
                f'Created annotations are incompatible with provided labeling schema, we found:\n{diff_str}'
            )

        # validate labels consistency
        labels_from_config, dynamic_label_from_config = get_all_labels(config_string)
        for control_tag_from_data, labels_from_data in self.summary.created_labels.items():
            if not labels_from_data:
                # No labels were created with this tag, so there is nothing to
                # protect. Skipping also avoids indexing the config mapping
                # with a tag it may not contain.
                continue
            tag_in_config = control_tag_from_data in labels_from_config
            tag_is_dynamic = control_tag_from_data in dynamic_label_from_config
            # Check if labels created in annotations, and their control tag has been removed
            if not tag_in_config and not tag_is_dynamic:
                raise LabelStudioValidationErrorSentryIgnored(
                    f'There are {sum(labels_from_data.values(), 0)} annotation(s) created with tag '
                    f'"{control_tag_from_data}", you can\'t remove it'
                )
            # A dynamic-label tag may have no static labels in the config;
            # treat that as an empty label set instead of indexing blindly.
            labels_from_config_by_tag = set(labels_from_config[control_tag_from_data]) if tag_in_config else set()
            removed_labels = set(labels_from_data).difference(labels_from_config_by_tag)
            if removed_labels:
                diff_str = '\n'.join(
                    f'{label} ({labels_from_data[label]} annotations)'
                    for label in sorted(removed_labels, key=str)
                )
                # `strict is True` is kept as an identity check so truthy
                # non-bool values keep their existing (non-strict) behavior.
                if (strict is True) and not tag_is_dynamic:
                    raise LabelStudioValidationErrorSentryIgnored(
                        f'These labels still exist in annotations:\n{diff_str}')
                else:
                    logger.warning(f'project_id={self.id} inconsistent labels in config and annotations: {diff_str}')