Ejemplo n.º 1
0
    def check_tag_exists_in_schema(self,
                                   original_tag,
                                   check_for_warnings=False):
        """Check that a tag is known to the schema or is a permitted extension.

        Parameters
        ----------
        original_tag: HedTag
            The tag being checked; it is also attached to any issue reported.
        check_for_warnings: bool
            If True, a tag that is a permitted extension is reported with
            HED_TAG_EXTENDED.
        Returns
        -------
        []
            A validation issues list. Empty when nothing was found.
        """
        # Basic tags and takes-value tags need no extension checks at all.
        if original_tag.is_basic_tag() or original_tag.is_takes_value_tag():
            return []

        if not original_tag.is_extension_allowed_tag():
            return list(
                ErrorHandler.format_error(ValidationErrors.INVALID_EXTENSION,
                                          tag=original_tag))

        if not check_for_warnings:
            return []

        # The reported span starts right after the base tag and runs to the end.
        return list(
            ErrorHandler.format_error(
                ValidationErrors.HED_TAG_EXTENDED,
                tag=original_tag,
                index_in_tag=len(original_tag.org_base_tag),
                index_in_tag_end=None))
Ejemplo n.º 2
0
    def _handle_onset_or_offset(self, def_tag, onset_offset_tag):
        """Register an onset, or close the matching onset for an offset.

        Parameters
        ----------
        def_tag : HedTag
            The def tag paired with the onset/offset tag.  Its
            extension/value portion is the definition name, optionally
            followed by "/" and a placeholder value.
        onset_offset_tag : HedTag
            The tag whose short base tag is compared against
            DefTagNames.ONSET_KEY; anything else is handled as an offset.

        Returns
        -------
        issues: [{}]
            Issues found, or an empty list if the tag was accepted.
        """
        is_onset = (onset_offset_tag.short_base_tag.lower() ==
                    DefTagNames.ONSET_KEY)
        full_def_name = def_tag.extension_or_value_portion
        # Everything after the first "/" is the placeholder value (if any).
        def_name, _, placeholder = full_def_name.partition("/")

        def_entry = self._def_mapper.get_def_entry(def_name)
        if def_entry is None:
            return ErrorHandler.format_error(OnsetErrors.ONSET_DEF_UNMATCHED,
                                             tag=def_tag)

        # A value must be supplied exactly when the definition takes one.
        expects_value = bool(def_entry.takes_value)
        if expects_value != bool(placeholder):
            return ErrorHandler.format_error(
                OnsetErrors.ONSET_PLACEHOLDER_WRONG,
                tag=def_tag,
                has_placeholder=expects_value)

        onset_key = full_def_name.lower()
        if is_onset:
            # An onset is always accepted; it replaces any entry stored
            # under the same (case-insensitive) name.
            self._onsets[onset_key] = full_def_name
            return []

        if onset_key not in self._onsets:
            return ErrorHandler.format_error(OnsetErrors.OFFSET_BEFORE_ONSET,
                                             tag=def_tag)

        del self._onsets[onset_key]
        return []
Ejemplo n.º 3
0
    def check_for_onset_offset(self, hed_string_obj):
        """
            Look for onset/offset tags in a string and update the onset context.

        Parameters
        ----------
        hed_string_obj : HedString
            The hed string to search.

        Returns
        -------
        onset_issues: [{}]
            Issues found while validating the onset/offset groups, e.g. no def
            tag, too many def tags, wrong number of groups, or issues reported
            by _handle_onset_or_offset.
        """
        issues = []
        for anchor_tag, anchor_group in self._find_onset_tags(hed_string_obj):
            # A falsy tag ends the search; note this returns an empty list
            # rather than the issues gathered so far.
            if not anchor_tag:
                return []

            def_tags, def_groups = self._find_def_tags(anchor_group)
            if not def_tags:
                issues += ErrorHandler.format_error(
                    OnsetErrors.ONSET_NO_DEF_TAG_FOUND, anchor_tag)
                continue
            if len(def_tags) > 1:
                issues += ErrorHandler.format_error(
                    OnsetErrors.ONSET_TOO_MANY_DEFS,
                    tag=def_tags[0],
                    tag_list=list(def_tags[1:]))
                continue

            def_tag, def_group = def_tags[0], def_groups[0]
            # Everything in the group other than the def and the anchor tag.
            extras = [
                child for child in anchor_group.get_direct_children()
                if def_group != child and anchor_tag != child
            ]

            # Offsets allow no extra children; anything else allows one.
            is_offset = (anchor_tag.short_base_tag.lower() ==
                         DefTagNames.OFFSET_KEY)
            allowed_extras = 0 if is_offset else 1
            if len(extras) > allowed_extras:
                issues += ErrorHandler.format_error(
                    OnsetErrors.ONSET_WRONG_NUMBER_GROUPS, def_tag,
                    anchor_group.get_direct_children())
                continue

            # Only the first extra is inspected; turn this into a loop if
            # allowed_extras can ever exceed 1.
            if extras and not isinstance(extras[0], HedGroup):
                issues += ErrorHandler.format_error(
                    OnsetErrors.ONSET_TAG_OUTSIDE_OF_GROUP, extras[0],
                    def_tag)

            # We now have the onset/offset tag and the def tag that names it.
            issues += self._handle_onset_or_offset(def_tag, anchor_tag)

        return issues
Ejemplo n.º 4
0
    def _finalize_mapping(self):
        """
        Rebuild the final column map from every source of column rules.

        Sources, applied in order: named columns from the column map, integer
        keyed entries in column_data, tag columns (required and optional), and
        finally column prefixes.  Call this after all definitions and columns
        have been added.

        Returns
        -------
        issues: [{}]
            The issues gathered while building the map (unknown or missing
            columns); also stored on self._finalize_mapping_issues.
        """
        self._final_column_map = {}
        named_tag_column_numbers = {}
        all_tag_columns = self._tag_columns + self._optional_tag_columns
        self._finalize_mapping_issues = []

        if self._column_map is not None:
            for col_number, col_name in self._column_map.items():
                if col_name in self.column_data:
                    entry = self.column_data[col_name]
                    entry.column_name = col_name
                    self._final_column_map[col_number] = entry
                    continue
                if col_name in all_tag_columns:
                    # Resolved to a real entry in the tag-column pass below.
                    named_tag_column_numbers[col_name] = col_number
                    continue
                if col_name.startswith(PANDAS_COLUMN_PREFIX_TO_IGNORE):
                    continue
                # Unknown columns are only reported when sidecars are in use
                # and the column was not requested by number either.
                if self._has_sidecars and col_number not in all_tag_columns:
                    self._finalize_mapping_issues += ErrorHandler.format_error(
                        ValidationErrors.HED_UNKNOWN_COLUMN,
                        extra_column_name=col_name)

        # Integer keys in column_data are shifted down by one to get the
        # internal column number.
        for key, entry in self.column_data.items():
            if isinstance(key, int):
                self._final_column_map[key - 1] = entry

        # Tag columns may be given by number (int) or by name.
        for tag_column in all_tag_columns:
            if isinstance(tag_column, int):
                if tag_column not in self._final_column_map:
                    self._final_column_map[tag_column] = ColumnMetadata(
                        ColumnType.HEDTags, tag_column)
            elif tag_column in named_tag_column_numbers:
                resolved_number = named_tag_column_numbers[tag_column]
                self._final_column_map[resolved_number] = ColumnMetadata(
                    ColumnType.HEDTags, resolved_number)
            elif tag_column in self._tag_columns:
                # Only required tag columns are reported as missing.
                self._finalize_mapping_issues += ErrorHandler.format_error(
                    ValidationErrors.HED_MISSING_COLUMN,
                    missing_column_name=tag_column)

        for col_number, prefix in self._column_prefix_dictionary.items():
            self._set_column_prefix(col_number, prefix)

        return self._finalize_mapping_issues
Ejemplo n.º 5
0
    def check_delimiter_issues_in_hed_string(self, hed_string):
        """Scan a raw hed string for empty tags and missing commas.

        The string is walked one character at a time while tracking the text of
        the tag currently being read and the last non-whitespace character.

        Parameters
        ----------
        hed_string: str
            A hed string.
        Returns
        -------
        []
            A validation issues list. If no issues are found then an empty list is returned.

        """
        last_non_empty_valid_character = ''
        last_non_empty_valid_index = 0
        current_tag = ''
        issues = []

        for i, current_character in enumerate(hed_string):
            current_tag += current_character
            # Whitespace is accumulated into current_tag but otherwise ignored.
            if not current_character.strip():
                continue
            if TagValidator._character_is_delimiter(current_character):
                # A delimiter with nothing but whitespace before it means the
                # tag in front of it is empty.
                if current_tag.strip() == current_character:
                    issues += ErrorHandler.format_error(
                        ValidationErrors.HED_TAG_EMPTY,
                        source_string=hed_string,
                        char_index=i)
                    current_tag = ''
                    # Skips the last_non_empty_* update at the bottom of the loop.
                    continue
                current_tag = ''
            elif current_character == self.OPENING_GROUP_CHARACTER:
                if current_tag.strip() == self.OPENING_GROUP_CHARACTER:
                    current_tag = ''
                else:
                    # Tag text directly followed by an opening group character.
                    # current_tag is not reset here.
                    issues += ErrorHandler.format_error(
                        ValidationErrors.HED_COMMA_MISSING, tag=current_tag)
            elif TagValidator._comma_is_missing_after_closing_parentheses(
                    last_non_empty_valid_character, current_character):
                # Report the text without the current character, then stop
                # scanning the rest of the string.
                issues += ErrorHandler.format_error(
                    ValidationErrors.HED_COMMA_MISSING, tag=current_tag[:-1])
                break
            last_non_empty_valid_character = current_character
            last_non_empty_valid_index = i
        # A delimiter as the last recorded character means a trailing empty tag.
        if TagValidator._character_is_delimiter(
                last_non_empty_valid_character):
            issues += ErrorHandler.format_error(
                ValidationErrors.HED_TAG_EMPTY,
                char_index=last_non_empty_valid_index,
                source_string=hed_string)
        return issues
Ejemplo n.º 6
0
    def validate(self, validators=None, error_handler=None, **kwargs):
        """
            Run the given validators on this string.

        Parameters
        ----------
        validators : [func or validator like] or func or validator like
            A validator or list of validators to apply to this hed string.
        error_handler : ErrorHandler or None
            Used to report errors.  Uses a default one if none passed in.
        kwargs:
            See util.translate_ops or the specific validators for additional options

        Returns
        -------
        issues:
            Whatever apply_ops returns for the translated validators, after
            the error handler has added the HED_STRING context to it.
        """
        if error_handler is None:
            error_handler = ErrorHandler()
        tag_ops = translate_ops(validators, **kwargs)

        error_handler.push_error_context(ErrorContext.HED_STRING,
                                         self,
                                         increment_depth_after=False)
        try:
            issues = self.apply_ops(tag_ops)
            error_handler.add_context_to_issues(issues)
        finally:
            # Always undo the push, even if a validator raises, so a handler
            # supplied by the caller is not left with a stale context.
            error_handler.pop_error_context()

        return issues
Ejemplo n.º 7
0
    def iter_raw(self, validators=None, error_handler=None, **kwargs):
        """Iterate over the file using a freshly constructed ColumnMapper.

           This is primarily for altering or re-saving the original file
           (e.g. converting short tags to long).

        Parameters
        ----------
        validators : [func or validator like] or func or validator like
            A validator or list of validators to apply to the hed strings before returning
        error_handler : ErrorHandler or None
            Passed on to iter_dataframe.  A default one is created if None.
        kwargs:
            See util.translate_ops or the specific validators for additional options

        Returns
        -------
        The result of iter_dataframe, called with run_string_ops_on_columns=True.
        """
        handler = ErrorHandler() if error_handler is None else error_handler
        return self.iter_dataframe(ColumnMapper(),
                                   validators=validators,
                                   run_string_ops_on_columns=True,
                                   error_handler=handler,
                                   **kwargs)
Ejemplo n.º 8
0
def validate_schema_description(tag_name, hed_description):
    """
    Report every character in a term description that is not allowed.

    A character is accepted if it is alphanumeric or listed in
    ALLOWED_DESC_CHARS.  An empty or missing description is always valid.

    Parameters
    ----------
    tag_name : str
        A single hed tag - not validated here, just used for error messages
    hed_description: str
        The description string to validate
    Returns
    -------
    issue_list: [{}]
        One entry per offending character, in order of appearance.
    """
    issues_list = []
    if hed_description:
        for position, symbol in enumerate(hed_description):
            if symbol.isalnum() or symbol in ALLOWED_DESC_CHARS:
                continue
            issues_list += ErrorHandler.format_error(
                SchemaWarnings.INVALID_CHARACTERS_IN_DESC,
                hed_description,
                tag_name,
                char_index=position,
                problem_char=symbol)
    return issues_list
Ejemplo n.º 9
0
    def check_duplicate_tags_exist(self, original_tag_list):
        """Report every tag that repeats an earlier tag in the list.

        Tags are compared by their lower-cased form, so only exact
        (case-insensitive) matches are caught; two value tags that differ
        only in their value are not treated as duplicates.

        Parameters
        ----------
        original_tag_list: [HedTag]
            The tags to compare; the repeated tag is attached to each issue.
        Returns
        -------
        []
            A validation issues list. If no issues are found then an empty list is returned.

        """
        issues = []
        seen = set()
        for tag in original_tag_list:
            key = tag.lower()
            if key not in seen:
                seen.add(key)
                continue
            issues += ErrorHandler.format_error(
                ValidationErrors.HED_TAG_REPEATED, tag=tag)
        return issues
Ejemplo n.º 10
0
def tag_is_placeholder_check(hed_schema,
                             tag_entry,
                             possible_tags,
                             force_issues_as_warnings=True):
    """
        Report a schema entry that is not a placeholder ("/#") entry.

    Parameters
    ----------
    hed_schema: HedSchema
        Not used by this check.
    tag_entry: HedSchemaEntry
        The schema entry for this tag; only its name is inspected.
    possible_tags: str
        Passed through to the reported issue.
    force_issues_as_warnings: bool
        If True sets the severity of every reported issue to warning

    Returns
    -------
    issues_list: [{}]
        Empty when the entry name ends with "/#".
    """
    if tag_entry.name.endswith("/#"):
        return []

    issues = list(
        ErrorHandler.format_error(SchemaWarnings.NON_PLACEHOLDER_HAS_CLASS,
                                  tag_entry.name, possible_tags))
    if force_issues_as_warnings:
        for issue in issues:
            issue['severity'] = ErrorSeverity.WARNING
    return issues
Ejemplo n.º 11
0
    def _validate_groups_in_hed_string(self, hed_string_obj):
        """Run the tag-level validators over every group of a HED string.

         Each item returned by get_all_groups(also_return_depth=True) is
         checked; a parenthesized group with no contents is also reported
         as empty.

         Parameters
         ----------
         hed_string_obj: HedString
            A HedString object.
         Returns
         -------
         list
             The issues associated with each level in the HED string.

         """
        issues = []
        all_groups = hed_string_obj.get_all_groups(also_return_depth=True)
        for group, is_top_level in all_groups:
            is_group = group.is_group()
            if not group and is_group:
                issues += ErrorHandler.format_error(
                    ValidationErrors.HED_GROUP_EMPTY, tag=group)
            issues += self._tag_validator.run_tag_level_validators(
                group.tags(), is_top_level, is_group)
        return issues
Ejemplo n.º 12
0
    def validate_column_data(self, validators, error_handler=None, **kwargs):
        """
        Validate every entry in column_data and collect the issues.

        Parameters
        ----------
        validators : [func or validator like] or func or validator like
            A validator or list of validators to apply to the hed strings in the sidecars.
        error_handler : ErrorHandler or None
            Used to report errors.  Uses a default one if none passed in.
        kwargs:
            See util.translate_ops or the specific validators for additional options
        Returns
        -------
        validation_issues : [{}]
            The issues returned by each column's validate_column, concatenated
            in column order.
        """
        handler = ErrorHandler() if error_handler is None else error_handler
        return [
            issue
            for column in self.column_data.values()
            for issue in column.validate_column(
                validators, error_handler=handler, **kwargs)
        ]
Ejemplo n.º 13
0
    def extract_definitions(self, error_handler=None):
        """
        Gathers all definitions found in this spreadsheet

        Every hed string is run through a new DefDict and
        HedString.remove_definitions, and the processed string is written
        back with set_def_removed=True.

        Note: the issues produced while iterating are not returned by this
        method.

        Parameters
        ----------
        error_handler : ErrorHandler
            The error handler to use for context, uses a default one if none.

        Returns
        -------
        def_dict: DefDict
            Contains all the definitions located in the file
        """
        if error_handler is None:
            error_handler = ErrorHandler()
        new_def_dict = DefDict()
        validators = [new_def_dict, HedString.remove_definitions]

        # The per-string issues are deliberately unused: this method only
        # returns the definitions, so there is nowhere to hand them to.
        for hed_string, key_name, _issues in self.hed_string_iter(
                validators=validators,
                allow_placeholders=True,
                error_handler=error_handler):
            self.set_hed_string(hed_string, key_name, set_def_removed=True)

        return new_def_dict
Ejemplo n.º 14
0
    def _validate_pound_sign_count(self, hed_string):
        """Check that a hed string has the number of "#" its column type expects.

        Value and Attribute columns expect exactly one; HEDTags and Categorical
        columns expect none.  Other column types are not checked.  Only the
        text returned by without_defs() is counted.

        Parameters
        ----------
        hed_string : str or HedString

        Returns
        -------
        issues_list: [{}]
            A list of the pound sign errors(always 0 or 1 item in the list)
        """
        column_type = self.column_type
        if column_type in (ColumnType.Value, ColumnType.Attribute):
            expected_count = 1
            error_type = SidecarErrors.INVALID_POUND_SIGNS_VALUE
        elif column_type in (ColumnType.HEDTags, ColumnType.Categorical):
            expected_count = 0
            error_type = SidecarErrors.INVALID_POUND_SIGNS_CATEGORY
        else:
            return []

        if hed_string.without_defs().count("#") == expected_count:
            return []

        # NOTE(review): the count reported here is taken from the full string
        # (definitions included), not from the without_defs() text that was
        # compared above - confirm this is intended.
        return ErrorHandler.format_error(
            error_type, pound_sign_count=str(hed_string).count("#"))
Ejemplo n.º 15
0
    def check_for_placeholder(self, original_tag):
        """
            Report each "#" found in the extension/value portion of a tag.

            Tags flagged as definitions are skipped entirely.

        Parameters
        ----------
        original_tag : HedTag

        Returns
        -------
        error_list: [{}]
            One issue per "#" character, with its position within the tag.
        """
        issues = []
        if original_tag.is_definition:
            return issues

        # The value portion begins one character past the end of the base tag.
        value_offset = len(original_tag.org_base_tag) + 1
        for position, symbol in enumerate(
                original_tag.extension_or_value_portion, start=value_offset):
            if symbol != "#":
                continue
            issues += ErrorHandler.format_error(
                ValidationErrors.INVALID_TAG_CHARACTER,
                tag=original_tag,
                index_in_tag=position,
                index_in_tag_end=position + 1,
                actual_error=ValidationErrors.HED_VALUE_INVALID)

        return issues
Ejemplo n.º 16
0
    def check_multiple_unique_tags_exist(self, original_tag_list):
        """Report each 'unique' tag prefix that more than one tag starts with.

        Matching is case-insensitive and is done against every tag the schema
        returns for the HedKey.Unique attribute.

        Parameters
        ----------
        original_tag_list: [HedTag]
            The tags to check.
        Returns
        -------
        []
            A validation issues list. If no issues are found then an empty list is returned.
        """
        issues = []
        for schema_prefix in self._hed_schema.get_all_tags_with_attribute(
                HedKey.Unique):
            lowered_prefix = schema_prefix.lower()
            matching_tags = [
                tag for tag in original_tag_list
                if tag.lower().startswith(lowered_prefix)
            ]
            if len(matching_tags) > 1:
                # The issue carries the prefix as the schema spells it.
                issues += ErrorHandler.format_error(
                    ValidationErrors.HED_TAG_NOT_UNIQUE,
                    tag_prefix=schema_prefix)
        return issues
Ejemplo n.º 17
0
    def get_def_and_mapper_issues(self,
                                  error_handler,
                                  check_for_warnings=False):
        """
            Collect the definition issues followed by the column mapping issues.

        Parameters
        ----------
        error_handler : ErrorHandler
            Used to add context to the column mapping issues.
        check_for_warnings: bool
            If False, the combined list is filtered with
            ErrorSeverity.ERROR before being returned.
        Returns
        -------
        issues_list: [{}]
            A list of definition and mapping issues.
        """
        combined = list(self.file_def_dict.get_definition_issues())

        # Mapper issues cover things like missing columns; only these get
        # context from the error handler.
        mapper_issues = self._mapper.get_column_mapping_issues()
        error_handler.add_context_to_issues(mapper_issues)
        combined.extend(mapper_issues)

        if check_for_warnings:
            return combined
        return ErrorHandler.filter_issues_by_severity(combined,
                                                      ErrorSeverity.ERROR)
Ejemplo n.º 18
0
    def _get_def_expand_tag(self, original_tag, def_issues):
        """
            Resolve a Def tag to its def-expand name and contents, recording
            any problem found in def_issues.

        Parameters
        ----------
        original_tag : HedTag
            Source hed tag that may be a Def tag.
        def_issues : [{}]
            List of issues; new issues are appended to it in place.

        Returns
        -------
        def_tag_name: str
            DefTagNames.DEF_EXPAND_ORG_KEY when the tag was expanded, else None.
        def_contents: [HedTag or HedGroup]
            The contents to replace the previous def-tag with, else None.
        """
        if original_tag.short_base_tag.lower() != DefTagNames.DEF_KEY:
            return None, None

        # Split "name/value" on the first slash; with no slash there is no
        # placeholder value at all (None, not an empty string).
        label, slash, remainder = (
            original_tag.extension_or_value_portion.partition("/"))
        placeholder = remainder if slash else None

        def_entry = self._gathered_defs.get(label.lower())
        if def_entry is None:
            def_issues += ErrorHandler.format_error(
                ValidationErrors.HED_DEF_UNMATCHED, tag=original_tag)
            return None, None

        def_tag_name, def_contents = def_entry.get_definition(
            original_tag, placeholder_value=placeholder)
        if def_tag_name:
            return DefTagNames.DEF_EXPAND_ORG_KEY, def_contents

        # No name came back: report a missing value if the definition takes
        # one, otherwise an extra value.
        error_code = (ValidationErrors.HED_DEF_VALUE_MISSING
                      if def_entry.takes_value else
                      ValidationErrors.HED_DEF_VALUE_EXTRA)
        def_issues += ErrorHandler.format_error(error_code, tag=original_tag)
        return None, None
Ejemplo n.º 19
0
    def check_tag_level_issue(self, original_tag_list, is_top_level, is_group):
        """
            Verify that tags restricted to groups or to top-level groups are
            placed where they are allowed.  Does nothing unless semantic
            validation is enabled.

        Parameters
        ----------
        original_tag_list: [HedTag]
           A list containing the original tags.
        is_top_level: bool
            If True, this group is a "top level tag group", that can contain definitions, Onset, etc tags.
        is_group: bool
            If true, group is contained by parenthesis
        Returns
        -------
        []
            The validation issues associated with each level in a HED string.
        """
        issues = []
        if not self._run_semantic_validation:
            return issues

        top_level_tags = [
            tag for tag in original_tag_list
            if tag.base_tag_has_attribute(HedKey.TopLevelTagGroup)
        ]
        tag_group_tags = [
            tag for tag in original_tag_list
            if tag.base_tag_has_attribute(HedKey.TagGroup)
        ]

        if not is_group:
            for misplaced in tag_group_tags:
                issues += ErrorHandler.format_error(
                    ValidationErrors.HED_TAG_GROUP_TAG, tag=misplaced)

        if not is_top_level:
            for misplaced in top_level_tags:
                issues += ErrorHandler.format_error(
                    ValidationErrors.HED_TOP_LEVEL_TAG, tag=misplaced)
        elif len(top_level_tags) > 1:
            # More than one top-level-only tag in the same top-level group.
            issues += ErrorHandler.format_error(
                ValidationErrors.HED_MULTIPLE_TOP_TAGS,
                tag=top_level_tags[0],
                multiple_tags=top_level_tags[1:])

        return issues
Ejemplo n.º 20
0
    def check_tag_unit_class_units_are_valid(self, original_tag,
                                             check_for_warnings):
        """Check the value of a unit class tag that was given without a unit.

        Nothing is checked for tags that are not unit class tags, or when
        get_stripped_unit_value() finds a unit.

        Parameters
        ----------
        original_tag: HedTag
            The original tag that is used to report the error.
        check_for_warnings: bool
            If True, a valid numeric value without units is reported with
            HED_UNITS_DEFAULT_USED.
        Returns
        -------
        []
            A validation issues list. If no issues are found then an empty list is returned.

        """
        issues = []
        if not original_tag.is_unit_class_tag():
            return issues

        stripped_value, unit = original_tag.get_stripped_unit_value()
        if unit:
            return issues

        if not self._validate_value_class_portion(original_tag,
                                                  stripped_value):
            # Invalid value: only reported when the tag has unit class units.
            valid_units = original_tag.get_tag_unit_class_units()
            if valid_units:
                issues += ErrorHandler.format_error(
                    ValidationErrors.HED_UNITS_INVALID,
                    original_tag,
                    unit_class_units=valid_units)
            return issues

        # Only suggest a unit is missing if this is a valid number.
        if check_for_warnings and tag_validator_util.validate_numeric_value_class(
                stripped_value):
            default_unit = original_tag.get_unit_class_default_unit()
            issues += ErrorHandler.format_error(
                ValidationErrors.HED_UNITS_DEFAULT_USED,
                tag=original_tag,
                default_unit=default_unit)
        return issues
Ejemplo n.º 21
0
    def _convert_to_form(self, hed_schema, tag_form, error_handler):
        """
        Convert every cell yielded by iterating this object to the given tag
        form and write it back with set_cell.

        Parameters
        ----------
        hed_schema : HedSchema
            The schema to use to convert tags.
        tag_form: str
            The form to convert the tags to.  (short_tag, long_tag, base_tag, etc)
        error_handler : ErrorHandler
            The error handler to use for context, uses a default one if none.

        Returns
        -------
        issues_list: [{}]
            A list of issues found during conversion
        """
        handler = ErrorHandler() if error_handler is None else error_handler
        conversion_issues = []
        for row_number, row_cells in self:
            handler.push_error_context(ErrorContext.ROW, row_number)
            for column_number in row_cells:
                handler.push_error_context(ErrorContext.COLUMN, column_number)
                cell_hed_string = row_cells[column_number]
                conversion_issues += cell_hed_string.convert_to_canonical_forms(
                    hed_schema)
                self.set_cell(row_number,
                              column_number,
                              cell_hed_string,
                              include_column_prefix_if_exist=False,
                              tag_form=tag_form)
                handler.pop_error_context()  # COLUMN
            handler.pop_error_context()  # ROW

        return conversion_issues
Ejemplo n.º 22
0
    def _validate_column_structure(self, error_handler):
        """
            This checks primarily for type errors, such as expecting a string and getting a list in a json sidecar.

        Parameters
        ----------
        error_handler : ErrorHandler
            Used to add context to the issues found.  Any context pushed here
            is popped again before returning.

        Returns
        -------
        val_issues: [{}]
            Column-level issues (unknown column type, blank "HED" entry for a
            categorical column) followed by one issue for every entry that is
            not a HedString.
        """
        val_issues = []
        if self.column_type is None:
            val_issues += ErrorHandler.format_error(
                SidecarErrors.UNKNOWN_COLUMN_TYPE,
                column_name=self.column_name)
        elif self.column_type == ColumnType.Categorical:
            raw_hed_dict = self._hed_dict["HED"]
            if not raw_hed_dict:
                val_issues += ErrorHandler.format_error(
                    SidecarErrors.BLANK_HED_STRING)

        error_handler.add_context_to_issues(val_issues)

        for hed_string_obj, key_name in self._hed_iter(
                also_return_bad_types=True):
            new_col_issues = []
            error_handler.push_error_context(ErrorContext.SIDECAR_KEY_NAME,
                                             key_name)
            try:
                if not isinstance(hed_string_obj, HedString):
                    new_col_issues += ErrorHandler.format_error(
                        SidecarErrors.WRONG_HED_DATA_TYPE,
                        given_type=type(hed_string_obj),
                        expected_type="str")
                    error_handler.add_context_to_issues(new_col_issues)
            finally:
                # Pair every push with a pop so key-name contexts do not
                # accumulate on the handler across entries.
                error_handler.pop_error_context()
            val_issues += new_col_issues

        return val_issues
Ejemplo n.º 23
0
def validate_schema_term(hed_term):
    """
    Check a single term (i.e. a short tag) for capitalization and illegal characters.

    The first character must be a digit or an uppercase letter; if it is not, a
    capitalization issue is reported for it and no further check is made on that
    character.  Every other character must be alphanumeric or listed in
    ALLOWED_TAG_CHARS.

    Parameters
    ----------
    hed_term : str
        A single hed term
    Returns
    -------
    issue_list: [{}]
        A list of all formatting issues found in the term
    """
    # A bare "#" is skipped: it was already validated as part of the previous entry.
    if hed_term == "#":
        return []

    found = []
    for position, letter in enumerate(hed_term):
        if position == 0:
            starts_ok = letter.isdigit() or letter.isupper()
            if not starts_ok:
                found.extend(
                    ErrorHandler.format_error(
                        SchemaWarnings.INVALID_CAPITALIZATION,
                        hed_term,
                        char_index=position,
                        problem_char=letter))
                continue

        if letter in ALLOWED_TAG_CHARS or letter.isalnum():
            continue

        found.extend(
            ErrorHandler.format_error(
                SchemaWarnings.INVALID_CHARACTERS_IN_TAG,
                hed_term,
                char_index=position,
                problem_char=letter))
    return found
Ejemplo n.º 24
0
    def check_tag_requires_child(self, original_tag):
        """Report an issue when the given tag carries the 'requireChild' attribute.

        Parameters
        ----------
        original_tag: HedTag
            The tag to check; it is also passed to the error formatter.
        Returns
        -------
        []
            A validation issues list. Empty when the tag does not have the attribute.

        """
        # Guard clause: nothing to report unless the attribute is present.
        if not original_tag.has_attribute(HedKey.RequireChild):
            return []

        issues = []
        issues.extend(
            ErrorHandler.format_error(ValidationErrors.HED_TAG_REQUIRES_CHILD,
                                      tag=original_tag))
        return issues
Ejemplo n.º 25
0
    def expand(self, input_text):
        """
            Expand input_text according to the type of this column.

            Categorical columns look the text up as a key, Value columns substitute it
            for "#" in the column's hed string, HEDTags columns wrap it in a HedString
            and hand it to the prefix helper, Ignore columns yield nothing and
            Attribute columns pass the text through unchanged.

        Parameters
        ----------
        input_text : str
            Text to expand(generally from a single cell in a spreadsheet)

        Returns
        -------
        hed_string: HedString or str or None
            The expanded column contents. None for ignored columns or on error.
        attribute_name_or_error_message: str or bool or [] or {}
            False for ordinary hed-string results, the column name for Attribute
            columns, and an error description when the first return value is None
            because of a failure.
        """
        kind = self.column_type

        if kind == ColumnType.Categorical:
            category_text = self._get_category_hed_string(input_text)
            if not category_text:
                # No usable hed string for this key: report it with the known keys.
                known_keys = list(self._hed_dict["HED"].keys())
                return None, ErrorHandler.format_error(
                    ValidationErrors.HED_SIDECAR_KEY_MISSING,
                    invalid_key=input_text,
                    category_keys=known_keys)
            return HedString(category_text), False

        if kind == ColumnType.Value:
            template = self._get_value_hed_string()
            return HedString(template.replace("#", input_text)), False

        if kind == ColumnType.HEDTags:
            prefixed = self._prepend_prefix_to_required_tag_column_if_needed(
                HedString(input_text), self.column_prefix)
            return prefixed, False

        if kind == ColumnType.Ignore:
            return None, False

        if kind == ColumnType.Attribute:
            return input_text, self.column_name

        # Unrecognized column type.
        return None, {"error_type": "INTERNAL_ERROR"}
Ejemplo n.º 26
0
    def check_tag_value_class_valid(self, original_tag):
        """Report an issue when the value portion of the given tag fails value-class validation.

        Parameters
        ----------
        original_tag: HedTag
            The tag whose extension_or_value_portion is checked; also used to report the error.
        Returns
        -------
        error_list: []
            A validation issues list. If no issues are found then an empty list is returned.
        """
        portion_ok = self._validate_value_class_portion(
            original_tag, original_tag.extension_or_value_portion)
        if portion_ok:
            return []

        issues = []
        issues.extend(
            ErrorHandler.format_error(ValidationErrors.HED_VALUE_INVALID,
                                      original_tag))
        return issues
Ejemplo n.º 27
0
 def _check_invalid_chars(self,
                          check_string,
                          allowed_chars,
                          source_tag,
                          starting_index=0):
     validation_issues = []
     for i, character in enumerate(check_string):
         if character.isalnum():
             continue
         if character in allowed_chars:
             continue
         # Todo: Remove this patch when clock times and invalid characters are more properly checked
         if character == ":":
             continue
         validation_issues += ErrorHandler.format_error(
             ValidationErrors.INVALID_TAG_CHARACTER,
             tag=source_tag,
             index_in_tag=starting_index + i,
             index_in_tag_end=starting_index + i + 1)
     return validation_issues
Ejemplo n.º 28
0
    def check_tag_formatting(self, original_tag):
        """Report a validation issue for each match of the double-slash pattern in the tag.

        Parameters
        ----------
        original_tag: HedTag
            The tag whose org_tag text is scanned; also used to report the error.
        Returns
        -------
        []
            A validation issues list. If no issues are found then an empty list is returned.
        """
        issues = []
        hits = self.pattern_doubleslash.finditer(original_tag.org_tag)
        for hit in hits:
            # span() gives the (start, end) indexes of this match within org_tag.
            first, last = hit.span()
            issues.extend(
                ErrorHandler.format_error(ValidationErrors.HED_NODE_NAME_EMPTY,
                                          tag=original_tag,
                                          index_in_tag=first,
                                          index_in_tag_end=last))
        return issues
Ejemplo n.º 29
0
    def extract_definitions(self, error_handler=None):
        """
        Collect the definitions found in this spreadsheet into a new DefDict.

        A fresh DefDict is run as the only validator over the raw contents; the
        issues returned by that run are discarded.

        Parameters
        ----------
        error_handler : ErrorHandler
            The error handler to use for context, uses a default one if none.

        Returns
        -------
        def_dict: DefDict
            The DefDict that was passed through the validator run.
        """
        handler = ErrorHandler() if error_handler is None else error_handler
        gathered_defs = DefDict()
        self._run_validators([gathered_defs],
                             run_on_raw=True,
                             error_handler=handler)
        return gathered_defs
Ejemplo n.º 30
0
    def _report_invalid_character_error(self, hed_string, index):
        """Build the error for an invalid character found in a HED string.

        A tilde is reported as HED_TILDES_UNSUPPORTED; any other character is
        reported as HED_CHARACTER_INVALID.

        Parameters
        ----------
        hed_string: str
            The HED string that caused the error.
        index: int
            The index of the invalid character in the HED string.
        Returns
        -------
        [{}]
            The issue list produced by ErrorHandler.format_error for this character.

        """
        generic_error = ValidationErrors.HED_CHARACTER_INVALID
        offending = hed_string[index]
        chosen_error = (ValidationErrors.HED_TILDES_UNSUPPORTED
                        if offending == "~" else generic_error)
        return ErrorHandler.format_error(chosen_error,
                                         char_index=index,
                                         source_string=hed_string)