Python load_raw_schema Examples, biolinkml.utils.rawloader.load_raw_schema Python Examples

Example #1

0

Show file

    def test_load_raw_file(self):
        """ Load schema1.yaml by path and check the result against the schema1 expectations """
        schema_path = os.path.join(datadir, 'schema1.yaml')
        loaded = load_raw_schema(schema_path)
        self._verify_schema1_content(loaded, 'schema1')

        # Passing a source_file_* override together with a path is expected to trip an assertion in the loader
        with self.assertRaises(AssertionError):
            load_raw_schema(schema_path, source_file_size=117)

Example #2

0

Show file

    def test_model_uris(self):
        """ Test that the variables in meta.yaml match the contents of biolinkml/__init__.py

        The local meta YAML and context files must exist; the meta YAML is loaded and handed to
        validate_yaml_content.  The types YAML is then loaded and its id, default prefix and the
        namespace bound to that prefix are compared with the METATYPE_* constants.
        """
        self.assertTrue(os.path.exists(LOCAL_YAML_PATH))
        self.assertTrue(os.path.exists(LOCAL_CONTEXT_PATH))
        meta_yaml = load_raw_schema(LOCAL_YAML_PATH)
        self.validate_yaml_content(meta_yaml, False)

        types_yaml = load_raw_schema(LOCAL_TYPES_PATH)
        # Each property is asserted exactly once (the default_prefix check used to be repeated)
        self.assertEqual(METATYPE_LOCAL_NAME, types_yaml.default_prefix)
        self.assertEqual(METATYPE_URI, types_yaml.id)
        # The default prefix has to resolve to the metatype namespace
        self.assertEqual(METATYPE_NAMESPACE, types_yaml.prefixes[types_yaml.default_prefix].prefix_reference)

Example #3

0

Show file

    def test_as_yaml(self):
        """ Test the YAML output representation """
        schema = self.fix_schema_metadata(
            load_raw_schema(os.path.join(datadir, 'schema4.yaml')))

        self.assertEqual(
            """generation_date: 2018-12-31 17:23
id: !!python/object/new:biolinkml.utils.metamodelcore.URI
- http://example.org/schema4
metamodel_version: 0.5.0
name: !!python/object/new:biolinkml.meta.SchemaDefinitionName
- schema4
source_file: schema4.yaml
source_file_date: 2018-12-31 17:23
source_file_size: 259
title: Load Raw Schema Test
types:
  integer:
    base: int
    from_schema: !!python/object/new:biolinkml.utils.metamodelcore.URI
    - http://example.org/schema5
    name: !!python/object/new:biolinkml.meta.TypeDefinitionName
    - integer
  string:
    base: str
    from_schema: !!python/object/new:biolinkml.utils.metamodelcore.URI
    - http://example.org/schema4
    name: !!python/object/new:biolinkml.meta.TypeDefinitionName
    - string
""", as_yaml(schema))

Example #4

0

Show file

 def test_as_yaml(self):
     """ Compare the YAML rendering of schema4.yaml with the stored expected file """
     raw_schema = load_raw_schema(env.input_path('schema4.yaml'))
     schema = self.fix_schema_metadata(raw_schema)
     expected_file = env.expected_path('schema4.yaml')
     rendered = as_yaml(schema)
     # filtr is the identity function here
     env.eval_single_file(expected_file, rendered, filtr=lambda s: s)

Example #5

0

Show file

    def __init__(self,
                 data: Union[str, TextIO, SchemaDefinition, dict],
                 base_dir: Optional[str] = None,
                 namespaces: Optional[Namespaces] = None,
                 useuris: Optional[bool] = None,
                 importmap: Optional[Mapping[str, str]] = None,
                 logger: Optional[logging.Logger] = None) \
            -> None:
        """ Constructor - load and process a YAML or pre-processed schema

        :param data: YAML schema text, python dict loaded from yaml,  URL, file name, open file or SchemaDefinition
        :param base_dir: base directory or URL where Schema came from
        :param namespaces: namespaces collector.  A new Namespaces instance is created only when this is None
        :param useuris: True means class_uri and slot_uri are identifiers.  False means they are mappings.
                        Defaults to True when not supplied.
        :param importmap: A map from import entries to URI or file name.
        :param logger: Target Logger, if any.  Defaults to a logger named after this class
        """
        self.logger = logger if logger is not None else logging.getLogger(self.__class__.__name__)
        # A SchemaDefinition is used as is; anything else goes through the raw loader
        if isinstance(data, SchemaDefinition):
            self.schema = data
        else:
            self.schema = load_raw_schema(data, base_dir=base_dir)
        # Map from URI to source and version tuple
        self.loaded: Dict[str, Tuple[str, str]] = {self.schema.id: (self.schema.source_file, self.schema.version)}
        self.base_dir = self._get_base_dir(base_dir)
        # Test identity rather than truthiness so that a caller supplied collector that is still empty
        # (and may therefore evaluate as false) is kept instead of being silently replaced
        self.namespaces = namespaces if namespaces is not None else Namespaces()
        self.useuris = useuris if useuris is not None else True
        self.importmap = parse_import_map(importmap, self.base_dir) if importmap is not None else dict()
        self.synopsis: Optional[SchemaSynopsis] = None
        self.schema_location: Optional[str] = None
        self.schema_defaults: Dict[str, str] = {}           # Map from schema URI to default namespace

Example #6

0

Show file

    def test_multi_schemas(self):
        """ Verify the types loaded from schema4.yaml against the stored schema4.json file """
        def check_types(s: SchemaDefinition) -> None:
            def types_as_dicts():
                # Serialize each type definition to JSON and read it back through loads/as_dict
                return {type_name: as_dict(loads(as_json(type_def)))
                        for type_name, type_def in s.types.items()}

            baseline = env.expected_path('schema4.json')
            if not os.path.exists(baseline):
                # No stored file yet: write one from the current types, then fail so the test gets rerun
                with open(baseline, 'w') as out:
                    out.write(as_json(JsonObj(**types_as_dicts())))
                    self.fail(f"File {baseline} created - rerun test")

            with open(baseline) as stored:
                expected = as_dict(load(stored))
            self.assertEqual(expected, types_as_dicts())
            # Clear the types once they have been checked
            s.types = None

        loaded = load_raw_schema(env.input_path('schema4.yaml'))
        self._verify_schema1_content(loaded, 'schema4', check_types)

Example #7

0

Show file

File: test_model_uris.py Project: robertdigital/biolinkml

 def check_yaml(source_file):
     """ Load source_file and compare its id, name, default prefix, namespace and source file name
     with the expected values taken from the enclosing scope """
     schema = load_raw_schema(source_file)
     self.assertEqual(uri, schema.id)
     self.assertEqual(name, schema.name)
     default_prefix = schema.default_prefix
     self.assertEqual(namespace_name, default_prefix)
     # The default prefix must be declared and must point at the expected namespace
     prefix_entry = schema.prefixes[default_prefix]
     self.assertEqual(namespace, prefix_entry.prefix_reference)
     self.assertEqual(source_file, schema.source_file)

Example #8

0

Show file

File: test_yaml_utils.py Project: robertdigital/biolinkml

 def test_as_json(self):
     """ Compare the JSON rendering of schema6.yaml with the stored schema6.json file """
     source = os.path.join(inputdir, 'schema6.yaml')
     schema = self.fix_schema_metadata(load_raw_schema(source))
     outfile = os.path.join(outputdir, 'schema6.json')

     if os.path.exists(outfile):
         self.assertEqual(load(outfile), loads(as_json(schema)))
         return

     # No stored output yet: generate it and fail so that the next run does the comparison
     with open(outfile, 'w') as out:
         out.write(as_json(schema))
         self.fail(f"Generated {outfile} - run test again")

Example #9

0

Show file

File: test_yaml_utils.py Project: robertdigital/biolinkml

    def test_as_yaml(self):
        """ Compare the YAML rendering of schema4.yaml with the stored schema4.yaml output file """
        source = os.path.join(inputdir, 'schema4.yaml')
        schema = self.fix_schema_metadata(load_raw_schema(source))
        outfile = os.path.join(outputdir, 'schema4.yaml')

        if not os.path.exists(outfile):
            # First run: store the current rendering, then fail so that the test is run again
            with open(outfile, 'w') as out:
                out.write(as_yaml(schema))
            self.fail(f"Output file: {outfile} created - run test again")

        with open(outfile) as stored:
            expected = stored.read()
            self.assertEqual(expected, as_yaml(schema))

Example #10

0

Show file

    def test_multi_schemas(self):
        """ Verify the types loaded from schema4.yaml against an inline expected dictionary """
        def check_types(s: SchemaDefinition) -> None:
            expected_types = {
                'integer': {'base': 'int',
                            'from_schema': 'http://example.org/schema5',
                            'name': 'integer'},
                'string': {'base': 'str',
                           'from_schema': 'http://example.org/schema4',
                           'name': 'string'}}
            # Serialize each type definition to JSON and read it back through loads/as_dict
            actual_types = {type_name: as_dict(loads(as_json(type_def)))
                            for type_name, type_def in s.types.items()}
            self.assertEqual(expected_types, actual_types)
            # Clear the types once they have been checked
            s.types = None

        loaded = load_raw_schema(os.path.join(datadir, 'schema4.yaml'))
        self._verify_schema1_content(loaded, 'schema4', check_types)

Example #11

0

Show file

    def __init__(self,
                 data: Union[str, TextIO, SchemaDefinition, dict],
                 base_dir: Optional[str] = None,
                 namespaces: Optional[Namespaces] = None) \
            -> None:
        """ Constructor - load and process a YAML or pre-processed schema

        :param data: YAML schema text, python dict loaded from yaml,  URL, file name, open file or SchemaDefinition
        :param base_dir: base directory or URL where Schema came from
        :param namespaces: namespaces collector.  A new Namespaces instance is created only when this is None
        """
        # A SchemaDefinition is used as is; anything else goes through the raw loader
        if isinstance(data, SchemaDefinition):
            self.schema = data
        else:
            self.schema = load_raw_schema(data, base_dir=base_dir)
        # Names of the schemas loaded so far, seeded with the root schema
        self.loaded: Set[str] = {self.schema.name}
        self.base_dir = self._get_base_dir(base_dir)
        # Test identity rather than truthiness so that a caller supplied collector that is still empty
        # (and may therefore evaluate as false) is kept instead of being silently replaced
        self.namespaces = namespaces if namespaces is not None else Namespaces()
        self.synopsis: Optional[SchemaSynopsis] = None
        self.schema_location: Optional[str] = None

Example #12

0

Show file

    def test_as_json(self):
        schema = self.fix_schema_metadata(
            load_raw_schema(os.path.join(datadir, 'schema6.yaml')))
        self.assertEqual(
            loads("""{
   "name": "schema6",
   "id": "http://example.org/schema6.fuzz",
   "title": "Load Raw Schema Test",
   "types": [
      {
         "name": "foo",
         "from_schema": "http://example.org/schema6.fuzz",
         "base": "str",
         "uri": "http://example.org/types/String"
      }
   ],
   "slots": [
      {
         "name": "s1",
         "from_schema": "http://example.org/schema6.fuzz",
         "domain": "c1",
         "range": "foo"
      }
   ],
   "classes": [
      {
         "name": "c1",
         "from_schema": "http://example.org/schema6.fuzz"
      }
   ],
   "metamodel_version": "0.5.0",
   "source_file": "schema6.yaml",
   "source_file_date": "2018-12-31 17:23",
   "source_file_size": 259,
   "generation_date": "2018-12-31 17:23"
}"""), loads(as_json(schema)))

Example #13

0

Show file

 def test_schema_id(self):
     """ Test loading a schema with just an id (schema3.yaml, resolved relative to datadir) """
     schema = load_raw_schema('schema3.yaml', base_dir=datadir)
     self._verify_schema1_content(schema, 'schema3')

Example #14

0

Show file

File: schemaloader.py Project: deepakunni3/biolinkml

    def resolve(self) -> SchemaDefinition:
        """Reconcile a loaded schema, applying is_a, mixins, apply_to's and other such things.  Also validate the
        content and load a SchemaSynopsis entry

        The work is done in place on self.schema, in this order: defaults and namespaces, imports, promotion of
        class attributes to slots, slot ownership, slot usages, apply_to and class URIs, inverses, slot / enum /
        class / type / subset merging, slot validation and slot URIs, inverse domain and range inference,
        name and overlap checks, enum constraints, source file normalization, tree_root check and synopsis.

        :return: Fully resolved definition
        :raises ValueError: raised directly for an unknown default range and for a slot range that cannot be
                            inferred from an inverse.  The other schema errors are reported through
                            self.raise_value_error / self.raise_value_errors (presumably raising as well --
                            those helpers are defined elsewhere)
        """
        if not self.schema.default_range:
            self.schema.default_range = 'string'
            self.logger.info(f"Default_range not specified. Default set to '{self.schema.default_range}'")

        # Process the namespace declarations
        if not self.schema.default_prefix:
            self.schema.default_prefix = sfx(self.schema.id)
        self.schema_defaults[self.schema.id] = self.schema.default_prefix
        for prefix in self.schema.prefixes.values():
            self.namespaces[prefix.prefix_prefix] = prefix.prefix_reference
        for cmap in self.schema.default_curi_maps:
            self.namespaces.add_prefixmap(cmap, include_defaults=False)
        if not self.namespaces._default:
            # The default prefix is either a URI in its own right or the name of a declared prefix
            if '://' in self.schema.default_prefix:
                self.namespaces._default = self.schema.default_prefix
            elif self.schema.default_prefix in self.namespaces:
                self.namespaces._default = self.namespaces[self.schema.default_prefix]
            else:
                self.raise_value_error(f'Default prefix: {self.schema.default_prefix} is not defined',
                                       self.schema.default_prefix)

        # Process imports
        for imp in self.schema.imports:
            sname = self.importmap.get(str(imp), imp)               # Import map may use CURIE
            sname = self.namespaces.uri_for(sname) if ':' in sname else sname
            sname = self.importmap.get(str(sname), sname)               # It may also use URI or other forms
            import_schemadefinition = \
                load_raw_schema(sname + '.yaml',
                                base_dir=os.path.dirname(self.schema.source_file) if self.schema.source_file else None,
                                merge_modules=self.merge_modules, emit_metadata=self.emit_metadata)
            loaded_schema = (str(sname), import_schemadefinition.version)
            if import_schemadefinition.id in self.loaded:
                # If we've already loaded this, make sure that we've got the same version
                if self.loaded[import_schemadefinition.id][1] != loaded_schema[1]:
                    self.raise_value_error(f"Schema {import_schemadefinition.name} - version mismatch",
                                           import_schemadefinition.name)
                # Note: for debugging purposes we also check whether the version came from the same spot.  This should
                #       be loosened to version only once we're sure that everything is working
                # TODO: The test below needs review -- there are cases where it fails because self.loaded[...][0] has the
                #       full path name and loaded_schema[0] is just the local name
                # if self.loaded[import_schemadefinition.id] != loaded_schema:
                #     self.raise_value_error(f"Schema imported from different files: "
                #                            f"{self.loaded[import_schemadefinition.id][0]} : {loaded_schema[0]}")
            else:
                self.loaded[import_schemadefinition.id] = loaded_schema
                merge_schemas(self.schema, import_schemadefinition, imp, self.namespaces,
                              merge_imports=self.merge_modules)
                self.schema_defaults[import_schemadefinition.id] = import_schemadefinition.default_prefix

        self.namespaces._base = self.schema.default_prefix if ':' in self.schema.default_prefix else \
            self.namespaces[self.schema.default_prefix]

        # Promote embedded attribute definitions to first class slots.
        for cls in self.schema.classes.values():
            for attribute in cls.attributes.values():
                mangled_slot_name = mangled_attribute_name(cls.name, attribute.name)
                if mangled_slot_name in self.schema.slots:
                    self.raise_value_error(f'Class: "{cls.name}" attribute "{attribute.name}" - '
                                           f'mangled name: {mangled_slot_name} already exists', attribute.name)
                new_slot = SlotDefinition(**attribute.__dict__)
                new_slot.domain_of.append(cls.name)
                new_slot.imported_from = cls.imported_from
                # The original attribute name survives as the alias of the mangled slot
                if not new_slot.alias:
                    new_slot.alias = attribute.name
                new_slot.name = mangled_slot_name
                self.schema.slots[new_slot.name] = new_slot
                cls.slots.append(mangled_slot_name)

        # Assign class slot ownership
        for cls in self.schema.classes.values():
            if not isinstance(cls, ClassDefinition):
                name = cls['name'] if 'name' in cls else 'Unknown'
                self.raise_value_error(f'Class "{name} (type: {type(cls)})" definition is not a class definition', name)
            if isinstance(cls.slots, str):
                # Tolerate a single slot name where a list was expected
                self.logger.warning(f"File: {self.schema.source_file} Class: {cls.name} Slots are not an array")
                cls.slots = [cls.slots]
            for slotname in cls.slots:
                if slotname in self.schema.slots:
                    slot = self.schema.slots[cast(SlotDefinitionName, slotname)]
                    slot.owner = cls.name
                    if cls.name not in slot.domain_of:
                        slot.domain_of.append(cls.name)
                else:
                    self.raise_value_error(f'Class "{cls.name}" - unknown slot: "{slotname}"', slotname)

        # Process slots defined as slot usages
        self.process_slot_usage_definitions()

        # Massage initial set of slots
        for slot in self.schema.slots.values():
            # Propagate domain to containing class
            if slot.domain and slot.domain in self.schema.classes:
                if slot.name not in self.schema.classes[slot.domain].slots:
                    # NOTE(review): the owner is set to the slot's own name, not to the domain class -- confirm
                    slot.owner = slot.name
                    # self.schema.classes[slot.domain].slots.append(slot.name)
            elif slot.domain:
                self.raise_value_error(f"slot: {slot.name} - unrecognized domain ({slot.domain})", slot.domain)

            # Validate the slot range
            if slot.range is not None and slot.range not in self.schema.types and \
                    slot.range not in self.schema.classes and slot.range not in self.schema.enums:
                self.raise_value_error(f"slot: {slot.name} - unrecognized range ({slot.range})", slot.range)

        # apply to --> mixins
        for cls in self.schema.classes.values():
            for apply_to_cls in cls.apply_to:
                if apply_to_cls in self.schema.classes:
                    self.schema.classes[apply_to_cls].mixins.append(cls.name)
                else:
                    self.raise_value_error(f'Class "{cls.name}" unknown apply_to target: {apply_to_cls}', apply_to_cls)
            # Class URI's also count as (trivial) mappings
            if cls.class_uri is not None:
                cls.mappings.insert(0, cls.class_uri)
            # Generate a class URI when none was declared, or when declared URIs are only mappings
            if cls.class_uri is None or not self.useuris:
                cls.class_uri = \
                    self.namespaces.uri_or_curie_for(self.schema_defaults.get(cls.from_schema, sfx(cls.from_schema)),
                                                                 camelcase(cls.name))

        # Get the inverse ducks all in a row before we start filling other stuff in
        for slot in self.schema.slots.values():
            if slot.inverse:
                inverse_slot = self.schema.slots.get(slot.inverse, None)
                if inverse_slot:
                    if not inverse_slot.inverse:
                        inverse_slot.inverse = slot.name
                    elif inverse_slot.inverse != slot.name:
                        self.raise_value_error(f'Slot {slot.name}.inverse ({slot.inverse}) does not match '
                                               f'slot {inverse_slot.name}.inverse ({inverse_slot.inverse})')
                else:
                    self.raise_value_error(f'Slot {slot.name}.inverse ({slot.inverse}) is not defined')

        # Update slots with parental information
        merged_slots: List[SlotDefinitionName] = []
        for slot in self.schema.slots.values():
            if not slot.from_schema:
                slot.from_schema = self.schema.id
            self.merge_slot(slot, merged_slots)
            # Add default ranges
            if slot.range is None:
                # Inverses will be handled later on in the process
                if not slot.inverse:
                    slot.range = self.schema.default_range

        # Update enums
        merged_enums: List[EnumDefinitionName] = []
        for enum in self.schema.enums.values():
            if not enum.from_schema:
                enum.from_schema = self.schema.id
            # TODO: Need to add "is_a" to enums
            # self.merge_enum(enum, merged_enums)

        # Process the slot_usages
        for cls in self.schema.classes.values():
            self.process_slot_usages(cls)
            if not cls.from_schema:
                cls.from_schema = self.schema.id

        # Merge class with its mixins and the like
        merged_classes: List[ClassDefinitionName] = []
        for cls in self.schema.classes.values():
            self.merge_class(cls, merged_classes)

        # Update types with parental information
        merged_types: List[TypeDefinitionName] = []
        for typ in self.schema.types.values():
            if not typ.base and not typ.typeof:
                self.raise_value_error(f'type "{typ.name}" must declare a type base or parent (typeof)', typ.name)
            if not typ.typeof and not typ.uri:
                self.raise_value_error(f'type "{typ.name}" does not declare a URI', typ.name)
            self.merge_type(typ, merged_types)
            if not typ.from_schema:
                typ.from_schema = self.schema.id

        # Update the subsets as needed
        for ss in self.schema.subsets.values():
            if not ss.from_schema:
                ss.from_schema = self.schema.id

        # Massage initial set of slots
        for slot in self.schema.slots.values():
            # Keys and identifiers must be present
            if bool(slot.key or slot.identifier):
                if slot.required is None:
                    slot.required = True
                elif not slot.required:
                    self.raise_value_error(f"slot: {slot.name} - key and identifier slots cannot be optional", slot.name)
                if slot.key and slot.identifier:
                    self.raise_value_error(f"slot: {slot.name} - A slot cannot be both a key and identifier at the same time", slot.name)

            # Propagate domain to containing class
            if slot.domain and slot.domain in self.schema.classes:
                if slot.name not in self.schema.classes[slot.domain].slots and not slot.owner:
                    # NOTE(review): the owner is set to the slot's own name, not to the domain class -- confirm
                    slot.owner = slot.name
                    # Slot domains to not appear
                    # self.schema.classes[slot.domain].slots.append(slot.name)
            elif slot.domain:
                self.raise_value_error(f"slot: {slot.name} - unrecognized domain ({slot.domain})", slot.domain)
            if slot.ifabsent:
                from biolinkml.utils.ifabsent_functions import isabsent_match
                if isabsent_match(slot.ifabsent) is None:
                    self.raise_value_error(f"Unrecognized ifabsent action for slot '{slot.name}': '{slot.ifabsent}'", slot.ifabsent)

            # Validate the slot range
            if slot.range is not None and slot.range not in self.schema.types and \
                    slot.range not in self.schema.classes and slot.range not in self.schema.enums:
                self.raise_value_error(f"slot: {slot.name} - unrecognized range ({slot.range})", slot.range)

        # Massage classes, propagating class slots entries domain back to the target slots
        for cls in self.schema.classes.values():
            if not isinstance(cls, ClassDefinition):
                name = cls['name'] if 'name' in cls else 'Unknown'
                self.raise_value_error(f'Class "{name} (type: {type(cls)})" definition is not a class definition')
            if isinstance(cls.slots, str):
                self.logger.warning(f"File: {self.schema.source_file} Class: {cls.name} Slots are not an array")
                cls.slots = [cls.slots]
            # Every slot name listed by a class has to be defined at this point
            for slotname in cls.slots:
                if slotname not in self.schema.slots:
                    self.raise_value_error(f'Class "{cls.name}" - unknown slot: "{slotname}"', slotname)

        for slot in self.schema.slots.values():
            if slot.from_schema is None:
                slot.from_schema = self.schema.id
            # Inline any class definitions that don't have identifiers.  Note that keys ARE inlined
            if slot.range in self.schema.classes:
                range_class = self.schema.classes[cast(ClassDefinitionName, slot.range)]
                if slot.inlined_as_list or not any([self.schema.slots[s].identifier or
                                                    self.schema.slots[s].key for s in range_class.slots]):
                    slot.inlined = True

            # Slot URI's also count as (trivial) mappings
            if slot.slot_uri is not None:
                slot.mappings.insert(0, slot.slot_uri)
            # Assign missing predicates
            if slot.slot_uri is None or not self.useuris:
                slot.slot_uri = \
                    self.namespaces.uri_or_curie_for(self.schema_defaults.get(slot.from_schema, sfx(slot.from_schema)),
                                                                 self.slot_name_for(slot))

            if slot.subproperty_of and slot.subproperty_of not in self.schema.slots:
                self.raise_value_error(f'Slot: "{slot.name}" - subproperty_of: "{slot.subproperty_of}" '
                                       f'does not reference a slot definition', slot.subproperty_of)

        # Evaluate any slot inverses
        # NOTE(review): this helper is never called within resolve() and, as written, calls itself with the
        #               same arguments whenever fwd_slot.domain and inverse_slot.range are both set, which
        #               would recurse without end.  It needs a real compatibility test before it is put to use.
        def domain_range_alignment(fwd_slot: SlotDefinition, inverse_slot: SlotDefinition) -> bool:
            """ Determine whether the range of fwd_slot is compatible with the domain of inverse_slot """
            # TODO: Determine what to do about class and slot hierarchy
            if fwd_slot.range and fwd_slot.range not in self.schema.classes:
                raise ValueError(f"Slot '{fwd_slot.name}' range ({fwd_slot.range}) is not an class -- inverse is not possible")
            if fwd_slot.domain:
                if not inverse_slot.range:
                    inverse_slot.range = fwd_slot.domain
                elif not domain_range_alignment(fwd_slot, inverse_slot):
                    self.logger.warning(f"Slot: {slot.name} and inverse slot: {inverse_slot.name} are not compatible")
            return True

        # Get the inverse domains and ranges sorted
        for slot in self.schema.slots.values():
            if slot.inverse:
                # Note that the inverse OF the inverse will be caught in this same iterator
                inverse_slot = self.schema.slots[slot.inverse]
                if not slot.range:
                    # Infer the missing range from the domain (or first domain_of entry) of the inverse
                    if inverse_slot.domain:
                        slot.range = inverse_slot.domain
                    elif len(inverse_slot.domain_of):
                        if len(inverse_slot.domain_of) > 1:
                            dom_list = ', '.join(inverse_slot.domain_of)
                            self.logger.warning(f"Slot {slot.name}.inverse ({inverse_slot.name}), "
                                                f"has multi domains ({dom_list})  Multi ranges not yet implemented")
                        slot.range = inverse_slot.domain_of[0]
                    else:
                        raise ValueError(f"Unable to determine the range of slot `{slot.name}'. "
                                         f"Its inverse ({inverse_slot.name}) has no declared domain")
                elif not inverse_slot.domain and len(inverse_slot.domain_of) == 0:
                    inverse_slot.domain = slot.range
                # domain_of is a list of class names, so the range has to be looked up inside it; comparing the
                # range with the list object itself could never match and produced spurious warnings
                elif slot.range != inverse_slot.domain and slot.range not in inverse_slot.domain_of:
                    self.logger.warning(f"Range of slot '{slot.name}' ({slot.range}) "
                                        f"does not line with the domain of its inverse ({inverse_slot.name})")

        # Check for duplicate class and type names
        def check_dups(s1: Set[ElementName], s2: Set[ElementName]) -> Tuple[List[ElementName], str]:
            """ Return the elements that occur in both s1 and s2, plus their comma separated names """
            if s1.isdisjoint(s2):
                return [], ''

            # Return an ordered list of d1/d1 tuples
            # For some curious reason, s1.intersection(s2) and s2.intersection(s1) BOTH yield s1 elements
            dups = sorted(s1.intersection(s2))
            dup_locs = list()
            for dup in dups:
                dup_locs += [s1e for s1e in s1 if s1e == dup]
                dup_locs += [s2e for s2e in s2 if s2e == dup]

            return dup_locs, ', '.join(dups)


        classes = set(self.schema.classes.keys())
        self.validate_item_names("class", classes)
        slots = set(self.schema.slots.keys())
        self.validate_item_names("slot", slots)
        types = set(self.schema.types.keys())
        self.validate_item_names("type", types)
        subsets = set(self.schema.subsets.keys())
        self.validate_item_names("subset", subsets)
        enums = set(self.schema.enums.keys())
        self.validate_item_names('enum', enums)

        # Check that the default range is valid
        default_range_needed = any(slot.range == self.schema.default_range for slot in self.schema.slots.values())
        if default_range_needed and \
                self.schema.default_range not in self.schema.types and \
                self.schema.default_range not in self.schema.classes:
            raise ValueError(f'Unknown default range: "{self.schema.default_range}"')

        # We are currently limited to one key per class
        for cls in self.schema.classes.values():
            class_slots = []
            for sn in cls.slots:
                slot = self.schema.slots[sn]
                if slot.key or slot.identifier:
                    class_slots.append(sn)
            if len(class_slots) > 1:
                self.raise_value_error(f'Class "{cls.name}" - multiple keys/identifiers not allowed ({", ".join(class_slots)})', class_slots[1])

        # Check out all the namespaces
        self.check_prefixes()

        # Cannot have duplicate class or type keys
        dups, items = check_dups(types, classes)
        if items:
            self.raise_value_errors(f"Overlapping type and class names: {items}", dups)
        dups, items = check_dups(enums, classes)
        if items:
            self.raise_value_errors(f"Overlapping enum and class names: {items}", dups)
        dups, items = check_dups(types, enums)
        if items:
            self.raise_value_errors(f"Overlapping type and enum names: {items}", dups)

        # The remaining overlaps are reported as warnings only
        dups, items = check_dups(slots, classes)
        if items:
            self.logger_warning(f"Overlapping slot and class names: {items}", dups)

        dups, items = check_dups(subsets, classes)
        if items:
            self.logger_warning(f"Overlapping subset and class names: {items}", dups)

        dups, items = check_dups(types, slots)
        if items:
            self.logger_warning(f"Overlapping type and slot names: {items}", dups)

        dups, items = check_dups(subsets, slots)
        if items:
            self.logger_warning(f"Overlapping subset and slot names: {items}", dups)

        dups, items = check_dups(subsets, types)
        if items:
            self.logger_warning(f"Overlapping subset and type names: {items}", dups)

        dups, items = check_dups(enums, slots)
        if items:
            self.logger_warning(f"Overlapping enum and slot names: {items}", dups)

        dups, items = check_dups(subsets, enums)
        if items:
            self.logger_warning(f"Overlapping subset and enum names: {items}", dups)

        # Check over the various enumeration constraints
        for enum in self.schema.enums.values():
            if enum.code_set_version:
                if enum.code_set_tag:
                    self.raise_value_errors(f'Enum: "{enum.name}" cannot have both version and tag',
                                            [enum.code_set_version, enum.code_set_tag])
                if not enum.code_set:
                    self.raise_value_error(f'Enum: "{enum.name}" needs a code set to have a version', enum.name)
            if enum.code_set_tag:
                if not enum.code_set:
                    self.raise_value_error(f'Enum: "{enum.name}" needs a code set to have a tag', enum.name)
            if enum.pv_formula:
                if not enum.code_set:
                    self.raise_value_error(f'Enum: "{enum.name}" needs a code set to have a formula', enum.name)
                if enum.permissible_values:
                    self.raise_value_error(f'Enum: "{enum.name}" can have a formula or permissible values but not both',
                                           enum.name)
        for slot in self.schema.slots.values():
            if slot.range and slot.range in self.schema.enums:
                if slot.inlined or slot.inlined_as_list:
                    self.raise_value_error(f'Slot: "{slot.name}" enumerations cannot be inlined', slot.range)

        # Make the source file relative if it is locally generated
        self.schema_location = self.schema.source_file
        if self.schema.source_file and '://' not in self.schema.source_file:
            self.schema.source_file = os.path.basename(self.schema.source_file)

        # Make sure there is only one tree_root
        tree_root = None
        for cls in self.schema.classes.values():
            if cls.tree_root:
                if tree_root is not None:
                    self.logger.warning(f"Duplicate tree_root: {cls.name} with {tree_root}")
                else:
                    tree_root = cls.name

        # Synopsis errors are printed, not raised; only undefined subsets are reported as value errors
        self.synopsis = SchemaSynopsis(self.schema)
        errs = self.synopsis.errors()
        if errs:
            print("Warning: The following errors were encountered in the schema")
            for errline in errs:
                print("\t" + errline)
            print()
        for subset, referees in self.synopsis.subsetrefs.items():
            if subset not in self.schema.subsets:
                self.raise_value_error(f"Subset: {subset} is not defined", subset)
        return self.schema

Example #15

0

Show file

    def resolve(self) -> SchemaDefinition:
        """Reconcile a loaded schema, applying is_a, mixins, apply_to's and other such things.  Also validate the
        content and load a SchemaSynopsis entry

        The passes below run in a fixed order because later passes read what earlier ones wrote: namespaces,
        imports, class/slot ownership, apply_to, inverse pairing, slot/class/type merging, slot validation,
        URI assignment, inverse range/domain inference and finally the naming and structural checks.

        :return: Fully resolved definition
        :raises ValueError: raised directly when the range of an inverse slot cannot be determined, when the
            default range is unknown, or when a class and a type share a name.  All other problems are reported
            through ``self.raise_value_error`` (defined outside this method -- presumably it raises as well).
        """
        if not self.schema.default_range:
            self.schema.default_range = 'string'
            print(f"Warning: default_range not specified. Default set to '{self.schema.default_range}'",
                  file=sys.stderr)

        # Process the namespace declarations
        if not self.schema.default_prefix:
            self.schema.default_prefix = sfx(self.schema.id)
        self.schema_defaults[self.schema.id] = self.schema.default_prefix
        for prefix in self.schema.prefixes.values():
            self.namespaces[prefix.prefix_prefix] = prefix.prefix_reference
        for cmap in self.schema.default_curi_maps:
            self.namespaces.add_prefixmap(cmap, include_defaults=False)
        if not self.namespaces._default:
            if '://' in self.schema.default_prefix:
                # The default prefix is itself a URI -- use it verbatim
                self.namespaces._default = self.schema.default_prefix
            elif self.schema.default_prefix in self.namespaces:
                self.namespaces._default = self.namespaces[self.schema.default_prefix]
            else:
                self.raise_value_error(f'Default prefix: {self.schema.default_prefix} is not defined',
                                       self.schema.default_prefix)

        # Process imports
        for imp in self.schema.imports:
            sname = self.importmap.get(str(imp), imp)               # Import map may use CURIE
            sname = self.namespaces.uri_for(sname) if ':' in sname else sname
            sname = self.importmap.get(str(sname), sname)               # It may also use URI or other forms
            import_schemadefinition = \
                load_raw_schema(sname + '.yaml',
                                base_dir=os.path.dirname(self.schema.source_file) if self.schema.source_file else None)
            # (location, version) pair recorded per imported schema id
            loaded_schema = (str(sname), import_schemadefinition.version)
            if import_schemadefinition.id in self.loaded:
                # If we've already loaded this, make sure that we've got the same version
                if self.loaded[import_schemadefinition.id][1] != loaded_schema[1]:
                    self.raise_value_error(f"Schema {import_schemadefinition.name} - version mismatch",
                                           import_schemadefinition.name)
                # Note: for debugging purposes we also check whether the version came from the same spot.  This should
                #       be loosened to version only once we're sure that everything is working
                if self.loaded[import_schemadefinition.id] != loaded_schema:
                    self.raise_value_error(f"Schema imported from different files: "
                                           f"{self.loaded[import_schemadefinition.id][0]} : {loaded_schema[0]}")
            else:
                self.loaded[import_schemadefinition.id] = loaded_schema
                merge_schemas(self.schema, import_schemadefinition, imp, self.namespaces)
                self.schema_defaults[import_schemadefinition.id] = import_schemadefinition.default_prefix

        # A default prefix containing ':' is used as is; anything else is looked up in the namespace table
        self.namespaces._base = self.schema.default_prefix if ':' in self.schema.default_prefix else \
            self.namespaces[self.schema.default_prefix]

        # Assign class slot ownership
        for cls in self.schema.classes.values():
            if not isinstance(cls, ClassDefinition):
                name = cls['name'] if 'name' in cls else 'Unknown'
                self.raise_value_error(f'Class "{name} (type: {type(cls)})" definition is not a class definition', name)
            if isinstance(cls.slots, str):
                # A single slot name was supplied where a list was expected -- wrap it
                self.logger.warning(f"File: {self.schema.source_file} Class: {cls.name} Slots are not an array")
                cls.slots = [cls.slots]
            for slotname in cls.slots:
                if slotname in self.schema.slots:
                    slot = self.schema.slots[cast(SlotDefinitionName, slotname)]
                    slot.owner = cls.name
                    if cls.name not in slot.domain_of:
                        slot.domain_of.append(cls.name)
                else:
                    self.raise_value_error(f'Class "{cls.name}" - unknown slot: "{slotname}"', slotname)

        # Massage initial set of slots
        for slot in self.schema.slots.values():
            # Propagate domain to containing class
            if slot.domain and slot.domain in self.schema.classes:
                if slot.name not in self.schema.classes[slot.domain].slots:
                    # NOTE(review): the owner is set to the slot's own name rather than to the domain class --
                    #               kept as is, confirm that this is intentional
                    slot.owner = slot.name
            elif slot.domain:
                self.raise_value_error(f"slot: {slot.name} - unrecognized domain ({slot.domain})", slot.domain)

            # Validate the slot range
            if slot.range is not None and slot.range not in self.schema.types and \
                    slot.range not in self.schema.classes:
                self.raise_value_error(f"slot: {slot.name} - unrecognized range ({slot.range})", slot.range)

        # apply to --> mixins
        for cls in self.schema.classes.values():
            for apply_to_cls in cls.apply_to:
                if apply_to_cls in self.schema.classes:
                    self.schema.classes[apply_to_cls].mixins.append(cls.name)
                else:
                    self.raise_value_error(f'Class "{cls.name}" unknown apply_to target: {apply_to_cls}', apply_to_cls)
            # Class URI's also count as (trivial) mappings
            if cls.class_uri is not None:
                cls.mappings.insert(0, cls.class_uri)
            # Generate the URI when none was declared or when declared URIs are not to be used
            if cls.class_uri is None or not self.useuris:
                cls.class_uri = \
                    self.namespaces.uri_or_curie_for(self.schema_defaults.get(cls.from_schema, sfx(cls.from_schema)),
                                                     camelcase(cls.name))

        # Get the inverse ducks all in a row before we start filling other stuff in
        for slot in self.schema.slots.values():
            if slot.inverse:
                inverse_slot = self.schema.slots.get(slot.inverse, None)
                if inverse_slot:
                    if not inverse_slot.inverse:
                        # Only one side declared the relationship -- make it symmetric
                        inverse_slot.inverse = slot.name
                    elif inverse_slot.inverse != slot.name:
                        self.raise_value_error(f'Slot {slot.name}.inverse ({slot.inverse}) does not match '
                                               f'slot {inverse_slot.name}.inverse ({inverse_slot.inverse})')
                else:
                    self.raise_value_error(f'Slot {slot.name}.inverse ({slot.inverse}) is not defined')

        # Update slots with parental information
        merged_slots: List[SlotDefinitionName] = []
        for slot in self.schema.slots.values():
            if not slot.from_schema:
                slot.from_schema = self.schema.id
            self.merge_slot(slot, merged_slots)
            # Add default ranges
            if slot.range is None:
                # Inverses will be handled later on in the process
                if not slot.inverse:
                    slot.range = self.schema.default_range

        # Update classes with is_a and mixin information
        merged_classes: List[ClassDefinitionName] = []
        for cls in self.schema.classes.values():
            if not cls.from_schema:
                cls.from_schema = self.schema.id
            self.merge_class(cls, merged_classes)

        # Update types with parental information
        merged_types: List[TypeDefinitionName] = []
        for typ in self.schema.types.values():
            if not typ.base and not typ.typeof:
                self.raise_value_error(f'type "{typ.name}" must declare a type base or parent (typeof)', typ.name)
            if not typ.typeof and not typ.uri:
                self.raise_value_error(f'type "{typ.name}" does not declare a URI', typ.name)
            self.merge_type(typ, merged_types)
            if not typ.from_schema:
                typ.from_schema = self.schema.id

        # Update the subsets as needed
        for ss in self.schema.subsets.values():
            if not ss.from_schema:
                ss.from_schema = self.schema.id

        # Massage the slots again now that the merge passes have run
        for slot in self.schema.slots.values():
            # Keys and identifiers must be present
            if bool(slot.key or slot.identifier):
                if slot.required is None:
                    slot.required = True
                elif not slot.required:
                    self.raise_value_error(f"slot: {slot.name} - key and identifier slots cannot be optional", slot.name)
                if slot.key and slot.identifier:
                    self.raise_value_error(f"slot: {slot.name} - A slot cannot be both a key and identifier at the same time", slot.name)

            # Propagate domain to containing class
            if slot.domain and slot.domain in self.schema.classes:
                if slot.name not in self.schema.classes[slot.domain].slots and not slot.owner:
                    # Slot domains do not get added to the class slot list
                    slot.owner = slot.name
            elif slot.domain:
                self.raise_value_error(f"slot: {slot.name} - unrecognized domain ({slot.domain})", slot.domain)
            if slot.ifabsent:
                # Imported lazily, only when a slot actually declares an ifabsent action
                from biolinkml.utils.ifabsent_functions import isabsent_match
                if isabsent_match(slot.ifabsent) is None:
                    self.raise_value_error(f"Unrecognized ifabsent action for slot '{slot.name}': '{slot.ifabsent}'", slot.ifabsent)

            # Validate the slot range
            if slot.range is not None and slot.range not in self.schema.types and \
                    slot.range not in self.schema.classes:
                self.raise_value_error(f"slot: {slot.name} - unrecognized range ({slot.range})", slot.range)

        # Re-check the class definitions after the merge passes (validation only -- nothing is propagated here)
        for cls in self.schema.classes.values():
            if not isinstance(cls, ClassDefinition):
                name = cls['name'] if 'name' in cls else 'Unknown'
                self.raise_value_error(f'Class "{name} (type: {type(cls)})" definition is not a class definition')
            if isinstance(cls.slots, str):
                self.logger.warning(f"File: {self.schema.source_file} Class: {cls.name} Slots are not an array")
                cls.slots = [cls.slots]
            for slotname in cls.slots:
                if slotname not in self.schema.slots:
                    self.raise_value_error(f'Class "{cls.name}" - unknown slot: "{slotname}"', slotname)

        for slot in self.schema.slots.values():
            if slot.from_schema is None:
                slot.from_schema = self.schema.id
            # Inline any class definitions that don't have identifiers.  Note that keys ARE inlined
            if slot.range in self.schema.classes:
                range_class = self.schema.classes[cast(ClassDefinitionName, slot.range)]
                if not any(self.schema.slots[s].identifier for s in range_class.slots):
                    slot.inlined = True

            # Slot URI's also count as (trivial) mappings
            if slot.slot_uri is not None:
                slot.mappings.insert(0, slot.slot_uri)
            # Assign missing predicates
            if slot.slot_uri is None or not self.useuris:
                slot.slot_uri = \
                    self.namespaces.uri_or_curie_for(self.schema_defaults.get(slot.from_schema, sfx(slot.from_schema)),
                                                     self.slot_name_for(slot))

        # Get the inverse domains and ranges sorted
        for slot in self.schema.slots.values():
            if slot.inverse:
                # Note that the inverse OF the inverse will be caught in this same iterator
                inverse_slot = self.schema.slots[slot.inverse]
                if not slot.range:
                    # No explicit range: borrow it from the inverse slot's domain
                    if inverse_slot.domain:
                        slot.range = inverse_slot.domain
                    elif inverse_slot.domain_of:
                        if len(inverse_slot.domain_of) > 1:
                            dom_list = ', '.join(inverse_slot.domain_of)
                            self.logger.warning(f"Slot {slot.name}.inverse ({inverse_slot.name}), "
                                                f"has multi domains ({dom_list})  Multi ranges not yet implemented")
                        slot.range = inverse_slot.domain_of[0]
                    else:
                        raise ValueError(f"Unable to determine the range of slot `{slot.name}'. "
                                         f"Its inverse ({inverse_slot.name}) has no declared domain")
                elif not inverse_slot.domain and not inverse_slot.domain_of:
                    inverse_slot.domain = slot.range
                elif slot.range != inverse_slot.domain and slot.range not in inverse_slot.domain_of:
                    # domain_of is a list of class names, so the range has to be tested against its members
                    # rather than compared with the list object itself
                    self.logger.warning(f"Range of slot '{slot.name}' ({slot.range}) "
                                        f"does not line with the domain of its inverse ({inverse_slot.name})")

        # Check for duplicate class and type names
        def check_dups(s1: Set[ElementName], s2: Set[ElementName]) -> str:
            """ Return the names present in both sets as a sorted, comma separated string ('' if none) """
            return ', '.join(sorted(s1.intersection(s2)))

        classes = set(self.schema.classes.keys())
        self.validate_item_names("class", classes)
        slots = set(self.schema.slots.keys())
        self.validate_item_names("slot", slots)
        types = set(self.schema.types.keys())
        self.validate_item_names("type", types)
        subsets = set(self.schema.subsets.keys())
        self.validate_item_names("subset", subsets)

        # Check that the default range is valid
        if not self.schema.default_range:
            raise ValueError("Default range is not specified")
        if self.schema.default_range not in self.schema.types and self.schema.default_range not in self.schema.classes:
            raise ValueError(f'Unknown default range: "{self.schema.default_range}"')

        # We are currently limited to one key per class
        for cls in self.schema.classes.values():
            class_slots = []
            for sn in cls.slots:
                slot = self.schema.slots[sn]
                if slot.key:
                    class_slots.append(slot.name)
            if len(class_slots) > 1:
                self.raise_value_error(f'Class "{cls.name}" - multiple keys not allowed ({", ".join(class_slots)})', class_slots[1])

        # Check out all the namespaces
        self.check_prefixes()

        # Cannot have duplicate class or type keys
        dups = check_dups(classes, types)
        if dups:
            raise ValueError(f"Shared class and type names detected: {dups}")

        # Every other kind of name overlap is tolerated but reported
        dups = check_dups(classes, slots)
        if dups:
            self.logger.warning(f"Shared class and slot names: {dups}")
        dups = check_dups(classes, subsets)
        if dups:
            self.logger.warning(f"Shared class and subset names: {dups}")
        dups = check_dups(slots, types)
        if dups:
            self.logger.warning(f"Shared type and slot names: {dups}")
        dups = check_dups(slots, subsets)
        if dups:
            self.logger.warning(f"Shared slot and subset names: {dups}")
        dups = check_dups(types, subsets)
        if dups:
            self.logger.warning(f"Shared type and subset names: {dups}")

        # Make the source file relative if it is locally generated
        self.schema_location = self.schema.source_file
        if self.schema.source_file and '://' not in self.schema.source_file:
            self.schema.source_file = os.path.basename(self.schema.source_file)

        # Make sure there is only one tree_root
        tree_root = None
        for cls in self.schema.classes.values():
            if cls.tree_root:
                if tree_root is not None:
                    self.logger.warning(f"Duplicate tree_root: {cls.name} with {tree_root}")
                else:
                    tree_root = cls.name

        self.synopsis = SchemaSynopsis(self.schema)
        errs = self.synopsis.errors()
        if errs:
            print("Warning: The following errors were encountered in the schema")
            for errline in errs:
                print("\t" + errline)
            print()
        # Every subset referenced anywhere in the schema has to be declared
        for subset, _referees in self.synopsis.subsetrefs.items():
            if subset not in self.schema.subsets:
                self.raise_value_error(f"Subset: {subset} is not defined", subset)
        return self.schema

Example #16

0

Show file

 def test_as_json(self):
     """ Compare the JSON rendering of schema6.yaml with the expected schema6.json output """
     raw_schema = load_raw_schema(env.input_path('schema6.yaml'))
     schema = self.fix_schema_metadata(raw_schema)
     expected_file = env.expected_path('schema6.json')
     rendered = as_json(schema)
     # The filter is the identity function: the output is compared unmodified
     env.eval_single_file(expected_file, rendered, filtr=lambda s: s)

Example #17

0

Show file

 def test_base_dir(self):
     """ A bare file name is resolved against the supplied base_dir """
     schema = load_raw_schema('schema1.yaml', base_dir=datadir)
     self._verify_schema1_content(schema, 'schema1')

Example #18

0

Show file

 def test_explicit_name(self):
     """ Load schema2.yaml and verify its content under the name 'schema2' """
     schema_path = os.path.join(datadir, 'schema2.yaml')
     schema = load_raw_schema(schema_path)
     self._verify_schema1_content(schema, 'schema2')

Example #19

0

Show file

 def test_name_from_sourcefile(self):
     """ Loading schema5.yaml (no identifier at all) must fail with a ValueError """
     with self.assertRaises(ValueError):
         schema_path = env.input_path('schema5.yaml')
         load_raw_schema(schema_path)

Example #20

0

Show file

 def test_model_access(self):
     """ Make sure that the raw loader can dereference a URL and that the data matches """
     # METAMODEL_URI is handed straight to the loader instead of a local path
     remote_schema = load_raw_schema(METAMODEL_URI)
     self.validate_yaml_content(remote_schema, True)

Example #21

0

Show file

    def resolve(self) -> SchemaDefinition:
        """Reconcile a loaded schema, applying is_a, mixins, apply_to's and other such things.  Also validate the
        content and load a SchemaSynopsis entry

        The work is done as a sequence of in-place passes over ``self.schema``: default range and namespace
        setup, import merging, slot and class "massaging" (run once before and once after the merge passes),
        apply_to expansion, slot/class/type merging, inlining and slot URI assignment, and finally the
        name-clash, key-count, prefix and subset checks.

        :return: Fully resolved definition
        :raises ValueError: raised directly for an undefined default prefix, an unknown default range or names
            shared between classes and types.  Other problems go through ``self.raise_value_error``, which is
            defined outside this method (presumably it raises as well -- confirm).
        """
        # Fall back to 'string' (with a warning on stderr) when the schema declares no default range
        if not self.schema.default_range:
            self.schema.default_range = 'string'
            print(
                f"Warning: default_range not specified. Default set to '{self.schema.default_range}'",
                file=sys.stderr)

        # Process the namespace declarations
        if not self.schema.default_prefix:
            self.schema.default_prefix = Namespaces.sfx(self.schema.id)
        for prefix in self.schema.prefixes.values():
            self.namespaces[prefix.prefix_prefix] = prefix.prefix_reference
        for cmap in self.schema.default_curi_maps:
            self.namespaces.add_prefixmap(cmap, include_defaults=False)
        if not self.namespaces._default:
            # A default prefix that contains '://' is taken to be a URI and used verbatim; otherwise it has to
            # be one of the prefixes registered above
            if '://' in self.schema.default_prefix:
                self.namespaces._default = self.schema.default_prefix
            elif self.schema.default_prefix in self.namespaces:
                self.namespaces._default = self.namespaces[
                    self.schema.default_prefix]
            else:
                raise ValueError(
                    f'Default prefix: {self.schema.default_prefix} is not defined'
                )

        # Process imports
        for sname in self.schema.imports:
            # Import names containing ':' are expanded through the namespace table
            sloc = self.namespaces.uri_for(sname) if ':' in sname else sname
            # self.loaded records the locations that have already been merged so each is loaded only once
            if sloc not in self.loaded:
                self.loaded.add(sloc)
                merge_schemas(
                    self.schema,
                    load_raw_schema(sloc + '.yaml', base_dir=self.base_dir),
                    sloc, self.namespaces)

        # A default prefix containing ':' is used as is; anything else is looked up in the namespace table
        self.namespaces._base = self.schema.default_prefix if ':' in self.schema.default_prefix else \
            self.namespaces[self.schema.default_prefix]

        # Massage initial set of slots
        # (this pass and the class pass that follows are repeated after the merge steps further down)
        for slot in self.schema.slots.values():
            # Propagate domain to containing class
            if slot.domain and slot.domain in self.schema.classes:
                if slot.name not in self.schema.classes[slot.domain].slots:
                    self.schema.classes[slot.domain].slots.append(slot.name)
            elif slot.domain:
                self.raise_value_error(
                    f"slot: {slot.name} - unrecognized domain ({slot.domain})")

            # Keys and identifiers must be present
            if bool(slot.key or slot.identifier):
                # An unspecified 'required' defaults to True; an explicit falsy value is an error
                if slot.required is None:
                    slot.required = True
                elif not slot.required:
                    self.raise_value_error(
                        f"slot: {slot.name} - key and identifier slots cannot be optional"
                    )

            # Validate the slot range
            if slot.range is not None and  slot.range not in self.schema.types and \
                    slot.range not in self.schema.classes:
                self.raise_value_error(
                    f"slot: {slot.name} - unrecognized range ({slot.range})")

        # Massage classes, propagating class slots entries domain back to the target slots
        for cls in self.schema.classes.values():
            if not isinstance(cls, ClassDefinition):
                name = cls['name'] if 'name' in cls else 'Unknown'
                self.raise_value_error(
                    f'Class "{name} (type: {type(cls)})" definition is not a class definition'
                )
            # A single slot name given as a plain string is wrapped into a list (with a warning)
            if isinstance(cls.slots, str):
                print(
                    f"File: {self.schema.source_file} Class: {cls.name} Slots are not an array",
                    file=sys.stderr)
                cls.slots = [cls.slots]
            for slotname in cls.slots:
                if slotname in self.schema.slots:
                    slot = self.schema.slots[cast(SlotDefinitionName,
                                                  slotname)]
                    # In this first pass a slot may belong to one class only: an existing, different
                    # domain is reported as an error
                    if slot.domain is None:
                        slot.domain = cls.name
                    elif slot.domain != cls.name:
                        self.raise_value_error(
                            f'Slot: {slot.name} domain ({slot.domain}) '
                            f'does not match declaring class "({cls.name})"')
                else:
                    self.raise_value_error(
                        f'Class "{cls.name}" - unknown slot: "{slotname}"')

        # apply to --> mixins
        for cls in self.schema.classes.values():
            # "cls applies to X" is recorded as "X has cls as a mixin"
            for apply_to_cls in cls.apply_to:
                if apply_to_cls in self.schema.classes:
                    self.schema.classes[apply_to_cls].mixins.append(cls.name)
                else:
                    self.raise_value_error(
                        f'Class "{cls.name}" unknown apply_to target: {apply_to_cls}'
                    )
            # Classes without an explicit URI get one built from the default prefix and the camel-cased name
            if cls.class_uri is None:
                cls.class_uri = self.namespaces.uri_or_curie_for(
                    self.schema.default_prefix, camelcase(cls.name))

        # Update slots with parental information
        # merged_slots is shared across the merge_slot calls (presumably to track slots already merged)
        merged_slots: List[SlotDefinitionName] = []
        for slot in self.schema.slots.values():
            if not slot.from_schema:
                slot.from_schema = self.schema.id
            self.merge_slot(slot, merged_slots)
            # Add default ranges
            if slot.range is None:
                slot.range = self.schema.default_range

        # Update classes with is_a and mixin information
        merged_classes: List[ClassDefinitionName] = []
        for cls in self.schema.classes.values():
            if not cls.from_schema:
                cls.from_schema = self.schema.id
            self.merge_class(cls, merged_classes)

        # Update types with parental information
        merged_types: List[TypeDefinitionName] = []
        for typ in self.schema.types.values():
            # A type needs either its own base or a parent type; a type without a parent also needs a URI
            if not typ.base and not typ.typeof:
                self.raise_value_error(
                    f'type "{typ.name}" must declare a type base or parent (typeof)'
                )
            if not typ.typeof and not typ.uri:
                self.raise_value_error(
                    f'type "{typ.name}" does not declare a URI')
            self.merge_type(typ, merged_types)
            if not typ.from_schema:
                typ.from_schema = self.schema.id

        # Update the subsets as needed
        for ss in self.schema.subsets.values():
            if not ss.from_schema:
                ss.from_schema = self.schema.id

        # Massage initial set of slots
        # (second run of the slot pass above, this time over the merged slots)
        for slot in self.schema.slots.values():
            # Propagate domain to containing class
            if slot.domain and slot.domain in self.schema.classes:
                if slot.name not in self.schema.classes[slot.domain].slots:
                    self.schema.classes[slot.domain].slots.append(slot.name)
            elif slot.domain:
                self.raise_value_error(
                    f"slot: {slot.name} - unrecognized domain ({slot.domain})")

            # Keys and identifiers must be present
            if bool(slot.key or slot.identifier):
                if slot.required is None:
                    slot.required = True
                elif not slot.required:
                    self.raise_value_error(
                        f"slot: {slot.name} - key and identifier slots cannot be optional"
                    )

            # Validate the slot range
            if slot.range is not None and  slot.range not in self.schema.types and \
                    slot.range not in self.schema.classes:
                self.raise_value_error(
                    f"slot: {slot.name} - unrecognized range ({slot.range})")

        # Massage classes, propagating class slots entries domain back to the target slots
        # (second run of the class pass; here the domain mismatch check is disabled, see the TODO below)
        for cls in self.schema.classes.values():
            if not isinstance(cls, ClassDefinition):
                name = cls['name'] if 'name' in cls else 'Unknown'
                self.raise_value_error(
                    f'Class "{name} (type: {type(cls)})" definition is not a class definition'
                )
            if isinstance(cls.slots, str):
                print(
                    f"File: {self.schema.source_file} Class: {cls.name} Slots are not an array",
                    file=sys.stderr)
                cls.slots = [cls.slots]
            for slotname in cls.slots:
                if slotname in self.schema.slots:
                    slot = self.schema.slots[cast(SlotDefinitionName,
                                                  slotname)]
                    if slot.domain is None:
                        slot.domain = cls.name

                    # TODO: fix this check
                    # elif slot.domain != cls.name:
                    #     self.raise_value_error(f'Slot: {slot.name} domain ({slot.domain}) '
                    #                            f'does not match declaring class "({cls.name})"')
                else:
                    self.raise_value_error(
                        f'Class "{cls.name}" - unknown slot: "{slotname}"')

        for slot in self.schema.slots.values():
            # Inline any class definitions that don't have identifiers.  Note that keys ARE inlined
            if slot.range in self.schema.classes:
                range_class = self.schema.classes[cast(ClassDefinitionName,
                                                       slot.range)]
                # The range class has no identifier slot, so the slot is marked as inlined
                if not any([
                        self.schema.slots[s].identifier
                        for s in range_class.slots
                ]):
                    slot.inlined = True

            # Assign missing predicates
            if slot.slot_uri is None:
                slot.slot_uri = self.namespaces.uri_or_curie_for(
                    self.schema.default_prefix, self.slot_name_for(slot))

        # Check for duplicate class and type names
        def check_dups(s1: Set[ElementName], s2: Set[ElementName]) -> str:
            """ Return the names present in both sets as a sorted, comma separated string ('' if none) """
            return ', '.join(sorted(s1.intersection(s2)))

        # Snapshot the element names of each kind for the overlap checks below
        classes = set(self.schema.classes.keys())
        slots = set(self.schema.slots.keys())
        types = set(self.schema.types.keys())
        subsets = set(self.schema.subsets.keys())

        # Check that the default range is valid
        # NOTE(review): default_range is defaulted to 'string' at the top of this method, so the first guard
        #               looks unreachable unless something in between clears it -- confirm
        if not self.schema.default_range:
            raise ValueError("Default range is not specified")
        if self.schema.default_range not in self.schema.types and self.schema.default_range not in self.schema.classes:
            raise ValueError(
                f'Unknown default range: "{self.schema.default_range}"')

        # We are currently limited to one key per class
        for cls in self.schema.classes.values():
            # Names of the slots of this class that are flagged as keys
            class_slots = []
            for sn in cls.slots:
                slot = self.schema.slots[sn]
                if slot.key:
                    class_slots.append(slot.name)
            if len(class_slots) > 1:
                self.raise_value_error(
                    f'Class "{cls.name}" - multiple keys not allowed ({", ".join(class_slots)})'
                )

        # Check out all the namespaces
        self.check_prefixes()

        # Cannot have duplicate class or type keys
        dups = check_dups(classes, types)
        if dups:
            raise ValueError(f"Shared class and type names detected: {dups}")

        # Every other kind of name overlap only produces a warning on stderr
        dups = check_dups(classes, slots)
        if dups:
            print(f"Warning: Shared class and slot names: {dups}",
                  file=sys.stderr)
        dups = check_dups(classes, subsets)
        if dups:
            print(f"Warning: Shared class and subset names: {dups}",
                  file=sys.stderr)
        dups = check_dups(slots, types)
        if dups:
            print(f"Warning: Shared type and slot names: {dups}",
                  file=sys.stderr)
        dups = check_dups(slots, subsets)
        if dups:
            print(f"Warning: Shared slot and subset names: {dups}",
                  file=sys.stderr)
        dups = check_dups(types, subsets)
        if dups:
            print(f"Warning: Shared type and subset names: {dups}",
                  file=sys.stderr)

        # Make the source file relative if it is locally generated
        # (the unmodified location is kept in self.schema_location; anything containing '://' is left alone)
        self.schema_location = self.schema.source_file
        if self.schema.source_file and '://' not in self.schema.source_file:
            self.schema.source_file = os.path.basename(self.schema.source_file)

        # Build the synopsis and make sure that every subset it saw referenced is actually declared
        self.synopsis = SchemaSynopsis(self.schema)
        for subset, referees in self.synopsis.subsetrefs.items():
            if subset not in self.schema.subsets:
                self.raise_value_error(f"Subset: {subset} is not defined")
        return self.schema

Example #22

0

Show file

 def test_name_from_sourcefile(self):
     """ A schema with no identifier at all must be rejected with a ValueError """
     schema_path = os.path.join(datadir, 'schema5.yaml')
     # Callable form of assertRaises: the loader is invoked with schema_path and must raise
     self.assertRaises(ValueError, load_raw_schema, schema_path)

Example #23

0

Show file

 def test_load_text(self):
     """ Test loading a schema that is handed over as raw text rather than as a file name """
     schema_path = os.path.join(datadir, 'schema1.yaml')
     with open(schema_path) as yaml_file:
         yaml_text = yaml_file.read()
     # NOTE(review): the trailing arguments are presumably the source file name, date and size
     # that cannot be derived from bare text -- confirm against load_raw_schema's signature
     loaded_schema = load_raw_schema(yaml_text, 'schema1.yaml', "Mon Dec 31 11:25:38 2018", 76)
     self._verify_schema1_content(loaded_schema, 'schema1')

Example #24

0

Show file

 def test_explicit_name(self):
     """ Load schema2.yaml and verify its content under the schema name 'schema2' """
     schema_path = env.input_path('schema2.yaml')
     loaded_schema = load_raw_schema(schema_path)
     self._verify_schema1_content(loaded_schema, 'schema2')