def test_load_raw_file(self):
    """ Load a schema from a YAML file path and verify the parsed content.

    Also checks that passing ``source_file_size`` together with a file path
    is rejected with an ``AssertionError``.
    """
    schema_path = os.path.join(datadir, 'schema1.yaml')
    loaded = load_raw_schema(schema_path)
    self._verify_schema1_content(loaded, 'schema1')

    # Source file metadata is not expected to be accepted from the caller
    # when the schema is loaded from a path
    with self.assertRaises(AssertionError):
        load_raw_schema(schema_path, source_file_size=117)
def test_model_uris(self):
    """ Test that the variables in meta.yaml match the contents of biolinkml/__init__.py

    The local metamodel and context files must exist, the metamodel YAML must
    pass ``validate_yaml_content`` and the types YAML must carry the expected
    default prefix, id and namespace.
    """
    self.assertTrue(os.path.exists(LOCAL_YAML_PATH))
    self.assertTrue(os.path.exists(LOCAL_CONTEXT_PATH))

    meta_yaml = load_raw_schema(LOCAL_YAML_PATH)
    self.validate_yaml_content(meta_yaml, False)

    types_yaml = load_raw_schema(LOCAL_TYPES_PATH)
    # The default prefix is asserted once; the original repeated this exact assertion
    self.assertEqual(METATYPE_LOCAL_NAME, types_yaml.default_prefix)
    self.assertEqual(METATYPE_URI, types_yaml.id)
    # The default prefix must resolve to the metatype namespace
    self.assertEqual(METATYPE_NAMESPACE, types_yaml.prefixes[types_yaml.default_prefix].prefix_reference)
def test_as_yaml(self):
    """ Check the YAML rendering of schema4 against an inline expected document """
    raw_schema = load_raw_schema(os.path.join(datadir, 'schema4.yaml'))
    schema = self.fix_schema_metadata(raw_schema)
    # NOTE(review): the literal below is reproduced exactly as it appears in this view;
    # line breaks inside it may have been lost upstream -- confirm against the repository copy
    expected_yaml = """generation_date: 2018-12-31 17:23 id: !!python/object/new:biolinkml.utils.metamodelcore.URI - http://example.org/schema4 metamodel_version: 0.5.0 name: !!python/object/new:biolinkml.meta.SchemaDefinitionName - schema4 source_file: schema4.yaml source_file_date: 2018-12-31 17:23 source_file_size: 259 title: Load Raw Schema Test types: integer: base: int from_schema: !!python/object/new:biolinkml.utils.metamodelcore.URI - http://example.org/schema5 name: !!python/object/new:biolinkml.meta.TypeDefinitionName - integer string: base: str from_schema: !!python/object/new:biolinkml.utils.metamodelcore.URI - http://example.org/schema4 name: !!python/object/new:biolinkml.meta.TypeDefinitionName - string """
    self.assertEqual(expected_yaml, as_yaml(schema))
def test_as_yaml(self):
    """ Compare the YAML rendering of schema4 with the stored expected file """
    raw_schema = load_raw_schema(env.input_path('schema4.yaml'))
    schema = self.fix_schema_metadata(raw_schema)
    expected_file = env.expected_path('schema4.yaml')
    # The filter handed to the comparison is the identity function
    env.eval_single_file(expected_file, as_yaml(schema), filtr=lambda s: s)
def __init__(self,
             data: Union[str, TextIO, SchemaDefinition, dict],
             base_dir: Optional[str] = None,
             namespaces: Optional[Namespaces] = None,
             useuris: Optional[bool] = None,
             importmap: Optional[Mapping[str, str]] = None,
             logger: Optional[logging.Logger] = None) -> None:
    """ Build a loader from raw YAML input or from an already constructed SchemaDefinition

    :param data: YAML schema text, python dict loaded from yaml, URL, file name, open file or SchemaDefinition
    :param base_dir: base directory or URL where Schema came from
    :param namespaces: namespaces collector.  A fresh Namespaces() is created when none (or an empty one) is given
    :param useuris: True means class_uri and slot_uri are identifiers.  False means they are mappings.
                    Defaults to True when not supplied
    :param importmap: A map from import entries to URI or file name.
    :param logger: Target Logger, if any.  Defaults to a logger named after the class
    """
    if logger is None:
        logger = logging.getLogger(self.__class__.__name__)
    self.logger = logger

    # A SchemaDefinition is used as is -- anything else goes through the raw loader
    self.schema = data if isinstance(data, SchemaDefinition) else load_raw_schema(data, base_dir=base_dir)

    # Map from URI to source and version tuple
    # NOTE(review): annotated as OrderedDict but initialised with a plain dict literal
    self.loaded: OrderedDict[str, Tuple[str, str]] = {
        self.schema.id: (self.schema.source_file, self.schema.version)
    }
    self.base_dir = self._get_base_dir(base_dir)
    self.namespaces = namespaces or Namespaces()
    self.useuris = True if useuris is None else useuris
    if importmap is not None:
        self.importmap = parse_import_map(importmap, self.base_dir)
    else:
        self.importmap = dict()
    self.synopsis: Optional[SchemaSynopsis] = None
    self.schema_location: Optional[str] = None
    self.schema_defaults: Dict[str, str] = {}       # Map from schema URI to default namespace
def test_multi_schemas(self):
    """ Load schema4 and compare its types with the stored expected JSON

    The nested checker writes the expected file and fails the test the first
    time through, so the test has to be run again once the file exists.
    """
    def check_types(s: SchemaDefinition) -> None:
        def types_as_dicts():
            # Round trip each type through JSON to get plain dictionaries
            return {k: as_dict(loads(as_json(v))) for k, v in s.types.items()}

        output = env.expected_path('schema4.json')
        if not os.path.exists(output):
            with open(output, 'w') as out_fh:
                out_fh.write(as_json(JsonObj(**types_as_dicts())))
            self.fail(f"File {output} created - rerun test")

        with open(output) as in_fh:
            expected = as_dict(load(in_fh))
        self.assertEqual(expected, types_as_dicts())
        # Clear the types once they have been checked
        s.types = None

    loaded = load_raw_schema(env.input_path('schema4.yaml'))
    self._verify_schema1_content(loaded, 'schema4', check_types)
def check_yaml(source_file):
    """ Load source_file and compare its id, name, default prefix, namespace and source file
    with the expected values taken from the enclosing scope

    :param source_file: location handed to load_raw_schema; also the expected source_file value
    """
    model_yaml = load_raw_schema(source_file)
    self.assertEqual(uri, model_yaml.id)
    self.assertEqual(name, model_yaml.name)
    default_prefix = model_yaml.default_prefix
    self.assertEqual(namespace_name, default_prefix)
    # The default prefix has to map to the expected namespace
    prefix_entry = model_yaml.prefixes[model_yaml.default_prefix]
    self.assertEqual(namespace, prefix_entry.prefix_reference)
    self.assertEqual(source_file, model_yaml.source_file)
def test_as_json(self):
    """ Compare the JSON rendering of schema6 with the stored output file

    When the output file is missing it is generated and the test fails,
    asking for a second run.
    """
    source = os.path.join(inputdir, 'schema6.yaml')
    schema = self.fix_schema_metadata(load_raw_schema(source))
    outfile = os.path.join(outputdir, 'schema6.json')
    if os.path.exists(outfile):
        self.assertEqual(load(outfile), loads(as_json(schema)))
    else:
        with open(outfile, 'w') as out_fh:
            out_fh.write(as_json(schema))
        self.fail(f"Generated {outfile} - run test again")
def test_as_yaml(self):
    """ Compare the YAML rendering of schema4 with the stored output file

    When the output file is missing it is generated and the test fails,
    asking for a second run.
    """
    source = os.path.join(inputdir, 'schema4.yaml')
    schema = self.fix_schema_metadata(load_raw_schema(source))
    outfile = os.path.join(outputdir, 'schema4.yaml')
    if not os.path.exists(outfile):
        with open(outfile, 'w') as out_fh:
            out_fh.write(as_yaml(schema))
        self.fail(f"Output file: {outfile} created - run test again")

    with open(outfile) as in_fh:
        expected_text = in_fh.read()
    self.assertEqual(expected_text, as_yaml(schema))
def test_multi_schemas(self):
    """ Load schema4 and compare its types with an inline expected dictionary """
    def check_types(s: SchemaDefinition) -> None:
        expected_types = dict(
            integer=dict(base='int', from_schema='http://example.org/schema5', name='integer'),
            string=dict(base='str', from_schema='http://example.org/schema4', name='string'))
        # Round trip each type through JSON to get plain dictionaries
        actual_types = {k: as_dict(loads(as_json(v))) for k, v in s.types.items()}
        self.assertEqual(expected_types, actual_types)
        # Clear the types once they have been checked
        s.types = None

    loaded = load_raw_schema(os.path.join(datadir, 'schema4.yaml'))
    self._verify_schema1_content(loaded, 'schema4', check_types)
def __init__(self,
             data: Union[str, TextIO, SchemaDefinition, dict],
             base_dir: Optional[str] = None,
             namespaces: Optional[Namespaces] = None) -> None:
    """ Build a loader from raw YAML input or from an already constructed SchemaDefinition

    :param data: YAML schema text, python dict loaded from yaml, URL, file name, open file or SchemaDefinition
    :param base_dir: base directory or URL where Schema came from
    :param namespaces: namespaces collector.  A fresh Namespaces() is created when none (or an empty one) is given
    """
    # A SchemaDefinition is used as is -- anything else goes through the raw loader
    self.schema = data if isinstance(data, SchemaDefinition) else load_raw_schema(data, base_dir=base_dir)
    # Names of the schemas loaded so far, seeded with this schema's name
    self.loaded: Set[str] = {self.schema.name}
    self.base_dir = self._get_base_dir(base_dir)
    self.namespaces = namespaces or Namespaces()
    self.synopsis: Optional[SchemaSynopsis] = None
    self.schema_location: Optional[str] = None
def test_as_json(self):
    """ Check the JSON rendering of schema6 against an inline expected document """
    source = os.path.join(datadir, 'schema6.yaml')
    schema = self.fix_schema_metadata(load_raw_schema(source))
    # Both sides are parsed with loads before they are compared
    expected = loads("""{ "name": "schema6", "id": "http://example.org/schema6.fuzz", "title": "Load Raw Schema Test", "types": [ { "name": "foo", "from_schema": "http://example.org/schema6.fuzz", "base": "str", "uri": "http://example.org/types/String" } ], "slots": [ { "name": "s1", "from_schema": "http://example.org/schema6.fuzz", "domain": "c1", "range": "foo" } ], "classes": [ { "name": "c1", "from_schema": "http://example.org/schema6.fuzz" } ], "metamodel_version": "0.5.0", "source_file": "schema6.yaml", "source_file_date": "2018-12-31 17:23", "source_file_size": 259, "generation_date": "2018-12-31 17:23" }""")
    actual = loads(as_json(schema))
    self.assertEqual(expected, actual)
def test_schema_id(self):
    """ Load schema3 by bare file name relative to a base directory and verify its content """
    loaded = load_raw_schema('schema3.yaml', base_dir=datadir)
    self._verify_schema1_content(loaded, 'schema3')
def resolve(self) -> SchemaDefinition:
    """Reconcile a loaded schema, applying is_a, mixins, apply_to's and other such things.  Also validate the
    content and load a SchemaSynopsis entry

    The method works through the schema in place, in this order: default range and namespaces, imports,
    attribute promotion, slot ownership, slot usages, domain/range validation, apply_to, inverses, merging of
    slots/classes/types, inlining and slot URIs, inverse ranges, name and duplicate checks, enum constraints,
    source file normalisation, tree_root check and finally the synopsis.

    :return: Fully resolved definition (the same object as self.schema)
    :raises ValueError: raised directly for an undeterminable inverse range or an unknown default range.
        Other validation failures are reported through self.raise_value_error / self.raise_value_errors
        (presumably also ValueError -- confirm against their definitions)
    """
    if not self.schema.default_range:
        self.schema.default_range = 'string'
        self.logger.info(f"Default_range not specified. Default set to '{self.schema.default_range}'")

    # Process the namespace declarations
    if not self.schema.default_prefix:
        self.schema.default_prefix = sfx(self.schema.id)
    self.schema_defaults[self.schema.id] = self.schema.default_prefix
    for prefix in self.schema.prefixes.values():
        self.namespaces[prefix.prefix_prefix] = prefix.prefix_reference
    for cmap in self.schema.default_curi_maps:
        self.namespaces.add_prefixmap(cmap, include_defaults=False)
    if not self.namespaces._default:
        # The default prefix is either a URI in its own right or the name of a declared prefix
        if '://' in self.schema.default_prefix:
            self.namespaces._default = self.schema.default_prefix
        elif self.schema.default_prefix in self.namespaces:
            self.namespaces._default = self.namespaces[self.schema.default_prefix]
        else:
            self.raise_value_error(f'Default prefix: {self.schema.default_prefix} is not defined',
                                   self.schema.default_prefix)

    # Process imports
    for imp in self.schema.imports:
        sname = self.importmap.get(str(imp), imp)               # Import map may use CURIE
        sname = self.namespaces.uri_for(sname) if ':' in sname else sname
        sname = self.importmap.get(str(sname), sname)           # It may also use URI or other forms
        import_schemadefinition = \
            load_raw_schema(sname + '.yaml',
                            base_dir=os.path.dirname(self.schema.source_file) if self.schema.source_file else None,
                            merge_modules=self.merge_modules,
                            emit_metadata=self.emit_metadata)
        loaded_schema = (str(sname), import_schemadefinition.version)
        if import_schemadefinition.id in self.loaded:
            # If we've already loaded this, make sure that we've got the same version
            if self.loaded[import_schemadefinition.id][1] != loaded_schema[1]:
                self.raise_value_error(f"Schema {import_schemadefinition.name} - version mismatch",
                                       import_schemadefinition.name)
            # Note: for debugging purposes we also check whether the version came from the same spot.  This should
            #       be loosened to version only once we're sure that everything is working
            # TODO: The test below needs review -- there are cases where it fails because self.loaded[...][0] has the
            #       full path name and loaded_schema[0] is just the local name
            # if self.loaded[import_schemadefinition.id] != loaded_schema:
            #     self.raise_value_error(f"Schema imported from different files: "
            #                            f"{self.loaded[import_schemadefinition.id][0]} : {loaded_schema[0]}")
        else:
            # First time this schema has been seen -- record it and merge it in
            self.loaded[import_schemadefinition.id] = loaded_schema
            merge_schemas(self.schema, import_schemadefinition, imp, self.namespaces,
                          merge_imports=self.merge_modules)
            self.schema_defaults[import_schemadefinition.id] = import_schemadefinition.default_prefix

    self.namespaces._base = self.schema.default_prefix if ':' in self.schema.default_prefix else \
        self.namespaces[self.schema.default_prefix]

    # Promote embedded attribute definitions to first class slots.
    for cls in self.schema.classes.values():
        for attribute in cls.attributes.values():
            mangled_slot_name = mangled_attribute_name(cls.name, attribute.name)
            if mangled_slot_name in self.schema.slots:
                self.raise_value_error(f'Class: "{cls.name}" attribute "{attribute.name}" - '
                                       f'mangled name: {mangled_slot_name} already exists', attribute.name)
            new_slot = SlotDefinition(**attribute.__dict__)
            new_slot.domain_of.append(cls.name)
            new_slot.imported_from = cls.imported_from
            # The original attribute name is kept as the alias unless one was declared
            if not new_slot.alias:
                new_slot.alias = attribute.name
            new_slot.name = mangled_slot_name
            self.schema.slots[new_slot.name] = new_slot
            cls.slots.append(mangled_slot_name)

    # Assign class slot ownership
    for cls in self.schema.classes.values():
        if not isinstance(cls, ClassDefinition):
            name = cls['name'] if 'name' in cls else 'Unknown'
            self.raise_value_error(f'Class "{name} (type: {type(cls)})" definition is not a class definition', name)
        if isinstance(cls.slots, str):
            self.logger.warning(f"File: {self.schema.source_file} Class: {cls.name} Slots are not an array")
            cls.slots = [cls.slots]
        for slotname in cls.slots:
            if slotname in self.schema.slots:
                slot = self.schema.slots[cast(SlotDefinitionName, slotname)]
                slot.owner = cls.name
                if cls.name not in slot.domain_of:
                    slot.domain_of.append(cls.name)
            else:
                self.raise_value_error(f'Class "{cls.name}" - unknown slot: "{slotname}"', slotname)

    # Process slots defined as slot usages
    self.process_slot_usage_definitions()

    # Massage initial set of slots
    for slot in self.schema.slots.values():
        # Propagate domain to containing class
        if slot.domain and slot.domain in self.schema.classes:
            if slot.name not in self.schema.classes[slot.domain].slots:
                slot.owner = slot.name
                # self.schema.classes[slot.domain].slots.append(slot.name)
        elif slot.domain:
            self.raise_value_error(f"slot: {slot.name} - unrecognized domain ({slot.domain})", slot.domain)

        # Validate the slot range
        if slot.range is not None and slot.range not in self.schema.types and \
                slot.range not in self.schema.classes and slot.range not in self.schema.enums:
            self.raise_value_error(f"slot: {slot.name} - unrecognized range ({slot.range})", slot.range)

    # apply to --> mixins
    for cls in self.schema.classes.values():
        for apply_to_cls in cls.apply_to:
            if apply_to_cls in self.schema.classes:
                self.schema.classes[apply_to_cls].mixins.append(cls.name)
            else:
                self.raise_value_error(f'Class "{cls.name}" unknown apply_to target: {apply_to_cls}', apply_to_cls)
        # Class URI's also count as (trivial) mappings
        if cls.class_uri is not None:
            cls.mappings.insert(0, cls.class_uri)
        if cls.class_uri is None or not self.useuris:
            cls.class_uri = \
                self.namespaces.uri_or_curie_for(self.schema_defaults.get(cls.from_schema, sfx(cls.from_schema)),
                                                 camelcase(cls.name))

    # Get the inverse ducks all in a row before we start filling other stuff in
    for slot in self.schema.slots.values():
        if slot.inverse:
            inverse_slot = self.schema.slots.get(slot.inverse, None)
            if inverse_slot:
                if not inverse_slot.inverse:
                    inverse_slot.inverse = slot.name
                elif inverse_slot.inverse != slot.name:
                    self.raise_value_error(f'Slot {slot.name}.inverse ({slot.inverse}) does not match '
                                           f'slot {inverse_slot.name}.inverse ({inverse_slot.inverse})')
            else:
                self.raise_value_error(f'Slot {slot.name}.inverse ({slot.inverse}) is not defined')

    # Update slots with parental information
    merged_slots: List[SlotDefinitionName] = []
    for slot in self.schema.slots.values():
        if not slot.from_schema:
            slot.from_schema = self.schema.id
        self.merge_slot(slot, merged_slots)
        # Add default ranges
        if slot.range is None:
            # Inverses will be handled later on in the process
            if not slot.inverse:
                slot.range = self.schema.default_range

    # Update enums
    merged_enums: List[EnumDefinitionName] = []
    for enum in self.schema.enums.values():
        if not enum.from_schema:
            enum.from_schema = self.schema.id
        # TODO: Need to add "is_a" to enums
        # self.merge_enum(enum, merged_enums)

    # Process the slot_usages
    for cls in self.schema.classes.values():
        self.process_slot_usages(cls)
        if not cls.from_schema:
            cls.from_schema = self.schema.id

    # Merge class with its mixins and the like
    merged_classes: List[ClassDefinitionName] = []
    for cls in self.schema.classes.values():
        self.merge_class(cls, merged_classes)

    # Update types with parental information
    merged_types: List[TypeDefinitionName] = []
    for typ in self.schema.types.values():
        if not typ.base and not typ.typeof:
            self.raise_value_error(f'type "{typ.name}" must declare a type base or parent (typeof)', typ.name)
        if not typ.typeof and not typ.uri:
            self.raise_value_error(f'type "{typ.name}" does not declare a URI', typ.name)
        self.merge_type(typ, merged_types)
        if not typ.from_schema:
            typ.from_schema = self.schema.id

    # Update the subsets as needed
    for ss in self.schema.subsets.values():
        if not ss.from_schema:
            ss.from_schema = self.schema.id

    # Massage initial set of slots
    for slot in self.schema.slots.values():
        # Keys and identifiers must be present
        if bool(slot.key or slot.identifier):
            if slot.required is None:
                slot.required = True
            elif not slot.required:
                self.raise_value_error(f"slot: {slot.name} - key and identifier slots cannot be optional", slot.name)
            if slot.key and slot.identifier:
                self.raise_value_error(f"slot: {slot.name} - A slot cannot be both a key and identifier at the same time",
                                       slot.name)

        # Propagate domain to containing class
        if slot.domain and slot.domain in self.schema.classes:
            if slot.name not in self.schema.classes[slot.domain].slots and not slot.owner:
                slot.owner = slot.name
                # Slot domains to not appear
                # self.schema.classes[slot.domain].slots.append(slot.name)
        elif slot.domain:
            self.raise_value_error(f"slot: {slot.name} - unrecognized domain ({slot.domain})", slot.domain)
        if slot.ifabsent:
            from biolinkml.utils.ifabsent_functions import isabsent_match
            if isabsent_match(slot.ifabsent) is None:
                self.raise_value_error(f"Unrecognized ifabsent action for slot '{slot.name}': '{slot.ifabsent}'",
                                       slot.ifabsent)

        # Keys and identifiers must be present
        # NOTE(review): this repeats the required check made at the top of this loop -- confirm it is still needed
        if bool(slot.key or slot.identifier):
            if slot.required is None:
                slot.required = True
            elif not slot.required:
                self.raise_value_error(f"slot: {slot.name} - key and identifier slots cannot be optional", slot.name)

        # Validate the slot range
        if slot.range is not None and slot.range not in self.schema.types and \
                slot.range not in self.schema.classes and slot.range not in self.schema.enums:
            self.raise_value_error(f"slot: {slot.name} - unrecognized range ({slot.range})", slot.range)

    # Massage classes, propagating class slots entries domain back to the target slots
    for cls in self.schema.classes.values():
        if not isinstance(cls, ClassDefinition):
            name = cls['name'] if 'name' in cls else 'Unknown'
            self.raise_value_error(f'Class "{name} (type: {type(cls)})" definition is not a class definition')
        if isinstance(cls.slots, str):
            self.logger.warning(f"File: {self.schema.source_file} Class: {cls.name} Slots are not an array")
            cls.slots = [cls.slots]
        for slotname in cls.slots:
            if slotname in self.schema.slots:
                slot = self.schema.slots[cast(SlotDefinitionName, slotname)]
            else:
                self.raise_value_error(f'Class "{cls.name}" - unknown slot: "{slotname}"', slotname)

    for slot in self.schema.slots.values():
        if slot.from_schema is None:
            slot.from_schema = self.schema.id
        # Inline any class definitions that don't have identifiers.  Note that keys ARE inlined
        if slot.range in self.schema.classes:
            range_class = self.schema.classes[cast(ClassDefinitionName, slot.range)]
            if slot.inlined_as_list or not any([self.schema.slots[s].identifier or self.schema.slots[s].key
                                                for s in range_class.slots]):
                slot.inlined = True

        # Slot URI's also count as (trivial) mappings
        if slot.slot_uri is not None:
            slot.mappings.insert(0, slot.slot_uri)
        # Assign missing predicates
        if slot.slot_uri is None or not self.useuris:
            slot.slot_uri = \
                self.namespaces.uri_or_curie_for(self.schema_defaults.get(slot.from_schema, sfx(slot.from_schema)),
                                                 self.slot_name_for(slot))

        if slot.subproperty_of and slot.subproperty_of not in self.schema.slots:
            self.raise_value_error(f'Slot: "{slot.name}" - subproperty_of: "{slot.subproperty_of}" '
                                   f'does not reference a slot definition', slot.subproperty_of)

    # Evaluate any slot inverses
    # NOTE(review): this helper is not called anywhere in this method, and its recursive call passes the same
    #               arguments unchanged, so it would not terminate if that branch were reached
    def domain_range_alignment(fwd_slot: SlotDefinition, inverse_slot: SlotDefinition) -> bool:
        """ Determine whether the range of fwd_slot is compatible with the domain of inverse_slot """
        # TODO: Determine what to do about class and slot hierarchy
        if fwd_slot.range and fwd_slot.range not in self.schema.classes:
            raise ValueError(f"Slot '{fwd_slot.name}' range ({fwd_slot.range}) is not an class -- inverse is not possible")
        if fwd_slot.domain:
            if not inverse_slot.range:
                inverse_slot.range = fwd_slot.domain
            elif not domain_range_alignment(fwd_slot, inverse_slot):
                self.logger.warning(f"Slot: {slot.name} and inverse slot: {inverse_slot.name} are not compatible")
        return True

    # Get the inverse domains and ranges sorted
    for slot in self.schema.slots.values():
        if slot.inverse:
            # Note that the inverse OF the inverse will be caught in this same iterator
            inverse_slot = self.schema.slots[slot.inverse]
            if not slot.range:
                # No declared range -- take it from the domain of the inverse
                if inverse_slot.domain:
                    slot.range = inverse_slot.domain
                elif len(inverse_slot.domain_of):
                    if len(inverse_slot.domain_of) > 1:
                        dom_list = ', '.join(inverse_slot.domain_of)
                        self.logger.warning(f"Slot {slot.name}.inverse ({inverse_slot.name}), "
                                            f"has multi domains ({dom_list}) Multi ranges not yet implemented")
                    slot.range = inverse_slot.domain_of[0]
                else:
                    raise ValueError(f"Unable to determine the range of slot `{slot.name}'. "
                                     f"Its inverse ({inverse_slot.name}) has no declared domain")
            elif not inverse_slot.domain and len(inverse_slot.domain_of) == 0:
                inverse_slot.domain = slot.range
            # domain_of is a list of class names, so its members have to be tested rather than the list itself
            elif slot.range != inverse_slot.domain and slot.range not in inverse_slot.domain_of:
                self.logger.warning(f"Range of slot '{slot.name}' ({slot.range}) "
                                    f"does not line with the domain of its inverse ({inverse_slot.name})")

    # Check for duplicate class and type names
    def check_dups(s1: Set[ElementName], s2: Set[ElementName]) -> Tuple[List[ElementName], str]:
        """ Return the elements that occur in both s1 and s2 plus a comma separated list of their names """
        if s1.isdisjoint(s2):
            return [], ''

        # Return an ordered list of d1/d1 tuples
        # For some curious reason, s1.intersection(s2) and s2.intersection(s1) BOTH yield s1 elements
        dups = sorted(s1.intersection(s2))
        dup_locs = list()
        for dup in dups:
            dup_locs += [s1e for s1e in s1 if s1e == dup]
            dup_locs += [s2e for s2e in s2 if s2e == dup]
        return dup_locs, ', '.join(dups)

    classes = set(self.schema.classes.keys())
    self.validate_item_names("class", classes)
    slots = set(self.schema.slots.keys())
    self.validate_item_names("slot", slots)
    types = set(self.schema.types.keys())
    self.validate_item_names("type", types)
    subsets = set(self.schema.subsets.keys())
    self.validate_item_names("subset", subsets)
    enums = set(self.schema.enums.keys())
    self.validate_item_names('enum', enums)

    # Check that the default range is valid
    # The default range only has to exist when at least one slot actually uses it
    default_range_needed = any(slot.range == self.schema.default_range for slot in self.schema.slots.values())
    if default_range_needed and \
            self.schema.default_range not in self.schema.types and \
            self.schema.default_range not in self.schema.classes:
        raise ValueError(f'Unknown default range: "{self.schema.default_range}"')

    # We are currently limited to one key per class
    for cls in self.schema.classes.values():
        class_slots = []
        for sn in cls.slots:
            slot = self.schema.slots[sn]
            if slot.key or slot.identifier:
                class_slots.append(sn)
        if len(class_slots) > 1:
            self.raise_value_error(f'Class "{cls.name}" - multiple keys/identifiers not allowed ({", ".join(class_slots)})',
                                   class_slots[1])

    # Check out all the namespaces
    self.check_prefixes()

    # Cannot have duplicate class or type keys
    # Overlaps between types, enums and classes are errors; the remaining overlaps are only warned about
    dups, items = check_dups(types, classes)
    if items:
        self.raise_value_errors(f"Overlapping type and class names: {items}", dups)
    dups, items = check_dups(enums, classes)
    if items:
        self.raise_value_errors(f"Overlapping enum and class names: {items}", dups)
    dups, items = check_dups(types, enums)
    if items:
        self.raise_value_errors(f"Overlapping type and enum names: {items}", dups)
    dups, items = check_dups(slots, classes)
    if items:
        self.logger_warning(f"Overlapping slot and class names: {items}", dups)
    dups, items = check_dups(subsets, classes)
    if items:
        self.logger_warning(f"Overlapping subset and class names: {items}", dups)
    dups, items = check_dups(types, slots)
    if items:
        self.logger_warning(f"Overlapping type and slot names: {items}", dups)
    dups, items = check_dups(subsets, slots)
    if items:
        self.logger_warning(f"Overlapping subset and slot names: {items}", dups)
    dups, items = check_dups(subsets, types)
    if items:
        self.logger_warning(f"Overlapping subset and type names: {items}", dups)
    dups, items = check_dups(enums, slots)
    if items:
        self.logger_warning(f"Overlapping enum and slot names: {items}", dups)
    dups, items = check_dups(subsets, enums)
    if items:
        self.logger_warning(f"Overlapping subset and enum names: {items}", dups)

    # Check over the various enumeration constraints
    for enum in self.schema.enums.values():
        if enum.code_set_version:
            if enum.code_set_tag:
                self.raise_value_errors(f'Enum: "{enum.name}" cannot have both version and tag',
                                        [enum.code_set_version, enum.code_set_tag])
            if not enum.code_set:
                self.raise_value_error(f'Enum: "{enum.name}" needs a code set to have a version', enum.name)
        if enum.code_set_tag:
            if not enum.code_set:
                self.raise_value_error(f'Enum: "{enum.name}" needs a code set to have a tag', enum.name)
        if enum.pv_formula:
            if not enum.code_set:
                self.raise_value_error(f'Enum: "{enum.name}" needs a code set to have a formula', enum.name)
            if enum.permissible_values:
                self.raise_value_error(f'Enum: "{enum.name}" can have a formula or permissible values but not both',
                                       enum.name)
    for slot in self.schema.slots.values():
        if slot.range and slot.range in self.schema.enums:
            if slot.inlined or slot.inlined_as_list:
                self.raise_value_error(f'Slot: "{slot.name}" enumerations cannot be inlined', slot.range)

    # Make the source file relative if it is locally generated
    self.schema_location = self.schema.source_file
    if self.schema.source_file and '://' not in self.schema.source_file:
        self.schema.source_file = os.path.basename(self.schema.source_file)

    # Make sure there is only one tree_root
    tree_root = None
    for cls in self.schema.classes.values():
        if cls.tree_root:
            if tree_root is not None:
                self.logger.warning(f"Duplicate tree_root: {cls.name} with {tree_root}")
            else:
                tree_root = cls.name

    # Build the synopsis and report whatever it flags
    self.synopsis = SchemaSynopsis(self.schema)
    errs = self.synopsis.errors()
    if errs:
        print("Warning: The following errors were encountered in the schema")
        for errline in errs:
            print("\t" + errline)
        print()

    # Every subset that is referenced somewhere has to be declared
    for subset, referees in self.synopsis.subsetrefs.items():
        if subset not in self.schema.subsets:
            self.raise_value_error(f"Subset: {subset} is not defined", subset)

    return self.schema
def resolve(self) -> SchemaDefinition: """Reconcile a loaded schema, applying is_a, mixins, apply_to's and other such things. Also validate the content and load a SchemaSynopsis entry :return: Fully resolved definition """ if not self.schema.default_range: self.schema.default_range = 'string' print(f"Warning: default_range not specified. Default set to '{self.schema.default_range}'", file=sys.stderr) # Process the namespace declarations if not self.schema.default_prefix: self.schema.default_prefix = sfx(self.schema.id) self.schema_defaults[self.schema.id] = self.schema.default_prefix for prefix in self.schema.prefixes.values(): self.namespaces[prefix.prefix_prefix] = prefix.prefix_reference for cmap in self.schema.default_curi_maps: self.namespaces.add_prefixmap(cmap, include_defaults=False) if not self.namespaces._default: if '://' in self.schema.default_prefix: self.namespaces._default = self.schema.default_prefix elif self.schema.default_prefix in self.namespaces: self.namespaces._default = self.namespaces[self.schema.default_prefix] else: self.raise_value_error(f'Default prefix: {self.schema.default_prefix} is not defined', self.schema.default_prefix) # Process imports for imp in self.schema.imports: sname = self.importmap.get(str(imp), imp) # Import map may use CURIE sname = self.namespaces.uri_for(sname) if ':' in sname else sname sname = self.importmap.get(str(sname), sname) # It may also use URI or other forms import_schemadefinition = \ load_raw_schema(sname + '.yaml', base_dir=os.path.dirname(self.schema.source_file) if self.schema.source_file else None) loaded_schema = (str(sname), import_schemadefinition.version) if import_schemadefinition.id in self.loaded: # If we've already loaded this, make sure that we've got the same version if self.loaded[import_schemadefinition.id][1] != loaded_schema[1]: self.raise_value_error(f"Schema {import_schemadefinition.name} - version mismatch", import_schemadefinition.name) # Note: for debugging purposes we also check 
whether the version came from the same spot. This should # be loosened to version only once we're sure that everything is working if self.loaded[import_schemadefinition.id] != loaded_schema: self.raise_value_error(f"Schema imported from different files: " f"{self.loaded[import_schemadefinition.id][0]} : {loaded_schema[0]}") else: self.loaded[import_schemadefinition.id] = loaded_schema merge_schemas(self.schema, import_schemadefinition, imp, self.namespaces) self.schema_defaults[import_schemadefinition.id] = import_schemadefinition.default_prefix self.namespaces._base = self.schema.default_prefix if ':' in self.schema.default_prefix else \ self.namespaces[self.schema.default_prefix] # Assign class slot ownership for cls in self.schema.classes.values(): if not isinstance(cls, ClassDefinition): name = cls['name'] if 'name' in cls else 'Unknown' self.raise_value_error(f'Class "{name} (type: {type(cls)})" definition is not a class definition', name) if isinstance(cls.slots, str): self.logger.warning(f"File: {self.schema.source_file} Class: {cls.name} Slots are not an array") cls.slots = [cls.slots] for slotname in cls.slots: if slotname in self.schema.slots: slot = self.schema.slots[cast(SlotDefinitionName, slotname)] slot.owner = cls.name if cls.name not in slot.domain_of: slot.domain_of.append(cls.name) else: self.raise_value_error(f'Class "{cls.name}" - unknown slot: "{slotname}"', slotname) # Massage initial set of slots for slot in self.schema.slots.values(): # Propagate domain to containing class if slot.domain and slot.domain in self.schema.classes: if slot.name not in self.schema.classes[slot.domain].slots: slot.owner = slot.name # self.schema.classes[slot.domain].slots.append(slot.name) elif slot.domain: self.raise_value_error(f"slot: {slot.name} - unrecognized domain ({slot.domain})", slot.domain) # Validate the slot range if slot.range is not None and slot.range not in self.schema.types and \ slot.range not in self.schema.classes: 
self.raise_value_error(f"slot: {slot.name} - unrecognized range ({slot.range})", slot.range) # apply to --> mixins for cls in self.schema.classes.values(): for apply_to_cls in cls.apply_to: if apply_to_cls in self.schema.classes: self.schema.classes[apply_to_cls].mixins.append(cls.name) else: self.raise_value_error(f'Class "{cls.name}" unknown apply_to target: {apply_to_cls}', apply_to_cls) # Class URI's also count as (trivial) mappings if cls.class_uri is not None: cls.mappings.insert(0, cls.class_uri) if cls.class_uri is None or not self.useuris: cls.class_uri = \ self.namespaces.uri_or_curie_for(self.schema_defaults.get(cls.from_schema, sfx(cls.from_schema)), camelcase(cls.name)) # Get the inverse ducks all in a row before we start filling other stuff in for slot in self.schema.slots.values(): if slot.inverse: inverse_slot = self.schema.slots.get(slot.inverse, None) if inverse_slot: if not inverse_slot.inverse: inverse_slot.inverse = slot.name elif inverse_slot.inverse != slot.name: self.raise_value_error(f'Slot {slot.name}.inverse ({slot.inverse}) does not match ' f'slot {inverse_slot.name}.inverse ({inverse_slot.inverse})') else: self.raise_value_error(f'Slot {slot.name}.inverse ({slot.inverse}) is not defined') # Update slots with parental information merged_slots: List[SlotDefinitionName] = [] for slot in self.schema.slots.values(): if not slot.from_schema: slot.from_schema = self.schema.id self.merge_slot(slot, merged_slots) # Add default ranges if slot.range is None: # Inverses will be handled later on in the process if not slot.inverse: slot.range = self.schema.default_range # Update classes with is_a and mixin information merged_classes: List[ClassDefinitionName] = [] for cls in self.schema.classes.values(): if not cls.from_schema: cls.from_schema = self.schema.id self.merge_class(cls, merged_classes) # Update types with parental information merged_types: List[TypeDefinitionName] = [] for typ in self.schema.types.values(): if not typ.base and not 
typ.typeof: self.raise_value_error(f'type "{typ.name}" must declare a type base or parent (typeof)', typ.name) if not typ.typeof and not typ.uri: self.raise_value_error(f'type "{typ.name}" does not declare a URI', typ.name) self.merge_type(typ, merged_types) if not typ.from_schema: typ.from_schema = self.schema.id # Update the subsets as needed for ss in self.schema.subsets.values(): if not ss.from_schema: ss.from_schema = self.schema.id # Massage initial set of slots for slot in self.schema.slots.values(): # Keys and identifiers must be present if bool(slot.key or slot.identifier): if slot.required is None: slot.required = True elif not slot.required: self.raise_value_error(f"slot: {slot.name} - key and identifier slots cannot be optional", slot.name) if slot.key and slot.identifier: self.raise_value_error(f"slot: {slot.name} - A slot cannot be both a key and identifier at the same time", slot.name) # Propagate domain to containing class if slot.domain and slot.domain in self.schema.classes: if slot.name not in self.schema.classes[slot.domain].slots and not slot.owner: slot.owner = slot.name # Slot domains to not appear # self.schema.classes[slot.domain].slots.append(slot.name) elif slot.domain: self.raise_value_error(f"slot: {slot.name} - unrecognized domain ({slot.domain})", slot.domain) if slot.ifabsent: from biolinkml.utils.ifabsent_functions import isabsent_match if isabsent_match(slot.ifabsent) is None: self.raise_value_error(f"Unrecognized ifabsent action for slot '{slot.name}': '{slot.ifabsent}'", slot.ifabsent) # Keys and identifiers must be present if bool(slot.key or slot.identifier): if slot.required is None: slot.required = True elif not slot.required: self.raise_value_error(f"slot: {slot.name} - key and identifier slots cannot be optional", slot.name) # Validate the slot range if slot.range is not None and slot.range not in self.schema.types and \ slot.range not in self.schema.classes: self.raise_value_error(f"slot: {slot.name} - unrecognized range 
({slot.range})", slot.range) # Massage classes, propagating class slots entries domain back to the target slots for cls in self.schema.classes.values(): if not isinstance(cls, ClassDefinition): name = cls['name'] if 'name' in cls else 'Unknown' self.raise_value_error(f'Class "{name} (type: {type(cls)})" definition is not a class definition') if isinstance(cls.slots, str): self.logger.warning(f"File: {self.schema.source_file} Class: {cls.name} Slots are not an array") cls.slots = [cls.slots] for slotname in cls.slots: if slotname in self.schema.slots: slot = self.schema.slots[cast(SlotDefinitionName, slotname)] else: self.raise_value_error(f'Class "{cls.name}" - unknown slot: "{slotname}"', slotname) for slot in self.schema.slots.values(): if slot.from_schema is None: slot.from_schema = self.schema.id # Inline any class definitions that don't have identifiers. Note that keys ARE inlined if slot.range in self.schema.classes: range_class = self.schema.classes[cast(ClassDefinitionName, slot.range)] if not any([self.schema.slots[s].identifier for s in range_class.slots]): slot.inlined = True if slot.slot_uri is not None: slot.mappings.insert(0, slot.slot_uri) # Assign missing predicates if slot.slot_uri is None or not self.useuris: slot.slot_uri = \ self.namespaces.uri_or_curie_for(self.schema_defaults.get(slot.from_schema, sfx(slot.from_schema)), self.slot_name_for(slot)) # Evaluate any slot inverses def domain_range_alignment(fwd_slot: SlotDefinition, inverse_slot: SlotDefinition) -> bool: """ Determine whether the range of fwd_slot is compatible with the domain of inverse_slot """ # TODO: Determine what to do about class and slot hierarchy if fwd_slot.range and fwd_slot.range not in self.schema.classes: raise ValueError(f"Slot '{fwd_slot.name}' range ({fwd_slot.range}) is not an class -- inverse is not possible") if fwd_slot.domain: if not inverse_slot.range: inverse_slot.range = fwd_slot.domain elif not domain_range_alignment(fwd_slot, inverse_slot): 
self.logger.warning(f"Slot: {slot.name} and inverse slot: {inverse_slot.name} are not compatible") return True # Get the inverse domains and ranges sorted for slot in self.schema.slots.values(): if slot.inverse: # Note that the inverse OF the inverse will be caught in this same iterator inverse_slot = self.schema.slots[slot.inverse] if not slot.range: if inverse_slot.domain: slot.range = inverse_slot.domain elif len(inverse_slot.domain_of): if len(inverse_slot.domain_of) > 1: dom_list = ', '.join(inverse_slot.domain_of) self.logger.warning(f"Slot {slot.name}.inverse ({inverse_slot.name}), " f"has multi domains ({dom_list}) Multi ranges not yet implemented") slot.range = inverse_slot.domain_of[0] else: raise ValueError(f"Unable to determine the range of slot `{slot.name}'. " f"Its inverse ({inverse_slot.name}) has no declared domain") elif not inverse_slot.domain and len(inverse_slot.domain_of) == 0: inverse_slot.domain = slot.range elif slot.range not in (inverse_slot.domain, inverse_slot.domain_of): self.logger.warning(f"Range of slot '{slot.name}' ({slot.range}) " f"does not line with the domain of its inverse ({inverse_slot.name})") # Check for duplicate class and type names def check_dups(s1: Set[ElementName], s2: Set[ElementName]) -> str: return ', '.join(sorted(s1.intersection(s2))) classes = set(self.schema.classes.keys()) self.validate_item_names("class", classes) slots = set(self.schema.slots.keys()) self.validate_item_names("slot", slots) types = set(self.schema.types.keys()) self.validate_item_names("type", types) subsets = set(self.schema.subsets.keys()) self.validate_item_names("subset", subsets) # Check that the default range is valid if not self.schema.default_range: raise ValueError("Default range is not specified") if self.schema.default_range not in self.schema.types and self.schema.default_range not in self.schema.classes: raise ValueError(f'Unknown default range: "{self.schema.default_range}"') # We are currently limited to one key per class for 
cls in self.schema.classes.values(): class_slots = [] for sn in cls.slots: slot = self.schema.slots[sn] if slot.key: class_slots.append(slot.name) if len(class_slots) > 1: self.raise_value_error(f'Class "{cls.name}" - multiple keys not allowed ({", ".join(class_slots)})', class_slots[1]) # Check out all the namespaces self.check_prefixes() # Cannot have duplicate class or type keys dups = check_dups(classes, types) if dups: raise ValueError(f"Shared class and type names detected: {dups}") dups = check_dups(classes, slots) if dups: self.logger.warning(f"Shared class and slot names: {dups}") dups = check_dups(classes, subsets) if dups: self.logger.warning(f"Shared class and subset names: {dups}") dups = check_dups(slots, types) if dups: self.logger.warning(f"Shared type and slot names: {dups}") dups = check_dups(slots, subsets) if dups: self.logger.warning(f"Shared slot and subset names: {dups}") dups = check_dups(types, subsets) if dups: self.logger.warning(f"Shared type and subset names: {dups}") # Make the source file relative if it is locally generated self.schema_location = self.schema.source_file if self.schema.source_file and '://' not in self.schema.source_file: self.schema.source_file = os.path.basename(self.schema.source_file) # Make sure there is only one tree_root tree_root = None for cls in self.schema.classes.values(): if cls.tree_root: if tree_root is not None: self.logger.warning(f"Duplicate tree_root: {cls.name} with {tree_root}") else: tree_root = cls.name self.synopsis = SchemaSynopsis(self.schema) errs = self.synopsis.errors() if errs: print("Warning: The following errors were encountered in the schema") for errline in errs: print("\t" + errline) print() for subset, referees in self.synopsis.subsetrefs.items(): if subset not in self.schema.subsets: self.raise_value_error(f"Subset: {subset} is not defined", subset) return self.schema
def test_as_json(self):
    """ Test the JSON output representation """
    raw_schema = load_raw_schema(env.input_path('schema6.yaml'))
    schema = self.fix_schema_metadata(raw_schema)
    # Compare the rendered JSON against the stored expected output, unfiltered
    expected_json_path = env.expected_path('schema6.json')
    rendered_json = as_json(schema)
    env.eval_single_file(expected_json_path, rendered_json, filtr=lambda s: s)
def test_base_dir(self):
    """ Load a bare file name, resolving it through the base_dir option """
    loaded_schema = load_raw_schema('schema1.yaml', base_dir=datadir)
    self._verify_schema1_content(loaded_schema, 'schema1')
def test_explicit_name(self):
    """ Load schema2.yaml and check its content under the name 'schema2' """
    schema_path = os.path.join(datadir, 'schema2.yaml')
    named_schema = load_raw_schema(schema_path)
    self._verify_schema1_content(named_schema, 'schema2')
def test_name_from_sourcefile(self):
    """ Loading a schema with no identifier at all should raise a ValueError """
    with self.assertRaises(ValueError):
        # Path lookup stays inside the context so the guarded scope is unchanged
        schema_path = env.input_path('schema5.yaml')
        load_raw_schema(schema_path)
def test_model_access(self):
    """ Make sure that the raw loader can dereference a URL and that the data matches """
    self.validate_yaml_content(load_raw_schema(METAMODEL_URI), True)
def resolve(self) -> SchemaDefinition:
    """Reconcile a loaded schema, applying is_a, mixins, apply_to's and other such things.

    Also validate the content and load a SchemaSynopsis entry.  The steps run in a fixed order:
    default range and namespace setup, imports, a first slot/class massage pass, apply_to
    expansion, slot/class/type merging, a second slot/class massage pass, inlining and slot URI
    assignment, and finally the name, key, prefix and subset checks.

    The schema held in ``self.schema`` is modified in place.

    :return: Fully resolved definition (the same object as ``self.schema``)
    :raises ValueError: raised directly when the default prefix is undefined, the default range
        is missing or unknown, or a class and a type share a name.  Other validation failures go
        through ``self.raise_value_error`` (presumably also a ValueError -- confirm in that helper).
    """
    # Fall back to 'string' when the schema does not name a default range
    if not self.schema.default_range:
        self.schema.default_range = 'string'
        print(
            f"Warning: default_range not specified. Default set to '{self.schema.default_range}'",
            file=sys.stderr)

    # Process the namespace declarations
    if not self.schema.default_prefix:
        self.schema.default_prefix = Namespaces.sfx(self.schema.id)
    for prefix in self.schema.prefixes.values():
        self.namespaces[prefix.prefix_prefix] = prefix.prefix_reference
    for cmap in self.schema.default_curi_maps:
        self.namespaces.add_prefixmap(cmap, include_defaults=False)
    if not self.namespaces._default:
        # A default prefix containing '://' is used as is; otherwise it must be a declared prefix
        if '://' in self.schema.default_prefix:
            self.namespaces._default = self.schema.default_prefix
        elif self.schema.default_prefix in self.namespaces:
            self.namespaces._default = self.namespaces[
                self.schema.default_prefix]
        else:
            raise ValueError(
                f'Default prefix: {self.schema.default_prefix} is not defined'
            )

    # Process imports
    for sname in self.schema.imports:
        # Import names containing ':' are expanded through the namespace table first
        sloc = self.namespaces.uri_for(sname) if ':' in sname else sname
        # self.loaded records locations already merged so each import is loaded only once
        if sloc not in self.loaded:
            self.loaded.add(sloc)
            merge_schemas(
                self.schema,
                load_raw_schema(sloc + '.yaml', base_dir=self.base_dir),
                sloc, self.namespaces)
    self.namespaces._base = self.schema.default_prefix if ':' in self.schema.default_prefix else \
        self.namespaces[self.schema.default_prefix]

    # Massage initial set of slots (first pass, before any merging)
    for slot in self.schema.slots.values():
        # Propagate domain to containing class
        if slot.domain and slot.domain in self.schema.classes:
            if slot.name not in self.schema.classes[slot.domain].slots:
                self.schema.classes[slot.domain].slots.append(slot.name)
        elif slot.domain:
            self.raise_value_error(
                f"slot: {slot.name} - unrecognized domain ({slot.domain})")

        # Keys and identifiers must be present
        if bool(slot.key or slot.identifier):
            if slot.required is None:
                slot.required = True
            elif not slot.required:
                self.raise_value_error(
                    f"slot: {slot.name} - key and identifier slots cannot be optional"
                )

        # Validate the slot range
        if slot.range is not None and slot.range not in self.schema.types and \
                slot.range not in self.schema.classes:
            self.raise_value_error(
                f"slot: {slot.name} - unrecognized range ({slot.range})")

    # Massage classes, propagating class slots entries domain back to the target slots
    for cls in self.schema.classes.values():
        if not isinstance(cls, ClassDefinition):
            name = cls['name'] if 'name' in cls else 'Unknown'
            self.raise_value_error(
                f'Class "{name} (type: {type(cls)})" definition is not a class definition'
            )
        # A single slot name given as a bare string is wrapped into a one-element list
        if isinstance(cls.slots, str):
            print(
                f"File: {self.schema.source_file} Class: {cls.name} Slots are not an array",
                file=sys.stderr)
            cls.slots = [cls.slots]
        for slotname in cls.slots:
            if slotname in self.schema.slots:
                slot = self.schema.slots[cast(SlotDefinitionName, slotname)]
                if slot.domain is None:
                    slot.domain = cls.name
                # NOTE(review): this pass rejects a slot whose domain differs from the class that
                # lists it, while the same check is disabled with a TODO in the second pass below
                # -- confirm which behavior is intended
                elif slot.domain != cls.name:
                    self.raise_value_error(
                        f'Slot: {slot.name} domain ({slot.domain}) '
                        f'does not match declaring class "({cls.name})"')
            else:
                self.raise_value_error(
                    f'Class "{cls.name}" - unknown slot: "{slotname}"')

    # apply to --> mixins
    for cls in self.schema.classes.values():
        for apply_to_cls in cls.apply_to:
            if apply_to_cls in self.schema.classes:
                self.schema.classes[apply_to_cls].mixins.append(cls.name)
            else:
                self.raise_value_error(
                    f'Class "{cls.name}" unknown apply_to target: {apply_to_cls}'
                )
        # Classes without an explicit URI get one built from the default prefix and camelcased name
        if cls.class_uri is None:
            cls.class_uri = self.namespaces.uri_or_curie_for(
                self.schema.default_prefix, camelcase(cls.name))

    # Update slots with parental information
    merged_slots: List[SlotDefinitionName] = []
    for slot in self.schema.slots.values():
        if not slot.from_schema:
            slot.from_schema = self.schema.id
        self.merge_slot(slot, merged_slots)
        # Add default ranges
        if slot.range is None:
            slot.range = self.schema.default_range

    # Update classes with is_a and mixin information
    merged_classes: List[ClassDefinitionName] = []
    for cls in self.schema.classes.values():
        if not cls.from_schema:
            cls.from_schema = self.schema.id
        self.merge_class(cls, merged_classes)

    # Update types with parental information
    merged_types: List[TypeDefinitionName] = []
    for typ in self.schema.types.values():
        if not typ.base and not typ.typeof:
            self.raise_value_error(
                f'type "{typ.name}" must declare a type base or parent (typeof)'
            )
        if not typ.typeof and not typ.uri:
            self.raise_value_error(
                f'type "{typ.name}" does not declare a URI')
        self.merge_type(typ, merged_types)
        if not typ.from_schema:
            typ.from_schema = self.schema.id

    # Update the subsets as needed
    for ss in self.schema.subsets.values():
        if not ss.from_schema:
            ss.from_schema = self.schema.id

    # Massage initial set of slots (second pass)
    # NOTE(review): this loop repeats the first pass statement for statement; presumably it is
    # rerun to cover values changed by the merge steps above -- confirm it is not a leftover
    for slot in self.schema.slots.values():
        # Propagate domain to containing class
        if slot.domain and slot.domain in self.schema.classes:
            if slot.name not in self.schema.classes[slot.domain].slots:
                self.schema.classes[slot.domain].slots.append(slot.name)
        elif slot.domain:
            self.raise_value_error(
                f"slot: {slot.name} - unrecognized domain ({slot.domain})")

        # Keys and identifiers must be present
        if bool(slot.key or slot.identifier):
            if slot.required is None:
                slot.required = True
            elif not slot.required:
                self.raise_value_error(
                    f"slot: {slot.name} - key and identifier slots cannot be optional"
                )

        # Validate the slot range
        if slot.range is not None and slot.range not in self.schema.types and \
                slot.range not in self.schema.classes:
            self.raise_value_error(
                f"slot: {slot.name} - unrecognized range ({slot.range})")

    # Massage classes, propagating class slots entries domain back to the target slots
    for cls in self.schema.classes.values():
        if not isinstance(cls, ClassDefinition):
            name = cls['name'] if 'name' in cls else 'Unknown'
            self.raise_value_error(
                f'Class "{name} (type: {type(cls)})" definition is not a class definition'
            )
        if isinstance(cls.slots, str):
            print(
                f"File: {self.schema.source_file} Class: {cls.name} Slots are not an array",
                file=sys.stderr)
            cls.slots = [cls.slots]
        for slotname in cls.slots:
            if slotname in self.schema.slots:
                slot = self.schema.slots[cast(SlotDefinitionName, slotname)]
                if slot.domain is None:
                    slot.domain = cls.name
                # TODO: fix this check
                # elif slot.domain != cls.name:
                #     self.raise_value_error(f'Slot: {slot.name} domain ({slot.domain}) '
                #                            f'does not match declaring class "({cls.name})"')
            else:
                self.raise_value_error(
                    f'Class "{cls.name}" - unknown slot: "{slotname}"')

    for slot in self.schema.slots.values():
        # Inline any class definitions that don't have identifiers. Note that keys ARE inlined
        if slot.range in self.schema.classes:
            range_class = self.schema.classes[cast(ClassDefinitionName, slot.range)]
            if not any([
                    self.schema.slots[s].identifier
                    for s in range_class.slots
            ]):
                slot.inlined = True
        # Assign missing predicates
        if slot.slot_uri is None:
            slot.slot_uri = self.namespaces.uri_or_curie_for(
                self.schema.default_prefix, self.slot_name_for(slot))

    # Check for duplicate class and type names
    def check_dups(s1: Set[ElementName], s2: Set[ElementName]) -> str:
        # Names present in both sets, sorted and comma separated; empty string when there are none
        return ', '.join(sorted(s1.intersection(s2)))

    classes = set(self.schema.classes.keys())
    slots = set(self.schema.slots.keys())
    types = set(self.schema.types.keys())
    subsets = set(self.schema.subsets.keys())

    # Check that the default range is valid
    if not self.schema.default_range:
        raise ValueError("Default range is not specified")
    if self.schema.default_range not in self.schema.types and self.schema.default_range not in self.schema.classes:
        raise ValueError(
            f'Unknown default range: "{self.schema.default_range}"')

    # We are currently limited to one key per class
    for cls in self.schema.classes.values():
        class_slots = []
        for sn in cls.slots:
            slot = self.schema.slots[sn]
            if slot.key:
                class_slots.append(slot.name)
        if len(class_slots) > 1:
            self.raise_value_error(
                f'Class "{cls.name}" - multiple keys not allowed ({", ".join(class_slots)})'
            )

    # Check out all the namespaces
    self.check_prefixes()

    # Cannot have duplicate class or type keys
    # Only a class/type clash is fatal; every other overlap is reported on stderr as a warning
    dups = check_dups(classes, types)
    if dups:
        raise ValueError(f"Shared class and type names detected: {dups}")
    dups = check_dups(classes, slots)
    if dups:
        print(f"Warning: Shared class and slot names: {dups}", file=sys.stderr)
    dups = check_dups(classes, subsets)
    if dups:
        print(f"Warning: Shared class and subset names: {dups}", file=sys.stderr)
    dups = check_dups(slots, types)
    if dups:
        print(f"Warning: Shared type and slot names: {dups}", file=sys.stderr)
    dups = check_dups(slots, subsets)
    if dups:
        print(f"Warning: Shared slot and subset names: {dups}", file=sys.stderr)
    dups = check_dups(types, subsets)
    if dups:
        print(f"Warning: Shared type and subset names: {dups}", file=sys.stderr)

    # Make the source file relative if it is locally generated
    # The full location is kept in self.schema_location before source_file is reduced to a base name
    self.schema_location = self.schema.source_file
    if self.schema.source_file and '://' not in self.schema.source_file:
        self.schema.source_file = os.path.basename(self.schema.source_file)

    # Every subset referenced in the synopsis must be declared in the schema
    # NOTE(review): 'referees' is never used in the loop body
    self.synopsis = SchemaSynopsis(self.schema)
    for subset, referees in self.synopsis.subsetrefs.items():
        if subset not in self.schema.subsets:
            self.raise_value_error(f"Subset: {subset} is not defined")

    return self.schema
def test_name_from_sourcefile(self):
    """ Loading a schema with no identifier at all should raise a ValueError """
    with self.assertRaises(ValueError):
        # Path construction stays inside the context so the guarded scope is unchanged
        schema_path = os.path.join(datadir, 'schema5.yaml')
        load_raw_schema(schema_path)
def test_load_text(self):
    """ Load a schema from its YAML text rather than from a file name """
    schema_path = os.path.join(datadir, 'schema1.yaml')
    with open(schema_path) as schema_file:
        # The file name, date string and size are passed explicitly alongside the text
        text_schema = load_raw_schema(schema_file.read(), 'schema1.yaml',
                                      "Mon Dec 31 11:25:38 2018", 76)
        self._verify_schema1_content(text_schema, 'schema1')
def test_explicit_name(self):
    """ Load schema2.yaml and check its content under the name 'schema2' """
    named_schema = load_raw_schema(env.input_path('schema2.yaml'))
    self._verify_schema1_content(named_schema, 'schema2')