def fix_schema_metadata(self, schema: SchemaDefinition) -> SchemaDefinition:
    """ Replace the volatile metadata slots in schema with fixed values so output comparisons are stable.

    Each volatile slot is first checked for presence (not None) and then pinned
    to a constant; source_file is reduced to its base name.

    :param schema: schema whose metadata is normalized in place
    :return: the same schema instance, for chaining
    """
    self.assertIsNotNone(schema.generation_date)
    schema.source_file = os.path.basename(schema.source_file)
    schema.generation_date = "2018-12-31 17:23"
    # Remaining volatile slots: verify each is populated, then pin it
    for attr, pinned in (("metamodel_version", "0.5.0"),
                         ("source_file_size", 259),
                         ("source_file_date", "2018-12-31 17:23")):
        self.assertIsNotNone(getattr(schema, attr))
        setattr(schema, attr, pinned)
    return schema
def _verify_schema1_content(self, schema: SchemaDefinition, source_file,
                            addl_checks: Callable[[SchemaDefinition], None] = None) -> None:
    """ Compare schema against the canonical "schema1" content for source_file.

    Volatile slots (version, dates, file size) are type-checked and then copied
    from the actual schema into the expected image before comparison, so only
    the stable content has to match.

    :param schema: loaded schema to verify
    :param source_file: base name used to construct the expected name/id/source_file
    :param addl_checks: optional callback for extra, caller-specific assertions
    """
    expected = loads(f"""{{
  "name": "{source_file}",
  "id": "http://example.org/{source_file}",
  "title": "Load Raw Schema Test",
  "metamodel_version": "0.5.0",
  "source_file": "{source_file}.yaml",
  "source_file_date": "Mon Dec 31 11:25:38 2018",
  "source_file_size": 76,
  "generation_date": "2018-12-31 11:50"
}}""")
    schema.source_file = os.path.basename(schema.source_file)
    if addl_checks:
        addl_checks(schema)
    # Volatile slots: assert the type, then mirror the actual value into expected
    for slot_name, slot_type in (("metamodel_version", str),
                                 ("source_file_date", str),
                                 ("source_file_size", int),
                                 ("generation_date", str)):
        actual_value = getattr(schema, slot_name)
        self.assertTrue(isinstance(actual_value, slot_type))
        setattr(expected, slot_name, actual_value)
    self.assertEqual(expected, loads(as_json(schema)))
def merge_schemas(target: SchemaDefinition, mergee: SchemaDefinition, imported_from: Optional[str] = None,
                  namespaces: Optional[Namespaces] = None) -> None:
    """ Merge mergee into target """
    assert target.name is not None, "Schema name must be supplied"
    if target.license is None:
        target.license = mergee.license
    # Append only the imports target doesn't already have
    new_imports = [imp for imp in mergee.imports if imp not in target.imports]
    target.imports += new_imports
    set_from_schema(mergee)
    if namespaces:
        merge_namespaces(target, mergee, namespaces)
    # Resolve the URI form of imported_from: http URIs and plain names pass
    # through; anything with a ':' is treated as a CURIE.
    # NOTE(review): the CURIE branch dereferences namespaces — looks like it
    # assumes namespaces is supplied whenever imported_from is a CURIE; confirm.
    if imported_from is None:
        imported_from_uri = None
    elif imported_from.startswith("http") or ":" not in imported_from:
        imported_from_uri = imported_from
    else:
        imported_from_uri = namespaces.uri_for(imported_from)
    for section in ("classes", "slots", "types", "subsets"):
        merge_dicts(getattr(target, section), getattr(mergee, section), imported_from, imported_from_uri)
def check_types(s: SchemaDefinition) -> None:
    """ Assert that s carries exactly the expected integer/string types, then clear them. """
    expected = {
        'integer': {'base': 'int', 'from_schema': 'http://example.org/schema5', 'name': 'integer'},
        'string': {'base': 'str', 'from_schema': 'http://example.org/schema4', 'name': 'string'},
    }
    # Round-trip each type through JSON to get a plain-dict image for comparison
    actual = {type_name: as_dict(loads(as_json(type_def))) for type_name, type_def in s.types.items()}
    self.assertEqual(expected, actual)
    # Remove the types so the remainder of the schema can be compared elsewhere
    s.types = None
def check_types(s: SchemaDefinition) -> None:
    """ Compare the schema's types against the recorded schema4.json baseline.

    On the first run the baseline file does not exist: it is written from the
    actual content and the test is failed with a request to rerun.
    Afterwards the types are cleared from the schema.
    """
    # Round-trip each type through JSON to get a plain-dict image
    actual = {type_name: as_dict(loads(as_json(type_def))) for type_name, type_def in s.types.items()}
    output = os.path.join(outputdir, 'schema4.json')
    if not os.path.exists(output):
        # First run: record the baseline, then force a rerun
        with open(output, 'w') as f:
            f.write(as_json(JsonObj(**actual)))
        self.fail(f"File {output} created - rerun test")
    with open(output) as f:
        expected = as_dict(load(f))
    self.assertEqual(expected, actual)
    s.types = None
def merge_schemas(target: SchemaDefinition, mergee: SchemaDefinition, imported_from: Optional[str] = None,
                  namespaces: Optional[Namespaces] = None) -> None:
    """ Merge mergee into target """
    assert target.name is not None, "Schema name must be supplied"
    if target.license is None:
        target.license = mergee.license
    # Append only the imports target doesn't already have
    new_imports = [imp for imp in mergee.imports if imp not in target.imports]
    target.imports += new_imports
    set_from_schema(mergee)
    if namespaces:
        merge_namespaces(target, mergee, namespaces)
    for section in ("classes", "slots", "types"):
        merge_dicts(getattr(target, section), getattr(mergee, section), imported_from)
def load_raw_schema(data: Union[str, dict, TextIO],
                    source_file: Optional[str] = None,
                    source_file_date: Optional[str] = None,
                    source_file_size: Optional[int] = None,
                    base_dir: Optional[str] = None,
                    merge_modules: Optional[bool] = True,
                    emit_metadata: Optional[bool] = True) -> SchemaDefinition:
    """ Load and flatten SchemaDefinition from a file name, a URL or a block of text

    @param data: URL, file name or block of text YAML Object or open file handle
    @param source_file: Source file name for the schema if data is type TextIO
    @param source_file_date: timestamp of source file if data is type TextIO
    @param source_file_size: size of source file if data is type TextIO
    @param base_dir: Working directory or base URL of sources
    @param merge_modules: True means combine modules into one source, false means keep separate
    @param emit_metadata: True means add source file info to the output
    @return: Un-processed Schema Definition object
    """
    def _name_from_url(url) -> str:
        # Last path segment of the URL with its extension stripped (".../foo.yaml" -> "foo")
        return urlparse(url).path.rsplit('/', 1)[-1].rsplit('.', 1)[0]

    if isinstance(data, str):
        # If passing the actual YAML
        if '\n' in data:
            # A newline cannot occur in a file name or URL, so this must be schema text;
            # recurse with the text wrapped in a stream.
            return load_raw_schema(StringIO(data), source_file=source_file, base_dir=base_dir,
                                   source_file_date=source_file_date, source_file_size=source_file_size,
                                   emit_metadata=emit_metadata)
        # Passing a URL or file name -- the file metadata is derived below, not supplied
        assert source_file is None, "source_file parameter not allowed if data is a file or URL"
        assert source_file_date is None, "source_file_date parameter not allowed if data is a file or URL"
        assert source_file_size is None, "source_file_size parameter not allowed if data is a file or URL"
        if '://' in data or (base_dir and '://' in base_dir):
            # URL
            fname = Namespaces.join(base_dir, data) if '://' not in data else data
            req = Request(fname)
            req.add_header("Accept", "text/yaml, application/yaml;q=0.9")
            try:
                response = urlopen(req)
            except HTTPError as e:
                # This is here because the message out of urllib doesn't include the file name
                e.msg = f"{e.filename}"
                raise e
            with response:
                # Recurse on the open stream, with date/size taken from the HTTP headers
                return load_raw_schema(response, fname, response.info()['Last-Modified'],
                                       response.info()['Content-Length'], emit_metadata=emit_metadata)
        else:
            # File name
            if not base_dir:
                fname = os.path.abspath(data)
                base_dir = os.path.dirname(fname)
            else:
                fname = data if os.path.isabs(data) else os.path.abspath(os.path.join(base_dir, data))
            # Recurse on the open file, with date/size taken from the file system
            with open(fname) as f:
                return load_raw_schema(f, fname, time.ctime(os.path.getmtime(fname)), os.path.getsize(fname),
                                       base_dir, emit_metadata=emit_metadata)
    else:
        # Loaded YAML or file handle that references YAML
        schemadefs = copy.deepcopy(data) if isinstance(data, dict) else yaml.load(data, DupCheckYamlLoader)
        if schemadefs is None:
            raise ValueError("Empty schema - cannot process")
        elif not isinstance(schemadefs, dict):
            raise ValueError("Unrecognized schema content - cannot process")

        # Convert the schema into a "name: definition" form.  If any top-level value is not
        # itself a dict, the input is a single anonymous schema body whose name comes from
        # its 'name' entry or, failing that, from its 'id' URL.
        if not all(isinstance(e, dict) for e in schemadefs.values()):
            if 'name' in schemadefs:
                schemaname = schemadefs.pop('name')
            elif 'id' in schemadefs:
                schemaname = _name_from_url(schemadefs['id'])
            else:
                raise ValueError("Unable to determine schema name")
            schema_body = [schemadefs]
            schemadefs = {schemaname: schemadefs}
        else:
            schema_body = list(schemadefs.values())

        def check_is_dict(element: str) -> None:
            """ Verify that element is an instance of a dictionary, mapping empty elements to dictionaries """
            for body_schemaname, body_body in schemadefs.items():
                if element in body_body:
                    if body_body[element] is None:
                        # An empty section in YAML loads as None -- normalize to an empty dict
                        body_body[element] = dict()
                    elif not isinstance(body_body[element], dict):
                        raise ValueError(f'Schema: {body_schemaname} - Element: {element} must be a dictionary')

        def fix_multiples(container: str, element: str) -> None:
            """ A common error is representing a list object as a singleton.  This fixes this problem

            :param container: name of container to fix (e.g. a specific class instance)
            :param element: name of list element to adjust (e.g. notes)
            """
            # Note: multiple bodies in the schema are an at-risk feature.  Doesn't seem to have a real use case.
            for body_body in schema_body:
                if container in body_body:
                    for c in body_body[container].values():
                        if c and element in c and isinstance(c[element], str):
                            c[element] = [c[element]]

        # Validate the basic categories, fixing multiples where appropriate
        for e in ['slots', 'classes', 'types', 'subsets']:
            check_is_dict(e)
            fix_multiples(e, 'in_subset')
            fix_multiples(e, 'apply_to')
        # 'imports' lives at schema (not element) level, so it gets its own list fix-up
        for e in ['imports']:
            for body in schema_body:
                if e in body:
                    if isinstance(body[e], str):
                        body[e] = [body[e]]

        # Add the implicit domain to the slot usages
        for body in schema_body:
            for cname, cls in body.get('classes', {}).items():
                if cls is None:
                    # Empty class definition loads as None -- normalize to an empty dict
                    cls = {}
                    body['classes'][cname] = cls
                elif not isinstance(cls, dict):
                    raise ValueError(f"{TypedNode.yaml_loc(cname)}: class definition is not a structure")
                for uname, usage in cls.get('slot usage', {}).items():
                    if usage is None:
                        usage = {}
                        cls['slot usage'][uname] = usage
                    if 'domain' not in usage:
                        usage['domain'] = cname

        # The first schema becomes the result; any additional bodies are merged into it
        schema: Optional[SchemaDefinition] = None
        for sname, sdef in {k: SchemaDefinition(name=k, **v) for k, v in schemadefs.items()}.items():
            if schema is None:
                schema = sdef
                if source_file:
                    schema.source_file = source_file
                if emit_metadata:
                    # Provenance metadata; omitted when the caller wants reproducible output
                    schema.source_file_date = source_file_date
                    schema.source_file_size = source_file_size
                    schema.generation_date = datetime.now().strftime("%Y-%m-%d %H:%M")
                    schema.metamodel_version = metamodel_version
                set_from_schema(schema)
            else:
                # NOTE(review): relies on a merge_schemas signature that accepts
                # merge_imports -- confirm against the merge_schemas in use
                merge_schemas(schema, sdef, merge_imports=merge_modules)
        return schema
def load_raw_schema(data: Union[str, dict, TextIO],
                    source_file: Optional[str] = None,
                    source_file_date: Optional[str] = None,
                    source_file_size: Optional[int] = None,
                    base_dir: Optional[str] = None) -> SchemaDefinition:
    """ Load and flatten SchemaDefinition from a file name, a URL or a block of text

    @param data: URL, file name or block of text
    @param source_file: Source file name for the schema if data is type TextIO
    @param source_file_date: timestamp of source file if data is type TextIO
    @param source_file_size: size of source file if data is type TextIO
    @param base_dir: Working directory or base URL of sources
    @return: Un-processed Schema Definition object (additional bodies merged into the first)
    @raise ValueError: if the schema is empty, is not a YAML mapping, has a malformed
           section, or its name cannot be determined
    """
    def _name_from_url(url) -> str:
        # Last path segment of the URL with its extension stripped (".../foo.yaml" -> "foo")
        return urlparse(url).path.rsplit('/', 1)[-1].rsplit('.', 1)[0]

    if isinstance(data, str):
        if '\n' in data:
            # Actual data file being passed -- a newline cannot occur in a file name or URL
            return load_raw_schema(StringIO(data), source_file, source_file_date, source_file_size, base_dir)
        # File metadata is derived below, not supplied by the caller
        assert source_file is None, "source_file parameter not allowed if data is a file or URL"
        assert source_file_date is None, "source_file_date parameter not allowed if data is a file or URL"
        assert source_file_size is None, "source_file_size parameter not allowed if data is a file or URL"
        if '://' in data or (base_dir and '://' in base_dir):
            # URL being passed
            fname = Namespaces.join(base_dir, data) if '://' not in data else data
            req = Request(fname)
            req.add_header("Accept", "text/yaml, application/yaml;q=0.9")
            with urlopen(req) as response:
                # Recurse on the open stream, with date/size taken from the HTTP headers
                return load_raw_schema(response, fname, response.info()['Last-Modified'],
                                       response.info()['Content-Length'])
        else:
            # File name being passed
            if not base_dir:
                fname = os.path.abspath(data)
                base_dir = os.path.dirname(fname)
            else:
                fname = data if os.path.isabs(data) else os.path.abspath(os.path.join(base_dir, data))
            # Recurse on the open file, with date/size taken from the file system
            with open(fname) as f:
                return load_raw_schema(f, fname, time.ctime(os.path.getmtime(fname)), os.path.getsize(fname),
                                       base_dir)
    else:
        schemadefs = copy.deepcopy(data) if isinstance(data, dict) else yaml.load(data, DupCheckYamlLoader)
        # Guard against empty or non-mapping input: yaml.load returns None for an empty
        # document, and either case would otherwise raise AttributeError on .values() below
        if schemadefs is None:
            raise ValueError("Empty schema - cannot process")
        elif not isinstance(schemadefs, dict):
            raise ValueError("Unrecognized schema content - cannot process")

        # Convert the schema into a "name: definition" form.  If any top-level value is not
        # itself a dict, the input is a single anonymous schema body whose name comes from
        # its 'name' entry or, failing that, from its 'id' URL.
        if not all(isinstance(e, dict) for e in schemadefs.values()):
            if 'name' in schemadefs:
                schemaname = schemadefs.pop('name')
            elif 'id' in schemadefs:
                schemaname = _name_from_url(schemadefs['id'])
            else:
                raise ValueError("Unable to determine schema name")
            schema_body = [schemadefs]
            schemadefs = {schemaname: schemadefs}
        else:
            schema_body = list(schemadefs.values())

        def check_is_dict(element: str) -> None:
            # Reject top-level sections that are present but are not mappings
            for schemaname, body in schemadefs.items():
                if element in body and not isinstance(body[element], dict):
                    raise ValueError(f'Schema: {schemaname} - {element} must be a dictionary')

        def fix_multiples(container: str, element: str) -> None:
            """ Convert strings to lists in common elements that have both single and multiple options """
            for body in schema_body:
                if container in body:
                    for c in body[container].values():
                        if c and element in c and isinstance(c[element], str):
                            c[element] = [c[element]]

        # Validate the basic categories, fixing singleton lists where appropriate
        for e in ['slots', 'classes', 'types', 'subsets']:
            check_is_dict(e)
            fix_multiples(e, 'in_subset')
            fix_multiples(e, 'apply_to')
        # 'imports' lives at schema (not element) level, so it gets its own list fix-up
        for e in ['imports']:
            for body in schema_body:
                if e in body:
                    if isinstance(body[e], str):
                        body[e] = [body[e]]

        # Add the implicit domain to the slot usages
        for body in schema_body:
            for cname, cls in body.get('classes', {}).items():
                if cls is None:
                    # Empty class definition loads as None -- normalize to an empty dict
                    cls = {}
                    body['classes'][cname] = cls
                for uname, usage in cls.get('slot usage', {}).items():
                    if usage is None:
                        usage = {}
                        cls['slot usage'][uname] = usage
                    if 'domain' not in usage:
                        usage['domain'] = cname

        # The first schema becomes the result; any additional bodies are merged into it
        schema: Optional[SchemaDefinition] = None
        for sname, sdef in {k: SchemaDefinition(name=k, **v) for k, v in schemadefs.items()}.items():
            if schema is None:
                schema = sdef
                if source_file:
                    schema.source_file = source_file
                # Provenance metadata for the generated output
                schema.source_file_date = source_file_date
                schema.source_file_size = source_file_size
                schema.generation_date = datetime.now().strftime("%Y-%m-%d %H:%M")
                schema.metamodel_version = metamodel_version
                set_from_schema(schema)
            else:
                merge_schemas(schema, sdef)
        return schema