예제 #1
0
 def fix_schema_metadata(self,
                         schema: SchemaDefinition) -> SchemaDefinition:
     """ Check that the schema metadata is populated, then pin it to fixed values

     Each metadata field is asserted to be non-None before it is overwritten with a constant;
     source_file is reduced to its base name.  Presumably this keeps generated output stable
     across runs and machines -- confirm against the callers.

     @param schema: schema whose metadata is checked and overwritten in place
     @return: the same schema instance that was passed in
     """
     self.assertIsNotNone(schema.generation_date)
     schema.source_file = os.path.basename(schema.source_file)
     schema.generation_date = "2018-12-31 17:23"

     # Remaining fields all follow the same "must be set, then pin" pattern
     pinned_values = (
         ("metamodel_version", "0.5.0"),
         ("source_file_size", 259),
         ("source_file_date", "2018-12-31 17:23"),
     )
     for field_name, pinned in pinned_values:
         self.assertIsNotNone(getattr(schema, field_name))
         setattr(schema, field_name, pinned)
     return schema
예제 #2
0
    def _verify_schema1_content(self, schema: SchemaDefinition, source_file,
                                addl_checks: Callable[[SchemaDefinition], None]=None) -> None:
        """ Compare a loaded schema against the expected "Load Raw Schema Test" content

        The volatile metadata fields (metamodel_version, source_file_date, source_file_size and
        generation_date) are only type-checked: their actual values are copied into the expected
        image before the comparison, so the literal values in the template below never matter.

        @param schema: schema to verify; its source_file is reduced to its base name in place
        @param source_file: name (without extension) used to build the expected name, id and source_file
        @param addl_checks: optional callback invoked with the schema before the metadata checks
        """
        expected = loads(f"""{{
           "name": "{source_file}",
           "id": "http://example.org/{source_file}",
           "title": "Load Raw Schema Test",
           "metamodel_version": "0.5.0",
           "source_file": "{source_file}.yaml",
           "source_file_date": "Mon Dec 31 11:25:38 2018",
           "source_file_size": 76,
           "generation_date": "2018-12-31 11:50"
        }}""")
        schema.source_file = os.path.basename(schema.source_file)
        if addl_checks:
            addl_checks(schema)

        # assertIsInstance reports the offending value and expected type on failure,
        # where assertTrue(isinstance(...)) would only say "False is not true"
        volatile_fields = (
            ("metamodel_version", str),
            ("source_file_date", str),
            ("source_file_size", int),
            ("generation_date", str),
        )
        for field_name, field_type in volatile_fields:
            self.assertIsInstance(getattr(schema, field_name), field_type)
            setattr(expected, field_name, getattr(schema, field_name))

        self.assertEqual(expected, loads(as_json(schema)))
예제 #3
0
def merge_schemas(target: SchemaDefinition,
                  mergee: SchemaDefinition,
                  imported_from: Optional[str] = None,
                  namespaces: Optional[Namespaces] = None) -> None:
    """ Merge mergee into target

    Copies the license if target has none, appends the imports target does not already list,
    and merges the classes, slots, types and subsets of mergee into target.

    @param target: schema that receives the merged content (modified in place)
    @param mergee: schema whose content is merged into target
    @param imported_from: name, URL or CURIE of the import that mergee came from, if any
    @param namespaces: namespace map; required when imported_from is a CURIE
    @raise ValueError: imported_from is a CURIE and no namespaces were supplied
    """
    assert target.name is not None, "Schema name must be supplied"
    if target.license is None:
        target.license = mergee.license

    target.imports += [
        imp for imp in mergee.imports if imp not in target.imports
    ]
    set_from_schema(mergee)

    if namespaces:
        merge_namespaces(target, mergee, namespaces)

    if imported_from is None:
        imported_from_uri = None
    elif imported_from.startswith("http") or ":" not in imported_from:
        # Already a URL, or a bare name with no prefix to expand
        imported_from_uri = imported_from
    elif namespaces is None:
        # A prefixed name can only be expanded through the namespace map; fail with a
        # clear message instead of an AttributeError on None
        raise ValueError(
            f"Cannot resolve import '{imported_from}': no namespaces supplied")
    else:
        imported_from_uri = namespaces.uri_for(imported_from)

    for section in ('classes', 'slots', 'types', 'subsets'):
        merge_dicts(getattr(target, section), getattr(mergee, section),
                    imported_from, imported_from_uri)
예제 #4
0
 def check_types(s: SchemaDefinition) -> None:
     """ Verify the types of s against the expected integer/string definitions, then clear them

     Each type is round-tripped through as_json/loads/as_dict before the comparison.
     s.types is set to None afterwards -- presumably so that later whole-schema
     comparisons ignore the types; confirm against the caller.
     """
     expected_types = {
         'integer': {
             'base': 'int',
             'from_schema': 'http://example.org/schema5',
             'name': 'integer',
         },
         'string': {
             'base': 'str',
             'from_schema': 'http://example.org/schema4',
             'name': 'string',
         },
     }
     actual_types = {}
     for type_name, type_def in s.types.items():
         actual_types[type_name] = as_dict(loads(as_json(type_def)))
     self.assertEqual(expected_types, actual_types)
     s.types = None
예제 #5
0
        def check_types(s: SchemaDefinition) -> None:
            """ Compare the types of s with the baseline stored in schema4.json, then clear them

            If the baseline file does not exist it is created from the current types and the
            test fails with a request to rerun.  s.types is set to None after a successful
            comparison -- presumably so that later whole-schema comparisons ignore the types;
            confirm against the caller.
            """
            actual = {k: as_dict(loads(as_json(v))) for k, v in s.types.items()}
            output = os.path.join(outputdir, 'schema4.json')
            if not os.path.exists(output):
                # Serialize before opening the file: if serialization raised while the file was
                # open for writing, an empty baseline would be left behind and break later runs
                baseline = as_json(JsonObj(**actual))
                with open(output, 'w') as f:
                    f.write(baseline)
                self.fail(f"File {output} created - rerun test")

            with open(output) as f:
                expected = as_dict(load(f))
            self.assertEqual(expected, actual)
            s.types = None
예제 #6
0
def merge_schemas(target: SchemaDefinition,
                  mergee: SchemaDefinition,
                  imported_from: Optional[str] = None,
                  namespaces: Optional[Namespaces] = None) -> None:
    """ Fold the content of mergee into target

    The license is copied when target has none, imports that target does not already list
    are appended, and the classes, slots and types of mergee are merged into target.

    @param target: schema that receives the merged content (modified in place)
    @param mergee: schema whose content is merged into target
    @param imported_from: identifier of the import that mergee came from, passed to merge_dicts
    @param namespaces: namespace map; when truthy the namespaces of both schemas are merged
    """
    assert target.name is not None, "Schema name must be supplied"
    if target.license is None:
        target.license = mergee.license

    # Collect the missing imports first; target.imports is not modified while scanning
    missing_imports = []
    for candidate in mergee.imports:
        if candidate not in target.imports:
            missing_imports.append(candidate)
    target.imports += missing_imports
    set_from_schema(mergee)

    if namespaces:
        merge_namespaces(target, mergee, namespaces)

    for section in ('classes', 'slots', 'types'):
        merge_dicts(getattr(target, section), getattr(mergee, section), imported_from)
예제 #7
0
def load_raw_schema(data: Union[str, dict, TextIO],
                    source_file: Optional[str] = None,
                    source_file_date: Optional[str] = None,
                    source_file_size: Optional[int] = None,
                    base_dir: Optional[str] = None,
                    merge_modules: Optional[bool] = True,
                    emit_metadata: Optional[bool] = True) -> SchemaDefinition:
    """ Load and flatten SchemaDefinition from a file name, a URL or a block of text

    A string that contains a newline is treated as YAML text.  Any other string is treated as a
    URL (when it or base_dir contains '://') or as a file name.  Strings are resolved to an open
    stream and the function recurses; dictionaries and streams are processed directly.

    @param data: URL, file name or block of text YAML Object or open file handle
    @param source_file: Source file name for the schema if data is type TextIO
    @param source_file_date: timestamp of source file if data is type TextIO
    @param source_file_size: size of source file if data is type TextIO
    @param base_dir: Working directory or base URL of sources
    @param merge_modules: True means combine modules into one source, false means keep separate
    @param emit_metadata: True means add source file info to the output
    @return: Un-processed Schema Definition object
    @raise ValueError: the schema is empty, is not a mapping, has no determinable name or
                       contains a section or class that is not a dictionary
    @raise HTTPError: the URL could not be fetched (msg is replaced with the failing file name)
    """
    def _name_from_url(url) -> str:
        """ Return the last path segment of url with its extension removed """
        return urlparse(url).path.rsplit('/', 1)[-1].rsplit('.', 1)[0]

    if isinstance(data, str):
        # If passing the actual YAML
        if '\n' in data:
            return load_raw_schema(StringIO(data),
                                   source_file=source_file,
                                   base_dir=base_dir,
                                   source_file_date=source_file_date,
                                   source_file_size=source_file_size,
                                   merge_modules=merge_modules,
                                   emit_metadata=emit_metadata)

        # Passing a URL or file name
        assert source_file is None, "source_file parameter not allowed if data is a file or URL"
        assert source_file_date is None, "source_file_date parameter not allowed if data is a file or URL"
        assert source_file_size is None, "source_file_size parameter not allowed if data is a file or URL"

        if '://' in data or (base_dir and '://' in base_dir):
            # URL
            fname = Namespaces.join(base_dir,
                                    data) if '://' not in data else data
            req = Request(fname)
            req.add_header("Accept", "text/yaml, application/yaml;q=0.9")
            try:
                response = urlopen(req)
            except HTTPError as e:
                # This is here because the message out of urllib doesn't include the file name
                e.msg = f"{e.filename}"
                raise e
            with response:
                # NOTE(review): header values are strings, so source_file_size is presumably a
                # str here despite the int annotation -- confirm whether a conversion is wanted
                return load_raw_schema(response,
                                       fname,
                                       response.info()['Last-Modified'],
                                       response.info()['Content-Length'],
                                       merge_modules=merge_modules,
                                       emit_metadata=emit_metadata)

        else:
            # File name
            if not base_dir:
                fname = os.path.abspath(data)
                base_dir = os.path.dirname(fname)
            else:
                fname = data if os.path.isabs(data) else os.path.abspath(
                    os.path.join(base_dir, data))
            with open(fname) as f:
                return load_raw_schema(f,
                                       fname,
                                       time.ctime(os.path.getmtime(fname)),
                                       os.path.getsize(fname),
                                       base_dir,
                                       merge_modules=merge_modules,
                                       emit_metadata=emit_metadata)
    else:
        # Loaded YAML or file handle that references YAML
        # NOTE(review): yaml.load with a custom loader -- confirm DupCheckYamlLoader is derived
        # from a safe loader before feeding it untrusted input
        schemadefs = copy.deepcopy(data) if isinstance(
            data, dict) else yaml.load(data, DupCheckYamlLoader)
        if schemadefs is None:
            raise ValueError("Empty schema - cannot process")
        elif not isinstance(schemadefs, dict):
            raise ValueError("Unrecognized schema content - cannot process")

        # Convert the schema into a "name: definition" form
        if not all(isinstance(e, dict) for e in schemadefs.values()):
            if 'name' in schemadefs:
                schemaname = schemadefs.pop('name')
            elif 'id' in schemadefs:
                schemaname = _name_from_url(schemadefs['id'])
            else:
                raise ValueError("Unable to determine schema name")
            schema_body = [schemadefs]
            schemadefs = {schemaname: schemadefs}
        else:
            schema_body = list(schemadefs.values())

        def check_is_dict(element: str) -> None:
            """ Verify that element is an instance of a dictionary, mapping empty elements to dictionaries """
            for body_schemaname, body_body in schemadefs.items():
                if element in body_body:
                    if body_body[element] is None:
                        body_body[element] = dict()
                    elif not isinstance(body_body[element], dict):
                        raise ValueError(
                            f'Schema: {body_schemaname} - Element: {element} must be a dictionary'
                        )

        def fix_multiples(container: str, element: str) -> None:
            """
            A common error is representing a list object as a singleton.  This fixes this problem
            :param container: name of container to fix (e.g. a specific class instance)
            :param element: name of list element to adjust (e.g. notes)
            """
            # Note: multiple bodies in the schema are an at-risk feature.  Doesn't seem to have a real use case.
            for body_body in schema_body:
                if container in body_body:
                    for c in body_body[container].values():
                        if c and element in c and isinstance(c[element], str):
                            c[element] = [c[element]]

        # Validate the basic categories, fixing multiples where appropriate
        for e in ['slots', 'classes', 'types', 'subsets']:
            check_is_dict(e)
            fix_multiples(e, 'in_subset')
            fix_multiples(e, 'apply_to')

        # A single import given as a string becomes a one element list
        for e in ['imports']:
            for body in schema_body:
                if e in body:
                    if isinstance(body[e], str):
                        body[e] = [body[e]]

        # Add the implicit domain to the slot usages
        for body in schema_body:
            for cname, cls in body.get('classes', {}).items():
                if cls is None:
                    cls = {}
                    body['classes'][cname] = cls
                elif not isinstance(cls, dict):
                    raise ValueError(
                        f"{TypedNode.yaml_loc(cname)}: class definition is not a structure"
                    )
                for uname, usage in cls.get('slot usage', {}).items():
                    if usage is None:
                        usage = {}
                        cls['slot usage'][uname] = usage
                    if 'domain' not in usage:
                        usage['domain'] = cname

        schema: Optional[SchemaDefinition] = None
        # Every definition is constructed before the first merge, so a malformed body fails
        # before anything has been merged
        for sdef in [
                SchemaDefinition(name=k, **v) for k, v in schemadefs.items()
        ]:
            if schema is None:
                # The first definition becomes the result and carries the file level metadata
                schema = sdef
                if source_file:
                    schema.source_file = source_file
                if emit_metadata:
                    schema.source_file_date = source_file_date
                    schema.source_file_size = source_file_size
                    schema.generation_date = datetime.now().strftime(
                        "%Y-%m-%d %H:%M")
                schema.metamodel_version = metamodel_version
                set_from_schema(schema)
            else:
                merge_schemas(schema, sdef, merge_imports=merge_modules)
        return schema
예제 #8
0
def load_raw_schema(data: Union[str, dict, TextIO],
                    source_file: Optional[str] = None,
                    source_file_date: Optional[str] = None,
                    source_file_size: Optional[int] = None,
                    base_dir: Optional[str] = None) -> SchemaDefinition:
    """ Load and flatten SchemaDefinition from a file name, a URL or a block of text

    A string that contains a newline is treated as YAML text.  Any other string is treated as a
    URL (when it or base_dir contains '://') or as a file name.  Strings are resolved to an open
    stream and the function recurses; dictionaries and streams are processed directly.

    @param data: URL, file name, block of text, dictionary or open file handle
    @param source_file: Source file name for the schema if data is type TextIO
    @param source_file_date: timestamp of source file if data is type TextIO
    @param source_file_size: size of source file if data is type TextIO
    @param base_dir: Working directory or base URL of sources

    @return: The first schema in the source with any further schemas merged into it
    @raise ValueError: the schema is empty, is not a mapping, has no determinable name or
                       contains a section that is not a dictionary
    """
    def _name_from_url(url) -> str:
        """ Return the last path segment of url with its extension removed """
        return urlparse(url).path.rsplit('/', 1)[-1].rsplit('.', 1)[0]

    if isinstance(data, str):
        if '\n' in data:
            # Actual data file being passed
            return load_raw_schema(StringIO(data), source_file, source_file_date, source_file_size, base_dir)

        assert source_file is None, "source_file parameter not allowed if data is a file or URL"
        assert source_file_date is None, "source_file_date parameter not allowed if data is a file or URL"
        assert source_file_size is None, "source_file_size parameter not allowed if data is a file or URL"

        if '://' in data or (base_dir and '://' in base_dir):
            # URL being passed
            fname = Namespaces.join(base_dir, data) if '://' not in data else data
            req = Request(fname)
            req.add_header("Accept", "text/yaml, application/yaml;q=0.9")
            with urlopen(req) as response:
                return load_raw_schema(response, fname, response.info()['Last-Modified'],
                                       response.info()['Content-Length'])
        else:
            # File name being passed
            if not base_dir:
                fname = os.path.abspath(data)
                base_dir = os.path.dirname(fname)
            else:
                fname = data if os.path.isabs(data) else os.path.abspath(os.path.join(base_dir, data))
            with open(fname) as f:
                return load_raw_schema(f, fname, time.ctime(os.path.getmtime(fname)), os.path.getsize(fname), base_dir)
    else:
        # NOTE(review): yaml.load with a custom loader -- confirm DupCheckYamlLoader is derived
        # from a safe loader before feeding it untrusted input
        schemadefs = copy.deepcopy(data) if isinstance(data, dict) else yaml.load(data, DupCheckYamlLoader)

        # An empty document loads as None and a scalar or list has no values(); report both
        # explicitly instead of failing below with an AttributeError
        if schemadefs is None:
            raise ValueError("Empty schema - cannot process")
        elif not isinstance(schemadefs, dict):
            raise ValueError("Unrecognized schema content - cannot process")

        # Convert the schema into a "name: definition" form
        if not all(isinstance(e, dict) for e in schemadefs.values()):
            if 'name' in schemadefs:
                schemaname = schemadefs.pop('name')
            elif 'id' in schemadefs:
                schemaname = _name_from_url(schemadefs['id'])
            else:
                raise ValueError("Unable to determine schema name")
            schema_body = [schemadefs]
            schemadefs = {schemaname: schemadefs}
        else:
            schema_body = list(schemadefs.values())

        def check_is_dict(element: str) -> None:
            """ Verify that element, when present in a schema body, is a dictionary """
            for schemaname, body in schemadefs.items():
                if element in body and not isinstance(body[element], dict):
                    raise ValueError(f'Schema: {schemaname} - {element} must be a dictionary')

        def fix_multiples(container:  str, element: str) -> None:
            """ Convert strings to lists in common elements that have both single and multiple options """
            for body in schema_body:
                if container in body:
                    for c in body[container].values():
                        if c and element in c and isinstance(c[element], str):
                            c[element] = [c[element]]

        # Validate the basic categories, fixing multiples where appropriate
        for e in ['slots', 'classes', 'types', 'subsets']:
            check_is_dict(e)
            fix_multiples(e, 'in_subset')
            fix_multiples(e, 'apply_to')

        # A single import given as a string becomes a one element list
        for e in ['imports']:
            for body in schema_body:
                if e in body:
                    if isinstance(body[e], str):
                        body[e] = [body[e]]

        # Add the implicit domain to the slot usages
        for body in schema_body:
            for cname, cls in body.get('classes', {}).items():
                if cls is None:
                    cls = {}
                    body['classes'][cname] = cls
                for uname, usage in cls.get('slot usage', {}).items():
                    if usage is None:
                        usage = {}
                        cls['slot usage'][uname] = usage
                    if 'domain' not in usage:
                        usage['domain'] = cname

        schema: SchemaDefinition = None
        for sname, sdef in {k: SchemaDefinition(name=k, **v) for k, v in schemadefs.items()}.items():
            if schema is None:
                # The first definition becomes the result and carries the file level metadata
                schema = sdef
                if source_file:
                    schema.source_file = source_file
                schema.source_file_date = source_file_date
                schema.source_file_size = source_file_size
                schema.generation_date = datetime.now().strftime("%Y-%m-%d %H:%M")
                schema.metamodel_version = metamodel_version
                set_from_schema(schema)
            else:
                merge_schemas(schema, sdef)
        return schema