コード例 #1
0
ファイル: view2table.py プロジェクト: cmungall/semantic-sql
def cli(inputs, name: str, index: bool):
    """
    Print SQL that replaces LinkML-defined views with materialized tables.

    For every class in every input schema that has a view definition, a
    ``DROP VIEW`` / ``CREATE TABLE ... AS`` pair is written to stdout,
    optionally followed by one ``CREATE INDEX`` per class slot.

    See https://github.com/cmungall/semantic-sql/issues/9

    Example usage:
    ```
    python -m semsql.sqlutils.view2table src/linkml/rdf.yaml -n rdfs_label_statement | sqlite3 db/pato.db
    ```

    :param inputs: iterable of paths to LinkML schema files
    :param name: restrict output to the class with this name (either the
        class name or its underscored table name); ``None`` selects all
    :param index: when true, also emit an index for each slot of the class
    """
    for schema_path in inputs:
        # The handle itself is not read; SchemaView loads the path on its own.
        with open(schema_path, 'r') as stream:
            schema_view = SchemaView(schema_path)
            schema = schema_view.schema  # carried over from the original; unused below
            for class_name, class_def in schema_view.all_classes().items():
                table = underscore(class_name)
                if not (name is None or str(class_name) == name
                        or table == name):
                    continue
                view_sql = get_viewdef(class_def)
                if view_sql is None:
                    continue
                print(f'DROP VIEW {table};')
                print(f'CREATE TABLE {table} AS {view_sql};')
                if not index:
                    continue
                for slot_name in schema_view.class_slots(class_name):
                    column = underscore(slot_name)
                    print(
                        f'CREATE INDEX {table}_{column} ON {table}({column});'
                    )
コード例 #2
0
 def dir_path(
     self, obj: Union[ClassDefinition, SlotDefinition, TypeDefinition,
                      EnumDefinition]
 ) -> str:
     """Return the path of the Markdown file that documents ``obj``.

     Classes use ``self.formatted_element_name``, slots and enums use the
     underscored element name, and everything else uses the camel-cased
     name. Type definitions are placed in a ``/types`` subdirectory unless
     ``self.no_types_dir`` is set.
     """
     if isinstance(obj, ClassDefinition):
         filename = self.formatted_element_name(obj)
     elif isinstance(obj, (SlotDefinition, EnumDefinition)):
         filename = underscore(obj.name)
     else:
         filename = camelcase(obj.name)

     if isinstance(obj, TypeDefinition) and not self.no_types_dir:
         subdir = '/types'
     else:
         subdir = ''
     return f'{self.directory}{subdir}/{filename}.md'
コード例 #3
0
    def visit_enum(self, enum: EnumDefinition) -> None:
        """
        Write the Markdown page for an enum definition.

        The page goes to the file returned by ``self.dir_path(enum)`` and is
        made up of, in order,
        - Front matter
        - Element header
        - Element properties

        Parameters
        ----------
        enum: linkml_runtime.linkml_model.meta.EnumDefinition
            The enum definition to document

        """
        grand_parent = self.doc_root_title
        target = self.exist_warning(self.dir_path(enum))
        # Everything printed inside this block lands in the enum's page.
        with open(target, 'w', encoding='UTF-8') as enumfile, redirect_stdout(enumfile):
            enum_curie = self.namespaces.uri_or_curie_for(
                str(self.namespaces._base), underscore(enum.name))
            enum_uri = self.namespaces.uri_for(enum_curie)
            self.frontmatter(grand_parent=grand_parent,
                             parent="Enums",
                             title=enum_curie,
                             layout='default')
            self.element_header(obj=enum,
                                name=enum.name,
                                curie=enum_curie,
                                uri=enum_uri)
            self.element_properties(enum)
コード例 #4
0
 def visit_enum(self, enum: EnumDefinition) -> None:
     """Write the Markdown page for ``enum``.

     The file at ``self.dir_path(enum)`` is opened for writing and stdout is
     redirected into it while the element header and the element properties
     are emitted.
     """
     target = self.exist_warning(self.dir_path(enum))
     with open(target, 'w') as enumfile, redirect_stdout(enumfile):
         enum_curie = self.namespaces.uri_or_curie_for(
             self.namespaces._base, underscore(enum.name))
         enum_uri = self.namespaces.uri_for(enum_curie)
         self.element_header(obj=enum,
                             name=enum.name,
                             curie=enum_curie,
                             uri=enum_uri)
         self.element_properties(enum)
コード例 #5
0
ファイル: viewgen.py プロジェクト: cmungall/semantic-sql
def generate_views_from_linkml(schema: SchemaDefinition,
                               view=True,
                               drop_tables=True) -> None:
    """
    Print SQL statements derived from view hints in a LinkML schema.

    Each class for which ``get_viewdef`` returns a definition produces a
    blank line, an optional ``DROP TABLE`` and then either a ``CREATE VIEW``
    or an ``INSERT INTO`` statement on stdout.

    :param schema: LinkML schema whose classes carry the view hints
    :param view: emit ``CREATE VIEW`` when true, ``INSERT INTO`` otherwise
    :param drop_tables: emit ``DROP TABLE`` before each statement when true
    """
    # NOTE(review): the non-view branch prints "INSERT INTO <t> AS <viewdef>";
    # confirm that this is valid SQL for whatever get_viewdef returns.
    statement = 'CREATE VIEW' if view else 'INSERT INTO'
    for class_name, class_def in schema.classes.items():
        viewdef = get_viewdef(schema, class_def)
        sql_table = underscore(class_name)
        if viewdef is None:
            continue
        print()
        if drop_tables:
            print(f'DROP TABLE {sql_table};')
        print(f'{statement} {sql_table} AS {viewdef};')
コード例 #6
0
 def visit_subset(self, subset: SubsetDefinition) -> None:
     """Write the Markdown page for a subset definition.

     The page goes to the file returned by ``self.dir_path(subset)``. After
     the element header it lists, each under its own level-3 heading and
     sorted by name, the schema's classes, mixins, slots, types and enums
     whose ``in_subset`` contains ``subset.name``, and it ends with the
     subset's own element properties.

     :param subset: the subset definition to document
     """
     with open(self.exist_warning(self.dir_path(subset)),
               'w') as subsetfile:
         # Everything printed below is captured into the subset's page.
         with redirect_stdout(subsetfile):
             curie = self.namespaces.uri_or_curie_for(
                 self.namespaces._base, underscore(subset.name))
             uri = self.namespaces.uri_for(curie)
             self.element_header(obj=subset,
                                 name=subset.name,
                                 curie=curie,
                                 uri=uri)
             # TODO: consider showing hierarchy within a subset
             self.header(3, 'Classes')
             for cls in sorted(self.schema.classes.values(),
                               key=lambda c: c.name):
                 if not cls.mixin:
                     if cls.in_subset and subset.name in cls.in_subset:
                         self.bullet(self.class_link(cls, use_desc=True), 0)
             self.header(3, 'Mixins')
             for cls in sorted(self.schema.classes.values(),
                               key=lambda c: c.name):
                 if cls.mixin:
                     if cls.in_subset and subset.name in cls.in_subset:
                         self.bullet(self.class_link(cls, use_desc=True), 0)
             self.header(3, 'Slots')
             for slot in sorted(self.schema.slots.values(),
                                key=lambda s: s.name):
                 if slot.in_subset and subset.name in slot.in_subset:
                     self.bullet(self.slot_link(slot, use_desc=True), 0)
             self.header(3, 'Types')
             # `typ` rather than `type` so the builtin is not shadowed.
             for typ in sorted(self.schema.types.values(),
                               key=lambda s: s.name):
                 if typ.in_subset and subset.name in typ.in_subset:
                     self.bullet(self.type_link(typ, use_desc=True), 0)
             self.header(3, 'Enums')
             for enum in sorted(self.schema.enums.values(),
                                key=lambda s: s.name):
                 if enum.in_subset and subset.name in enum.in_subset:
                     # Link the enum being iterated; the previous code passed
                     # the leftover loop variable of the 'Types' section.
                     self.bullet(self.enum_link(enum, use_desc=True), 0)
             self.element_properties(subset)
コード例 #7
0
    def _link(self,
              obj: Optional[Element],
              *,
              after_link: str = None,
              use_desc: bool = False,
              add_subset: bool = True) -> str:
        """ Build a Markdown link to the page of a schema element.

        When ``obj`` is None or ``self.is_secondary_ref(obj.name)`` is false,
        the result of ``self.bbin(obj)`` is returned instead of a link.

        @param obj: the class, slot, type, subset or other element to link to
        @param after_link: Text to put between link and description
        @param use_desc: passed to ``self.desc_for``; the first line of the
            description it returns is appended after the link
        @param add_subset: accepted but not used by this method
        @return: the link text, optionally followed by ``after_link`` and
            the first line of the description
        """
        if obj is None or not self.is_secondary_ref(obj.name):
            return self.bbin(obj)

        # Work out the visible label and the (extension-less) link target.
        if isinstance(obj, SlotDefinition):
            link_name = self.aliased_slot_name(obj)
            link_ref = underscore(obj.name)
        elif isinstance(obj, TypeDefinition):
            link_name = camelcase(obj.name)
            link_ref = f"{link_name}" if self.no_types_dir else f"types/{link_name}"
        elif isinstance(obj, (ClassDefinition, SubsetDefinition)):
            link_name = camelcase(obj.name)
            link_ref = camelcase(link_name)
        else:
            link_name = link_ref = obj.name

        desc = self.desc_for(obj, use_desc)
        pieces = [f'[{link_name}]({link_ref}.{self.format})']
        if after_link:
            pieces.append(f' {after_link} ')
        if desc:
            first_line = desc.split('\n')[0]
            pieces.append(f' - {first_line}')
        return ''.join(pieces)
コード例 #8
0
    def visit_slot(self, aliased_slot_name: str, slot: SlotDefinition) -> None:
        """Write the Markdown page for a slot definition.

        The page goes to the file returned by ``self.dir_path(slot)`` and
        contains the element header, the mappings, and the sections
        'Domain and Range', 'Parents', 'Children' and 'Used by', followed by
        the slot's element properties.

        :param aliased_slot_name: name shown in the element header; the
            'reifies' bullet is only considered when it equals 'relation'
        :param slot: the slot definition to document
        """
        target = self.exist_warning(self.dir_path(slot))
        with open(target, 'w') as slotfile, redirect_stdout(slotfile):
            slot_curie = self.namespaces.uri_or_curie_for(
                self.namespaces._base, underscore(slot.name))
            slot_uri = self.namespaces.uri_for(slot_curie)
            self.element_header(slot, aliased_slot_name, slot_curie, slot_uri)
            self.mappings(slot)

            self.header(2, 'Domain and Range')
            domain_link = self.class_link(slot.domain)
            cardinality = self.predicate_cardinality(slot)
            range_link = self.class_type_link(slot.range)
            print(f'{domain_link} →{cardinality} {range_link}')

            self.header(2, 'Parents')
            if slot.is_a:
                parent_link = self.slot_link(slot.is_a)
                self.bullet(f' is_a: {parent_link}')

            self.header(2, 'Children')
            if slot.name in sorted(self.synopsis.isarefs):
                child_names = self.synopsis.isarefs[slot.name].slotrefs
                for child in sorted(child_names):
                    self.bullet(f' {self.slot_link(child)}')

            self.header(2, 'Used by')
            if slot.name in sorted(self.synopsis.slotrefs):
                using_classes = self.synopsis.slotrefs[slot.name].classrefs
                for rc in sorted(using_classes):
                    self.bullet(f'{self.class_link(rc)}')
            if aliased_slot_name == 'relation' and slot.subproperty_of:
                # Link the reified slot only when it is declared in this schema.
                if slot.subproperty_of in self.schema.slots:
                    reified = self.slot_link(slot.subproperty_of)
                else:
                    reified = slot.subproperty_of
                self.bullet(f' reifies: {reified}')
            self.element_properties(slot)
コード例 #9
0
    def visit_subset(self, subset: SubsetDefinition) -> None:
        """
        Write the Markdown page for a subset definition.

        After the front matter and element header, the page lists the schema
        elements whose ``in_subset`` contains ``subset.name``, under these
        headings,
        - Classes
        - Mixins
        - Slots
        - Types
        - Enums

        Classes, mixins and slots share one set of already-listed names, so a
        name is bulleted at most once across those three sections.

        Parameters
        ----------
        subset: linkml_runtime.linkml_model.meta.SubsetDefinition
            The subset definition to document

        """
        grand_parent = self.doc_root_title
        parent = "Subsets"
        listed = set()

        def tagged(element):
            # Truthy when the element declares membership of this subset.
            return element.in_subset and subset.name in element.in_subset

        def by_name(element):
            return element.name.lower()

        target = self.exist_warning(self.dir_path(subset))
        with open(target, 'w', encoding='UTF-8') as subsetfile, redirect_stdout(subsetfile):
            curie = self.namespaces.uri_or_curie_for(
                str(self.namespaces._base), underscore(subset.name))
            uri = self.namespaces.uri_for(curie)
            self.frontmatter(grand_parent=grand_parent,
                             parent=parent,
                             title=curie,
                             layout='default')
            self.element_header(obj=subset,
                                name=subset.name,
                                curie=curie,
                                uri=uri)
            # TODO: consider showing hierarchy within a subset
            self.header(3, 'Classes')
            for cls in sorted(self.schema.classes.values(), key=by_name):
                if cls.mixin or not tagged(cls):
                    continue
                if cls.name in listed:
                    continue
                listed.add(cls.name)
                self.bullet(self.class_link(cls, use_desc=True), 0)

            self.header(3, 'Mixins')
            for cls in sorted(self.schema.classes.values(), key=by_name):
                if not cls.mixin or not tagged(cls):
                    continue
                if cls.name in listed:
                    continue
                listed.add(cls.name)
                self.bullet(self.class_link(cls, use_desc=True), 0)

            self.header(3, 'Slots')
            for slot in sorted(self.schema.slots.values(), key=by_name):
                if not tagged(slot):
                    continue
                # An aliased usage slot is listed under the slot it refers to.
                if slot.alias and slot.usage_slot_name:
                    slot = self.schema.slots[slot.usage_slot_name]
                if slot.name in listed:
                    continue
                listed.add(slot.name)
                self.bullet(self.slot_link(slot, use_desc=True), 0)

            self.header(3, 'Types')
            for type in sorted(self.schema.types.values(), key=by_name):
                if tagged(type):
                    self.bullet(self.type_link(type, use_desc=True), 0)

            self.header(3, 'Enums')
            for enum in sorted(self.schema.enums.values(), key=by_name):
                if tagged(enum):
                    self.bullet(self.enum_link(enum, use_desc=True), 0)

            self.element_properties(subset)
コード例 #10
0
    def visit_slot(self, aliased_slot_name: str, slot: SlotDefinition) -> None:
        """
        Write the Markdown page for a slot definition.

        Slots with an alias are skipped entirely. For every other slot the
        page at ``self.dir_path(slot)`` receives, in order,
        - Frontmatter
        - Title, subset badges, description and URI
        - Domain and Range constraints
        - Parents
        - Children
        - Used by
        - Element properties

        Parameters
        ----------
        aliased_slot_name: str
            Only compared against 'relation' to decide whether the
            'reifies' bullet is considered
        slot: linkml_runtime.linkml_model.meta.SlotDefinition
            The slot definition to document

        """
        if slot.alias:
            return

        # (ancestor name, navigation parent, title prefix), checked in order.
        categories = (
            ('related to', 'Predicates', 'Relation'),
            ('node property', 'Node Properties', 'Slot'),
            ('association slot', 'Edge Properties', 'Slot'),
        )

        with open(self.dir_path(slot), 'w') as slotfile, redirect_stdout(slotfile):
            slot_curie = self.namespaces.uri_or_curie_for(
                self.namespaces._base, underscore(slot.name))
            slot_uri = self.namespaces.uri_for(slot_curie)
            ancs = self.ancestors(slot)

            for ancestor, parent, slot_type in categories:
                if ancestor in ancs:
                    break
            else:
                parent, slot_type = 'Other Slots', 'Slot'
            # Mixins are grouped together regardless of their category.
            if slot.mixin:
                parent = 'Slot Mixins'

            self.frontmatter(parent=parent,
                             title=slot_curie,
                             grand_parent='Slots',
                             layout='default')

            simple_name = slot_curie.split(':', 1)[1]
            title = f"{slot_type}: {simple_name}"
            if slot.deprecated:
                title += " _(deprecated)_"
            self.header(1, title)
            for s in slot.in_subset:
                self.badges(s, f'{s}-subset-label')

            self.para(be(slot.description))
            print(f'URI: [{slot_curie}]({slot_uri})')

            self.header(2, 'Domain and Range')
            domain_link = self.class_link(slot.domain)
            cardinality = self.predicate_cardinality(slot)
            range_link = self.class_type_link(slot.range)
            print(f'{domain_link} ->{cardinality} {range_link}')

            self.header(2, 'Parents')
            if slot.is_a:
                parent_link = self.slot_link(slot.is_a)
                self.bullet(f' is_a: {parent_link}')

            self.header(2, 'Children')
            if slot.name in sorted(self.synopsis.isarefs):
                child_names = self.synopsis.isarefs[slot.name].slotrefs
                for child in sorted(child_names):
                    # Aliased children have no page of their own (see the
                    # early return above), so they are not linked.
                    if self.schema.slots[child].alias:
                        continue
                    self.bullet(f' {self.slot_link(child)}')

            self.header(2, 'Used by')
            if slot.name in sorted(self.synopsis.slotrefs):
                using_classes = self.synopsis.slotrefs[slot.name].classrefs
                for rc in sorted(using_classes):
                    self.bullet(f'{self.class_link(rc)}')
            if aliased_slot_name == 'relation' and slot.subproperty_of:
                if slot.subproperty_of in self.schema.slots:
                    reified = self.slot_link(slot.subproperty_of)
                else:
                    reified = slot.subproperty_of
                self.bullet(f' reifies: {reified}')
            self.element_properties(slot)
コード例 #11
0
    def convert(self) -> Dict[str, Any]:
        """
        Converts set of inputs to a schema

        Reads the tab-separated files ``self.core_filename`` and
        ``self.packages_filename`` and builds slots, checklist classes,
        environmental-package classes and checklist/package combination
        classes from them. Along the way the following schema files are
        written through ``self.save_schema``: ``ranges.yaml``, one
        ``<package>.yaml`` per environmental package, ``terms.yaml``,
        ``core.yaml`` and ``checklists.yaml``.

        The returned top-level schema only carries metadata, prefixes and
        the import list: its ``slots`` and ``subsets`` are empty, and its
        ``classes`` entry is replaced by an empty dict once the classes
        have been moved into the saved schemas.

        :return: link schema as a python Dictionary
        """
        # Strip surrounding whitespace from string cells; leave other values alone.
        trim_strings = lambda x: x.strip() if isinstance(x, str) else x
        # Missing cells become "" so that the comparisons below work on strings.
        core_df = pd.read_csv(self.core_filename,
                              sep="\t").fillna("").applymap(trim_strings)
        pkg_df = pd.read_csv(self.packages_filename,
                             sep="\t").fillna("").applymap(trim_strings)
        # Pre-seeded abstract slots; parsed slots are added to this dict below.
        slots = {
            'core field': {
                'abstract': True,
                'description': "basic fields"
            },
            'investigation field': {
                'abstract':
                True,
                'description':
                "field describing aspect of the investigation/study to which the sample belongs"
            },
            'nucleic acid sequence source field': {
                'abstract': True
            },
            'sequencing field': {
                'abstract': True
            },
            'mixs extension field': {
                'abstract': True
            },
            'environment field': {
                'abstract': True,
                'description':
                "field describing environmental aspect of a sample"
            }
        }
        # `classes` is the same object as obj['classes'], so every insertion or
        # deletion below is visible through both names.
        classes = {}
        # Never populated in this method; returned empty inside `obj`.
        subsets = {}
        # Handed to create_slot; presumably filled in there -- TODO confirm.
        enums = {}
        obj = {
            'id': f'http://w3id.org/mixs',
            'name': 'MIxS',
            'description': 'Minimal Information about any Sequence Standard',
            'imports': ['linkml:types', 'checklists', 'core'],
            'prefixes': {
                'linkml': 'https://w3id.org/linkml/',
                'mixs.vocab': 'https://w3id.org/mixs/vocab/',
                'MIXS': 'https://w3id.org/mixs/terms/',
                'MIGS': 'https://w3id.org/mixs/migs/',
            },
            'default_prefix': 'mixs.vocab',
            'slots': {},
            'classes': classes,
            'subsets': subsets
        }

        # TODO: make configurable whether this is in main schema or import
        # Copy every entry of datatype_schema into a fresh 'ranges' schema.
        rschema = new_schema('ranges')
        for k, v in datatype_schema.items():
            rschema[k] = v
        self.save_schema(rschema, 'ranges.yaml')

        # Requirement codes indexed class -> slot and slot -> class. They are
        # filled below but only read by the commented-out block further down.
        cls_slot_req = {}
        slot_cls_req = {}

        # Ids of all core slots, and of those in the 'environment' section.
        core_slots = []
        core_env_slots = []

        # Slot id -> originating core row, used for the per-checklist columns.
        core_slot_dict = {}
        # PARSE CORE
        for _, row in core_df.iterrows():
            s_id, slot = self.create_slot(row, enums=enums)
            # Rows for which create_slot yields no id are skipped.
            if s_id is None:
                continue
            slots[s_id] = slot
            core_slot_dict[s_id] = row
            core_slots.append(s_id)
            if row['Section'] == 'environment':
                core_env_slots.append(s_id)

        # One mixin class per checklist; the core row's column named after the
        # checklist key holds the requirement code for that checklist.
        for checklist, info in CHECKLISTS.items():
            checklist_slot_usage = {}
            checklist_name = info['name']
            for s_id, s_row in core_slot_dict.items():
                cardinality = s_row[checklist]
                # information about whether an item is:
                # - mandatory (M)
                # - conditional mandatory (C)
                # - optional (X)
                # - environment-dependent (E)
                # - or not applicable (-)
                if cardinality != 'E':
                    usage = {}
                    if cardinality == 'M':
                        usage['required'] = True
                    elif cardinality == 'X':
                        usage['required'] = False
                    elif cardinality == 'C':
                        usage['recommended'] = True
                    #elif cardinality == '-':
                    #    usage['comments'] = ['not applicable']
                    # Any other code (including '-') leaves the slot out of
                    # this checklist.
                    if usage != {}:
                        checklist_slot_usage[s_id] = usage
            classes[checklist_name] = {
                'mixin': True,
                'description': info['fullname'],
                'aliases': [info['abbrev']],
                'see_also': info.get('see_also', []),
                #'todos': ['add details here'],
                'slots': list(checklist_slot_usage.keys()),
                'slot_usage': checklist_slot_usage
            }
        classes[CORE_PACKAGE_NAME] = {
            'description':
            'Core package. Do not use this directly, this is used to build other packages',
            'slots': core_slots
        }
        # Class names of the environmental packages, in first-seen order.
        env_packages = []
        # PARSE PACKAGES
        for _, row in pkg_df.iterrows():
            # NOTE(review): assigned here and below but never read in this
            # method.
            in_core_and_package = False
            p = row['Environmental package']
            req = row['Requirement']
            is_required = req == 'M'
            cn = safe(p.lower())
            # First row of a package: create its class, seeded with a copy of
            # the core environment slots.
            if cn not in classes:
                env_packages.append(cn)
                cls_slot_req[cn] = {}
                classes[cn] = {
                    #'is_a': CORE_PACKAGE_NAME,
                    'description': p,
                    'mappings': [],
                    'slots': list(core_env_slots),
                    'slot_usage': {}
                }
            c = classes[cn]

            s_id, slot = self.create_slot(row, enums=enums)

            if s_id is not None:
                c['slot_usage'][s_id] = {'required': is_required}
                cls_slot_req[cn][s_id] = req

                # The first definition of a slot wins; later rows with the
                # same id do not replace it.
                if s_id not in slots:
                    slots[s_id] = slot
                else:
                    in_core_and_package = True
                    # NOTE(review): this annotates the freshly created `slot`,
                    # which is not stored in `slots` in this branch, so the
                    # todo looks like it is discarded -- confirm intent.
                    slot['todos'] = ['this is in both core and packages']

                if s_id not in slot_cls_req:
                    slot_cls_req[s_id] = {}
                slot_cls_req[s_id][cn] = req
                # Core slots are not appended to the package's own slot list.
                if s_id not in core_slots:
                    c['slots'].append(s_id)

        # n_cls = len(cls_slot_req.keys())
        # inf_core_slots = []
        # for s_id, s in slot_cls_req.items():
        #     packages_str = ', '.join(list(s.keys()))
        #     if len(s.keys()) == n_cls:
        #         inf_core_slots.append(s_id)
        #         cmt = "This field is used in all packages"
        #     elif len(s.keys()) == 1:
        #         cmt = f"This field is used uniquely in: {packages_str}"
        #     else:
        #         cmt = f"This field is used in: {len(s.keys())} packages: {packages_str}"
        #     slots[s_id]['comments'].append(cmt)

        for p in env_packages:
            # One combination class per (package, checklist) pair; these stay
            # in `classes` and so end up in the checklists schema saved below.
            for checklist, info in CHECKLISTS.items():
                name = info['name']
                fullname = info['fullname']
                combo = f'{p} {name}'
                classes[combo] = {
                    'is_a':
                    p,
                    'mixins': [name],
                    'description':
                    f'Combinatorial checklist {fullname} with environmental package {p}'
                }
            # Move the package class into its own schema file and register
            # that file as an import of the top-level schema.
            pname = underscore(p).replace("-", "_")
            obj['imports'].append(pname)
            pschema = new_schema(pname)
            pschema['imports'].append('terms')
            pschema['classes'] = {p: classes[p]}
            del classes[p]
            self.save_schema(pschema, f'{pname}.yaml')

        # All slots and enums go into the 'terms' schema.
        slot_schema = new_schema('terms')
        slot_schema['imports'].append('ranges')
        slot_schema['slots'] = slots
        slot_schema['enums'] = enums
        slot_schema['subsets'] = {
            'checklist': {
                'description':
                'A MIxS checklist. These can be combined with packages'
            },
            'package': {
                'description':
                'A MIxS package. These can be combined with checklists'
            },
            'checklist_package_combination': {
                'description': 'A combination of a checklist and a package'
            }
        }
        self.save_schema(slot_schema, 'terms.yaml')

        # NOTE(review): looks up the literal key 'core', while the class was
        # stored under CORE_PACKAGE_NAME -- presumably the two are equal;
        # confirm, otherwise this raises KeyError.
        core_schema = new_schema('core')
        core_schema['imports'].append('terms')
        core_schema['classes'] = {'core': obj['classes']['core']}
        del obj['classes']['core']
        self.save_schema(core_schema, 'core.yaml')

        # Whatever is left (checklist mixins and combination classes) becomes
        # the 'checklists' schema; the returned object keeps no classes.
        checklist_schema = new_schema('checklists')
        checklist_schema['imports'].append('terms')
        checklist_schema['classes'] = obj['classes']
        obj['classes'] = {}
        self.save_schema(checklist_schema, 'checklists.yaml')

        return obj