def cli(inputs, name: str, index: bool):
    """
    Generates a command that turns a view into a table

    See https://github.com/cmungall/semantic-sql/issues/9

    Example usage:
    ```
    python -m semsql.sqlutils.view2table src/linkml/rdf.yaml -n rdfs_label_statement | sqlite3 db/pato.db
    ```
    """
    # NOTE: the docstring above is kept verbatim because it may be surfaced as
    # command-line help text by whatever decorates this function.
    #
    # inputs: iterable of LinkML schema paths, each loaded with SchemaView.
    # name:   if not None, only the class whose name (or underscored table
    #         name) equals this value is processed.
    # index:  if true, also emit one CREATE INDEX per slot of each class.
    #
    # All SQL is written to stdout via print(); nothing is returned.
    for schema_path in inputs:
        # SchemaView loads the schema from the path itself, so there is no
        # need to open the file here as well.
        sv = SchemaView(schema_path)
        for cn, c in sv.all_classes().items():
            tn = underscore(cn)
            if name is not None and str(cn) != name and tn != name:
                continue
            view = get_viewdef(c)
            if view is None:
                # Class carries no view definition; nothing to materialize.
                continue
            # Replace the view with a table holding the same rows.
            print(f'DROP VIEW {tn};')
            print(f'CREATE TABLE {tn} AS {view};')
            if index:
                for sn in sv.class_slots(cn):
                    colname = underscore(sn)
                    print(f'CREATE INDEX {tn}_{colname} ON {tn}({colname});')
def dir_path(
    self, obj: Union[ClassDefinition, SlotDefinition, TypeDefinition, EnumDefinition]
) -> str:
    """
    Build the path of the Markdown file that documents ``obj``.

    The file stem depends on the element kind: classes use
    ``self.formatted_element_name``, slots and enums use the underscored
    element name, and anything else uses the camel-cased element name.
    Type definitions are placed under a ``/types`` sub-directory unless
    ``self.no_types_dir`` is set.

    :param obj: the schema element to locate
    :return: ``{self.directory}[/types]/{stem}.md``
    """
    if isinstance(obj, ClassDefinition):
        stem = self.formatted_element_name(obj)
    elif isinstance(obj, (SlotDefinition, EnumDefinition)):
        stem = underscore(obj.name)
    else:
        stem = camelcase(obj.name)

    if isinstance(obj, TypeDefinition) and not self.no_types_dir:
        subdir = '/types'
    else:
        subdir = ''

    return f'{self.directory}{subdir}/{stem}.md'
def visit_enum(self, enum: EnumDefinition) -> None:
    """
    Write the Markdown page for an enum definition.

    The page is written to ``self.dir_path(enum)`` (passed through
    ``self.exist_warning``) and consists of, in order:

    - front matter (grand parent, parent ``"Enums"``, title, layout)
    - the element header (name, CURIE and URI)
    - the element properties

    Parameters
    ----------
    enum: linkml_runtime.linkml_model.meta.EnumDefinition
        The enum definition to document

    """
    root_title = self.doc_root_title
    section = "Enums"
    target = self.exist_warning(self.dir_path(enum))
    # Everything the helpers print below is captured into the target file.
    with open(target, 'w', encoding='UTF-8') as out, redirect_stdout(out):
        base = str(self.namespaces._base)
        enum_curie = self.namespaces.uri_or_curie_for(base, underscore(enum.name))
        enum_uri = self.namespaces.uri_for(enum_curie)
        self.frontmatter(
            grand_parent=root_title,
            parent=section,
            title=enum_curie,
            layout='default',
        )
        self.element_header(obj=enum, name=enum.name, curie=enum_curie, uri=enum_uri)
        self.element_properties(enum)
def visit_enum(self, enum: EnumDefinition) -> None:
    """
    Write the Markdown page for an enum definition.

    The page is written to ``self.dir_path(enum)`` (passed through
    ``self.exist_warning``) and contains the element header (name, CURIE,
    URI) followed by the element properties.

    :param enum: the enum definition to document
    """
    # Pin the encoding: without it the output depends on the platform locale
    # and non-ASCII text can fail to encode.
    with open(self.exist_warning(self.dir_path(enum)), 'w', encoding='UTF-8') as enumfile:
        # Helper methods print to stdout; capture that into the file.
        with redirect_stdout(enumfile):
            enum_curie = self.namespaces.uri_or_curie_for(
                self.namespaces._base, underscore(enum.name))
            enum_uri = self.namespaces.uri_for(enum_curie)
            self.element_header(obj=enum, name=enum.name, curie=enum_curie, uri=enum_uri)
            self.element_properties(enum)
def generate_views_from_linkml(schema: SchemaDefinition, view=True, drop_tables=True) -> None:
    """
    Generates SQL statements from view hints in a LinkML schema

    For every class for which ``get_viewdef`` returns a definition, the
    statements are printed to stdout (preceded by a blank line); classes
    without a definition are skipped.

    :param schema: LinkML schema whose classes are scanned for view definitions
    :param view: if true emit ``CREATE VIEW <table> AS <viewdef>``; otherwise
        emit ``INSERT INTO <table> <viewdef>`` to populate a table
    :param drop_tables: if true emit ``DROP TABLE <table>`` before the
        create/insert statement
    """
    for cn, c in schema.classes.items():
        viewdef = get_viewdef(schema, c)
        if viewdef is None:
            continue
        sql_table = underscore(cn)
        print()
        if drop_tables:
            print(f'DROP TABLE {sql_table};')
        if view:
            print(f'CREATE VIEW {sql_table} AS {viewdef};')
        else:
            # INSERT ... SELECT takes no AS keyword; the previous
            # "INSERT INTO t AS <select>" form is a syntax error.
            # NOTE(review): with drop_tables=True the table no longer exists
            # at this point; confirm whether CREATE TABLE ... AS was intended.
            print(f'INSERT INTO {sql_table} {viewdef};')
def visit_subset(self, subset: SubsetDefinition) -> None:
    """
    Write the Markdown page for a subset definition.

    The page is written to ``self.dir_path(subset)`` (passed through
    ``self.exist_warning``). After the element header it lists, under level-3
    headers and sorted by name, every schema element whose ``in_subset``
    contains this subset: Classes (non-mixin), Mixins, Slots, Types and
    Enums. The subset's own element properties are written last.

    :param subset: the subset definition to document
    """

    def is_member(element) -> bool:
        # True when the element declares membership of this subset.
        return bool(element.in_subset) and subset.name in element.in_subset

    def by_name(elements):
        return sorted(elements, key=lambda e: e.name)

    # Pin the encoding so output does not depend on the platform locale.
    with open(self.exist_warning(self.dir_path(subset)), 'w', encoding='UTF-8') as subsetfile:
        # Helper methods print to stdout; capture that into the file.
        with redirect_stdout(subsetfile):
            curie = self.namespaces.uri_or_curie_for(
                self.namespaces._base, underscore(subset.name))
            uri = self.namespaces.uri_for(curie)
            self.element_header(obj=subset, name=subset.name, curie=curie, uri=uri)
            # TODO: consider showing hierarchy within a subset

            self.header(3, 'Classes')
            for cls in by_name(self.schema.classes.values()):
                if not cls.mixin and is_member(cls):
                    self.bullet(self.class_link(cls, use_desc=True), 0)

            self.header(3, 'Mixins')
            for cls in by_name(self.schema.classes.values()):
                if cls.mixin and is_member(cls):
                    self.bullet(self.class_link(cls, use_desc=True), 0)

            self.header(3, 'Slots')
            for slot in by_name(self.schema.slots.values()):
                if is_member(slot):
                    self.bullet(self.slot_link(slot, use_desc=True), 0)

            self.header(3, 'Types')
            for typ in by_name(self.schema.types.values()):
                if is_member(typ):
                    self.bullet(self.type_link(typ, use_desc=True), 0)

            self.header(3, 'Enums')
            for enum in by_name(self.schema.enums.values()):
                if is_member(enum):
                    # Link the enum itself; this previously passed the
                    # leftover loop variable from the Types section.
                    self.bullet(self.enum_link(enum, use_desc=True), 0)

            self.element_properties(subset)
def _link(self, obj: Optional[Element], *, after_link: str = None, use_desc: bool = False,
          add_subset: bool = True) -> str:
    """
    Create a Markdown link to the page of ``obj`` if appropriate.

    When ``obj`` is None, or ``self.is_secondary_ref`` rejects its name, no
    link is built and the result of ``self.bbin(obj)`` is returned instead.

    @param obj: the class, slot, type, subset or other element to link to
    @param after_link: Text to put between link and description
    @param use_desc: forwarded to ``self.desc_for``; when a description comes
        back, its first line is appended after the link
    @param add_subset: accepted for interface compatibility; not used by this method
    @return: ``[name](ref.<self.format>)`` optionally followed by
        `` <after_link> `` and `` - <first line of description>``
    """
    if obj is None or not self.is_secondary_ref(obj.name):
        return self.bbin(obj)

    # Pick the display text and the link target according to the element kind.
    if isinstance(obj, SlotDefinition):
        link_name = self.aliased_slot_name(obj)
        link_ref = underscore(obj.name)
    elif isinstance(obj, TypeDefinition):
        link_name = camelcase(obj.name)
        if self.no_types_dir:
            link_ref = f"{link_name}"
        else:
            link_ref = f"types/{link_name}"
    elif isinstance(obj, (ClassDefinition, SubsetDefinition)):
        link_name = camelcase(obj.name)
        link_ref = camelcase(link_name)
    else:
        link_name = obj.name
        link_ref = link_name

    desc = self.desc_for(obj, use_desc)

    pieces = [f'[{link_name}]({link_ref}.{self.format})']
    if after_link:
        pieces.append(f' {after_link} ')
    if desc:
        first_line = desc.split('\n')[0]
        pieces.append(f' - {first_line}')
    return ''.join(pieces)
def visit_slot(self, aliased_slot_name: str, slot: SlotDefinition) -> None:
    """
    Write the Markdown page for a slot definition.

    The page is written to ``self.dir_path(slot)`` (passed through
    ``self.exist_warning``) and contains, in order: the element header,
    mappings, "Domain and Range", "Parents", "Children", "Used by" and
    finally the element properties.

    :param aliased_slot_name: display name used in the element header; the
        value ``'relation'`` additionally enables the "reifies" bullet
    :param slot: the slot definition to document
    """
    # The page contains a non-ASCII arrow, so the encoding must be pinned;
    # with a locale default such as cp1252 the write would fail.
    with open(self.exist_warning(self.dir_path(slot)), 'w', encoding='UTF-8') as slotfile:
        # Helper methods print to stdout; capture that into the file.
        with redirect_stdout(slotfile):
            slot_curie = self.namespaces.uri_or_curie_for(
                self.namespaces._base, underscore(slot.name))
            slot_uri = self.namespaces.uri_for(slot_curie)
            self.element_header(slot, aliased_slot_name, slot_curie, slot_uri)
            self.mappings(slot)

            self.header(2, 'Domain and Range')
            print(
                f'{self.class_link(slot.domain)} →{self.predicate_cardinality(slot)} '
                f'{self.class_type_link(slot.range)}')

            self.header(2, 'Parents')
            if slot.is_a:
                self.bullet(f' is_a: {self.slot_link(slot.is_a)}')

            self.header(2, 'Children')
            # Plain membership test; sorting the keys first added nothing.
            if slot.name in self.synopsis.isarefs:
                for child in sorted(self.synopsis.isarefs[slot.name].slotrefs):
                    self.bullet(f' {self.slot_link(child)}')

            self.header(2, 'Used by')
            if slot.name in self.synopsis.slotrefs:
                for rc in sorted(self.synopsis.slotrefs[slot.name].classrefs):
                    self.bullet(f'{self.class_link(rc)}')

            if aliased_slot_name == 'relation' and slot.subproperty_of:
                reified = slot.subproperty_of
                # Link to the reified slot when it is part of this schema,
                # otherwise show its bare name.
                target = self.slot_link(reified) if reified in self.schema.slots else reified
                self.bullet(f' reifies: {target}')

            self.element_properties(slot)
def visit_subset(self, subset: SubsetDefinition) -> None:
    """
    Write the Markdown page for a subset definition.

    After the front matter and element header, the page lists the schema
    elements whose ``in_subset`` contains this subset, grouped under level-3
    headers and sorted case-insensitively by name:

    - Classes (non-mixin)
    - Mixins
    - Slots (a slot with both ``alias`` and ``usage_slot_name`` is replaced
      by the slot registered under ``usage_slot_name``)
    - Types
    - Enums

    Classes, mixins and slots are listed at most once per page, tracked by
    name in one shared set. The subset's element properties come last.

    Parameters
    ----------
    subset: linkml_runtime.linkml_model.meta.SubsetDefinition
        The subset definition to document

    """
    root_title = self.doc_root_title
    section = "Subsets"
    already_listed = set()

    def is_member(element):
        # Truthy when the element declares membership of this subset.
        return element.in_subset and subset.name in element.in_subset

    def ordered(elements):
        return sorted(elements, key=lambda e: e.name.lower())

    target = self.exist_warning(self.dir_path(subset))
    # Everything the helpers print below is captured into the target file.
    with open(target, 'w', encoding='UTF-8') as out, redirect_stdout(out):
        curie = self.namespaces.uri_or_curie_for(
            str(self.namespaces._base), underscore(subset.name))
        uri = self.namespaces.uri_for(curie)
        self.frontmatter(
            grand_parent=root_title,
            parent=section,
            title=curie,
            layout='default',
        )
        self.element_header(obj=subset, name=subset.name, curie=curie, uri=uri)
        # TODO: consider showing hierarchy within a subset

        self.header(3, 'Classes')
        for cls in ordered(self.schema.classes.values()):
            if cls.mixin or not is_member(cls):
                continue
            if cls.name in already_listed:
                continue
            already_listed.add(cls.name)
            self.bullet(self.class_link(cls, use_desc=True), 0)

        self.header(3, 'Mixins')
        for cls in ordered(self.schema.classes.values()):
            if not cls.mixin or not is_member(cls):
                continue
            if cls.name in already_listed:
                continue
            already_listed.add(cls.name)
            self.bullet(self.class_link(cls, use_desc=True), 0)

        self.header(3, 'Slots')
        for slot in ordered(self.schema.slots.values()):
            if not is_member(slot):
                continue
            if slot.alias and slot.usage_slot_name:
                # Report the slot registered under the usage name instead.
                slot = self.schema.slots[slot.usage_slot_name]
            if slot.name in already_listed:
                continue
            already_listed.add(slot.name)
            self.bullet(self.slot_link(slot, use_desc=True), 0)

        self.header(3, 'Types')
        for type_def in ordered(self.schema.types.values()):
            if is_member(type_def):
                self.bullet(self.type_link(type_def, use_desc=True), 0)

        self.header(3, 'Enums')
        for enum_def in ordered(self.schema.enums.values()):
            if is_member(enum_def):
                self.bullet(self.enum_link(enum_def, use_desc=True), 0)

        self.element_properties(subset)
def visit_slot(self, aliased_slot_name: str, slot: SlotDefinition) -> None:
    """
    Write the Markdown page for a slot definition.

    Slots that have an ``alias`` are skipped entirely. For every other slot
    the page is written to ``self.dir_path(slot)`` and contains, in order:

    - Frontmatter (parent category derived from the slot's ancestors)
    - Title header, subset badges, description and URI
    - Domain and Range
    - Parents
    - Children (aliased child slots are omitted)
    - Used by
    - Element properties

    Parameters
    ----------
    aliased_slot_name: str
        The value ``'relation'`` enables the additional "reifies" bullet
    slot: linkml_runtime.linkml_model.meta.SlotDefinition
        The slot definition to document

    """
    if slot.alias:
        return

    # Pin the encoding so output does not depend on the platform locale.
    with open(self.dir_path(slot), 'w', encoding='UTF-8') as slotfile:
        # Helper methods print to stdout; capture that into the file.
        with redirect_stdout(slotfile):
            slot_curie = self.namespaces.uri_or_curie_for(
                self.namespaces._base, underscore(slot.name))
            slot_uri = self.namespaces.uri_for(slot_curie)

            # Classify the slot by its first matching ancestor; the order of
            # these checks decides the category when several ancestors match.
            ancs = self.ancestors(slot)
            if 'related to' in ancs:
                category, slot_type = 'Predicates', 'Relation'
            elif 'node property' in ancs:
                category, slot_type = 'Node Properties', 'Slot'
            elif 'association slot' in ancs:
                category, slot_type = 'Edge Properties', 'Slot'
            else:
                category, slot_type = 'Other Slots', 'Slot'
            # Mixins are grouped together regardless of their ancestry.
            parent = 'Slot Mixins' if slot.mixin else category
            grand_parent = 'Slots'

            self.frontmatter(
                parent=parent,
                title=slot_curie,
                grand_parent=grand_parent,
                layout='default',
            )

            # Local part of the CURIE; falls back to the whole string if it
            # contains no colon instead of raising IndexError.
            simple_name = slot_curie.split(':', 1)[-1]
            deprecated_mark = " _(deprecated)_" if slot.deprecated else ""
            self.header(1, f"{slot_type}: {simple_name}" + deprecated_mark)
            for s in slot.in_subset:
                self.badges(s, f'{s}-subset-label')
            self.para(be(slot.description))
            print(f'URI: [{slot_curie}]({slot_uri})')

            self.header(2, 'Domain and Range')
            print(
                f'{self.class_link(slot.domain)} ->{self.predicate_cardinality(slot)} '
                f'{self.class_type_link(slot.range)}')

            self.header(2, 'Parents')
            if slot.is_a:
                self.bullet(f' is_a: {self.slot_link(slot.is_a)}')

            self.header(2, 'Children')
            # Plain membership test; sorting the keys first added nothing.
            if slot.name in self.synopsis.isarefs:
                for child in sorted(self.synopsis.isarefs[slot.name].slotrefs):
                    child_slot = self.schema.slots[child]
                    if not child_slot.alias:
                        self.bullet(f' {self.slot_link(child)}')

            self.header(2, 'Used by')
            if slot.name in self.synopsis.slotrefs:
                for rc in sorted(self.synopsis.slotrefs[slot.name].classrefs):
                    self.bullet(f'{self.class_link(rc)}')

            if aliased_slot_name == 'relation' and slot.subproperty_of:
                reified = slot.subproperty_of
                # Link to the reified slot when it is part of this schema,
                # otherwise show its bare name.
                target = self.slot_link(reified) if reified in self.schema.slots else reified
                self.bullet(f' reifies: {target}')

            self.element_properties(slot)
def convert(self) -> Dict[str, Any]:
    """
    Converts set of inputs to a schema

    Reads the core and packages tab-separated files
    (``self.core_filename`` / ``self.packages_filename``), builds slot, enum
    and class dictionaries from their rows, and writes several schema files
    through ``self.save_schema`` as a side effect:

    - ``ranges.yaml``: entries copied from ``datatype_schema``
    - ``<package>.yaml``: one per environmental package
    - ``terms.yaml``: all slots, enums and the subset descriptions
    - ``core.yaml``: the ``'core'`` class
    - ``checklists.yaml``: all classes remaining after the above

    :return: link schema as a python Dictionary. By the time it is returned
        its ``'classes'`` entry has been replaced by an empty dict (the
        classes were moved into the saved schemas) and its ``'imports'``
        list has one extra entry per environmental package.
    """
    # Strip surrounding whitespace from every string cell; other values pass through.
    trim_strings = lambda x: x.strip() if isinstance(x, str) else x
    # Missing cells become "" so later comparisons work on plain strings.
    core_df = pd.read_csv(self.core_filename, sep="\t").fillna("").applymap(trim_strings)
    pkg_df = pd.read_csv(self.packages_filename, sep="\t").fillna("").applymap(trim_strings)
    # Seed the slot table with the abstract grouping slots.
    slots = {
        'core field': {
            'abstract': True,
            'description': "basic fields"
        },
        'investigation field': {
            'abstract': True,
            'description': "field describing aspect of the investigation/study to which the sample belongs"
        },
        'nucleic acid sequence source field': {
            'abstract': True
        },
        'sequencing field': {
            'abstract': True
        },
        'mixs extension field': {
            'abstract': True
        },
        'environment field': {
            'abstract': True,
            'description': "field describing environmental aspect of a sample"
        }
    }
    classes = {}
    subsets = {}
    enums = {}
    # Top-level schema; 'classes' and 'subsets' alias the dicts above, so
    # later mutations of `classes` are visible through obj['classes'].
    obj = {
        'id': f'http://w3id.org/mixs',
        'name': 'MIxS',
        'description': 'Minimal Information about any Sequence Standard',
        'imports': ['linkml:types', 'checklists', 'core'],
        'prefixes': {
            'linkml': 'https://w3id.org/linkml/',
            'mixs.vocab': 'https://w3id.org/mixs/vocab/',
            'MIXS': 'https://w3id.org/mixs/terms/',
            'MIGS': 'https://w3id.org/mixs/migs/',
        },
        'default_prefix': 'mixs.vocab',
        'slots': {},
        'classes': classes,
        'subsets': subsets
    }
    # TODO: make configurable whether this is in main schema or import
    # Write the datatype definitions out as their own 'ranges' schema.
    rschema = new_schema('ranges')
    for k, v in datatype_schema.items():
        rschema[k] = v
    self.save_schema(rschema, 'ranges.yaml')
    # NOTE(review): cls_slot_req and slot_cls_req are filled in below but are
    # only read by the commented-out block further down.
    cls_slot_req = {}
    slot_cls_req = {}
    core_slots = []
    core_env_slots = []
    core_slot_dict = {}
    # PARSE CORE
    for _, row in core_df.iterrows():
        s_id, slot = self.create_slot(row, enums=enums)
        if s_id is None:
            # No slot id for this row; skip it.
            continue
        slots[s_id] = slot
        core_slot_dict[s_id] = row
        core_slots.append(s_id)
        # Slots from the 'environment' section are later given to every package class.
        if row['Section'] == 'environment':
            core_env_slots.append(s_id)
    # Build one mixin class per checklist. The CHECKLISTS key is used as a
    # column name in the core rows.
    for checklist, info in CHECKLISTS.items():
        checklist_slot_usage = {}
        checklist_name = info['name']
        for s_id, s_row in core_slot_dict.items():
            cardinality = s_row[checklist]
            # information about whether an item is:
            # - mandatory (M)
            # - conditional mandatory (C)
            # - optional (X)
            # - environment-dependent (E)
            # - or not applicable (-)
            if cardinality != 'E':
                usage = {}
                if cardinality == 'M':
                    usage['required'] = True
                elif cardinality == 'X':
                    usage['required'] = False
                elif cardinality == 'C':
                    usage['recommended'] = True
                #elif cardinality == '-':
                #    usage['comments'] = ['not applicable']
                # Any other value (including '-') yields no usage entry, so
                # the slot is left out of this checklist.
                if usage != {}:
                    checklist_slot_usage[s_id] = usage
        classes[checklist_name] = {
            'mixin': True,
            'description': info['fullname'],
            'aliases': [info['abbrev']],
            'see_also': info.get('see_also', []),
            #'todos': ['add details here'],
            'slots': list(checklist_slot_usage.keys()),
            'slot_usage': checklist_slot_usage
        }
    classes[CORE_PACKAGE_NAME] = {
        'description': 'Core package. Do not use this directly, this is used to build other packages',
        'slots': core_slots
    }
    env_packages = []
    # PARSE PACKAGES
    for _, row in pkg_df.iterrows():
        # NOTE(review): in_core_and_package is assigned but never read in this function.
        in_core_and_package = False
        p = row['Environmental package']
        req = row['Requirement']
        is_required = req == 'M'
        cn = safe(p.lower())
        if cn not in classes:
            # First row seen for this package: create its class, starting
            # from a copy of the core environment slots.
            env_packages.append(cn)
            cls_slot_req[cn] = {}
            classes[cn] = {
                #'is_a': CORE_PACKAGE_NAME,
                'description': p,
                'mappings': [],
                'slots': list(core_env_slots),
                'slot_usage': {}
            }
        c = classes[cn]
        s_id, slot = self.create_slot(row, enums=enums)
        if s_id is not None:
            c['slot_usage'][s_id] = {'required': is_required}
            cls_slot_req[cn][s_id] = req
            if s_id not in slots:
                slots[s_id] = slot
            else:
                in_core_and_package = True
                # NOTE(review): this annotates the slot dict just returned by
                # create_slot, which is not stored in `slots` on this branch;
                # it looks like the todo never reaches the saved schema unless
                # create_slot returns a shared object. Confirm intent.
                slot['todos'] = ['this is in both core and packages']
            if s_id not in slot_cls_req:
                slot_cls_req[s_id] = {}
            slot_cls_req[s_id][cn] = req
            # Core slots are not added to the package's own slot list here.
            if s_id not in core_slots:
                c['slots'].append(s_id)
    # Disabled: annotate each slot with a comment on which packages use it.
    # n_cls = len(cls_slot_req.keys())
    # inf_core_slots = []
    # for s_id, s in slot_cls_req.items():
    #     packages_str = ', '.join(list(s.keys()))
    #     if len(s.keys()) == n_cls:
    #         inf_core_slots.append(s_id)
    #         cmt = "This field is used in all packages"
    #     elif len(s.keys()) == 1:
    #         cmt = f"This field is used uniquely in: {packages_str}"
    #     else:
    #         cmt = f"This field is used in: {len(s.keys())} packages: {packages_str}"
    #     slots[s_id]['comments'].append(cmt)
    for p in env_packages:
        # One combination class per (package, checklist) pair; these stay in
        # `classes` and end up in the checklists schema.
        for checklist, info in CHECKLISTS.items():
            name = info['name']
            fullname = info['fullname']
            combo = f'{p} {name}'
            classes[combo] = {
                'is_a': p,
                'mixins': [name],
                'description': f'Combinatorial checklist {fullname} with environmental package {p}'
            }
        # Move the package class out of `classes` into its own schema file,
        # and make the main schema import it.
        pname = underscore(p).replace("-", "_")
        obj['imports'].append(pname)
        pschema = new_schema(pname)
        pschema['imports'].append('terms')
        pschema['classes'] = {p: classes[p]}
        del classes[p]
        self.save_schema(pschema, f'{pname}.yaml')
    # All slots and enums go into the shared 'terms' schema.
    slot_schema = new_schema('terms')
    slot_schema['imports'].append('ranges')
    slot_schema['slots'] = slots
    slot_schema['enums'] = enums
    slot_schema['subsets'] = {
        'checklist': {
            'description': 'A MIxS checklist. These can be combined with packages'
        },
        'package': {
            'description': 'A MIxS package. These can be combined with checklists'
        },
        'checklist_package_combination': {
            'description': 'A combination of a checklist and a package'
        }
    }
    self.save_schema(slot_schema, 'terms.yaml')
    core_schema = new_schema('core')
    core_schema['imports'].append('terms')
    # NOTE(review): the class was stored under CORE_PACKAGE_NAME but is read
    # back under the literal 'core'; this assumes the two are equal. Confirm.
    core_schema['classes'] = {'core': obj['classes']['core']}
    del obj['classes']['core']
    self.save_schema(core_schema, 'core.yaml')
    # Whatever is left (checklist mixins and combination classes) becomes the
    # 'checklists' schema; the returned object keeps no classes of its own.
    checklist_schema = new_schema('checklists')
    checklist_schema['imports'].append('terms')
    checklist_schema['classes'] = obj['classes']
    obj['classes'] = {}
    self.save_schema(checklist_schema, 'checklists.yaml')
    return obj