def setUp(self):
    """Preload the schemaorg-only schema and list its classes and properties."""
    schema = Schema(base_schema=["schema.org"])
    self.se = schema
    # Listing everything here also exercises list_all_classes and
    # list_all_properties before each test runs.
    self.clses = schema.list_all_classes()
    self.props = schema.list_all_properties()
class TestSchemaOrg(unittest.TestCase):
    """Use the BioThings schema to exercise SchemaClass and SchemaProperty.

    Every class and property listed by the schema is looked up again by name
    and described; the tests pass when none of those calls raises.
    """

    def setUp(self):
        """Preload the BioThings schema and list its classes and properties."""
        schema_url = 'https://raw.githubusercontent.com/data2health/schemas/biothings/biothings/biothings_curie_kevin.jsonld'
        self.se = Schema(schema_url)
        self.clses = self.se.list_all_classes()
        self.props = self.se.list_all_properties()

    def test_schemaclass_class(self):
        """ Test the SchemaClass Class using all classes in BioThings schema"""
        for _cls in self.clses:
            # subTest names the offending class in the report and lets the
            # loop continue past a failure.
            with self.subTest(cls=_cls.name):
                self.se.get_class(_cls.name).describe()

    def test_schemaproperty_class(self):
        """ Test the SchemaProperty Class using all properties in BioThings schema """
        for _prop in self.props:
            with self.subTest(prop=_prop.name):
                self.se.get_property(_prop.name).describe()
def setUp(self):
    """Preload the BioThings schema and list its classes and properties."""
    schema_url = 'https://raw.githubusercontent.com/data2health/schemas/biothings/biothings/biothings_curie_kevin.jsonld'
    schema = Schema(schema_url)
    self.se = schema
    self.clses = schema.list_all_classes()
    self.props = schema.list_all_properties()
def __init__(self, se=None):
    """Prepare the lookup tables used to classify mapping keys.

    :arg se: an already-loaded Schema; when falsy, the schema at
        ``self.BIOTHINGS_SCHEMA_PATH`` is loaded instead.
    """
    self.se = se if se else Schema(self.BIOTHINGS_SCHEMA_PATH)
    # list all properties which are descendants of identifiers
    self.id_list = self.se.get_property("identifier", output_type="curie").descendant_properties
    # get all classes defined in biothings schema JSON-LD file
    self.defined_clses = [_item.name for _item in self.se.list_all_defined_classes()]
    # Build the lookup set once instead of once per property.
    defined_names = set(self.defined_clses)
    # list of properties whose "range" is among defined classes
    self.linked_prop_list = [
        _prop.name
        for _prop in self.se.list_all_defined_properties()
        if any(_item.name in defined_names for _item in _prop.range)
    ]
    self.cls_prop_clsf = {}
def test_initialization_with_context_works(self):
    """A Schema built with an explicit @context has the same ``schema`` as ``self.se``."""
    schema_url = 'https://raw.githubusercontent.com/data2health/schemas/biothings/biothings/biothings_curie_kevin.jsonld'
    # The context is borrowed from the local copy of the schema document.
    local_document = load_json_or_yaml(
        os.path.join(_CURRENT, 'data', 'biothings_test.jsonld'))
    self.se_with_context = Schema(schema_url, local_document['@context'])
    self.assertEqual(self.se_with_context.schema, self.se.schema)
def test_schema_should_not_merge_validation_property_on_nested_classes_if_flag_set_to_false(
        self):
    """ Testing merge_recursive_parents function implicitly with merging set to false """
    nested_schema = load_json_or_yaml(
        os.path.join(_CURRENT, 'data', 'nested_schema.json'))

    # (index into @graph, number of validation properties,
    #  description of 'name', type of 'name'); a description of None means
    # the description is not checked for that entry.
    expected = (
        (0, 15, 'The name of the Cvisb Dataset', 'string'),
        (2, 1, 'Test description', 'number'),
        (3, 1, None, 'boolean'),
    )

    def check_validation_properties():
        for index, count, description, type_name in expected:
            props = nested_schema['@graph'][index]['$validation']['properties']
            self.assertEqual(len(props), count)
            if description is not None:
                self.assertEqual(props['name']['description'], description)
            self.assertEqual(props['name']['type'], type_name)

    # test that data is correctly inserted beforehand
    check_validation_properties()
    Schema(nested_schema, validation_merge=False)
    # data should remain the same after schema creation
    check_validation_properties()
class TestSchemaClass(unittest.TestCase):
    """Test the listing and lookup functions of a Schema loaded from the BioThings URL."""

    def setUp(self):
        """Load the BioThings schema used by every test."""
        self.se = Schema('https://raw.githubusercontent.com/data2health/schemas/biothings/biothings/biothings_curie_kevin.jsonld')

    def test_list_all_classes(self):
        """ Test list_all_classes function """
        classes = self.se.list_all_classes()
        names = [item.name for item in classes]
        # the root level class and "Gene" are both listed, as CURIEs
        self.assertIn('schema:Thing', names)
        self.assertIn('bts:Gene', names)
        # a class that does not exist is not listed
        self.assertNotIn('bts:ffff', names)
        # bare labels are not used as names
        self.assertNotIn('Thing', names)
        # items are SchemaClass instances
        self.assertEqual(SchemaClass, type(classes[0]))

    def test_list_all_properties(self):
        """ Test list_all_properties function"""
        properties = self.se.list_all_properties()
        names = [item.name for item in properties]
        # "name" is listed, as a CURIE and not as a bare label
        self.assertIn('schema:name', names)
        self.assertNotIn('name', names)
        # a property that does not exist is not listed
        self.assertNotIn('bts:ffff', names)
        # items are SchemaProperty instances
        self.assertEqual(SchemaProperty, type(properties[0]))

    def test_get_class(self):
        """ Test get_class function"""
        # NOTE(review): other tests refer to this class as "bts:Gene";
        # confirm that the "schema:" prefix here is intentional.
        found = self.se.get_class("schema:Gene")
        self.assertEqual(SchemaClass, type(found))

    def test_get_property(self):
        """ Test get_property function"""
        found = self.se.get_property("ensembl")
        self.assertEqual(SchemaProperty, type(found))
def setUp(self):
    """Load the BioThings schema and check that "ensembl" resolves from every input form."""
    schema_url = 'https://raw.githubusercontent.com/data2health/schemas/biothings/biothings/biothings_curie_kevin.jsonld'
    self.se = Schema(schema_url)
    # The same property addressed by name, by CURIE and by URI, in that order.
    identifiers = (
        "ensembl",
        "bts:ensembl",
        "http://schema.biothings.io/ensembl",
    )
    for identifier in identifiers:
        sp = self.se.get_property(identifier)
        self.assertEqual(sp.name, "bts:ensembl")
        self.assertEqual(sp.uri, "http://schema.biothings.io/ensembl")
        self.assertEqual(sp.label, "ensembl")
class TestSchemaOrg(unittest.TestCase):
    """Using SchemaOrg Schema to test all functions in biothings_schema"""

    # Keyword arguments of each lookup: the default output type first,
    # then every explicit output type.
    LOOKUP_OPTIONS = (
        {},
        {"output_type": "curie"},
        {"output_type": "uri"},
        {"output_type": "label"},
    )

    def setUp(self):
        """Preload the schemaorg-only schema and list its classes and properties."""
        schema = Schema(base_schema=["schema.org"])
        self.se = schema
        self.clses = schema.list_all_classes()
        self.props = schema.list_all_properties()

    def test_schemaclass_class(self):
        """ Test the SchemaClass Class using all classes in Schemaorg schema"""
        for _cls in self.clses:
            for options in self.LOOKUP_OPTIONS:
                # get_class followed by describe must not raise
                self.se.get_class(_cls.name, **options).describe()

    def test_schemaproperty_class(self):
        """ Test the SchemaProperty Class using all classes in Schemaorg schema """
        for _prop in self.props:
            for options in self.LOOKUP_OPTIONS:
                # get_property followed by describe must not raise
                self.se.get_property(_prop.name, **options).describe()
def setUp(self):
    """Build one validator for the regular test schema and one for the duplicate test schema.

    Both validators share the Schema object built from the regular document.
    """
    data_dir = os.path.join(_CURRENT, 'data')
    biothings_schema = load_json_or_yaml(
        os.path.join(data_dir, 'biothings_test.jsonld'))
    schema_nx = Schema(biothings_schema)
    self.sv = SchemaValidator(biothings_schema, schema_nx)
    duplicate_schema = load_json_or_yaml(
        os.path.join(data_dir, 'biothings_duplicate_test.jsonld'))
    self.sv_duplicate = SchemaValidator(duplicate_schema, schema_nx)
class TestSchemaClass(unittest.TestCase):
    """Test the listing and lookup functions of a Schema loaded from a local bioschemas extension."""

    def setUp(self):
        """Load the schema stored in data/extend_from_bioschemas.json."""
        self.se = Schema(
            os.path.join(_CURRENT, 'data', 'extend_from_bioschemas.json'))

    def test_list_all_classes(self):
        """ Test list_all_classes function """
        classes = self.se.list_all_classes()
        names = [item.name for item in classes]
        # "Gene" is listed under its CURIE and not under its bare label
        self.assertIn('bioschemas:Gene', names)
        self.assertNotIn('Gene', names)
        # items are SchemaClass instances
        self.assertEqual(SchemaClass, type(classes[0]))

    def test_list_all_properties(self):
        """ Test list_all_properties function"""
        properties = self.se.list_all_properties()
        names = [item.name for item in properties]
        # "name" is listed under its CURIE and not under its bare label
        self.assertIn('schema:name', names)
        self.assertNotIn('name', names)
        # a property that does not exist is not listed
        self.assertNotIn('bts:ffff', names)
        # items are SchemaProperty instances
        self.assertEqual(SchemaProperty, type(properties[0]))

    def test_get_class(self):
        """ Test get_class function"""
        found = self.se.get_class("bioschemas:Gene")
        self.assertEqual(SchemaClass, type(found))

    def test_get_property(self):
        """ Test get_property function"""
        found = self.se.get_property("bioschemas:encodesBioChemEntity")
        self.assertEqual(SchemaProperty, type(found))
def timeit():
    """Time loading the BioThings schema and describing every property of every class.

    Prints the elapsed time in seconds and returns nothing.
    """
    # perf_counter is monotonic, so the measured interval cannot be skewed
    # by a system clock adjustment during the run.
    start = time.perf_counter()
    schema_url = 'https://raw.githubusercontent.com/data2health/schemas/biothings/biothings/biothings_curie_kevin.jsonld'
    se = Schema(schema_url)
    for _cls in se.list_all_classes():
        # The record is never read afterwards; building it is the work
        # being timed.
        es_class = {
            'schema': _cls.prefix,
            'name': _cls.label,
            'clses': [', '.join(map(str, schemas))
                      for schemas in _cls.parent_classes],
            'props': [],
        }
        for prop in _cls.list_properties(group_by_class=False):
            info = prop.describe()
            es_class['props'].append({
                'name': str(prop),
                'value_types': [str(_type) for _type in info['range']],
                'description': info.get('description'),
            })
    print(time.perf_counter() - start)
def load_biothings(self):
    """Load biothings API into registry network graph.

    Every API in ``metadata`` that declares a ``mapping_url`` gets a registry
    entry holding its mapping, graph and type, and the edges of its graph are
    added to ``self.G``, which is returned.
    """
    schema_url = 'https://raw.githubusercontent.com/data2health/schemas/biothings/biothings/biothings_curie_kevin.jsonld'
    self.mp = MappingParser(Schema(schema_url))
    for _api, _info in metadata.items():
        # APIs without a mapping file contribute nothing to the network
        if 'mapping_url' not in _info:
            continue
        self.registry[_api] = {}
        entry = self.registry[_api]
        self.mp.load_mapping(_info['mapping_url'], _api)
        entry['mapping'] = self.mp.mapping
        # connect() must run before the type is read: it is what sets it
        entry['graph'] = self.mp.connect()
        entry['type'] = self.mp.type
        self.G.add_edges_from(entry['graph'].edges(data=True))
    return self.G
class SchemaExtractor():
    """Extract BioThings Schema and construct networkx graph."""

    def __init__(self, schema):
        """Load biothings schema.

        :arg schema: whatever ``Schema`` accepts as its first argument
        """
        self.se = Schema(schema)
        # get all properties which are descendants of "identifier" property
        self.all_ids = self.se.get_property('identifier', output_type="curie").descendant_properties

    def find_descendants(self, lst):
        """Find all descendants for a list of schemaclass classes.

        :arg list lst: a list of schemaclass classes
        :returns: a set merging the descendants of every class in ``lst``;
            an empty set when ``lst`` is empty
        """
        # if input is empty, return an empty set
        if not lst:
            return set()
        # find descendant of each class and then merge together into a set
        return set(itertools.chain.from_iterable(
            self.se.get_class(_cls, output_type="curie").descendant_classes
            for _cls in lst))

    def find_cls_ids(self, _cls):
        """Find all identifiers which belongs to a class.

        :arg cls _cls: a class accepted by ``Schema.get_class``
        :returns: list of the CURIEs of the class properties that are listed
            in ``self.all_ids``
        """
        # get all properties belong to the cls which are descendants of "identifiers"
        return [
            _prop['curie']
            for _prop in self.se.get_class(_cls).list_properties(group_by_class=False)
            if _prop and _prop['curie'] in self.all_ids
        ]

    def _ids_of_classes(self, class_names):
        """Return the set of identifier CURIEs owned by any class in ``class_names``."""
        return set(itertools.chain.from_iterable(
            self.find_cls_ids(_cls) for _cls in class_names))

    def schema2networkx(self):
        """Convert schema into a networkx graph.

        Logics
        ~~~~~~
        Each identifier represents a node
        node properties include its semantic type (class name)
        The edge is represented by non-identifier properties
        """
        G = nx.DiGraph()
        # list all properties defined in the schema
        for _property in self.se.list_all_defined_properties():
            # all_ids holds CURIEs, so the property is compared by its name;
            # comparing the property object itself never matched.
            # NOTE(review): assumes SchemaProperty defines no custom equality
            # against strings - confirm.
            if _property.name in self.all_ids:
                continue
            # find all descendants of domain classes
            input_clses = {_cls.name for _cls in _property.domain
                           if _cls.uri in self.se.full_class_only_graph}
            input_clses |= self.find_descendants(input_clses)
            # find all descendants of range classes
            output_clses = {_cls.name for _cls in _property.range
                            if _cls.uri in self.se.full_class_only_graph}
            output_clses |= self.find_descendants(output_clses)
            if not (input_clses and output_clses):
                continue
            input_ids = self._ids_of_classes(input_clses)
            output_ids = self._ids_of_classes(output_clses)
            if input_ids and output_ids:
                # Link every input identifier to every output identifier.
                # zip() over two sets paired them in arbitrary order and
                # dropped the surplus of the longer set.
                G.add_edges_from(itertools.product(input_ids, output_ids),
                                 label=_property.label)
        return G
class MappingParser():
    """Parse the mapping file between biothings schema and biothings API"""
    BIOTHINGS_SCHEMA_PATH = 'https://raw.githubusercontent.com/data2health/schemas/biothings/biothings/biothings_curie_kevin.jsonld'

    def __init__(self, se=None):
        """Prepare the lookup tables used to classify mapping keys.

        :arg se: an already-loaded Schema; when falsy, the schema at
            ``BIOTHINGS_SCHEMA_PATH`` is loaded instead.
        """
        self.se = se if se else Schema(self.BIOTHINGS_SCHEMA_PATH)
        # list all properties which are descendants of identifiers
        self.id_list = self.se.get_property("identifier", output_type="curie").descendant_properties
        # get all classes defined in biothings schema JSON-LD file
        self.defined_clses = [_item.name for _item in self.se.list_all_defined_classes()]
        # Build the lookup set once instead of once per property.
        defined_names = set(self.defined_clses)
        # list of properties whose "range" is among defined classes
        self.linked_prop_list = [
            _prop.name
            for _prop in self.se.list_all_defined_properties()
            if any(_item.name in defined_names for _item in _prop.range)
        ]
        self.cls_prop_clsf = {}

    def load_mapping(self, mapping, api=None):
        """Load a mapping document and remember which API it belongs to.

        :arg mapping: passed to ``load_json_or_yaml``
        :arg api: name of the API the mapping describes
        """
        self.mapping = load_json_or_yaml(mapping)
        self.api = api

    def classify_keys_in_json(self, json_doc):
        """ classify the keys in a json doc

        :returns: a defaultdict(list) whose 'id' entry lists the keys found in
            ``self.id_list`` and whose 'links' entry lists the keys found in
            ``self.linked_prop_list``; other keys are ignored
        """
        result = defaultdict(list)
        for _key in json_doc:
            if _key in self.id_list:
                result['id'].append(_key)
            elif _key in self.linked_prop_list:
                result['links'].append(_key)
        return result

    def connect(self):
        """Build a multigraph linking the identifiers of the loaded mapping.

        Sets ``self.type`` from the mapping's "@type". Every "links" value
        that is a single object is replaced in ``self.mapping`` by a
        one-element list.

        :returns: a ``networkx.MultiDiGraph`` with one edge per
            (input id, output id) pair, plus the reverse edge when the API is
            registered in ``metadata`` with ``api_type == 'biothings'``
        """
        G = nx.MultiDiGraph()
        self.type = self.mapping.get("@type")
        # classify the keys in the JSON doc
        clsf = self.classify_keys_in_json(self.mapping)
        # .get() keeps an API that is absent from metadata (including the
        # default api=None) from raising KeyError; it is then treated as
        # not being a biothings API.
        is_biothings = metadata.get(self.api, {}).get('api_type') == 'biothings'
        # for each "links" properties, find its ids
        for predicate in clsf['links']:
            if isinstance(self.mapping[predicate], dict):
                self.mapping[predicate] = [self.mapping[predicate]]
            for _pred in self.mapping[predicate]:
                if "@type" not in _pred:
                    continue
                sp = self.se.get_property(predicate)
                obj_clsf = self.classify_keys_in_json(_pred)
                common_prefix = find_common_path(get_dict_values(_pred))
                input_id = [_pred['$input']] if '$input' in _pred else clsf['id']
                source = _pred['$source'] if '$source' in _pred else self.api
                for in_id, out_id in itertools.product(input_id, obj_clsf['id']):
                    output_field = _pred[out_id]
                    input_field = self.mapping[in_id]
                    if isinstance(input_field, list):
                        input_field = ','.join(input_field)
                    if isinstance(output_field, list):
                        output_field = ','.join(output_field)
                    G.add_edge(in_id, out_id,
                               label=predicate,
                               mapping_key=predicate,
                               api=self.api,
                               source=source,
                               input_field=input_field,
                               input_type=self.mapping["@type"],
                               input_id=in_id,
                               output_id=out_id,
                               output_type=_pred["@type"],
                               output_field=common_prefix if common_prefix else output_field)
                    if is_biothings:
                        inverse_property = None if not sp.inverse_property else sp.inverse_property.name
                        if not inverse_property:
                            # NOTE(review): diagnostic output for predicates
                            # without an inverse; the reverse edge is still
                            # added, with label None.
                            print(predicate)
                        G.add_edge(out_id, in_id,
                                   api=self.api,
                                   input_field=output_field,
                                   input_type=_pred["@type"],
                                   source=source,
                                   input_id=out_id,
                                   output_id=in_id,
                                   output_type=self.mapping["@type"],
                                   output_field=input_field,
                                   label=inverse_property,
                                   mapping_key=in_id)
        return G
def test_schema_should_correctly_merge_validation_property_on_nested_classes(
        self):
    """ Testing merge_recursive_parents function implicitly """
    nested_schema = load_json_or_yaml(
        os.path.join(_CURRENT, 'data', 'nested_schema.json'))

    def check_validation_properties(expected):
        # Each row: (index into @graph, number of validation properties,
        # description of 'name', type of 'name'); a description of None
        # means the description is not checked for that entry.
        for index, count, description, type_name in expected:
            props = nested_schema['@graph'][index]['$validation']['properties']
            self.assertEqual(len(props), count)
            if description is not None:
                self.assertEqual(props['name']['description'], description)
            self.assertEqual(props['name']['type'], type_name)

    # test that data is correctly inserted beforehand
    check_validation_properties((
        (0, 15, 'The name of the Cvisb Dataset', 'string'),
        (2, 1, 'Test description', 'number'),
        (3, 1, None, 'boolean'),
    ))

    Schema(nested_schema)

    # make sure schema is correctly merged after
    check_validation_properties((
        # Root class should stay the same
        (0, 15, 'The name of the Cvisb Dataset', 'string'),
        # the remaining properties are inherited from the root class, while
        # description and type override the parent class
        (2, 15, 'Test description', 'number'),
        # the remaining properties are inherited as well, and the
        # description comes from the parent class
        (3, 15, 'Test description', 'boolean'),
    ))
class TestSchemaPropertyClass(unittest.TestCase):
    """Test SchemaProperty Class"""

    def setUp(self):
        """Load the BioThings schema and check that "ensembl" resolves from every input form."""
        schema_url = 'https://raw.githubusercontent.com/data2health/schemas/biothings/biothings/biothings_curie_kevin.jsonld'
        self.se = Schema(schema_url)
        # The same property addressed by name, by CURIE and by URI, in that order.
        identifiers = (
            "ensembl",
            "bts:ensembl",
            "http://schema.biothings.io/ensembl",
        )
        for identifier in identifiers:
            sp = self.se.get_property(identifier)
            self.assertEqual(sp.name, "bts:ensembl")
            self.assertEqual(sp.uri, "http://schema.biothings.io/ensembl")
            self.assertEqual(sp.label, "ensembl")

    def test_initialization(self):
        """A property that is not in the schema reports defined_in_schema as False."""
        undefined = SchemaProperty('dd', self.se)
        self.assertFalse(undefined.defined_in_schema)

    def test_parent_properties(self):
        """ Test parent_properties function """
        parents = self.se.get_property("ensembl").parent_properties
        parent_names = [_item.name for _item in parents]
        # "identifier" is among the parents of "ensembl"
        self.assertIn("schema:identifier", parent_names)
        # check negative cases
        self.assertNotIn("bts:sgd", parent_names)
        # if input doesn't have parent properties, should return empty list
        self.assertEqual(
            self.se.get_property("identifier").parent_properties, [])
        # test if input is not defined
        self.assertEqual(self.se.get_property('dd').parent_properties, [])

    def test_child_properties(self):
        """ Test child_properties function"""
        children = self.se.get_property("identifier").child_properties
        child_names = [_item.name for _item in children]
        # ensembl is a child of identifier
        self.assertIn('bts:ensembl', child_names)
        # affectsExpressionOf is not
        self.assertNotIn('bts:affectsExpressionOf', child_names)
        # the property itself is not among its children
        self.assertNotIn('schema:identifier', child_names)
        # test if input property is the leaf property
        self.assertEqual(self.se.get_property("ensembl").child_properties, [])
        # test if input is not defined
        self.assertEqual(self.se.get_property("dd").child_properties, [])

    def test_describe(self):
        """test describe function"""
        # an undefined property is described by an empty dict
        self.assertEqual(self.se.get_property("dd").describe(), {})
class SchemaAdapter():
    """
    Manage a biothings_schema.Schema instance.
    Provide native type custom format schema class lists.
    """

    def __init__(self, doc=None, **kwargs):
        """Parse ``doc`` using the contexts gathered from ESSchema.

        :arg doc: schema document handed to the parser as ``schema``
        :arg kwargs: forwarded unchanged to the parser
        """
        contexts = ESSchema.gather_field('@context')
        self._schema = SchemaParser(schema=doc, context=contexts, **kwargs)
        self._classes_defs = self._schema.list_all_defined_classes()
        self._classes_refs = self._schema.list_all_referenced_classes()

    def __getattr__(self, attr):
        """Delegate attributes this adapter does not define to the wrapped schema."""
        # __getattr__ only runs when normal lookup fails, so a lookup of
        # '_schema' arriving here means it was never set; delegating would
        # recurse until RecursionError.
        if attr == '_schema':
            raise AttributeError(attr)
        return getattr(self._schema, attr)

    def get_class_defs(self):
        """get only classes defined in this schema
        each {} will have a field ref: false"""
        return list(self._get_class_defs().values())

    def get_class_refs(self):
        """get only classes referenced outside this schema
        each {} will have a field ref: true"""
        return list(self._get_class_refs().values())

    def get_classes(self, include_ref=True):
        """get all classes and label them if they are referenced
        if include_ref is False, only "defined" classes are included.
        """
        ans = dict(self._get_class_defs())
        if include_ref:
            # a referenced class replaces a defined class of the same name
            ans.update(self._get_class_refs())
        return list(ans.values())

    @staticmethod
    def _get_class_info(schema_class):
        """Convert a biothings_schema.SchemaClass into a plain dict.

        Attributes whose access raises AttributeError are left out.
        """
        ans = {}
        schema_class = SchemaClassWrapper(schema_class)
        for key in ('name', 'uri', 'prefix', 'label', 'description',
                    'parent_classes', 'properties', 'validation'):
            try:
                ans[key] = getattr(schema_class, key)
            except AttributeError:
                # deliberate best effort: the key is simply omitted
                pass
        # 'name' may have been omitted above, so it must not be indexed
        logging.info(ans.get('name'))
        return ans

    def _label_classes(self, schema_classes, ref):
        """Map class name -> class info dict, each dict tagged with ``ref``.

        Only the first class seen under a given name is kept.
        """
        ans = {}
        for schema_class in schema_classes:
            if schema_class.name not in ans:
                _schema_class = self._get_class_info(schema_class)
                _schema_class['ref'] = ref
                ans[schema_class.name] = _schema_class
        return ans

    def _get_class_defs(self):
        """Return {name: info} for the classes defined in this schema (ref=False)."""
        return self._label_classes(self._classes_defs, False)

    def _get_class_refs(self):
        """Return {name: info} for the classes referenced by this schema (ref=True)."""
        return self._label_classes(self._classes_refs, True)

    def has_validation_error(self):
        """return True if there is at least one validation error."""
        # entries flagged as warnings do not count as errors
        return any(not err.warning
                   for err in self._schema.validator.validation_errors)

    def get_validation_errors(self):
        """return validation errors as a list of dictionaries"""
        return [
            err.to_dict()
            for err in self._schema.validator.validation_errors
        ]
def test_extended_schema_validator_works_as_expected(self):
    """Building a Schema from the BioSchemas Gene v1.0 release document must not raise."""
    schema_extended_url = 'https://raw.githubusercontent.com/BioSchemas/specifications/master/Gene/jsonld/Gene_v1.0-RELEASE.json'
    # Only the construction is under test; the resulting object is not inspected.
    Schema(schema_extended_url)
class SchemaAdapter():
    """
    Manage a biothings_schema.Schema instance.
    Provide native type custom format schema class lists.
    """

    def __init__(self, doc=None):
        """Parse ``doc`` using the contexts gathered from ESSchema.

        :arg doc: schema document handed to the parser
        """
        contexts = ESSchema.gather_field('@context')
        self._schema = SchemaParser(doc, contexts)
        self._classes_defs = self._schema.list_all_defined_classes()
        self._classes_refs = self._schema.list_all_referenced_classes()

    def __getattr__(self, attr):
        """Delegate attributes this adapter does not define to the wrapped schema."""
        # __getattr__ only runs when normal lookup fails, so a lookup of
        # '_schema' arriving here means it was never set; delegating would
        # recurse until RecursionError.
        if attr == '_schema':
            raise AttributeError(attr)
        return getattr(self._schema, attr)

    def get_class_defs(self):
        """get only classes defined in this schema
        each {} will have a field ref: false"""
        return list(self._get_class_defs().values())

    def get_class_refs(self):
        """get only classes referenced outside this schema
        each {} will have a field ref: true"""
        return list(self._get_class_refs().values())

    def get_classes(self):
        """get all classes and label them if they are referenced"""
        ans = dict(self._get_class_defs())
        # a referenced class replaces a defined class of the same name
        ans.update(self._get_class_refs())
        return list(ans.values())

    @staticmethod
    def _get_class_info(schema_class):
        """Convert a biothings_schema.SchemaClass into a plain dict.

        Attributes whose access raises AttributeError are left out.
        """
        ans = {}
        schema_class = SchemaClassWrapper(schema_class)
        for key in ('name', 'uri', 'prefix', 'label', 'description',
                    'parent_classes', 'properties', 'validation'):
            try:
                ans[key] = getattr(schema_class, key)
            except AttributeError:
                # deliberate best effort: the key is simply omitted
                pass
        # 'name' may have been omitted above, so it must not be indexed
        logging.info(ans.get('name'))
        return ans

    def _label_classes(self, schema_classes, ref):
        """Map class name -> class info dict, each dict tagged with ``ref``.

        Only the first class seen under a given name is kept.
        """
        ans = {}
        for schema_class in schema_classes:
            if schema_class.name not in ans:
                _schema_class = self._get_class_info(schema_class)
                _schema_class['ref'] = ref
                ans[schema_class.name] = _schema_class
        return ans

    def _get_class_defs(self):
        """Return {name: info} for the classes defined in this schema (ref=False)."""
        return self._label_classes(self._classes_defs, False)

    def _get_class_refs(self):
        """Return {name: info} for the classes referenced by this schema (ref=True)."""
        return self._label_classes(self._classes_refs, True)
def setUp(self):
    """Load the BioThings schema used by every test."""
    self.se = Schema('https://raw.githubusercontent.com/data2health/schemas/biothings/biothings/biothings_curie_kevin.jsonld')
class TestSchemaClassClass(unittest.TestCase):
    """Test SchemaClass Class"""

    def setUp(self):
        """Load the BioThings schema used by every test."""
        self.se = Schema('https://raw.githubusercontent.com/data2health/schemas/biothings/biothings/biothings_curie_kevin.jsonld')

    def test_initialization_with_context_works(self):
        """A Schema built with an explicit @context has the same ``schema`` as ``self.se``."""
        schema_url = 'https://raw.githubusercontent.com/data2health/schemas/biothings/biothings/biothings_curie_kevin.jsonld'
        local_document = load_json_or_yaml(
            os.path.join(_CURRENT, 'data', 'biothings_test.jsonld'))
        self.se_with_context = Schema(schema_url, local_document['@context'])
        self.assertEqual(self.se_with_context.schema, self.se.schema)

    def test_initialization(self):
        """Check name, uri and label of "Gene", and the flag of an undefined class."""
        # if input class is not in schema, defined_in_schema should be False
        self.assertFalse(self.se.get_class("dd").defined_in_schema)
        # NOTE(review): the first two lookups are identical CURIEs; the first
        # was presumably meant to exercise a name-only lookup - confirm.
        identifiers = (
            "bts:Gene",
            "bts:Gene",
            "http://schema.biothings.io/Gene",
        )
        for identifier in identifiers:
            scls = self.se.get_class(identifier)
            self.assertEqual(scls.name, "bts:Gene")
            self.assertEqual(scls.uri, "http://schema.biothings.io/Gene")
            self.assertEqual(scls.label, "Gene")

    def test_parent_classes(self):
        """ Test parent_classes function """
        # default output type: items are objects exposing ``name``
        lineage = self.se.get_class("bts:Gene").parent_classes
        # the first item of the first lineage should be 'Thing'
        self.assertEqual(lineage[0][0].name, 'schema:Thing')
        # if input is the root class, should return empty list
        self.assertEqual(self.se.get_class("Thing").parent_classes, [])
        # check the response if class not exist
        self.assertEqual(self.se.get_class("dd").parent_classes, [])
        # explicit output types: items are plain strings
        expected_roots = (
            ("uri", 'http://schema.org/Thing'),
            ("label", 'Thing'),
            ("curie", 'schema:Thing'),
        )
        for output_type, root in expected_roots:
            lineage = self.se.get_class(
                "bts:Gene", output_type=output_type).parent_classes
            self.assertEqual(lineage[0][0], root)

    def test_ancestor_classes(self):
        """ Test ancestor_classes function"""
        # default output type: items are objects exposing ``name``
        ancestors = self.se.get_class("bts:MolecularEntity").ancestor_classes
        ancestor_names = [_item.name for _item in ancestors]
        self.assertIn('schema:Thing', ancestor_names)
        self.assertIn('bts:BiologicalEntity', ancestor_names)
        # Gene is a descendant, not an ancestor
        self.assertNotIn('bts:Gene', ancestor_names)
        # the class itself is not among its ancestors
        self.assertNotIn('bts:MolecularEntity', ancestor_names)
        # test if input class is the root class
        self.assertEqual(self.se.get_class("Thing").ancestor_classes, [])
        # test if input class not exists
        self.assertEqual(self.se.get_class("dd").ancestor_classes, [])
        # explicit output types: items are plain strings
        expected_members = (
            ("curie", ('bts:BiologicalEntity', 'schema:Thing')),
            ("label", ('Thing', 'BiologicalEntity')),
            ("uri", ('http://schema.biothings.io/BiologicalEntity',
                     'http://schema.org/Thing')),
        )
        for output_type, members in expected_members:
            ancestors = self.se.get_class(
                "bts:MolecularEntity", output_type=output_type).ancestor_classes
            for member in members:
                self.assertIn(member, ancestors)

    def test_descendant_classes(self):
        """ Test descendant_classes function"""
        # default output type: items are objects exposing ``name``
        descendants = self.se.get_class("bts:MolecularEntity").descendant_classes
        descendant_names = [_item.name for _item in descendants]
        self.assertIn('bts:Gene', descendant_names)
        # Thing is an ancestor, not a descendant
        self.assertNotIn('schema:Thing', descendant_names)
        # the class itself is not among its descendants
        self.assertNotIn('bts:MolecularEntity', descendant_names)
        # test if input class is the leaf class
        self.assertEqual(self.se.get_class("bts:Gene").descendant_classes, [])
        # test if input class not exists
        self.assertEqual(self.se.get_class("dd").descendant_classes, [])
        # explicit output types: items are plain strings
        expected_gene = (
            ("curie", 'bts:Gene'),
            ("label", 'Gene'),
            ("uri", 'http://schema.biothings.io/Gene'),
        )
        for output_type, gene in expected_gene:
            descendants = self.se.get_class(
                "bts:MolecularEntity", output_type=output_type).descendant_classes
            self.assertIn(gene, descendants)

    def test_child_classes(self):
        """ Test child_classes function"""
        # default output type: items are objects exposing ``name``
        children = self.se.get_class("bts:MolecularEntity").child_classes
        children_names = [_item.name for _item in children]
        self.assertIn('bts:GeneFamily', children_names)
        # Gene is a deeper descendant, not a direct child
        self.assertNotIn('bts:Gene', children_names)
        # Thing is an ancestor, not a child
        self.assertNotIn('schema:Thing', children_names)
        # the class itself is not among its children
        self.assertNotIn('bts:MolecularEntity', children_names)
        # test if input class is the leaf class
        self.assertEqual(self.se.get_class("bts:Gene").child_classes, [])
        # test if input class is not defined
        self.assertEqual(self.se.get_class("dd").child_classes, [])
        # explicit output types: items are plain strings
        expected_family = (
            ("curie", 'bts:GeneFamily'),
            ("uri", 'http://schema.biothings.io/GeneFamily'),
            ("label", 'GeneFamily'),
        )
        for output_type, family in expected_family:
            children = self.se.get_class(
                "bts:MolecularEntity", output_type=output_type).child_classes
            self.assertIn(family, children)

    def test_used_by(self):
        """ Test used_by function"""
        usage = self.se.get_class("bts:GenomicEntity").used_by()
        self.assertTrue(len(usage) > 1)
        self.assertEqual(list, type(usage))
        # test if class is not defined
        self.assertEqual(self.se.get_class("dd").used_by(), [])

    def test_describe(self):
        """test describe function"""
        # an undefined class is described by an empty dict
        self.assertEqual(self.se.get_class("dd").describe(), {})
def __init__(self, doc=None):
    """Parse ``doc`` with the contexts gathered from ESSchema and list its classes.

    :arg doc: schema document handed to the parser
    """
    parser = SchemaParser(doc, ESSchema.gather_field('@context'))
    self._schema = parser
    # classes defined by the document and classes it only references
    self._classes_defs = parser.list_all_defined_classes()
    self._classes_refs = parser.list_all_referenced_classes()
def __init__(self, schema):
    """Load biothings schema.

    :arg schema: whatever ``Schema`` accepts as its first argument
    """
    self.se = Schema(schema)
    # every property that descends from "identifier", as returned for
    # output_type="curie"
    identifier = self.se.get_property('identifier', output_type="curie")
    self.all_ids = identifier.descendant_properties
# http://su07:9199/omicsdi/_search?q=* # %% [markdown] # ## Question # Thread #cvisb andrew 21 days ago # > For each repository, show how many datasets have each metadata field populated. # %% from collections import defaultdict, Counter from functools import partial from elasticsearch_dsl import Search from elasticsearch import Elasticsearch from biothings_schema import Schema schema = Schema() dataset = schema.get_class("schema:Dataset") properties = sorted([ prop['label'] for prop in dataset.list_properties( class_specific=False, group_by_class=False) ]) # %% client = Elasticsearch('su07:9199') indicies = ('zenodo', 'omicsdi', 'harvard_dataverse','ncbi_geo_transformed') result = defaultdict(partial(defaultdict, Counter)) count = 0 for index in indicies: search = Search(using=client, index=index)
def setUp(self):
    """Load the schema stored in data/extend_from_bioschemas.json."""
    self.se = Schema(
        os.path.join(_CURRENT, 'data', 'extend_from_bioschemas.json'))