def setUp(self):
    """Build the schema.org-only Schema and list its classes and properties."""
    schema = Schema(base_schema=["schema.org"])
    self.se = schema
    # every class known to the schema, as returned by list_all_classes
    self.clses = schema.list_all_classes()
    # every property known to the schema, as returned by list_all_properties
    self.props = schema.list_all_properties()
예제 #2
0
class TestSchemaOrg(unittest.TestCase):
    """Run get_class/get_property and describe() over every entry of the
    schema loaded in setUp (the BioThings JSON-LD file, despite the class name).
    """
    def setUp(self):
        """Load the BioThings schema from its URL and list its classes and properties."""
        schema_url = 'https://raw.githubusercontent.com/data2health/schemas/biothings/biothings/biothings_curie_kevin.jsonld'
        schema = Schema(schema_url)
        self.se = schema
        self.clses = schema.list_all_classes()
        self.props = schema.list_all_properties()

    def test_schemaclass_class(self):
        """describe() must run without raising for every listed class."""
        for schema_cls in self.clses:
            # look the class up again by its name, then describe it
            self.se.get_class(schema_cls.name).describe()

    def test_schemaproperty_class(self):
        """describe() must run without raising for every listed property."""
        for schema_prop in self.props:
            # look the property up again by its name, then describe it
            self.se.get_property(schema_prop.name).describe()
예제 #3
0
 def setUp(self):
     """Load the BioThings schema from its URL and list its classes and properties."""
     schema_url = 'https://raw.githubusercontent.com/data2health/schemas/biothings/biothings/biothings_curie_kevin.jsonld'
     schema = Schema(schema_url)
     self.se = schema
     # every class known to the schema
     self.clses = schema.list_all_classes()
     # every property known to the schema
     self.props = schema.list_all_properties()
예제 #4
0
 def __init__(self, se=None):
     """Prepare the lookup lists used to classify mapping keys.

     :arg se: an already constructed Schema; when falsy, a Schema is built
         from ``self.BIOTHINGS_SCHEMA_PATH`` instead
     """
     self.se = se if se else Schema(self.BIOTHINGS_SCHEMA_PATH)
     # list all properties which are descendants of "identifier"
     self.id_list = self.se.get_property("identifier",
                                         output_type="curie").descendant_properties
     # names of all classes defined in the schema
     self.defined_clses = [_item.name for _item in self.se.list_all_defined_classes()]
     # build the membership set once instead of once per property
     defined = set(self.defined_clses)
     # names of properties whose "range" contains at least one defined class
     self.linked_prop_list = [
         _prop.name for _prop in self.se.list_all_defined_properties()
         if any(_item.name in defined for _item in _prop.range)
     ]
     self.cls_prop_clsf = {}
 def test_initialization_with_context_works(self):
     """A Schema built from the URL plus the local file's '@context' must
     expose the same ``schema`` as ``self.se``."""
     schema_url = 'https://raw.githubusercontent.com/data2health/schemas/biothings/biothings/biothings_curie_kevin.jsonld'
     local_doc = load_json_or_yaml(
         os.path.join(_CURRENT, 'data', 'biothings_test.jsonld'))
     self.se_with_context = Schema(schema_url, local_doc['@context'])
     self.assertEqual(self.se_with_context.schema, self.se.schema)
예제 #6
0
    def test_schema_should_not_merge_validation_property_on_nested_classes_if_flag_set_to_false(
            self):
        """Building a Schema with validation_merge=False must leave every
        '$validation' block of the input document unchanged.

        Exercises merge_recursive_parents implicitly with merging disabled.
        """
        nested_schema = load_json_or_yaml(
            os.path.join(_CURRENT, 'data', 'nested_schema.json'))

        def validation_props(index):
            # '$validation' properties of the index-th '@graph' entry,
            # looked up afresh on every call
            return nested_schema['@graph'][index]['$validation']['properties']

        def check_entry(index, count, type_name, description=None):
            # size first, then the optional description, then the type
            self.assertEqual(len(validation_props(index)), count)
            if description is not None:
                self.assertEqual(
                    validation_props(index)['name']['description'],
                    description)
            self.assertEqual(validation_props(index)['name']['type'],
                             type_name)

        def check_unmerged():
            check_entry(0, 15, 'string', 'The name of the Cvisb Dataset')
            check_entry(2, 1, 'number', 'Test description')
            check_entry(3, 1, 'boolean')

        # the fixture must have the expected shape before the schema is built
        check_unmerged()

        _unmerged_schema = Schema(nested_schema, validation_merge=False)

        # and must be exactly the same afterwards
        check_unmerged()
예제 #7
0
class TestSchemaClass(unittest.TestCase):
    """Check the listing and lookup functions of a Schema loaded from the
    BioThings schema URL.
    """
    def setUp(self):
        """Load the schema under test."""
        self.se = Schema('https://raw.githubusercontent.com/data2health/schemas/biothings/biothings/biothings_curie_kevin.jsonld')

    def test_list_all_classes(self):
        """list_all_classes must return SchemaClass objects named by CURIE."""
        classes = self.se.list_all_classes()
        names = [item.name for item in classes]
        # the root class and "Gene" must both be listed
        for expected in ('schema:Thing', 'bts:Gene'):
            self.assertIn(expected, names)
        # neither an unknown class nor a bare (non-CURIE) name may appear
        for unexpected in ('bts:ffff', 'Thing'):
            self.assertNotIn(unexpected, names)
        # entries are exactly of type SchemaClass
        self.assertEqual(SchemaClass, type(classes[0]))

    def test_list_all_properties(self):
        """list_all_properties must return SchemaProperty objects named by CURIE."""
        properties = self.se.list_all_properties()
        names = [item.name for item in properties]
        # "name" must be listed under its CURIE
        self.assertIn('schema:name', names)
        # neither a bare (non-CURIE) name nor an unknown property may appear
        for unexpected in ('name', 'bts:ffff'):
            self.assertNotIn(unexpected, names)
        # entries are exactly of type SchemaProperty
        self.assertEqual(SchemaProperty, type(properties[0]))

    def test_get_class(self):
        """get_class must return a SchemaClass."""
        self.assertEqual(SchemaClass, type(self.se.get_class("schema:Gene")))

    def test_get_property(self):
        """get_property must return a SchemaProperty."""
        self.assertEqual(SchemaProperty,
                         type(self.se.get_property("ensembl")))
예제 #8
0
 def setUp(self):
     """Load the BioThings schema and check that the "ensembl" property
     resolves to the same name, URI and label whether it is requested by
     bare name, by CURIE or by URI."""
     schema_url = 'https://raw.githubusercontent.com/data2health/schemas/biothings/biothings/biothings_curie_kevin.jsonld'
     self.se = Schema(schema_url)
     # inputs in order: NAME only, CURIE only, URI only
     for identifier in ("ensembl", "bts:ensembl",
                        "http://schema.biothings.io/ensembl"):
         sp = self.se.get_property(identifier)
         self.assertEqual(sp.name, "bts:ensembl")
         self.assertEqual(sp.uri, "http://schema.biothings.io/ensembl")
         self.assertEqual(sp.label, "ensembl")
class TestSchemaOrg(unittest.TestCase):
    """Run get_class/get_property and describe() over every entry of the
    schema.org-only schema, once per supported output_type.
    """
    def setUp(self):
        """Build the schema.org-only Schema and list its classes and properties."""
        schema = Schema(base_schema=["schema.org"])
        self.se = schema
        self.clses = schema.list_all_classes()
        self.props = schema.list_all_properties()

    def test_schemaclass_class(self):
        """describe() must run without raising for every class, with the
        default output type and with "curie", "uri" and "label"."""
        for schema_cls in self.clses:
            name = schema_cls.name
            # default output type first
            self.se.get_class(name).describe()
            # then each explicit output type
            for output_type in ("curie", "uri", "label"):
                self.se.get_class(name, output_type=output_type).describe()

    def test_schemaproperty_class(self):
        """describe() must run without raising for every property, with the
        default output type and with "curie", "uri" and "label"."""
        for schema_prop in self.props:
            name = schema_prop.name
            # default output type first
            self.se.get_property(name).describe()
            # then each explicit output type
            for output_type in ("curie", "uri", "label"):
                self.se.get_property(name,
                                     output_type=output_type).describe()
예제 #10
0
 def setUp(self):
     """Create one validator for the regular test schema and one for the
     duplicate test schema; both share the Schema built from the regular
     file."""
     data_dir = os.path.join(_CURRENT, 'data')
     biothings_schema = load_json_or_yaml(
         os.path.join(data_dir, 'biothings_test.jsonld'))
     schema_nx = Schema(biothings_schema)
     self.sv = SchemaValidator(biothings_schema, schema_nx)
     duplicate_schema = load_json_or_yaml(
         os.path.join(data_dir, 'biothings_duplicate_test.jsonld'))
     self.sv_duplicate = SchemaValidator(duplicate_schema, schema_nx)
예제 #11
0
class TestSchemaClass(unittest.TestCase):
    """Check the listing and lookup functions of a Schema loaded from the
    local 'extend_from_bioschemas.json' test file.
    """
    def setUp(self):
        """Load the schema under test from the data directory."""
        self.se = Schema(
            os.path.join(_CURRENT, 'data', 'extend_from_bioschemas.json'))

    def test_list_all_classes(self):
        """list_all_classes must return SchemaClass objects named by CURIE."""
        classes = self.se.list_all_classes()
        names = [item.name for item in classes]
        # "Gene" must be listed under its CURIE ...
        self.assertIn('bioschemas:Gene', names)
        # ... and not under its bare name
        self.assertNotIn('Gene', names)
        # entries are exactly of type SchemaClass
        self.assertEqual(SchemaClass, type(classes[0]))

    def test_list_all_properties(self):
        """list_all_properties must return SchemaProperty objects named by CURIE."""
        properties = self.se.list_all_properties()
        names = [item.name for item in properties]
        # "name" must be listed under its CURIE
        self.assertIn('schema:name', names)
        # neither a bare (non-CURIE) name nor an unknown property may appear
        for unexpected in ('name', 'bts:ffff'):
            self.assertNotIn(unexpected, names)
        # entries are exactly of type SchemaProperty
        self.assertEqual(SchemaProperty, type(properties[0]))

    def test_get_class(self):
        """get_class must return a SchemaClass."""
        self.assertEqual(SchemaClass,
                         type(self.se.get_class("bioschemas:Gene")))

    def test_get_property(self):
        """get_property must return a SchemaProperty."""
        self.assertEqual(
            SchemaProperty,
            type(self.se.get_property("bioschemas:encodesBioChemEntity")))
예제 #12
0
def timeit():
    """Print the seconds needed to load the BioThings schema and describe
    every property of every class.

    The per-class records are built only to exercise the schema API; they
    are discarded after each iteration.
    """
    # perf_counter is monotonic, so the measured interval cannot be skewed
    # by system clock adjustments the way a time.time() difference can
    start = time.perf_counter()
    schema_url = 'https://raw.githubusercontent.com/data2health/schemas/biothings/biothings/biothings_curie_kevin.jsonld'
    se = Schema(schema_url)
    for _cls in se.list_all_classes():
        es_class = {
            'schema': _cls.prefix,
            'name': _cls.label,
            'clses': [', '.join(map(str, schemas))
                      for schemas in _cls.parent_classes],
            'props': [],
        }
        for prop in _cls.list_properties(group_by_class=False):
            info = prop.describe()
            es_class['props'].append({
                'name': str(prop),
                'value_types': [str(_type) for _type in info['range']],
                'description': info.get('description'),
            })
    print(time.perf_counter() - start)
예제 #13
0
 def load_biothings(self):
     """Register every API that declares a mapping and add its edges to self.G.

     :returns: self.G after the edges of all mapped APIs have been added
     """
     schema_url = 'https://raw.githubusercontent.com/data2health/schemas/biothings/biothings/biothings_curie_kevin.jsonld'
     # one parser, backed by the BioThings schema, is reused for all APIs
     self.mp = MappingParser(Schema(schema_url))
     for api_name, api_info in metadata.items():
         # APIs without a mapping file are not registered
         if 'mapping_url' not in api_info:
             continue
         self.registry[api_name] = {}
         self.mp.load_mapping(api_info['mapping_url'], api_name)
         self.registry[api_name]['mapping'] = self.mp.mapping
         self.registry[api_name]['graph'] = self.mp.connect()
         self.registry[api_name]['type'] = self.mp.type
         # copy the API's edges, attributes included, into the shared graph
         self.G.add_edges_from(
             self.registry[api_name]['graph'].edges(data=True))
     return self.G
예제 #14
0
class SchemaExtractor():
    """Extract BioThings Schema and construct networkx graph."""

    def __init__(self, schema):
        """Load the schema and collect the descendants of "identifier".

        :arg schema: passed unchanged to ``Schema``
        """
        self.se = Schema(schema)
        # get all properties which are descendants of "identifier" property
        self.all_ids = self.se.get_property('identifier',
                                            output_type="curie").descendant_properties

    def find_descendants(self, lst):
        """Return the union of the descendant classes of all classes in lst.

        :arg lst: collection of class identifiers accepted by
            ``Schema.get_class`` (schema2networkx passes a set of class names)
        :returns: a set; empty when lst is empty
        """
        # if input is empty, return an empty set
        if not lst:
            return set()
        # find descendant of each class and then merge together into a set
        dsc_lst = set(itertools.chain.from_iterable([self.se.get_class(_cls, output_type="curie").descendant_classes for _cls in lst]))
        return dsc_lst

    def find_cls_ids(self, _cls):
        """Return the 'curie' of every property of a class that is in self.all_ids.

        :arg _cls: class identifier passed to ``Schema.get_class``
        :returns: list of the matching 'curie' values
        """
        # NOTE(review): each listed property is indexed with ['curie'], so
        # list_properties(group_by_class=False) presumably yields dict-like
        # items here -- confirm against the Schema API
        properties = [_prop['curie'] for _prop in self.se.get_class(_cls).list_properties(group_by_class=False) if _prop and _prop['curie'] in self.all_ids]
        return properties

    def schema2networkx(self):
        """Convert schema into a networkx graph.

        Logics
        ~~~~~~
        Each identifier represents a node
        The edge is represented by non-identifier properties and is
        labelled with the property's label

        :returns: a networkx DiGraph
        """
        G = nx.DiGraph()
        # list all properties defined in the schema
        properties = self.se.list_all_defined_properties()
        for _property in properties:
            # NOTE(review): this tests the property object itself against
            # all_ids; whether all_ids holds comparable objects or plain
            # CURIE strings cannot be told from here -- confirm
            if _property not in self.all_ids:
                # domain classes present in the full class graph, plus all
                # of their descendants
                input_clses = set([_cls.name for _cls in _property.domain if _cls.uri in self.se.full_class_only_graph])
                input_clses |= self.find_descendants(input_clses)
                # range classes present in the full class graph, plus all
                # of their descendants
                output_clses = set([_cls.name for _cls in _property.range if _cls.uri in self.se.full_class_only_graph])
                output_clses |= self.find_descendants(output_clses)
                if input_clses and output_clses:
                    # identifier properties of every input / output class
                    input_ids = set(itertools.chain.from_iterable([self.find_cls_ids(_cls) for _cls in input_clses]))
                    output_ids = set(itertools.chain.from_iterable([self.find_cls_ids(_cls) for _cls in output_clses]))
                    if input_ids and output_ids:
                        # NOTE(review): zip pairs the two sets element by
                        # element in set iteration order and stops at the
                        # shorter one; if every input id should be linked to
                        # every output id, itertools.product is probably
                        # what was intended -- confirm
                        G.add_edges_from(zip(input_ids, output_ids),
                                         label=_property.label)
                else:
                    continue
            else:
                continue
        return G
예제 #15
0
class MappingParser():
    """Parse the mapping file between biothings schema and biothings API"""
    BIOTHINGS_SCHEMA_PATH = 'https://raw.githubusercontent.com/data2health/schemas/biothings/biothings/biothings_curie_kevin.jsonld'

    def __init__(self, se=None):
        """Prepare the lookup lists used to classify mapping keys.

        :arg se: an already constructed Schema; when falsy, a Schema is
            built from ``BIOTHINGS_SCHEMA_PATH`` instead
        """
        self.se = se if se else Schema(self.BIOTHINGS_SCHEMA_PATH)
        # list all properties which are descendants of "identifier"
        self.id_list = self.se.get_property("identifier",
                                            output_type="curie").descendant_properties
        # names of all classes defined in the schema
        self.defined_clses = [_item.name for _item in self.se.list_all_defined_classes()]
        # build the membership set once instead of once per property
        defined = set(self.defined_clses)
        # names of properties whose "range" contains at least one defined class
        self.linked_prop_list = [
            _prop.name for _prop in self.se.list_all_defined_properties()
            if any(_item.name in defined for _item in _prop.range)
        ]
        self.cls_prop_clsf = {}

    def load_mapping(self, mapping, api=None):
        """Load a mapping document and remember which API it belongs to.

        :arg mapping: passed unchanged to ``load_json_or_yaml``
        :arg api: name of the API the mapping describes
        """
        self.mapping = load_json_or_yaml(mapping)
        self.api = api

    def classify_keys_in_json(self, json_doc):
        """Classify the keys of a JSON document.

        :arg dict json_doc: document whose keys are inspected
        :returns: defaultdict(list) with the keys found in ``id_list`` under
            'id' and, otherwise, the keys found in ``linked_prop_list``
            under 'links'; all other keys are left out
        """
        result = defaultdict(list)
        for _key in json_doc:
            if _key in self.id_list:
                result['id'].append(_key)
            elif _key in self.linked_prop_list:
                result['links'].append(_key)
        return result

    def connect(self):
        """Build a graph of the id-to-id links described by the loaded mapping.

        For every 'links' key of the mapping and every entry under it that
        has an "@type", one edge is added from each input id to each id key
        of the entry.  When ``metadata[self.api]`` has api_type 'biothings',
        the reverse edge is added too, labelled with the name of the
        property's inverse (None when there is none).

        Side effects: sets ``self.type`` and wraps single-dict link values
        of ``self.mapping`` in a list.

        :returns: a networkx MultiDiGraph
        """
        G = nx.MultiDiGraph()
        self.type = self.mapping.get("@type")
        # classify the keys in the JSON doc
        clsf = self.classify_keys_in_json(self.mapping)
        # for each "links" property, find its ids
        for predicate in clsf['links']:
            # a single entry is normalised to a one-element list; isinstance
            # also accepts dict subclasses (e.g. OrderedDict), which an exact
            # type comparison would leave unwrapped
            if isinstance(self.mapping[predicate], dict):
                self.mapping[predicate] = [self.mapping[predicate]]
            for _pred in self.mapping[predicate]:
                if "@type" not in _pred:
                    continue
                sp = self.se.get_property(predicate)
                obj_clsf = self.classify_keys_in_json(_pred)
                common_prefix = find_common_path(get_dict_values(_pred))
                # an explicit '$input' / '$source' overrides the defaults
                input_id = [_pred['$input']] if '$input' in _pred else clsf['id']
                source = _pred['$source'] if '$source' in _pred else self.api
                for _edge in itertools.product(input_id, obj_clsf['id']):
                    output_field = _pred[_edge[1]]
                    input_field = self.mapping[_edge[0]]
                    # several fields are stored as one comma-joined string
                    if isinstance(input_field, list):
                        input_field = ','.join(input_field)
                    if isinstance(output_field, list):
                        output_field = ','.join(output_field)
                    G.add_edge(_edge[0], _edge[1], label=predicate,
                               mapping_key=predicate,
                               api=self.api,
                               source=source,
                               input_field=input_field,
                               input_type=self.mapping["@type"],
                               input_id=_edge[0],
                               output_id=_edge[1],
                               output_type=_pred["@type"],
                               output_field=common_prefix if common_prefix else output_field)
                    if metadata[self.api].get('api_type') == 'biothings':
                        inverse_property = sp.inverse_property.name if sp.inverse_property else None
                        if not inverse_property:
                            # report predicates that have no inverse property
                            print(predicate)
                        G.add_edge(_edge[1], _edge[0], api=self.api,
                                   input_field=output_field,
                                   input_type=_pred["@type"],
                                   source=source,
                                   input_id=_edge[1],
                                   output_id=_edge[0],
                                   output_type=self.mapping["@type"],
                                   output_field=input_field,
                                   label=inverse_property,
                                   mapping_key=_edge[0])
        return G
예제 #16
0
    def test_schema_should_correctly_merge_validation_property_on_nested_classes(
            self):
        """Building a Schema must merge the parents' '$validation' properties
        into the nested classes of the input document.

        Exercises merge_recursive_parents implicitly.
        """
        nested_schema = load_json_or_yaml(
            os.path.join(_CURRENT, 'data', 'nested_schema.json'))

        def validation_props(index):
            # '$validation' properties of the index-th '@graph' entry,
            # looked up afresh on every call
            return nested_schema['@graph'][index]['$validation']['properties']

        def check_entry(index, count, type_name, description=None):
            # size first, then the optional description, then the type
            self.assertEqual(len(validation_props(index)), count)
            if description is not None:
                self.assertEqual(
                    validation_props(index)['name']['description'],
                    description)
            self.assertEqual(validation_props(index)['name']['type'],
                             type_name)

        # the fixture must have the expected shape before the schema is built
        check_entry(0, 15, 'string', 'The name of the Cvisb Dataset')
        check_entry(2, 1, 'number', 'Test description')
        check_entry(3, 1, 'boolean')

        _merged_schema = Schema(nested_schema)

        # root class should stay the same
        check_entry(0, 15, 'string', 'The name of the Cvisb Dataset')
        # the other properties are inherited from the root class, while the
        # entry's own description and type override the parent's
        check_entry(2, 15, 'number', 'Test description')
        # the other properties are inherited as well; the description comes
        # from the parent class, the type stays the entry's own
        check_entry(3, 15, 'boolean', 'Test description')
예제 #17
0
class TestSchemaPropertyClass(unittest.TestCase):
    """Check SchemaProperty lookups and hierarchy attributes against the
    BioThings schema.
    """
    def setUp(self):
        """Load the schema and check that "ensembl" resolves to the same
        name, URI and label from its bare name, its CURIE and its URI."""
        schema_url = 'https://raw.githubusercontent.com/data2health/schemas/biothings/biothings/biothings_curie_kevin.jsonld'
        self.se = Schema(schema_url)
        # inputs in order: NAME only, CURIE only, URI only
        for identifier in ("ensembl", "bts:ensembl",
                           "http://schema.biothings.io/ensembl"):
            sp = self.se.get_property(identifier)
            self.assertEqual(sp.name, "bts:ensembl")
            self.assertEqual(sp.uri, "http://schema.biothings.io/ensembl")
            self.assertEqual(sp.label, "ensembl")

    def test_initialization(self):
        """A property that is not in the schema reports defined_in_schema False."""
        unknown = SchemaProperty('dd', self.se)
        self.assertFalse(unknown.defined_in_schema)

    def test_parent_properties(self):
        """Check parent_properties for a nested, a root and an unknown property."""
        parent_names = [
            item.name
            for item in self.se.get_property("ensembl").parent_properties
        ]
        # "identifier" is among the parents of "ensembl"
        self.assertIn("schema:identifier", parent_names)
        # an unrelated property is not
        self.assertNotIn("bts:sgd", parent_names)
        # a property without parents yields an empty list
        self.assertEqual(
            self.se.get_property("identifier").parent_properties, [])
        # so does a property that is not defined
        self.assertEqual(self.se.get_property('dd').parent_properties, [])

    def test_child_properties(self):
        """Check child_properties for a parent, a leaf and an unknown property."""
        child_names = [
            item.name
            for item in self.se.get_property("identifier").child_properties
        ]
        # "ensembl" is among the children of "identifier"
        self.assertIn('bts:ensembl', child_names)
        # "affectsExpressionOf" is not
        self.assertNotIn('bts:affectsExpressionOf', child_names)
        # the property itself is not one of its own children
        self.assertNotIn('schema:identifier', child_names)
        # a leaf property yields an empty list
        self.assertEqual(self.se.get_property("ensembl").child_properties, [])
        # so does a property that is not defined
        self.assertEqual(self.se.get_property("dd").child_properties, [])

    def test_describe(self):
        """describe() of an undefined property returns an empty dict."""
        self.assertEqual(self.se.get_property("dd").describe(), {})
예제 #18
0
class SchemaAdapter():
    """
    Manage a biothings_schema.Schema instance.
    Provide native type custom format schema class lists.
    """
    def __init__(self, doc=None, **kwargs):
        """Parse *doc* with SchemaParser, using the '@context' values
        gathered from ESSchema.

        Extra keyword arguments are forwarded to SchemaParser.
        """
        contexts = ESSchema.gather_field('@context')
        self._schema = SchemaParser(schema=doc, context=contexts, **kwargs)
        self._classes_defs = self._schema.list_all_defined_classes()
        self._classes_refs = self._schema.list_all_referenced_classes()

    def __getattr__(self, attr):
        """Delegate attributes not found on the adapter to the wrapped schema."""
        # __getattr__ only runs after normal lookup has failed.  If '_schema'
        # itself is the missing attribute (instance created without running
        # __init__), reading self._schema below would re-enter this method
        # without end, so fail with a plain AttributeError instead.
        if attr == '_schema':
            raise AttributeError(attr)
        return getattr(self._schema, attr)

    def get_class_defs(self):
        """get only classes defined in this schema
           each {} will have a field ref: false"""
        return list(self._get_class_defs().values())

    def get_class_refs(self):
        """get only classes referenced outside this schema
           each {} will have a field ref: true"""
        return list(self._get_class_refs().values())

    def get_classes(self, include_ref=True):
        """get all classes and label them if they are referenced
           if include_ref is False, only "defined" classes are included.
           a referenced class replaces a defined class of the same name.
        """
        ans = dict(self._get_class_defs())
        if include_ref:
            ans.update(self._get_class_refs())
        return list(ans.values())

    @staticmethod
    def _get_class_info(schema_class):
        """Convert a biothings_schema.SchemaClass into a plain dict.

        Attributes that the wrapped class does not provide are left out of
        the result.
        """
        ans = {}  # biothings_schema.SchemaClass -> { ... }
        schema_class = SchemaClassWrapper(schema_class)
        for key in ('name', 'uri', 'prefix', 'label', 'description',
                    'parent_classes', 'properties', 'validation'):
            try:
                ans[key] = getattr(schema_class, key)
            except AttributeError:
                pass
        # 'name' may have been skipped above like any other attribute, so do
        # not let the log call raise KeyError for it
        logging.info(ans.get('name'))
        return ans

    def _index_classes(self, schema_classes, ref):
        """Map class name -> info dict, flagging every entry with 'ref'.

        Only the first class seen for a given name is kept.
        """
        ans = {}
        for schema_class in schema_classes:
            if schema_class.name not in ans:
                _schema_class = self._get_class_info(schema_class)
                _schema_class['ref'] = ref
                ans[schema_class.name] = _schema_class
        return ans

    def _get_class_defs(self):
        """Info dicts of the classes defined in this schema, keyed by name."""
        return self._index_classes(self._classes_defs, False)

    def _get_class_refs(self):
        """Info dicts of the classes referenced by this schema, keyed by name."""
        return self._index_classes(self._classes_refs, True)

    def has_validation_error(self):
        """return True if there is at least one validation error."""
        # entries flagged as warnings do not count as errors
        return any(
            not err.warning
            for err in self._schema.validator.validation_errors
        )

    def get_validation_errors(self):
        """return validation errors as a list of dictionaries"""
        return [
            err.to_dict() for err in self._schema.validator.validation_errors
        ]
예제 #19
0
 def test_extended_schema_validator_works_as_expected(self):
     """Building a Schema from the BioSchemas Gene specification URL must not raise."""
     gene_spec_url = 'https://raw.githubusercontent.com/BioSchemas/specifications/master/Gene/jsonld/Gene_v1.0-RELEASE.json'
     Schema(gene_spec_url)
예제 #20
0
class SchemaAdapter():
    """
    Manage a biothings_schema.Schema instance.
    Provide native type custom format schema class lists.

    Attributes that are not found on the adapter itself are looked up on
    the wrapped schema object (see ``__getattr__``).
    """

    def __init__(self, doc=None):
        """Parse *doc* and cache its defined and referenced classes.

        Args:
            doc: schema document, passed to ``SchemaParser`` unchanged.
        """
        contexts = ESSchema.gather_field('@context')
        self._schema = SchemaParser(doc, contexts)
        self._classes_defs = self._schema.list_all_defined_classes()
        self._classes_refs = self._schema.list_all_referenced_classes()

    def __getattr__(self, attr):
        """Delegate unknown attributes to the wrapped schema.

        Raises:
            AttributeError: if the wrapped schema has not been set yet, or
                if it does not provide *attr*.
        """
        # __getattr__ only runs after normal lookup failed. Getting here for
        # '_schema' means the instance was never initialised (created via
        # __new__, copy, pickle ...); reading self._schema below would then
        # re-enter this method and recurse without bound.
        if attr == '_schema':
            raise AttributeError(attr)
        return getattr(self._schema, attr)

    def get_class_defs(self):
        """Return the info dicts of classes defined in this schema.

        Each dict carries ``'ref': False``.
        """
        return list(self._get_class_defs().values())

    def get_class_refs(self):
        """Return the info dicts of classes referenced outside this schema.

        Each dict carries ``'ref': True``.
        """
        return list(self._get_class_refs().values())

    def get_classes(self):
        """Return the info dicts of all classes, labelled via ``'ref'``.

        When a name is both defined and referenced, the referenced entry
        replaces the defined one.
        """
        ans = dict(self._get_class_defs())
        ans.update(self._get_class_refs())
        return list(ans.values())

    @staticmethod
    def _get_class_info(schema_class):
        """Summarise a schema class as a plain dictionary.

        The class is wrapped in ``SchemaClassWrapper`` and a fixed set of
        attributes is copied from the wrapper. Attributes whose lookup raises
        ``AttributeError`` are left out of the result.
        """
        ans = {}  # biothings_schema.SchemaClass -> { ... }
        schema_class = SchemaClassWrapper(schema_class)
        for key in ('name', 'uri', 'prefix', 'label', 'description',
                    'parent_classes', 'properties', 'validation'):
            try:
                ans[key] = getattr(schema_class, key)
            except AttributeError:
                # best effort: a missing attribute is simply not reported
                pass
        # 'name' may have been skipped above, so use .get() to keep the
        # logging call from raising KeyError
        logging.info(ans.get('name'))
        return ans

    def _label_classes(self, schema_classes, is_ref):
        """Map class name -> info dict, tagging each dict with ``'ref'``.

        Only the first occurrence of a given name is kept.
        """
        ans = {}
        for schema_class in schema_classes:
            if schema_class.name not in ans:
                _schema_class = self._get_class_info(schema_class)
                _schema_class['ref'] = is_ref
                ans[schema_class.name] = _schema_class
        return ans

    def _get_class_defs(self):
        """Return ``{name: info}`` for defined classes (``'ref': False``)."""
        return self._label_classes(self._classes_defs, False)

    def _get_class_refs(self):
        """Return ``{name: info}`` for referenced classes (``'ref': True``)."""
        return self._label_classes(self._classes_refs, True)
예제 #21
0
 def setUp(self):
     """Build the schema under test from the hosted BioThings JSON-LD file."""
     self.se = Schema(
         'https://raw.githubusercontent.com/data2health/schemas/biothings/biothings/biothings_curie_kevin.jsonld'
     )
class TestSchemaClassClass(unittest.TestCase):
    """Test SchemaClass Class

    Every test runs against the schema loaded from ``SCHEMA_URL``
    (presumably fetched over the network by ``Schema``).
    """
    # single place for the schema location shared by the tests below
    SCHEMA_URL = 'https://raw.githubusercontent.com/data2health/schemas/biothings/biothings/biothings_curie_kevin.jsonld'

    def setUp(self):
        """Load a fresh schema before each test."""
        self.se = Schema(self.SCHEMA_URL)

    def test_initialization_with_context_works(self):
        """A schema built with an explicit context equals the default one."""
        biothings_jsonld_path = os.path.join(_CURRENT, 'data',
                                             'biothings_test.jsonld')
        biothings_schema = load_json_or_yaml(biothings_jsonld_path)
        se_with_context = Schema(self.SCHEMA_URL,
                                 biothings_schema['@context'])
        self.assertEqual(se_with_context.schema, self.se.schema)

    def test_initialization(self):
        """Test get_class with different kinds of class identifiers."""
        # if input class is not in schema, defined_in_schema should be False
        scls = self.se.get_class("dd")
        self.assertFalse(scls.defined_in_schema)
        # test response if input is NAME only
        # NOTE(review): this passes the same CURIE as the next case, so a
        # bare name (without prefix) is not actually exercised here
        scls = self.se.get_class("bts:Gene")
        self.assertEqual(scls.name, "bts:Gene")
        self.assertEqual(scls.uri, "http://schema.biothings.io/Gene")
        self.assertEqual(scls.label, "Gene")
        # test response if input is CURIE only
        scls = self.se.get_class("bts:Gene")
        self.assertEqual(scls.name, "bts:Gene")
        self.assertEqual(scls.uri, "http://schema.biothings.io/Gene")
        self.assertEqual(scls.label, "Gene")
        # test response if input is URI only
        scls = self.se.get_class("http://schema.biothings.io/Gene")
        self.assertEqual(scls.name, "bts:Gene")
        self.assertEqual(scls.uri, "http://schema.biothings.io/Gene")
        self.assertEqual(scls.label, "Gene")

    def test_parent_classes(self):
        """ Test parent_classes function
        """
        scls = self.se.get_class("bts:Gene")
        parents = scls.parent_classes
        # the first element of the first entry should be 'Thing'
        self.assertEqual(parents[0][0].name, 'schema:Thing')
        # if input is the root class, should return empty list
        scls = self.se.get_class("Thing")
        parents = scls.parent_classes
        self.assertEqual(parents, [])
        # check the response if the class does not exist
        scls = self.se.get_class("dd")
        parents = scls.parent_classes
        self.assertEqual(parents, [])
        ###############################
        # test if output_type is uri
        scls = self.se.get_class("bts:Gene", output_type="uri")
        parents = scls.parent_classes
        # the first element of the first entry should be the URI of 'Thing'
        self.assertEqual(parents[0][0], 'http://schema.org/Thing')
        ###############################
        # test if output_type is label
        scls = self.se.get_class("bts:Gene", output_type="label")
        parents = scls.parent_classes
        # the first element of the first entry should be the label 'Thing'
        self.assertEqual(parents[0][0], 'Thing')
        ###############################
        # test if output_type is curie
        scls = self.se.get_class("bts:Gene", output_type="curie")
        parents = scls.parent_classes
        # the first element of the first entry should be the CURIE of 'Thing'
        self.assertEqual(parents[0][0], 'schema:Thing')

    def test_ancestor_classes(self):
        """ Test ancestor_classes function"""
        ###############################
        # test if output_type is python class
        scls = self.se.get_class("bts:MolecularEntity")
        ancestors = scls.ancestor_classes
        ancestor_names = [_item.name for _item in ancestors]
        # check if Thing and BiologicalEntity are in ancestors
        self.assertIn('schema:Thing', ancestor_names)
        self.assertIn('bts:BiologicalEntity', ancestor_names)
        # Gene must not be in ancestors (Gene is a descendant class)
        self.assertNotIn('bts:Gene', ancestor_names)
        # the class itself should not be in ancestors
        self.assertNotIn('bts:MolecularEntity', ancestor_names)
        # test if input class is the root class
        scls = self.se.get_class("Thing")
        self.assertEqual(scls.ancestor_classes, [])
        # test if input class does not exist
        scls = self.se.get_class("dd")
        self.assertEqual(scls.ancestor_classes, [])
        ###############################
        # test if output_type is curie
        scls = self.se.get_class("bts:MolecularEntity", output_type="curie")
        ancestors = scls.ancestor_classes
        # check if BiologicalEntity and Thing are in ancestors
        self.assertIn('bts:BiologicalEntity', ancestors)
        self.assertIn('schema:Thing', ancestors)
        ###############################
        # test if output_type is label
        scls = self.se.get_class("bts:MolecularEntity", output_type="label")
        ancestors = scls.ancestor_classes
        # check if Thing and BiologicalEntity are in ancestors
        self.assertIn('Thing', ancestors)
        self.assertIn('BiologicalEntity', ancestors)
        ###############################
        # test if output_type is uri
        scls = self.se.get_class("bts:MolecularEntity", output_type="uri")
        ancestors = scls.ancestor_classes
        # check if BiologicalEntity and Thing are in ancestors
        self.assertIn('http://schema.biothings.io/BiologicalEntity', ancestors)
        self.assertIn('http://schema.org/Thing', ancestors)

    def test_descendant_classes(self):
        """ Test descendant_classes function"""
        ###############################
        # test if output_type is python class
        scls = self.se.get_class("bts:MolecularEntity")
        descendants = scls.descendant_classes
        descendant_names = [_item.name for _item in descendants]
        # check if Gene is in descendants
        self.assertIn('bts:Gene', descendant_names)
        # Thing must not be in descendants (Thing is an ancestor class)
        self.assertNotIn('schema:Thing', descendant_names)
        # the class itself should not be in descendants
        self.assertNotIn('bts:MolecularEntity', descendant_names)
        # test if input class is a leaf class
        scls = self.se.get_class("bts:Gene")
        descendants = scls.descendant_classes
        self.assertEqual(descendants, [])
        # test if input class does not exist
        scls = self.se.get_class("dd")
        descendants = scls.descendant_classes
        self.assertEqual(descendants, [])
        ###############################
        # test if output_type is curie
        scls = self.se.get_class("bts:MolecularEntity", output_type="curie")
        descendants = scls.descendant_classes
        # check if Gene is in descendants
        self.assertIn('bts:Gene', descendants)
        ###############################
        # test if output_type is label
        scls = self.se.get_class("bts:MolecularEntity", output_type="label")
        descendants = scls.descendant_classes
        # check if Gene is in descendants
        self.assertIn('Gene', descendants)
        ###############################
        # test if output_type is uri
        scls = self.se.get_class("bts:MolecularEntity", output_type="uri")
        descendants = scls.descendant_classes
        # check if Gene is in descendants
        self.assertIn('http://schema.biothings.io/Gene', descendants)

    def test_child_classes(self):
        """ Test child_classes function"""
        ###############################
        # test if output_type is python class
        scls = self.se.get_class("bts:MolecularEntity")
        children = scls.child_classes
        children_names = [_item.name for _item in children]
        # check if GeneFamily is in children
        self.assertIn('bts:GeneFamily', children_names)
        # Gene must not be in children (Gene is a deeper descendant)
        self.assertNotIn('bts:Gene', children_names)
        # Thing must not be in children (Thing is an ancestor class)
        self.assertNotIn('schema:Thing', children_names)
        # the class itself should not be in children
        self.assertNotIn('bts:MolecularEntity', children_names)
        # test if input class is a leaf class
        scls = self.se.get_class("bts:Gene")
        children = scls.child_classes
        self.assertEqual(children, [])
        # test if input class is not defined
        scls = self.se.get_class("dd")
        children = scls.child_classes
        self.assertEqual(children, [])
        ###############################
        # test if output_type is curie
        scls = self.se.get_class("bts:MolecularEntity", output_type="curie")
        children = scls.child_classes
        # check if GeneFamily is in children
        self.assertIn('bts:GeneFamily', children)
        ###############################
        # test if output_type is uri
        scls = self.se.get_class("bts:MolecularEntity", output_type="uri")
        children = scls.child_classes
        # check if GeneFamily is in children
        self.assertIn('http://schema.biothings.io/GeneFamily', children)
        ###############################
        # test if output_type is label
        scls = self.se.get_class("bts:MolecularEntity", output_type="label")
        children = scls.child_classes
        # check if GeneFamily is in children
        self.assertIn('GeneFamily', children)

    def test_used_by(self):
        """ Test used_by function"""
        scls = self.se.get_class("bts:GenomicEntity")
        usage = scls.used_by()
        # dedicated assertions report the offending value on failure
        self.assertGreater(len(usage), 1)
        self.assertIsInstance(usage, list)
        # test if class is not defined
        scls = self.se.get_class("dd")
        usage = scls.used_by()
        self.assertEqual(usage, [])

    def test_describe(self):
        """test describe function"""
        # an undefined class is described by an empty dict
        scls = self.se.get_class("dd")
        describe = scls.describe()
        self.assertEqual(describe, {})
예제 #23
0
 def __init__(self, doc=None):
     """Parse *doc* and cache its defined and referenced classes.

     The ``@context`` values gathered through ``ESSchema`` are handed to
     ``SchemaParser`` together with *doc*.
     """
     parser = SchemaParser(doc, ESSchema.gather_field('@context'))
     self._schema = parser
     self._classes_defs = parser.list_all_defined_classes()
     self._classes_refs = parser.list_all_referenced_classes()
예제 #24
0
 def __init__(self, schema):
     """Load biothings schema and collect its identifier properties."""
     self.se = Schema(schema)
     # every property that descends from "identifier", reported as CURIEs
     identifier = self.se.get_property('identifier', output_type="curie")
     self.all_ids = identifier.descendant_properties
예제 #25
0
# http://su07:9199/omicsdi/_search?q=*


# %% [markdown]
# ## Question
# Thread #cvisb andrew  21 days ago
# > For each repository, show how many datasets have each metadata field populated.

# %%
from collections import defaultdict, Counter
from functools import partial
from elasticsearch_dsl import Search
from elasticsearch import Elasticsearch
from biothings_schema import Schema

# Sorted labels of the properties listed for schema:Dataset
# (class_specific=False presumably includes inherited ones -- TODO confirm).
schema = Schema()
dataset = schema.get_class("schema:Dataset")
dataset_properties = dataset.list_properties(
    class_specific=False, group_by_class=False)
properties = sorted(prop['label'] for prop in dataset_properties)

# %%
client = Elasticsearch('su07:9199')
indicies = (
    'zenodo',
    'omicsdi',
    'harvard_dataverse',
    'ncbi_geo_transformed',
)

# two-level mapping whose innermost values are Counters, created on access
counter_table = partial(defaultdict, Counter)
result = defaultdict(counter_table)

count = 0
# one Search object per index, all bound to the same client
for index in indicies:
    search = Search(using=client, index=index)
예제 #26
0
 def setUp(self):
     """Load the ``extend_from_bioschemas.json`` fixture from ``_CURRENT``/data."""
     fixture_path = os.path.join(_CURRENT, 'data', 'extend_from_bioschemas.json')
     self.se = Schema(fixture_path)