Esempio n. 1
0
class TestSchemaOrg(unittest.TestCase):
    """Smoke-test SchemaClass and SchemaProperty against the BioThings schema.

    Despite the class name, ``setUp`` loads the BioThings JSON-LD file, not a
    schema.org-only schema.  The tests contain no assertions: they pass as
    long as every lookup and every ``describe()`` call returns without
    raising.
    """

    def setUp(self):
        """Load the BioThings schema and cache its classes and properties."""
        schema_url = 'https://raw.githubusercontent.com/data2health/schemas/biothings/biothings/biothings_curie_kevin.jsonld'
        self.se = Schema(schema_url)
        # Listing everything here also exercises list_all_classes and
        # list_all_properties before each test method runs.
        self.clses = self.se.list_all_classes()
        self.props = self.se.list_all_properties()

    def test_schemaclass_class(self):
        """Fetch every listed class by name and describe it."""
        for schema_cls in self.clses:
            self.se.get_class(schema_cls.name).describe()

    def test_schemaproperty_class(self):
        """Fetch every listed property by name and describe it."""
        for schema_prop in self.props:
            self.se.get_property(schema_prop.name).describe()
class TestSchemaOrg(unittest.TestCase):
    """Smoke-test SchemaClass and SchemaProperty against the schema.org schema.

    The tests contain no assertions: they pass as long as every lookup and
    every ``describe()`` call returns without raising, for the default output
    type and for each of "curie", "uri" and "label".
    """

    def setUp(self):
        """Build a schema.org-only Schema and cache its classes and properties."""
        self.se = Schema(base_schema=["schema.org"])
        # Listing everything here also exercises list_all_classes and
        # list_all_properties before each test method runs.
        self.clses = self.se.list_all_classes()
        self.props = self.se.list_all_properties()

    def test_schemaclass_class(self):
        """Describe every class with the default and each explicit output type."""
        for schema_cls in self.clses:
            # default output type first, then each explicit one
            self.se.get_class(schema_cls.name).describe()
            for output_type in ("curie", "uri", "label"):
                self.se.get_class(schema_cls.name,
                                  output_type=output_type).describe()

    def test_schemaproperty_class(self):
        """Describe every property with the default and each explicit output type."""
        for schema_prop in self.props:
            # default output type first, then each explicit one
            self.se.get_property(schema_prop.name).describe()
            for output_type in ("curie", "uri", "label"):
                self.se.get_property(schema_prop.name,
                                     output_type=output_type).describe()
Esempio n. 3
0
class TestSchemaClass(unittest.TestCase):
    """Exercise the listing and lookup methods of Schema on the BioThings schema.
    """

    def setUp(self):
        """Create the Schema under test from the BioThings JSON-LD URL."""
        self.se = Schema('https://raw.githubusercontent.com/data2health/schemas/biothings/biothings/biothings_curie_kevin.jsonld')

    def test_list_all_classes(self):
        """list_all_classes returns SchemaClass objects named by CURIE."""
        classes = self.se.list_all_classes()
        names = [item.name for item in classes]
        # the root class is listed
        self.assertIn('schema:Thing', names)
        # a class from the bts namespace is listed
        self.assertIn('bts:Gene', names)
        # a made-up class is not listed
        self.assertNotIn('bts:ffff', names)
        # names carry their prefix, so the bare label must be absent
        self.assertNotIn('Thing', names)
        # entries are exactly SchemaClass instances
        self.assertEqual(SchemaClass, type(classes[0]))

    def test_list_all_properties(self):
        """list_all_properties returns SchemaProperty objects named by CURIE."""
        properties = self.se.list_all_properties()
        names = [item.name for item in properties]
        # a schema.org property is listed
        self.assertIn('schema:name', names)
        # names carry their prefix, so the bare label must be absent
        self.assertNotIn('name', names)
        # a made-up property is not listed
        self.assertNotIn('bts:ffff', names)
        # entries are exactly SchemaProperty instances
        self.assertEqual(SchemaProperty, type(properties[0]))

    def test_get_class(self):
        """get_class returns a SchemaClass."""
        # NOTE(review): the listing test expects 'bts:Gene'; confirm that
        # 'schema:Gene' is the intended lookup here.
        found = self.se.get_class("schema:Gene")
        self.assertEqual(SchemaClass, type(found))

    def test_get_property(self):
        """get_property returns a SchemaProperty when given a bare name."""
        found = self.se.get_property("ensembl")
        self.assertEqual(SchemaProperty, type(found))
Esempio n. 4
0
class TestSchemaClass(unittest.TestCase):
    """Exercise the listing and lookup methods of Schema on a local schema file
    that extends bioschemas.
    """

    def setUp(self):
        """Create the Schema under test from data/extend_from_bioschemas.json."""
        self.se = Schema(os.path.join(_CURRENT, 'data',
                                      'extend_from_bioschemas.json'))

    def test_list_all_classes(self):
        """list_all_classes returns SchemaClass objects named by CURIE."""
        classes = self.se.list_all_classes()
        names = [item.name for item in classes]
        # the Gene class is listed under the bioschemas prefix
        self.assertIn('bioschemas:Gene', names)
        # names carry their prefix, so the bare label must be absent
        self.assertNotIn('Gene', names)
        # entries are exactly SchemaClass instances
        self.assertEqual(SchemaClass, type(classes[0]))

    def test_list_all_properties(self):
        """list_all_properties returns SchemaProperty objects named by CURIE."""
        properties = self.se.list_all_properties()
        names = [item.name for item in properties]
        # a schema.org property is listed
        self.assertIn('schema:name', names)
        # names carry their prefix, so the bare label must be absent
        self.assertNotIn('name', names)
        # a made-up property is not listed
        self.assertNotIn('bts:ffff', names)
        # entries are exactly SchemaProperty instances
        self.assertEqual(SchemaProperty, type(properties[0]))

    def test_get_class(self):
        """get_class returns a SchemaClass for a bioschemas CURIE."""
        found = self.se.get_class("bioschemas:Gene")
        self.assertEqual(SchemaClass, type(found))

    def test_get_property(self):
        """get_property returns a SchemaProperty for a bioschemas CURIE."""
        found = self.se.get_property("bioschemas:encodesBioChemEntity")
        self.assertEqual(SchemaProperty, type(found))
Esempio n. 5
0
class MappingParser():
    """Parse the mapping file between biothings schema and biothings API.

    Typical use: construct the parser, call ``load_mapping`` with a mapping
    document, then call ``connect`` to obtain a graph whose edges link the
    identifier keys of the mapping to the identifier keys of its linked
    objects.
    """
    BIOTHINGS_SCHEMA_PATH = 'https://raw.githubusercontent.com/data2health/schemas/biothings/biothings/biothings_curie_kevin.jsonld'

    def __init__(self, se=None):
        """Prepare the lookup lists used to classify mapping keys.

        :arg se: an already-loaded Schema object; when omitted (or falsy) the
            schema at ``BIOTHINGS_SCHEMA_PATH`` is loaded instead
        """
        self.se = se if se else Schema(self.BIOTHINGS_SCHEMA_PATH)
        # list all properties which are descendants of identifiers
        self.id_list = self.se.get_property("identifier",
                                            output_type="curie").descendant_properties
        # get all classes defined in biothings schema JSON-LD file
        self.defined_clses = [_item.name for _item in self.se.list_all_defined_classes()]
        # Build the lookup set once instead of once per property.
        defined = set(self.defined_clses)
        # list of properties whose "range" is among defined classes
        self.linked_prop_list = [
            _prop.name
            for _prop in self.se.list_all_defined_properties()
            if any(_item.name in defined for _item in _prop.range)
        ]
        self.cls_prop_clsf = {}

    def load_mapping(self, mapping, api=None):
        """Load a mapping document and remember which API it describes.

        :arg mapping: passed unchanged to ``load_json_or_yaml``
        :arg api: name of the API the mapping belongs to; stored on every
            edge produced by ``connect``
        """
        self.mapping = load_json_or_yaml(mapping)
        self.api = api

    def classify_keys_in_json(self, json_doc):
        """Classify the keys in a json doc.

        :arg dict json_doc: the document whose keys are classified
        :returns: a ``defaultdict(list)`` with the identifier keys under
            ``'id'`` and the linked-property keys under ``'links'``; keys
            that are neither are left out
        """
        result = defaultdict(list)
        for _key in json_doc:
            if _key in self.id_list:
                result['id'].append(_key)
            elif _key in self.linked_prop_list:
                result['links'].append(_key)
        return result

    def connect(self):
        """Build a graph connecting input identifiers to output identifiers.

        ``load_mapping`` must have been called first, because this method
        reads ``self.mapping`` and ``self.api``.  For every linked property of
        the mapping, each linked object that carries an ``"@type"`` yields one
        edge per (input id, output id) pair.  When the module-level
        ``metadata`` entry for the API has ``api_type == 'biothings'`` a
        reverse edge labelled with the inverse property is added as well.

        Side effect: a linked property whose value is a single dict is
        replaced in ``self.mapping`` by a one-element list.

        :returns: a ``networkx.MultiDiGraph``
        """
        G = nx.MultiDiGraph()
        self.type = self.mapping.get("@type")
        # classify the keys in the JSON doc
        clsf = self.classify_keys_in_json(self.mapping)
        # for each "links" properties, find its ids
        for predicate in clsf['links']:
            # normalise a single linked object to a one-element list
            if isinstance(self.mapping[predicate], dict):
                self.mapping[predicate] = [self.mapping[predicate]]
            for _pred in self.mapping[predicate]:
                if "@type" not in _pred:
                    continue
                sp = self.se.get_property(predicate)
                obj_clsf = self.classify_keys_in_json(_pred)
                common_prefix = find_common_path(get_dict_values(_pred))
                # an explicit "$input" / "$source" on the linked object
                # overrides the defaults taken from the mapping itself
                input_id = [_pred['$input']] if '$input' in _pred else clsf['id']
                source = _pred['$source'] if '$source' in _pred else self.api
                for _edge in itertools.product(input_id, obj_clsf['id']):
                    output_field = _pred[_edge[1]]
                    input_field = self.mapping[_edge[0]]
                    # several fields are flattened to a comma-separated string
                    if isinstance(input_field, list):
                        input_field = ','.join(input_field)
                    if isinstance(output_field, list):
                        output_field = ','.join(output_field)
                    G.add_edge(_edge[0], _edge[1], label=predicate,
                               mapping_key=predicate,
                               api=self.api,
                               source=source,
                               input_field=input_field,
                               input_type=self.mapping["@type"],
                               input_id=_edge[0],
                               output_id=_edge[1],
                               output_type=_pred["@type"],
                               output_field=common_prefix if common_prefix else output_field)
                    if metadata[self.api].get('api_type') == 'biothings':
                        inverse_property = sp.inverse_property.name if sp.inverse_property else None
                        if not inverse_property:
                            # report predicates that have no inverse property
                            print(predicate)
                        G.add_edge(_edge[1], _edge[0], api=self.api,
                                   input_field=output_field,
                                   input_type=_pred["@type"],
                                   source=source,
                                   input_id=_edge[1],
                                   output_id=_edge[0],
                                   output_type=self.mapping["@type"],
                                   output_field=input_field,
                                   label=inverse_property,
                                   mapping_key=_edge[0])
        return G
Esempio n. 6
0
class SchemaExtractor():
    """Extract BioThings Schema and construct networkx graph."""

    def __init__(self, schema):
        """Load biothings schema.

        :arg schema: passed unchanged to ``Schema``
        """
        self.se = Schema(schema)
        # get all properties which are descendants of "identifier" property
        self.all_ids = self.se.get_property('identifier',
                                            output_type="curie").descendant_properties

    def find_descendants(self, lst):
        """Find all descendants for a collection of class names.

        :arg lst: an iterable of class names, each passed to
            ``Schema.get_class``
        :returns: the set of all their descendant classes; an empty set when
            the input is empty
        """
        # if input is empty, return an empty set
        if not lst:
            return set()
        # find descendant of each class and then merge together into a set
        return set(itertools.chain.from_iterable(
            self.se.get_class(_cls, output_type="curie").descendant_classes
            for _cls in lst))

    def find_cls_ids(self, _cls):
        """Find all identifiers which belong to a class.

        :arg _cls: the class name, passed to ``Schema.get_class``
        :returns: a list with the curies of the class's properties that are
            descendants of "identifier"
        """
        class_properties = self.se.get_class(_cls).list_properties(group_by_class=False)
        # empty entries are skipped before their 'curie' key is read
        return [_prop['curie'] for _prop in class_properties
                if _prop and _prop['curie'] in self.all_ids]

    def schema2networkx(self):
        """Convert schema into a networkx graph.

        Logics
        ~~~~~~
        Each identifier represents a node
        node properties include its semantic type (class name)
        The edge is represented by non-identifier properties

        :returns: a ``networkx.DiGraph`` with one edge, labelled with the
            property label, from every identifier of the domain classes to
            every identifier of the range classes
        """
        G = nx.DiGraph()
        # list all properties defined in the schema
        for _property in self.se.list_all_defined_properties():
            # identifier properties are nodes, not edges; all_ids holds
            # curies, so compare the property's curie name, not the object
            if _property.name in self.all_ids:
                continue
            # find all descendants of domain classes
            input_clses = {_cls.name for _cls in _property.domain
                           if _cls.uri in self.se.full_class_only_graph}
            input_clses |= self.find_descendants(input_clses)
            # find all descendants of range classes
            output_clses = {_cls.name for _cls in _property.range
                            if _cls.uri in self.se.full_class_only_graph}
            output_clses |= self.find_descendants(output_clses)
            if not (input_clses and output_clses):
                continue
            input_ids = set(itertools.chain.from_iterable(
                self.find_cls_ids(_cls) for _cls in input_clses))
            output_ids = set(itertools.chain.from_iterable(
                self.find_cls_ids(_cls) for _cls in output_clses))
            if input_ids and output_ids:
                # every input id connects to every output id; zipping two
                # unordered sets would pair arbitrary ids and drop the rest
                G.add_edges_from(itertools.product(input_ids, output_ids),
                                 label=_property.label)
        return G
Esempio n. 7
0
class TestSchemaPropertyClass(unittest.TestCase):
    """Test SchemaProperty Class
    """
    def setUp(self):
        """Create the Schema under test from the BioThings JSON-LD URL."""
        schema_url = 'https://raw.githubusercontent.com/data2health/schemas/biothings/biothings/biothings_curie_kevin.jsonld'
        self.se = Schema(schema_url)

    def test_get_property_input_forms(self):
        """get_property resolves a name, a CURIE and a URI to the same property.

        These checks run once here rather than in setUp, so a lookup failure
        is reported by this test alone instead of by every test in the class.
        """
        queries = ("ensembl",                              # NAME only
                   "bts:ensembl",                          # CURIE only
                   "http://schema.biothings.io/ensembl")   # URI only
        for query in queries:
            sp = self.se.get_property(query)
            self.assertEqual(sp.name, "bts:ensembl", msg=query)
            self.assertEqual(sp.uri, "http://schema.biothings.io/ensembl",
                             msg=query)
            self.assertEqual(sp.label, "ensembl", msg=query)

    def test_initialization(self):
        """A property missing from the schema is flagged as not defined."""
        # if input property is not in schema, defined_in_schema should be False
        sp = SchemaProperty('dd', self.se)
        self.assertFalse(sp.defined_in_schema)

    def test_parent_properties(self):
        """ Test parent_properties function
        """
        sp = self.se.get_property("ensembl")
        parents = sp.parent_properties
        # "identifier" should be among the parents of "ensembl"
        self.assertIn("schema:identifier", [_item.name for _item in parents])
        # check negative cases
        self.assertNotIn("bts:sgd", [_item.name for _item in parents])
        # if input doesn't have parent properties, should return empty list
        sp = self.se.get_property("identifier")
        parents = sp.parent_properties
        self.assertEqual(parents, [])
        # test if input is not defined
        sp = self.se.get_property('dd')
        parents = sp.parent_properties
        self.assertEqual(parents, [])

    def test_child_properties(self):
        """ Test child_properties function"""
        sp = self.se.get_property("identifier")
        children = sp.child_properties
        child_names = [_item.name for _item in children]
        # check if ensembl is among the children
        self.assertIn('bts:ensembl', child_names)
        # check affectsExpressionOf is not among the children
        self.assertNotIn('bts:affectsExpressionOf', child_names)
        # check the property itself is not among its own children
        self.assertNotIn('schema:identifier', child_names)
        # test if input property is the leaf property
        sp = self.se.get_property("ensembl")
        children = sp.child_properties
        self.assertEqual(children, [])
        # test if input is not defined
        sp = self.se.get_property("dd")
        children = sp.child_properties
        self.assertEqual(children, [])

    def test_describe(self):
        """describe returns an empty dict for an undefined property"""
        sp = self.se.get_property("dd")
        describe = sp.describe()
        self.assertEqual(describe, {})