Esempio n. 1
0
def get_ontology(dataset: str) -> Ontology:
    """Return the ontology of `dataset`, loading it only on first request.

    Lookup order:
      1. the in-memory registry ``_data_io_vars["ont"]``;
      2. the ``ont.pkl`` file inside the dataset's cache directory;
      3. a fresh build through ``Ontology.from_dataset``, which is then
         written to that cache file.
    """
    # Fast path: already loaded during this process.
    if dataset in _data_io_vars["ont"]:
        return _data_io_vars["ont"][dataset]

    ont_cache_path = get_cache_dir(dataset) / 'ont.pkl'
    # Make sure the cache directory exists before reading or writing.
    ont_cache_path.parent.mkdir(exist_ok=True, parents=True)

    if not ont_cache_path.exists():
        # Nothing on disk yet: build from the dataset and persist it.
        ontology = Ontology.from_dataset(dataset)
        serialize(ontology, ont_cache_path)
    else:
        ontology = deserialize(ont_cache_path)

    _data_io_vars["ont"][dataset] = ontology
    return _data_io_vars["ont"][dataset]
Esempio n. 2
0
def build_ont_graph(dataset: str) -> OntGraph:
    """Build the ontology graph of `dataset` from its configured ontology files.

    For every ontology listed under ``config.datasets[dataset].ontology`` the
    file(s) are parsed with ontospy. Each class, and each range/domain of each
    property, is passed to ``add_node``; each property becomes a ``Predicate``
    (merged into the existing one when the same URI was already seen).
    Afterwards, URIs that are only referenced as a parent or child get a new
    class node, and parent/children links are mirrored on both ends.

    Any exception raised while building or merging a predicate is re-raised
    after printing the ontology name and the offending property.
    """
    ont = Ontology.from_dataset(dataset)
    graph: OntGraph = OntGraph(dataset)
    predicates_by_uri: Dict[str, Predicate] = {}

    for ont_name, ont_conf in config.datasets[dataset].ontology.items():
        if 'fpath' in ont_conf:
            ont_files = [ont_conf.fpath]
        elif 'fpaths' in ont_conf:
            # NOTE(review): this wraps `fpaths` in another list, so the loop
            # below runs once with the whole `fpaths` value and calls
            # `.as_path()` on it. Looks like it may have been meant to iterate
            # over the individual paths -- confirm against the config type.
            ont_files = [ont_conf.fpaths]
        else:
            ont_files = []

        for ont_file in ont_files:
            parsed = ontospy.Ontospy(str(ont_file.as_path()))
            # Becomes True once this file yields at least one OWL data/object
            # property.
            is_rdf_type_reliable = False

            for klass in parsed.classes:
                add_node(ont, graph, klass)

            for prop in parsed.properties:
                # Ranges first, then domains: keep the original call order.
                for range_cls in prop.ranges:
                    add_node(ont, graph, range_cls)
                for domain_cls in prop.domains:
                    add_node(ont, graph, domain_cls)

                try:
                    prop_uri = str(prop.uri)
                    new_predicate = Predicate(
                        prop_uri,
                        [str(d.uri) for d in prop.domains],
                        [str(r.uri) for r in prop.ranges],
                        ont.simplify_uri(str(prop.rdftype)),
                        False, {ont_name})

                    if prop_uri not in predicates_by_uri:
                        predicates_by_uri[prop_uri] = new_predicate
                    else:
                        predicates_by_uri[prop_uri].merge(new_predicate)

                    # The check is made on the freshly built predicate, not on
                    # the merged one stored in the dictionary.
                    if new_predicate.rdf_type in {
                            PredicateType.OWL_DATA_PROP,
                            PredicateType.OWL_OBJECT_PROP
                    }:
                        is_rdf_type_reliable = True
                except Exception:
                    # Dump the property for debugging, then propagate.
                    print(ont_name, prop)
                    print(prop.__dict__)
                    raise

            # Stamp the reliability flag of this file on every predicate
            # collected so far that is defined in the current ontology.
            for known_predicate in predicates_by_uri.values():
                if ont_name in known_predicate.defined_in_onts:
                    known_predicate.is_rdf_type_reliable = is_rdf_type_reliable

    graph.set_predicates(list(predicates_by_uri.values()))

    # Create nodes for URIs that are referenced as parent/child but were never
    # defined themselves.
    for node in graph.iter_nodes():
        for linked_uri in node.parents_uris.union(node.children_uris):
            if graph.has_node_with_uri(linked_uri):
                continue
            graph.add_new_node(
                GraphNodeType.CLASS_NODE,
                ont.simplify_uri(linked_uri).encode('utf-8'), linked_uri,
                set(), set())

    # Mirror the links so that a parent knows its children and vice versa.
    for node in graph.iter_nodes():
        for parent_uri in node.parents_uris:
            graph.get_node_by_uri(parent_uri).children_uris.add(node.uri)
        for child_uri in node.children_uris:
            graph.get_node_by_uri(child_uri).parents_uris.add(node.uri)

    return graph
Esempio n. 3
0
            semantic_models = []
            tables = []
            for i, raw_tbl in enumerate(raw_tables):
                r2rml_file = mapping_dir / f"{raw_tbl.id}-model.yml"
                tbl, sm = R2RML.load_from_file(r2rml_file).apply_build(raw_tbl)
                semantic_models.append(sm)
                tables.append(tbl)

            serializeJSON(semantic_models, cache_file)
            _data_io_vars["data_tables"][dataset] = tables

        _data_io_vars["semantic_models"][dataset] = semantic_models

    return _data_io_vars["semantic_models"][dataset]


if __name__ == '__main__':
    # Script entry point: for the 'museum_crm' dataset, render each semantic
    # model to a PDF and write each data table as UTF-8 text, under the
    # dataset directory.
    dataset = 'museum_crm'
    ont = Ontology.from_dataset(dataset)

    data_dir = Path(config.datasets[dataset].as_path())
    # Create both output folders up front.
    for viz_dir in ("models-viz", "tables-viz"):
        (data_dir / viz_dir).mkdir(exist_ok=True, parents=True)

    for sm in get_semantic_models(dataset):
        pdf_path = data_dir / f"models-viz/{sm.id}.pdf"
        sm.graph.render2pdf(pdf_path)

    for tbl in get_data_tables(dataset):
        txt_path = data_dir / "tables-viz" / f"{tbl.id}.txt"
        txt_path.write_bytes(tbl.to_string().encode("utf-8"))